import os, select

import virt_utils, virt_vm, aexpect


class scheduler:
    """
    A scheduler that manages several parallel test execution pipelines on a
    single host.
    """

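    # Typical usage (a sketch): fork one child process per worker, run
    # worker() in each child and scheduler() in the parent.  The `job`
    # object providing run_test is an assumption about the surrounding
    # autotest framework.
    #
    #     s = scheduler(tests, num_workers=4, total_cpus=8,
    #                   total_mem=8192, bindir=test_bindir)
    #     for i in range(s.num_workers):
    #         if os.fork() == 0:
    #             s.worker(i, job.run_test)
    #             os._exit(0)
    #     s.scheduler()
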
    def __init__(self, tests, num_workers, total_cpus, total_mem, bindir):
        """
        Initialize the class.

        @param tests: A list of test dictionaries.
        @param num_workers: The number of workers (pipelines).
        @param total_cpus: The total number of CPUs to dedicate to tests.
        @param total_mem: The total amount of memory to dedicate to tests.
        @param bindir: The directory where environment files reside.
        """
        self.tests = tests
        self.num_workers = num_workers
        self.total_cpus = total_cpus
        self.total_mem = total_mem
        self.bindir = bindir
        # Pipes -- s stands for scheduler, w stands for worker
        self.s2w = [os.pipe() for i in range(num_workers)]
        self.w2s = [os.pipe() for i in range(num_workers)]
        self.s2w_r = [os.fdopen(r, "r", 0) for r, w in self.s2w]
        self.s2w_w = [os.fdopen(w, "w", 0) for r, w in self.s2w]
        self.w2s_r = [os.fdopen(r, "r", 0) for r, w in self.w2s]
        self.w2s_w = [os.fdopen(w, "w", 0) for r, w in self.w2s]
        # "Personal" worker dicts contain modifications that are applied
        # specifically to each worker.  For example, each worker must use a
        # different environment file and a different MAC address pool.
        self.worker_dicts = [{"env": "env%d" % i} for i in range(num_workers)]

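    # Wire protocol between the scheduler and its workers, one line per
    # message:
    #   scheduler -> worker:  "run <test_index>", "cleanup", "terminate"
    #   worker -> scheduler:  "ready", "done <test_index> <status>",
    #                         "cleanup_done"
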
    def worker(self, index, run_test_func):
        """
        The worker function.

        Waits for commands from the scheduler and processes them.

        @param index: The index of this worker (in the range
                0..num_workers-1).
        @param run_test_func: A function to be called to run a test
                (e.g. job.run_test).
        """
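        # Each test is a plain dict of parameters.  The keys this module
        # relies on (the values below are hypothetical examples):
        #   {"name": "smp2.migrate", "shortname": "migrate",
        #    "dep": ["install"], "iterations": 1,
        #    "used_cpus": 2, "used_mem": 1024}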
        r = self.s2w_r[index]
        w = self.w2s_w[index]
        self_dict = self.worker_dicts[index]

        # Inform the scheduler this worker is ready
        w.write("ready\n")

        while True:
            cmd = r.readline().split()
            if not cmd:
                continue

            # The scheduler wants this worker to run a test
            if cmd[0] == "run":
                test_index = int(cmd[1])
                test = self.tests[test_index].copy()
                test.update(self_dict)
                test_iterations = int(test.get("iterations", 1))
                status = run_test_func("kvm", params=test,
                                       tag=test.get("shortname"),
                                       iterations=test_iterations)
                w.write("done %s %s\n" % (test_index, status))
                w.write("ready\n")

            # The scheduler wants this worker to free its used resources
            elif cmd[0] == "cleanup":
                env_filename = os.path.join(self.bindir, self_dict["env"])
                env = virt_utils.Env(env_filename)
                for obj in env.values():
                    if isinstance(obj, virt_vm.VM):
                        obj.destroy()
                    elif isinstance(obj, aexpect.Spawn):
                        obj.close()
                env.save()
                w.write("cleanup_done\n")
                w.write("ready\n")

            # There's no more work for this worker
            elif cmd[0] == "terminate":
                break

    def scheduler(self):
        """
        The scheduler function.

        Sends commands to workers, telling them to run tests, clean up or
        terminate execution.
        """
        idle_workers = []
        closing_workers = []
        test_status = ["waiting"] * len(self.tests)
        test_worker = [None] * len(self.tests)
        used_cpus = [0] * self.num_workers
        used_mem = [0] * self.num_workers

        while True:
            # Wait for a message from a worker
            r, w, x = select.select(self.w2s_r, [], [])

            someone_is_ready = False

            for pipe in r:
                worker_index = self.w2s_r.index(pipe)
                msg = pipe.readline().split()
                if not msg:
                    continue

                # A worker is ready -- add it to the idle_workers list
                if msg[0] == "ready":
                    idle_workers.append(worker_index)
                    someone_is_ready = True

                # A worker completed a test
                elif msg[0] == "done":
                    test_index = int(msg[1])
                    test = self.tests[test_index]
                    # The worker reports run_test_func's boolean return
                    # value as the literal string "True" or "False"
                    status = int(msg[2] == "True")
                    test_status[test_index] = ("fail", "pass")[status]
                    # If the test failed, mark all tests that depend on it
                    # as "fail" too
                    if not status:
                        for i, other_test in enumerate(self.tests):
                            for dep in other_test.get("dep", []):
                                if dep in test["name"]:
                                    test_status[i] = "fail"

                # A worker is done shutting down its VMs and other processes
                elif msg[0] == "cleanup_done":
                    used_cpus[worker_index] = 0
                    used_mem[worker_index] = 0
                    closing_workers.remove(worker_index)

            if not someone_is_ready:
                continue

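            # Dependencies are matched by substring: a test satisfies a
            # dependency "dep" of another test if "dep" occurs in its full
            # name (e.g. a test with dep ["install"] waits for every test
            # whose name contains "install"; names here are hypothetical).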
            for worker in idle_workers[:]:
                # Find a test for this worker
                test_found = False
                for i, test in enumerate(self.tests):
                    # We only want "waiting" tests
                    if test_status[i] != "waiting":
                        continue
                    # Make sure the test isn't assigned to another worker
                    if test_worker[i] is not None and test_worker[i] != worker:
                        continue
                    # Make sure the test's dependencies are satisfied
                    dependencies_satisfied = True
                    for dep in test.get("dep", []):
                        dependencies = [j for j, t in enumerate(self.tests)
                                        if dep in t["name"]]
                        bad_status_deps = [j for j in dependencies
                                           if test_status[j] != "pass"]
                        if bad_status_deps:
                            dependencies_satisfied = False
                            break
                    if not dependencies_satisfied:
                        continue
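                    # The two resource checks below differ only in whether
                    # workers currently shutting down are counted.  A
                    # hypothetical example: with total_cpus=8, other workers
                    # holding 6 CPUs of which 2 belong to a closing worker,
                    # a test needing 4 CPUs passes the first check
                    # (4 + 4 <= 8, so test_found is set) but fails the
                    # second (6 + 4 > 8), and stays "waiting" until the
                    # cleanup finishes.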
                    # Make sure we have enough resources to run the test
                    test_used_cpus = int(test.get("used_cpus", 1))
                    test_used_mem = int(test.get("used_mem", 128))
                    # First make sure the other workers aren't using too many
                    # CPUs (not including the workers currently shutting down)
                    uc = (sum(used_cpus) - used_cpus[worker] -
                          sum(used_cpus[cw] for cw in closing_workers))
                    if uc and uc + test_used_cpus > self.total_cpus:
                        continue
                    # ... or too much memory
                    um = (sum(used_mem) - used_mem[worker] -
                          sum(used_mem[cw] for cw in closing_workers))
                    if um and um + test_used_mem > self.total_mem:
                        continue
                    # If we reached this point it means there are, or will
                    # soon be, enough resources to run the test
                    test_found = True
                    # Now check if the test can be run right now, i.e. if the
                    # other workers, including the ones currently shutting
                    # down, aren't using too many CPUs
                    uc = sum(used_cpus) - used_cpus[worker]
                    if uc and uc + test_used_cpus > self.total_cpus:
                        continue
                    # ... or too much memory
                    um = sum(used_mem) - used_mem[worker]
                    if um and um + test_used_mem > self.total_mem:
                        continue
                    # Everything is OK -- run the test
                    test_status[i] = "running"
                    test_worker[i] = worker
                    idle_workers.remove(worker)
                    # Update used_cpus and used_mem
                    used_cpus[worker] = test_used_cpus
                    used_mem[worker] = test_used_mem
                    # Assign all related tests to this worker
                    for j, other_test in enumerate(self.tests):
                        for other_dep in other_test.get("dep", []):
                            # All tests that depend on this test
                            if other_dep in test["name"]:
                                test_worker[j] = worker
                                break
                            # ... and all tests that share a dependency
                            # with this test
                            for dep in test.get("dep", []):
                                if dep in other_dep or other_dep in dep:
                                    test_worker[j] = worker
                                    break
                    # Tell the worker to run the test
                    self.s2w_w[worker].write("run %s\n" % i)
                    break

                # If there won't be any tests for this worker to run soon,
                # tell the worker to free its used resources
                if not test_found and (used_cpus[worker] or used_mem[worker]):
                    self.s2w_w[worker].write("cleanup\n")
                    idle_workers.remove(worker)
                    closing_workers.append(worker)

            # If there are no more new tests to run, terminate the workers
            # and the scheduler
            if len(idle_workers) == self.num_workers:
                for worker in idle_workers:
                    self.s2w_w[worker].write("terminate\n")
                break