# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Site extensions to server_job. Adds distribute_across_machines()."""

import os, logging, multiprocessing
from autotest_lib.server import site_gtest_runner, site_server_job_utils
from autotest_lib.server import subcommand
from autotest_lib.server.server_job import base_server_job
import utils


def get_site_job_data(job):
    """Add custom data to the job keyval info.

    When multiple machines are used in a job, change the hostname to
    the platform of the first machine instead of machine1,machine2,... This
    makes the job reports easier to read and keeps the tko_machines table from
    growing too large.

    Args:
        job: instance of server_job.

    Returns:
        keyval dictionary with new hostname value, or empty dictionary.
    """
    site_job_data = {}
    # Only modify hostname on multimachine jobs. Assume all hosts have the
    # same platform.
    if len(job.machines) > 1:
        # Search through machines for the first machine with a platform.
        for host in job.machines:
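            # Per-host keyvals (including the 'platform' label) are read from
            # <resultdir>/host_keyvals/<hostname>.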
            keyval_path = os.path.join(job.resultdir, 'host_keyvals', host)
            keyvals = utils.read_keyval(keyval_path)
            host_plat = keyvals.get('platform', None)
            if not host_plat:
                continue
            site_job_data['hostname'] = host_plat
            break
    return site_job_data


class site_server_job(base_server_job):
    """Extend server_job adding distribute_across_machines."""

    def __init__(self, *args, **dargs):
        super(site_server_job, self).__init__(*args, **dargs)


    def run(self, *args, **dargs):
        """Extend server_job.run adding gtest_runner to the namespace."""

        gtest_run = {'gtest_runner': site_gtest_runner.gtest_runner()}

        # Namespace is the 5th parameter to run(). If args has 5 or more
        # entries in it then we need to fix up this namespace entry.
        if len(args) >= 5:
            args[4].update(gtest_run)
        # Else, if present, namespace must be in dargs.
        else:
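            # setdefault() returns the caller-supplied namespace dict if one
            # was passed (installing gtest_run otherwise), and update() then
            # ensures gtest_runner is present in whichever dict is used.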
            dargs.setdefault('namespace', gtest_run).update(gtest_run)
        # Now call the original run() with the modified namespace containing a
        # gtest_runner.
        super(site_server_job, self).run(*args, **dargs)


    def distribute_across_machines(self, tests, machines,
                                   continuous_parsing=False):
"""Run each test in tests once using machines.
|
|
|
|
Instead of running each test on each machine like parallel_on_machines,
|
|
run each test once across all machines. Put another way, the total
|
|
number of tests run by parallel_on_machines is len(tests) *
|
|
len(machines). The number of tests run by distribute_across_machines is
|
|
len(tests).
|
|
|
|
Args:
|
|
tests: List of tests to run.
|
|
machines: List of machines to use.
|
|
continuous_parsing: Bool, if true parse job while running.
|
|
"""
|
|
# The Queue is thread safe, but since a machine may have to search
|
|
# through the queue to find a valid test the lock provides exclusive
|
|
# queue access for more than just the get call.
|
|
test_queue = multiprocessing.JoinableQueue()
|
|
test_queue_lock = multiprocessing.Lock()
|
|
|
|
unique_machine_attributes = []
|
|
sub_commands = []
|
|
work_dir = self.resultdir
|
|
|
|
for machine in machines:
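            # Grouped (multi-machine) result directories get a per-machine
            # subdirectory so each worker records its results separately.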
            if 'group' in self.resultdir:
                work_dir = os.path.join(self.resultdir, machine)

            mw = site_server_job_utils.machine_worker(self,
                                                      machine,
                                                      work_dir,
                                                      test_queue,
                                                      test_queue_lock,
                                                      continuous_parsing)

            # Create the subcommand instance to run this machine worker.
            sub_commands.append(subcommand.subcommand(mw.run,
                                                      [],
                                                      work_dir))

            # To (potentially) speed up searching for valid tests create a list
            # of unique attribute sets present in the machines for this job. If
            # sets were hashable we could just use a dictionary for fast
            # verification. This at least reduces the search space from the
            # number of machines to the number of unique machines.
            if mw.attribute_set not in unique_machine_attributes:
                unique_machine_attributes.append(mw.attribute_set)

        # Only queue tests which are valid on at least one machine. Record
        # skipped tests in the status.log file using record_skipped_test().
        for test_entry in tests:
            # Check if it's an old style test entry.
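            # Old-style entries supply include/exclude/attributes positionally;
            # convert them to the single attributes dictionary used below.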
            if len(test_entry) > 2 and not isinstance(test_entry[2], dict):
                test_attribs = {'include': test_entry[2]}
                if len(test_entry) > 3:
                    test_attribs['exclude'] = test_entry[3]
                if len(test_entry) > 4:
                    test_attribs['attributes'] = test_entry[4]

                test_entry = list(test_entry[:2])
                test_entry.append(test_attribs)

            ti = site_server_job_utils.test_item(*test_entry)
            machine_found = False
            for ma in unique_machine_attributes:
                if ti.validate(ma):
                    test_queue.put(ti)
                    machine_found = True
                    break
            if not machine_found:
                self.record_skipped_test(ti)

        # Run valid tests and wait for completion.
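        # Each machine_worker consumes the tests it can run from the shared
        # queue; parallel() returns once every worker has finished.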
        subcommand.parallel(sub_commands)


    def record_skipped_test(self, skipped_test, message=None):
        """Insert a TEST_NA record into status.log for this skipped test."""
        msg = message
        if msg is None:
            msg = 'No valid machines found for test %s.' % skipped_test
        logging.info(msg)
        self.record('START', None, skipped_test.test_name)
        self.record('INFO', None, skipped_test.test_name, msg)
        self.record('END TEST_NA', None, skipped_test.test_name, msg)
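

# Illustrative usage from a server-side control file (a sketch only; the
# control-file context, the TESTS list contents, and the 'machines' variable
# are assumptions, not part of this module):
#
#     job.distribute_across_machines(TESTS, machines, continuous_parsing=True)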