#!/usr/bin/python

from __future__ import print_function

import argparse
import logging
import multiprocessing
import multiprocessing.pool  # Needed for multiprocessing.pool.ThreadPool below.
import os
import subprocess
import sys
import time

import common
from autotest_lib.server import frontend
from autotest_lib.site_utils.lib import infra


DEPLOY_SERVER_LOCAL = '/usr/local/autotest/site_utils/deploy_server_local.py'
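# Path of the per-host update script that this wrapper runs remotely (via
# infra.execute_command) on every server being pushed; see update_server().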
POOL_SIZE = 124
PUSH_ORDER = {'database': 0,
              'database_slave': 0,
              'drone': 1,
              'shard': 1,
              'golo_proxy': 1,
              'sentinel': 1,
              'afe': 2,
              'scheduler': 2,
              'host_scheduler': 2,
              'suite_scheduler': 2}
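# A server is pushed in the lowest-numbered group among its roles; for example,
# a host with roles ['drone', 'scheduler'] lands in group min(1, 2) == 1.
# Servers whose roles all fall outside PUSH_ORDER are pushed together in a
# final catch-all group (see discover_servers below).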


def discover_servers(afe, server_filter=set()):
    """Discover the in-production servers to update.

    @param afe: Server to contact with RPC requests.
    @param server_filter: A set of servers to get status for.

    @returns: A list of lists of (server_name, server_status, roles) tuples.
            The outer list is sorted in the order the servers should be
            updated; servers in the same sublist can be pushed together.

    """
    # Example server details...
    # {
    #     'hostname': 'server1',
    #     'status': 'backup',
    #     'roles': ['drone', 'scheduler'],
    #     'attributes': {'max_processes': 300}
    # }
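    # The value returned below groups such servers by push order, e.g.
    # (hostnames and statuses here are purely illustrative):
    #   [[('db1', 'backup', ['database'])],
    #    [('drone1', 'primary', ['drone', 'scheduler'])],
    #    [('afe1', 'primary', ['afe'])]]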
    rpc = frontend.AFE(server=afe)
    servers = rpc.run('get_servers')

    # Do not update servers that need repair, and filter the server list by
    # the given server_filter if one was provided.
    servers = [s for s in servers
               if (s['status'] != 'repair_required' and
                   (not server_filter or s['hostname'] in server_filter))]

    # Do not update reserve, devserver or crash_server (not YET supported).
    servers = [s for s in servers if 'devserver' not in s['roles'] and
               'crash_server' not in s['roles'] and
               'reserve' not in s['roles']]

    sorted_servers = []
    for i in range(max(PUSH_ORDER.values()) + 1):
        sorted_servers.append([])
    servers_with_unknown_order = []
    for server in servers:
        info = (server['hostname'], server['status'], server['roles'])
        try:
            order = min([PUSH_ORDER[r] for r in server['roles']
                         if r in PUSH_ORDER])
            sorted_servers[order].append(info)
        except ValueError:
            # None of this server's roles appear in PUSH_ORDER, so min() was
            # called on an empty list.
            servers_with_unknown_order.append(info)

    # Push all servers with unknown roles together.
    if servers_with_unknown_order:
        sorted_servers.append(servers_with_unknown_order)

    found_servers = set([s['hostname'] for s in servers])
    # Inject the servers passed in by the user but not found in the server
    # database.
    extra_servers = []
    for server in server_filter - found_servers:
        extra_servers.append((server, 'unknown', ['unknown']))
    if extra_servers:
        sorted_servers.append(extra_servers)

    return sorted_servers


def parse_arguments(args):
    """Parse command line arguments.

    @param args: The command line arguments to parse. (usually sys.argv[1:])

    @returns An argparse.Namespace populated with argument values.
    """
    parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description='Command to update an entire autotest installation.',
            epilog=('Update all servers:\n'
                    ' deploy_server.py\n'
                    '\n'
                    'Update one server:\n'
                    ' deploy_server.py <server>\n'
                    '\n'
                    'Send arguments to remote deploy_server_local.py:\n'
                    ' deploy_server.py -- --dryrun\n'
                    '\n'
                    'See what arguments would be run on specified servers:\n'
                    ' deploy_server.py --dryrun <server_a> <server_b> --'
                    ' --skip-update\n'))

    parser.add_argument('-v', '--verbose', action='store_true', dest='verbose',
                        help='Log all deploy script output.')
    parser.add_argument('--continue', action='store_true', dest='cont',
                        help='Continue to the next server on failure.')
    parser.add_argument('--afe', required=True,
                        help='The main (AFE) server for this installation, '
                             'e.g. cautotest.')
    parser.add_argument('--update_push_servers', action='store_true',
                        help='Indicate to update test_push servers.')
    parser.add_argument('--force_update', action='store_true',
                        help='Force running the update commands for afe, tko '
                             'and build_externals.')
    parser.add_argument('--dryrun', action='store_true',
                        help='Don\'t actually run remote commands.')
    parser.add_argument('--logfile', action='store',
                        default='/tmp/deployment.log',
                        help='Path of the file to save the deployment log to. '
                             'Default is /tmp/deployment.log.')
    parser.add_argument('args', nargs=argparse.REMAINDER,
                        help=('<server>, <server> ... -- <remote_arg>, '
                              '<remote_arg> ...'))

    results = parser.parse_args(args)

    # We take the args list and split it further. Everything before -- is a
    # server name, and everything after it is an argument to pass along to
    # deploy_server_local.py.
    #
    # This:
    #   server_a server_b -- --dryrun --skip-report
    #
    # Becomes:
    #   results.servers = ['server_a', 'server_b']
    #   results.args = ['--dryrun', '--skip-report']
    try:
        local_args_index = results.args.index('--') + 1
    except ValueError:
        # If -- isn't present, they are all servers.
        results.servers = results.args
        results.args = []
    else:
        # Split the arguments.
        results.servers = results.args[:local_args_index - 1]
        results.args = results.args[local_args_index:]

    return results


def update_server(inputs):
    """Deploy to a given server.

    @param inputs: Inputs for the update action, including:
                   server: Name of the server to update.
                   status: Status of the server.
                   options: Options for the update.

    @return: A tuple of (server, success, output), where:
             server: Name of the server that was updated.
             success: True if the update succeeds, False otherwise.
             output: A string of the deploy_server_local script output,
                     including any errors.

    """
    start = time.time()
    server = inputs['server']
    status = inputs['status']
    # Shared list used to record the servers that have finished.
    finished_servers = inputs['finished_servers']
    options = inputs['options']
    print('Updating server %s...' % server)
    if status == 'backup':
        extra_args = ['--skip-service-status']
    else:
        extra_args = []

    cmd = ('%s %s' %
           (DEPLOY_SERVER_LOCAL, ' '.join(options.args + extra_args)))
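    # For instance, if '--dryrun' was forwarded after '--' on the command line
    # and the server is in 'backup' status, cmd would be (illustrative only):
    #   /usr/local/autotest/site_utils/deploy_server_local.py --dryrun --skip-service-status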
    output = '%s: %s' % (server, cmd)
    success = True
    if not options.dryrun:
        for i in range(5):
            try:
                print('[%d/5] Trying to update server %s' % (i + 1, server))
                output = infra.execute_command(server, cmd)
                finished_servers.append(server)
                break
            except subprocess.CalledProcessError as e:
                print('%s: Command failed with error: %s' % (server, e))
                success = False
                output = e.output

    print('Time used to update server %s: %s' % (server, time.time() - start))
    return server, success, output


def update_in_parallel(servers, options):
    """Update a group of servers in parallel.

    @param servers: A list of (server_name, server_status, roles) tuples.
    @param options: Options for the push.

    @returns A list of servers that failed to update.
    """
    # Create a shared list to record all the servers that have finished.
    manager = multiprocessing.Manager()
    finished_servers = manager.list()
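    # This manager-backed list is passed to every update_server() call so the
    # progress loop below can see which servers have finished while the pool
    # is still running.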

    args = []
    for server, status, _ in servers:
        args.append({'server': server,
                     'status': status,
                     'finished_servers': finished_servers,
                     'options': options})
    # The update actions run in parallel. If any update fails, we should still
    # wait for the other running updates to finish; aborting in the middle of
    # an update may leave a server in a bad state.
    pool = multiprocessing.pool.ThreadPool(POOL_SIZE)
    try:
        failed_servers = []
        results = pool.map_async(update_server, args)
        pool.close()

        # Track the update progress for the current group of servers.
        incomplete_servers = set()
        server_names = set([s[0] for s in servers])
        while not results.ready():
            incomplete_servers = server_names - set(finished_servers)
            print('Not finished yet. %d servers in this group. '
                  '%d servers are still running:\n%s\n' %
                  (len(servers), len(incomplete_servers), incomplete_servers))
            # Check the progress every minute.
            results.wait(60)

        # After the updates finish, parse the results.
        for server, success, output in results.get():
            if options.dryrun:
                print('Dry run, updating server %s is skipped.' % server)
            else:
                if success:
                    msg = 'Successfully updated server %s.\n' % server
                    if options.verbose:
                        print(output)
                        print()
                else:
                    msg = ('Failed to update server %s.\nError: %s' %
                           (server, output.strip()))
                    print(msg)
                    failed_servers.append(server)
                # Write the result into the logfile.
                with open(options.logfile, 'a') as f:
                    f.write(msg)
    finally:
        pool.terminate()
        pool.join()

    return failed_servers


def main(args):
    """Main routine that drives all the real work.

    @param args: The command line arguments to parse. (usually sys.argv)

    @returns The system exit code.
    """
    options = parse_arguments(args[1:])
    # Remove all handlers from the root logger to get rid of the handlers
    # introduced by imported packages.
    logging.getLogger().handlers = []
    logging.basicConfig(level=logging.DEBUG
                        if options.verbose else logging.INFO)

    print('Retrieving server status...')
    sorted_servers = discover_servers(options.afe, set(options.servers or []))

    # Display what we plan to update.
    print('Will update (in this order):')
    i = 1
    for servers in sorted_servers:
        print('%s Group %d (%d servers) %s' % ('=' * 30, i, len(servers),
                                               '=' * 30))
        for server, status, roles in servers:
            print('\t%-36s:\t%s\t%s' % (server, status, roles))
        i += 1
    print()

    if os.path.exists(options.logfile):
        os.remove(options.logfile)
    print('Starting the update. Push logs for every server will be saved '
          'at %s' % options.logfile)
    failed = []
    skipped = []
    for servers in sorted_servers:
        if not failed or options.cont:
            failed += update_in_parallel(servers, options)
        else:
            skipped.extend(s[0] for s in servers)  # Only include server name.

    if failed:
        print('Errors updating:')
        for server in failed:
            print(' %s' % server)
        print()
        print('To retry:')
        print(' %s <options> %s' %
              (str(args[0]), str(' '.join(failed + skipped))))
        # Exit with error.
        return 1


if __name__ == '__main__':
    sys.exit(main(sys.argv))