101 lines
2.7 KiB
Python
Executable file
101 lines
2.7 KiB
Python
Executable file
#!/usr/bin/python
|
|
|
|
"""
|
|
Poll server-status on cautotest to watch for RPCs taking longer than 10s. Then
|
|
we go and ssh around to figure out what the command line of the process that
|
|
caused the RPC was so that one can track down what is generating the expensive
|
|
RPC load.
|
|
"""
|
|
|
|
try:
|
|
from bs4 import BeautifulSoup
|
|
except ImportError:
|
|
print 'Run `apt-get install python-bs4`'
|
|
raise
|
|
|
|
import time
|
|
import subprocess
|
|
import multiprocessing
|
|
|
|
import common
|
|
import requests
|
|
|
|
|
|
def check_cautotest():
    """Scrape cautotest's server-status page for long-running requests.

    Fetches http://cautotest/server-status, walks the rows of the first
    table on the page and keeps the rows whose fourth cell is 'W', whose
    sixth cell is an integer greater than 10 and whose second cell is
    not '-'.

    NOTE(review): the column meanings (second = PID, fourth = worker mode,
    sixth = seconds spent on the current request) are presumed from the
    Apache mod_status layout -- confirm against the live page.

    Returns:
        A list of (second cell, fourth cell, sixth cell) string tuples, one
        per matching row. Empty when the page contains no table at all.
    """
    page = requests.get('http://cautotest/server-status').text
    soup = BeautifulSoup(page)
    table = soup.table
    if table is None:
        # No table on the page (e.g. an error page): report "nothing slow"
        # instead of crashing the polling loop with an AttributeError.
        return []

    pids = []
    for row in table.findAll('tr'):
        cols = [cell.text.strip() for cell in row.findAll('td')]
        # Rows without <td> cells (presumably the header row) and rows too
        # short to index cannot describe a worker; skip them.
        if len(cols) < 6:
            continue
        pid, mode, seconds = cols[1], cols[3], cols[5]
        if mode != 'W' or pid == '-':
            continue
        try:
            elapsed = int(seconds)
        except ValueError:
            # Not a number, so it cannot be compared with the threshold.
            continue
        if elapsed > 10:
            pids.append((pid, mode, seconds))
    return pids
|
|
|
|
def pull_cautotest_info(proc_id):
    """Find the remote end of an established connection held by a process.

    Runs `sudo lsof -i` on cautotest (through `become`), pre-filtered by
    grep to the lines that mention proc_id and ESTABLISHED, and then uses
    the first of those lines whose PID column is exactly proc_id. The grep
    alone is only a substring match, so it can also return lines where the
    digits of proc_id merely appear inside a port or another PID.

    Args:
        proc_id: PID on cautotest, as a string.

    Returns:
        The part of the lsof NAME column after '->', split on ':'
        (presumably [host, port] -- TODO confirm for non-IPv4 peers), or
        None when the command fails or no usable line is found.
    """
    try:
        # universal_newlines=True yields text on both Python 2 and 3.
        conn = subprocess.check_output('become chromeos-test@cautotest -- '
            '"sudo lsof -i | grep -e %s | grep -e ESTABLISHED"' % proc_id,
            shell=True, universal_newlines=True)
        return _peer_of_pid(conn, proc_id)
    except Exception:
        # Best effort: any failure simply means "no info available".
        return None


def _peer_of_pid(lsof_output, proc_id):
    """Return the split remote endpoint from proc_id's first lsof line."""
    wanted = '%s' % proc_id
    for line in lsof_output.splitlines():
        fields = line.split()
        # lsof columns: COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
        if len(fields) > 8 and fields[1] == wanted and '->' in fields[8]:
            return fields[8].split('->')[1].split(':')
    return None
|
|
|
|
def strace_cautotest(proc_id):
    """Capture the first 20 lines of strace output for a cautotest process.

    Runs `sudo strace -s 500 -p <proc_id>` on cautotest (through `become`)
    with stderr folded into stdout and the result cut off by `head -n 20`.

    Args:
        proc_id: PID on cautotest to attach strace to.

    Returns:
        The captured output, or "" if the command exits with a non-zero
        status.
    """
    command = ('become chromeos-test@cautotest -- '
               '"sudo strace -s 500 -p %s 2>&1 | head -n 20"' % proc_id)
    try:
        return subprocess.check_output(command, shell=True)
    except subprocess.CalledProcessError:
        return ""
|
|
|
|
def pull_drone_info(host, port):
    """Look up the command line of the process using a port on a host.

    Runs `sudo lsof -i` on host (through `become`), pre-filtered by grep to
    ESTABLISHED lines mentioning ':<port>', and takes the PID of the first
    line where one endpoint's port is exactly port. The grep alone is a
    prefix match, so ':80' would also match ':8080'. It then reads
    /proc/<pid>/cmdline on the same host.

    Args:
        host: Machine to inspect, as accepted by `become chromeos-test@`.
        port: Port of the connection, as a string.

    Returns:
        The process's command line with arguments separated by spaces, or
        '' when any step fails or no matching connection is found.
    """
    try:
        # universal_newlines=True yields text on both Python 2 and 3.
        lsof = subprocess.check_output('become chromeos-test@%s -- '
            '"sudo lsof -i | grep -e :%s | grep -e ESTABLISHED"'
            % (host, port), shell=True, universal_newlines=True)
        proc_id = _pid_on_port(lsof, port)
        if proc_id is None:
            return ''
        cmdline = subprocess.check_output('become chromeos-test@%s -- '
            '"cat /proc/%s/cmdline"' % (host, proc_id),
            shell=True, universal_newlines=True)
    except Exception:
        # Best effort: any failure simply means "no info available".
        return ''
    # /proc/<pid>/cmdline separates the arguments with NUL bytes; turn them
    # into spaces so the printed command line is readable.
    return cmdline.replace('\0', ' ').strip()


def _pid_on_port(lsof_output, port):
    """Return the PID column of the first lsof line using exactly port."""
    wanted = '%s' % port
    for line in lsof_output.splitlines():
        fields = line.split()
        # lsof columns: COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
        if len(fields) < 9:
            continue
        # NAME looks like "local:port->remote:port"; compare whole ports.
        ports = [end.rsplit(':', 1)[-1] for end in fields[8].split('->')]
        if wanted in ports:
            return fields[1]
    return None
|
|
|
|
def pull_all_data(pid, queue):
    """Collect everything known about one slow request and enqueue it.

    Args:
        pid: Tuple describing the slow request; its first element is the
            process id handed to the lookup helpers.
        queue: Queue that receives exactly one item from this call: a
            (pid, remote_info, drone_info, straced) tuple, or None if
            anything raised along the way.
    """
    try:
        proc_id = pid[0]
        remote_info = pull_cautotest_info(proc_id)
        # Without a remote endpoint there is nothing to look up.
        drone_info = pull_drone_info(*remote_info) if remote_info else None
        straced = strace_cautotest(proc_id)
        report = (pid, remote_info, drone_info, straced)
        queue.put(report)
    except Exception:
        queue.put(None)
|
|
|
|
def print_data(x):
    """Print one slow-request report to stdout, followed by a bell.

    Args:
        x: A (pid, remote_info, drone_info, straced) tuple, where pid is
            itself a 3-tuple that fills in the "stuck in" headline.
    """
    (pid, remote_info, drone_info, straced) = x
    # Single-argument print(...) behaves exactly like the Python 2 print
    # statement and also parses on Python 3.
    print("*** %s stuck in %s for %s secs" % pid)
    print(remote_info)
    print(drone_info)
    print(straced)
    # '\a' is the ASCII bell character.
    print('\a')
|
|
|
|
def main():
    """Poll cautotest forever and report every slow request that is found.

    Each round starts one worker process per entry returned by
    check_cautotest(), prints every non-empty result the workers put on
    the shared queue, stops the workers and then sleeps for 5 seconds.
    """
    while True:
        queue = multiprocessing.Queue()
        processes = []
        for pid in check_cautotest():
            proc = multiprocessing.Process(target=pull_all_data,
                                           args=(pid, queue))
            proc.start()
            processes.append(proc)
        try:
            # pull_all_data puts exactly one item per worker, so read as
            # many items as there are workers.
            for _ in processes:
                x = queue.get()
                if x:
                    print_data(x)
        finally:
            # Always stop the workers, even if reading or printing raised
            # (e.g. Ctrl-C), and join them so they do not linger.
            for proc in processes:
                proc.terminate()
                proc.join()
        time.sleep(5)


# Only poll when run as a script, so importing this module has no side effects.
if __name__ == '__main__':
    main()
|