253 lines
8.8 KiB
Python
253 lines
8.8 KiB
Python
# Copyright 2012 Google Inc. All Rights Reserved.
|
|
# Author: mrdmnd@ (Matt Redmond)
|
|
"""A client to pull data from Bartlett.
|
|
|
|
Inspired by //depot/google3/experimental/mobile_gwp/database/app_engine_pull.py

The server houses perf.data.gz, board, chrome version for each upload.
This script first authenticates with a proper @google.com account, then
downloads a sample (if it's not already cached) and unzips perf.data.

Authenticate(): Gets login info and returns an auth token.
DownloadSamples(): Download and unzip samples.
_GetServePage(): Pulls /serve page from the app engine server.
_DownloadSampleFromServer(): Downloads a local compressed copy of a sample.
_UncompressSample(): Decompresses a sample, deleting the compressed version.
|
|
"""
|
|
import cookielib
|
|
import getpass
|
|
import gzip
|
|
import optparse
|
|
import os
|
|
import urllib
|
|
import urllib2
|
|
|
|
# Base URL of the App Engine app that serves the uploaded samples.
# NOTE(review): this is plain http, and the auth token is sent to this host
# as a query parameter -- consider https; confirm the server supports it.
SERVER_NAME = 'http://chromeoswideprofiling.appspot.com'
# Sent as the 'source' field of the ClientLogin request in Authenticate().
APP_NAME = 'chromeoswideprofiling'
# Separates the fields of one sample entry on the /serve page, and the parts
# of the local filename built for a sample.
DELIMITER = '~'
|
|
|
|
|
|
def Authenticate(server_name):
  """Gets credentials from user and attempts to retrieve auth token.

  Prompts on stdin for an email and a password, installs a cookie-aware
  urllib2 opener process-wide (so later urllib2.urlopen() calls share the
  same cookie jar), then asks the Google accounts ClientLogin service for an
  auth token.

  TODO: Accept OAuth2 instead of password.

  Args:
    server_name: (string) URL that the app engine code is living on.
        Not used by the login request itself, which always goes to the
        Google accounts service; kept for interface compatibility.

  Returns:
    authtoken: (string) The authorization token that can be used
        to grab other pages, or None if the login request failed or the
        response carried no 'Auth' field.
  """
  # Grab username and password from user through stdin.
  username = raw_input('Email (must be @google.com account): ')
  password = getpass.getpass('Password: ')

  # Use a cookie to authenticate with GAE.
  cookiejar = cookielib.LWPCookieJar()
  opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
  urllib2.install_opener(opener)

  # Get an AuthToken from Google accounts service.
  auth_uri = 'https://www.google.com/accounts/ClientLogin'
  authreq_data = urllib.urlencode({'Email': username,
                                   'Passwd': password,
                                   'service': 'ah',
                                   'source': APP_NAME,
                                   'accountType': 'HOSTED_OR_GOOGLE'})
  auth_req = urllib2.Request(auth_uri, data=authreq_data)
  try:
    auth_resp = urllib2.urlopen(auth_req)
  except urllib2.URLError:
    print('Error logging in to Google accounts service.')
    return None
  try:
    body = auth_resp.read()
  finally:
    auth_resp.close()

  # Auth response contains several NAME=value lines.
  # We care about the part after Auth=
  auth_resp_dict = {}
  for line in body.splitlines():
    # Split on the first '=' only: the value itself may contain '='.
    name, separator, value = line.partition('=')
    if separator:
      auth_resp_dict[name] = value
  # A missing token is reported as None, which callers treat as failure.
  return auth_resp_dict.get('Auth')
|
|
|
|
|
|
def DownloadSamples(server_name, authtoken, output_dir, start, stop):
  """Download the selected samples and write unzipped versions to output_dir.

  Fetches the /serve page, splits it into sample entries on '</br>', selects
  the entries in [start, stop), prints them, and then downloads and
  uncompresses each one. Blank entries are ignored; entries that do not have
  the expected five DELIMITER-separated fields are reported and skipped.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken: (string) Authorization token.
    output_dir: (string) Filepath to write output to.
    start: (int, or string holding an int) Index to start downloading from,
        starting at top.
    stop: (int, or string holding an int) Index to stop downloading,
        non-inclusive. -1 for end.

  Returns:
    None
  """
  server_name = server_name.rstrip('/')

  serve_page_string = _GetServePage(server_name, authtoken)
  if serve_page_string is None:
    print('Error getting /serve page.')
    return

  # Values taken from the command line may arrive as strings; slicing needs
  # real integers.
  start = int(start)
  stop = int(stop)
  if stop == -1:
    # -1 is documented as "through the end". A literal [start:-1] slice would
    # instead leave out the final entry.
    stop = None

  sample_list = serve_page_string.split('</br>')
  # Drop blank entries, e.g. the empty tail left after a trailing '</br>'.
  sample_list_subset = [entry for entry in sample_list[start:stop]
                        if entry.strip()]

  print('Will download:')
  for sample in sample_list_subset:
    print(sample)

  for sample in sample_list_subset:
    sample_info = [s.strip() for s in sample.split(DELIMITER)]
    if len(sample_info) < 5:
      print('Skipping malformed sample entry: %s' % sample)
      continue
    key = sample_info[0]
    # No space between date and time.
    timestamp = sample_info[1].replace(' ', '_')
    # sample_info[2] is not used here (the original code labelled it
    # sample_md5).
    board = sample_info[3]
    version = sample_info[4]

    # Put a compressed copy of the sample in the output directory, then
    # replace it with the uncompressed data.
    _DownloadSampleFromServer(server_name, authtoken, key, timestamp, board,
                              version, output_dir)
    _UncompressSample(key, timestamp, board, version, output_dir)
|
|
|
|
|
|
def _BuildFilenameFromParams(key, time, board, version):
  """Compose the local filename used for a sample.

  The name is the four fields, in the order given, joined with DELIMITER.

  Args:
    key: (string) Key indexing our sample in the datastore.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release

  Returns:
    (string) The filename.
  """
  parts = (key, time, board, version)
  return DELIMITER.join(parts)
|
|
|
|
|
|
def _DownloadSampleFromServer(server_name, authtoken, key, time, board, version,
                              output_dir):
  """Downloads the compressed sample for `key` into output_dir.

  The file is written as <filename>.gz, where <filename> comes from
  _BuildFilenameFromParams(). Nothing is downloaded when the uncompressed
  file <filename> already exists in output_dir.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken: (string) Authorization token.
    key: (string) Key indexing our sample in the datastore
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir: (string) Filepath to write to output to.

  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  if os.path.exists(os.path.join(output_dir, filename)):
    print('Already downloaded %s, skipping.' % filename)
    return

  compressed_path = os.path.join(output_dir, filename + '.gz')

  # Go through the App Engine login endpoint, which consumes the auth token
  # and then redirects to the page named by 'continue'.
  serv_uri = server_name + '/serve/' + key
  serv_args = {'continue': serv_uri, 'auth': authtoken}
  full_serv_uri = server_name + '/_ah/login?%s' % urllib.urlencode(serv_args)
  serv_resp = urllib2.urlopen(urllib2.Request(full_serv_uri))

  chunk_size = 64 * 1024
  try:
    # Binary mode: the payload is gzip data, which text mode may corrupt on
    # platforms that translate newlines.
    with open(compressed_path, 'wb') as compressed_file:
      # Copy in chunks so a large sample is never held in memory whole.
      while True:
        chunk = serv_resp.read(chunk_size)
        if not chunk:
          break
        compressed_file.write(chunk)
  finally:
    serv_resp.close()
|
|
|
|
|
|
def _UncompressSample(key, time, board, version, output_dir):
  """Uncompresses a given sample.gz file and deletes the compressed version.

  Does nothing when the uncompressed file already exists. If decompression
  fails part-way, the partial output is removed so that a later run does not
  mistake it for a finished sample; the compressed file is then left in place
  and the error propagates.

  Args:
    key: (string) Sample key to uncompress.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir: (string) Filepath to find sample key in.

  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  output_path = os.path.join(output_dir, filename)
  compressed_path = os.path.join(output_dir, filename + '.gz')

  if os.path.exists(output_path):
    print('Already decompressed %s, skipping.' % filename)
    return

  chunk_size = 64 * 1024
  finished = False
  # Open the input first: if the .gz is missing or unreadable, no output file
  # is ever created.
  in_file = gzip.open(compressed_path, 'rb')
  try:
    with open(output_path, 'wb') as out_file:
      while True:
        chunk = in_file.read(chunk_size)
        if not chunk:
          break
        out_file.write(chunk)
    finished = True
  finally:
    in_file.close()
    # The existence of output_path is what marks a sample as done, so never
    # leave a truncated one behind.
    if not finished and os.path.exists(output_path):
      os.remove(output_path)

  os.remove(compressed_path)
|
|
|
|
|
|
def _DeleteSampleFromServer(server_name, authtoken, key):
  """Requests the /del page for `key` to delete that sample from the datastore.

  The request goes through the server's /_ah/login endpoint, passing the auth
  token and the /del URL as the page to continue to.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken: (string) Authorization token.
    key: (string) Key to delete.

  Returns:
    None
  """
  delete_uri = server_name + '/del/' + key
  query = urllib.urlencode({'continue': delete_uri, 'auth': authtoken})
  login_uri = server_name + '/_ah/login?%s' % query
  urllib2.urlopen(urllib2.Request(login_uri))
|
|
|
|
|
|
def _GetServePage(server_name, authtoken):
  """Opens the /serve page and lists all keys.

  The request goes through the server's /_ah/login endpoint, passing the auth
  token and the /serve URL as the page to continue to.

  Args:
    server_name: (string) URL the app engine code is living on.
    authtoken: (string) Authorization token.

  Returns:
    The text of the /serve page (including HTML tags), or None if the page
    could not be opened.
  """
  serv_uri = server_name + '/serve'
  serv_args = {'continue': serv_uri, 'auth': authtoken}
  full_serv_uri = server_name + '/_ah/login?%s' % urllib.urlencode(serv_args)
  try:
    serv_resp = urllib2.urlopen(urllib2.Request(full_serv_uri))
  except urllib2.URLError:
    # Report failure as None: DownloadSamples() checks for None and prints
    # its own error message.
    return None
  try:
    return serv_resp.read()
  finally:
    serv_resp.close()
|
|
|
|
|
|
def main():
  """Parses the command line, authenticates, and downloads the samples.

  Flags:
    --output_dir: existing directory that receives the perf data files.
    --start: index of the first sample to download (default 0).
    --stop: index to stop at, non-inclusive (default -1).

  Returns:
    0 after DownloadSamples() has run; 1 if --output_dir is missing or is not
    an existing directory, or if no auth token could be obtained.
  """
  parser = optparse.OptionParser()
  parser.add_option('--output_dir',
                    dest='output_dir',
                    action='store',
                    help='Path to output perf data files.')
  # type='int' makes optparse convert (and validate) the indices; without it
  # a user-supplied value reaches the list slice as a string.
  parser.add_option('--start',
                    dest='start_ind',
                    action='store',
                    type='int',
                    default=0,
                    help='Start index.')
  parser.add_option('--stop',
                    dest='stop_ind',
                    action='store',
                    type='int',
                    default=-1,
                    help='Stop index.')
  options = parser.parse_args()[0]
  if not options.output_dir:
    print('Must specify --output_dir.')
    return 1
  # isdir rather than exists: a regular file would pass an existence check
  # and only fail later, once files are written beneath it.
  if not os.path.isdir(options.output_dir):
    print('Specified output_dir does not exist or is not a directory.')
    return 1

  authtoken = Authenticate(SERVER_NAME)
  if not authtoken:
    print('Could not obtain authtoken, exiting.')
    return 1
  DownloadSamples(SERVER_NAME, authtoken, options.output_dir, options.start_ind,
                  options.stop_ind)
  print('Downloaded samples.')
  return 0
|
|
|
|
|
|
if __name__ == '__main__':
  # sys.exit rather than the bare exit() helper: exit() is installed by the
  # site module for interactive use and is absent when site is not loaded.
  import sys
  sys.exit(main())
|