Add 'historic_run.py' script.
This is a refactor of the historic_memory_usage.py script to allow running an
arbitrary command on each commit. The new script takes the command via --cmd
and runs it (without a shell) with the commit hash as its only argument.
Change-Id: Id769fb5fd8b6e0058bd03e648e89cc1edcb7dd05
diff --git a/tools/historic_memory_usage.py b/tools/historic_memory_usage.py
index 376426f..bb08a2e 100755
--- a/tools/historic_memory_usage.py
+++ b/tools/historic_memory_usage.py
@@ -10,13 +10,13 @@
# It will then run the oldest and newest such commit, and gradually fill in
# the commits in between.
+import historic_run
import optparse
import os
import subprocess
import sys
import utils
-MASTER_COMMITS = 'gs://r8-releases/raw/master'
APPS = ['gmscore', 'nest', 'youtube', 'gmail', 'chrome']
COMPILERS = ['d8', 'r8']
@@ -31,7 +31,7 @@
default='gmail',
choices=APPS)
result.add_option('--top',
- default=utils.get_HEAD_sha1(),
+ default=historic_run.top_or_default(),
help='The most recent commit to test')
result.add_option('--bottom',
help='The oldest commit to test')
@@ -44,84 +44,8 @@
help='Set timeout instead of waiting for OOM.')
return result.parse_args(argv)
-
-class GitCommit(object):
- def __init__(self, git_hash, destination_dir, destination, timestamp):
- self.git_hash = git_hash
- self.destination_dir = destination_dir
- self.destination = destination
- self.timestamp = timestamp
-
- def __str__(self):
- return '%s : %s (%s)' % (self.git_hash, self.destination, self.timestamp)
-
- def __repr__(self):
- return self.__str__()
-
-def git_commit_from_hash(hash):
- commit_timestamp = subprocess.check_output(['git', 'show', '--no-patch',
- '--no-notes', '--pretty=\'%ct\'',
- hash]).strip().strip('\'')
- destination_dir = '%s/%s/' % (MASTER_COMMITS, hash)
- destination = '%s%s' % (destination_dir, 'r8.jar')
- commit = GitCommit(hash, destination_dir, destination, commit_timestamp)
- return commit
-
-def enumerate_git_commits(options):
- top = options.top if options.top else utils.get_HEAD_sha1()
- # TODO(ricow): if not set, search back 1000
- if not options.bottom:
- raise Exception('No bottom specified')
- bottom = options.bottom
- output = subprocess.check_output(['git', 'rev-list', '--first-parent', top])
- found_bottom = False
- commits = []
- for c in output.splitlines():
- commits.append(git_commit_from_hash(c.strip()))
- if c.strip() == bottom:
- found_bottom = True
- break
- if not found_bottom:
- raise Exception('Bottom not found, did you not use a merge commit')
- return commits
-
-def get_available_commits(commits):
- cloud_commits = subprocess.check_output(['gsutil.py', 'ls', MASTER_COMMITS]).splitlines()
- available_commits = []
- for commit in commits:
- if commit.destination_dir in cloud_commits:
- available_commits.append(commit)
- return available_commits
-
-def print_commits(commits):
- for commit in commits:
- print(commit)
-
-def permutate_range(start, end):
- diff = end - start
- assert diff >= 0
- if diff == 1:
- return [start, end]
- if diff == 0:
- return [start]
- half = end - (diff / 2)
- numbers = [half]
- first_half = permutate_range(start, half - 1)
- second_half = permutate_range(half + 1, end)
- for index in range(len(first_half)):
- numbers.append(first_half[index])
- if index < len(second_half):
- numbers.append(second_half[index])
- return numbers
-
-def permutate(number_of_commits):
- assert number_of_commits > 0
- numbers = permutate_range(0, number_of_commits - 1)
- assert all(n in numbers for n in range(number_of_commits))
- return numbers
-
-def pull_r8_from_cloud(commit):
- utils.download_file_from_cloud_storage(commit.destination, utils.R8_JAR)
+def make_run_on_app_command(options):  # Binds options; result takes a commit.
+  return lambda commit: run_on_app(options, commit)
def run_on_app(options, commit):
app = options.app
@@ -142,30 +66,14 @@
f.write(stdout)
print('Wrote stdout to: %s' % stdout_path)
-
-def benchmark(commits, options):
- commit_permutations = permutate(len(commits))
- count = 0
- for index in commit_permutations:
- count += 1
- print('Running commit %s out of %s' % (count, len(commits)))
- commit = commits[index]
- if not utils.cloud_storage_exists(commit.destination):
- # We may have a directory, but no r8.jar
- continue
- pull_r8_from_cloud(commit)
- print('Running for commit: %s' % commit.git_hash)
- run_on_app(options, commit)
-
def main(argv):
(options, args) = ParseOptions(argv)
if not options.app:
raise Exception('Please specify an app')
-  commits = enumerate_git_commits(options)
-  available_commits = get_available_commits(commits)
-  print('Running for:')
-  print_commits(available_commits)
-  benchmark(available_commits, options)
+  top = historic_run.top_or_default(options.top)
+  bottom = historic_run.bottom_or_default(options.bottom)  # Raises if unset.
+  command = make_run_on_app_command(options)
+  historic_run.run(command, top, bottom)  # dryrun keeps its default, False.
if __name__ == '__main__':
sys.exit(main(sys.argv[1:]))
diff --git a/tools/historic_run.py b/tools/historic_run.py
new file mode 100755
index 0000000..b0f181f
--- /dev/null
+++ b/tools/historic_run.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+# Copyright (c) 2019, the R8 project authors. Please see the AUTHORS file
+# for details. All rights reserved. Use of this source code is governed by a
+# BSD-style license that can be found in the LICENSE file.
+
+# Convenience script for running a command over builds back in time. This
+# utilizes the prebuilt full r8 jars on cloud storage. The script finds all
+# commits that exist on cloud storage in the given range. It then runs them in
+# bisecting order (midpoint first), gradually filling in the commits between.
+
+import optparse
+import os
+import subprocess
+import sys
+import time
+import utils
+
+MASTER_COMMITS = 'gs://r8-releases/raw/master'
+
+def ParseOptions(argv):
+  """Parse argv and return optparse's (options, args) pair."""
+  parser = optparse.OptionParser()
+  parser.add_option('--cmd', help='Command to run')
+  # The default is evaluated right here, whether or not --top is passed.
+  parser.add_option(
+      '--top',
+      default=top_or_default(),
+      help='The most recent commit to test')
+  parser.add_option('--bottom', help='The oldest commit to test')
+  # optparse exposes this flag as options.dry_run.
+  parser.add_option(
+      '--dry-run',
+      action='store_true',
+      default=False,
+      help='Do not download or run the command, but print the actions')
+  parser.add_option(
+      '--output',
+      default='build',
+      help='Directory where to output results')
+  parsed = parser.parse_args(argv)
+  return parsed
+
+
+class GitCommit(object):
+  """A git hash with its destination directory, destination and timestamp."""
+  def __init__(self, git_hash, destination_dir, destination, timestamp):
+    self.git_hash, self.timestamp = git_hash, timestamp
+    self.destination_dir, self.destination = destination_dir, destination
+
+  def __str__(self):
+    fields = (self.git_hash, self.destination, self.timestamp)
+    return '%s : %s (%s)' % fields
+
+  def __repr__(self):
+    return self.__str__()
+
+def git_commit_from_hash(hash):
+  """Build the GitCommit for hash, asking git for its %ct timestamp."""
+  timestamp = subprocess.check_output(
+      ['git', 'show', '--no-patch', '--no-notes', '--pretty=\'%ct\'', hash]
+  ).strip().strip('\'')  # The format quotes %ct; peel the quotes off.
+  destination_dir = '%s/%s/' % (MASTER_COMMITS, hash)
+  jar = '%s%s' % (destination_dir, 'r8.jar')
+  return GitCommit(hash, destination_dir, jar, timestamp)
+
+def enumerate_git_commits(top, bottom):
+  """List GitCommits on top's first-parent chain, from top down to bottom.
+
+  Raises Exception if bottom never shows up in that chain.
+  """
+  revs = subprocess.check_output(['git', 'rev-list', '--first-parent', top])
+  commits = []
+  for line in revs.splitlines():
+    commit_hash = line.strip()
+    commits.append(git_commit_from_hash(commit_hash))
+    if commit_hash == bottom:
+      return commits
+  raise Exception('Bottom not found, did you not use a merge commit')
+
+def get_available_commits(commits):
+  """Return the commits whose directory is listed under MASTER_COMMITS.
+
+  Input order is kept; the listing is held in a set for O(1) lookups.
+  """
+  cloud_commits = set(subprocess.check_output(
+      ['gsutil.py', 'ls', MASTER_COMMITS]).splitlines())
+  return [c for c in commits if c.destination_dir in cloud_commits]
+
+def print_commits(commits):  # Prints each commit on its own line.
+  for entry in commits:
+    print(entry)
+
+def permutate_range(start, end):
+  """Return every int in [start, end]: midpoint first, halves interleaved."""
+  diff = end - start
+  assert diff >= 0
+  if diff <= 1:
+    return [start] if diff == 0 else [start, end]
+  # Floor division keeps the midpoint an int on both Python 2 and Python 3.
+  half = end - (diff // 2)
+  numbers = [half]
+  first_half = permutate_range(start, half - 1)
+  second_half = permutate_range(half + 1, end)
+  for index, value in enumerate(first_half):
+    numbers.append(value)
+    if index < len(second_half):
+      numbers.append(second_half[index])
+  return numbers
+
+def permutate(number_of_commits):  # Indices 0..n-1, reordered.
+  assert number_of_commits > 0
+  order = permutate_range(0, number_of_commits - 1)
+  assert set(range(number_of_commits)).issubset(order)
+  return order
+
+def pull_r8_from_cloud(commit):  # Presumably fetches the jar to utils.R8_JAR.
+  utils.download_file_from_cloud_storage(commit.destination, utils.R8_JAR)
+
+def benchmark(commits, command, dryrun=False):
+  """Call command(commit) per commit with a jar, in permutate() order."""
+  total = len(commits)
+  for count, index in enumerate(permutate(total), 1):
+    print('Running commit %s out of %s' % (count, total))
+    commit = commits[index]
+    # We may have a directory, but no r8.jar
+    if not utils.cloud_storage_exists(commit.destination):
+      continue
+    # A dry run skips only the download; command still gets called.
+    if not dryrun:
+      pull_r8_from_cloud(commit)
+    print('Running for commit: %s' % commit.git_hash)
+    command(commit)
+
+def top_or_default(top=None):  # Falsy top -> utils.get_HEAD_sha1().
+  return top or utils.get_HEAD_sha1()
+
+def bottom_or_default(bottom=None):
+  """Return bottom, or raise since no default exists yet (see TODO)."""
+  if bottom:  # TODO(ricow): if not set, search back 1000
+    return bottom
+  raise Exception('No bottom specified')
+
+def run(command, top, bottom, dryrun=False):
+  """Run command on each commit in [bottom, top] present on cloud storage."""
+  available = get_available_commits(enumerate_git_commits(top, bottom))
+  print('Running for:')
+  print_commits(available)
+  benchmark(available, command, dryrun=dryrun)
+
+def make_cmd(options):  # Binds options; the result takes only a commit.
+  return lambda commit: run_cmd(options, commit)
+
+def run_cmd(options, commit):
+  """Run options.cmd with the commit hash; capture stdout/stderr to files."""
+  cmd = [options.cmd, commit.git_hash]
+  time_commit = '%s_%s' % (commit.timestamp, commit.git_hash)
+  time_commit_path = os.path.join(options.output or 'build', time_commit)
+  print(' '.join(cmd))
+  if not options.dry_run:
+    if not os.path.exists(time_commit_path):
+      os.makedirs(time_commit_path)
+    stdout_path = os.path.join(time_commit_path, 'stdout')
+    stderr_path = os.path.join(time_commit_path, 'stderr')
+    with open(stdout_path, 'w') as stdout, open(stderr_path, 'w') as stderr:
+      process = subprocess.Popen(cmd, stdout=stdout, stderr=stderr)
+      timeout = 1000  # Roughly seconds: one poll per one-second sleep.
+      while process.poll() is None and timeout > 0:
+        time.sleep(1)
+        timeout -= 1
+      if process.poll() is None:
+        process.kill()
+        process.wait()  # Reap the child so it does not linger as a zombie.
+        print('Task timed out')
+        stderr.write('timeout\n')
+    print('Wrote outputs to: %s' % time_commit_path)
+
+def main(argv):
+  """Parse the command line and run the requested command over the range."""
+  options, _ = ParseOptions(argv)
+  if not options.cmd:
+    raise Exception('Please specify a command')
+  top = top_or_default(options.top)
+  bottom = bottom_or_default(options.bottom)
+  run(make_cmd(options), top, bottom, dryrun=options.dry_run)
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv[1:]))  # main() returns None -> exit status 0.