blob: 3158420210137b1dd6657c37e9e9b76de637093a [file] [log] [blame]
#!/usr/bin/env python3
# Copyright (c) 2018, the R8 project authors. Please see the AUTHORS file
# for details. All rights reserved. Use of this source code is governed by a
# BSD-style license that can be found in the LICENSE file.
# Run all internal tests, archive result to cloud storage.
# In the continuous operation flow we have a tester continuously checking
# a specific cloud storage location for a file with a git hash.
# If the file is there, the tester will remove the file, and add another
# file stating that this is now being run. After successfully running,
# the tester will add yet another file, and remove the last one.
# Complete flow with states:
# 1:
# BOT:
# Add file READY_FOR_TESTING (contains git hash)
# Wait until file TESTING_COMPLETE exists (contains git hash)
# Timeout if no progress for RUN_TIMEOUT
# Cleanup READY_FOR_TESTING and TESTING
# 2:
# TESTER:
# Replace file READY_FOR_TESTING by TESTING (contains git hash)
# Run tests for git hash
# Upload commit specific logs if failures
# Upload git specific overall status file (failed or succeeded)
# Replace file TESTING by TESTING_COMPLETE (contains git hash)
# 3:
# BOT:
# Read overall status
# Delete TESTING_COMPLETE
# Exit based on status
import gradle
import optparse
import os
import subprocess
import sys
import time
import utils
import run_on_app
import chrome_data
import youtube_data
# How often the bot/tester should check state (seconds between polls).
PULL_DELAY = 30
# Subdirectory of the results bucket where internal results are stored.
TEST_RESULT_DIR = 'internal'
# Magic files used to synchronize bot and tester through cloud storage
# (see the state machine described at the top of this file).
READY_FOR_TESTING = 'READY_FOR_TESTING'
TESTING = 'TESTING'
TESTING_COMPLETE = 'TESTING_COMPLETE'
ALL_MAGIC = [READY_FOR_TESTING, TESTING, TESTING_COMPLETE]
# Log file names used when archiving a command's results.
STDERR = 'stderr'
STDOUT = 'stdout'
EXITCODE = 'exitcode'
TIMED_OUT = 'timed_out'
# Data modules for the apps used in memory benchmarking.
# NOTE(review): not referenced elsewhere in this file; possibly consumed by
# importers of this module — confirm before removing.
BENCHMARK_APPS = [chrome_data, youtube_data]
# Python modules this script depends on.
# NOTE(review): also not referenced in this file — confirm usage elsewhere.
DEPENDENT_PYTHON_FILES = [gradle, utils, run_on_app]
def find_min_xmx_command(app_data):
    """Build the run_on_app.py command that searches for the minimal Xmx.

    The search window (min/max) and target range size come from the app's
    recorded memory data for its latest version.
    """
    version = app_data.GetLatestVersion()
    record = app_data.GetMemoryData(version)
    low = record['find-xmx-min']
    high = record['find-xmx-max']
    span = record['find-xmx-range']
    # The window must be well-formed and wider than the target range size.
    assert low < high
    assert span < high - low
    cmd = ['tools/run_on_app.py', '--compiler=r8', '--compiler-build=lib']
    cmd.append('--app=%s' % app_data.GetName())
    cmd.append('--version=%s' % version)
    cmd.extend(['--no-debug', '--no-build', '--find-min-xmx'])
    cmd.append('--find-min-xmx-min-memory=%s' % low)
    cmd.append('--find-min-xmx-max-memory=%s' % high)
    cmd.append('--find-min-xmx-range-size=%s' % span)
    cmd.append('--find-min-xmx-archive')
    return cmd
def compile_with_memory_max_command(app_data):
    """Command compiling the app with Xmx slightly above its OOM threshold.

    Returns an empty command (skip) when the app's record opts out via
    'skip-find-xmx-max'.
    """
    version = app_data.GetLatestVersion()
    record = app_data.GetMemoryData(version)
    if 'skip-find-xmx-max' in record:
        return []
    # TODO(b/152939233): Remove this special handling when fixed.
    if app_data.GetName() == 'chrome':
        factor = 1.25
    else:
        factor = 1.15
    headroom = int(record['oom-threshold'] * factor)
    return [
        'tools/run_on_app.py', '--compiler=r8', '--compiler-build=lib',
        '--app=%s' % app_data.GetName(), '--version=%s' % version,
        '--no-debug', '--no-build', '--max-memory=%s' % headroom
    ]
def compile_with_memory_min_command(app_data):
    """Command compiling the app with Xmx below its OOM threshold.

    The limit is 85% of the recorded threshold, so the compile is expected
    to run out of memory (--expect-oom).
    """
    version = app_data.GetLatestVersion()
    record = app_data.GetMemoryData(version)
    limit = int(record['oom-threshold'] * 0.85)
    return [
        'tools/run_on_app.py', '--compiler=r8', '--compiler-build=lib',
        '--app=%s' % app_data.GetName(), '--version=%s' % version,
        '--no-debug', '--no-build', '--expect-oom',
        '--max-memory=%s' % limit
    ]
# Commands run before testing when archiving results.
CLEAN_COMMANDS = [
    # Make sure we have a clean build to not be polluted by old test files
    ['tools/gradle.py', 'clean'],
]
# TODO(b/210982978): Enable testing of min xmx again
# Commands whose combined success/failure is the overall test status.
TEST_COMMANDS = [
    # Run test.py internal testing.
    [
        'tools/test.py', '--only_internal', '--slow_tests',
        '--java_max_memory_size=8G'
    ],
    # Run internal benchmarks.
    ['tools/perf.py', '--internal', '--iterations-inner', '3'],
    # Ensure that all internal apps compile.
    ['tools/run_on_app.py', '--run-all', '--out=out', '--workers', '3'],
]
# Command timeout, in seconds.
RUN_TIMEOUT = 3600 * 7
# Bot-side timeout: allow every test command its full individual timeout.
BOT_RUN_TIMEOUT = RUN_TIMEOUT * len(TEST_COMMANDS)
def log(message):
    """Print a message prefixed with a timestamp and flush immediately.

    The parameter was previously named 'str', shadowing the builtin; all
    call sites pass it positionally, so the rename is safe.
    """
    print("%s: %s" % (time.strftime("%c"), message))
    sys.stdout.flush()
def ParseOptions():
    """Define and parse the command line flags for this script.

    Returns the (options, args) pair produced by optparse.
    """
    parser = optparse.OptionParser()
    parser.add_option('--continuous',
                      action='store_true',
                      default=False,
                      help='Continuously run internal tests and post results to GCS.')
    parser.add_option('--print_logs',
                      default=None,
                      help='Fetch logs from gcs and print them, takes the commit to print for.')
    parser.add_option('--bot',
                      action='store_true',
                      default=False,
                      help='Run in bot mode, i.e., scheduling runs.')
    parser.add_option('--archive',
                      action='store_true',
                      default=False,
                      help='Post result to GCS, implied by --continuous')
    return parser.parse_args()
def ensure_git_clean():
    """Exit the script if the local git checkout has modifications."""
    changes = subprocess.check_output(['git', 'diff']).decode('utf-8')
    if changes:
        log('Local modifications to the git repo, exiting')
        sys.exit(1)
def git_pull():
    """Update the local main branch and return its HEAD sha1."""
    ensure_git_clean()
    for command in (['git', 'checkout', 'main'], ['git', 'pull']):
        subprocess.check_call(command)
    return utils.get_HEAD_sha1()
def git_checkout(git_hash):
    """Check out git_hash and return its sha1, or None if checkout fails."""
    ensure_git_clean()
    # Ensure that we are up to date to get the commit.
    git_pull()
    if subprocess.call(['git', 'checkout', git_hash]) != 0:
        return None
    return utils.get_HEAD_sha1()
def get_test_result_dir():
    """Bucket-relative directory holding all internal test results."""
    bucket = utils.R8_INTERNAL_TEST_RESULTS_BUCKET
    return os.path.join(bucket, TEST_RESULT_DIR)
def get_sha_destination(sha):
    """Result directory for one specific commit sha."""
    results_dir = get_test_result_dir()
    return os.path.join(results_dir, sha)
def archive_status(failed):
    """Record the overall pass/fail status for the current HEAD commit."""
    sha = utils.get_HEAD_sha1()
    gs_destination = 'gs://%s' % get_sha_destination(sha)
    utils.archive_value('status', gs_destination, failed)
def get_status(sha):
    """Fetch the archived overall status for the given commit sha."""
    status_file = 'gs://%s/status' % get_sha_destination(sha)
    return utils.cat_file_on_cloud_storage(status_file)
def archive_log(stdout, stderr, exitcode, timed_out, cmd):
    """Upload one command's exit code, timeout flag and log files to GCS.

    Logs land in a per-command directory under the current HEAD's result
    directory; the browsable URL is logged for convenience.
    """
    sha = utils.get_HEAD_sha1()
    cmd_dir = cmd.replace(' ', '_').replace('/', '_')
    destination = os.path.join(get_sha_destination(sha), cmd_dir)
    gs_destination = 'gs://%s' % destination
    url = 'https://storage.cloud.google.com/%s' % destination
    log('Archiving logs to: %s' % gs_destination)
    for name, value in ((EXITCODE, exitcode), (TIMED_OUT, timed_out)):
        utils.archive_value(name, gs_destination, value)
    for name, path in ((STDOUT, stdout), (STDERR, stderr)):
        utils.archive_file(name, gs_destination, path)
    log('Logs available at: %s' % url)
def get_magic_file_base_path():
    """GCS directory that holds the synchronization (magic) files."""
    results_dir = get_test_result_dir()
    return 'gs://%s/magic' % results_dir
def get_magic_file_gs_path(name):
    """Full GCS path of a single named magic file."""
    return '/'.join([get_magic_file_base_path(), name])
def get_magic_file_exists(name):
    """True if the named magic file is present in cloud storage."""
    path = get_magic_file_gs_path(name)
    return utils.file_exists_on_cloud_storage(path)
def delete_magic_file(name):
    """Remove the named magic file from cloud storage."""
    path = get_magic_file_gs_path(name)
    utils.delete_file_from_cloud_storage(path)
def put_magic_file(name, sha):
    """Create the named magic file containing the given git sha."""
    base = get_magic_file_base_path()
    utils.archive_value(name, base, sha)
def get_magic_file_content(name, ignore_errors=False):
    """Read and return the contents of the named magic file."""
    path = get_magic_file_gs_path(name)
    return utils.cat_file_on_cloud_storage(path, ignore_errors=ignore_errors)
def print_magic_file_state():
    """Log which magic files currently exist and what each contains."""
    log('Magic file status:')
    for name in ALL_MAGIC:
        if not get_magic_file_exists(name):
            continue
        content = get_magic_file_content(name, ignore_errors=True)
        log('%s content: %s' % (name, content))
def fetch_and_print_logs(git_hash):
    """Download and print all archived logs for the given commit.

    For every archived command directory, prints the exit code, timeout
    flag, stderr and stdout, then prints the find-min-xmx ranges.

    The parameter was previously named 'hash', shadowing the builtin; the
    only caller (Main) passes it positionally, so the rename is safe.
    """
    gs_base = 'gs://%s' % get_sha_destination(git_hash)
    listing = utils.ls_files_on_cloud_storage(gs_base).strip().split('\n')
    for entry in listing:
        if not entry.endswith('/status'):  # Ignore the overall status file
            for to_print in [EXITCODE, TIMED_OUT, STDERR, STDOUT]:
                # NOTE(review): assumes each listed entry ends with '/' so
                # plain concatenation forms a valid object path — confirm
                # against utils.ls_files_on_cloud_storage.
                gs_location = '%s%s' % (entry, to_print)
                value = utils.cat_file_on_cloud_storage(gs_location)
                print('\n\n%s had value:\n%s' % (to_print, value))
    print("\n\nPrinting find-min-xmx ranges for apps")
    run_on_app.print_min_xmx_ranges_for_hash(git_hash, 'r8', 'lib')
def run_bot():
    """Bot side of the flow: schedule a test run and wait for its result.

    Publishes READY_FOR_TESTING containing the current HEAD sha, then polls
    cloud storage until the tester publishes a matching TESTING_COMPLETE
    (raising on BOT_RUN_TIMEOUT), reads the archived overall status, and
    returns 1 on test failure (implicitly None, i.e. exit 0, on success).
    """
    print_magic_file_state()
    # Ensure that there is nothing currently scheduled (broken/stopped run)
    for magic in ALL_MAGIC:
        if get_magic_file_exists(magic):
            log('ERROR: Synchronizing file %s exists, cleaning up' % magic)
            delete_magic_file(magic)
    print_magic_file_state()
    assert not get_magic_file_exists(READY_FOR_TESTING)
    git_hash = utils.get_HEAD_sha1()
    put_magic_file(READY_FOR_TESTING, git_hash)
    begin = time.time()
    while True:
        if time.time() - begin > BOT_RUN_TIMEOUT:
            log('Timeout exceeded: http://go/internal-r8-doc')
            raise Exception('Bot timeout')
        if get_magic_file_exists(TESTING_COMPLETE):
            if get_magic_file_content(TESTING_COMPLETE) == git_hash:
                break
            else:
                # A completion marker for a different commit means the shared
                # state is inconsistent; fail rather than consume it.
                raise Exception(
                    'Non matching git hashes %s and %s' %
                    (get_magic_file_content(TESTING_COMPLETE), git_hash))
        log('Still waiting for test result')
        print_magic_file_state()
        time.sleep(PULL_DELAY)
    total_time = time.time() - begin
    log('Done running test for %s in %ss' % (git_hash, total_time))
    test_status = get_status(git_hash)
    delete_magic_file(TESTING_COMPLETE)
    log('Test status is: %s' % test_status)
    # Status is the string the tester archived ('0' means all commands passed).
    if test_status != '0':
        print('Tests failed, you can print the logs by running(googlers only):')
        print(' tools/internal_test.py --print_logs %s' % git_hash)
        return 1
def run_continuously():
    """Tester side of the flow: poll for scheduled runs and execute them.

    Loops forever. Whenever READY_FOR_TESTING appears, checks out the
    published sha, swaps the marker for TESTING, runs the tests in a child
    process (run_external), and finally swaps TESTING for TESTING_COMPLETE
    so the bot can pick up the result.
    """
    while True:
        print_magic_file_state()
        if get_magic_file_exists(READY_FOR_TESTING):
            git_hash = get_magic_file_content(READY_FOR_TESTING)
            checked_out = git_checkout(git_hash)
            if not checked_out:
                # Gerrit change, we don't run these on internal.
                archive_status(0)
                put_magic_file(TESTING_COMPLETE, git_hash)
                delete_magic_file(READY_FOR_TESTING)
                continue
            # Sanity check, if this does not succeed stop.
            if checked_out != git_hash:
                log('Inconsistent state: %s %s' % (git_hash, checked_out))
                sys.exit(1)
            put_magic_file(TESTING, git_hash)
            delete_magic_file(READY_FOR_TESTING)
            log('Running with hash: %s' % git_hash)
            exitcode = run_external()
            log('Running finished with exit code %s' % exitcode)
            # If the bot timed out or something else triggered the bot to fail, don't
            # put up the result (it will not be displayed anywhere, and we can't
            # remove the magic file if the bot cleaned up).
            if get_magic_file_exists(TESTING):
                put_magic_file(TESTING_COMPLETE, git_hash)
                # There is still a potential race here (we check, bot deletes, we try to
                # delete) - this is unlikely and we ignore it (restart if it happens).
                delete_magic_file(TESTING)
        time.sleep(PULL_DELAY)
def run_external():
    """Run this script's test pass (--archive) in a separate process."""
    external_cmd = [sys.executable, "tools/internal_test.py", "--archive"]
    return subprocess.call(external_cmd)
def handle_output(archive, stderr, stdout, exitcode, timed_out, cmd):
    """Either archive a command's logs to GCS or dump them to the console.

    stderr/stdout are paths to the captured log files of the command.
    """
    if not archive:
        print('Execution of %s resulted in:' % cmd)
        print('exit code: %s ' % exitcode)
        print('timeout: %s ' % timed_out)
        for label, path in (('stderr', stderr), ('stdout', stdout)):
            with open(path, 'r') as f:
                print('%s: %s' % (label, f.read()))
        return
    archive_log(stdout, stderr, exitcode, timed_out, cmd)
def execute(cmd, archive, env=None):
    """Run a python tool command with a RUN_TIMEOUT limit.

    cmd is a list whose first element is a .py script path; it is executed
    with the current interpreter. On a non-zero exit the captured logs are
    archived or printed (per 'archive'). Returns the exit code, or None
    for an empty command.

    Fixes over the previous version: log files are context-managed so they
    are closed even if Popen raises, 'is None' replaces '== None', and
    terminate() is only invoked once after a timeout instead of every poll.
    """
    if not cmd:
        return
    assert cmd[0].endswith('.py')
    cmd = [sys.executable] + cmd
    utils.PrintCmd(cmd)
    with utils.TempDir() as temp:
        stderr = os.path.join(temp, 'stderr')
        stdout = os.path.join(temp, 'stdout')
        timed_out = False
        with open(stderr, 'w') as stderr_fd, open(stdout, 'w') as stdout_fd:
            popen = subprocess.Popen(cmd,
                                     bufsize=1024 * 1024 * 10,
                                     stdout=stdout_fd,
                                     stderr=stderr_fd,
                                     env=env)
            begin = time.time()
            while popen.poll() is None:
                if not timed_out and time.time() - begin > RUN_TIMEOUT:
                    popen.terminate()
                    timed_out = True
                time.sleep(2)
        exitcode = popen.returncode
        # Report while still inside TempDir so the log files exist.
        if exitcode != 0:
            handle_output(archive, stderr, stdout, exitcode, timed_out,
                          ' '.join(cmd))
        return exitcode
def run_once(archive):
    """Run the clean and test commands once for the current checkout.

    Archives the overall status (0/1) for the commit and returns a truthy
    value iff any test command failed.
    """
    git_hash = utils.get_HEAD_sha1()
    log('Running once with hash %s' % git_hash)
    env = os.environ.copy()
    # Bot does not have a lot of memory.
    env['R8_GRADLE_CORES_PER_FORK'] = '5'
    if archive:
        # Plain loop instead of a side-effect-only list comprehension.
        for cmd in CLEAN_COMMANDS:
            execute(cmd, archive, env)
    # Keep the materialized list: any() over a generator would short-circuit
    # at the first failure and skip the remaining test commands.
    failed = any([execute(cmd, archive, env) for cmd in TEST_COMMANDS])
    # Gradle daemon occasionally leaks memory, stop it.
    gradle.RunGradle(['--stop'])
    archive_status(1 if failed else 0)
    return failed
def Main():
    """Entry point: dispatch on the parsed command line flags."""
    (options, args) = ParseOptions()
    if options.continuous:
        run_continuously()
    elif options.bot:
        return run_bot()
    elif options.print_logs:
        return fetch_and_print_logs(options.print_logs)
    return run_once(options.archive)
# Script entry point; Main's return value becomes the process exit code.
if __name__ == '__main__':
    sys.exit(Main())