Download perf bucket when merging data

Change-Id: I5b964765adc27da515ae7d799a05a753eb80c2b1
diff --git a/tools/upload_benchmark_data_to_google_storage.py b/tools/upload_benchmark_data_to_google_storage.py
index 02b46e2..9037949 100755
--- a/tools/upload_benchmark_data_to_google_storage.py
+++ b/tools/upload_benchmark_data_to_google_storage.py
@@ -7,59 +7,72 @@
 import json
 import os
 import perf
+import time
 import utils
 
 import sys
 
 APPS = ['NowInAndroidApp', 'TiviApp']
 TARGETS = ['r8-full']
-NUM_COMMITS = 1000
+NUM_COMMITS = 250
 
 
-def ParseJsonFromCloudStorage(filename):
-    gs_location = perf.GetGSLocation(filename)
-    if not utils.file_exists_on_cloud_storage(gs_location):
+def DownloadCloudBucket(dest):
+    os.makedirs(dest)
+    utils.download_file_from_cloud_storage(perf.GetGSLocation('*'),
+                                           dest,
+                                           concurrent=True,
+                                           flags=['-R'])
+
+
+def ParseJsonFromCloudStorage(filename, local_bucket):
+    abs_path = os.path.join(local_bucket, filename)
+    if not os.path.exists(abs_path):
         return None
-    content = utils.cat_file_on_cloud_storage(gs_location)
-    try:
-        return json.loads(''.join(content))
-    except:
-        return None
+    with open(abs_path, 'r') as f:
+        # result.json files are small; read them whole.
+        content = f.read()
+        try:
+            return json.loads(content)
+        except json.JSONDecodeError:
+            return None
 
 
 def main():
-    if utils.get_HEAD_branch() != 'main':
-        print('Expected to be on branch \'main\'')
-        sys.exit(1)
-
     # Get the N most recent commits sorted by newest first.
-    top = utils.get_HEAD_sha1()
-    bottom = utils.get_nth_sha1_from_HEAD(NUM_COMMITS - 1)
+    top = utils.get_sha1_from_revision('origin/main')
+    bottom = utils.get_nth_sha1_from_revision(NUM_COMMITS - 1, 'origin/main')
     commits = historic_run.enumerate_git_commits(top, bottom)
     assert len(commits) == NUM_COMMITS
 
-    # Aggregate all the result.json files into a single benchmark_data.json file
-    # that has the same format as tools/perf/benchmark_data.json.
-    benchmark_data = []
-    for commit in commits:
-        benchmarks = {}
-        for app in APPS:
-            for target in TARGETS:
-                filename = perf.GetArtifactLocation(app, target, commit.hash(),
-                                                    'result.json')
-                app_benchmark_data = ParseJsonFromCloudStorage(filename)
-                if app_benchmark_data:
-                    benchmarks[app] = app_benchmark_data
-        if len(benchmarks):
-            benchmark_data.append({
-                'author': commit.author_name(),
-                'hash': commit.hash(),
-                'submitted': commit.committer_timestamp(),
-                'title': commit.title(),
-                'benchmarks': benchmarks
-            })
-
+    # Download all benchmark data from the cloud bucket to a temp folder.
     with utils.TempDir() as temp:
+        local_bucket = os.path.join(temp, perf.BUCKET)
+        DownloadCloudBucket(local_bucket)
+
+        # Aggregate all the result.json files into a single benchmark_data.json file
+        # that has the same format as tools/perf/benchmark_data.json.
+        benchmark_data = []
+        for commit in commits:
+            benchmarks = {}
+            for app in APPS:
+                for target in TARGETS:
+                    filename = perf.GetArtifactLocation(app, target,
+                                                        commit.hash(),
+                                                        'result.json')
+                    app_benchmark_data = ParseJsonFromCloudStorage(
+                        filename, local_bucket)
+                    if app_benchmark_data:
+                        benchmarks[app] = app_benchmark_data
+            if benchmarks:
+                benchmark_data.append({
+                    'author': commit.author_name(),
+                    'hash': commit.hash(),
+                    'submitted': commit.committer_timestamp(),
+                    'title': commit.title(),
+                    'benchmarks': benchmarks
+                })
+
         benchmark_data_file = os.path.join(temp, 'benchmark_data.json')
         with open(benchmark_data_file, 'w') as f:
             json.dump(benchmark_data, f)
diff --git a/tools/utils.py b/tools/utils.py
index f483212..983706a 100644
--- a/tools/utils.py
+++ b/tools/utils.py
@@ -350,10 +350,11 @@
         subprocess.check_output(cmd)
 
 
-def get_nth_sha1_from_HEAD(n):
-    result = subprocess.check_output(
-        ['git', 'log', f'--skip={n}', '--max-count=1',
-         '--pretty=format:%H']).decode('utf-8')
+def get_nth_sha1_from_revision(n, revision):
+    result = subprocess.check_output([
+        'git', 'log', revision, f'--skip={n}', '--max-count=1',
+        '--pretty=format:%H'
+    ]).decode('utf-8')
     return result.strip()
 
 
@@ -368,6 +369,13 @@
     return sha1.hexdigest()
 
 
+def get_sha1_from_revision(revision):
+    cmd = ['git', 'rev-parse', revision]
+    PrintCmd(cmd)
+    with ChangedWorkingDirectory(REPO_ROOT):
+        return subprocess.check_output(cmd).decode('utf-8').strip()
+
+
 def get_HEAD_branch():
     result = subprocess.check_output(
         ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).decode('utf-8')
@@ -375,20 +383,13 @@
 
 
 def get_HEAD_sha1():
-    return get_HEAD_sha1_for_checkout(REPO_ROOT)
+    return get_sha1_from_revision('HEAD')
 
 
 def get_HEAD_diff_stat():
     return subprocess.check_output(['git', 'diff', '--stat']).decode('utf-8')
 
 
-def get_HEAD_sha1_for_checkout(checkout):
-    cmd = ['git', 'rev-parse', 'HEAD']
-    PrintCmd(cmd)
-    with ChangedWorkingDirectory(checkout):
-        return subprocess.check_output(cmd).decode('utf-8').strip()
-
-
 def makedirs_if_needed(path):
     try:
         os.makedirs(path)
@@ -475,8 +476,18 @@
     return subprocess.call(cmd) == 0
 
 
-def download_file_from_cloud_storage(source, destination, quiet=False):
-    cmd = [get_gsutil(), 'cp', source, destination]
+def download_file_from_cloud_storage(source,
+                                     destination,
+                                     concurrent=False,
+                                     flags=None,
+                                     quiet=False):
+    cmd = [get_gsutil()]
+    if concurrent:
+        cmd.append('-m')
+    cmd.append('cp')
+    if flags:
+        cmd.extend(flags)
+    cmd.extend([source, destination])
     PrintCmd(cmd, quiet=quiet)
     subprocess.check_call(cmd)