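Do not fail when benchmark is nondeterministic

Printing benchmark results no longer throws when the recorded code
sizes differ, provided the BENCHMARK_IGNORE_CODE_SIZE_DIFFERENCES
system property is set. tools/run_benchmark.py now always sets it.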

Change-Id: I162b0fc598b385dedcdac4a72ca1f2524c422ecf
diff --git a/src/test/java/com/android/tools/r8/benchmarks/BenchmarkEnvironment.java b/src/test/java/com/android/tools/r8/benchmarks/BenchmarkEnvironment.java
index 267ecc1..e8e5641 100644
--- a/src/test/java/com/android/tools/r8/benchmarks/BenchmarkEnvironment.java
+++ b/src/test/java/com/android/tools/r8/benchmarks/BenchmarkEnvironment.java
@@ -19,6 +19,10 @@
     this.isGolem = isGolem;
   }
 
+  public boolean failOnCodeSizeDifferences() {
+    return System.getProperty("BENCHMARK_IGNORE_CODE_SIZE_DIFFERENCES") == null;
+  }
+
   public BenchmarkConfig getConfig() {
     return config;
   }
diff --git a/src/test/java/com/android/tools/r8/benchmarks/BenchmarkResultsCollection.java b/src/test/java/com/android/tools/r8/benchmarks/BenchmarkResultsCollection.java
index 702374b..da3f3d2 100644
--- a/src/test/java/com/android/tools/r8/benchmarks/BenchmarkResultsCollection.java
+++ b/src/test/java/com/android/tools/r8/benchmarks/BenchmarkResultsCollection.java
@@ -48,12 +48,12 @@
   }
 
   @Override
-  public void printResults(ResultMode mode) {
+  public void printResults(ResultMode mode, boolean failOnCodeSizeDifferences) {
     List<String> sorted = new ArrayList<>(results.keySet());
     sorted.sort(String::compareTo);
     for (String name : sorted) {
       BenchmarkResultsSingle singleResults = results.get(name);
-      singleResults.printResults(mode);
+      singleResults.printResults(mode, failOnCodeSizeDifferences);
     }
   }
 
diff --git a/src/test/java/com/android/tools/r8/benchmarks/BenchmarkResultsSingle.java b/src/test/java/com/android/tools/r8/benchmarks/BenchmarkResultsSingle.java
index d44f1c0..e15dffb 100644
--- a/src/test/java/com/android/tools/r8/benchmarks/BenchmarkResultsSingle.java
+++ b/src/test/java/com/android/tools/r8/benchmarks/BenchmarkResultsSingle.java
@@ -90,7 +90,7 @@
   }
 
   @Override
-  public void printResults(ResultMode mode) {
+  public void printResults(ResultMode mode, boolean failOnCodeSizeDifferences) {
     verifyConfigAndResults();
     if (!runtimeResults.isEmpty()) {
       long sum = runtimeResults.stream().mapToLong(l -> l).sum();
@@ -99,10 +99,12 @@
     }
     if (!codeSizeResults.isEmpty()) {
       long size = codeSizeResults.getLong(0);
-      for (int i = 1; i < codeSizeResults.size(); i++) {
-        if (size != codeSizeResults.getLong(i)) {
-          throw new RuntimeException(
-              "Unexpected code size difference: " + size + " and " + codeSizeResults.getLong(i));
+      if (failOnCodeSizeDifferences) {
+        for (int i = 1; i < codeSizeResults.size(); i++) {
+          if (size != codeSizeResults.getLong(i)) {
+            throw new RuntimeException(
+                "Unexpected code size difference: " + size + " and " + codeSizeResults.getLong(i));
+          }
         }
       }
       printCodeSize(size);
diff --git a/src/test/java/com/android/tools/r8/benchmarks/BenchmarkResultsWarmup.java b/src/test/java/com/android/tools/r8/benchmarks/BenchmarkResultsWarmup.java
index c7319f7..1a4e33c 100644
--- a/src/test/java/com/android/tools/r8/benchmarks/BenchmarkResultsWarmup.java
+++ b/src/test/java/com/android/tools/r8/benchmarks/BenchmarkResultsWarmup.java
@@ -53,7 +53,7 @@
   }
 
   @Override
-  public void printResults(ResultMode mode) {
+  public void printResults(ResultMode mode, boolean failOnCodeSizeDifferences) {
     if (runtimeResults.isEmpty()) {
       throw new BenchmarkConfigError("Expected runtime results for warmup run");
     }
diff --git a/src/test/java/com/android/tools/r8/benchmarks/BenchmarkRunner.java b/src/test/java/com/android/tools/r8/benchmarks/BenchmarkRunner.java
index 72d7adc..78441e3 100644
--- a/src/test/java/com/android/tools/r8/benchmarks/BenchmarkRunner.java
+++ b/src/test/java/com/android/tools/r8/benchmarks/BenchmarkRunner.java
@@ -81,11 +81,11 @@
     if (warmups > 0) {
       printMetaInfo("warmup", warmups, warmupTotalTime);
       if (config.hasTimeWarmupRuns()) {
-        warmupResults.printResults(resultMode);
+        warmupResults.printResults(resultMode, environment.failOnCodeSizeDifferences());
       }
     }
     printMetaInfo("benchmark", getBenchmarkIterations(), benchmarkTotalTime);
-    results.printResults(resultMode);
+    results.printResults(resultMode, environment.failOnCodeSizeDifferences());
     if (environment.hasOutputPath()) {
       writeResults(results);
     }
diff --git a/src/test/testbase/java/com/android/tools/r8/benchmarks/BenchmarkResults.java b/src/test/testbase/java/com/android/tools/r8/benchmarks/BenchmarkResults.java
index e19cd88..9dcfa97 100644
--- a/src/test/testbase/java/com/android/tools/r8/benchmarks/BenchmarkResults.java
+++ b/src/test/testbase/java/com/android/tools/r8/benchmarks/BenchmarkResults.java
@@ -20,7 +20,7 @@
   // This will throw if called on a benchmark without sub-benchmarks.
   BenchmarkResults getSubResults(String name);
 
-  void printResults(ResultMode resultMode);
+  void printResults(ResultMode resultMode, boolean failOnCodeSizeDifferences);
 
   void writeResults(PrintStream out);
 
diff --git a/tools/run_benchmark.py b/tools/run_benchmark.py
index e239c35..dd05251 100755
--- a/tools/run_benchmark.py
+++ b/tools/run_benchmark.py
@@ -152,7 +152,8 @@
     jdkhome = get_jdk_home(options, options.benchmark)
     cmd = [
         jdk.GetJavaExecutable(jdkhome), '-Xms8g', '-Xmx8g',
-        '-XX:+TieredCompilation', '-XX:TieredStopAtLevel=4'
+        '-XX:+TieredCompilation', '-XX:TieredStopAtLevel=4',
+        '-DBENCHMARK_IGNORE_CODE_SIZE_DIFFERENCES'
     ]
     if options.enable_assertions:
         cmd.append('-ea')