Merge "Make test_android_cts eval against non-flaky subset of baseline."
diff --git a/third_party/android_cts_baseline.tar.gz.sha1 b/third_party/android_cts_baseline.tar.gz.sha1
index 7126671..5efd8e5 100644
--- a/third_party/android_cts_baseline.tar.gz.sha1
+++ b/third_party/android_cts_baseline.tar.gz.sha1
@@ -1 +1 @@
-3914f251fc8b8669b2ec92766696267c1a9ac7ff
\ No newline at end of file
+499d86b9b9eba837261c2d10e5b76f3a3a9a9952
\ No newline at end of file
diff --git a/tools/test_android_cts.py b/tools/test_android_cts.py
index 7a796b9..606611d 100755
--- a/tools/test_android_cts.py
+++ b/tools/test_android_cts.py
@@ -14,11 +14,9 @@
# cd build/aosp
# repo manifest -o ../../third_party/aosp_manifest.xml -r
#
-# The baseline is the `test_result.xml` file which is created with an AOSP
-# build which uses the default (JACK) toolset.
-#
-# Use this script, with '--tool=jack' to reproduce the baseline results
-#
+# The baseline is a set of `test_result.xml` files in
+# third_party/android_cts_baseline/jack. The current test run is considered a
+# success if every test that consistently passes in the baseline also passes.
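+# Tests whose outcome differs between the baseline files are considered flaky
+# and are left out of the comparison.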
from __future__ import print_function
from glob import glob
@@ -35,8 +33,8 @@
import gradle
import utils
-CTS_BASELINE = join(utils.REPO_ROOT,
- 'third_party/android_cts_baseline/test_result.xml')
+CTS_BASELINE_FILES_DIR = join(utils.REPO_ROOT,
+ 'third_party/android_cts_baseline/jack')
AOSP_MANIFEST_XML = join(utils.REPO_ROOT, 'third_party',
'aosp_manifest.xml')
AOSP_HELPER_SH = join(utils.REPO_ROOT, 'scripts', 'aosp_helper.sh')
@@ -97,82 +95,33 @@
return False
return True
-# Read the xml test result file into an in-memory tree:
-# Extract only the Module/TestCase/Test names and outcome (True|False for
-# PASS|FAIL):
-#
-# tree[module_name][testcase_name][test_name] = True|False
-#
-def read_test_result_into_tree(filename):
+# Return a list of fully qualified names of the tests that pass in
+# all of the given files.
+def consistently_passing_tests_from_test_results(filenames):
tree = {}
module = None
testcase = None
- for x in utils.read_cts_test_result(filename):
- if type(x) is utils.CtsModule:
- tree[x.name] = {}
- module = tree[x.name]
- elif type(x) is utils.CtsTestCase:
- module[x.name] = {}
- testcase = module[x.name]
- else:
- testcase[x.name] = x.outcome
+ # Build a tree with leaves True|False|None for passing, failing and flaky
+ # tests.
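+  # A test whose outcome differs from the one already recorded is downgraded
+  # to None; only tests that stay True end up in the returned list.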
+ for f in filenames:
+ for x in utils.read_cts_test_result(f):
+ if type(x) is utils.CtsModule:
+ module = tree.setdefault(x.name, {})
+ elif type(x) is utils.CtsTestCase:
+ testcase = module.setdefault(x.name, {})
+ else:
+ outcome = testcase.setdefault(x.name, x.outcome)
+ if outcome is not None and outcome != x.outcome:
+ testcase[x.name] = None
- return tree
+ result = []
+ for module_name, module in tree.iteritems():
+ for test_case_name, test_case in module.iteritems():
+ result.extend(['{}/{}/{}'.format(module_name, test_case_name, test_name)
+ for test_name, test in test_case.iteritems()
+ if test])
-# Report the items with the title
-def report_key_diff(title, items, prefix = ''):
- if len(items) > 0:
- print(title, ":")
- for x in items:
- print("- {}{}".format(prefix, x))
- print()
-
-
-def diff_sets(base_minus_result_title, result_minus_base_title,
- base_set, result_set, prefix = ''):
- base_minus_result = base_set - result_set
- result_minus_base = result_set - base_set
- report_key_diff(base_minus_result_title, base_minus_result, prefix)
- report_key_diff(result_minus_base_title, result_minus_base, prefix)
- return len(base_minus_result) > 0 or len(result_minus_base) > 0
-
-def diff_tree_report(baseline_tree, result_tree):
- baseline_modules = set(baseline_tree.keys())
- result_modules = set(result_tree.keys())
- differ = diff_sets('Modules missing from current result',
- 'New modules appeared in current result',
- baseline_modules, result_modules)
- for module in (result_modules & baseline_modules):
- baseline_module = baseline_tree[module]
- result_module = result_tree[module]
- baseline_testcases = set(baseline_module.keys())
- result_testcases = set(result_module.keys())
- differ = diff_sets('Test cases missing from current result',
- 'New test cases appeared in current result',
- baseline_testcases, result_testcases, module + '/') \
- or differ
- for testcase in (result_testcases & baseline_testcases):
- baseline_testcase = baseline_module[testcase]
- result_testcase = result_module[testcase]
- baseline_tests = set(baseline_testcase.keys())
- result_tests = set(result_testcase.keys())
- differ = diff_sets('Tests missing from current result',
- 'New tests appeared in current result',
- baseline_tests, result_tests, module + '/' + testcase + '/') \
- or differ
- need_newline_at_end = False
- for test in (result_tests & baseline_tests):
- baseline_outcome = baseline_testcase[test]
- result_outcome = result_testcase[test]
- if baseline_outcome != result_outcome:
- differ = True
- print('Test: {}/{}/{}, change: {}'.format(
- module, testcase, test,
- 'PASS -> FAIL' if baseline_outcome else 'FAIL -> PASS'))
- need_newline_at_end = True
- if need_newline_at_end:
- print()
- return differ
+ return result
def setup_and_clean(tool_is_d8, clean_dex):
# Two output dirs, one for the android image and one for cts tests.
@@ -282,8 +231,6 @@
re_summary = re.compile('<Summary ')
summaries = [('Summary from current test results: ', results_xml)]
- if not args.no_baseline:
- summaries.append(('Summary from baseline: ', CTS_BASELINE))
for (title, result_file) in summaries:
print(title, result_file)
@@ -298,10 +245,31 @@
else:
print('Comparing test results to baseline:\n')
- result_tree = read_test_result_into_tree(results_xml)
- baseline_tree = read_test_result_into_tree(CTS_BASELINE)
+ passing_tests = consistently_passing_tests_from_test_results([results_xml])
+ baseline_results = \
+ [f for f in glob(join(CTS_BASELINE_FILES_DIR, '*.xml'))]
+ assert len(baseline_results) != 0
- r = EXIT_FAILURE if diff_tree_report(baseline_tree, result_tree) else 0
+ passing_tests_in_baseline = \
+ consistently_passing_tests_from_test_results(baseline_results)
+
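+  # Tests that consistently pass in the baseline but are missing or failing in
+  # the current results make the run fail.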
+ missing_or_failing_tests = \
+ set(passing_tests_in_baseline) - set(passing_tests)
+
+ num_tests = len(missing_or_failing_tests)
+ if num_tests != 0:
+ if num_tests > 1:
+ text = '{} tests that consistently pass in the baseline' \
+ ' are missing or failing in the current test:'.format(num_tests)
+ else:
+ text = '1 test that consistently passes in the baseline' \
+ ' is missing or failing in the current test:'
+ print(text)
+ for t in missing_or_failing_tests:
+ print(t)
+ r = EXIT_FAILURE
+ else:
+ r = 0
if args.save_result:
copy2(results_xml, args.save_result)