Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # Copyright (c) 2018, the R8 project authors. Please see the AUTHORS file |
| 3 | # for details. All rights reserved. Use of this source code is governed by a |
| 4 | # BSD-style license that can be found in the LICENSE file. |
| 5 | |
| 6 | # Script for checking impact of a change by comparing the sizes of generated |
| 7 | # classes in an apk. |
| 8 | |
| 9 | import glob |
| 10 | import optparse |
| 11 | import os |
| 12 | import shutil |
| 13 | import sys |
Rico Wind | 97daeb7 | 2019-01-22 09:25:09 +0100 | [diff] [blame] | 14 | import threading |
| 15 | import time |
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 16 | import toolhelper |
| 17 | import utils |
| 18 | import zipfile |
| 19 | import StringIO |
| 20 | |
# Usage text handed to optparse.OptionParser in parse_options().
USAGE = """%prog [options] app1 app2
NOTE: This only makes sense if minification is disabled"""

# Upper bound on the number of size-calculation threads that
# generate_file_info() keeps running at the same time.
MAX_THREADS=40
| 25 | |
def parse_options():
    """Build the option parser and parse the process command line.

    Returns:
      The (options, args) pair produced by optparse. `options` carries
      `temp`, `use_code_size` and `report`; `args` holds the positional
      arguments.
    """
    parser = optparse.OptionParser(usage=USAGE)
    parser.add_option(
        '--temp',
        help='Temporary directory to store extracted classes in')
    parser.add_option(
        '--use_code_size',
        action='store_true',
        default=False,
        help='Use the size of code segments instead of the full size of the dex.')
    parser.add_option(
        '--report',
        help='Print comparison to this location instead of stdout')
    options, args = parser.parse_args()
    return options, args
| 36 | |
def extract_apk(apk, output):
    """Unzip `apk` into `output` and return the top-level dex files.

    Any existing `output` directory is removed first so stale files from a
    previous run cannot leak into the result.

    Args:
      apk: Path to the zip/apk archive to extract.
      output: Directory to extract into (deleted and recreated).

    Returns:
      A list of paths (each prefixed with `output`) for every `*.dex` file
      found directly in the extraction root.
    """
    if os.path.exists(output):
        shutil.rmtree(output)
    # Use a context manager so the archive handle is closed deterministically
    # instead of relying on garbage collection.
    with zipfile.ZipFile(apk) as archive:
        archive.extractall(output)
    with utils.ChangedWorkingDirectory(output):
        dex = glob.glob('*.dex')
    return [os.path.join(output, dexfile) for dexfile in dex]
| 44 | |
def ensure_exists(files):
    """Raise if any of the given paths does not exist.

    Args:
      files: Iterable of filesystem paths to check.

    Raises:
      Exception: for the first path that is missing; the message names it.
    """
    for f in files:
        if not os.path.exists(f):
            # The path must be interpolated; otherwise the message is the
            # literal text '%s does not exist'.
            raise Exception('%s does not exist' % f)
| 49 | |
def extract_classes(input, output):
    """Run d8 with --file-per-class over `input`, writing into `output`.

    Args:
      input: List of input files passed to d8 after the options.
      output: Directory for the generated files; removed if it already
        exists and then recreated empty.

    Raises:
      Exception: if the d8 invocation returns a non-zero value.
    """
    if os.path.exists(output):
        shutil.rmtree(output)
    os.makedirs(output)
    args = ['--file-per-class',
            '--output', output]
    args.extend(input)
    # Compare by value: `is not 0` tests object identity and only works by
    # accident of small-integer caching.
    if toolhelper.run('d8', args) != 0:
        raise Exception('Failed running d8')
| 59 | |
def get_code_size(path):
    """Return the code segment size that dexsegments reports for `path`.

    Runs the 'dexsegments' tool on the file, takes the first output line
    containing 'Code' and returns its fourth space-separated field as an int.
    Returns 0 when no such line exists.
    """
    stdout = toolhelper.run(
        'dexsegments', [path], build=False, return_stdout=True)
    code_line = next(
        (candidate for candidate in stdout.splitlines() if 'Code' in candidate),
        None)
    if code_line is None:
        # Some classes have no code.
        return 0
    # The code size line looks like:
    # - Code: 264 / 4
    # NOTE(review): index 3 presumably relies on the exact leading whitespace
    # of the tool output; confirm against real dexsegments output.
    fields = code_line.split(' ')
    return int(fields[3])
Rico Wind | e285234 | 2019-01-16 14:42:18 +0100 | [diff] [blame] | 73 | |
class FileInfo(object):
    """A file found under a root directory, plus its lazily computed size.

    Attributes set in __init__:
      path: The path as given, relative to `root`.
      full_path: `root` joined with `path`.
    `size` is only set once set_size() has been called.
    """

    def __init__(self, path, root):
        self.path = path
        self.full_path = os.path.join(root, path)

    def __eq__(self, other):
        # Return NotImplemented for foreign types instead of failing with
        # AttributeError on the missing `full_path`.
        if not isinstance(other, FileInfo):
            return NotImplemented
        return self.full_path == other.full_path

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly to
        # keep the two operators consistent.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Keep hashing consistent with __eq__; on Python 3 defining __eq__
        # alone would make instances unhashable.
        return hash(self.full_path)

    def set_size(self, use_code_size):
        """Compute and store `self.size`.

        Args:
          use_code_size: When true, use get_code_size() on the file;
            otherwise use the on-disk size from os.path.getsize().
        """
        if use_code_size:
            self.size = get_code_size(self.full_path)
        else:
            self.size = os.path.getsize(self.full_path)
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 87 | |
def generate_file_info(path, options):
    """Collect a FileInfo, with its size computed, for every file under `path`.

    Args:
      path: Root directory to walk. Every file below it must have a name
        ending in 'dex'.
      options: Parsed options; `options.use_code_size` selects between the
        on-disk size (computed inline) and the code segment size (computed
        on up to MAX_THREADS worker threads at a time).

    Returns:
      A dict mapping each file's path relative to `path` (as produced by
      walking '.') to its FileInfo.
    """
    file_info_map = {}
    with utils.ChangedWorkingDirectory(path):
        for root, dirs, files in os.walk('.'):
            for f in files:
                assert f.endswith('dex')
                file_path = os.path.join(root, f)
                file_info_map[file_path] = FileInfo(file_path, path)

    if not options.use_code_size:
        # Size the files only after leaving the changed working directory:
        # FileInfo.full_path is `path` joined with the relative name, so a
        # relative `path` would not resolve while the cwd is changed
        # (assuming ChangedWorkingDirectory changes the process cwd).
        for entry in file_info_map.values():
            entry.set_size(False)
        return file_info_map

    threads = []
    # Copy into a real list: entries are pop()ed below, which a Python 3
    # dict view does not support.
    file_infos = list(file_info_map.values())
    while file_infos or threads:
        # Rebuild the list instead of calling remove() while iterating it,
        # which skips the element following each removed thread.
        threads = [t for t in threads if t.is_alive()]
        # Wait only when there are live threads and either the pool is full
        # or there is no more work to hand out.
        if threads and (len(threads) == MAX_THREADS or not file_infos):
            time.sleep(0.5)
        while len(threads) < MAX_THREADS and file_infos:
            info = file_infos.pop()
            print('Added %s for size calculation' % info.full_path)
            t = threading.Thread(target=info.set_size,
                                 args=(options.use_code_size,))
            threads.append(t)
            t.start()
        print('Missing %s files, threads=%s ' % (len(file_infos), len(threads)))

    return file_info_map
| 117 | |
def print_info(app, app_files, only_in_app, bigger_in_app, output):
    """Write the 'Only in' and 'Bigger in' sections for one app.

    Args:
      app: Name of the app, used in the section headers.
      app_files: Mapping from file key to an object with a `size` attribute.
      only_in_app: Keys present only in this app; listed largest first.
      bigger_in_app: Mapping from key to the number of bytes by which the
        file is bigger in this app; listed by descending percentage of the
        file's size in this app.
      output: Object with a write() method that receives the text.
    """
    def size_of(name):
        return app_files[name].size

    def growth_percent(name):
        return (0.0 + bigger_in_app.get(name)) / app_files.get(name).size * 100

    output.write('Only in %s\n' % app)
    largest_first = sorted(only_in_app, key=size_of, reverse=True)
    rows = [' %s %s bytes' % (name, size_of(name)) for name in largest_first]
    output.write('\n'.join(rows))
    output.write('\n\n')

    output.write('Bigger in %s\n' % app)
    # Sort by the percentage diff compared to size
    by_growth = sorted(bigger_in_app, key=growth_percent, reverse=True)
    for name in by_growth:
        row = ' {0:.3f}% {1} bytes {2}\n'.format(
            growth_percent(name), bigger_in_app[name], name)
        output.write(row)
    output.write('\n\n')
| 134 | |
| 135 | |
def compare(app1_classes_dir, app2_classes_dir, app1, app2, options):
    """Compare per-file sizes of two class directories and write a report.

    Args:
      app1_classes_dir: Directory with the per-class files of the first app.
      app2_classes_dir: Directory with the per-class files of the second app.
      app1: Name of the first app, used in report headers.
      app2: Name of the second app, used in report headers.
      options: Parsed options; `options.report` is the report file to write,
        or falsy to write to stdout. Also forwarded to generate_file_info().
    """
    app1_files = generate_file_info(app1_classes_dir, options)
    app2_files = generate_file_info(app2_classes_dir, options)
    only_in_app1 = [k for k in app1_files if k not in app2_files]
    only_in_app2 = [k for k in app2_files if k not in app1_files]
    in_both = [k for k in app2_files if k in app1_files]
    assert len(app1_files) == len(only_in_app1) + len(in_both)
    assert len(app2_files) == len(only_in_app2) + len(in_both)
    bigger_in_app1 = {}
    bigger_in_app2 = {}
    same_size = []
    for f in in_both:
        app1_entry = app1_files[f]
        app2_entry = app2_files[f]
        if app1_entry.size > app2_entry.size:
            bigger_in_app1[f] = app1_entry.size - app2_entry.size
        elif app2_entry.size > app1_entry.size:
            bigger_in_app2[f] = app2_entry.size - app1_entry.size
        else:
            same_size.append(f)
    output = open(options.report, 'w') if options.report else sys.stdout
    try:
        print_info(app1, app1_files, only_in_app1, bigger_in_app1, output)
        print_info(app2, app2_files, only_in_app2, bigger_in_app2, output)
        output.write('Same size\n')
        output.write('\n'.join([' %s' % x for x in same_size]))
    finally:
        # Close the report file even if writing fails; never close stdout.
        if options.report:
            output.close()
| 163 | |
def Main():
    """Script entry point: compare the class sizes of two apps.

    Expects exactly two positional arguments naming the apps. An argument
    ending in 'apk' is unzipped first and its dex files are used as input;
    any other argument is passed to d8 as is.

    Returns:
      1 if the wrong number of arguments was given; otherwise None.
    """
    (options, args) = parse_options()
    # Compare by value; `is not 2` tests object identity.
    if len(args) != 2:
        print(args)
        print('Takes exactly two arguments, the two apps to compare')
        return 1
    app1 = args[0]
    app2 = args[1]
    ensure_exists([app1, app2])
    with utils.TempDir() as temporary:
        # If a temp dir is passed in, use that instead of the generated temporary
        output = options.temp if options.temp else temporary
        ensure_exists([output])
        app1_input = [app1]
        app2_input = [app2]
        if app1.endswith('apk'):
            app1_input = extract_apk(app1, os.path.join(output, 'app1'))
        if app2.endswith('apk'):
            app2_input = extract_apk(app2, os.path.join(output, 'app2'))
        app1_classes_dir = os.path.join(output, 'app1_classes')
        app2_classes_dir = os.path.join(output, 'app2_classes')

        extract_classes(app1_input, app1_classes_dir)
        extract_classes(app2_input, app2_classes_dir)
        compare(app1_classes_dir, app2_classes_dir, app1, app2, options)
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 189 | |
# When executed as a script, run Main() and pass its return value to
# sys.exit().
if __name__ == '__main__':
    sys.exit(Main())