Ian Zerny | dcb172e | 2022-02-22 15:36:45 +0100 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 2 | # Copyright (c) 2018, the R8 project authors. Please see the AUTHORS file |
| 3 | # for details. All rights reserved. Use of this source code is governed by a |
| 4 | # BSD-style license that can be found in the LICENSE file. |
| 5 | |
| 6 | # Script for checking impact of a change by comparing the sizes of generated |
| 7 | # classes in an apk. |
| 8 | |
| 9 | import glob |
| 10 | import optparse |
| 11 | import os |
| 12 | import shutil |
| 13 | import sys |
Rico Wind | 97daeb7 | 2019-01-22 09:25:09 +0100 | [diff] [blame] | 14 | import threading |
| 15 | import time |
Ian Zerny | e92325b | 2020-03-13 13:29:27 +0100 | [diff] [blame] | 16 | import zipfile |
| 17 | |
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 18 | import toolhelper |
| 19 | import utils |
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 20 | |
# Usage banner passed to the option parser.
USAGE = ('%prog [options] app1 app2\n'
         'NOTE: This only makes sense if minification is disabled')

# Upper bound on the number of size calculations running at the same time.
MAX_THREADS = 40
| 25 | |
Rico Wind | 97daeb7 | 2019-01-22 09:25:09 +0100 | [diff] [blame] | 26 | |
def parse_options():
    """Parse the command line of this script.

    Returns:
        The (options, args) pair produced by optparse, where args holds the
        positional arguments (the two apps to compare).
    """
    parser = optparse.OptionParser(usage=USAGE)

    def add_flag(name, description):
        # All boolean switches are off unless given on the command line.
        parser.add_option(name,
                          help=description,
                          default=False,
                          action='store_true')

    # Options are registered in the order they should appear in --help.
    add_flag('--no-build', 'Run without building first')
    parser.add_option(
        '--temp', help='Temporary directory to store extracted classes in')
    add_flag(
        '--use_code_size',
        'Use the size of code segments instead of the full size of the dex.')
    add_flag('--ignore_debug_info',
             'Do not include debug info in the comparison.')
    parser.add_option(
        '--report', help='Print comparison to this location instead of stdout')
    return parser.parse_args()
| 49 | |
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 50 | |
def extract_apk(apk, output):
    """Unpack an apk and return the dex files found at its top level.

    Any existing `output` directory is deleted first so stale files from a
    previous run cannot leak into the result.

    Args:
        apk: Path to the apk (zip archive) to unpack.
        output: Directory to extract the archive into.

    Returns:
        A list of paths, each prefixed with `output`, of the `*.dex` files
        located directly inside `output` (nested dex files are not included).
    """
    if os.path.exists(output):
        shutil.rmtree(output)
    # Use a context manager so the archive handle is closed deterministically.
    with zipfile.ZipFile(apk) as archive:
        archive.extractall(output)
    # Glob inside the directory instead of changing the process-wide working
    # directory; escape `output` so special characters in it are literal.
    return glob.glob(os.path.join(glob.escape(output), '*.dex'))
| 58 | |
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 59 | |
def ensure_exists(files):
    """Verify that every path in `files` exists.

    Args:
        files: Iterable of file system paths to check, in order.

    Raises:
        Exception: Naming the first path that does not exist.
    """
    first_missing = next(
        (candidate for candidate in files if not os.path.exists(candidate)),
        None)
    if first_missing is not None:
        raise Exception('%s does not exist' % first_missing)
| 64 | |
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 65 | |
def extract_classes(input, output, options):
    """Run d8 over `input`, writing one file per class into `output`.

    The `output` directory is recreated from scratch on every call.

    Args:
        input: List of input files handed to d8.
        output: Directory that receives the per-class files.
        options: Parsed options; `no_build` and `ignore_debug_info` are read.

    Raises:
        Exception: If the d8 invocation reports a non-zero result.
    """
    if os.path.exists(output):
        shutil.rmtree(output)
    os.makedirs(output)

    d8_args = ['--file-per-class', '--output', output]
    if options.no_build:
        d8_args.append('--no-build')
    d8_args += input

    # The property below is only passed when debug info should be ignored.
    if options.ignore_debug_info:
        extra = ['-Dcom.android.tools.r8.nullOutDebugInfo=1']
    else:
        extra = []

    exit_code = toolhelper.run('d8', d8_args, extra_args=extra)
    if exit_code != 0:
        raise Exception('Failed running d8')
| 79 | |
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 80 | |
def get_code_size(path):
    """Return the code size that 'dexsegments' reports for `path`.

    The tool's stdout is scanned for the first line containing 'Code'; the
    fourth space-separated field of that line is returned as an int.

    Args:
        path: File to run 'dexsegments' on.

    Returns:
        The parsed size, or 0 when no line mentions 'Code' (some classes have
        no code).
    """
    report = toolhelper.run('dexsegments', [path],
                            build=False,
                            return_stdout=True)
    code_line = next(
        (text for text in report.splitlines() if 'Code' in text), None)
    if code_line is None:
        return 0
    # Fields are split on single spaces; the size sits at index 3.
    fields = code_line.split(' ')
    return int(fields[3])
| 93 | |
Rico Wind | e285234 | 2019-01-16 14:42:18 +0100 | [diff] [blame] | 94 | |
class FileInfo:
    """A file below some root directory together with its measured size."""

    def __init__(self, path, root):
        """Record `path` and its location below `root`.

        Args:
            path: Path of the file relative to `root`.
            root: Directory that `path` is relative to.
        """
        self.path = path
        self.full_path = os.path.join(root, path)
        # Stays None until set_size() has been called.
        self.size = None

    def __eq__(self, other):
        """Two entries are equal when they refer to the same full path."""
        if not isinstance(other, FileInfo):
            # Let Python fall back to its default handling instead of failing
            # with AttributeError on unrelated objects.
            return NotImplemented
        return self.full_path == other.full_path

    def __hash__(self):
        """Hash consistently with __eq__ so entries work in sets and dicts."""
        return hash(self.full_path)

    def set_size(self, use_code_size):
        """Measure the file and store the result in `self.size`.

        Args:
            use_code_size: When true the size comes from get_code_size();
                otherwise the size of the file on disk is used.
        """
        if use_code_size:
            self.size = get_code_size(self.full_path)
        else:
            self.size = os.path.getsize(self.full_path)
| 109 | |
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 110 | |
def generate_file_info(path, options):
    """Collect a FileInfo, including its size, for every file below `path`.

    Args:
        path: Directory holding the per-class dex files.
        options: Parsed options; `use_code_size` selects whether sizes are
            computed via get_code_size() (run concurrently, at most
            MAX_THREADS at a time) or read directly from the file system.

    Returns:
        A dict mapping each file's path relative to `path` (in the form
        './sub/name.dex') to its FileInfo.

    Raises:
        Exception: Whatever a size calculation raised; failures in worker
            threads are re-raised here rather than being lost.
    """
    # Local import keeps this block self-contained.
    from concurrent.futures import ThreadPoolExecutor

    file_info_map = {}
    # Walk `path` directly rather than changing the working directory, so a
    # relative `path` still resolves correctly when sizes are read below.
    for root, _dirs, files in os.walk(path):
        rel_root = os.path.relpath(root, path)
        for f in files:
            assert f.endswith('dex')
            # Keys keep the './...' shape produced by walking '.'.
            if rel_root == os.curdir:
                file_path = os.path.join(os.curdir, f)
            else:
                file_path = os.path.join(os.curdir, rel_root, f)
            entry = FileInfo(file_path, path)
            if not options.use_code_size:
                entry.set_size(False)
            file_info_map[file_path] = entry

    if options.use_code_size and file_info_map:
        with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
            pending = []
            for info in file_info_map.values():
                print('Added %s for size calculation' % info.full_path)
                pending.append(
                    executor.submit(info.set_size, options.use_code_size))
            # Wait for every calculation and surface the first failure.
            for future in pending:
                future.result()

    return file_info_map
| 141 | |
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 142 | |
def print_info(app, app_files, only_in_app, bigger_in_app, output):
    """Write the per-app part of the comparison report.

    Args:
        app: Name of the app, used in the section headings.
        app_files: Mapping from file key to an object with a `size` attribute.
        only_in_app: Keys present only in this app; listed largest first.
        bigger_in_app: Mapping from key to the number of bytes by which the
            file is bigger in this app; listed by relative growth, largest
            first.
        output: Writable text stream receiving the report.
    """

    def size_of(key):
        return app_files[key].size

    def growth_percent(key):
        # Size difference relative to this app's size of the file.
        return (0.0 + bigger_in_app.get(key)) / app_files.get(key).size * 100

    output.write('Only in %s\n' % app)
    exclusive_lines = [
        ' %s %s bytes' % (key, size_of(key))
        for key in sorted(only_in_app, key=size_of, reverse=True)
    ]
    output.write('\n'.join(exclusive_lines))
    output.write('\n\n')

    output.write('Bigger in %s\n' % app)
    for key in sorted(bigger_in_app, key=growth_percent, reverse=True):
        row = ' {0:.3f}% {1} bytes {2}\n'.format(growth_percent(key),
                                                 bigger_in_app[key], key)
        output.write(row)
    output.write('\n\n')
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 159 | |
| 160 | |
def compare(app1_classes_dir, app2_classes_dir, app1, app2, options):
    """Compare per-class file sizes of two apps and write a report.

    Args:
        app1_classes_dir: Directory with the per-class files of the first app.
        app2_classes_dir: Directory with the per-class files of the second app.
        app1: Name of the first app, used in report headings.
        app2: Name of the second app, used in report headings.
        options: Parsed options; `report` names the output file (stdout is
            used when it is unset) and the object is forwarded to
            generate_file_info().
    """
    app1_files = generate_file_info(app1_classes_dir, options)
    app2_files = generate_file_info(app2_classes_dir, options)
    only_in_app1 = [k for k in app1_files if k not in app2_files]
    only_in_app2 = [k for k in app2_files if k not in app1_files]
    in_both = [k for k in app2_files if k in app1_files]
    assert len(app1_files) == len(only_in_app1) + len(in_both)
    assert len(app2_files) == len(only_in_app2) + len(in_both)

    bigger_in_app1 = {}
    bigger_in_app2 = {}
    same_size = []
    for f in in_both:
        app1_size = app1_files[f].size
        app2_size = app2_files[f].size
        if app1_size > app2_size:
            bigger_in_app1[f] = app1_size - app2_size
        elif app2_size > app1_size:
            bigger_in_app2[f] = app2_size - app1_size
        else:
            same_size.append(f)

    output = open(options.report, 'w') if options.report else sys.stdout
    try:
        print_info(app1, app1_files, only_in_app1, bigger_in_app1, output)
        print_info(app2, app2_files, only_in_app2, bigger_in_app2, output)
        output.write('Same size\n')
        output.write('\n'.join([' %s' % x for x in same_size]))
    finally:
        # Close the report file even if writing fails; never close stdout.
        if options.report:
            output.close()
| 188 | |
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 189 | |
def Main():
    """Entry point: compare the class sizes of the two apps given as arguments.

    Returns:
        1 when the number of positional arguments is not exactly two;
        otherwise None after the comparison has been written.
    """
    options, args = parse_options()
    if len(args) != 2:
        print(args)
        print('Takes exactly two arguments, the two apps to compare')
        return 1

    app1, app2 = args
    ensure_exists([app1, app2])
    with utils.TempDir() as temporary:
        # A temp dir passed on the command line wins over the generated one.
        output = options.temp or temporary
        ensure_exists([output])

        def inputs_for(app, subdir):
            # Apks are unpacked to their dex files; anything else is used
            # as-is.
            if app.endswith('apk'):
                return extract_apk(app, os.path.join(output, subdir))
            return [app]

        app1_input = inputs_for(app1, 'app1')
        app2_input = inputs_for(app2, 'app2')
        app1_classes_dir = os.path.join(output, 'app1_classes')
        app2_classes_dir = os.path.join(output, 'app2_classes')

        extract_classes(app1_input, app1_classes_dir, options)
        extract_classes(app2_input, app2_classes_dir, options)
        compare(app1_classes_dir, app2_classes_dir, app1, app2, options)
| 215 | |
Rico Wind | 58d0143 | 2018-09-13 14:07:31 +0200 | [diff] [blame] | 216 | |
# Run the comparison when executed as a script and use Main()'s result as the
# process exit status.
if __name__ == '__main__':
    exit_status = Main()
    sys.exit(exit_status)