Coverage for diffoscope/comparators/apk.py: 86%

184 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-04-07 13:38 +0000

1# 

2# diffoscope: in-depth comparison of files, archives, and directories 

3# 

4# Copyright © 2016 Reiner Herrmann <reiner@reiner-h.de> 

5# Copyright © 2016-2022 Chris Lamb <lamby@debian.org> 

6# Copyright © 2022 FC Stegerman <flx@obfusk.net> 

7# 

8# diffoscope is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# diffoscope is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the GNU General Public License 

19# along with diffoscope. If not, see <https://www.gnu.org/licenses/>. 

20 

21import re 

22import binascii 

23import textwrap 

24import os.path 

25import logging 

26import itertools 

27import subprocess 

28 

29from diffoscope.difference import Difference 

30from diffoscope.exc import RequiredToolNotFound 

31from diffoscope.tools import ( 

32 tool_required, 

33 find_executable, 

34 python_module_missing, 

35) 

36from diffoscope.tempfiles import get_temporary_directory 

37 

38from .text import TextFile 

39from .utils.archive import Archive, ArchiveMember 

40from .utils.command import Command 

41from .utils.compare import compare_files 

42from .utils.specialize import specialize_as 

43from .zip import ZipContainer, zipinfo_differences, ZipFileBase 

44from .missing_file import MissingFile 

45 

46logger = logging.getLogger(__name__) 

47 

48try: 

49 import androguard 

50except ImportError: 

51 python_module_missing("androguard") 

52 androguard = None 

53 

54 

class ApkContainer(Archive):
    """Archive container backed by an ``apktool`` extraction of an APK.

    ``open_archive`` decodes the APK into a temporary directory and records
    the relative path of every extracted file as a member name. The decoded
    and the original ``AndroidManifest.xml`` are kept out of the member list
    and are compared separately by ``compare_manifests``.
    """

    @property
    def path(self):
        """Return the value stored in ``self._path``."""
        return self._path

    @tool_required("apktool")
    @tool_required("zipinfo")
    def open_archive(self):
        """Extract the APK and build the list of member names.

        Runs ``apktool d`` into a fresh temporary directory, then tries to
        unzip a few raw files that apktool does not emit (``classes.dex``,
        ``resources.arsc``, ``classesN.dex``), and finally walks the
        directory to collect member names relative to it.

        Returns:
            This container.

        Raises:
            subprocess.CalledProcessError: if ``apktool`` exits non-zero.
        """
        self._members = []
        self._tmpdir = get_temporary_directory(suffix="apk")
        self._andmanifest = None
        self._andmanifest_orig = None

        logger.debug(
            "Extracting %s to %s", self.source.name, self._tmpdir.name
        )

        # The output of the child processes is never read. It is discarded
        # via DEVNULL rather than sent to a PIPE: an unread pipe blocks the
        # child (and therefore this call) once the OS pipe buffer is full.
        subprocess.check_call(
            (
                "apktool",
                "d",
                "-f",
                "-k",
                "-m",
                "-o",
                self._tmpdir.name,
                self.source.path,
            ),
            stderr=None,
            stdout=subprocess.DEVNULL,
        )

        def unzip(name):
            # Try to extract a single entry of the APK into the temporary
            # directory and return the exit status of unzip.
            return subprocess.call(
                ("unzip", "-d", self._tmpdir.name, self.source.path, name),
                stderr=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
            )

        # Optionally extract a few files that apktool does not
        for x in ("classes.dex", "resources.arsc"):
            unzip(x)

        # ... including "classes2.dex", "classes3.dex", etc. Stop at the
        # first index for which unzip reports a failure.
        for x in itertools.count(2):
            if unzip(f"classes{x}.dex") != 0:
                break

        # Number of leading characters ("<tmpdir>" plus one separator) to
        # strip from a path to make it relative to the extraction directory.
        prefix_len = len(self._tmpdir.name) + 1

        for root, _, files in os.walk(self._tmpdir.name):
            current_dir = []

            for filename in files:
                abspath = os.path.join(root, filename)

                # apktool.yml is a file created by apktool which contains
                # metadata information. We rename it for clarity and always
                # make it appear at the beginning of the directory listing for
                # reproducibility.
                if filename == "apktool.yml":
                    abspath = filter_apk_metadata(
                        abspath, os.path.basename(self.source.path)
                    )
                    current_dir.insert(0, abspath[prefix_len:])
                    continue

                relpath = abspath[prefix_len:]

                # Manifests are never listed as regular members; the one in
                # the top-level directory and the one under "original" are
                # remembered for compare_manifests().
                if filename == "AndroidManifest.xml":
                    containing_dir = root[prefix_len:]
                    if containing_dir == "original":
                        self._andmanifest_orig = relpath
                    if containing_dir == "":
                        self._andmanifest = relpath
                    continue

                current_dir.append(relpath)

            self._members.extend(current_dir)

        return self

    def get_android_manifest(self):
        """Return the decoded manifest member, or None if none was found."""
        return (
            self.get_member(self._andmanifest) if self._andmanifest else None
        )

    def get_original_android_manifest(self):
        """Return the original manifest member, or a MissingFile stand-in."""
        if self._andmanifest_orig:
            return self.get_member(self._andmanifest_orig)
        return MissingFile("/dev/null", self._andmanifest_orig)

    def close_archive(self):
        """Do nothing; there is no open handle to release here."""
        pass

    def get_member_names(self):
        """Return the member names collected by ``open_archive``."""
        return self._members

    def get_member(self, member_name):
        """Return the ArchiveMember for *member_name*.

        Members whose name starts with ``smali`` and ends with ``.smali``
        are specialized as TextFile up front.
        """
        member = ArchiveMember(self, member_name)
        if member_name.endswith(".smali") and member_name.startswith("smali"):
            # smali{,_classesN}/**/*.smali files from apktool are always text,
            # and using libmagic on thousands of these files takes minutes
            return specialize_as(TextFile, member)
        return member

    def extract(self, member_name, dest_dir):
        """Return the path of *member_name* inside the temporary directory.

        *dest_dir* is not used: nothing is copied, the already extracted
        file is referenced in place.
        """
        return os.path.join(self._tmpdir.name, member_name)

    def compare_manifests(self, other):
        """Compare the Android manifests of this container and *other*.

        The decoded manifests are compared first and that result is
        returned if it is truthy. Otherwise the original manifests are
        compared, and a comment explaining why the decoded comparison
        produced nothing is attached to that result.

        Returns:
            The result of ``compare_files`` (may be None).
        """
        my_android_manifest = self.get_android_manifest()
        other_android_manifest = other.get_android_manifest()
        comment = None
        diff_manifests = None
        if my_android_manifest and other_android_manifest:
            source = "AndroidManifest.xml (decoded)"
            diff_manifests = compare_files(
                my_android_manifest, other_android_manifest, source=source
            )
            if diff_manifests is None:
                comment = "No difference found for decoded AndroidManifest.xml"
        else:
            comment = (
                "No decoded AndroidManifest.xml found "
                + "for one of the APK files."
            )
        if diff_manifests:
            return diff_manifests

        source = "AndroidManifest.xml (original / undecoded)"
        diff_manifests = compare_files(
            self.get_original_android_manifest(),
            other.get_original_android_manifest(),
            source=source,
        )
        if diff_manifests is not None:
            diff_manifests.add_comment(comment)
        return diff_manifests

    def compare(self, other, *args, **kwargs):
        """Return the manifest comparison followed by the base comparison.

        The first entry is whatever ``compare_manifests`` returned; it is
        omitted when that call raises AttributeError.
        """
        differences = []
        try:
            differences.append(self.compare_manifests(other))
        except AttributeError:  # no apk-specific methods, e.g. MissingArchive
            pass
        differences.extend(super().compare(other, *args, **kwargs))
        return differences

211 

212 

class Apksigner(Command):
    """Command that runs ``apksigner verify --verbose --print-certs``."""

    VALID_RETURNCODES = {0, 1}

    @tool_required("apksigner")
    def cmdline(self):
        """Build the argument list used to verify ``self.path``.

        Returns:
            A list of command-line arguments, prefixed with ``java -jar``
            when the resolved apksigner executable is a ``.jar`` file.
        """
        # Older versions of the `apksigner` binary under /usr/bin (or similar)
        # are a symbolic link to the apksigner .jar file. When a .jar is
        # detected its 'real' location is handed to `java -jar`, so kernel
        # binfmt_misc is not needed to execute it. This cannot be done
        # unconditionally: later versions of apksigner are a wrapper script
        # and would fail to run at all under `java -jar`.
        resolved = os.path.realpath(find_executable("apksigner"))

        command = [
            resolved,
            "verify",
            "--verbose",
            "--print-certs",
            self.path,
        ]
        if resolved.endswith(".jar"):
            command[:0] = ["java", "-jar"]

        return command

235 

236 

class ApkFile(ZipFileBase):
    """Comparator for Android APK files."""

    DESCRIPTION = "Android APK files"
    FILE_TYPE_HEADER_PREFIX = b"PK\x03\x04"
    FILE_TYPE_RE = re.compile(r"^(Android package|(Java|Zip) archive data)\b")
    FILE_EXTENSION_SUFFIX = {".apk"}
    CONTAINER_CLASSES = [ApkContainer, ZipContainer]

    @property
    def as_container(self):
        """Return the container for this file, or None when disabled."""
        # When no differences were found before the APK Signing Block the
        # container is suppressed so that apktool is not run needlessly via
        # ApkContainer (which can take a significant amount of extra time);
        # there is no API to selectively disable container classes in cases
        # like these.
        if not getattr(self, "_disable_container_compare", False):
            return super().as_container
        return None  # don't run apktool

    def compare_details(self, other, source=None):
        """Return the detail differences between this APK and *other*.

        The list starts from ``zipinfo_differences``; the apksigner output
        difference and the APK Signing Block difference are each inserted
        at the front when present.
        """
        self.check_differences_before_signing_block(other)

        differences = zipinfo_differences(self, other)

        try:
            signer_diff = Difference.from_operation(
                Apksigner, self.path, other.path
            )
        except RequiredToolNotFound as exc:  # noqa
            # Don't require apksigner
            self.add_comment(exc.get_comment())
        else:
            if signer_diff is not None:
                differences.insert(0, signer_diff)

        if androguard is not None:
            keys_self = get_v2_signing_keys(self.path)
            keys_other = get_v2_signing_keys(other.path)
            block_diff = Difference.from_text_readers(
                keys_self,
                keys_other,
                self.path,
                other.path,
                source="APK Signing Block",
            )
            if block_diff is not None:
                differences.insert(0, block_diff)
        else:
            self.add_comment(
                "'androguard' Python package not installed; cannot extract V2 signing keys."
            )

        return differences

    def check_differences_before_signing_block(self, other):
        """Run the pre-signing-block check, tolerating a missing dependency.

        If the check raises RequiredToolNotFound or ImportError, a comment
        is added to this file instead.
        """
        try:
            self._check_differences_before_signing_block(other)
        except (RequiredToolNotFound, ImportError):
            self.add_comment(
                "'apksigcopier' Python package not installed; unconditionally running 'apktool'."
            )

    @tool_required("apksigcopier")
    def _check_differences_before_signing_block(self, other):
        """Disable container comparison when the signed payloads are equal.

        Both files are read from the start up to the offset returned by
        ``apksigcopier.extract_v2_sig``. If those offsets are equal and the
        bytes before them are identical, ``_disable_container_compare`` is
        set on both files. Otherwise nothing changes.
        """
        import apksigcopier

        try:
            offset_self, _ = apksigcopier.extract_v2_sig(self.path)
            offset_other, _ = apksigcopier.extract_v2_sig(other.path)
        except Exception:
            # Any failure to obtain the offsets leaves the default behaviour.
            return

        if offset_self != offset_other:
            return

        with open(self.path, "rb") as fh_self, open(
            other.path, "rb"
        ) as fh_other:
            # Compare in chunks of at most 4096 bytes, never reading past
            # the offset.
            while True:
                position = fh_self.tell()
                if position >= offset_self:
                    break
                size = min(offset_self - position, 4096)
                if fh_self.read(size) != fh_other.read(size):
                    return

        self.add_comment(
            "No differences before APK Signing Block; not running 'apktool'."
        )

        self._disable_container_compare = True
        other._disable_container_compare = True

320 

321 

def get_v2_signing_keys(path):
    """Return a text rendering of the V2 signing blocks of the APK at *path*.

    Each entry of the parsed ``_v2_blocks`` mapping is rendered as a
    ``Key <hex id>:`` line followed by the value as wrapped hexadecimal
    text; entries are emitted in sorted order.

    Returns:
        The rendered text, or an empty string if loading the APK or parsing
        its signing block raises an exception.
    """
    from androguard.core.bytecodes import apk

    try:
        parsed = apk.APK(path)
        parsed.parse_v2_signing_block()
    except Exception:
        return ""

    def hexdump(raw):
        # Hex-encode the value and wrap it at textwrap's default width.
        hexed = binascii.hexlify(raw).decode("utf-8")
        return "\n".join(textwrap.wrap(hexed))

    return "\n".join(
        "Key {}:\n{}\n".format(hex(block_id), hexdump(value))
        for block_id, value in sorted(parsed._v2_blocks.items())
    )

339 

340 

def filter_apk_metadata(filepath, archive_name):
    """Replace apktool's metadata file with a filtered "APK metadata" copy.

    Every line of *filepath* is copied to a file named ``APK metadata`` in
    the same directory, except lines that start with
    ``apkFileName: <archive_name>``. The file at *filepath* is then removed.

    Args:
        filepath: Path of the metadata file written by apktool.
        archive_name: File name of the archive that was passed to apktool.

    Returns:
        The path of the newly written ``APK metadata`` file.
    """
    new_filename = os.path.join(os.path.dirname(filepath), "APK metadata")

    logger.debug("Moving APK metadata from %s to %s", filepath, new_filename)

    # Strip the filename that was passed to apktool as it's embedded in the
    # output. (It is unclear why this is conditional - see comments on
    # reproducible-builds/diffoscope#255)
    re_filename = re.compile(r"^apkFileName: %s" % re.escape(archive_name))

    # Use a fixed encoding so the result does not depend on the locale of
    # the process; "surrogateescape" lets bytes that are not valid UTF-8
    # pass through unchanged instead of raising.
    text_mode = {"encoding": "utf-8", "errors": "surrogateescape"}

    with open(filepath, **text_mode) as in_, open(
        new_filename, "w", **text_mode
    ) as out:
        out.writelines(x for x in in_ if not re_filename.match(x))

    os.remove(filepath)

    return new_filename