Coverage for diffoscope/comparators/apk.py: 86%
184 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-04-07 13:38 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2024-04-07 13:38 +0000
1#
2# diffoscope: in-depth comparison of files, archives, and directories
3#
4# Copyright © 2016 Reiner Herrmann <reiner@reiner-h.de>
5# Copyright © 2016-2022 Chris Lamb <lamby@debian.org>
6# Copyright © 2022 FC Stegerman <flx@obfusk.net>
7#
8# diffoscope is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# diffoscope is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
21import re
22import binascii
23import textwrap
24import os.path
25import logging
26import itertools
27import subprocess
29from diffoscope.difference import Difference
30from diffoscope.exc import RequiredToolNotFound
31from diffoscope.tools import (
32 tool_required,
33 find_executable,
34 python_module_missing,
35)
36from diffoscope.tempfiles import get_temporary_directory
38from .text import TextFile
39from .utils.archive import Archive, ArchiveMember
40from .utils.command import Command
41from .utils.compare import compare_files
42from .utils.specialize import specialize_as
43from .zip import ZipContainer, zipinfo_differences, ZipFileBase
44from .missing_file import MissingFile
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# androguard is an optional dependency: when the import fails,
# python_module_missing() is called and the name is bound to None so that
# later code can check ``androguard is None`` before using it.
try:
    import androguard
except ImportError:
    python_module_missing("androguard")
    androguard = None
class ApkContainer(Archive):
    """Archive whose members are the files ``apktool`` decodes from an APK.

    ``open_archive`` unpacks the APK once into a temporary directory; every
    member is afterwards served straight from that directory.
    """

    @property
    def path(self):
        """Return the value stored in ``self._path``."""
        return self._path

    def _unzip_member(self, member_name):
        """Run ``unzip`` for one entry of the APK and return its exit status.

        Output is discarded rather than piped: nothing ever reads from the
        pipes, so using ``subprocess.PIPE`` here could block the child
        process once the OS pipe buffer fills up.
        """
        return subprocess.call(
            (
                "unzip",
                "-d",
                self._tmpdir.name,
                self.source.path,
                member_name,
            ),
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
        )

    @tool_required("apktool")
    @tool_required("zipinfo")
    def open_archive(self):
        """Decode the APK into a temporary directory and index its files.

        Populates ``self._members`` with paths relative to the temporary
        directory and records where the decoded and the original
        ``AndroidManifest.xml`` live (those two are not listed as members).

        Returns:
            This container instance.

        Raises:
            subprocess.CalledProcessError: if ``apktool`` exits non-zero.
        """
        self._members = []
        self._tmpdir = get_temporary_directory(suffix="apk")
        self._andmanifest = None
        self._andmanifest_orig = None

        logger.debug(
            "Extracting %s to %s", self.source.name, self._tmpdir.name
        )

        # stdout is discarded (not piped): check_call() never reads from a
        # pipe, so a chatty apktool could otherwise stall on a full buffer.
        # stderr is left attached to our own stderr, as before.
        subprocess.check_call(
            (
                "apktool",
                "d",
                "-f",
                "-k",
                "-m",
                "-o",
                self._tmpdir.name,
                self.source.path,
            ),
            stderr=None,
            stdout=subprocess.DEVNULL,
        )

        # Optionally extract a few files that apktool does not; a failure
        # here (e.g. the entry does not exist) is deliberately ignored.
        for x in ("classes.dex", "resources.arsc"):
            self._unzip_member(x)

        # ... including "classes2.dex", "classes3.dex", etc. Stop at the
        # first index for which unzip reports a failure.
        for x in itertools.count(2):
            if self._unzip_member(f"classes{x}.dex") != 0:
                break

        # Length of the "<tmpdir>/" prefix stripped to obtain relative paths.
        prefix_len = len(self._tmpdir.name) + 1

        for root, _, files in os.walk(self._tmpdir.name):
            current_dir = []

            for filename in files:
                abspath = os.path.join(root, filename)

                # apktool.yml is a file created by apktool which contains
                # metadata information. We rename it for clarity and always
                # make it appear at the beginning of the directory listing for
                # reproducibility.
                if filename == "apktool.yml":
                    abspath = filter_apk_metadata(
                        abspath, os.path.basename(self.source.path)
                    )
                    current_dir.insert(0, abspath[prefix_len:])
                    continue

                relpath = abspath[prefix_len:]

                # Manifests are compared separately by compare_manifests(),
                # so they are remembered here instead of being listed.
                if filename == "AndroidManifest.xml":
                    containing_dir = root[prefix_len:]
                    if containing_dir == "original":
                        self._andmanifest_orig = relpath
                    if containing_dir == "":
                        self._andmanifest = relpath
                    continue

                current_dir.append(relpath)

            self._members.extend(current_dir)

        return self

    def get_android_manifest(self):
        """Return the decoded manifest as a member, or None if not found."""
        return (
            self.get_member(self._andmanifest) if self._andmanifest else None
        )

    def get_original_android_manifest(self):
        """Return the undecoded manifest member, or a ``MissingFile``."""
        if self._andmanifest_orig:
            return self.get_member(self._andmanifest_orig)
        return MissingFile("/dev/null", self._andmanifest_orig)

    def close_archive(self):
        """Do nothing; this method performs no work on close."""
        pass

    def get_member_names(self):
        """Return the list of member paths collected by ``open_archive``."""
        return self._members

    def get_member(self, member_name):
        """Return the ``ArchiveMember`` for ``member_name``.

        Members whose name starts with ``smali`` and ends with ``.smali`` are
        specialised as ``TextFile`` directly.
        """
        member = ArchiveMember(self, member_name)
        if member_name.endswith(".smali") and member_name.startswith("smali"):
            # smali{,_classesN}/**/*.smali files from apktool are always text,
            # and using libmagic on thousands of these files takes minutes
            return specialize_as(TextFile, member)
        return member

    def extract(self, member_name, dest_dir):
        """Return the member's path inside the temporary directory.

        Everything was already unpacked by ``open_archive``, so ``dest_dir``
        is not used.
        """
        return os.path.join(self._tmpdir.name, member_name)

    def compare_manifests(self, other):
        """Compare the Android manifests of this container and ``other``.

        The decoded manifests are compared first. When that yields no
        difference, or one side has no decoded manifest, the original
        (undecoded) manifests are compared instead and the reason is attached
        as a comment to any resulting difference.

        Returns:
            The result of ``compare_files`` for whichever pair was used; this
            may be None.
        """
        my_android_manifest = self.get_android_manifest()
        other_android_manifest = other.get_android_manifest()
        comment = None
        diff_manifests = None
        if my_android_manifest and other_android_manifest:
            source = "AndroidManifest.xml (decoded)"
            diff_manifests = compare_files(
                my_android_manifest, other_android_manifest, source=source
            )
            if diff_manifests is None:
                comment = "No difference found for decoded AndroidManifest.xml"
        else:
            comment = (
                "No decoded AndroidManifest.xml found "
                + "for one of the APK files."
            )
        if diff_manifests:
            return diff_manifests

        # Fall back to the undecoded manifests.
        source = "AndroidManifest.xml (original / undecoded)"
        diff_manifests = compare_files(
            self.get_original_android_manifest(),
            other.get_original_android_manifest(),
            source=source,
        )
        if diff_manifests is not None:
            diff_manifests.add_comment(comment)
        return diff_manifests

    def compare(self, other, *args, **kwargs):
        """Return the manifest comparison followed by the inherited results.

        The first list element is whatever ``compare_manifests`` returned
        (possibly None); it is omitted when ``other`` lacks the APK-specific
        methods.
        """
        differences = []
        try:
            differences.append(self.compare_manifests(other))
        except AttributeError:  # no apk-specific methods, e.g. MissingArchive
            pass
        differences.extend(super().compare(other, *args, **kwargs))
        return differences
class Apksigner(Command):
    """Command that runs ``apksigner verify --verbose --print-certs``."""

    VALID_RETURNCODES = {0, 1}

    @tool_required("apksigner")
    def cmdline(self):
        """Build the argument list used to invoke ``apksigner``."""
        # Older versions of the `apksigner` binary under /usr/bin (or similar)
        # are a symbolic link to the apksigner .jar file. If we detect a .jar
        # we resolve its 'real' location and pass that to `java -jar`, so we
        # don't need kernel binfmt_misc to execute. We can't do this in all
        # situations as later versions of apksigner use a wrapper script and
        # will therefore fail to run at all if we use `java -jar`.
        resolved = os.path.realpath(find_executable("apksigner"))

        argv = [resolved, "verify", "--verbose", "--print-certs", self.path]
        if resolved.endswith(".jar"):
            return ["java", "-jar", *argv]
        return argv
class ApkFile(ZipFileBase):
    """Comparator for Android APK files."""

    DESCRIPTION = "Android APK files"
    FILE_TYPE_HEADER_PREFIX = b"PK\x03\x04"
    FILE_TYPE_RE = re.compile(r"^(Android package|(Java|Zip) archive data)\b")
    FILE_EXTENSION_SUFFIX = {".apk"}
    CONTAINER_CLASSES = [ApkContainer, ZipContainer]

    @property
    def as_container(self):
        """Return the inherited container, or None when it was disabled."""
        # If we found no differences before the APK Signing Block we return
        # None here to prevent apktool from being run needlessly (which can
        # take up a significant amount of extra time) via ApkContainer (since
        # there's no API that allows us to selectively disable use of
        # container classes in cases like these).
        if not getattr(self, "_disable_container_compare", False):
            return super().as_container
        return None  # don't run apktool

    def compare_details(self, other, source=None):
        """Collect zipinfo, apksigner and signing-block differences.

        Results that are not None are placed at the front of the list, the
        signing-block difference ahead of the apksigner one.
        """
        self.check_differences_before_signing_block(other)

        details = zipinfo_differences(self, other)

        try:
            signer_diff = Difference.from_operation(
                Apksigner, self.path, other.path
            )
        except RequiredToolNotFound as exc:  # noqa
            # Don't require apksigner
            self.add_comment(exc.get_comment())
        else:
            if signer_diff is not None:
                details.insert(0, signer_diff)

        if androguard is not None:
            keys_diff = Difference.from_text_readers(
                get_v2_signing_keys(self.path),
                get_v2_signing_keys(other.path),
                self.path,
                other.path,
                source="APK Signing Block",
            )
            if keys_diff is not None:
                details.insert(0, keys_diff)
        else:
            self.add_comment(
                "'androguard' Python package not installed; cannot extract V2 signing keys."
            )

        return details

    def check_differences_before_signing_block(self, other):
        """Run the signing-block pre-check, tolerating a missing dependency.

        When ``apksigcopier`` is unavailable a comment is added instead.
        """
        try:
            self._check_differences_before_signing_block(other)
        except (RequiredToolNotFound, ImportError):
            self.add_comment(
                "'apksigcopier' Python package not installed; unconditionally running 'apktool'."
            )

    @tool_required("apksigcopier")
    def _check_differences_before_signing_block(self, other):
        """Disable container comparison if both files match up to the block.

        Sets ``_disable_container_compare`` on both files only when the
        offsets returned by ``apksigcopier.extract_v2_sig`` are equal and all
        bytes before that offset are identical.
        """
        import apksigcopier

        try:
            limit, _ = apksigcopier.extract_v2_sig(self.path)
            other_limit, _ = apksigcopier.extract_v2_sig(other.path)
        except Exception:
            return

        if limit != other_limit:
            return

        # Compare both files chunk by chunk up to the shared offset.
        with open(self.path, "rb") as mine, open(other.path, "rb") as theirs:
            while True:
                remaining = limit - mine.tell()
                if remaining <= 0:
                    break
                size = min(remaining, 4096)
                if mine.read(size) != theirs.read(size):
                    return

        self.add_comment(
            "No differences before APK Signing Block; not running 'apktool'."
        )

        for apk_file in (self, other):
            apk_file._disable_container_compare = True
def get_v2_signing_keys(path):
    """Return a textual dump of the APK Signing Block entries of ``path``.

    Each entry is rendered as ``Key <hex id>:`` followed by the hex-encoded
    value wrapped by ``textwrap.wrap``; entries are ordered by id.

    Returns:
        The formatted entries joined by newlines, or an empty string when the
        file cannot be opened or parsed by androguard.

    Raises:
        ImportError: if no usable androguard ``apk`` module can be imported.
    """
    try:
        from androguard.core.bytecodes import apk
    except ImportError:
        # NOTE(review): androguard 4.x is understood to have dropped the
        # "bytecodes" package and to ship this module as androguard.core.apk;
        # confirm against the androguard versions that must be supported.
        from androguard.core import apk

    try:
        instance = apk.APK(path)
        instance.parse_v2_signing_block()
    except Exception:
        # Best effort: an unparsable APK simply contributes no keys.
        return ""

    def format_key(x):
        return "\n".join(textwrap.wrap(binascii.hexlify(x).decode("utf-8")))

    return "\n".join(
        f"Key {hex(k)}:\n{format_key(v)}\n"
        for k, v in sorted(instance._v2_blocks.items())
    )
def filter_apk_metadata(filepath, archive_name):
    """Copy ``filepath`` to a sibling file named "APK metadata" and remove it.

    Lines starting with ``apkFileName: <archive_name>`` are dropped from the
    copy.

    Args:
        filepath: Path of the metadata file to filter; it is deleted.
        archive_name: File name to strip from the metadata.

    Returns:
        The path of the newly written "APK metadata" file.
    """
    new_filename = os.path.join(os.path.dirname(filepath), "APK metadata")

    logger.debug("Moving APK metadata from %s to %s", filepath, new_filename)

    # Strip the filename that was passed to apktool as it's embedded in the
    # output. (It is unclear why this is conditional - see comments on
    # reproducible-builds/diffoscope#255)
    re_filename = re.compile(r"^apkFileName: %s" % re.escape(archive_name))

    # Use an explicit encoding so the result does not depend on the locale;
    # "surrogateescape" lets bytes that are not valid UTF-8 pass through
    # unchanged instead of raising UnicodeDecodeError.
    text_mode = {"encoding": "utf-8", "errors": "surrogateescape"}

    with open(filepath, **text_mode) as in_, open(
        new_filename, "w", **text_mode
    ) as out:
        out.writelines(x for x in in_ if not re_filename.match(x))

    os.remove(filepath)

    return new_filename