Coverage for diffoscope/comparators/gettext.py: 100%
37 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-04-07 13:38 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2024-04-07 13:38 +0000
1#
2# diffoscope: in-depth comparison of files, archives, and directories
3#
4# Copyright © 2014-2015 Jérémy Bobbio <lunar@debian.org>
5# Copyright © 2015-2020 Chris Lamb <lamby@debian.org>
6#
7# diffoscope is free software: you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11#
12# diffoscope is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
20import io
21import re
22import logging
24from diffoscope.tools import tool_required
25from diffoscope.difference import Difference
27from .utils.file import File
28from .utils.command import Command
30logger = logging.getLogger(__name__)
class Msgunfmt(Command):
    """Run ``msgunfmt`` on a file and re-encode its output as UTF-8.

    The output encoding is taken from the ``charset=`` parameter of the
    ``Content-Type`` header line.  Lines seen before that parameter are
    buffered, because they cannot be transcoded until the charset is known.
    """

    # Matches a quoted header line such as
    #   "Content-Type: text/plain; charset=ISO-8859-1\n"
    # and captures the charset name (every byte up to the first backslash).
    CHARSET_RE = re.compile(rb'^"Content-Type: [^;]+; charset=([^\\]+)\\n"$')

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base class and reset header state."""
        super().__init__(*args, **kwargs)
        # Raw header bytes collected while the encoding is still unknown.
        self._header = io.BytesIO()
        # Lower-cased charset name; ``None`` until it has been determined.
        self._encoding = None

    @tool_required("msgunfmt")
    def cmdline(self):
        """Return the argument vector: ``msgunfmt`` followed by the path."""
        return ["msgunfmt", self.path]

    def filter(self, line):
        """Convert one line of ``msgunfmt`` output to UTF-8 bytes.

        NOTE(review): presumably called once per output line, in order, by
        the ``Command`` base class -- confirm against its implementation.

        Until the encoding is known, lines are buffered and ``b""`` is
        returned.  The buffered header is released in one piece as soon as
        either a ``charset=`` line or an empty line (``b"\\n"``) is seen.

        If the declared charset is one Python has no text codec for, the
        method falls back to treating the data as UTF-8 and passes it
        through untouched, exactly as it does when no charset is found,
        rather than raising ``LookupError`` on this and every later line.

        :param line: one raw output line, as ``bytes``.
        :returns: UTF-8 encoded ``bytes`` (possibly empty).
        :raises UnicodeDecodeError: if the data is not valid in the
            declared encoding, or the charset name itself is not ASCII.
        """
        if self._encoding:
            # Encoding already settled: UTF-8 passes through unchanged,
            # everything else is transcoded line by line.
            if self._encoding == "utf-8":
                return line
            return line.decode(self._encoding).encode("utf-8")

        # Still inside the header: hold the line back for now.
        self._header.write(line)

        if line == b"\n":
            # An empty line was reached without finding a charset.
            logger.debug(
                "unable to determine PO encoding, let's hope it's utf-8"
            )
            self._encoding = "utf-8"
            return self._header.getvalue()

        found = Msgunfmt.CHARSET_RE.match(line)
        if not found:
            return b""

        self._encoding = found.group(1).decode("us-ascii").lower()
        header = self._header.getvalue()
        try:
            return header.decode(self._encoding).encode("utf-8")
        except LookupError:
            # The header names a charset Python cannot decode.  Keeping
            # that name would make every following line raise as well, so
            # switch to the best-effort UTF-8 pass-through instead.
            logger.debug(
                "unknown PO encoding %r, let's hope it's utf-8",
                self._encoding,
            )
            self._encoding = "utf-8"
            return header
class MoFile(File):
    """File type for compiled gettext message catalogues."""

    DESCRIPTION = "Gettext message catalogues"
    # Pattern tested against a file-type description string to recognise
    # this format.
    FILE_TYPE_RE = re.compile(r"^GNU message catalog\b")

    def compare_details(self, other, source=None):
        """Compare this file with *other* through ``Msgunfmt``.

        :param other: the file to compare against; its ``path`` is used.
        :param source: accepted but not used by this method.
        :returns: a one-element list holding whatever
            ``Difference.from_operation`` returns for the two paths.
        """
        unformatted_diff = Difference.from_operation(
            Msgunfmt, self.path, other.path
        )
        return [unformatted_diff]