Coverage for diffoscope/comparators/gettext.py: 100%

37 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-04-07 13:38 +0000

1# 

2# diffoscope: in-depth comparison of files, archives, and directories 

3# 

4# Copyright © 2014-2015 Jérémy Bobbio <lunar@debian.org> 

5# Copyright © 2015-2020 Chris Lamb <lamby@debian.org> 

6# 

7# diffoscope is free software: you can redistribute it and/or modify 

8# it under the terms of the GNU General Public License as published by 

9# the Free Software Foundation, either version 3 of the License, or 

10# (at your option) any later version. 

11# 

12# diffoscope is distributed in the hope that it will be useful, 

13# but WITHOUT ANY WARRANTY; without even the implied warranty of 

14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

15# GNU General Public License for more details. 

16# 

17# You should have received a copy of the GNU General Public License 

18# along with diffoscope. If not, see <https://www.gnu.org/licenses/>. 

19 

20import io 

21import re 

22import logging 

23 

24from diffoscope.tools import tool_required 

25from diffoscope.difference import Difference 

26 

27from .utils.file import File 

28from .utils.command import Command 

29 

30logger = logging.getLogger(__name__) 

31 

32 

class Msgunfmt(Command):
    """Run ``msgunfmt`` on a catalogue and normalise its output to UTF-8.

    The output lines are buffered until the ``Content-Type`` header line
    announcing the charset is seen (or until the first blank line, whichever
    comes first).  From then on every line is re-encoded from the declared
    charset to UTF-8.
    """

    # Matches the PO header line carrying the charset, e.g.
    #   "Content-Type: text/plain; charset=ISO-8859-1\n"
    # where the trailing ``\n`` is the literal two-character escape.
    CHARSET_RE = re.compile(rb'^"Content-Type: [^;]+; charset=([^\\]+)\\n"$')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Lines received before the charset is known are accumulated here.
        self._header = io.BytesIO()
        # Name of the codec used to decode the output; None until decided.
        self._encoding = None

    @tool_required("msgunfmt")
    def cmdline(self):
        """Return the command line used to dump the catalogue."""
        return ["msgunfmt", self.path]

    @staticmethod
    def _usable_charset(raw_name):
        """Return *raw_name* as a lower-cased codec name, or None.

        None is returned when the name is not plain ASCII or when Python has
        no text codec registered under it (for instance the ``CHARSET``
        placeholder left in an unfilled PO header).
        """
        try:
            name = raw_name.decode("us-ascii").lower()
            # Encoding an empty string forces the codec lookup without
            # touching any real data.
            "".encode(name)
        except (LookupError, ValueError):
            # ValueError also covers UnicodeDecodeError from the ASCII step.
            return None
        return name

    def filter(self, line):
        """Return *line* (bytes) converted to UTF-8.

        While the charset is still unknown, lines are held back and ``b""``
        is returned; the buffered header is flushed in one go as soon as the
        charset has been decided.

        NOTE(review): presumably called by ``Command`` once per line of
        ``msgunfmt`` output -- confirm against the base class.
        """
        if not self._encoding:
            self._header.write(line)

            if line == b"\n":
                # End of the first entry and still no charset declaration.
                logger.debug(
                    "unable to determine PO encoding, let's hope it's utf-8"
                )
                self._encoding = "utf-8"
                return self._header.getvalue()

            found = Msgunfmt.CHARSET_RE.match(line)
            if not found:
                return b""

            buffered = self._header.getvalue()
            charset = self._usable_charset(found.group(1))
            if charset is None:
                # An unknown charset used to raise LookupError and abort the
                # whole comparison; treat it like a missing declaration.
                logger.debug(
                    "unsupported PO encoding %r, let's hope it's utf-8",
                    found.group(1),
                )
                self._encoding = "utf-8"
                return buffered

            self._encoding = charset
            return buffered.decode(charset).encode("utf-8")

        if self._encoding == "utf-8":
            # Already in the target encoding: pass through untouched.
            return line

        return line.decode(self._encoding).encode("utf-8")

68 

69 

class MoFile(File):
    """Comparator for compiled gettext message catalogues."""

    DESCRIPTION = "Gettext message catalogues"
    # NOTE(review): presumably matched against the detected file type by the
    # ``File`` machinery -- confirm against the base class.
    FILE_TYPE_RE = re.compile(r"^GNU message catalog\b")

    def compare_details(self, other, source=None):
        """Compare the ``msgunfmt`` output of this file and *other*.

        Returns a one-element list holding the result of
        ``Difference.from_operation``.  *source* is accepted but not used
        here.
        """
        msgunfmt_difference = Difference.from_operation(
            Msgunfmt, self.path, other.path
        )
        return [msgunfmt_difference]

75 return [Difference.from_operation(Msgunfmt, self.path, other.path)]