Coverage for diffoscope/comparators/directory.py: 73%

159 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-04-07 13:38 +0000

1# 

2# diffoscope: in-depth comparison of files, archives, and directories 

3# 

4# Copyright © 2015 Jérémy Bobbio <lunar@debian.org> 

5# Copyright © 2015-2021 Chris Lamb <lamby@debian.org> 

6# 

7# diffoscope is free software: you can redistribute it and/or modify 

8# it under the terms of the GNU General Public License as published by 

9# the Free Software Foundation, either version 3 of the License, or 

10# (at your option) any later version. 

11# 

12# diffoscope is distributed in the hope that it will be useful, 

13# but WITHOUT ANY WARRANTY; without even the implied warranty of 

14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

15# GNU General Public License for more details. 

16# 

17# You should have received a copy of the GNU General Public License 

18# along with diffoscope. If not, see <https://www.gnu.org/licenses/>. 

19 

20import os 

21import re 

22import logging 

23import subprocess 

24 

25from diffoscope.exc import RequiredToolNotFound 

26from diffoscope.tools import python_module_missing, tool_required 

27from diffoscope.config import Config 

28from diffoscope.difference import Difference 

29 

30from .binary import FilesystemFile 

31from .missing_file import AbstractMissingType 

32from .utils.command import Command, our_check_output 

33from .utils.container import Container 

34 

35logger = logging.getLogger(__name__) 

36 

37 

if os.uname()[0] == "FreeBSD":

    class Stat(Command):
        """Command wrapper that runs ``stat`` with explicit format strings.

        Selected when ``os.uname()`` reports "FreeBSD".
        """

        @tool_required("stat")
        def cmdline(self):
            """Return the argv for ``stat`` with ``-t`` and ``-f`` formats."""
            time_format = "%Y-%m-%d %H:%M:%S"
            field_format = "%Sp %l %Su %Sg %z %Sm %k %b %#Xf"
            return ["stat", "-t", time_format, "-f", field_format, self.path]

else:

    class Stat(Command):
        """Command wrapper that runs plain ``stat`` and scrubs its output.

        Selected on every system that does not report "FreeBSD". The
        ``filter`` hook blanks out the parts of each output line matched by
        the patterns below (presumably because they differ between otherwise
        equivalent trees -- confirm against the comparison requirements).
        """

        FILE_RE = re.compile(r"^\s*File:.*$")
        DEVICE_RE = re.compile(r"Device: [0-9a-f]+h/[0-9]+d\s+")
        INODE_RE = re.compile(r"Inode: [0-9]+\s+")
        ACCESS_TIME_RE = re.compile(r"^Access: [0-9]{4}-[0-9]{2}-[0-9]{2}.*$")
        CHANGE_TIME_RE = re.compile(r"^Change: [0-9]{4}-[0-9]{2}-[0-9]{2}.*$")
        BIRTH_TIME_RE = re.compile(r"^\s*Birth:.*$")

        @tool_required("stat")
        def cmdline(self):
            """Return the argv for ``stat`` on this command's path."""
            return ["stat", self.path]

        def filter(self, line):
            """Strip the matched fields from one line of output.

            ``line`` is UTF-8 encoded bytes; the result is bytes as well.
            """
            text = line.decode("utf-8")
            # Applied in the same order as the patterns are declared.
            scrubbers = (
                Stat.FILE_RE,
                Stat.DEVICE_RE,
                Stat.INODE_RE,
                Stat.ACCESS_TIME_RE,
                Stat.CHANGE_TIME_RE,
                Stat.BIRTH_TIME_RE,
            )
            for pattern in scrubbers:
                text = pattern.sub("", text)
            return text.encode("utf-8")

75 

76 

77# compare only what matters 

def stat_results_same(stat1, stat2):
    """Return True when both stat results agree on the compared fields.

    Only mode, owner, group, size and modification time are inspected; any
    other attribute of the two objects is ignored. Comparison stops at the
    first field that differs.
    """
    compared_fields = ("st_mode", "st_uid", "st_gid", "st_size", "st_mtime")
    for field in compared_fields:
        matches = getattr(stat1, field) == getattr(stat2, field)
        if not matches:
            return False
    return True

89 

90 

@tool_required("lsattr")
def lsattr(path):
    """
    Return the first whitespace-separated field printed by ``lsattr -d``.

    NB. Difficult to replace with in-Python version. See
    <https://stackoverflow.com/questions/35501249/python-get-linux-file-immutable-attribute/38092961#38092961>

    Returns an empty string when the command exits with status 1, and None
    when it fails with any other status.
    """

    command = ["lsattr", "-d", path]
    try:
        raw = our_check_output(command, stderr=subprocess.STDOUT)
        fields = raw.decode("utf-8").split()
        return fields[0]
    except subprocess.CalledProcessError as exc:
        if exc.returncode != 1:
            # Other failures are not reported to the caller.
            return None
        # filesystem doesn't support xattrs
        return ""

107 

108 

class Getfacl(Command):
    """Command wrapper around ``getfacl`` with platform-specific flags."""

    @tool_required("getfacl")
    def cmdline(self):
        """Return the ``getfacl`` argv for this command's path.

        Uses ``-q -h`` when ``os.uname()`` reports "FreeBSD" and ``-p -c``
        everywhere else.
        """
        if os.uname()[0] == "FreeBSD":
            flags = ["-q", "-h"]
        else:
            flags = ["-p", "-c"]
        return ["getfacl"] + flags + [self.path]

116 

117 

def xattr(path1, path2):
    """Compare the extended attributes of two paths.

    Returns None when no ``xattr`` Python module can be imported (after
    reporting the missing module); otherwise returns whatever
    ``Difference.from_text`` produces for the two "name: value" listings.
    """
    try:
        import xattr as xattr_
    except ImportError:
        python_module_missing("xattr")
        return None

    # Support the case where the python3-xattr package is installed but
    # python3-pyxattr is not; python3-xattr has an xattr class that can be used
    # like a dict.
    try:
        get_all = xattr_.get_all
    except AttributeError:

        def get_all(x):
            return xattr_.xattr(x).items()

    def as_text(value):
        # The two backends do not agree on types: one may already hand back
        # str (which has no .decode()), the other bytes. Pass str through
        # and decode everything else exactly as before.
        if isinstance(value, str):
            return value
        return value.decode("utf-8", "ignore")

    def fn(x):
        return "\n".join(
            "{}: {}".format(as_text(k), as_text(v)) for k, v in get_all(x)
        )

    return Difference.from_text(
        fn(path1), fn(path2), path1, path2, source="extended file attributes"
    )

146 

147 

def compare_meta(path1, path2):
    """Collect metadata differences between two filesystem paths.

    Returns a list of difference objects with None entries removed. The list
    is empty when directory metadata is excluded by configuration or when
    either path cannot be ``lstat``-ed.
    """
    if Config().exclude_directory_metadata in ("yes", "recursive"):
        logger.debug(
            "Excluding directory metadata for paths (%s, %s)", path1, path2
        )
        return []

    logger.debug("compare_meta(%r, %r)", path1, path2)

    # Don't run any commands if any of the paths do not exist
    # or have other issues.
    try:
        stat1 = os.lstat(path1)
        stat2 = os.lstat(path2)
    except Exception:
        return []

    found = []
    if not stat_results_same(stat1, stat2):
        try:
            stat_diff = Difference.from_operation(
                Stat, path1, path2, short=True
            )
            found.append(stat_diff)
        except RequiredToolNotFound:
            logger.error("Unable to find 'stat'! Is PATH wrong?")
    else:
        logger.debug("Stat structs are identical, moving on!")

    # Symlinks get the stat comparison only.
    if os.path.islink(path1) or os.path.islink(path2):
        return [d for d in found if d is not None]

    if not Config().extended_filesystem_attributes:
        return [d for d in found if d is not None]

    try:
        acl_diff = Difference.from_operation(
            Getfacl, path1, path2, short=True
        )
        found.append(acl_diff)
    except RequiredToolNotFound:
        logger.info(
            "Unable to find 'getfacl', some directory metadata differences might not be noticed."
        )

    try:
        lsattr1 = lsattr(path1)
        lsattr2 = lsattr(path2)
        found.append(
            Difference.from_text(
                lsattr1, lsattr2, path1, path2, source="lsattr"
            )
        )
    except RequiredToolNotFound:
        logger.info(
            "Unable to find 'lsattr', some directory metadata differences might not be noticed."
        )

    # NOTE(review): nesting reconstructed from a listing without indentation;
    # confirm the xattr comparison is meant to be gated by the
    # extended_filesystem_attributes option like the two checks above.
    found.append(xattr(path1, path2))

    return [d for d in found if d is not None]

204 

205 

def compare_directories(path1, path2, source=None):
    """Compare two directory paths and return the resulting difference.

    Both paths are wrapped in ``FilesystemDirectory`` and compared.
    ``source`` is forwarded to ``FilesystemDirectory.compare`` (previously it
    was accepted but ignored); the default of None keeps existing calls
    unchanged. Returns whatever ``compare`` returns.
    """
    first = FilesystemDirectory(path1)
    second = FilesystemDirectory(path2)
    return first.compare(second, source=source)

208 

209 

class Directory:
    """Recognition hooks for directory-like objects."""

    DESCRIPTION = "directories"

    @classmethod
    def recognizes(cls, file):
        """Return the result of ``file.is_directory()``."""
        is_dir = file.is_directory()
        return is_dir

    @classmethod
    def fallback_recognizes(cls, file):
        """Always return False; there is no fallback recognition."""
        return False

220 

221 

class FilesystemDirectory(Directory):
    """A directory on the local filesystem, identified by its path."""

    def __init__(self, path):
        self._path = path

    @property
    def path(self):
        """The path this object was constructed with."""
        return self._path

    @property
    def name(self):
        """Same value as ``path``."""
        return self._path

    @property
    def progress_name(self):
        """The name with the leading "." dropped when it starts with "./"."""
        label = self.name
        if label.startswith("./"):
            return label[1:]
        return label

    @property
    def as_container(self):
        """A ``DirectoryContainer`` for this directory, created on first use."""
        if hasattr(self, "_as_container"):
            return self._as_container
        self._as_container = DirectoryContainer(self)
        return self._as_container

    def is_directory(self):
        return True

    def has_same_content_as(self, other):
        # no shortcut
        return False

    def compare(self, other, source=None):
        """Compare this directory with ``other``.

        Returns an empty list when ``other`` is not a ``FilesystemDirectory``,
        None when nothing differs, and otherwise a ``Difference`` carrying
        the collected details.
        """
        details = []

        if not isinstance(other, FilesystemDirectory):
            return details

        # We don't need to recurse into subdirectories; DirectoryContainer will
        # find them and do that for us.
        def listing(directory):
            entries = os.listdir(os.path.realpath(directory))
            entries.sort()
            return "\n".join(entries)

        own_listing = listing(self.path)
        other_listing = listing(other.path)
        file_list_diff = Difference.from_text(
            own_listing,
            other_listing,
            self.path,
            other.path,
            source="file list",
        )
        if file_list_diff:
            details.append(file_list_diff)

        if not isinstance(other, AbstractMissingType):
            details.extend(compare_meta(self.name, other.name))

        own_members = DirectoryContainer(self)
        other_members = DirectoryContainer(other)
        details.extend(own_members.compare(other_members))

        if details:
            result = Difference(self.path, other.path, source)
            result.add_details(details)
            return result
        return None

286 

287 

class DirectoryContainer(Container):
    """Container whose members are the entries of a directory."""

    def get_member_names(self):
        """Return the sorted entry names of the source directory.

        An empty or otherwise falsy source path is listed as ".".
        """
        names = os.listdir(self.source.path or ".")
        names.sort()
        return names

    def get_member(self, member_name):
        """Return the object representing one entry of the directory.

        Real directories (not symlinks to them) become
        ``FilesystemDirectory``; everything else becomes ``FilesystemFile``
        with this container attached.
        """
        member_path = os.path.join(self.source.path, member_name)

        if os.path.islink(member_path) or not os.path.isdir(member_path):
            return FilesystemFile(member_path, container=self)

        return FilesystemDirectory(member_path)
300 )