cpython/Lib/test/test_filecmp.py
2024-07-30 08:50:30 +00:00

372 lines
16 KiB
Python

import filecmp
import os
import re
import shutil
import tempfile
import unittest
from test import support
from test.support import os_helper
def _create_file_shallow_equal(template_path, new_path):
"""create a file with the same size and mtime but different content."""
shutil.copy2(template_path, new_path)
with open(new_path, 'r+b') as f:
next_char = bytearray(f.read(1))
next_char[0] = (next_char[0] + 1) % 256
f.seek(0)
f.write(next_char)
shutil.copystat(template_path, new_path)
assert os.stat(new_path).st_size == os.stat(template_path).st_size
assert os.stat(new_path).st_mtime == os.stat(template_path).st_mtime
class FileCompareTestCase(unittest.TestCase):
def setUp(self):
self.name = os_helper.TESTFN
self.name_same = os_helper.TESTFN + '-same'
self.name_diff = os_helper.TESTFN + '-diff'
self.name_same_shallow = os_helper.TESTFN + '-same-shallow'
data = 'Contents of file go here.\n'
for name in [self.name, self.name_same, self.name_diff]:
with open(name, 'w', encoding="utf-8") as output:
output.write(data)
with open(self.name_diff, 'a+', encoding="utf-8") as output:
output.write('An extra line.\n')
for name in [self.name_same, self.name_diff]:
shutil.copystat(self.name, name)
_create_file_shallow_equal(self.name, self.name_same_shallow)
self.dir = tempfile.gettempdir()
def tearDown(self):
os.unlink(self.name)
os.unlink(self.name_same)
os.unlink(self.name_diff)
os.unlink(self.name_same_shallow)
def test_matching(self):
self.assertTrue(filecmp.cmp(self.name, self.name),
"Comparing file to itself fails")
self.assertTrue(filecmp.cmp(self.name, self.name, shallow=False),
"Comparing file to itself fails")
self.assertTrue(filecmp.cmp(self.name, self.name_same),
"Comparing file to identical file fails")
self.assertTrue(filecmp.cmp(self.name, self.name_same, shallow=False),
"Comparing file to identical file fails")
self.assertTrue(filecmp.cmp(self.name, self.name_same_shallow),
"Shallow identical files should be considered equal")
def test_different(self):
self.assertFalse(filecmp.cmp(self.name, self.name_diff),
"Mismatched files compare as equal")
self.assertFalse(filecmp.cmp(self.name, self.dir),
"File and directory compare as equal")
self.assertFalse(filecmp.cmp(self.name, self.name_same_shallow,
shallow=False),
"Mismatched file to shallow identical file compares as equal")
def test_cache_clear(self):
first_compare = filecmp.cmp(self.name, self.name_same, shallow=False)
second_compare = filecmp.cmp(self.name, self.name_diff, shallow=False)
filecmp.clear_cache()
self.assertTrue(len(filecmp._cache) == 0,
"Cache not cleared after calling clear_cache")
class DirCompareTestCase(unittest.TestCase):
def setUp(self):
tmpdir = tempfile.gettempdir()
self.dir = os.path.join(tmpdir, 'dir')
self.dir_same = os.path.join(tmpdir, 'dir-same')
self.dir_diff = os.path.join(tmpdir, 'dir-diff')
self.dir_diff_file = os.path.join(tmpdir, 'dir-diff-file')
self.dir_same_shallow = os.path.join(tmpdir, 'dir-same-shallow')
# Another dir is created under dir_same, but it has a name from the
# ignored list so it should not affect testing results.
self.dir_ignored = os.path.join(self.dir_same, '.hg')
self.caseinsensitive = os.path.normcase('A') == os.path.normcase('a')
data = 'Contents of file go here.\n'
shutil.rmtree(self.dir, True)
os.mkdir(self.dir)
subdir_path = os.path.join(self.dir, 'subdir')
os.mkdir(subdir_path)
dir_file_path = os.path.join(self.dir, "file")
with open(dir_file_path, 'w', encoding="utf-8") as output:
output.write(data)
for dir in (self.dir_same, self.dir_same_shallow,
self.dir_diff, self.dir_diff_file):
shutil.rmtree(dir, True)
os.mkdir(dir)
subdir_path = os.path.join(dir, 'subdir')
os.mkdir(subdir_path)
if self.caseinsensitive and dir is self.dir_same:
fn = 'FiLe' # Verify case-insensitive comparison
else:
fn = 'file'
file_path = os.path.join(dir, fn)
if dir is self.dir_same_shallow:
_create_file_shallow_equal(dir_file_path, file_path)
else:
shutil.copy2(dir_file_path, file_path)
with open(os.path.join(self.dir_diff, 'file2'), 'w', encoding="utf-8") as output:
output.write('An extra file.\n')
# Add different file2 with respect to dir_diff
with open(os.path.join(self.dir_diff_file, 'file2'), 'w', encoding="utf-8") as output:
output.write('Different contents.\n')
def tearDown(self):
for dir in (self.dir, self.dir_same, self.dir_diff,
self.dir_same_shallow, self.dir_diff_file):
shutil.rmtree(dir)
def test_default_ignores(self):
self.assertIn('.hg', filecmp.DEFAULT_IGNORES)
def test_cmpfiles(self):
self.assertTrue(filecmp.cmpfiles(self.dir, self.dir, ['file']) ==
(['file'], [], []),
"Comparing directory to itself fails")
self.assertTrue(filecmp.cmpfiles(self.dir, self.dir_same, ['file']) ==
(['file'], [], []),
"Comparing directory to same fails")
# Try it with shallow=False
self.assertTrue(filecmp.cmpfiles(self.dir, self.dir, ['file'],
shallow=False) ==
(['file'], [], []),
"Comparing directory to itself fails")
self.assertTrue(filecmp.cmpfiles(self.dir, self.dir_same, ['file'],
shallow=False),
"Comparing directory to same fails")
self.assertFalse(filecmp.cmpfiles(self.dir, self.dir_diff_file,
['file', 'file2']) ==
(['file'], ['file2'], []),
"Comparing mismatched directories fails")
def test_cmpfiles_invalid_names(self):
# See https://github.com/python/cpython/issues/122400.
for file, desc in [
('\x00', 'NUL bytes filename'),
(__file__ + '\x00', 'filename with embedded NUL bytes'),
("\uD834\uDD1E.py", 'surrogate codes (MUSICAL SYMBOL G CLEF)'),
('a' * 1_000_000, 'very long filename'),
]:
for other_dir in [self.dir, self.dir_same, self.dir_diff]:
with self.subTest(f'cmpfiles: {desc}', other_dir=other_dir):
res = filecmp.cmpfiles(self.dir, other_dir, [file])
self.assertTupleEqual(res, ([], [], [file]))
def test_dircmp_invalid_names(self):
for bad_dir, desc in [
('\x00', 'NUL bytes dirname'),
(f'Top{os.sep}Mid\x00', 'dirname with embedded NUL bytes'),
("\uD834\uDD1E", 'surrogate codes (MUSICAL SYMBOL G CLEF)'),
('a' * 1_000_000, 'very long dirname'),
]:
d1 = filecmp.dircmp(self.dir, bad_dir)
d2 = filecmp.dircmp(bad_dir, self.dir)
for target in [
# attributes where os.listdir() raises OSError or ValueError
'left_list', 'right_list',
'left_only', 'right_only', 'common',
]:
with self.subTest(f'dircmp(ok, bad): {desc}', target=target):
with self.assertRaises((OSError, ValueError)):
getattr(d1, target)
with self.subTest(f'dircmp(bad, ok): {desc}', target=target):
with self.assertRaises((OSError, ValueError)):
getattr(d2, target)
def _assert_lists(self, actual, expected):
"""Assert that two lists are equal, up to ordering."""
self.assertEqual(sorted(actual), sorted(expected))
def test_dircmp_identical_directories(self):
self._assert_dircmp_identical_directories()
self._assert_dircmp_identical_directories(shallow=False)
def test_dircmp_different_file(self):
self._assert_dircmp_different_file()
self._assert_dircmp_different_file(shallow=False)
def test_dircmp_different_directories(self):
self._assert_dircmp_different_directories()
self._assert_dircmp_different_directories(shallow=False)
def _assert_dircmp_identical_directories(self, **options):
# Check attributes for comparison of two identical directories
left_dir, right_dir = self.dir, self.dir_same
d = filecmp.dircmp(left_dir, right_dir, **options)
self.assertEqual(d.left, left_dir)
self.assertEqual(d.right, right_dir)
if self.caseinsensitive:
self._assert_lists(d.left_list, ['file', 'subdir'])
self._assert_lists(d.right_list, ['FiLe', 'subdir'])
else:
self._assert_lists(d.left_list, ['file', 'subdir'])
self._assert_lists(d.right_list, ['file', 'subdir'])
self._assert_lists(d.common, ['file', 'subdir'])
self._assert_lists(d.common_dirs, ['subdir'])
self.assertEqual(d.left_only, [])
self.assertEqual(d.right_only, [])
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, [])
expected_report = [
"diff {} {}".format(self.dir, self.dir_same),
"Identical files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)
def _assert_dircmp_different_directories(self, **options):
# Check attributes for comparison of two different directories (right)
left_dir, right_dir = self.dir, self.dir_diff
d = filecmp.dircmp(left_dir, right_dir, **options)
self.assertEqual(d.left, left_dir)
self.assertEqual(d.right, right_dir)
self._assert_lists(d.left_list, ['file', 'subdir'])
self._assert_lists(d.right_list, ['file', 'file2', 'subdir'])
self._assert_lists(d.common, ['file', 'subdir'])
self._assert_lists(d.common_dirs, ['subdir'])
self.assertEqual(d.left_only, [])
self.assertEqual(d.right_only, ['file2'])
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, [])
expected_report = [
"diff {} {}".format(self.dir, self.dir_diff),
"Only in {} : ['file2']".format(self.dir_diff),
"Identical files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)
# Check attributes for comparison of two different directories (left)
left_dir, right_dir = self.dir_diff, self.dir
d = filecmp.dircmp(left_dir, right_dir, **options)
self.assertEqual(d.left, left_dir)
self.assertEqual(d.right, right_dir)
self._assert_lists(d.left_list, ['file', 'file2', 'subdir'])
self._assert_lists(d.right_list, ['file', 'subdir'])
self._assert_lists(d.common, ['file', 'subdir'])
self.assertEqual(d.left_only, ['file2'])
self.assertEqual(d.right_only, [])
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, [])
expected_report = [
"diff {} {}".format(self.dir_diff, self.dir),
"Only in {} : ['file2']".format(self.dir_diff),
"Identical files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)
def _assert_dircmp_different_file(self, **options):
# A different file2
d = filecmp.dircmp(self.dir_diff, self.dir_diff_file, **options)
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, ['file2'])
expected_report = [
"diff {} {}".format(self.dir_diff, self.dir_diff_file),
"Identical files : ['file']",
"Differing files : ['file2']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)
def test_dircmp_no_shallow_different_file(self):
# A non shallow different file2
d = filecmp.dircmp(self.dir, self.dir_same_shallow, shallow=False)
self.assertEqual(d.same_files, [])
self.assertEqual(d.diff_files, ['file'])
expected_report = [
"diff {} {}".format(self.dir, self.dir_same_shallow),
"Differing files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)
def test_dircmp_shallow_same_file(self):
# A non shallow different file2
d = filecmp.dircmp(self.dir, self.dir_same_shallow)
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, [])
expected_report = [
"diff {} {}".format(self.dir, self.dir_same_shallow),
"Identical files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)
def test_dircmp_shallow_is_keyword_only(self):
with self.assertRaisesRegex(
TypeError,
re.escape("dircmp.__init__() takes from 3 to 5 positional arguments but 6 were given"),
):
filecmp.dircmp(self.dir, self.dir_same, None, None, True)
self.assertIsInstance(
filecmp.dircmp(self.dir, self.dir_same, None, None, shallow=True),
filecmp.dircmp,
)
def test_dircmp_subdirs_type(self):
"""Check that dircmp.subdirs respects subclassing."""
class MyDirCmp(filecmp.dircmp):
pass
d = MyDirCmp(self.dir, self.dir_diff)
sub_dirs = d.subdirs
self.assertEqual(list(sub_dirs.keys()), ['subdir'])
sub_dcmp = sub_dirs['subdir']
self.assertEqual(type(sub_dcmp), MyDirCmp)
def test_report_partial_closure(self):
left_dir, right_dir = self.dir, self.dir_same
d = filecmp.dircmp(left_dir, right_dir)
left_subdir = os.path.join(left_dir, 'subdir')
right_subdir = os.path.join(right_dir, 'subdir')
expected_report = [
"diff {} {}".format(self.dir, self.dir_same),
"Identical files : ['file']",
"Common subdirectories : ['subdir']",
'',
"diff {} {}".format(left_subdir, right_subdir),
]
self._assert_report(d.report_partial_closure, expected_report)
def test_report_full_closure(self):
left_dir, right_dir = self.dir, self.dir_same
d = filecmp.dircmp(left_dir, right_dir)
left_subdir = os.path.join(left_dir, 'subdir')
right_subdir = os.path.join(right_dir, 'subdir')
expected_report = [
"diff {} {}".format(self.dir, self.dir_same),
"Identical files : ['file']",
"Common subdirectories : ['subdir']",
'',
"diff {} {}".format(left_subdir, right_subdir),
]
self._assert_report(d.report_full_closure, expected_report)
def _assert_report(self, dircmp_report, expected_report_lines):
with support.captured_stdout() as stdout:
dircmp_report()
report_lines = stdout.getvalue().strip().split('\n')
self.assertEqual(report_lines, expected_report_lines)
if __name__ == "__main__":
unittest.main()