From f265a9634b8fd224da958540ae6d89e9dea03d9a Mon Sep 17 00:00:00 2001 From: Antonio Ossa Guerra Date: Mon, 17 Oct 2022 14:59:04 -0300 Subject: [PATCH 1/5] Apply .gitignore files considering their location When a .gitignore file contains the special rule to ignore every subfolder content (`*/*`) and the file is located in a subfolder relative to where the command is executed (root), the rule is incorrectly applied and ignores every file at the same level of the .gitignore file. The reason for this is that the `gitignore` variable accumulates the rules found in each .gitignore while traversing files and directories recursively. This makes sense and, in general, works as expected. The problem is that the gitignore rules are applied using as the relative path from root to target directory as a reference. This is the cause of the bug. The implemented solution keeps track of every .gitignore file found while traversing the targets and the absolute location of each .gitignore file. Then, when matching files to the .gitignore rules, compare each set of rules with the appropiate relative path to the candidate target file. To make this possible, we changed the single `gitignore` object with a dictionary of similar objects, where the corresponding key is the absolute path to the folder that contains that .gitignore file. This required changing the signature of the `get_sources` function. Also, we introduce a `is_ignored` function that compares a file with every set of rules. Finally, some tests required an update to pass the gitignore object in the new format. Signed-off-by: Antonio Ossa Guerra --- src/black/__init__.py | 12 +++++++----- src/black/files.py | 27 +++++++++++++++++++++++---- tests/test_black.py | 6 +++--- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index 5293796aea1..fc06a9e4fd6 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -625,6 +625,8 @@ def get_sources( sources: Set[Path] = set() root = ctx.obj["root"] + gitignore = None + for s in src: if s == "-" and stdin_filename: p = Path(stdin_filename) @@ -660,14 +662,14 @@ def get_sources( elif p.is_dir(): if exclude is None: exclude = re_compile_maybe_verbose(DEFAULT_EXCLUDES) - gitignore = get_gitignore(root) + root_gitignore = get_gitignore(root) p_gitignore = get_gitignore(p) # No need to use p's gitignore if it is identical to root's gitignore # (i.e. root and p point to the same directory). - if gitignore != p_gitignore: - gitignore += p_gitignore - else: - gitignore = None + if root_gitignore == p_gitignore: + gitignore = {root: root_gitignore} + else: + gitignore = {root: root_gitignore, root / p: p_gitignore} sources.update( gen_python_files( p.iterdir(), diff --git a/src/black/files.py b/src/black/files.py index ed503f5fec7..c58970e53e3 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -198,7 +198,7 @@ def gen_python_files( extend_exclude: Optional[Pattern[str]], force_exclude: Optional[Pattern[str]], report: Report, - gitignore: Optional[PathSpec], + gitignore_dict: Optional[Dict[Path, PathSpec]], *, verbose: bool, quiet: bool, @@ -211,6 +211,19 @@ def gen_python_files( `report` is where output about exclusions goes. """ + + def is_ignored( + gitignore_dict: Dict[Path, PathSpec], child: Path, report: Report + ) -> bool: + for _dir, _gitignore in gitignore_dict.items(): + relative_path = normalize_path_maybe_ignore(child, _dir, report) + if relative_path is None: + break + if _gitignore is not None and _gitignore.match_file(relative_path): + report.path_ignored(child, "matches the .gitignore file content") + return True + return False + assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}" for child in paths: normalized_path = normalize_path_maybe_ignore(child, root, report) @@ -218,8 +231,7 @@ def gen_python_files( continue # First ignore files matching .gitignore, if passed - if gitignore is not None and gitignore.match_file(normalized_path): - report.path_ignored(child, "matches the .gitignore file content") + if gitignore_dict is not None and is_ignored(gitignore_dict, child, report): continue # Then ignore with `--exclude` `--extend-exclude` and `--force-exclude` options. @@ -244,6 +256,13 @@ def gen_python_files( if child.is_dir(): # If gitignore is None, gitignore usage is disabled, while a Falsey # gitignore is when the directory doesn't have a .gitignore file. + if gitignore_dict is not None: + new_gitignore_dict = { + **gitignore_dict, + root / child: get_gitignore(child), + } + else: + new_gitignore_dict = None yield from gen_python_files( child.iterdir(), root, @@ -252,7 +271,7 @@ def gen_python_files( extend_exclude, force_exclude, report, - gitignore + get_gitignore(child) if gitignore is not None else None, + new_gitignore_dict, verbose=verbose, quiet=quiet, ) diff --git a/tests/test_black.py b/tests/test_black.py index 5d0175d9d66..83c705a869f 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -1989,7 +1989,7 @@ def test_gitignore_exclude(self) -> None: None, None, report, - gitignore, + {path: gitignore}, verbose=False, quiet=False, ) @@ -2018,7 +2018,7 @@ def test_nested_gitignore(self) -> None: None, None, report, - root_gitignore, + {path: root_gitignore}, verbose=False, quiet=False, ) @@ -2110,7 +2110,7 @@ def test_symlink_out_of_root_directory(self) -> None: None, None, report, - gitignore, + {path: gitignore}, verbose=False, quiet=False, ) From 9782c3331ae952a69a1ff867ab3bb027c2a146ae Mon Sep 17 00:00:00 2001 From: Antonio Ossa Guerra Date: Mon, 17 Oct 2022 16:24:13 -0300 Subject: [PATCH 2/5] Test .gitignore with `*/*` is applied correctly The test contains three cases: 1) when the .gitignore with the special rule to ignore every subfolder and its contents (*/*) is in the root, 2) when the file is inside a subfolder relative to root (nested), and 3) when the target folder contains the .gitignore and root is a parent folder of the target. In all of these cases, we compare the files that are visible by Black with a known list of paths containing the expected values. Before the fix introduced in the previous commit, these tests failed when the .gitignore file was nested (second case). Now, the test is passed for all cases. Signed-off-by: Antonio Ossa Guerra --- .../ignore_subfolders_gitignore_tests/a.py | 0 .../subdir/.gitignore | 1 + .../subdir/b.py | 0 .../subdir/subdir/c.py | 0 tests/test_black.py | 26 +++++++++++++++++++ 5 files changed, 27 insertions(+) create mode 100644 tests/data/ignore_subfolders_gitignore_tests/a.py create mode 100644 tests/data/ignore_subfolders_gitignore_tests/subdir/.gitignore create mode 100644 tests/data/ignore_subfolders_gitignore_tests/subdir/b.py create mode 100644 tests/data/ignore_subfolders_gitignore_tests/subdir/subdir/c.py diff --git a/tests/data/ignore_subfolders_gitignore_tests/a.py b/tests/data/ignore_subfolders_gitignore_tests/a.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/data/ignore_subfolders_gitignore_tests/subdir/.gitignore b/tests/data/ignore_subfolders_gitignore_tests/subdir/.gitignore new file mode 100644 index 00000000000..150f68c80f5 --- /dev/null +++ b/tests/data/ignore_subfolders_gitignore_tests/subdir/.gitignore @@ -0,0 +1 @@ +*/* diff --git a/tests/data/ignore_subfolders_gitignore_tests/subdir/b.py b/tests/data/ignore_subfolders_gitignore_tests/subdir/b.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/data/ignore_subfolders_gitignore_tests/subdir/subdir/c.py b/tests/data/ignore_subfolders_gitignore_tests/subdir/subdir/c.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/test_black.py b/tests/test_black.py index 83c705a869f..59c6fea422c 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -2056,6 +2056,32 @@ def test_invalid_nested_gitignore(self) -> None: gitignore = path / "a" / ".gitignore" assert f"Could not parse {gitignore}" in result.stderr_bytes.decode() + def test_gitignore_that_ignores_subfolders(self) -> None: + # If gitignore with */* is in root + root = Path(DATA_DIR / "ignore_subfolders_gitignore_tests" / "subdir") + expected = [root / "b.py"] + ctx = FakeContext() + ctx.obj["root"] = root + assert_collected_sources([root], expected, ctx=ctx) + + # If .gitignore with */* is nested + root = Path(DATA_DIR / "ignore_subfolders_gitignore_tests") + expected = [ + root / "a.py", + root / "subdir" / "b.py", + ] + ctx = FakeContext() + ctx.obj["root"] = root + assert_collected_sources([root], expected, ctx=ctx) + + # If command is executed from outer dir + root = Path(DATA_DIR / "ignore_subfolders_gitignore_tests") + target = root / "subdir" + expected = [target / "b.py"] + ctx = FakeContext() + ctx.obj["root"] = root + assert_collected_sources([target], expected, ctx=ctx) + def test_empty_include(self) -> None: path = DATA_DIR / "include_exclude_tests" src = [path] From f97cccdee4e2d2e62e3c3aac786b3e5df8ef354b Mon Sep 17 00:00:00 2001 From: Antonio Ossa Guerra Date: Mon, 17 Oct 2022 16:43:06 -0300 Subject: [PATCH 3/5] Update CHANGES.md Add entry about fixed bug and changes introduced: ignore files by considering the location of each .gitignore file and the relative path of each target Signed-off-by: Antonio Ossa Guerra --- CHANGES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index ba9f4c06f28..3e51df5cd44 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -18,6 +18,9 @@ +- Fix incorrectly applied .gitignore rules by considering the .gitignore location and + the relative path to the target file (#3338) + ### Packaging From 5aee3b765bd8e52be105ab72423d92d5da9bd9f5 Mon Sep 17 00:00:00 2001 From: Antonio Ossa Guerra Date: Sun, 6 Nov 2022 18:31:20 -0300 Subject: [PATCH 4/5] Small refactor to improve code readability These changes are small improvements to improve code readability: rename a variable to a more descriptive name (from `exclude_is_None` to `using_default_exclude`), use a better syntax to include the type annotation for `gitignore` variable (from typing comment to Python-style typing annotation), and replace an if-else block with a single dictionary definition (in this case, we need to compare keys instead of values, meaning that the change works) Signed-off-by: Antonio Ossa Guerra --- src/black/__init__.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index 8de16cefea4..2786861e9e0 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -628,9 +628,9 @@ def get_sources( sources: Set[Path] = set() root = ctx.obj["root"] - exclude_is_None = exclude is None + using_default_exclude = exclude is None exclude = re_compile_maybe_verbose(DEFAULT_EXCLUDES) if exclude is None else exclude - gitignore = None # type: Optional[PathSpec] + gitignore: Optional[PathSpec] = None root_gitignore = get_gitignore(root) for s in src: @@ -666,14 +666,11 @@ def get_sources( sources.add(p) elif p.is_dir(): - if exclude_is_None: - p_gitignore = get_gitignore(p) - # No need to use p's gitignore if it is identical to root's gitignore - # (i.e. root and p point to the same directory). - if root_gitignore == p_gitignore: - gitignore = {root: root_gitignore} - else: - gitignore = {root: root_gitignore, root / p: p_gitignore} + if using_default_exclude: + gitignore = { + root: root_gitignore, + root / p: get_gitignore(p), + } sources.update( gen_python_files( p.iterdir(), From c09a816aff21ff8018c28348e9fad430db9dfbdc Mon Sep 17 00:00:00 2001 From: Antonio Ossa Guerra Date: Tue, 8 Nov 2022 10:02:36 -0300 Subject: [PATCH 5/5] Make nested function a top-level function The function to match a given path with every discovered .gitignore file does not need to be a nested function and can be a top-level function. The arguments did not change, but the naming of local variables was improved for readability. Signed-off-by: Antonio Ossa Guerra --- src/black/files.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/black/files.py b/src/black/files.py index c58970e53e3..ea517f4ece9 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -182,6 +182,19 @@ def normalize_path_maybe_ignore( return root_relative_path +def path_is_ignored( + path: Path, gitignore_dict: Dict[Path, PathSpec], report: Report +) -> bool: + for gitignore_path, pattern in gitignore_dict.items(): + relative_path = normalize_path_maybe_ignore(path, gitignore_path, report) + if relative_path is None: + break + if pattern.match_file(relative_path): + report.path_ignored(path, "matches a .gitignore file content") + return True + return False + + def path_is_excluded( normalized_path: str, pattern: Optional[Pattern[str]], @@ -212,18 +225,6 @@ def gen_python_files( `report` is where output about exclusions goes. """ - def is_ignored( - gitignore_dict: Dict[Path, PathSpec], child: Path, report: Report - ) -> bool: - for _dir, _gitignore in gitignore_dict.items(): - relative_path = normalize_path_maybe_ignore(child, _dir, report) - if relative_path is None: - break - if _gitignore is not None and _gitignore.match_file(relative_path): - report.path_ignored(child, "matches the .gitignore file content") - return True - return False - assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}" for child in paths: normalized_path = normalize_path_maybe_ignore(child, root, report) @@ -231,7 +232,7 @@ def is_ignored( continue # First ignore files matching .gitignore, if passed - if gitignore_dict is not None and is_ignored(gitignore_dict, child, report): + if gitignore_dict and path_is_ignored(child, gitignore_dict, report): continue # Then ignore with `--exclude` `--extend-exclude` and `--force-exclude` options.