Skip to content

Commit

Permalink
wip: re-implement fnmatch
Browse files Browse the repository at this point in the history
  • Loading branch information
nedbat committed Oct 23, 2022
1 parent 57d8649 commit c69c00c
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 64 deletions.
48 changes: 32 additions & 16 deletions coverage/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

"""File wrangling."""

import fnmatch
import hashlib
import ntpath
import os
Expand Down Expand Up @@ -282,7 +281,35 @@ def sep(s):
return the_sep


# Tokenizer table for pathex(): (compiled token regex, replacement template)
# pairs.  At each position of the pattern the entries are tried in order and
# the first one that matches wins, so specific tokens must precede general
# ones.  Templates are expanded with Match.expand(), which is why literal
# backslashes in the templates are doubled.
# NOTE(review): a trailing "/*" becomes ".*" and so drops the separator
# ("abc/*" also matches "abcd") -- confirm whether that is intended.
PATHEX_SUBS = [(re.compile(rx), sub) for rx, sub in [
    (r"^\*+[/\\]", r"^(.*[/\\\\])?"),   # leading */ : any prefix ending in a slash, or nothing
    (r"[/\\]\*+$", r".*"),              # trailing /* : anything at all
    (r"\*\*+[/\\]?", r".*"),            # ** (and optional slash) : anything, slashes included
    (r"[/\\]", r"[/\\\\]"),             # a slash or backslash : either kind of slash
    (r"\*", r"[^/\\\\]*"),              # * : any run of non-slash characters
    (r"\?", r"[^/\\\\]"),               # ? : exactly one non-slash character
    (r"\[.*?\]", r"\g<0>"),             # [a-f] : passed through as a regex character class
    (r"[a-zA-Z0-9_-]+", r"\g<0>"),      # plain word characters : themselves
    (r".", r"\\\g<0>"),                 # any other character (except newline) : backslash-escaped
]]


def pathex(pattern):
    """Convert a file-path pattern into a regex.

    The pattern is scanned left to right; at each position the first matching
    entry of PATHEX_SUBS is expanded and appended to the result.  A pattern
    with no slash or backslash in it is treated as if it started with "**/",
    so a bare name matches in any directory.

    `pattern` is the file-path pattern as a string.

    Returns the regex source as a string (not compiled, and not anchored at
    the end).

    """
    if not re.search(r"[/\\]", pattern):
        pattern = "**/" + pattern
    pieces = []
    pos = 0
    while pos < len(pattern):
        for rx, sub in PATHEX_SUBS:
            m = rx.match(pattern, pos=pos)
            if m:
                pieces.append(m.expand(sub))
                pos = m.end()
                break
        else:
            # Nothing in the table matched.  The catch-all "." does not match
            # a newline, so without this fallback the scan would never
            # advance.  Escape the single character and move on.
            pieces.append(re.escape(pattern[pos]))
            pos += 1
    return "".join(pieces)

def fnmatches_to_regex(patterns, case_insensitive=False, partial=False):
1 # todo: fix this docstring
"""Convert fnmatch patterns to a compiled regex that matches any of them.
Slashes are always converted to match either slash or backslash, for
Expand All @@ -295,24 +322,13 @@ def fnmatches_to_regex(patterns, case_insensitive=False, partial=False):
strings.
"""
regexes = (fnmatch.translate(pattern) for pattern in patterns)
# */ at the start should also match nothing.
regexes = (re.sub(r"^\(\?s:\.\*(\\\\|/)", r"(?s:^(.*\1)?", regex) for regex in regexes)
# Be agnostic: / can mean backslash or slash.
regexes = (re.sub(r"/", r"[\\\\/]", regex) for regex in regexes)

if partial:
# fnmatch always adds a \Z to match the whole string, which we don't
# want, so we remove the \Z. While removing it, we only replace \Z if
# followed by paren (introducing flags), or at end, to keep from
# destroying a literal \Z in the pattern.
regexes = (re.sub(r'\\Z(\(\?|$)', r'\1', regex) for regex in regexes)

flags = 0
if case_insensitive:
flags |= re.IGNORECASE
compiled = re.compile(join_regex(regexes), flags=flags)

rx = join_regex(map(pathex, patterns))
if not partial:
rx = rf"(?:{rx})\Z"
compiled = re.compile(rx, flags=flags)
return compiled


Expand Down
1 change: 0 additions & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ def test_unexecuted_file(self):
assert missing == [1]

def test_filenames(self):

self.make_file("mymain.py", """\
import mymod
a = 1
Expand Down
137 changes: 90 additions & 47 deletions tests/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

"""Tests for files.py"""

import itertools
import os
import os.path
from unittest import mock
Expand Down Expand Up @@ -104,59 +105,99 @@ def test_flat_rootname(original, flat):
assert flat_rootname(original) == flat


def gen_params(patterns, case_insensitive=False, partial=False, matches=(), nomatches=()):
    """Yield one `pytest.param` per text to check against `patterns`.

    Each param carries (patterns, case_insensitive, partial, text, expected),
    where `expected` is True for every text in `matches` and False for every
    text in `nomatches`.  The param id spells out the patterns, both flags,
    the text and the expected outcome.

    """
    id_prefix = f"{'|'.join(patterns)}:ci{case_insensitive}:par{partial}"
    expectations = (
        (matches, True, "match"),
        (nomatches, False, "nomatch"),
    )
    for texts, expected, label in expectations:
        for text in texts:
            yield pytest.param(
                patterns, case_insensitive, partial, text, expected,
                id=f"{id_prefix}:{text}:{label}",
            )

@pytest.mark.parametrize(
"patterns, case_insensitive, partial," +
"matches," +
"nomatches",
[
(
["abc", "xyz"], False, False,
["abc", "xyz"],
["ABC", "xYz", "abcx", "xabc", "axyz", "xyza"],
"patterns, case_insensitive, partial, text, result",
list(itertools.chain.from_iterable([
gen_params(
["abc", "xyz"],
matches=["abc", "xyz", "sub/mod/abc"],
nomatches=["ABC", "xYz", "abcx", "xabc", "axyz", "xyza", "sub/mod/abcd", "sub/abc/more"],
),
(
["abc", "xyz"], True, False,
["abc", "xyz", "Abc", "XYZ", "AbC"],
["abcx", "xabc", "axyz", "xyza"],
gen_params(
["abc", "xyz"], case_insensitive=True,
matches=["abc", "xyz", "Abc", "XYZ", "AbC"],
nomatches=["abcx", "xabc", "axyz", "xyza"],
),
(
["abc/hi.py"], True, False,
["abc/hi.py", "ABC/hi.py", r"ABC\hi.py"],
["abc_hi.py", "abc/hi.pyc"],
gen_params(
["a?c", "x?z"],
matches=["abc", "xyz", "xYz", "azc", "xaz"],
nomatches=["ABC", "abcx", "xabc", "axyz", "xyza"],
),
(
[r"abc\hi.py"], True, False,
[r"abc\hi.py", r"ABC\hi.py"],
["abc/hi.py", "ABC/hi.py", "abc_hi.py", "abc/hi.pyc"],
gen_params(
["a??d"],
matches=["abcd", "azcd", "a12d"],
nomatches=["ABCD", "abcx", "axyz", "abcde"],
),
(
["abc/*/hi.py"], True, False,
["abc/foo/hi.py", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"],
["abc/hi.py", "abc/hi.pyc"],
gen_params(
["abc/hi.py"], case_insensitive=True,
matches=["abc/hi.py", "ABC/hi.py", r"ABC\hi.py"],
nomatches=["abc_hi.py", "abc/hi.pyc"],
),
(
["abc/[a-f]*/hi.py"], True, False,
["abc/foo/hi.py", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"],
["abc/zoo/hi.py", "abc/hi.py", "abc/hi.pyc"],
gen_params(
[r"abc\hi.py"], case_insensitive=True,
matches=[r"abc\hi.py", r"ABC\hi.py", "abc/hi.py", "ABC/hi.py"],
nomatches=["abc_hi.py", "abc/hi.pyc"],
),
(
["abc/"], True, True,
["abc/foo/hi.py", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"],
["abcd/foo.py", "xabc/hi.py"],
gen_params(
["abc/*/hi.py"], case_insensitive=True,
matches=["abc/foo/hi.py", r"ABC\foo/hi.py"],
nomatches=["abc/hi.py", "abc/hi.pyc", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"],
),
(
["*/foo"], False, True,
["abc/foo/hi.py", "foo/hi.py"],
["abc/xfoo/hi.py"],
gen_params(
["abc/**/hi.py"], case_insensitive=True,
matches=[
"abc/foo/hi.py", r"ABC\foo/hi.py", "abc/hi.py", "ABC/foo/bar/hi.py",
r"ABC\foo/bar/hi.py",
],
nomatches=["abc/hi.pyc"],
),
])
def test_fnmatches_to_regex(patterns, case_insensitive, partial, matches, nomatches):
gen_params(
["abc/[a-f]*/hi.py"], case_insensitive=True,
matches=["abc/foo/hi.py", r"ABC\boo/hi.py"],
nomatches=[
"abc/zoo/hi.py", "abc/hi.py", "abc/hi.pyc", "abc/foo/bar/hi.py", r"abc\foo/bar/hi.py",
],
),
gen_params(
["abc/[a-f]/hi.py"], case_insensitive=True,
matches=["abc/f/hi.py", r"ABC\b/hi.py"],
nomatches=[
"abc/foo/hi.py", "abc/zoo/hi.py", "abc/hi.py", "abc/hi.pyc", "abc/foo/bar/hi.py",
r"abc\foo/bar/hi.py",
],
),
gen_params(
["abc/"], case_insensitive=True, partial=True,
matches=["abc/foo/hi.py", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"],
nomatches=["abcd/foo.py", "xabc/hi.py"],
),
gen_params(
["*/foo"], case_insensitive=False, partial=True,
matches=["abc/foo/hi.py", "foo/hi.py"],
nomatches=["abc/xfoo/hi.py"],
),
gen_params(
["**/foo"],
matches=["foo", "hello/foo", "hi/there/foo"],
nomatches=["foob", "hello/foob", "hello/Foo"],
),
])))
def test_fnmatches_to_regex(patterns, case_insensitive, partial, text, result):
regex = fnmatches_to_regex(patterns, case_insensitive=case_insensitive, partial=partial)
for s in matches:
assert regex.match(s)
for s in nomatches:
assert not regex.match(s)
assert bool(regex.match(text)) == result


class MatcherTest(CoverageTest):
Expand Down Expand Up @@ -235,6 +276,8 @@ def test_fnmatch_matcher_overload(self):
self.assertMatches(fnm, "x007foo.txt", True)
self.assertMatches(fnm, "x123foo.txt", True)
self.assertMatches(fnm, "x798bar.txt", False)
self.assertMatches(fnm, "x499.txt", True)
self.assertMatches(fnm, "x500.txt", False)

def test_fnmatch_windows_paths(self):
# We should be able to match Windows paths even if we are running on
Expand Down Expand Up @@ -309,9 +352,9 @@ def test_multiple_patterns(self, rel_yn):
assert msgs == [
"Aliases (relative=True):",
" Rule: '/home/*/src' -> './mysrc/' using regex " +
"'(?s:[\\\\\\\\/]home[\\\\\\\\/].*[\\\\\\\\/]src[\\\\\\\\/])'",
"'[/\\\\\\\\]home[/\\\\\\\\][^/\\\\\\\\]*[/\\\\\\\\]src[/\\\\\\\\]'",
" Rule: '/lib/*/libsrc' -> './mylib/' using regex " +
"'(?s:[\\\\\\\\/]lib[\\\\\\\\/].*[\\\\\\\\/]libsrc[\\\\\\\\/])'",
"'[/\\\\\\\\]lib[/\\\\\\\\][^/\\\\\\\\]*[/\\\\\\\\]libsrc[/\\\\\\\\]'",
"Matched path '/home/foo/src/a.py' to rule '/home/*/src' -> './mysrc/', " +
"producing './mysrc/a.py'",
"Matched path '/lib/foo/libsrc/a.py' to rule '/lib/*/libsrc' -> './mylib/', " +
Expand All @@ -321,9 +364,9 @@ def test_multiple_patterns(self, rel_yn):
assert msgs == [
"Aliases (relative=False):",
" Rule: '/home/*/src' -> './mysrc/' using regex " +
"'(?s:[\\\\\\\\/]home[\\\\\\\\/].*[\\\\\\\\/]src[\\\\\\\\/])'",
"'[/\\\\\\\\]home[/\\\\\\\\][^/\\\\\\\\]*[/\\\\\\\\]src[/\\\\\\\\]'",
" Rule: '/lib/*/libsrc' -> './mylib/' using regex " +
"'(?s:[\\\\\\\\/]lib[\\\\\\\\/].*[\\\\\\\\/]libsrc[\\\\\\\\/])'",
"'[/\\\\\\\\]lib[/\\\\\\\\][^/\\\\\\\\]*[/\\\\\\\\]libsrc[/\\\\\\\\]'",
"Matched path '/home/foo/src/a.py' to rule '/home/*/src' -> './mysrc/', " +
f"producing {files.canonical_filename('./mysrc/a.py')!r}",
"Matched path '/lib/foo/libsrc/a.py' to rule '/lib/*/libsrc' -> './mylib/', " +
Expand Down

0 comments on commit c69c00c

Please sign in to comment.