class Scanner:
    """Walk a directory tree and fingerprint every file that is not ignored.

    Attributes:
        base_path: Root of the tree being scanned, as a ``Path``.
        ignore_file: Spec built from the ignore pattern file.
        include_file: Spec built from the include pattern file.
        spec: The spec consulted by ``get_current_state``; paths it matches
            are left out of the result.
    """

    def __init__(self, base_path, ignore_file=".syncignore", include_file=".syncinclude"):
        """Remember *base_path* and load both pattern files found beneath it.

        Args:
            base_path: Directory to scan.
            ignore_file: Name of the ignore pattern file, relative to
                *base_path*.
            include_file: Name of the include pattern file, relative to
                *base_path*.
        """
        self.base_path = Path(base_path)
        self.ignore_file = self._load_ignore_spec(ignore_file)
        self.include_file = self._load_include_spec(include_file)
        # get_current_state() filters through ``self.spec``, which was never
        # assigned before; the ignore patterns are what it is meant to apply.
        # NOTE(review): the include spec is loaded but not applied, because
        # the scan loop never consulted it -- confirm the intended
        # precedence between the two files before wiring it in.
        self.spec = self.ignore_file

    def _load_spec(self, file):
        """Build a ``PathSpec`` from the pattern file *file* under the base path.

        A missing pattern file is treated as "no patterns", so the returned
        spec matches nothing.
        """
        pattern_path = os.path.join(str(self.base_path), file)
        lines = []
        if os.path.isfile(pattern_path):
            with open(pattern_path, "r", encoding="utf-8") as handle:
                lines = handle.read().splitlines()
        return PathSpec.from_lines("gitwildmatch", lines)

    def _load_ignore_spec(self, file):
        """Return the spec describing paths to leave out of the scan."""
        return self._load_spec(file)

    def _load_include_spec(self, file):
        """Return the spec built from the include pattern file."""
        return self._load_spec(file)

    def _is_ignored(self, rel_path):
        """Return True when the active spec matches *rel_path*."""
        spec = getattr(self, "spec", None)
        # Without a spec nothing is filtered out.
        return spec is not None and bool(spec.match_file(rel_path))

    def get_current_state(self):
        """Return ``{relative_path: md5_hex_digest}`` for every kept file.

        Directories matched by the spec are pruned so their contents are
        never visited; keys are paths relative to ``base_path``.
        """
        state = {}
        base = self.base_path

        # str(): the vendored Path class is not guaranteed to be os.PathLike.
        for root, dirs, files in os.walk(str(base)):
            root_path = Path(root)
            # In-place assignment is what makes os.walk skip pruned dirs.
            dirs[:] = [
                d for d in dirs
                if not self._is_ignored(str((root_path / d).relative_to(base)))
            ]

            for file in files:
                full_path = root_path / file
                rel_path = str(full_path.relative_to(base))
                if not self._is_ignored(rel_path):
                    # Hash via the full path: the relative one would be
                    # resolved against the current working directory.
                    state[rel_path] = self._hash_file(full_path)

        return state

    def _hash_file(self, path):
        """Return the MD5 hex digest of the file at *path*.

        The file is read in fixed-size chunks so large files are not loaded
        into memory at once, and the handle is always closed.  MD5 is used
        here as a change fingerprint, not for security.
        """
        digest = hashlib.md5()
        with open(str(path), "rb") as handle:
            for chunk in iter(lambda: handle.read(65536), b""):
                digest.update(chunk)
        return digest.hexdigest()
+furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/lib/pathlib-1.0.1.dist-info/METADATA b/lib/pathlib-1.0.1.dist-info/METADATA new file mode 100644 index 0000000..1e007dd --- /dev/null +++ b/lib/pathlib-1.0.1.dist-info/METADATA @@ -0,0 +1,180 @@ +Metadata-Version: 2.1 +Name: pathlib +Version: 1.0.1 +Summary: Object-oriented filesystem paths +Home-page: https://pathlib.readthedocs.org/ +Download-URL: https://pypi.python.org/pypi/pathlib/ +Author: Antoine Pitrou +Author-email: solipsis@pitrou.net +License: MIT License +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 2.6 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3.2 +Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Topic :: Software Development :: Libraries +Classifier: Topic :: System :: Filesystems +License-File: LICENSE.txt + +pathlib offers a set of classes to handle filesystem paths. It offers the +following advantages over using string objects: + +* No more cumbersome use of os and os.path functions. 
Everything can be + done easily through operators, attribute accesses, and method calls. + +* Embodies the semantics of different path types. For example, comparing + Windows paths ignores casing. + +* Well-defined semantics, eliminating any warts or ambiguities (forward vs. + backward slashes, etc.). + +Requirements +------------ + +Python 3.2 or later is recommended, but pathlib is also usable with Python 2.7 +and 2.6. + +Install +------- + +In Python 3.4, pathlib is now part of the standard library. For Python 3.3 +and earlier, ``easy_install pathlib`` or ``pip install pathlib`` should do +the trick. + +Examples +-------- + +Importing the module classes:: + + >>> from pathlib import * + +Listing Python source files in a directory:: + + >>> list(p.glob('*.py')) + [PosixPath('test_pathlib.py'), PosixPath('setup.py'), + PosixPath('pathlib.py')] + +Navigating inside a directory tree:: + + >>> p = Path('/etc') + >>> q = p / 'init.d' / 'reboot' + >>> q + PosixPath('/etc/init.d/reboot') + >>> q.resolve() + PosixPath('/etc/rc.d/init.d/halt') + +Querying path properties:: + + >>> q.exists() + True + >>> q.is_dir() + False + +Opening a file:: + + >>> with q.open() as f: f.readline() + ... + '#!/bin/bash\n' + + +Documentation +------------- + +The full documentation can be read at `Read the Docs +`_. + + +Contributing +------------ + +Main development now takes place in the Python standard library: see +the `Python developer's guide `_, and +report issues on the `Python bug tracker `_. + +However, if you find an issue specific to prior versions of Python +(such as 2.7 or 3.2), you can post an issue on the +`BitBucket project page `_. + + +History +------- + +Version 1.0.1 +^^^^^^^^^^^^^ + +- Pull requestion #4: Python 2.6 compatibility by eevee. + +Version 1.0 +^^^^^^^^^^^ + +This version brings ``pathlib`` up to date with the official Python 3.4 +release, and also fixes a couple of 2.7-specific issues. 
+ +- Python issue #20765: Add missing documentation for PurePath.with_name() + and PurePath.with_suffix(). +- Fix test_mkdir_parents when the working directory has additional bits + set (such as the setgid or sticky bits). +- Python issue #20111: pathlib.Path.with_suffix() now sanity checks the + given suffix. +- Python issue #19918: Fix PurePath.relative_to() under Windows. +- Python issue #19921: When Path.mkdir() is called with parents=True, any + missing parent is created with the default permissions, ignoring the mode + argument (mimicking the POSIX "mkdir -p" command). +- Python issue #19887: Improve the Path.resolve() algorithm to support + certain symlink chains. +- Make pathlib usable under Python 2.7 with unicode pathnames (only pure + ASCII, though). +- Issue #21: fix TypeError under Python 2.7 when using new division. +- Add tox support for easier testing. + +Version 0.97 +^^^^^^^^^^^^ + +This version brings ``pathlib`` up to date with the final API specified +in :pep:`428`. The changes are too long to list here, it is recommended +to read the `documentation `_. + +.. warning:: + The API in this version is partially incompatible with pathlib 0.8 and + earlier. Be sure to check your code for possible breakage! + +Version 0.8 +^^^^^^^^^^^ + +- Add PurePath.name and PurePath.anchor. +- Add Path.owner and Path.group. +- Add Path.replace(). +- Add Path.as_uri(). +- Issue #10: when creating a file with Path.open(), don't set the executable + bit. +- Issue #11: fix comparisons with non-Path objects. + +Version 0.7 +^^^^^^^^^^^ + +- Add '**' (recursive) patterns to Path.glob(). +- Fix openat() support after the API refactoring in Python 3.3 beta1. +- Add a *target_is_directory* argument to Path.symlink_to() + +Version 0.6 +^^^^^^^^^^^ + +- Add Path.is_file() and Path.is_symlink() +- Add Path.glob() and Path.rglob() +- Add PurePath.match() + +Version 0.5 +^^^^^^^^^^^ + +- Add Path.mkdir(). +- Add Python 2.7 compatibility by Michele Lacchia. 
import fnmatch
import functools
import io
import ntpath
import os
import posixpath
import re
import sys
import time
from contextlib import contextmanager
from errno import EINVAL, ENOENT

try:
    # The ABC aliases were removed from ``collections`` in Python 3.10.
    from collections.abc import Sequence
except ImportError:  # Python 2
    from collections import Sequence
+from operator import attrgetter +from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO +try: + from urllib import quote as urlquote, quote as urlquote_from_bytes +except ImportError: + from urllib.parse import quote as urlquote, quote_from_bytes as urlquote_from_bytes + + +try: + intern = intern +except NameError: + intern = sys.intern +try: + basestring = basestring +except NameError: + basestring = str + +supports_symlinks = True +try: + import nt +except ImportError: + nt = None +else: + if sys.getwindowsversion()[:2] >= (6, 0) and sys.version_info >= (3, 2): + from nt import _getfinalpathname + else: + supports_symlinks = False + _getfinalpathname = None + + +__all__ = [ + "PurePath", "PurePosixPath", "PureWindowsPath", + "Path", "PosixPath", "WindowsPath", + ] + +# +# Internals +# + +_py2 = sys.version_info < (3,) +_py2_fs_encoding = 'ascii' + +def _py2_fsencode(parts): + # py2 => minimal unicode support + return [part.encode(_py2_fs_encoding) if isinstance(part, unicode) + else part for part in parts] + +def _is_wildcard_pattern(pat): + # Whether this pattern needs actual matching using fnmatch, or can + # be looked up directly as a file. + return "*" in pat or "?" in pat or "[" in pat + + +class _Flavour(object): + """A flavour implements a particular (platform-specific) set of path + semantics.""" + + def __init__(self): + self.join = self.sep.join + + def parse_parts(self, parts): + if _py2: + parts = _py2_fsencode(parts) + parsed = [] + sep = self.sep + altsep = self.altsep + drv = root = '' + it = reversed(parts) + for part in it: + if not part: + continue + if altsep: + part = part.replace(altsep, sep) + drv, root, rel = self.splitroot(part) + if sep in rel: + for x in reversed(rel.split(sep)): + if x and x != '.': + parsed.append(intern(x)) + else: + if rel and rel != '.': + parsed.append(intern(rel)) + if drv or root: + if not drv: + # If no drive is present, try to find one in the previous + # parts. 
def join_parsed_parts(self, drv, root, parts, drv2, root2, parts2):
    """
    Combine two parsed paths, each given as a (drive, root, parts) triple,
    and return the (drive, root, parts) triple of the joined path.
    """
    if root2:
        # The second path is rooted: it replaces the first one, except
        # that a drive-less second path inherits the first path's drive.
        if drv and not drv2:
            return drv, root2, [drv + root2] + parts2[1:]
        return drv2, root2, parts2

    if not drv2:
        # Second path is non-anchored (common case)
        return drv, root, parts + parts2

    same_drive = drv2 == drv or self.casefold(drv2) == self.casefold(drv)
    if same_drive:
        # Same drive => second path is relative to the first
        return drv, root, parts + parts2[1:]
    return drv2, root2, parts2
def resolve(self, path):
    """Resolve *path* through the Windows final-path-name API.

    Returns the current working directory for an empty path, and ``None``
    when ``_getfinalpathname`` is unavailable, which tells the caller to
    fall back on making the path absolute.
    """
    text = str(path)
    if text:
        if _getfinalpathname is None:
            # Means fallback on absolute
            return None
        # The API answers with an extended path; strip the prefix again.
        return self._ext_to_normal(_getfinalpathname(text))
    return os.getcwd()
def resolve(self, path):
    """Return *path* as a string with every symlink component resolved.

    Components are walked left to right; each one is probed with
    ``accessor.readlink`` and, if it is a symlink, its target is resolved
    recursively.  Raises RuntimeError when a symlink loop is detected and
    re-raises any OSError from ``readlink`` other than EINVAL.
    """
    sep = self.sep
    accessor = path._accessor
    # Maps a candidate path to its resolved form; the value is None while
    # that symlink is still being resolved (used to detect loops).
    seen = {}
    def _resolve(path, rest):
        # `path` is the part resolved so far, `rest` what is left to walk.
        if rest.startswith(sep):
            # An absolute remainder (e.g. an absolute symlink target)
            # restarts resolution from the root.
            path = ''

        for name in rest.split(sep):
            if not name or name == '.':
                # current dir
                continue
            if name == '..':
                # parent dir
                path, _, _ = path.rpartition(sep)
                continue
            newpath = path + sep + name
            if newpath in seen:
                # Already seen this path
                path = seen[newpath]
                if path is not None:
                    # use cached value
                    continue
                # The symlink is not resolved, so we must have a symlink loop.
                raise RuntimeError("Symlink loop from %r" % newpath)
            # Resolve the symbolic link
            try:
                target = accessor.readlink(newpath)
            except OSError as e:
                # EINVAL is taken to mean "not a symlink"; every other
                # error is propagated to the caller.
                if e.errno != EINVAL:
                    raise
                # Not a symlink
                path = newpath
            else:
                seen[newpath] = None # not resolved symlink
                path = _resolve(path, target)
                seen[newpath] = path # resolved symlink

        return path
    # NOTE: according to POSIX, getcwd() cannot contain path components
    # which are symlinks.
    base = '' if path.is_absolute() else os.getcwd()
    # An empty result means everything collapsed to the root.
    return _resolve(base, str(path)) or sep
_wrap_binary_strfunc(os.replace) + + if nt: + if supports_symlinks: + symlink = _wrap_binary_strfunc(os.symlink) + else: + def symlink(a, b, target_is_directory): + raise NotImplementedError("symlink() not available on this system") + else: + # Under POSIX, os.symlink() takes two args + @staticmethod + def symlink(a, b, target_is_directory): + return os.symlink(str(a), str(b)) + + utime = _wrap_strfunc(os.utime) + + # Helper for resolve() + def readlink(self, path): + return os.readlink(path) + + +_normal_accessor = _NormalAccessor() + + +# +# Globbing helpers +# + +@contextmanager +def _cached(func): + try: + func.__cached__ + yield func + except AttributeError: + cache = {} + def wrapper(*args): + try: + return cache[args] + except KeyError: + value = cache[args] = func(*args) + return value + wrapper.__cached__ = True + try: + yield wrapper + finally: + cache.clear() + +def _make_selector(pattern_parts): + pat = pattern_parts[0] + child_parts = pattern_parts[1:] + if pat == '**': + cls = _RecursiveWildcardSelector + elif '**' in pat: + raise ValueError("Invalid pattern: '**' can only be an entire path component") + elif _is_wildcard_pattern(pat): + cls = _WildcardSelector + else: + cls = _PreciseSelector + return cls(pat, child_parts) + +if hasattr(functools, "lru_cache"): + _make_selector = functools.lru_cache()(_make_selector) + + +class _Selector: + """A selector matches a specific glob pattern part against the children + of a given path.""" + + def __init__(self, child_parts): + self.child_parts = child_parts + if child_parts: + self.successor = _make_selector(child_parts) + else: + self.successor = _TerminatingSelector() + + def select_from(self, parent_path): + """Iterate over all child paths of `parent_path` matched by this + selector. 
class _WildcardSelector(_Selector):
    """Selector for one pattern component that contains fnmatch wildcards."""

    def __init__(self, pat, child_parts):
        # Translate the glob component to a regex once, up front.
        regex_source = fnmatch.translate(pat)
        self.pat = re.compile(regex_source)
        _Selector.__init__(self, child_parts)

    def _select_from(self, parent_path, is_dir, exists, listdir):
        """Yield what the successor selects from each matching child."""
        if is_dir(parent_path):
            fold = parent_path._flavour.casefold
            for entry in listdir(parent_path):
                # Names are casefolded before being matched.
                if not self.pat.match(fold(entry)):
                    continue
                child = parent_path._make_child_relpath(entry)
                for found in self.successor._select_from(child, is_dir, exists, listdir):
                    yield found
successor_select(starting_point, is_dir, exists, listdir): + if p not in yielded: + yield p + yielded.add(p) + finally: + yielded.clear() + + +# +# Public API +# + +class _PathParents(Sequence): + """This object provides sequence-like access to the logical ancestors + of a path. Don't try to construct it yourself.""" + __slots__ = ('_pathcls', '_drv', '_root', '_parts') + + def __init__(self, path): + # We don't store the instance to avoid reference cycles + self._pathcls = type(path) + self._drv = path._drv + self._root = path._root + self._parts = path._parts + + def __len__(self): + if self._drv or self._root: + return len(self._parts) - 1 + else: + return len(self._parts) + + def __getitem__(self, idx): + if idx < 0 or idx >= len(self): + raise IndexError(idx) + return self._pathcls._from_parsed_parts(self._drv, self._root, + self._parts[:-idx - 1]) + + def __repr__(self): + return "<{0}.parents>".format(self._pathcls.__name__) + + +class PurePath(object): + """PurePath represents a filesystem path and offers operations which + don't imply any actual filesystem I/O. Depending on your system, + instantiating a PurePath will return either a PurePosixPath or a + PureWindowsPath object. You can also instantiate either of these classes + directly, regardless of your system. + """ + __slots__ = ( + '_drv', '_root', '_parts', + '_str', '_hash', '_pparts', '_cached_cparts', + ) + + def __new__(cls, *args): + """Construct a PurePath from one or several strings and or existing + PurePath objects. The strings and path objects are combined so as + to yield a canonicalized path, which is incorporated into the + new PurePath object. + """ + if cls is PurePath: + cls = PureWindowsPath if os.name == 'nt' else PurePosixPath + return cls._from_parts(args) + + def __reduce__(self): + # Using the parts tuple helps share interned path parts + # when pickling related paths. 
def as_posix(self):
    """Return the path's string form with the flavour's separator
    replaced by forward slashes (/)."""
    native_sep = self._flavour.sep
    return str(self).replace(native_sep, '/')
def as_uri(self):
    """Return the path as a 'file' URI.

    Raises ValueError for a path that is not absolute.
    """
    if self.is_absolute():
        # URI formatting is delegated to the path's flavour.
        return self._flavour.make_uri(self)
    raise ValueError("relative path can't be expressed as a file URI")
def relative_to(self, *other):
    """Return the relative path to another path identified by the passed
    arguments.  If the operation is not possible (because this is not
    a subpath of the other path), raise ValueError.

    The arguments are parsed together the same way constructor arguments
    are.  Raises TypeError when called without arguments.
    """
    # For the purpose of this method, drive and root are considered
    # separate parts, i.e.:
    #   Path('c:/').relative_to('c:') gives Path('/')
    #   Path('c:/').relative_to('/') raise ValueError
    if not other:
        raise TypeError("need at least one argument")
    parts = self._parts
    drv = self._drv
    root = self._root
    # Split the combined anchor (parts[0]) into separate drive and root
    # entries so that each can be compared on its own.
    if root:
        abs_parts = [drv, root] + parts[1:]
    else:
        abs_parts = parts
    to_drv, to_root, to_parts = self._parse_args(other)
    if to_root:
        to_abs_parts = [to_drv, to_root] + to_parts[1:]
    else:
        to_abs_parts = to_parts
    n = len(to_abs_parts)
    cf = self._flavour.casefold_parts
    # n == 0 means `other` is the empty path: only a path without drive
    # and root is relative to it.  Otherwise the first n casefolded parts
    # must equal the casefolded parts of `other`.
    if (root or drv) if n == 0 else cf(abs_parts[:n]) != cf(to_abs_parts):
        formatted = self._format_parsed_parts(to_drv, to_root, to_parts)
        raise ValueError("{!r} does not start with {!r}"
                         .format(str(self), str(formatted)))
    # When exactly one leading part was stripped, the root is kept on the
    # result (the 'c:/' relative to 'c:' example above).
    return self._from_parsed_parts('', root if n == 1 else '',
                                   abs_parts[n:])
+ """ + return self._make_child(args) + + def __truediv__(self, key): + return self._make_child((key,)) + + def __rtruediv__(self, key): + return self._from_parts([key] + self._parts) + + if sys.version_info < (3,): + __div__ = __truediv__ + __rdiv__ = __rtruediv__ + + @property + def parent(self): + """The logical parent of the path.""" + drv = self._drv + root = self._root + parts = self._parts + if len(parts) == 1 and (drv or root): + return self + return self._from_parsed_parts(drv, root, parts[:-1]) + + @property + def parents(self): + """A sequence of this path's logical parents.""" + return _PathParents(self) + + def is_absolute(self): + """True if the path is absolute (has both a root and, if applicable, + a drive).""" + if not self._root: + return False + return not self._flavour.has_drv or bool(self._drv) + + def is_reserved(self): + """Return True if the path contains one of the special names reserved + by the system, if any.""" + return self._flavour.is_reserved(self._parts) + + def match(self, path_pattern): + """ + Return True if this path matches the given pattern. 
+ """ + cf = self._flavour.casefold + path_pattern = cf(path_pattern) + drv, root, pat_parts = self._flavour.parse_parts((path_pattern,)) + if not pat_parts: + raise ValueError("empty pattern") + if drv and drv != cf(self._drv): + return False + if root and root != cf(self._root): + return False + parts = self._cparts + if drv or root: + if len(pat_parts) != len(parts): + return False + pat_parts = pat_parts[1:] + elif len(pat_parts) > len(parts): + return False + for part, pat in zip(reversed(parts), reversed(pat_parts)): + if not fnmatch.fnmatchcase(part, pat): + return False + return True + + +class PurePosixPath(PurePath): + _flavour = _posix_flavour + __slots__ = () + + +class PureWindowsPath(PurePath): + _flavour = _windows_flavour + __slots__ = () + + +# Filesystem-accessing classes + + +class Path(PurePath): + __slots__ = ( + '_accessor', + ) + + def __new__(cls, *args, **kwargs): + if cls is Path: + cls = WindowsPath if os.name == 'nt' else PosixPath + self = cls._from_parts(args, init=False) + if not self._flavour.is_supported: + raise NotImplementedError("cannot instantiate %r on your system" + % (cls.__name__,)) + self._init() + return self + + def _init(self, + # Private non-constructor arguments + template=None, + ): + if template is not None: + self._accessor = template._accessor + else: + self._accessor = _normal_accessor + + def _make_child_relpath(self, part): + # This is an optimization used for dir walking. `part` must be + # a single part relative to this path. + parts = self._parts + [part] + return self._from_parsed_parts(self._drv, self._root, parts) + + def _opener(self, name, flags, mode=0o666): + # A stub for the opener argument to built-in open() + return self._accessor.open(self, flags, mode) + + def _raw_open(self, flags, mode=0o777): + """ + Open the file pointed by this path and return a file descriptor, + as os.open() does. 
+ """ + return self._accessor.open(self, flags, mode) + + # Public API + + @classmethod + def cwd(cls): + """Return a new path pointing to the current working directory + (as returned by os.getcwd()). + """ + return cls(os.getcwd()) + + def iterdir(self): + """Iterate over the files in this directory. Does not yield any + result for the special paths '.' and '..'. + """ + for name in self._accessor.listdir(self): + if name in ('.', '..'): + # Yielding a path object for these makes little sense + continue + yield self._make_child_relpath(name) + + def glob(self, pattern): + """Iterate over this subtree and yield all existing files (of any + kind, including directories) matching the given pattern. + """ + pattern = self._flavour.casefold(pattern) + drv, root, pattern_parts = self._flavour.parse_parts((pattern,)) + if drv or root: + raise NotImplementedError("Non-relative patterns are unsupported") + selector = _make_selector(tuple(pattern_parts)) + for p in selector.select_from(self): + yield p + + def rglob(self, pattern): + """Recursively yield all existing files (of any kind, including + directories) matching the given pattern, anywhere in this subtree. + """ + pattern = self._flavour.casefold(pattern) + drv, root, pattern_parts = self._flavour.parse_parts((pattern,)) + if drv or root: + raise NotImplementedError("Non-relative patterns are unsupported") + selector = _make_selector(("**",) + tuple(pattern_parts)) + for p in selector.select_from(self): + yield p + + def absolute(self): + """Return an absolute version of this path. This function works + even if the path doesn't point to anything. + + No normalization is done, i.e. all '.' and '..' will be kept along. + Use resolve() to get the canonical path to a file. + """ + # XXX untested yet! 
+ if self.is_absolute(): + return self + # FIXME this must defer to the specific flavour (and, under Windows, + # use nt._getfullpathname()) + obj = self._from_parts([os.getcwd()] + self._parts, init=False) + obj._init(template=self) + return obj + + def resolve(self): + """ + Make the path absolute, resolving all symlinks on the way and also + normalizing it (for example turning slashes into backslashes under + Windows). + """ + s = self._flavour.resolve(self) + if s is None: + # No symlink resolution => for consistency, raise an error if + # the path doesn't exist or is forbidden + self.stat() + s = str(self.absolute()) + # Now we have no symlinks in the path, it's safe to normalize it. + normed = self._flavour.pathmod.normpath(s) + obj = self._from_parts((normed,), init=False) + obj._init(template=self) + return obj + + def stat(self): + """ + Return the result of the stat() system call on this path, like + os.stat() does. + """ + return self._accessor.stat(self) + + def owner(self): + """ + Return the login name of the file owner. + """ + import pwd + return pwd.getpwuid(self.stat().st_uid).pw_name + + def group(self): + """ + Return the group name of the file gid. + """ + import grp + return grp.getgrgid(self.stat().st_gid).gr_name + + def open(self, mode='r', buffering=-1, encoding=None, + errors=None, newline=None): + """ + Open the file pointed by this path and return a file object, as + the built-in open() function does. + """ + if sys.version_info >= (3, 3): + return io.open(str(self), mode, buffering, encoding, errors, newline, + opener=self._opener) + else: + return io.open(str(self), mode, buffering, encoding, errors, newline) + + def touch(self, mode=0o666, exist_ok=True): + """ + Create this file with the given access mode, if it doesn't exist. + """ + if exist_ok: + # First try to bump modification time + # Implementation note: GNU touch uses the UTIME_NOW option of + # the utimensat() / futimens() functions. 
+ t = time.time() + try: + self._accessor.utime(self, (t, t)) + except OSError: + # Avoid exception chaining + pass + else: + return + flags = os.O_CREAT | os.O_WRONLY + if not exist_ok: + flags |= os.O_EXCL + fd = self._raw_open(flags, mode) + os.close(fd) + + def mkdir(self, mode=0o777, parents=False): + if not parents: + self._accessor.mkdir(self, mode) + else: + try: + self._accessor.mkdir(self, mode) + except OSError as e: + if e.errno != ENOENT: + raise + self.parent.mkdir(parents=True) + self._accessor.mkdir(self, mode) + + def chmod(self, mode): + """ + Change the permissions of the path, like os.chmod(). + """ + self._accessor.chmod(self, mode) + + def lchmod(self, mode): + """ + Like chmod(), except if the path points to a symlink, the symlink's + permissions are changed, rather than its target's. + """ + self._accessor.lchmod(self, mode) + + def unlink(self): + """ + Remove this file or link. + If the path is a directory, use rmdir() instead. + """ + self._accessor.unlink(self) + + def rmdir(self): + """ + Remove this directory. The directory must be empty. + """ + self._accessor.rmdir(self) + + def lstat(self): + """ + Like stat(), except if the path points to a symlink, the symlink's + status information is returned, rather than its target's. + """ + return self._accessor.lstat(self) + + def rename(self, target): + """ + Rename this path to the given path. + """ + self._accessor.rename(self, target) + + def replace(self, target): + """ + Rename this path to the given path, clobbering the existing + destination if it exists. + """ + if sys.version_info < (3, 3): + raise NotImplementedError("replace() is only available " + "with Python 3.3 and later") + self._accessor.replace(self, target) + + def symlink_to(self, target, target_is_directory=False): + """ + Make this path a symlink pointing to the given path. + Note the order of arguments (self, target) is the reverse of os.symlink's. 
+ """ + self._accessor.symlink(target, self, target_is_directory) + + # Convenience functions for querying the stat results + + def exists(self): + """ + Whether this path exists. + """ + try: + self.stat() + except OSError as e: + if e.errno != ENOENT: + raise + return False + return True + + def is_dir(self): + """ + Whether this path is a directory. + """ + try: + return S_ISDIR(self.stat().st_mode) + except OSError as e: + if e.errno != ENOENT: + raise + # Path doesn't exist or is a broken symlink + # (see https://bitbucket.org/pitrou/pathlib/issue/12/) + return False + + def is_file(self): + """ + Whether this path is a regular file (also True for symlinks pointing + to regular files). + """ + try: + return S_ISREG(self.stat().st_mode) + except OSError as e: + if e.errno != ENOENT: + raise + # Path doesn't exist or is a broken symlink + # (see https://bitbucket.org/pitrou/pathlib/issue/12/) + return False + + def is_symlink(self): + """ + Whether this path is a symbolic link. + """ + try: + return S_ISLNK(self.lstat().st_mode) + except OSError as e: + if e.errno != ENOENT: + raise + # Path doesn't exist + return False + + def is_block_device(self): + """ + Whether this path is a block device. + """ + try: + return S_ISBLK(self.stat().st_mode) + except OSError as e: + if e.errno != ENOENT: + raise + # Path doesn't exist or is a broken symlink + # (see https://bitbucket.org/pitrou/pathlib/issue/12/) + return False + + def is_char_device(self): + """ + Whether this path is a character device. + """ + try: + return S_ISCHR(self.stat().st_mode) + except OSError as e: + if e.errno != ENOENT: + raise + # Path doesn't exist or is a broken symlink + # (see https://bitbucket.org/pitrou/pathlib/issue/12/) + return False + + def is_fifo(self): + """ + Whether this path is a FIFO. 
+ """ + try: + return S_ISFIFO(self.stat().st_mode) + except OSError as e: + if e.errno != ENOENT: + raise + # Path doesn't exist or is a broken symlink + # (see https://bitbucket.org/pitrou/pathlib/issue/12/) + return False + + def is_socket(self): + """ + Whether this path is a socket. + """ + try: + return S_ISSOCK(self.stat().st_mode) + except OSError as e: + if e.errno != ENOENT: + raise + # Path doesn't exist or is a broken symlink + # (see https://bitbucket.org/pitrou/pathlib/issue/12/) + return False + + +class PosixPath(Path, PurePosixPath): + __slots__ = () + +class WindowsPath(Path, PureWindowsPath): + __slots__ = () + diff --git a/lib/pathspec-1.0.4.dist-info/INSTALLER b/lib/pathspec-1.0.4.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/lib/pathspec-1.0.4.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/lib/pathspec-1.0.4.dist-info/METADATA b/lib/pathspec-1.0.4.dist-info/METADATA new file mode 100644 index 0000000..348a68b --- /dev/null +++ b/lib/pathspec-1.0.4.dist-info/METADATA @@ -0,0 +1,356 @@ +Metadata-Version: 2.4 +Name: pathspec +Version: 1.0.4 +Summary: Utility library for gitignore style pattern matching of file paths. +Author-email: "Caleb P. 
Burns" +Requires-Python: >=3.9 +Description-Content-Type: text/x-rst +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0) +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Utilities +License-File: LICENSE +Requires-Dist: hyperscan >=0.7 ; extra == "hyperscan" +Requires-Dist: typing-extensions >=4 ; extra == "optional" +Requires-Dist: google-re2 >=1.1 ; extra == "re2" +Requires-Dist: pytest >=9 ; extra == "tests" +Requires-Dist: typing-extensions >=4.15 ; extra == "tests" +Project-URL: Documentation, https://python-path-specification.readthedocs.io/en/latest/index.html +Project-URL: Issue Tracker, https://github.com/cpburnz/python-pathspec/issues +Project-URL: Source Code, https://github.com/cpburnz/python-pathspec +Provides-Extra: hyperscan +Provides-Extra: optional +Provides-Extra: re2 +Provides-Extra: tests + + +PathSpec +======== + +*pathspec* is a utility library for pattern matching of file paths. So far this +only includes Git's `gitignore`_ pattern matching. + +.. 
_`gitignore`: http://git-scm.com/docs/gitignore + + +Tutorial +-------- + +Say you have a "Projects" directory and you want to back it up, but only +certain files, and ignore others depending on certain conditions:: + + >>> from pathspec import PathSpec + >>> # The gitignore-style patterns for files to select, but we're including + >>> # instead of ignoring. + >>> spec_text = """ + ... + ... # This is a comment because the line begins with a hash: "#" + ... + ... # Include several project directories (and all descendants) relative to + ... # the current directory. To reference only a directory you must end with a + ... # slash: "/" + ... /project-a/ + ... /project-b/ + ... /project-c/ + ... + ... # Patterns can be negated by prefixing with exclamation mark: "!" + ... + ... # Ignore temporary files beginning or ending with "~" and ending with + ... # ".swp". + ... !~* + ... !*~ + ... !*.swp + ... + ... # These are python projects so ignore compiled python files from + ... # testing. + ... !*.pyc + ... + ... # Ignore the build directories but only directly under the project + ... # directories. + ... !/*/build/ + ... + ... """ + +The ``PathSpec`` class provides an abstraction around pattern implementations, +and we want to compile our patterns as "gitignore" patterns. You could call it a +wrapper for a list of compiled patterns:: + + >>> spec = PathSpec.from_lines('gitignore', spec_text.splitlines()) + +If we wanted to manually compile the patterns, we can use the ``GitIgnoreBasicPattern`` +class directly. It is used in the background for "gitignore" which internally +converts patterns to regular expressions:: + + >>> from pathspec.patterns.gitignore.basic import GitIgnoreBasicPattern + >>> patterns = map(GitIgnoreBasicPattern, spec_text.splitlines()) + >>> spec = PathSpec(patterns) + +``PathSpec.from_lines()`` is a class method which simplifies that. 
+ +If you want to load the patterns from file, you can pass the file object +directly as well:: + + >>> with open('patterns.list', 'r') as fh: + >>> spec = PathSpec.from_lines('gitignore', fh) + +You can perform matching on a whole directory tree with:: + + >>> matches = set(spec.match_tree_files('path/to/directory')) + +Or you can perform matching on a specific set of file paths with:: + + >>> matches = set(spec.match_files(file_paths)) + +Or check to see if an individual file matches:: + + >>> is_matched = spec.match_file(file_path) + +There are actually two implementations of "gitignore". The basic implementation is +used by ``PathSpec`` and follows patterns as documented by `gitignore`_. +However, Git's behavior differs from the documented patterns. There are some +edge cases, and in particular, Git allows including files from excluded +directories, which appears to contradict the documentation. ``GitIgnoreSpec`` +handles these cases to more closely replicate Git's behavior:: + + >>> from pathspec import GitIgnoreSpec + >>> spec = GitIgnoreSpec.from_lines(spec_text.splitlines()) + +You do not specify the style of pattern for ``GitIgnoreSpec`` because it should +always use ``GitIgnoreSpecPattern`` internally. + + +Performance +----------- + +Running lots of regular expression matches against thousands of files in Python +is slow. Alternate regular expression backends can be used to improve +performance. ``PathSpec`` and ``GitIgnoreSpec`` both accept a ``backend`` +parameter to control the backend. The default is "best" to automatically choose +the best available backend. There are currently 3 backends. + +The "simple" backend is the default and it simply uses Python's ``re.Pattern`` +objects that are normally created. This can be the fastest when there's only 1 +or 2 patterns. + +The "hyperscan" backend uses the `hyperscan`_ library. Hyperscan tends to be at +least 2 times faster than "simple", and generally slower than "re2".
This can be +faster than "re2" under the right conditions with pattern counts of 1-25. + +The "re2" backend uses the `google-re2`_ library (not to be confused with the +*re2* library on PyPI which is unrelated and abandoned). Google's re2 tends to +be significantly faster than "simple", and 3 times faster than "hyperscan" at +high pattern counts. + +See `benchmarks_backends.md`_ for comparisons between native Python regular +expressions and the optional backends. + + +.. _`benchmarks_backends.md`: https://github.com/cpburnz/python-pathspec/blob/master/benchmarks_backends.md +.. _`google-re2`: https://pypi.org/project/google-re2/ +.. _`hyperscan`: https://pypi.org/project/hyperscan/ + + +FAQ +--- + + +1. How do I ignore files like *.gitignore*? ++++++++++++++++++++++++++++++++++++++++++++ + +``GitIgnoreSpec`` (and ``PathSpec``) positively match files by default. To find +the files to keep, and exclude files like *.gitignore*, you need to set +``negate=True`` to flip the results:: + + >>> from pathspec import GitIgnoreSpec + >>> spec = GitIgnoreSpec.from_lines([...]) + >>> keep_files = set(spec.match_tree_files('path/to/directory', negate=True)) + >>> ignore_files = set(spec.match_tree_files('path/to/directory')) + + +License +------- + +*pathspec* is licensed under the `Mozilla Public License Version 2.0`_. See +`LICENSE`_ or the `FAQ`_ for more information. + +In summary, you may use *pathspec* with any closed or open source project +without affecting the license of the larger work so long as you: + +- give credit where credit is due, + +- and release any custom changes made to *pathspec*. + +.. _`Mozilla Public License Version 2.0`: http://www.mozilla.org/MPL/2.0 +.. _`LICENSE`: LICENSE +.. _`FAQ`: http://www.mozilla.org/MPL/2.0/FAQ.html + + +Source +------ + +The source code for *pathspec* is available from the GitHub repo +`cpburnz/python-pathspec`_. + +.. 
_`cpburnz/python-pathspec`: https://github.com/cpburnz/python-pathspec + + +Installation +------------ + +*pathspec* is available for install through `PyPI`_:: + + pip install pathspec + +*pathspec* can also be built from source. The following packages will be +required: + +- `build`_ (>=0.6.0) + +*pathspec* can then be built and installed with:: + + python -m build + pip install dist/pathspec-*-py3-none-any.whl + +The following optional dependencies can be installed: + +- `google-re2`_: Enables optional "re2" backend. +- `hyperscan`_: Enables optional "hyperscan" backend. +- `typing-extensions`_: Improves some type hints. + +.. _`PyPI`: http://pypi.python.org/pypi/pathspec +.. _`build`: https://pypi.org/project/build/ +.. _`typing-extensions`: https://pypi.org/project/typing-extensions/ + + +Documentation +------------- + +Documentation for *pathspec* is available on `Read the Docs`_. + +The full change history can be found in `CHANGES.rst`_ and `Change History`_. + +An upgrade guide is available in `UPGRADING.rst`_ and `Upgrade Guide`_. + +.. _`CHANGES.rst`: https://github.com/cpburnz/python-pathspec/blob/master/CHANGES.rst +.. _`Change History`: https://python-path-specification.readthedocs.io/en/stable/changes.html +.. _`Read the Docs`: https://python-path-specification.readthedocs.io +.. _`UPGRADING.rst`: https://github.com/cpburnz/python-pathspec/blob/master/UPGRADING.rst +.. _`Upgrade Guide`: https://python-path-specification.readthedocs.io/en/stable/upgrading.html + + +Other Languages +--------------- + +The related project `pathspec-ruby`_ (by *highb*) provides a similar library as +a `Ruby gem`_. + +.. _`pathspec-ruby`: https://github.com/highb/pathspec-ruby +.. _`Ruby gem`: https://rubygems.org/gems/pathspec + + +Change History +============== + + +1.0.4 (2026-01-26) +------------------ + +- `Issue #103`_: Using re2 fails if pyre2 is also installed. + +.. 
_`Issue #103`: https://github.com/cpburnz/python-pathspec/issues/103 + + +1.0.3 (2026-01-09) +------------------ + +Bug fixes: + +- `Issue #101`_: pyright strict errors with pathspec >= 1.0.0. +- `Issue #102`_: No module named 'tomllib'. + + +.. _`Issue #101`: https://github.com/cpburnz/python-pathspec/issues/101 +.. _`Issue #102`: https://github.com/cpburnz/python-pathspec/issues/102 + + +1.0.2 (2026-01-07) +------------------ + +Bug fixes: + +- Type hint `collections.abc.Callable` does not properly replace `typing.Callable` until Python 3.9.2. + + +1.0.1 (2026-01-06) +------------------ + +Bug fixes: + +- `Issue #100`_: ValueError(f"{patterns=!r} cannot be empty.") when using black. + + +.. _`Issue #100`: https://github.com/cpburnz/python-pathspec/issues/100 + + +1.0.0 (2026-01-05) +------------------ + +Major changes: + +- `Issue #91`_: Dropped support of EoL Python 3.8. +- Added concept of backends to allow for faster regular expression matching. The backend can be controlled using the `backend` argument to `PathSpec()`, `PathSpec.from_lines()`, `GitIgnoreSpec()`, and `GitIgnoreSpec.from_lines()`. +- Renamed "gitwildmatch" pattern back to "gitignore". The "gitignore" pattern behaves slightly differently when used with `PathSpec` (*gitignore* as documented) than with `GitIgnoreSpec` (replicates *Git*'s edge cases). + +API changes: + +- Breaking: protected method `pathspec.pathspec.PathSpec._match_file()` (with a leading underscore) has been removed and replaced by backends. This does not affect normal usage of `PathSpec` or `GitIgnoreSpec`. Only custom subclasses will be affected. If this breaks your usage, let me know by `opening an issue <https://github.com/cpburnz/python-pathspec/issues>`_. +- Deprecated: "gitwildmatch" is now an alias for "gitignore". +- Deprecated: `pathspec.patterns.GitWildMatchPattern` is now an alias for `pathspec.patterns.gitignore.spec.GitIgnoreSpecPattern`. +- Deprecated: `pathspec.patterns.gitwildmatch` module has been replaced by the `pathspec.patterns.gitignore` package.
+- Deprecated: `pathspec.patterns.gitwildmatch.GitWildMatchPattern` is now an alias for `pathspec.patterns.gitignore.spec.GitIgnoreSpecPattern`. +- Deprecated: `pathspec.patterns.gitwildmatch.GitWildMatchPatternError` is now an alias for `pathspec.patterns.gitignore.GitIgnorePatternError`. +- Removed: `pathspec.patterns.gitwildmatch.GitIgnorePattern` has been deprecated since v0.4 (2016-07-15). +- Signature of method `pathspec.pattern.RegexPattern.match_file()` has been changed from `def match_file(self, file: str) -> RegexMatchResult | None` to `def match_file(self, file: AnyStr) -> RegexMatchResult | None` to reflect usage. +- Signature of class method `pathspec.pattern.RegexPattern.pattern_to_regex()` has been changed from `def pattern_to_regex(cls, pattern: str) -> tuple[str, bool]` to `def pattern_to_regex(cls, pattern: AnyStr) -> tuple[AnyStr | None, bool | None]` to reflect usage and documentation. + +New features: + +- Added optional "hyperscan" backend using `hyperscan`_ library. It will automatically be used when installed. This dependency can be installed with ``pip install 'pathspec[hyperscan]'``. +- Added optional "re2" backend using the `google-re2`_ library. It will automatically be used when installed. This dependency can be installed with ``pip install 'pathspec[re2]'``. +- Added optional dependency on `typing-extensions`_ library to improve some type hints. + +Bug fixes: + +- `Issue #93`_: Do not remove leading spaces. +- `Issue #95`_: Matching for files inside folder does not seem to behave like .gitignore's. +- `Issue #98`_: UnboundLocalError in RegexPattern when initialized with `pattern=None`. +- Type hint on return value of `pathspec.pattern.RegexPattern.match_file()` to match documentation. + +Improvements: + +- Mark Python 3.13 and 3.14 as supported. +- No-op patterns are now filtered out when matching files, slightly improving performance. +- Fix performance regression in `iter_tree_files()` from v0.10. + + +.. 
_`Issue #38`: https://github.com/cpburnz/python-pathspec/issues/38 +.. _`Issue #91`: https://github.com/cpburnz/python-pathspec/issues/91 +.. _`Issue #93`: https://github.com/cpburnz/python-pathspec/issues/93 +.. _`Issue #95`: https://github.com/cpburnz/python-pathspec/issues/95 +.. _`Issue #98`: https://github.com/cpburnz/python-pathspec/issues/98 +.. _`google-re2`: https://pypi.org/project/google-re2/ +.. _`hyperscan`: https://pypi.org/project/hyperscan/ +.. _`typing-extensions`: https://pypi.org/project/typing-extensions/ + diff --git a/lib/pathspec-1.0.4.dist-info/RECORD b/lib/pathspec-1.0.4.dist-info/RECORD new file mode 100644 index 0000000..3445cad --- /dev/null +++ b/lib/pathspec-1.0.4.dist-info/RECORD @@ -0,0 +1,69 @@ +pathspec-1.0.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +pathspec-1.0.4.dist-info/METADATA,sha256=pekHVZjpp_VHVlDo7U032-fIhSGEbY_V8jjmYrEgaWM,13755 +pathspec-1.0.4.dist-info/RECORD,, +pathspec-1.0.4.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pathspec-1.0.4.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82 +pathspec-1.0.4.dist-info/licenses/LICENSE,sha256=-rPda9qyJvHAhjCx3ZF-Efy07F4eAg4sFvg6ChOGPoU,16726 +pathspec/__init__.py,sha256=0PnZCecVo4UjsfA0EFGsAUikyz1jSDFmQP9gCoKXW_Y,1408 +pathspec/__pycache__/__init__.cpython-314.pyc,, +pathspec/__pycache__/_meta.cpython-314.pyc,, +pathspec/__pycache__/_typing.cpython-314.pyc,, +pathspec/__pycache__/_version.cpython-314.pyc,, +pathspec/__pycache__/backend.cpython-314.pyc,, +pathspec/__pycache__/gitignore.cpython-314.pyc,, +pathspec/__pycache__/pathspec.cpython-314.pyc,, +pathspec/__pycache__/pattern.cpython-314.pyc,, +pathspec/__pycache__/util.cpython-314.pyc,, +pathspec/_backends/__init__.py,sha256=CjgX4uSPMC5UH4iy_IrdFXrcLQ_gwK8MKW5Qbspz_uE,130 +pathspec/_backends/__pycache__/__init__.cpython-314.pyc,, +pathspec/_backends/__pycache__/_utils.cpython-314.pyc,, +pathspec/_backends/__pycache__/agg.cpython-314.pyc,, 
+pathspec/_backends/_utils.py,sha256=mDjbGpndOyVkt9Fue0WDWKTkk-jVqOejof9Bv9pzArE,1066 +pathspec/_backends/agg.py,sha256=naHFqYXMR53hwtgHtEHrwNJEBFpbUWbdMbF0zguxHlE,2505 +pathspec/_backends/hyperscan/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pathspec/_backends/hyperscan/__pycache__/__init__.cpython-314.pyc,, +pathspec/_backends/hyperscan/__pycache__/_base.cpython-314.pyc,, +pathspec/_backends/hyperscan/__pycache__/base.cpython-314.pyc,, +pathspec/_backends/hyperscan/__pycache__/gitignore.cpython-314.pyc,, +pathspec/_backends/hyperscan/__pycache__/pathspec.cpython-314.pyc,, +pathspec/_backends/hyperscan/_base.py,sha256=b8E_kClW6Wtkdserr3qZzMPWVomrI4yhfxSlGVYdT3c,1719 +pathspec/_backends/hyperscan/base.py,sha256=BclDnsbCH6Fvx58YT6wqxGDcfWKNUQAcy_9jV63WkCI,563 +pathspec/_backends/hyperscan/gitignore.py,sha256=OyqtXEoZWrMB3Uh_2xNzY0aGK5UdBBjkFeGAFKQh7Oo,6761 +pathspec/_backends/hyperscan/pathspec.py,sha256=74RsGQt9x3nTxjz5S5grEQI34x8eFew78wluiIzhOpw,6500 +pathspec/_backends/re2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pathspec/_backends/re2/__pycache__/__init__.cpython-314.pyc,, +pathspec/_backends/re2/__pycache__/_base.cpython-314.pyc,, +pathspec/_backends/re2/__pycache__/base.cpython-314.pyc,, +pathspec/_backends/re2/__pycache__/gitignore.cpython-314.pyc,, +pathspec/_backends/re2/__pycache__/pathspec.cpython-314.pyc,, +pathspec/_backends/re2/_base.py,sha256=VDThfjwEOnrDOfri_EnPifXH8pOYt71nxq3tUQAScfU,2149 +pathspec/_backends/re2/base.py,sha256=0sCZzhDpvyZLg9imO7BdE9KOmy3L0mgyHuzPhHWNbRU,462 +pathspec/_backends/re2/gitignore.py,sha256=0RPjCzg1vxE_6qDOL29V4qAyi9UnMKT2bb3k2XDimew,5094 +pathspec/_backends/re2/pathspec.py,sha256=aUtY_DdVHQyxHMbMGiovmXTIpuLKgIAeGtZerMVHIhI,4871 +pathspec/_backends/simple/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pathspec/_backends/simple/__pycache__/__init__.cpython-314.pyc,, +pathspec/_backends/simple/__pycache__/gitignore.cpython-314.pyc,, 
+pathspec/_backends/simple/__pycache__/pathspec.cpython-314.pyc,, +pathspec/_backends/simple/gitignore.py,sha256=45SfH2SM-YF7CppdSrQ15z7A4GUAesFzLWs8QaKdER4,2865 +pathspec/_backends/simple/pathspec.py,sha256=Zzebst2evN8-juZr5w6VBwIox7LToYT4K2zD4Jp3M7U,2095 +pathspec/_meta.py,sha256=3sxdG_ghfAmwhV7AGeJS9VUZptsmaBFVSPhQqVLpiMk,2937 +pathspec/_typing.py,sha256=xega7efBH3B4StmBzxpGvrk-yJWYKnD6Lk5Id0IiHzc,1642 +pathspec/_version.py,sha256=iV7XOjXu_8FpfpC966oeh6PC-5XA35XwWlO7oI-p2ys,64 +pathspec/backend.py,sha256=QXFus8SgZ1hKH8LZ8eOnZcyGNTO1_YQYwRM_kTkvi2M,1161 +pathspec/gitignore.py,sha256=oFWfSgeecaJFSCgI0TwdYxz0jluQxztgf-T779OxIN8,5263 +pathspec/pathspec.py,sha256=5JhgxfZTyzUcG0bEUN91xTdcvF_S9sdhXGK59nIpDOY,15151 +pathspec/pattern.py,sha256=smqkNSWc9LmPZS1MqYBGjXFXZRteiSpwF8iAy9250DY,6695 +pathspec/patterns/__init__.py,sha256=6pfTpyrSIJxN8A12hKWpa9JFvVMTR39FV3QE1HBQbho,404 +pathspec/patterns/__pycache__/__init__.cpython-314.pyc,, +pathspec/patterns/__pycache__/gitwildmatch.cpython-314.pyc,, +pathspec/patterns/gitignore/__init__.py,sha256=MaSAZd0DDg0vCH9k1LslaJjBJw5DkX4ty-FuLmB1z_4,422 +pathspec/patterns/gitignore/__pycache__/__init__.cpython-314.pyc,, +pathspec/patterns/gitignore/__pycache__/base.cpython-314.pyc,, +pathspec/patterns/gitignore/__pycache__/basic.cpython-314.pyc,, +pathspec/patterns/gitignore/__pycache__/spec.cpython-314.pyc,, +pathspec/patterns/gitignore/base.py,sha256=mkLYm-prSD2SXNDpxnFhL0FRV8FRPAsIBVeXyNOWjCI,4688 +pathspec/patterns/gitignore/basic.py,sha256=0pTlzzJt8qMpy-SnGHhozZVWVDH9ErPDy29MV3Q8UOw,9924 +pathspec/patterns/gitignore/spec.py,sha256=8jB3Q7Wbb6fLvtIfNax89tEtw2UZgATbAKnpGQleU8Q,10186 +pathspec/patterns/gitwildmatch.py,sha256=bF2PUtc9gOFHuFwHJ035x91y3R8An5dIY5oRibylsco,1463 +pathspec/py.typed,sha256=wq7wwDeyBungK6DsiV4O-IujgKzARwHz94uQshdpdEU,68 +pathspec/util.py,sha256=KbG9seqfTOBLPoSJ8I4CdeDFVof6rDGCMy69cZb4Du4,24728 diff --git a/lib/pathspec-1.0.4.dist-info/REQUESTED b/lib/pathspec-1.0.4.dist-info/REQUESTED new file mode 100644 index 
0000000..e69de29 diff --git a/lib/pathspec-1.0.4.dist-info/WHEEL b/lib/pathspec-1.0.4.dist-info/WHEEL new file mode 100644 index 0000000..d8b9936 --- /dev/null +++ b/lib/pathspec-1.0.4.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: flit 3.12.0 +Root-Is-Purelib: true +Tag: py3-none-any diff --git a/lib/pathspec-1.0.4.dist-info/licenses/LICENSE b/lib/pathspec-1.0.4.dist-info/licenses/LICENSE new file mode 100644 index 0000000..14e2f77 --- /dev/null +++ b/lib/pathspec-1.0.4.dist-info/licenses/LICENSE @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. 
"Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/lib/pathspec/__init__.py b/lib/pathspec/__init__.py new file mode 100644 index 0000000..f41cfee --- /dev/null +++ b/lib/pathspec/__init__.py @@ -0,0 +1,68 @@ +""" +The *pathspec* package provides pattern matching for file paths. So far this +only includes Git's *gitignore* patterns. + +The following classes are imported and made available from the root of the +`pathspec` package: + +- :class:`pathspec.gitignore.GitIgnoreSpec` + +- :class:`pathspec.pathspec.PathSpec` + +- :class:`pathspec.pattern.Pattern` + +- :class:`pathspec.pattern.RegexPattern` + +- :class:`pathspec.util.RecursionError` + +The following functions are also imported: + +- :func:`pathspec.util.lookup_pattern` + +The following deprecated functions are also imported to maintain backward +compatibility: + +- :func:`pathspec.util.iter_tree` + +- :func:`pathspec.util.match_files` +""" + +from .gitignore import ( + GitIgnoreSpec) +from .pathspec import ( + PathSpec) +from .pattern import ( + Pattern, + RegexPattern) +from .util import ( + RecursionError, + iter_tree, # Deprecated since 0.10.0. + lookup_pattern, + match_files) # Deprecated since 0.10.0. + +from ._meta import ( + __author__, + __copyright__, + __credits__, + __license__) +from ._version import ( + __version__) + +# Load pattern implementations. +from . import patterns + +# Declare private imports as part of the public interface. Deprecated imports +# are deliberately excluded. 
+__all__ = [ + 'GitIgnoreSpec', + 'PathSpec', + 'Pattern', + 'RecursionError', + 'RegexPattern', + '__author__', + '__copyright__', + '__credits__', + '__license__', + '__version__', + 'lookup_pattern', +] diff --git a/lib/pathspec/__pycache__/__init__.cpython-314.pyc b/lib/pathspec/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..99abf80 Binary files /dev/null and b/lib/pathspec/__pycache__/__init__.cpython-314.pyc differ diff --git a/lib/pathspec/__pycache__/_meta.cpython-314.pyc b/lib/pathspec/__pycache__/_meta.cpython-314.pyc new file mode 100644 index 0000000..8e32873 Binary files /dev/null and b/lib/pathspec/__pycache__/_meta.cpython-314.pyc differ diff --git a/lib/pathspec/__pycache__/_typing.cpython-314.pyc b/lib/pathspec/__pycache__/_typing.cpython-314.pyc new file mode 100644 index 0000000..ba3631a Binary files /dev/null and b/lib/pathspec/__pycache__/_typing.cpython-314.pyc differ diff --git a/lib/pathspec/__pycache__/_version.cpython-314.pyc b/lib/pathspec/__pycache__/_version.cpython-314.pyc new file mode 100644 index 0000000..a5ac14b Binary files /dev/null and b/lib/pathspec/__pycache__/_version.cpython-314.pyc differ diff --git a/lib/pathspec/__pycache__/backend.cpython-314.pyc b/lib/pathspec/__pycache__/backend.cpython-314.pyc new file mode 100644 index 0000000..2aaab52 Binary files /dev/null and b/lib/pathspec/__pycache__/backend.cpython-314.pyc differ diff --git a/lib/pathspec/__pycache__/gitignore.cpython-314.pyc b/lib/pathspec/__pycache__/gitignore.cpython-314.pyc new file mode 100644 index 0000000..0fe1eb1 Binary files /dev/null and b/lib/pathspec/__pycache__/gitignore.cpython-314.pyc differ diff --git a/lib/pathspec/__pycache__/pathspec.cpython-314.pyc b/lib/pathspec/__pycache__/pathspec.cpython-314.pyc new file mode 100644 index 0000000..90f1eb3 Binary files /dev/null and b/lib/pathspec/__pycache__/pathspec.cpython-314.pyc differ diff --git a/lib/pathspec/__pycache__/pattern.cpython-314.pyc 
b/lib/pathspec/__pycache__/pattern.cpython-314.pyc new file mode 100644 index 0000000..4c9e27f Binary files /dev/null and b/lib/pathspec/__pycache__/pattern.cpython-314.pyc differ diff --git a/lib/pathspec/__pycache__/util.cpython-314.pyc b/lib/pathspec/__pycache__/util.cpython-314.pyc new file mode 100644 index 0000000..ed4a0ee Binary files /dev/null and b/lib/pathspec/__pycache__/util.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/__init__.py b/lib/pathspec/_backends/__init__.py new file mode 100644 index 0000000..72c4949 --- /dev/null +++ b/lib/pathspec/_backends/__init__.py @@ -0,0 +1,4 @@ +""" +WARNING: The *pathspec._backends* package is not part of the public API. Its +contents and structure are likely to change. +""" diff --git a/lib/pathspec/_backends/__pycache__/__init__.cpython-314.pyc b/lib/pathspec/_backends/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..2609039 Binary files /dev/null and b/lib/pathspec/_backends/__pycache__/__init__.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/__pycache__/_utils.cpython-314.pyc b/lib/pathspec/_backends/__pycache__/_utils.cpython-314.pyc new file mode 100644 index 0000000..ff16989 Binary files /dev/null and b/lib/pathspec/_backends/__pycache__/_utils.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/__pycache__/agg.cpython-314.pyc b/lib/pathspec/_backends/__pycache__/agg.cpython-314.pyc new file mode 100644 index 0000000..468ba46 Binary files /dev/null and b/lib/pathspec/_backends/__pycache__/agg.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/_utils.py b/lib/pathspec/_backends/_utils.py new file mode 100644 index 0000000..77c7cd9 --- /dev/null +++ b/lib/pathspec/_backends/_utils.py @@ -0,0 +1,45 @@ +""" +This module provides private utility functions for backends. + +WARNING: The *pathspec._backends* package is not part of the public API. Its +contents and structure are likely to change. 
+""" + +from collections.abc import ( + Iterable) +from typing import ( + TypeVar) + +from pathspec.pattern import ( + Pattern) + +TPattern = TypeVar("TPattern", bound=Pattern) + + +def enumerate_patterns( + patterns: Iterable[TPattern], + filter: bool, + reverse: bool, +) -> list[tuple[int, TPattern]]: + """ + Enumerate the patterns. + + *patterns* (:class:`Iterable` of :class:`.Pattern`) contains the patterns. + + *filter* (:class:`bool`) is whether to remove no-op patterns (:data:`True`), + or keep them (:data:`False`). + + *reverse* (:class:`bool`) is whether to reverse the pattern order + (:data:`True`), or keep the order (:data:`False`). + + Returns the enumerated patterns (:class:`list` of :class:`tuple`). + """ + out_patterns = [ + (__i, __pat) + for __i, __pat in enumerate(patterns) + if not filter or __pat.include is not None + ] + if reverse: + out_patterns.reverse() + + return out_patterns diff --git a/lib/pathspec/_backends/agg.py b/lib/pathspec/_backends/agg.py new file mode 100644 index 0000000..c387146 --- /dev/null +++ b/lib/pathspec/_backends/agg.py @@ -0,0 +1,104 @@ +""" +This module provides aggregated private data and utility functions about the +available backends. + +WARNING: The *pathspec._backends* package is not part of the public API. Its +contents and structure are likely to change.
+""" + +from collections.abc import ( + Sequence) +from typing import ( + cast) + +from pathspec.backend import ( + BackendNamesHint, + _Backend) +from pathspec.pattern import ( + Pattern, + RegexPattern) + +from .hyperscan.base import ( + hyperscan_error) +from .hyperscan.gitignore import ( + HyperscanGiBackend) +from .hyperscan.pathspec import ( + HyperscanPsBackend) +from .re2.base import ( + re2_error) +from .re2.gitignore import ( + Re2GiBackend) +from .re2.pathspec import ( + Re2PsBackend) +from .simple.gitignore import ( + SimpleGiBackend) +from .simple.pathspec import ( + SimplePsBackend) + +_BEST_BACKEND: BackendNamesHint +""" +The best available backend. +""" + +if re2_error is None: + _BEST_BACKEND = 're2' +elif hyperscan_error is None: + _BEST_BACKEND = 'hyperscan' +else: + _BEST_BACKEND = 'simple' + + +def make_gitignore_backend( + name: BackendNamesHint, + patterns: Sequence[Pattern], +) -> _Backend: + """ + Create the specified backend with the supplied patterns for + :class:`~pathspec.gitignore.GitIgnoreSpec`. + + *name* (:class:`str`) is the name of the backend. + + *patterns* (:class:`.Iterable` of :class:`.Pattern`) contains the compiled + patterns. + + Returns the backend (:class:`._Backend`). + """ + if name == 'best': + name = _BEST_BACKEND + + if name == 'hyperscan': + return HyperscanGiBackend(cast(Sequence[RegexPattern], patterns)) + elif name == 're2': + return Re2GiBackend(cast(Sequence[RegexPattern], patterns)) + elif name == 'simple': + return SimpleGiBackend(cast(Sequence[RegexPattern], patterns)) + else: + raise ValueError(f"Backend {name=!r} is invalid.") + + +def make_pathspec_backend( + name: BackendNamesHint, + patterns: Sequence[Pattern], +) -> _Backend: + """ + Create the specified backend with the supplied patterns for + :class:`~pathspec.pathspec.PathSpec`. + + *name* (:class:`str`) is the name of the backend. + + *patterns* (:class:`Iterable` of :class:`Pattern`) contains the compiled + patterns. 
+ + Returns the backend (:class:`._Backend`). + """ + if name == 'best': + name = _BEST_BACKEND + + if name == 'hyperscan': + return HyperscanPsBackend(cast(Sequence[RegexPattern], patterns)) + elif name == 're2': + return Re2PsBackend(cast(Sequence[RegexPattern], patterns)) + elif name == 'simple': + return SimplePsBackend(patterns) + else: + raise ValueError(f"Backend {name=!r} is invalid.") diff --git a/lib/pathspec/_backends/hyperscan/__init__.py b/lib/pathspec/_backends/hyperscan/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/pathspec/_backends/hyperscan/__pycache__/__init__.cpython-314.pyc b/lib/pathspec/_backends/hyperscan/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..74e05bd Binary files /dev/null and b/lib/pathspec/_backends/hyperscan/__pycache__/__init__.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/hyperscan/__pycache__/_base.cpython-314.pyc b/lib/pathspec/_backends/hyperscan/__pycache__/_base.cpython-314.pyc new file mode 100644 index 0000000..71f3a47 Binary files /dev/null and b/lib/pathspec/_backends/hyperscan/__pycache__/_base.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/hyperscan/__pycache__/base.cpython-314.pyc b/lib/pathspec/_backends/hyperscan/__pycache__/base.cpython-314.pyc new file mode 100644 index 0000000..d0ccb6c Binary files /dev/null and b/lib/pathspec/_backends/hyperscan/__pycache__/base.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/hyperscan/__pycache__/gitignore.cpython-314.pyc b/lib/pathspec/_backends/hyperscan/__pycache__/gitignore.cpython-314.pyc new file mode 100644 index 0000000..f10927c Binary files /dev/null and b/lib/pathspec/_backends/hyperscan/__pycache__/gitignore.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/hyperscan/__pycache__/pathspec.cpython-314.pyc b/lib/pathspec/_backends/hyperscan/__pycache__/pathspec.cpython-314.pyc new file mode 100644 index 0000000..552c27b Binary files /dev/null and 
b/lib/pathspec/_backends/hyperscan/__pycache__/pathspec.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/hyperscan/_base.py b/lib/pathspec/_backends/hyperscan/_base.py new file mode 100644 index 0000000..cb58f48 --- /dev/null +++ b/lib/pathspec/_backends/hyperscan/_base.py @@ -0,0 +1,78 @@ +""" +This module provides private data for the base implementation for the +:module:`hyperscan` library. + +WARNING: The *pathspec._backends.hyperscan* package is not part of the public +API. Its contents and structure are likely to change. +""" +from __future__ import annotations + +from dataclasses import ( + dataclass) +from typing import ( + Union) # Replaced by `X | Y` in 3.10. + +try: + import hyperscan +except ModuleNotFoundError: + hyperscan = None + HS_FLAGS = 0 +else: + HS_FLAGS = hyperscan.HS_FLAG_SINGLEMATCH | hyperscan.HS_FLAG_UTF8 + +HS_FLAGS: int +""" +The hyperscan flags to use: + +- HS_FLAG_SINGLEMATCH is needed to ensure the partial patterns only match once. + +- HS_FLAG_UTF8 is required to support unicode paths. +""" + + +@dataclass(frozen=True) +class HyperscanExprDat(object): + """ + The :class:`HyperscanExprDat` class is used to store data related to an + expression. + """ + + # The slots argument is not supported until Python 3.10. + __slots__ = [ + 'include', + 'index', + 'is_dir_pattern', + ] + + include: bool + """ + *include* (:class:`bool`) is whether the matched files should be + included (:data:`True`), or excluded (:data:`False`). + """ + + index: int + """ + *index* (:class:`int`) is the pattern index. + """ + + is_dir_pattern: bool + """ + *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory + pattern for gitignore. + """ + + +@dataclass(frozen=True) +class HyperscanExprDebug(HyperscanExprDat): + """ + The :class:`HyperscanExprDebug` class stores additional debug information + related to an expression. + """ + + # The slots argument is not supported until Python 3.10.
+ __slots__ = ['regex'] + + regex: Union[str, bytes] + """ + *regex* (:class:`str` or :class:`bytes`) is the regular expression. + """ diff --git a/lib/pathspec/_backends/hyperscan/base.py b/lib/pathspec/_backends/hyperscan/base.py new file mode 100644 index 0000000..ac219b4 --- /dev/null +++ b/lib/pathspec/_backends/hyperscan/base.py @@ -0,0 +1,24 @@ +""" +This module provides the base implementation for the :module:`hyperscan` +backend. + +WARNING: The *pathspec._backends.hyperscan* package is not part of the public +API. Its contents and structure are likely to change. +""" +from __future__ import annotations + +from typing import ( + Optional) + +try: + import hyperscan + hyperscan_error = None +except ModuleNotFoundError as e: + hyperscan = None + hyperscan_error = e + +hyperscan_error: Optional[ModuleNotFoundError] +""" +*hyperscan_error* (:class:`ModuleNotFoundError` or :data:`None`) is the +hyperscan import error. +""" diff --git a/lib/pathspec/_backends/hyperscan/gitignore.py b/lib/pathspec/_backends/hyperscan/gitignore.py new file mode 100644 index 0000000..2428b59 --- /dev/null +++ b/lib/pathspec/_backends/hyperscan/gitignore.py @@ -0,0 +1,245 @@ +""" +This module provides the :module:`hyperscan` backend for :class:`~pathspec.gitignore.GitIgnoreSpec`. + +WARNING: The *pathspec._backends.hyperscan* package is not part of the public +API. Its contents and structure are likely to change. +""" +from __future__ import annotations + +from collections.abc import ( + Sequence) +from typing import ( + Any, + Callable, # Replaced by `collections.abc.Callable` in 3.9.2. + Optional, # Replaced by `X | None` in 3.10. + Union) # Replaced by `X | Y` in 3.10. + +try: + import hyperscan +except ModuleNotFoundError: + hyperscan = None + +from pathspec.pattern import ( + RegexPattern) +from pathspec.patterns.gitignore.spec import ( + GitIgnoreSpecPattern, + _BYTES_ENCODING, + _DIR_MARK_CG, + _DIR_MARK_OPT) +from pathspec._typing import ( + override) # Added in 3.12. 
+ +from ._base import ( + HS_FLAGS, + HyperscanExprDat, + HyperscanExprDebug) +from .pathspec import ( + HyperscanPsBackend) + + +class HyperscanGiBackend(HyperscanPsBackend): + """ + The :class:`HyperscanGiBackend` class is the :module:`hyperscan` + implementation used by :class:`~pathspec.gitignore.GitIgnoreSpec`. The + Hyperscan database uses block mode for matching files. + """ + + # Change type hint. + _out: tuple[Optional[bool], int, int] + + def __init__( + self, + patterns: Sequence[RegexPattern], + *, + _debug_exprs: Optional[bool] = None, + _test_sort: Optional[Callable[[list], None]] = None, + ) -> None: + """ + Initialize the :class:`HyperscanGiBackend` instance. + + *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the + compiled patterns. + """ + super().__init__(patterns, _debug_exprs=_debug_exprs, _test_sort=_test_sort) + + self._out = (None, -1, 0) + """ + *_out* (:class:`tuple`) stores the current match: + + - *0* (:class:`bool` or :data:`None`) is the match include. + + - *1* (:class:`int`) is the match index. + + - *2* (:class:`int`) is the match priority. + """ + + @override + @staticmethod + def _init_db( + db: hyperscan.Database, + debug: bool, + patterns: list[tuple[int, RegexPattern]], + sort_ids: Optional[Callable[[list[int]], None]], + ) -> list[HyperscanExprDat]: + """ + Create the Hyperscan database from the given patterns. + + *db* (:class:`hyperscan.Database`) is the Hyperscan database. + + *debug* (:class:`bool`) is whether to include additional debugging + information for the expressions. + + *patterns* (:class:`~collections.abc.Sequence` of :class:`.RegexPattern`) + contains the patterns. + + *sort_ids* (:class:`callable` or :data:`None`) is a function used to sort + the compiled expression ids. This is used during testing to ensure the order + of expressions is not accidentally relied on. + + Returns a :class:`list` indexed by expression id (:class:`int`) to its data + (:class:`HyperscanExprDat`).
+ """ + # WARNING: Hyperscan raises a `hyperscan.error` exception when compiled with + # zero elements. + assert patterns, patterns + + # Prepare patterns. + expr_data: list[HyperscanExprDat] = [] + exprs: list[bytes] = [] + for pattern_index, pattern in patterns: + assert pattern.include is not None, (pattern_index, pattern) + + # Encode regex. + assert isinstance(pattern, RegexPattern), pattern + regex = pattern.regex.pattern + + use_regexes: list[tuple[Union[str, bytes], bool]] = [] + if isinstance(pattern, GitIgnoreSpecPattern): + # GitIgnoreSpecPattern uses capture groups for its directory marker but + # Hyperscan does not support capture groups. Handle this scenario. + regex_str: str + if isinstance(regex, str): + regex_str: str = regex + else: + assert isinstance(regex, bytes), regex + regex_str = regex.decode(_BYTES_ENCODING) + + if _DIR_MARK_CG in regex_str: + # Found directory marker. + if regex_str.endswith(_DIR_MARK_OPT): + # Regex has optional directory marker. Split regex into directory + # and file variants. + base_regex = regex_str[:-len(_DIR_MARK_OPT)] + use_regexes.append((f'{base_regex}/', True)) + use_regexes.append((f'{base_regex}$', False)) + else: + # Remove capture group. + base_regex = regex_str.replace(_DIR_MARK_CG, '/') + use_regexes.append((base_regex, True)) + + if not use_regexes: + # No special case for regex. + use_regexes.append((regex, False)) + + for regex, is_dir_pattern in use_regexes: + if isinstance(regex, bytes): + regex_bytes = regex + else: + assert isinstance(regex, str), regex + regex_bytes = regex.encode('utf8') + + if debug: + expr_data.append(HyperscanExprDebug( + include=pattern.include, + index=pattern_index, + is_dir_pattern=is_dir_pattern, + regex=regex, + )) + else: + expr_data.append(HyperscanExprDat( + include=pattern.include, + index=pattern_index, + is_dir_pattern=is_dir_pattern, + )) + + exprs.append(regex_bytes) + + # Sort expressions. 
+ ids = list(range(len(exprs))) + if sort_ids is not None: + sort_ids(ids) + exprs = [exprs[__id] for __id in ids] + + # Compile patterns. + db.compile( + expressions=exprs, + ids=ids, + elements=len(exprs), + flags=HS_FLAGS, + ) + return expr_data + + @override + def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]: + """ + Check the file against the patterns. + + *file* (:class:`str`) is the normalized file path to check. + + Returns a :class:`tuple` containing whether to include *file* (:class:`bool` + or :data:`None`), and the index of the last matched pattern (:class:`int` or + :data:`None`). + """ + # NOTICE: According to benchmarking, a method callback is 13% faster than + # using a closure here. + db = self._db + if self._db is None: + # Database was not initialized because there were no patterns. Return no + # match. + return (None, None) + + self._out = (None, -1, 0) + db.scan(file.encode('utf8'), match_event_handler=self.__on_match) + + out_include, out_index = self._out[:2] + if out_index == -1: + out_index = None + + return (out_include, out_index) + + @override + def __on_match( + self, + expr_id: int, + _from: int, + _to: int, + _flags: int, + _context: Any, + ) -> Optional[bool]: + """ + Called on each match. + + *expr_id* (:class:`int`) is the expression id (index) of the matched + pattern. + """ + expr_dat = self._expr_data[expr_id] + + is_dir_pattern = expr_dat.is_dir_pattern + if is_dir_pattern: + # Pattern matched by a directory pattern. + priority = 1 + else: + # Pattern matched by a file pattern. + priority = 2 + + # WARNING: Hyperscan does not guarantee matches will be produced in order! 
+ include = expr_dat.include + index = expr_dat.index + prev_index = self._out[1] + prev_priority = self._out[2] + if ( + (include and is_dir_pattern and index > prev_index) + or (priority == prev_priority and index > prev_index) + or priority > prev_priority + ): + self._out = (include, expr_dat.index, priority) diff --git a/lib/pathspec/_backends/hyperscan/pathspec.py b/lib/pathspec/_backends/hyperscan/pathspec.py new file mode 100644 index 0000000..d55c314 --- /dev/null +++ b/lib/pathspec/_backends/hyperscan/pathspec.py @@ -0,0 +1,251 @@ +""" +This module provides the :module:`hyperscan` backend for :class:`~pathspec.pathspec.PathSpec`. + +WARNING: The *pathspec._backends.hyperscan* package is not part of the public +API. Its contents and structure are likely to change. +""" +from __future__ import annotations + +from collections.abc import ( + Sequence) +from typing import ( + Any, + Callable, # Replaced by `collections.abc.Callable` in 3.9.2. + Optional) # Replaced by `X | None` in 3.10. + +try: + import hyperscan +except ModuleNotFoundError: + hyperscan = None + +from pathspec.backend import ( + _Backend) +from pathspec.pattern import ( + RegexPattern) +from pathspec._typing import ( + override) # Added in 3.12. + +from .._utils import ( + enumerate_patterns) + +from .base import ( + hyperscan_error) +from ._base import ( + HS_FLAGS, + HyperscanExprDat, + HyperscanExprDebug) + + +class HyperscanPsBackend(_Backend): + """ + The :class:`HyperscanPsBackend` class is the :module:`hyperscan` + implementation used by :class:`~pathspec.pathspec.PathSpec` for matching + files. The Hyperscan database uses block mode for matching files. + """ + + def __init__( + self, + patterns: Sequence[RegexPattern], + *, + _debug_exprs: Optional[bool] = None, + _test_sort: Optional[Callable[[list], None]] = None, + ) -> None: + """ + Initialize the :class:`HyperscanPsBackend` instance. + + *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the + compiled patterns. 
+ """ + if hyperscan is None: + raise hyperscan_error + + if patterns and not isinstance(patterns[0], RegexPattern): + raise TypeError(f"{patterns[0]=!r} must be a RegexPattern.") + + use_patterns = enumerate_patterns( + patterns, filter=True, reverse=False, + ) + + debug_exprs = bool(_debug_exprs) + if use_patterns: + db = self._make_db() + expr_data = self._init_db( + db=db, + debug=debug_exprs, + patterns=use_patterns, + sort_ids=_test_sort, + ) + else: + # WARNING: The hyperscan database cannot be initialized with zero + # patterns. + db = None + expr_data = [] + + self._db: Optional[hyperscan.Database] = db + """ + *_db* (:class:`hyperscan.Database`) is the Hyperscan database. + """ + + self._debug_exprs = debug_exprs + """ + *_debug_exprs* (:class:`bool`) is whether to include additional debugging + information for the expressions. + """ + + self._expr_data: list[HyperscanExprDat] = expr_data + """ + *_expr_data* (:class:`list`) maps expression index (:class:`int`) to + expression data (:class:`:class:`HyperscanExprDat`). + """ + + self._out: tuple[Optional[bool], int] = (None, -1) + """ + *_out* (:class:`tuple`) stores the current match: + + - *0* (:class:`bool` or :data:`None`) is the match include. + + - *1* (:class:`int`) is the match index. + """ + + self._patterns: dict[int, RegexPattern] = dict(use_patterns) + """ + *_patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern + (:class:`RegexPattern`). + """ + + @staticmethod + def _init_db( + db: hyperscan.Database, + debug: bool, + patterns: list[tuple[int, RegexPattern]], + sort_ids: Optional[Callable[[list[int]], None]], + ) -> list[HyperscanExprDat]: + """ + Initialize the Hyperscan database from the given patterns. + + *db* (:class:`hyperscan.Hyperscan`) is the Hyperscan database. + + *debug* (:class:`bool`) is whether to include additional debugging + information for the expressions. 
+ + *patterns* (:class:`~collections.abc.Sequence` of :class:`.RegexPattern`) + contains the patterns. + + *sort_ids* (:class:`callable` or :data:`None`) is a function used to sort + the compiled expression ids. This is used during testing to ensure the order + of expressions is not accidentally relied on. + + Returns a :class:`list` indexed by expression id (:class:`int`) to its data + (:class:`HyperscanExprDat`). + """ + # WARNING: Hyperscan raises a `hyperscan.error` exception when compiled with + # zero elements. + assert patterns, patterns + + # Prepare patterns. + expr_data: list[HyperscanExprDat] = [] + exprs: list[bytes] = [] + for pattern_index, pattern in patterns: + assert pattern.include is not None, (pattern_index, pattern) + + # Encode regex. + assert isinstance(pattern, RegexPattern), pattern + regex = pattern.regex.pattern + + if isinstance(regex, bytes): + regex_bytes = regex + else: + assert isinstance(regex, str), regex + regex_bytes = regex.encode('utf8') + + if debug: + expr_data.append(HyperscanExprDebug( + include=pattern.include, + index=pattern_index, + is_dir_pattern=False, + regex=regex, + )) + else: + expr_data.append(HyperscanExprDat( + include=pattern.include, + index=pattern_index, + is_dir_pattern=False, + )) + + exprs.append(regex_bytes) + + # Sort expressions. + ids = list(range(len(exprs))) + if sort_ids is not None: + sort_ids(ids) + exprs = [exprs[__id] for __id in ids] + + # Compile patterns. + db.compile( + expressions=exprs, + ids=ids, + elements=len(exprs), + flags=HS_FLAGS, + ) + + return expr_data + + @override + def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]: + """ + Check the file against the patterns. + + *file* (:class:`str`) is the normalized file path to check. + + Returns a :class:`tuple` containing whether to include *file* (:class:`bool` + or :data:`None`), and the index of the last matched pattern (:class:`int` or + :data:`None`). 
+ """ + # NOTICE: According to benchmarking, a method callback is 20% faster than + # using a closure here. + db = self._db + if self._db is None: + # Database was not initialized because there were no patterns. Return no + # match. + return (None, None) + + self._out = (None, -1) + db.scan(file.encode('utf8'), match_event_handler=self.__on_match) + + out_include, out_index = self._out + if out_index == -1: + out_index = None + + return (out_include, out_index) + + @staticmethod + def _make_db() -> hyperscan.Database: + """ + Create the Hyperscan database. + + Returns the database (:class:`hyperscan.Database`). + """ + return hyperscan.Database(mode=hyperscan.HS_MODE_BLOCK) + + def __on_match( + self, + expr_id: int, + _from: int, + _to: int, + _flags: int, + _context: Any, + ) -> Optional[bool]: + """ + Called on each match. + + *expr_id* (:class:`int`) is the expression id (index) of the matched + pattern. + """ + # Store match. + # - WARNING: Hyperscan does not guarantee matches will be produced in order! + # Later expressions have higher priority. 
+ expr_dat = self._expr_data[expr_id] + index = expr_dat.index + prev_index = self._out[1] + if index > prev_index: + self._out = (expr_dat.include, index) diff --git a/lib/pathspec/_backends/re2/__init__.py b/lib/pathspec/_backends/re2/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/pathspec/_backends/re2/__pycache__/__init__.cpython-314.pyc b/lib/pathspec/_backends/re2/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..8888f22 Binary files /dev/null and b/lib/pathspec/_backends/re2/__pycache__/__init__.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/re2/__pycache__/_base.cpython-314.pyc b/lib/pathspec/_backends/re2/__pycache__/_base.cpython-314.pyc new file mode 100644 index 0000000..f774a38 Binary files /dev/null and b/lib/pathspec/_backends/re2/__pycache__/_base.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/re2/__pycache__/base.cpython-314.pyc b/lib/pathspec/_backends/re2/__pycache__/base.cpython-314.pyc new file mode 100644 index 0000000..e5e32d2 Binary files /dev/null and b/lib/pathspec/_backends/re2/__pycache__/base.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/re2/__pycache__/gitignore.cpython-314.pyc b/lib/pathspec/_backends/re2/__pycache__/gitignore.cpython-314.pyc new file mode 100644 index 0000000..a6dc402 Binary files /dev/null and b/lib/pathspec/_backends/re2/__pycache__/gitignore.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/re2/__pycache__/pathspec.cpython-314.pyc b/lib/pathspec/_backends/re2/__pycache__/pathspec.cpython-314.pyc new file mode 100644 index 0000000..3d921c5 Binary files /dev/null and b/lib/pathspec/_backends/re2/__pycache__/pathspec.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/re2/_base.py b/lib/pathspec/_backends/re2/_base.py new file mode 100644 index 0000000..4e6ae9f --- /dev/null +++ b/lib/pathspec/_backends/re2/_base.py @@ -0,0 +1,95 @@ +""" +This module provides private data for the base implementation for the 
+:module:`re2` library. + +WARNING: The *pathspec._backends.re2* package is not part of the public API. Its +contents and structure are likely to change. +""" +from __future__ import annotations + +from dataclasses import ( + dataclass) +from typing import ( + Optional, # Replaced by `X | None` in 3.10. + Union) # Replaced by `X | Y` in 3.10. + +try: + import re2 + re2_error = None +except ModuleNotFoundError as e: + re2 = None + re2_error = e + RE2_OPTIONS = None +else: + # Both the `google-re2` and `pyre2` libraries use the `re2` namespace. + # `google-re2` is the only one currently supported. + try: + RE2_OPTIONS = re2.Options() + RE2_OPTIONS.log_errors = False + RE2_OPTIONS.never_capture = True + except Exception as e: + re2_error = e + RE2_OPTIONS = None + +RE2_OPTIONS: re2.Options +""" +The re2 options to use: + +- `log_errors=False` disables logging to stderr. + +- `never_capture=True` disables capture groups because they effectively cannot + be utilized with :class:`re2.Set`. +""" + +re2_error: Optional[Exception] +""" +*re2_error* (:class:`Exception` or :data:`None`) is the re2 import error. +""" + + +@dataclass(frozen=True) +class Re2RegexDat(object): + """ + The :class:`Re2RegexDat` class is used to store data related to a regular + expression. + """ + + # The slots argument is not supported until Python 3.10. + __slots__ = [ + 'include', + 'index', + 'is_dir_pattern', + ] + + include: bool + """ + *include* (:class:`bool`) is whether is whether the matched files should be + included (:data:`True`), or excluded (:data:`False`). + """ + + index: int + """ + *index* (:class:`int`) is the pattern index. + """ + + is_dir_pattern: bool + """ + *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory + pattern for gitignore. + """ + + +@dataclass(frozen=True) +class Re2RegexDebug(Re2RegexDat): + """ + The :class:`Re2RegexDebug` class stores additional debug information related + to a regular expression. 
+ """ + + # The slots argument is not supported until Python 3.10. + __slots__ = ['regex'] + + regex: Union[str, bytes] + """ + *regex* (:class:`str` or :class:`bytes`) is the regular expression. + """ diff --git a/lib/pathspec/_backends/re2/base.py b/lib/pathspec/_backends/re2/base.py new file mode 100644 index 0000000..fa24f4d --- /dev/null +++ b/lib/pathspec/_backends/re2/base.py @@ -0,0 +1,18 @@ +""" +This module provides the base implementation for the :module:`re2` backend. + +WARNING: The *pathspec._backends.re2* package is not part of the public API. Its +contents and structure are likely to change. +""" +from __future__ import annotations + +from typing import ( + Optional) # Replaced by `X | None` in 3.10. + +from ._base import ( + re2_error) + +re2_error: Optional[Exception] +""" +*re2_error* (:class:`Exception` or :data:`None`) is the re2 import error. +""" diff --git a/lib/pathspec/_backends/re2/gitignore.py b/lib/pathspec/_backends/re2/gitignore.py new file mode 100644 index 0000000..cb2525f --- /dev/null +++ b/lib/pathspec/_backends/re2/gitignore.py @@ -0,0 +1,179 @@ +""" +This module provides the :module:`re2` backend for :class:`~pathspec.gitignore.GitIgnoreSpec`. + +WARNING: The *pathspec._backends.re2* package is not part of the public API. Its +contents and structure are likely to change. +""" +from __future__ import annotations + +from typing import ( + Callable, # Replaced by `collections.abc.Callable` in 3.9.2. + Optional, # Replaced by `X | None` in 3.10. + Union) # Replaced by `X | Y` in 3.10. + +try: + import re2 +except ModuleNotFoundError: + re2 = None + +from pathspec.pattern import ( + RegexPattern) +from pathspec.patterns.gitignore.spec import ( + GitIgnoreSpecPattern, + _BYTES_ENCODING, + _DIR_MARK_CG, + _DIR_MARK_OPT) +from pathspec._typing import ( + override) # Added in 3.12. 
+ +from ._base import ( + Re2RegexDat, + Re2RegexDebug) +from .pathspec import ( + Re2PsBackend) + + +class Re2GiBackend(Re2PsBackend): + """ + The :class:`Re2GiBackend` class is the :module:`re2` implementation used by + :class:`~pathspec.gitignore.GitIgnoreSpec` for matching files. + """ + + @override + @staticmethod + def _init_set( + debug: bool, + patterns: dict[int, RegexPattern], + regex_set: re2.Set, + sort_indices: Optional[Callable[[list[int]], None]], + ) -> list[Re2RegexDat]: + """ + Create the re2 regex set. + + *debug* (:class:`bool`) is whether to include additional debugging + information for the regular expressions. + + *patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern + (:class:`.RegexPattern`). + + *regex_set* (:class:`re2.Set`) is the regex set. + + *sort_indices* (:class:`callable` or :data:`None`) is a function used to + sort the patterns by index. This is used during testing to ensure the order + of patterns is not accidentally relied on. + + Returns a :class:`list` indexed by regex id (:class:`int`) to its data + (:class:`Re2RegexDat`). + """ + # Sort patterns. + indices = list(patterns.keys()) + if sort_indices is not None: + sort_indices(indices) + + # Prepare patterns. + regex_data: list[Re2RegexDat] = [] + for pattern_index in indices: + pattern = patterns[pattern_index] + if pattern.include is None: + continue + + assert isinstance(pattern, RegexPattern), pattern + regex = pattern.regex.pattern + + use_regexes: list[tuple[Union[str, bytes], bool]] = [] + if isinstance(pattern, GitIgnoreSpecPattern): + # GitIgnoreSpecPattern uses capture groups for its directory marker. Re2 + # supports capture groups, but they cannot be utilized when using + # `re2.Set`. Handle this scenario. + regex_str: str + if isinstance(regex, str): + regex_str = regex + else: + assert isinstance(regex, bytes), regex + regex_str = regex.decode(_BYTES_ENCODING) + + if _DIR_MARK_CG in regex_str: + # Found directory marker. 
+ if regex_str.endswith(_DIR_MARK_OPT): + # Regex has optional directory marker. Split regex into directory + # and file variants. + base_regex = regex_str[:-len(_DIR_MARK_OPT)] + use_regexes.append((f'{base_regex}/', True)) + use_regexes.append((f'{base_regex}$', False)) + else: + # Remove capture group. + base_regex = regex_str.replace(_DIR_MARK_CG, '/') + use_regexes.append((base_regex, True)) + + if not use_regexes: + # No special case for regex. + use_regexes.append((regex, False)) + + for regex, is_dir_pattern in use_regexes: + if debug: + regex_data.append(Re2RegexDebug( + include=pattern.include, + index=pattern_index, + is_dir_pattern=is_dir_pattern, + regex=regex, + )) + else: + regex_data.append(Re2RegexDat( + include=pattern.include, + index=pattern_index, + is_dir_pattern=is_dir_pattern, + )) + + regex_set.Add(regex) + + # Compile patterns. + regex_set.Compile() + return regex_data + + @override + def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]: + """ + Check the file against the patterns. + + *file* (:class:`str`) is the normalized file path to check. + + Returns a :class:`tuple` containing whether to include *file* (:class:`bool` + or :data:`None`), and the index of the last matched pattern (:class:`int` or + :data:`None`). + """ + # Find best match. + match_ids: Optional[list[int]] = self._set.Match(file) + if not match_ids: + return (None, None) + + out_include: Optional[bool] = None + out_index: int = -1 + out_priority = -1 + + regex_data = self._regex_data + for regex_id in match_ids: + regex_dat = regex_data[regex_id] + + is_dir_pattern = regex_dat.is_dir_pattern + if is_dir_pattern: + # Pattern matched by a directory pattern. + priority = 1 + else: + # Pattern matched by a file pattern. + priority = 2 + + # WARNING: According to the documentation on `RE2::Set::Match()`, there is + # no guarantee matches will be produced in order! 
+ include = regex_dat.include + index = regex_dat.index + if ( + (include and is_dir_pattern and index > out_index) + or (priority == out_priority and index > out_index) + or priority > out_priority + ): + out_include = include + out_index = index + out_priority = priority + + assert out_index != -1, (out_index, out_include, out_priority) + return (out_include, out_index) diff --git a/lib/pathspec/_backends/re2/pathspec.py b/lib/pathspec/_backends/re2/pathspec.py new file mode 100644 index 0000000..2c58b45 --- /dev/null +++ b/lib/pathspec/_backends/re2/pathspec.py @@ -0,0 +1,187 @@ +""" +This module provides the :module:`re2` backend for :class:`~pathspec.pathspec.PathSpec`. + +WARNING: The *pathspec._backends.re2* package is not part of the public API. Its +contents and structure are likely to change. +""" +from __future__ import annotations + +from collections.abc import ( + Sequence) +from typing import ( + Callable, # Replaced by `collections.abc.Callable` in 3.9.2. + Optional) # Replaced by `X | None` in 3.10. + +try: + import re2 +except ModuleNotFoundError: + re2 = None + +from pathspec.backend import ( + _Backend) +from pathspec.pattern import ( + RegexPattern) +from pathspec._typing import ( + override) # Added in 3.12. + +from .._utils import ( + enumerate_patterns) + +from .base import ( + re2_error) +from ._base import ( + RE2_OPTIONS, + Re2RegexDat, + Re2RegexDebug) + + +class Re2PsBackend(_Backend): + """ + The :class:`Re2PsBackend` class is the :module:`re2` implementation used by + :class:`~pathspec.pathspec.PathSpec` for matching files. + """ + + def __init__( + self, + patterns: Sequence[RegexPattern], + *, + _debug_regex: Optional[bool] = None, + _test_sort: Optional[Callable[[list], None]] = None, + ) -> None: + """ + Initialize the :class:`Re2PsBackend` instance. + + *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the + compiled patterns. 
+ """ + if re2_error is not None: + raise re2_error + + if patterns and not isinstance(patterns[0], RegexPattern): + raise TypeError(f"{patterns[0]=!r} must be a RegexPattern.") + + use_patterns = dict(enumerate_patterns( + patterns, filter=True, reverse=False, + )) + regex_set = self._make_set() + + self._debug_regex = bool(_debug_regex) + """ + *_debug_regex* (:class:`bool`) is whether to include additional debugging + information for the regular expressions. + """ + + self._patterns: dict[int, RegexPattern] = use_patterns + """ + *_patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern + (:class:`RegexPattern`). + """ + + self._regex_data: list[Re2RegexDat] = self._init_set( + debug=self._debug_regex, + patterns=use_patterns, + regex_set=regex_set, + sort_indices=_test_sort, + ) + """ + *_regex_data* (:class:`list`) maps regex index (:class:`int`) to regex data + (:class:`Re2RegexDat`). + """ + + self._set: re2.Set = regex_set + """ + *_set* (:class:`re2.Set`) is the re2 regex set. + """ + + @staticmethod + def _init_set( + debug: bool, + patterns: dict[int, RegexPattern], + regex_set: re2.Set, + sort_indices: Optional[Callable[[list[int]], None]], + ) -> list[Re2RegexDat]: + """ + Create the re2 regex set. + + *debug* (:class:`bool`) is whether to include additional debugging + information for the regular expressions. + + *patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern + (:class:`.RegexPattern`). + + *regex_set* (:class:`re2.Set`) is the regex set. + + *sort_indices* (:class:`callable` or :data:`None`) is a function used to + sort the patterns by index. This is used during testing to ensure the order + of patterns is not accidentally relied on. + + Returns a :class:`list` indexed by regex id (:class:`int`) to its data + (:class:`Re2RegexDat`). + """ + # Sort patterns. + indices = list(patterns.keys()) + if sort_indices is not None: + sort_indices(indices) + + # Prepare patterns. 
+ regex_data: list[Re2RegexDat] = [] + for pattern_index in indices: + pattern = patterns[pattern_index] + if pattern.include is None: + continue + + assert isinstance(pattern, RegexPattern), pattern + regex = pattern.regex.pattern + + if debug: + regex_data.append(Re2RegexDebug( + include=pattern.include, + index=pattern_index, + is_dir_pattern=False, + regex=regex, + )) + else: + regex_data.append(Re2RegexDat( + include=pattern.include, + index=pattern_index, + is_dir_pattern=False, + )) + + regex_set.Add(regex) + + # Compile patterns. + regex_set.Compile() + return regex_data + + @staticmethod + def _make_set() -> re2.Set: + """ + Create the re2 regex set. + + Returns the set (:class:`re2.Set`). + """ + return re2.Set.SearchSet(RE2_OPTIONS) + + @override + def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]: + """ + Check the file against the patterns. + + *file* (:class:`str`) is the normalized file path to check. + + Returns a :class:`tuple` containing whether to include *file* (:class:`bool` + or :data:`None`), and the index of the last matched pattern (:class:`int` or + :data:`None`). + """ + # Find best match. + # - WARNING: According to the documentation on `RE2::Set::Match()`, there is + # no guarantee matches will be produced in order! Later expressions have + # higher priority. 
+ match_ids: Optional[list[int]] = self._set.Match(file) + if not match_ids: + return (None, None) + + regex_data = self._regex_data + pattern_index = max(regex_data[__id].index for __id in match_ids) + pattern = self._patterns[pattern_index] + return (pattern.include, pattern_index) diff --git a/lib/pathspec/_backends/simple/__init__.py b/lib/pathspec/_backends/simple/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/pathspec/_backends/simple/__pycache__/__init__.cpython-314.pyc b/lib/pathspec/_backends/simple/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..bbfe8bd Binary files /dev/null and b/lib/pathspec/_backends/simple/__pycache__/__init__.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/simple/__pycache__/gitignore.cpython-314.pyc b/lib/pathspec/_backends/simple/__pycache__/gitignore.cpython-314.pyc new file mode 100644 index 0000000..4d67fac Binary files /dev/null and b/lib/pathspec/_backends/simple/__pycache__/gitignore.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/simple/__pycache__/pathspec.cpython-314.pyc b/lib/pathspec/_backends/simple/__pycache__/pathspec.cpython-314.pyc new file mode 100644 index 0000000..076cdb4 Binary files /dev/null and b/lib/pathspec/_backends/simple/__pycache__/pathspec.cpython-314.pyc differ diff --git a/lib/pathspec/_backends/simple/gitignore.py b/lib/pathspec/_backends/simple/gitignore.py new file mode 100644 index 0000000..bdacc7e --- /dev/null +++ b/lib/pathspec/_backends/simple/gitignore.py @@ -0,0 +1,104 @@ +""" +This module provides the simple backend for :class:`~pathspec.gitignore.GitIgnoreSpec`. + +WARNING: The *pathspec._backends.simple* package is not part of the public API. +Its contents and structure are likely to change. +""" + +from collections.abc import ( + Sequence) +from typing import ( + Optional) # Replaced by `X | None` in 3.10. 
+ +from pathspec.pattern import ( + RegexPattern) +from pathspec.patterns.gitignore.spec import ( + _DIR_MARK) +from pathspec._typing import ( + override) # Added in 3.12. + +from .pathspec import ( + SimplePsBackend) + + +class SimpleGiBackend(SimplePsBackend): + """ + The :class:`SimpleGiBackend` class is the default (or simple) implementation + used by :class:`~pathspec.gitignore.GitIgnoreSpec` for matching files. + """ + + # Change type hint. + _patterns: list[tuple[int, RegexPattern]] + + def __init__( + self, + patterns: Sequence[RegexPattern], + *, + no_filter: Optional[bool] = None, + no_reverse: Optional[bool] = None, + ) -> None: + """ + Initialize the :class:`SimpleGiBackend` instance. + + *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the + compiled patterns. + + *no_filter* (:class:`bool`) is whether to keep no-op patterns (:data:`True`), + or remove them (:data:`False`). + + *no_reverse* (:class:`bool`) is whether to keep the pattern order + (:data:`True`), or reverse the order (:data:`True`). + """ + super().__init__(patterns, no_filter=no_filter, no_reverse=no_reverse) + + @override + def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]: + """ + Check the file against the patterns. + + *file* (:class:`str`) is the normalized file path to check. + + Returns a :class:`tuple` containing whether to include *file* (:class:`bool` + or :data:`None`), and the index of the last matched pattern (:class:`int` or + :data:`None`). + """ + is_reversed = self._is_reversed + + out_include: Optional[bool] = None + out_index: Optional[int] = None + out_priority = 0 + for index, pattern in self._patterns: + if ( + (include := pattern.include) is not None + and (match := pattern.match_file(file)) is not None + ): + # Pattern matched. + + # Check for directory marker. + dir_mark = match.match.groupdict().get(_DIR_MARK) + + if dir_mark: + # Pattern matched by a directory pattern. 
+ priority = 1 + else: + # Pattern matched by a file pattern. + priority = 2 + + if is_reversed: + if priority > out_priority: + out_include = include + out_index = index + out_priority = priority + else: + # Forward. + if (include and dir_mark) or priority >= out_priority: + out_include = include + out_index = index + out_priority = priority + + if is_reversed and priority == 2: + # Patterns are being checked in reverse order. The first pattern that + # matches with priority 2 takes precedence. + break + + return (out_include, out_index) diff --git a/lib/pathspec/_backends/simple/pathspec.py b/lib/pathspec/_backends/simple/pathspec.py new file mode 100644 index 0000000..2ded1be --- /dev/null +++ b/lib/pathspec/_backends/simple/pathspec.py @@ -0,0 +1,76 @@ +""" +This module provides the simple backend for :class:`~pathspec.pathspec.PathSpec`. + +WARNING: The *pathspec._backends.simple* package is not part of the public API. +Its contents and structure are likely to change. +""" + +from collections.abc import ( + Sequence) +from typing import ( + Optional) # Replaced by `X | None` in 3.10. + +from pathspec.backend import ( + _Backend) +from pathspec.pattern import ( + Pattern) +from pathspec._typing import ( + override) # Added in 3.12. +from pathspec.util import ( + check_match_file) + +from .._utils import ( + enumerate_patterns) + + +class SimplePsBackend(_Backend): + """ + The :class:`SimplePsBackend` class is the default (or simple) implementation + used by :class:`~pathspec.pathspec.PathSpec` for matching files. + """ + + def __init__( + self, + patterns: Sequence[Pattern], + *, + no_filter: Optional[bool] = None, + no_reverse: Optional[bool] = None, + ) -> None: + """ + Initialize the :class:`SimplePsBackend` instance. + + *patterns* (:class:`Sequence` of :class:`.Pattern`) contains the compiled + patterns. + + *no_filter* (:class:`bool`) is whether to keep no-op patterns (:data:`True`), + or remove them (:data:`False`). 
+ + *no_reverse* (:class:`bool`) is whether to keep the pattern order + (:data:`True`), or reverse the order (:data:`True`). + """ + + self._is_reversed: bool = not no_reverse + """ + *_is_reversed* (:class:`bool`) is whether to the pattern order was reversed. + """ + + self._patterns: list[tuple[int, Pattern]] = enumerate_patterns( + patterns, filter=not no_filter, reverse=not no_reverse, + ) + """ + *_patterns* (:class:`list` of :class:`tuple`) contains the enumerated + patterns. + """ + + @override + def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]: + """ + Check the file against the patterns. + + *file* (:class:`str`) is the normalized file path to check. + + Returns a :class:`tuple` containing whether to include *file* (:class:`bool` + or :data:`None`), and the index of the last matched pattern (:class:`int` or + :data:`None`). + """ + return check_match_file(self._patterns, file, self._is_reversed) diff --git a/lib/pathspec/_meta.py b/lib/pathspec/_meta.py new file mode 100644 index 0000000..4e4c782 --- /dev/null +++ b/lib/pathspec/_meta.py @@ -0,0 +1,67 @@ +""" +This module contains the project meta-data. +""" + +__author__ = "Caleb P. Burns" +__copyright__ = "Copyright © 2013-2026 Caleb P. Burns" +__credits__ = [ + "Hong Minhee ", + "Brandon High ", + "029xue ", + "Michael Huynh ", + "Nick Humrich ", + "David Fraser ", + "Charles Samborski ", + "George Hickman ", + "Vincent Driessen ", + "Adrien Vergé ", + "Anders Blomdell ", + "Xavier Thomas ", + "Wim Jeantine-Glenn ", + "Hugo van Kemenade ", + "Dan Cecile ", + "MrOutis ", + "Jon Dufresne ", + "Greg Roodt ", + "Florin T. ", + "Ben Felder ", + "Nicholas Hollander ", + "KOLANICH ", + "Jon Hays ", + "Isaac0616 ", + "Sebastiaan Zeeff ", + "Roel Adriaans ", + "Ravi Selker ", + "Johan Vergeer ", + "danjer ", + "Jan-Hein Bührman ", + "Wim-Peter Dirks ", + "Karthikeyan Singaravelan ", + "John Vandenberg ", + "John T. 
Wodder II ", + "Tomasz Kłoczko ", + "Oren ", + "SP Mohanty ", + "Richard Si ", + "Jakub Kuczys ", + "Michał Górny ", + "Bartłomiej Żak ", + "Matthias ", + "Avasam ", + "Anıl Karagenç ", + "Yannic Schröder ", + "axesider ", + "TomRuk ", + "Oleh Prypin ", + "Lumina ", + "Kurt McKee ", + "Dobatymo ", + "Tomoki Nakamaru ", + "Sebastien Eskenazi ", + "Bar Vered ", + "Tzach Shabtay ", + "Adam Dangoor ", + "Marcel Telka ", + "Dmytro Kostochko ", +] +__license__ = "MPL 2.0" diff --git a/lib/pathspec/_typing.py b/lib/pathspec/_typing.py new file mode 100644 index 0000000..049966c --- /dev/null +++ b/lib/pathspec/_typing.py @@ -0,0 +1,64 @@ +""" +This module provides stubs for type hints not supported by all relevant Python +versions. + +NOTICE: This project should have zero required dependencies which means it +cannot simply require :module:`typing_extensions`, and I do not want to maintain +a vendored copy of :module:`typing_extensions`. +""" + +import functools +import warnings +from typing import ( + Any, + Callable, # Replaced by `collections.abc.Callable` in 3.9.2. + Optional, # Replaced by `X | None` in 3.10. + TypeVar) +try: + from typing import AnyStr # Removed in 3.18. +except ImportError: + AnyStr = TypeVar('AnyStr', str, bytes) +try: + from typing import Never # Added in 3.11. +except ImportError: + from typing import NoReturn as Never + +F = TypeVar('F', bound=Callable[..., Any]) + +try: + from warnings import deprecated # Added in 3.13. +except ImportError: + try: + from typing_extensions import deprecated + except ImportError: + def deprecated( + message: str, + /, *, + category: Optional[type[Warning]] = DeprecationWarning, + stacklevel: int = 1, + ) -> Callable[[F], F]: + def decorator(f: F) -> F: + @functools.wraps(f) + def wrapper(*a, **k): + warnings.warn(message, category=category, stacklevel=stacklevel+1) + return f(*a, **k) + return wrapper + return decorator + +try: + from typing import override # Added in 3.12. 
def assert_unreachable(message: str) -> Never:
    """
    Signal that a code path which must never execute was reached.

    *message* (:class:`str`) is the error message.

    Raises :class:`AssertionError` with *message*; this never returns.
    """
    error = AssertionError(message)
    raise error
class GitIgnoreSpec(PathSpec):
    """
    The :class:`GitIgnoreSpec` class extends :class:`.PathSpec` to replicate
    *gitignore* behavior. This uses :class:`.GitIgnoreSpecPattern` to fully
    replicate Git's handling.
    """

    def __eq__(self, other: object) -> bool:
        """
        Tests the equality of this gitignore-spec with *other* (:class:`.GitIgnoreSpec`)
        by comparing their :attr:`self.patterns <.PathSpec.patterns>` attributes. A
        non-:class:`GitIgnoreSpec` will not compare equal.
        """
        if isinstance(other, GitIgnoreSpec):
            return super().__eq__(other)
        elif isinstance(other, PathSpec):
            # A plain PathSpec is never equal, even with identical patterns.
            return False
        else:
            return NotImplemented

    # Support reversed order of arguments from PathSpec.
    @overload
    @classmethod
    def from_lines(
        cls: type[Self],
        pattern_factory: Union[str, Callable[[AnyStr], Pattern], None],
        lines: Iterable[AnyStr],
        *,
        backend: Union[BackendNamesHint, str, None] = None,
        _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
    ) -> Self:
        ...

    @overload
    @classmethod
    def from_lines(
        cls: type[Self],
        lines: Iterable[AnyStr],
        pattern_factory: Union[str, Callable[[AnyStr], Pattern], None] = None,
        *,
        backend: Union[BackendNamesHint, str, None] = None,
        _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
    ) -> Self:
        ...

    @override
    @classmethod
    def from_lines(
        cls: type[Self],
        lines: Iterable[AnyStr],
        pattern_factory: Union[str, Callable[[AnyStr], Pattern], None] = None,
        *,
        backend: Union[BackendNamesHint, str, None] = None,
        _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
    ) -> Self:
        """
        Compiles the pattern lines.

        *lines* (:class:`~collections.abc.Iterable`) yields each uncompiled pattern
        (:class:`str`). This simply has to yield each line, so it can be a
        :class:`io.TextIOBase` (e.g., from :func:`open` or :class:`io.StringIO`) or
        the result from :meth:`str.splitlines`.

        *pattern_factory* does not need to be set for :class:`GitIgnoreSpec`. If
        set, it should be either ``"gitignore"`` or :class:`.GitIgnoreSpecPattern`.
        There is no guarantee it will work with any other pattern class. Default is
        :data:`None` for :class:`.GitIgnoreSpecPattern`.

        *backend* (:class:`str` or :data:`None`) is the pattern (regular expression)
        matching backend to use. Default is :data:`None` for "best" to use the best
        available backend. Priority of backends is: "re2", "hyperscan", "simple".
        The "simple" backend is always available.

        Raises :exc:`TypeError` if *pattern_factory* is (a subclass of)
        :class:`.GitIgnoreBasicPattern`.

        Returns the :class:`GitIgnoreSpec` instance.
        """
        if (isinstance(lines, (str, bytes)) or callable(lines)) and _is_iterable(pattern_factory):
            # Support reversed order of arguments from PathSpec.
            pattern_factory, lines = lines, pattern_factory

        if pattern_factory is None:
            pattern_factory = GitIgnoreSpecPattern
        elif pattern_factory == 'gitignore':
            # Force use of GitIgnoreSpecPattern for "gitignore" to handle edge-cases.
            # This makes usage easier.
            pattern_factory = GitIgnoreSpecPattern

        if isinstance(pattern_factory, str):
            pattern_factory = lookup_pattern(pattern_factory)

        # Only classes can be tested with issubclass(). A factory may be any
        # callable (function, functools.partial, ...), and issubclass() would
        # raise an unrelated TypeError for those. Non-callables are rejected by
        # PathSpec.from_lines() below.
        if isinstance(pattern_factory, type) and issubclass(pattern_factory, GitIgnoreBasicPattern):
            raise TypeError((
                f"{pattern_factory=!r} cannot be {GitIgnoreBasicPattern} because it "
                f"will give unexpected results."
            ))  # TypeError

        self = super().from_lines(pattern_factory, lines, backend=backend, _test_backend_factory=_test_backend_factory)
        return cast(Self, self)

    @override
    @staticmethod
    def _make_backend(
        name: BackendNamesHint,
        patterns: Sequence[Pattern],
    ) -> _Backend:
        """
        .. warning:: This method is not part of the public API. It is subject to
            change.

        Create the backend for the patterns.

        *name* (:class:`str`) is the name of the backend.

        *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
        contains the compiled patterns.

        Returns the backend (:class:`._Backend`).
        """
        return make_gitignore_backend(name, patterns)
class PathSpec(object):
    """
    The :class:`PathSpec` class is a wrapper around a list of compiled
    :class:`.Pattern` instances.
    """

    def __init__(
        self,
        patterns: Union[Sequence[Pattern], Iterable[Pattern]],
        *,
        backend: Union[BackendNamesHint, str, None] = None,
        _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
    ) -> None:
        """
        Initializes the :class:`.PathSpec` instance.

        *patterns* (:class:`~collections.abc.Sequence` or :class:`~collections.abc.Iterable`)
        contains each compiled pattern (:class:`.Pattern`). If not a sequence, it
        will be converted to a :class:`list`.

        *backend* (:class:`str` or :data:`None`) is the pattern (regular expression)
        matching backend to use. Default is :data:`None` for "best" to use the best
        available backend. Priority of backends is: "re2", "hyperscan", "simple".
        The "simple" backend is always available.
        """
        if not isinstance(patterns, Sequence):
            patterns = list(patterns)

        if backend is None:
            backend = 'best'

        backend = cast(BackendNamesHint, backend)
        if _test_backend_factory is not None:
            use_backend = _test_backend_factory(patterns)
        else:
            use_backend = self._make_backend(backend, patterns)

        self._backend: _Backend = use_backend
        """
        *_backend* (:class:`._Backend`) is the pattern (regular expression) matching
        backend.
        """

        self._backend_name: BackendNamesHint = backend
        """
        *_backend_name* (:class:`str`) is the name of backend to use.
        """

        self.patterns: Sequence[Pattern] = patterns
        """
        *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
        contains the compiled patterns.
        """

    def __add__(self: Self, other: PathSpec) -> Self:
        """
        Combines the :attr:`self.patterns <.PathSpec.patterns>` patterns from two
        :class:`PathSpec` instances.

        Returns a new instance of this class using the same backend name.
        """
        if isinstance(other, PathSpec):
            # Build a list instead of using `+`: the two sequences may be of
            # different concrete types (e.g., tuple and list) which cannot be
            # concatenated directly.
            combined = [*self.patterns, *other.patterns]
            return self.__class__(combined, backend=self._backend_name)
        else:
            return NotImplemented

    def __eq__(self, other: object) -> bool:
        """
        Tests the equality of this path-spec with *other* (:class:`PathSpec`) by
        comparing their :attr:`self.patterns <.PathSpec.patterns>` attributes.
        """
        if isinstance(other, PathSpec):
            # zip_longest pads the shorter side with None so specs of different
            # lengths never compare equal.
            paired_patterns = zip_longest(self.patterns, other.patterns)
            return all(a == b for a, b in paired_patterns)
        else:
            return NotImplemented

    def __iadd__(self: Self, other: PathSpec) -> Self:
        """
        Adds the :attr:`self.patterns <.PathSpec.patterns>` from *other*
        (:class:`PathSpec`) to this instance, and rebuilds the backend.
        """
        if isinstance(other, PathSpec):
            # Rebind to a fresh list rather than using `+=`. The stored sequence
            # may be the caller's own list (it is not copied on construction),
            # which must not be mutated, or an immutable/mismatched sequence
            # type for which `+=` raises.
            self.patterns = [*self.patterns, *other.patterns]
            self._backend = self._make_backend(self._backend_name, self.patterns)
            return self
        else:
            return NotImplemented

    def __len__(self) -> int:
        """
        Returns the number of :attr:`self.patterns <.PathSpec.patterns>` this
        path-spec contains (:class:`int`).
        """
        return len(self.patterns)

    def check_file(
        self,
        file: TStrPath,
        separators: Optional[Collection[str]] = None,
    ) -> CheckResult[TStrPath]:
        """
        Check the file against this path-spec.

        *file* (:class:`str` or :class:`os.PathLike`) is the file path to be matched
        against :attr:`self.patterns <.PathSpec.patterns>`.

        *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
        :data:`None`) optionally contains the path separators to normalize. See
        :func:`.normalize_file` for more information.

        Returns the file check result (:class:`.CheckResult`).
        """
        norm_file = normalize_file(file, separators)
        include, index = self._backend.match_file(norm_file)
        return CheckResult(file, include, index)

    def check_files(
        self,
        files: Iterable[TStrPath],
        separators: Optional[Collection[str]] = None,
    ) -> Iterator[CheckResult[TStrPath]]:
        """
        Check the files against this path-spec.

        *files* (:class:`~collections.abc.Iterable` of :class:`str` or
        :class:`os.PathLike`) contains the file paths to be checked against
        :attr:`self.patterns <.PathSpec.patterns>`.

        *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
        :data:`None`) optionally contains the path separators to normalize. See
        :func:`.normalize_file` for more information.

        Returns an :class:`~collections.abc.Iterator` yielding each file check
        result (:class:`.CheckResult`).
        """
        if not _is_iterable(files):
            raise TypeError(f"files:{files!r} is not an iterable.")

        for orig_file in files:
            norm_file = normalize_file(orig_file, separators)
            include, index = self._backend.match_file(norm_file)
            # Report the caller's original path, not the normalized one.
            yield CheckResult(orig_file, include, index)

    def check_tree_files(
        self,
        root: StrPath,
        on_error: Optional[Callable[[OSError], None]] = None,
        follow_links: Optional[bool] = None,
    ) -> Iterator[CheckResult[str]]:
        """
        Walks the specified root path for all files and checks them against this
        path-spec.

        *root* (:class:`str` or :class:`os.PathLike`) is the root directory to
        search for files.

        *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally
        is the error handler for file-system exceptions. It will be called with the
        exception (:exc:`OSError`). Reraise the exception to abort the walk. Default
        is :data:`None` to ignore file-system exceptions.

        *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk
        symbolic links that resolve to directories. Default is :data:`None` for
        :data:`True`.

        Returns an :class:`~collections.abc.Iterator` yielding each file check
        result (:class:`.CheckResult`).
        """
        files = util.iter_tree_files(root, on_error=on_error, follow_links=follow_links)
        yield from self.check_files(files)

    @classmethod
    def from_lines(
        cls: type[Self],
        pattern_factory: Union[str, Callable[[AnyStr], Pattern]],
        lines: Iterable[AnyStr],
        *,
        backend: Union[BackendNamesHint, str, None] = None,
        _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
    ) -> Self:
        """
        Compiles the pattern lines.

        *pattern_factory* can be either the name of a registered pattern factory
        (:class:`str`), or a :class:`~collections.abc.Callable` used to compile
        patterns. It must accept an uncompiled pattern (:class:`str`) and return the
        compiled pattern (:class:`.Pattern`).

        *lines* (:class:`~collections.abc.Iterable`) yields each uncompiled pattern
        (:class:`str`). This simply has to yield each line so that it can be a
        :class:`io.TextIOBase` (e.g., from :func:`open` or :class:`io.StringIO`) or
        the result from :meth:`str.splitlines`.

        *backend* (:class:`str` or :data:`None`) is the pattern (or regular
        expression) matching backend to use. Default is :data:`None` for "best" to
        use the best available backend. Priority of backends is: "re2", "hyperscan",
        "simple". The "simple" backend is always available.

        Raises :exc:`TypeError` if *pattern_factory* is not callable or *lines* is
        not iterable.

        Returns the :class:`PathSpec` instance.
        """
        if isinstance(pattern_factory, str):
            pattern_factory = util.lookup_pattern(pattern_factory)

        if not callable(pattern_factory):
            raise TypeError(f"pattern_factory:{pattern_factory!r} is not callable.")

        if not _is_iterable(lines):
            raise TypeError(f"lines:{lines!r} is not an iterable.")

        # Empty lines are skipped rather than compiled.
        patterns = [pattern_factory(line) for line in lines if line]
        return cls(patterns, backend=backend, _test_backend_factory=_test_backend_factory)

    @staticmethod
    def _make_backend(
        name: BackendNamesHint,
        patterns: Sequence[Pattern],
    ) -> _Backend:
        """
        .. warning:: This method is not part of the public API. It is subject to
            change.

        Create the backend for the patterns.

        *name* (:class:`str`) is the name of the backend.

        *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
        contains the compiled patterns.

        Returns the matcher (:class:`._Backend`).
        """
        return make_pathspec_backend(name, patterns)

    def match_entries(
        self,
        entries: Iterable[TreeEntry],
        separators: Optional[Collection[str]] = None,
        *,
        negate: Optional[bool] = None,
    ) -> Iterator[TreeEntry]:
        """
        Matches the entries to this path-spec.

        *entries* (:class:`~collections.abc.Iterable` of :class:`.TreeEntry`)
        contains the entries to be matched against :attr:`self.patterns <.PathSpec.patterns>`.

        *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
        :data:`None`) optionally contains the path separators to normalize. See
        :func:`.normalize_file` for more information.

        *negate* (:class:`bool` or :data:`None`) is whether to negate the match
        results of the patterns. If :data:`True`, a pattern matching a file will
        exclude the file rather than include it. Default is :data:`None` for
        :data:`False`.

        Returns the matched entries (:class:`~collections.abc.Iterator` of
        :class:`.TreeEntry`).
        """
        if not _is_iterable(entries):
            raise TypeError(f"entries:{entries!r} is not an iterable.")

        for entry in entries:
            norm_file = normalize_file(entry.path, separators)
            include, _index = self._backend.match_file(norm_file)

            if negate:
                # An unmatched entry (include is None) becomes included here.
                include = not include

            if include:
                yield entry

    def match_file(
        self,
        file: StrPath,
        separators: Optional[Collection[str]] = None,
    ) -> bool:
        """
        Matches the file to this path-spec.

        *file* (:class:`str` or :class:`os.PathLike`) is the file path to be matched
        against :attr:`self.patterns <.PathSpec.patterns>`.

        *separators* (:class:`~collections.abc.Collection` of :class:`str`)
        optionally contains the path separators to normalize. See
        :func:`.normalize_file` for more information.

        Returns :data:`True` if *file* matched; otherwise, :data:`False`.
        """
        norm_file = normalize_file(file, separators)
        include, _index = self._backend.match_file(norm_file)
        # The backend may report None for "no pattern matched"; coerce to bool.
        return bool(include)

    def match_files(
        self,
        files: Iterable[StrPath],
        separators: Optional[Collection[str]] = None,
        *,
        negate: Optional[bool] = None,
    ) -> Iterator[StrPath]:
        """
        Matches the files to this path-spec.

        *files* (:class:`~collections.abc.Iterable` of :class:`str` or
        :class:`os.PathLike`) contains the file paths to be matched against
        :attr:`self.patterns <.PathSpec.patterns>`.

        *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
        :data:`None`) optionally contains the path separators to normalize. See
        :func:`.normalize_file` for more information.

        *negate* (:class:`bool` or :data:`None`) is whether to negate the match
        results of the patterns. If :data:`True`, a pattern matching a file will
        exclude the file rather than include it. Default is :data:`None` for
        :data:`False`.

        Returns the matched files (:class:`~collections.abc.Iterator` of
        :class:`str` or :class:`os.PathLike`).
        """
        if not _is_iterable(files):
            raise TypeError(f"files:{files!r} is not an iterable.")

        for orig_file in files:
            norm_file = normalize_file(orig_file, separators)
            include, _index = self._backend.match_file(norm_file)

            if negate:
                # An unmatched file (include is None) becomes included here.
                include = not include

            if include:
                yield orig_file

    def match_tree_entries(
        self,
        root: StrPath,
        on_error: Optional[Callable[[OSError], None]] = None,
        follow_links: Optional[bool] = None,
        *,
        negate: Optional[bool] = None,
    ) -> Iterator[TreeEntry]:
        """
        Walks the specified root path for all files and matches them to this
        path-spec.

        *root* (:class:`str` or :class:`os.PathLike`) is the root directory to
        search.

        *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally
        is the error handler for file-system exceptions. It will be called with the
        exception (:exc:`OSError`). Reraise the exception to abort the walk. Default
        is :data:`None` to ignore file-system exceptions.

        *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk
        symbolic links that resolve to directories. Default is :data:`None` for
        :data:`True`.

        *negate* (:class:`bool` or :data:`None`) is whether to negate the match
        results of the patterns. If :data:`True`, a pattern matching a file will
        exclude the file rather than include it. Default is :data:`None` for
        :data:`False`.

        Returns the matched files (:class:`~collections.abc.Iterator` of
        :class:`.TreeEntry`).
        """
        entries = util.iter_tree_entries(root, on_error=on_error, follow_links=follow_links)
        yield from self.match_entries(entries, negate=negate)

    # NOTICE: The deprecation warning was only added in 1.0.0 (from 2026-01-05).
    @deprecated((
        "PathSpec.match_tree() is deprecated. Use .match_tree_files() instead."
    ))
    def match_tree(self, *args, **kw) -> Iterator[str]:
        """
        .. version-deprecated:: 0.3.2
            This is an alias for the :meth:`self.match_tree_files <.PathSpec.match_tree_files>`
            method.
        """
        return self.match_tree_files(*args, **kw)

    def match_tree_files(
        self,
        root: StrPath,
        on_error: Optional[Callable[[OSError], None]] = None,
        follow_links: Optional[bool] = None,
        *,
        negate: Optional[bool] = None,
    ) -> Iterator[str]:
        """
        Walks the specified root path for all files and matches them to this
        path-spec.

        *root* (:class:`str` or :class:`os.PathLike`) is the root directory to
        search for files.

        *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally
        is the error handler for file-system exceptions. It will be called with the
        exception (:exc:`OSError`). Reraise the exception to abort the walk. Default
        is :data:`None` to ignore file-system exceptions.

        *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk
        symbolic links that resolve to directories. Default is :data:`None` for
        :data:`True`.

        *negate* (:class:`bool` or :data:`None`) is whether to negate the match
        results of the patterns. If :data:`True`, a pattern matching a file will
        exclude the file rather than include it. Default is :data:`None` for
        :data:`False`.

        Returns the matched files (:class:`~collections.abc.Iterator` of :class:`str`).
        """
        files = util.iter_tree_files(root, on_error=on_error, follow_links=follow_links)
        yield from self.match_files(files, negate=negate)
class Pattern(object):
    """
    The :class:`Pattern` class is the abstract base for a pattern. Subclasses
    must override :meth:`match_file`.
    """

    # No per-instance ``__dict__``: *include* is the only attribute.
    __slots__ = ('include',)

    def __init__(self, include: Optional[bool]) -> None:
        """
        Initializes the :class:`Pattern` instance.

        *include* (:class:`bool` or :data:`None`) is whether the matched files
        should be included (:data:`True`), excluded (:data:`False`), or is a
        null-operation (:data:`None`).
        """

        self.include = include
        """
        *include* (:class:`bool` or :data:`None`) is whether the matched files
        should be included (:data:`True`), excluded (:data:`False`), or is a
        null-operation (:data:`None`).
        """

    @deprecated((
        "Pattern.match() is deprecated. Use Pattern.match_file() with a loop for "
        "similar results."
    ))
    def match(self, files: Iterable[str]) -> Iterator[str]:
        """
        .. version-deprecated:: 0.10.0
            This method is no longer used. Use the :meth:`self.match_file <.Pattern.match_file>`
            method with a loop for similar results.

        Lazily filters *files* down to the ones this pattern matches.

        *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains each
        file relative to the root directory.

        Returns an :class:`~collections.abc.Iterable` yielding each matched file
        path (:class:`str`).
        """
        # A file counts as matched when match_file() returns anything but None.
        yield from (
            candidate
            for candidate in files
            if self.match_file(candidate) is not None
        )

    def match_file(self, file: str) -> Optional[Any]:
        """
        Matches this pattern against the specified file. The base class has no
        matching logic and always raises :exc:`NotImplementedError`.

        *file* (:class:`str`) is the normalized file path to match against.

        Returns the match result if *file* matched; otherwise, :data:`None`.
        """
        template = "{cls.__module__}.{cls.__qualname__} must override match_file()."
        raise NotImplementedError(template.format(cls=self.__class__))
+ regex = pattern + + elif pattern is None: + # NOTE: Make sure to allow a null pattern to be passed for a + # null-operation. + assert include is None, ( + f"include:{include!r} must be null when pattern:{pattern!r} is null." + ) + regex = None + + else: + raise TypeError(f"pattern:{pattern!r} is not a string, re.Pattern, or None.") + + super(RegexPattern, self).__init__(include) + + self.pattern: Union[AnyStr, re.Pattern, None] = pattern + """ + *pattern* (:class:`str`, :class:`bytes`, :class:`re.Pattern`, or + :data:`None`) is the uncompiled, input pattern. This is for reference. + """ + + self.regex: Optional[re.Pattern] = regex + """ + *regex* (:class:`re.Pattern` or :data:`None`) is the compiled regular + expression for the pattern. + """ + + def __copy__(self: RegexPatternSelf) -> RegexPatternSelf: + """ + Performa a shallow copy of the pattern. + + Returns the copy (:class:`RegexPattern`). + """ + other = self.__class__(self.regex, self.include) + other.pattern = self.pattern + return other + + def __eq__(self, other: RegexPattern) -> bool: + """ + Tests the equality of this regex pattern with *other* (:class:`RegexPattern`) + by comparing their :attr:`~Pattern.include` and :attr:`~RegexPattern.regex` + attributes. + """ + if isinstance(other, RegexPattern): + return self.include == other.include and self.regex == other.regex + else: + return NotImplemented + + @override + def match_file(self, file: AnyStr) -> Optional[RegexMatchResult]: + """ + Matches this pattern against the specified file. + + *file* (:class:`str` or :class:`bytes`) is the file path relative to the + root directory (e.g., "relative/path/to/file"). + + Returns the match result (:class:`.RegexMatchResult`) if *file* matched; + otherwise, :data:`None`. 
@dataclass
class RegexMatchResult:
    """
    The :class:`RegexMatchResult` data class carries the information about a
    successful regular expression match.
    """

    # Slots only: instances have no ``__dict__``.
    __slots__ = ('match',)

    match: re.Match
    """
    *match* (:class:`re.Match`) is the regex match result.
    """
+from .gitwildmatch import GitWildMatchPattern diff --git a/lib/pathspec/patterns/__pycache__/__init__.cpython-314.pyc b/lib/pathspec/patterns/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..e6e37d0 Binary files /dev/null and b/lib/pathspec/patterns/__pycache__/__init__.cpython-314.pyc differ diff --git a/lib/pathspec/patterns/__pycache__/gitwildmatch.cpython-314.pyc b/lib/pathspec/patterns/__pycache__/gitwildmatch.cpython-314.pyc new file mode 100644 index 0000000..cb4d88e Binary files /dev/null and b/lib/pathspec/patterns/__pycache__/gitwildmatch.cpython-314.pyc differ diff --git a/lib/pathspec/patterns/gitignore/__init__.py b/lib/pathspec/patterns/gitignore/__init__.py new file mode 100644 index 0000000..e440754 --- /dev/null +++ b/lib/pathspec/patterns/gitignore/__init__.py @@ -0,0 +1,17 @@ +""" +The *pathspec.patterns.gitignore* package provides the *gitignore* +implementations. + +The following classes are imported and made available from this package: + +- :class:`pathspec.patterns.gitignore.base.GitIgnorePatternError` +""" + +# Expose the GitIgnorePatternError for convenience. +from .base import ( + GitIgnorePatternError) + +# Declare imports as part of the public interface. 
+__all__ = [ + 'GitIgnorePatternError', +] diff --git a/lib/pathspec/patterns/gitignore/__pycache__/__init__.cpython-314.pyc b/lib/pathspec/patterns/gitignore/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..1d801f3 Binary files /dev/null and b/lib/pathspec/patterns/gitignore/__pycache__/__init__.cpython-314.pyc differ diff --git a/lib/pathspec/patterns/gitignore/__pycache__/base.cpython-314.pyc b/lib/pathspec/patterns/gitignore/__pycache__/base.cpython-314.pyc new file mode 100644 index 0000000..c437620 Binary files /dev/null and b/lib/pathspec/patterns/gitignore/__pycache__/base.cpython-314.pyc differ diff --git a/lib/pathspec/patterns/gitignore/__pycache__/basic.cpython-314.pyc b/lib/pathspec/patterns/gitignore/__pycache__/basic.cpython-314.pyc new file mode 100644 index 0000000..c514bd4 Binary files /dev/null and b/lib/pathspec/patterns/gitignore/__pycache__/basic.cpython-314.pyc differ diff --git a/lib/pathspec/patterns/gitignore/__pycache__/spec.cpython-314.pyc b/lib/pathspec/patterns/gitignore/__pycache__/spec.cpython-314.pyc new file mode 100644 index 0000000..fdf34e0 Binary files /dev/null and b/lib/pathspec/patterns/gitignore/__pycache__/spec.cpython-314.pyc differ diff --git a/lib/pathspec/patterns/gitignore/base.py b/lib/pathspec/patterns/gitignore/base.py new file mode 100644 index 0000000..0e1dd3c --- /dev/null +++ b/lib/pathspec/patterns/gitignore/base.py @@ -0,0 +1,176 @@ +""" +This module provides common classes for the gitignore patterns. +""" + +import re + +from pathspec.pattern import ( + RegexPattern) +from pathspec._typing import ( + AnyStr) # Removed in 3.18. + +_BYTES_ENCODING = 'latin1' +""" +The encoding to use when parsing a byte string pattern. +""" + + +class _GitIgnoreBasePattern(RegexPattern): + """ + .. warning:: This class is not part of the public API. It is subject to + change. + + The :class:`_GitIgnoreBasePattern` class is the base implementation for a + compiled gitignore pattern. 
+ """ + + # Keep the dict-less class hierarchy. + __slots__ = () + + @staticmethod + def escape(s: AnyStr) -> AnyStr: + """ + Escape special characters in the given string. + + *s* (:class:`str` or :class:`bytes`) a filename or a string that you want to + escape, usually before adding it to a ".gitignore". + + Returns the escaped string (:class:`str` or :class:`bytes`). + """ + if isinstance(s, str): + return_type = str + string = s + elif isinstance(s, bytes): + return_type = bytes + string = s.decode(_BYTES_ENCODING) + else: + raise TypeError(f"s:{s!r} is not a unicode or byte string.") + + # Reference: https://git-scm.com/docs/gitignore#_pattern_format + out_string = ''.join((f"\\{x}" if x in '[]!*#?' else x) for x in string) + + if return_type is bytes: + return out_string.encode(_BYTES_ENCODING) + else: + return out_string + + @staticmethod + def _translate_segment_glob(pattern: str) -> str: + """ + Translates the glob pattern to a regular expression. This is used in the + constructor to translate a path segment glob pattern to its corresponding + regular expression. + + *pattern* (:class:`str`) is the glob pattern. + + Returns the regular expression (:class:`str`). + """ + # NOTE: This is derived from `fnmatch.translate()` and is similar to the + # POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set. + + escape = False + regex = '' + i, end = 0, len(pattern) + while i < end: + # Get next character. + char = pattern[i] + i += 1 + + if escape: + # Escape the character. + escape = False + regex += re.escape(char) + + elif char == '\\': + # Escape character, escape next character. + escape = True + + elif char == '*': + # Multi-character wildcard. Match any string (except slashes), including + # an empty string. + regex += '[^/]*' + + elif char == '?': + # Single-character wildcard. Match any single character (except a + # slash). + regex += '[^/]' + + elif char == '[': + # Bracket expression wildcard. 
Except for the beginning exclamation + # mark, the whole bracket expression can be used directly as regex, but + # we have to find where the expression ends. + # - "[][!]" matches ']', '[' and '!'. + # - "[]-]" matches ']' and '-'. + # - "[!]a-]" matches any character except ']', 'a' and '-'. + j = i + + # Pass bracket expression negation. + if j < end and (pattern[j] == '!' or pattern[j] == '^'): + j += 1 + + # Pass first closing bracket if it is at the beginning of the + # expression. + if j < end and pattern[j] == ']': + j += 1 + + # Find closing bracket. Stop once we reach the end or find it. + while j < end and pattern[j] != ']': + j += 1 + + if j < end: + # Found end of bracket expression. Increment j to be one past the + # closing bracket: + # + # [...] + # ^ ^ + # i j + # + j += 1 + expr = '[' + + if pattern[i] == '!': + # Bracket expression needs to be negated. + expr += '^' + i += 1 + elif pattern[i] == '^': + # POSIX declares that the regex bracket expression negation "[^...]" + # is undefined in a glob pattern. Python's `fnmatch.translate()` + # escapes the caret ('^') as a literal. Git supports the using a + # caret for negation. Maintain consistency with Git because that is + # the expected behavior. + expr += '^' + i += 1 + + # Build regex bracket expression. Escape slashes so they are treated + # as literal slashes by regex as defined by POSIX. + expr += pattern[i:j].replace('\\', '\\\\') + + # Add regex bracket expression to regex result. + regex += expr + + # Set i to one past the closing bracket. + i = j + + else: + # Failed to find closing bracket, treat opening bracket as a bracket + # literal instead of as an expression. + regex += '\\[' + + else: + # Regular character, escape it for regex. 
+ regex += re.escape(char) + + if escape: + raise ValueError(( + f"Escape character found with no next character to escape: {pattern!r}" + )) # ValueError + + return regex + + +class GitIgnorePatternError(ValueError): + """ + The :class:`GitIgnorePatternError` class indicates an invalid gitignore + pattern. + """ + pass diff --git a/lib/pathspec/patterns/gitignore/basic.py b/lib/pathspec/patterns/gitignore/basic.py new file mode 100644 index 0000000..95d7915 --- /dev/null +++ b/lib/pathspec/patterns/gitignore/basic.py @@ -0,0 +1,317 @@ +""" +This module provides :class:`GitIgnoreBasicPattern` which implements Git's +`gitignore`_ patterns as documented. This differs from how Git actually behaves +when including files in excluded directories. + +.. _`gitignore`: https://git-scm.com/docs/gitignore +""" + +from typing import ( + Optional) # Replaced by `X | None` in 3.10. + +from pathspec import util +from pathspec._typing import ( + AnyStr, # Removed in 3.18. + assert_unreachable, + override) # Added in 3.12. + +from .base import ( + GitIgnorePatternError, + _BYTES_ENCODING, + _GitIgnoreBasePattern) + + +class GitIgnoreBasicPattern(_GitIgnoreBasePattern): + """ + The :class:`GitIgnoreBasicPattern` class represents a compiled gitignore + pattern as documented. This is registered as "gitignore". + """ + + # Keep the dict-less class hierarchy. + __slots__ = () + + @staticmethod + def __normalize_segments( + is_dir_pattern: bool, + pattern_segs: list[str], + ) -> tuple[Optional[list[str]], Optional[str]]: + """ + Normalize the pattern segments to make processing easier. + + *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory + pattern (i.e., ends with a slash '/'). + + *pattern_segs* (:class:`list` of :class:`str`) contains the pattern + segments. This may be modified in place. + + Returns a :class:`tuple` containing either: + + - The normalized segments (:class:`list` of :class:`str`; or :data:`None`). 
+ + - The regular expression override (:class:`str` or :data:`None`). + """ + if not pattern_segs[0]: + # A pattern beginning with a slash ('/') should match relative to the root + # directory. Remove the empty first segment to make the pattern relative + # to root. + del pattern_segs[0] + + elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]): + # A single segment pattern with or without a trailing slash ('/') will + # match any descendant path. This is equivalent to "**/{pattern}". Prepend + # double-asterisk segment to make pattern relative to root. + if pattern_segs[0] != '**': + pattern_segs.insert(0, '**') + + else: + # A pattern without a beginning slash ('/') but contains at least one + # prepended directory (e.g., "dir/{pattern}") should match relative to the + # root directory. No segment modification is needed. + pass + + if not pattern_segs: + # After normalization, we end up with no pattern at all. This must be + # because the pattern is invalid. + raise ValueError("Pattern normalized to nothing.") + + if not pattern_segs[-1]: + # A pattern ending with a slash ('/') will match all descendant paths if + # it is a directory but not if it is a regular file. This is equivalent to + # "{pattern}/**". Set empty last segment to a double-asterisk to include + # all descendants. + pattern_segs[-1] = '**' + + # EDGE CASE: Collapse duplicate double-asterisk sequences (i.e., '**/**'). + # Iterate over the segments in reverse order and remove the duplicate double + # asterisks as we go. + for i in range(len(pattern_segs) - 1, 0, -1): + prev = pattern_segs[i-1] + seg = pattern_segs[i] + if prev == '**' and seg == '**': + del pattern_segs[i] + + seg_count = len(pattern_segs) + if seg_count == 1 and pattern_segs[0] == '**': + if is_dir_pattern: + # The pattern "**/" will be normalized to "**", but it should match + # everything except for files in the root. Special case this pattern. 
+ return (None, '/') + else: + # The pattern "**" will match every path. Special case this pattern. + return (None, '.') + + elif ( + seg_count == 2 + and pattern_segs[0] == '**' + and pattern_segs[1] == '*' + ): + # The pattern "*" will be normalized to "**/*" and will match every + # path. Special case this pattern for efficiency. + return (None, '.') + + elif ( + seg_count == 3 + and pattern_segs[0] == '**' + and pattern_segs[1] == '*' + and pattern_segs[2] == '**' + ): + # The pattern "*/" will be normalized to "**/*/**" which will match every + # file not in the root directory. Special case this pattern for + # efficiency. + return (None, '/') + + # No regular expression override, return modified pattern segments. + return (pattern_segs, None) + + @override + @classmethod + def pattern_to_regex( + cls, + pattern: AnyStr, + ) -> tuple[Optional[AnyStr], Optional[bool]]: + """ + Convert the pattern into a regular expression. + + *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert into a + regular expression. + + Returns a :class:`tuple` containing: + + - *pattern* (:class:`str`, :class:`bytes` or :data:`None`) is the + uncompiled regular expression. + + - *include* (:class:`bool` or :data:`None`) is whether matched files + should be included (:data:`True`), excluded (:data:`False`), or is a + null-operation (:data:`None`). + """ + if isinstance(pattern, str): + pattern_str = pattern + return_type = str + elif isinstance(pattern, bytes): + pattern_str = pattern.decode(_BYTES_ENCODING) + return_type = bytes + else: + raise TypeError(f"{pattern=!r} is not a unicode or byte string.") + + original_pattern = pattern_str + del pattern + + if pattern_str.endswith('\\ '): + # EDGE CASE: Spaces can be escaped with backslash. If a pattern that ends + # with a backslash is followed by a space, do not strip from the left. + pass + else: + # EDGE CASE: Leading spaces should be kept (only trailing spaces should be + # removed). 
+ pattern_str = pattern_str.rstrip() + + regex: Optional[str] + include: Optional[bool] + + if not pattern_str: + # A blank pattern is a null-operation (neither includes nor excludes + # files). + return (None, None) + + elif pattern_str.startswith('#'): + # A pattern starting with a hash ('#') serves as a comment (neither + # includes nor excludes files). Escape the hash with a backslash to match + # a literal hash (i.e., '\#'). + return (None, None) + + if pattern_str.startswith('!'): + # A pattern starting with an exclamation mark ('!') negates the pattern + # (exclude instead of include). Escape the exclamation mark with a back + # slash to match a literal exclamation mark (i.e., '\!'). + include = False + # Remove leading exclamation mark. + pattern_str = pattern_str[1:] + else: + include = True + + # Split pattern into segments. + pattern_segs = pattern_str.split('/') + + # Check whether the pattern is specifically a directory pattern before + # normalization. + is_dir_pattern = not pattern_segs[-1] + + if pattern_str == '/': + # EDGE CASE: A single slash ('/') is not addressed by the gitignore + # documentation. Git treats it as a no-op (does not match any files). The + # straight forward interpretation is to treat it as a directory and match + # every descendant path (equivalent to '**'). Remove the directory pattern + # flag so that it is treated as '**' instead of '**/'. + is_dir_pattern = False + + # Normalize pattern to make processing easier. + try: + pattern_segs, override_regex = cls.__normalize_segments( + is_dir_pattern, pattern_segs, + ) + except ValueError as e: + raise GitIgnorePatternError(( + f"Invalid git pattern: {original_pattern!r}" + )) from e # GitIgnorePatternError + + if override_regex is not None: + # Use regex override. + regex = override_regex + + elif pattern_segs is not None: + # Build regular expression from pattern. 
+ try: + regex_parts = cls.__translate_segments(pattern_segs) + except ValueError as e: + raise GitIgnorePatternError(( + f"Invalid git pattern: {original_pattern!r}" + )) from e # GitIgnorePatternError + + regex = ''.join(regex_parts) + + else: + assert_unreachable(( + f"{override_regex=} and {pattern_segs=} cannot both be null." + )) # assert_unreachable + + # Encode regex if needed. + out_regex: AnyStr + if regex is not None and return_type is bytes: + out_regex = regex.encode(_BYTES_ENCODING) + else: + out_regex = regex + + return (out_regex, include) + + @classmethod + def __translate_segments(cls, pattern_segs: list[str]) -> list[str]: + """ + Translate the pattern segments to regular expressions. + + *pattern_segs* (:class:`list` of :class:`str`) contains the pattern + segments. + + Returns the regular expression parts (:class:`list` of :class:`str`). + """ + # Build regular expression from pattern. + out_parts = [] + need_slash = False + end = len(pattern_segs) - 1 + for i, seg in enumerate(pattern_segs): + if seg == '**': + if i == 0: + # A normalized pattern beginning with double-asterisks ('**') will + # match any leading path segments. + # - NOTICE: '(?:^|/)' benchmarks slower using p15 (sm=0.9382, + # hs=0.9966, re2=0.9337). + out_parts.append('^(?:.+/)?') + + elif i < end: + # A pattern with inner double-asterisks ('**') will match multiple (or + # zero) inner path segments. + out_parts.append('(?:/.+)?') + need_slash = True + + else: + assert i == end, (i, end) + # A normalized pattern ending with double-asterisks ('**') will match + # any trailing path segments. + out_parts.append('/') + + else: + # Match path segment. + if i == 0: + # Anchor to root directory. + out_parts.append('^') + + if need_slash: + out_parts.append('/') + + if seg == '*': + # Match whole path segment. + out_parts.append('[^/]+') + + else: + # Match segment glob pattern. 
+ out_parts.append(cls._translate_segment_glob(seg)) + + if i == end: + if seg == '*': + # A pattern ending with an asterisk ('*') will match a file or + # directory (without matching descendant paths). E.g., "foo/*" + # matches "foo/test.json", "foo/bar/", but not "foo/bar/hello.c". + out_parts.append('/?$') + + else: + # A pattern ending without a slash ('/') will match a file or a + # directory (with paths underneath it). E.g., "foo" matches "foo", + # "foo/bar", "foo/bar/baz", etc. + out_parts.append('(?:/|$)') + + need_slash = True + + return out_parts + + +# Register GitIgnoreBasicPattern as "gitignore". +util.register_pattern('gitignore', GitIgnoreBasicPattern) diff --git a/lib/pathspec/patterns/gitignore/spec.py b/lib/pathspec/patterns/gitignore/spec.py new file mode 100644 index 0000000..ee77457 --- /dev/null +++ b/lib/pathspec/patterns/gitignore/spec.py @@ -0,0 +1,335 @@ +""" +This module provides :class:`GitIgnoreSpecPattern` which implements Git's +`gitignore`_ patterns, and handles edge-cases where Git's behavior differs from +what's documented. Git allows including files from excluded directories which +appears to contradict the documentation. This is used by +:class:`~pathspec.gitignore.GitIgnoreSpec` to fully replicate Git's handling. + +.. _`gitignore`: https://git-scm.com/docs/gitignore +""" + +from typing import ( + Optional) # Replaced by `X | None` in 3.10. + +from pathspec._typing import ( + AnyStr, # Removed in 3.18. + assert_unreachable, + override) # Added in 3.12. + +from .base import ( + GitIgnorePatternError, + _BYTES_ENCODING, + _GitIgnoreBasePattern) + +_DIR_MARK = 'ps_d' +""" +The regex group name for the directory marker. This is only used by +:class:`GitIgnoreSpec`. +""" + +_DIR_MARK_CG = f'(?P<{_DIR_MARK}>/)' +""" +This regular expression matches the directory marker. +""" + +_DIR_MARK_OPT = f'(?:{_DIR_MARK_CG}|$)' +""" +This regular expression matches the optional directory marker and sub-path. 
+""" + + +class GitIgnoreSpecPattern(_GitIgnoreBasePattern): + """ + The :class:`GitIgnoreSpecPattern` class represents a compiled gitignore + pattern with special handling for edge-cases to replicate Git's behavior. + + This is registered under the deprecated name "gitwildmatch" for backward + compatibility with v0.12. The registered name will be removed in a future + version. + """ + + # Keep the dict-less class hierarchy. + __slots__ = () + + @staticmethod + def __normalize_segments( + is_dir_pattern: bool, + pattern_segs: list[str], + ) -> tuple[Optional[list[str]], Optional[str]]: + """ + Normalize the pattern segments to make processing easier. + + *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory + pattern (i.e., ends with a slash '/'). + + *pattern_segs* (:class:`list` of :class:`str`) contains the pattern + segments. This may be modified in place. + + Returns a :class:`tuple` containing either: + + - The normalized segments (:class:`list` of :class:`str`; or :data:`None`). + + - The regular expression override (:class:`str` or :data:`None`). + """ + if not pattern_segs[0]: + # A pattern beginning with a slash ('/') should match relative to the root + # directory. Remove the empty first segment to make the pattern relative + # to root. + del pattern_segs[0] + + elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]): + # A single segment pattern with or without a trailing slash ('/') will + # match any descendant path. This is equivalent to "**/{pattern}". Prepend + # double-asterisk segment to make pattern relative to root. + if pattern_segs[0] != '**': + pattern_segs.insert(0, '**') + + else: + # A pattern without a beginning slash ('/') but contains at least one + # prepended directory (e.g., "dir/{pattern}") should match relative to the + # root directory. No segment modification is needed. + pass + + if not pattern_segs: + # After normalization, we end up with no pattern at all. 
This must be + # because the pattern is invalid. + raise ValueError("Pattern normalized to nothing.") + + if not pattern_segs[-1]: + # A pattern ending with a slash ('/') will match all descendant paths if + # it is a directory but not if it is a regular file. This is equivalent to + # "{pattern}/**". Set empty last segment to a double-asterisk to include + # all descendants. + pattern_segs[-1] = '**' + + # EDGE CASE: Collapse duplicate double-asterisk sequences (i.e., '**/**'). + # Iterate over the segments in reverse order and remove the duplicate double + # asterisks as we go. + for i in range(len(pattern_segs) - 1, 0, -1): + prev = pattern_segs[i-1] + seg = pattern_segs[i] + if prev == '**' and seg == '**': + del pattern_segs[i] + + seg_count = len(pattern_segs) + if seg_count == 1 and pattern_segs[0] == '**': + if is_dir_pattern: + # The pattern "**/" will be normalized to "**", but it should match + # everything except for files in the root. Special case this pattern. + return (None, _DIR_MARK_CG) + else: + # The pattern "**" will match every path. Special case this pattern. + return (None, '.') + + elif ( + seg_count == 2 + and pattern_segs[0] == '**' + and pattern_segs[1] == '*' + ): + # The pattern "*" will be normalized to "**/*" and will match every + # path. Special case this pattern for efficiency. + return (None, '.') + + elif ( + seg_count == 3 + and pattern_segs[0] == '**' + and pattern_segs[1] == '*' + and pattern_segs[2] == '**' + ): + # The pattern "*/" will be normalized to "**/*/**" which will match every + # file not in the root directory. Special case this pattern for + # efficiency. + if is_dir_pattern: + return (None, _DIR_MARK_CG) + else: + return (None, '/') + + # No regular expression override, return modified pattern segments. + return (pattern_segs, None) + + @override + @classmethod + def pattern_to_regex( + cls, + pattern: AnyStr, + ) -> tuple[Optional[AnyStr], Optional[bool]]: + """ + Convert the pattern into a regular expression. 
+ + *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert into a + regular expression. + + Returns a :class:`tuple` containing: + + - *pattern* (:class:`str`, :class:`bytes` or :data:`None`) is the + uncompiled regular expression. + + - *include* (:class:`bool` or :data:`None`) is whether matched files + should be included (:data:`True`), excluded (:data:`False`), or is a + null-operation (:data:`None`). + """ + if isinstance(pattern, str): + pattern_str = pattern + return_type = str + elif isinstance(pattern, bytes): + pattern_str = pattern.decode(_BYTES_ENCODING) + return_type = bytes + else: + raise TypeError(f"{pattern=!r} is not a unicode or byte string.") + + original_pattern = pattern_str + del pattern + + if pattern_str.endswith('\\ '): + # EDGE CASE: Spaces can be escaped with backslash. If a pattern that ends + # with a backslash is followed by a space, do not strip from the left. + pass + else: + # EDGE CASE: Leading spaces should be kept (only trailing spaces should be + # removed). Git does not remove leading spaces. + pattern_str = pattern_str.rstrip() + + regex: Optional[str] + include: Optional[bool] + + if not pattern_str: + # A blank pattern is a null-operation (neither includes nor excludes + # files). + return (None, None) + + elif pattern_str.startswith('#'): + # A pattern starting with a hash ('#') serves as a comment (neither + # includes nor excludes files). Escape the hash with a backslash to match + # a literal hash (i.e., '\#'). + return (None, None) + + elif pattern_str == '/': + # EDGE CASE: According to `git check-ignore` (v2.4.1), a single '/' does + # not match any file. + return (None, None) + + if pattern_str.startswith('!'): + # A pattern starting with an exclamation mark ('!') negates the pattern + # (exclude instead of include). Escape the exclamation mark with a back + # slash to match a literal exclamation mark (i.e., '\!'). + include = False + # Remove leading exclamation mark. 
+ pattern_str = pattern_str[1:] + else: + include = True + + # Split pattern into segments. + pattern_segs = pattern_str.split('/') + + # Check whether the pattern is specifically a directory pattern before + # normalization. + is_dir_pattern = not pattern_segs[-1] + + # Normalize pattern to make processing easier. + try: + pattern_segs, override_regex = cls.__normalize_segments( + is_dir_pattern, pattern_segs, + ) + except ValueError as e: + raise GitIgnorePatternError(( + f"Invalid git pattern: {original_pattern!r}" + )) from e # GitIgnorePatternError + + if override_regex is not None: + # Use regex override. + regex = override_regex + + elif pattern_segs is not None: + # Build regular expression from pattern. + try: + regex_parts = cls.__translate_segments(is_dir_pattern, pattern_segs) + except ValueError as e: + raise GitIgnorePatternError(( + f"Invalid git pattern: {original_pattern!r}" + )) from e # GitIgnorePatternError + + regex = ''.join(regex_parts) + + else: + assert_unreachable(( + f"{override_regex=} and {pattern_segs=} cannot both be null." + )) # assert_unreachable + + # Encode regex if needed. + out_regex: AnyStr + if regex is not None and return_type is bytes: + out_regex = regex.encode(_BYTES_ENCODING) + else: + out_regex = regex + + return (out_regex, include) + + @classmethod + def __translate_segments( + cls, + is_dir_pattern: bool, + pattern_segs: list[str], + ) -> list[str]: + """ + Translate the pattern segments to regular expressions. + + *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory + pattern (i.e., ends with a slash '/'). + + *pattern_segs* (:class:`list` of :class:`str`) contains the pattern + segments. + + Returns the regular expression parts (:class:`list` of :class:`str`). + """ + # Build regular expression from pattern. 
+ out_parts = [] + need_slash = False + end = len(pattern_segs) - 1 + for i, seg in enumerate(pattern_segs): + if seg == '**': + if i == 0: + # A normalized pattern beginning with double-asterisks ('**') will + # match any leading path segments. + out_parts.append('^(?:.+/)?') + + elif i < end: + # A pattern with inner double-asterisks ('**') will match multiple (or + # zero) inner path segments. + out_parts.append('(?:/.+)?') + need_slash = True + + else: + assert i == end, (i, end) + # A normalized pattern ending with double-asterisks ('**') will match + # any trailing path segments. + if is_dir_pattern: + out_parts.append(_DIR_MARK_CG) + else: + out_parts.append('/') + + else: + # Match path segment. + if i == 0: + # Anchor to root directory. + out_parts.append('^') + + if need_slash: + out_parts.append('/') + + if seg == '*': + # Match whole path segment. + out_parts.append('[^/]+') + + else: + # Match segment glob pattern. + out_parts.append(cls._translate_segment_glob(seg)) + + if i == end: + # A pattern ending without a slash ('/') will match a file or a + # directory (with paths underneath it). E.g., "foo" matches "foo", + # "foo/bar", "foo/bar/baz", etc. + out_parts.append(_DIR_MARK_OPT) + + need_slash = True + + return out_parts diff --git a/lib/pathspec/patterns/gitwildmatch.py b/lib/pathspec/patterns/gitwildmatch.py new file mode 100644 index 0000000..b44d961 --- /dev/null +++ b/lib/pathspec/patterns/gitwildmatch.py @@ -0,0 +1,52 @@ +""" +.. version-deprecated: 1.0.0 + This module is superseded by :module:`pathspec.patterns.gitignore`. +""" + +from pathspec import util +from pathspec._typing import ( + deprecated, # Added in 3.13. + override) # Added in 3.12. + +from .gitignore.spec import ( + GitIgnoreSpecPattern) + +# DEPRECATED: Deprecated since version 1.0.0. Expose GitWildMatchPatternError +# in this module for backward compatibility. 
+from .gitignore import ( + GitIgnorePatternError as GitWildMatchPatternError) + + +class GitWildMatchPattern(GitIgnoreSpecPattern): + """ + .. version-deprecated:: 1.0.0 + This class is superseded by :class:`GitIgnoreSpecPattern` and + :class:`~pathspec.patterns.gitignore.basic.GitIgnoreBasicPattern`. + """ + + @deprecated(( + "GitWildMatchPattern ('gitwildmatch') is deprecated. Use 'gitignore' for " + "GitIgnoreBasicPattern or GitIgnoreSpecPattern instead." + )) + def __init__(self, *args, **kw) -> None: + """ + Warn about deprecation. + """ + super().__init__(*args, **kw) + + @override + @classmethod + @deprecated(( + "GitWildMatchPattern ('gitwildmatch') is deprecated. Use 'gitignore' for " + "GitIgnoreBasicPattern or GitIgnoreSpecPattern instead." + )) + def pattern_to_regex(cls, *args, **kw): + """ + Warn about deprecation. + """ + return super().pattern_to_regex(*args, **kw) + + +# DEPRECATED: Deprecated since version 1.0.0. Register GitWildMatchPattern as +# "gitwildmatch" for backward compatibility. +util.register_pattern('gitwildmatch', GitWildMatchPattern) diff --git a/lib/pathspec/py.typed b/lib/pathspec/py.typed new file mode 100644 index 0000000..b01eaaf --- /dev/null +++ b/lib/pathspec/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561. The pathspec package uses inline types. diff --git a/lib/pathspec/util.py b/lib/pathspec/util.py new file mode 100644 index 0000000..ea2dbee --- /dev/null +++ b/lib/pathspec/util.py @@ -0,0 +1,847 @@ +""" +This module provides utility methods for dealing with path-specs. +""" + +import os +import os.path +import pathlib +import posixpath +import stat +from collections.abc import ( + Collection, + Iterable, + Iterator, + Sequence) +from dataclasses import ( + dataclass) +from typing import ( + Any, + Callable, # Replaced by `collections.abc.Callable` in 3.9.2. + Generic, + Optional, # Replaced by `X | None` in 3.10. + TypeVar, + Union) # Replaced by `X | Y` in 3.10. 
+ +from .pattern import ( + Pattern) +from ._typing import ( + AnyStr, # Removed in 3.18. + deprecated) # Added in 3.13. + +StrPath = Union[str, os.PathLike[str]] + +TStrPath = TypeVar("TStrPath", bound=StrPath) +""" +Type variable for :class:`str` or :class:`os.PathLike`. +""" + +NORMALIZE_PATH_SEPS = [ + __sep + for __sep in [os.sep, os.altsep] + if __sep and __sep != posixpath.sep +] +""" +*NORMALIZE_PATH_SEPS* (:class:`list` of :class:`str`) contains the path +separators that need to be normalized to the POSIX separator for the current +operating system. The separators are determined by examining :data:`os.sep` and +:data:`os.altsep`. +""" + +_registered_patterns = {} +""" +*_registered_patterns* (:class:`dict`) maps a name (:class:`str`) to the +registered pattern factory (:class:`~collections.abc.Callable`). +""" + + +def append_dir_sep(path: pathlib.Path) -> str: + """ + Appends the path separator to the path if the path is a directory. This can be + used to aid in distinguishing between directories and files on the file-system + by relying on the presence of a trailing path separator. + + *path* (:class:`pathlib.Path`) is the path to use. + + Returns the path (:class:`str`). + """ + str_path = str(path) + if path.is_dir(): + str_path += os.sep + + return str_path + + +def check_match_file( + patterns: Iterable[tuple[int, Pattern]], + file: str, + is_reversed: Optional[bool] = None, +) -> tuple[Optional[bool], Optional[int]]: + """ + Check the file against the patterns. + + *patterns* (:class:`~collections.abc.Iterable`) yields each indexed pattern + (:class:`tuple`) which contains the pattern index (:class:`int`) and actua + pattern (:class:`.Pattern`). + + *file* (:class:`str`) is the normalized file path to be matched against + *patterns*. + + *is_reversed* (:class:`bool` or :data:`None`) is whether the order of the + patterns has been reversed. Default is :data:`None` for :data:`False`. + Reversing the order of the patterns is an optimization. 
+ + Returns a :class:`tuple` containing whether to include *file* (:class:`bool` + or :data:`None`), and the index of the last matched pattern (:class:`int` or + :data:`None`). + """ + if is_reversed: + # Check patterns in reverse order. The first pattern that matches takes + # precedence. + for index, pattern in patterns: + if pattern.include is not None and pattern.match_file(file) is not None: + return pattern.include, index + + return None, None + + else: + # Check all patterns. The last pattern that matches takes precedence. + out_include: Optional[bool] = None + out_index: Optional[int] = None + for index, pattern in patterns: + if pattern.include is not None and pattern.match_file(file) is not None: + out_include = pattern.include + out_index = index + + return out_include, out_index + + +def detailed_match_files( + patterns: Iterable[Pattern], + files: Iterable[str], + all_matches: Optional[bool] = None, +) -> dict[str, 'MatchDetail']: + """ + Matches the files to the patterns, and returns which patterns matched the + files. + + *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains + the patterns to use. + + *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains the + normalized file paths to be matched against *patterns*. + + *all_matches* (:class:`bool` or :data:`None`) is whether to return all matches + patterns (:data:`True`), or only the last matched pattern (:data:`False`). + Default is :data:`None` for :data:`False`. + + Returns the matched files (:class:`dict`) which maps each matched file + (:class:`str`) to the patterns that matched in order (:class:`.MatchDetail`). + """ + all_files = files if isinstance(files, Collection) else list(files) + return_files = {} + for pattern in patterns: + if pattern.include is not None: + result_files = pattern.match(all_files) # TODO: Replace with `.match_file()`. + if pattern.include: + # Add files and record pattern. 
+ for result_file in result_files: + if result_file in return_files: + if all_matches: + return_files[result_file].patterns.append(pattern) + else: + return_files[result_file].patterns[0] = pattern + else: + return_files[result_file] = MatchDetail([pattern]) + + else: + # Remove files. + for file in result_files: + del return_files[file] + + return return_files + + +def _filter_check_patterns( + patterns: Iterable[Pattern], +) -> list[tuple[int, Pattern]]: + """ + Filters out null-patterns. + + *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains + the patterns. + + Returns a :class:`list` containing each indexed pattern (:class:`tuple`) which + contains the pattern index (:class:`int`) and the actual pattern + (:class:`.Pattern`). + """ + return [ + (__index, __pat) + for __index, __pat in enumerate(patterns) + if __pat.include is not None + ] + + +def _is_iterable(value: Any) -> bool: + """ + Check whether the value is an iterable (excludes strings). + + *value* is the value to check, + + Returns whether *value* is an iterable (:class:`bool`). + """ + return isinstance(value, Iterable) and not isinstance(value, (str, bytes)) + + +@deprecated(( + "pathspec.util.iter_tree() is deprecated. Use iter_tree_files() instead." +)) +def iter_tree(root, on_error=None, follow_links=None): + """ + .. version-deprecated:: 0.10.0 + This is an alias for the :func:`.iter_tree_files` function. + """ + return iter_tree_files(root, on_error=on_error, follow_links=follow_links) + + +def iter_tree_entries( + root: StrPath, + on_error: Optional[Callable[[OSError], None]] = None, + follow_links: Optional[bool] = None, +) -> Iterator['TreeEntry']: + """ + Walks the specified directory for all files and directories. + + *root* (:class:`str` or :class:`os.PathLike`) is the root directory to search. + + *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is + the error handler for file-system exceptions. 
It will be called with the + exception (:exc:`OSError`). Reraise the exception to abort the walk. Default + is :data:`None` to ignore file-system exceptions. + + *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk + symbolic links that resolve to directories. Default is :data:`None` for + :data:`True`. + + Raises :exc:`.RecursionError` if recursion is detected. + + Returns an :class:`~collections.abc.Iterator` yielding each file or directory + entry (:class:`.TreeEntry`) relative to *root*. + """ + if on_error is not None and not callable(on_error): + raise TypeError(f"on_error:{on_error!r} is not callable.") + + if follow_links is None: + follow_links = True + + yield from _iter_tree_entries_next(os.path.abspath(root), '', {}, on_error, follow_links) + + +def _iter_tree_entries_next( + root_full: str, + dir_rel: str, + memo: dict[str, str], + on_error: Callable[[OSError], None], + follow_links: bool, +) -> Iterator['TreeEntry']: + """ + Scan the directory for all descendant files. + + *root_full* (:class:`str`) the absolute path to the root directory. + + *dir_rel* (:class:`str`) the path to the directory to scan relative to + *root_full*. + + *memo* (:class:`dict`) keeps track of ancestor directories encountered. Maps + each ancestor real path (:class:`str`) to relative path (:class:`str`). + + *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is + the error handler for file-system exceptions. + + *follow_links* (:class:`bool`) is whether to walk symbolic links that resolve + to directories. + + Yields each entry (:class:`.TreeEntry`). + """ + dir_full = os.path.join(root_full, dir_rel) + dir_real = os.path.realpath(dir_full) + + # Remember each encountered ancestor directory and its canonical (real) path. + # If a canonical path is encountered more than once, recursion has occurred. 
+ if dir_real not in memo: + memo[dir_real] = dir_rel + else: + raise RecursionError(real_path=dir_real, first_path=memo[dir_real], second_path=dir_rel) + + with os.scandir(dir_full) as scan_iter: + node_ent: os.DirEntry + for node_ent in scan_iter: + node_rel = os.path.join(dir_rel, node_ent.name) + + # Inspect child node. + try: + node_lstat = node_ent.stat(follow_symlinks=False) + except OSError as e: + if on_error is not None: + on_error(e) + continue + + if node_ent.is_symlink(): + # Child node is a link, inspect the target node. + try: + node_stat = node_ent.stat() + except OSError as e: + if on_error is not None: + on_error(e) + continue + else: + node_stat = node_lstat + + if node_ent.is_dir(follow_symlinks=follow_links): + # Child node is a directory, recurse into it and yield its descendant + # files. + yield TreeEntry(node_ent.name, node_rel, node_lstat, node_stat) + + yield from _iter_tree_entries_next(root_full, node_rel, memo, on_error, follow_links) + + elif node_ent.is_file() or node_ent.is_symlink(): + # Child node is either a file or an unfollowed link, yield it. + yield TreeEntry(node_ent.name, node_rel, node_lstat, node_stat) + + # NOTE: Make sure to remove the canonical (real) path of the directory from + # the ancestors memo once we are done with it. This allows the same directory + # to appear multiple times. If this is not done, the second occurrence of the + # directory will be incorrectly interpreted as a recursion. See + # . + del memo[dir_real] + + +def iter_tree_files( + root: StrPath, + on_error: Optional[Callable[[OSError], None]] = None, + follow_links: Optional[bool] = None, +) -> Iterator[str]: + """ + Walks the specified directory for all files. + + *root* (:class:`str` or :class:`os.PathLike`) is the root directory to search + for files. + + *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is + the error handler for file-system exceptions. It will be called with the + exception (:exc:`OSError`). 
Reraise the exception to abort the walk. Default + is :data:`None` to ignore file-system exceptions. + + *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk + symbolic links that resolve to directories. Default is :data:`None` for + :data:`True`. + + Raises :exc:`.RecursionError` if recursion is detected. + + Returns an :class:`~collections.abc.Iterator` yielding the path to each file + (:class:`str`) relative to *root*. + """ + if on_error is not None and not callable(on_error): + raise TypeError(f"on_error:{on_error!r} is not callable.") + + if follow_links is None: + follow_links = True + + yield from _iter_tree_files_next(os.path.abspath(root), '', {}, on_error, follow_links) + + +def _iter_tree_files_next( + root_full: str, + dir_rel: str, + memo: dict[str, str], + on_error: Callable[[OSError], None], + follow_links: bool, +) -> Iterator[str]: + """ + Scan the directory for all descendant files. + + *root_full* (:class:`str`) the absolute path to the root directory. + + *dir_rel* (:class:`str`) the path to the directory to scan relative to + *root_full*. + + *memo* (:class:`dict`) keeps track of ancestor directories encountered. Maps + each ancestor real path (:class:`str`) to relative path (:class:`str`). + + *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is + the error handler for file-system exceptions. + + *follow_links* (:class:`bool`) is whether to walk symbolic links that resolve + to directories. + + Yields each file path (:class:`str`). + """ + dir_full = os.path.join(root_full, dir_rel) + dir_real = os.path.realpath(dir_full) + + # Remember each encountered ancestor directory and its canonical (real) path. + # If a canonical path is encountered more than once, recursion has occurred. 
+ if dir_real not in memo: + memo[dir_real] = dir_rel + else: + raise RecursionError(real_path=dir_real, first_path=memo[dir_real], second_path=dir_rel) + + with os.scandir(dir_full) as scan_iter: + node_ent: os.DirEntry + for node_ent in scan_iter: + node_rel = os.path.join(dir_rel, node_ent.name) + + if node_ent.is_dir(follow_symlinks=follow_links): + # Child node is a directory, recurse into it and yield its descendant + # files. + yield from _iter_tree_files_next(root_full, node_rel, memo, on_error, follow_links) + + elif node_ent.is_file(): + # Child node is a file, yield it. + yield node_rel + + elif not follow_links and node_ent.is_symlink(): + # Child node is an unfollowed link, yield it. + yield node_rel + + # NOTE: Make sure to remove the canonical (real) path of the directory from + # the ancestors memo once we are done with it. This allows the same directory + # to appear multiple times. If this is not done, the second occurrence of the + # directory will be incorrectly interpreted as a recursion. See + # . + del memo[dir_real] + + +def lookup_pattern(name: str) -> Callable[[AnyStr], Pattern]: + """ + Lookups a registered pattern factory by name. + + *name* (:class:`str`) is the name of the pattern factory. + + Returns the registered pattern factory (:class:`~collections.abc.Callable`). + If no pattern factory is registered, raises :exc:`KeyError`. + """ + return _registered_patterns[name] + + +def match_file(patterns: Iterable[Pattern], file: str) -> bool: + """ + Matches the file to the patterns. + + *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains + the patterns to use. + + *file* (:class:`str`) is the normalized file path to be matched against + *patterns*. + + Returns :data:`True` if *file* matched; otherwise, :data:`False`. 
+ """ + matched = False + for pattern in patterns: + if pattern.include is not None and pattern.match_file(file) is not None: + matched = pattern.include + + return matched + + +@deprecated(( + "pathspec.util.match_files() is deprecated. Use match_file() with a loop for " + "better results." +)) +def match_files( + patterns: Iterable[Pattern], + files: Iterable[str], +) -> set[str]: + """ + .. version-deprecated:: 0.10.0 + This function is no longer used. Use the :func:`.match_file` function with a + loop for better results. + + Matches the files to the patterns. + + *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains + the patterns to use. + + *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains the + normalized file paths to be matched against *patterns*. + + Returns the matched files (:class:`set` of :class:`str`). + """ + use_patterns = [__pat for __pat in patterns if __pat.include is not None] + + return_files = set() + for file in files: + if match_file(use_patterns, file): + return_files.add(file) + + return return_files + + +def normalize_file( + file: StrPath, + separators: Optional[Collection[str]] = None, +) -> str: + """ + Normalizes the file path to use the POSIX path separator (i.e., ``"/"``), and + make the paths relative (remove leading ``"/"``). + + *file* (:class:`str` or :class:`os.PathLike`) is the file path. + + *separators* (:class:`~collections.abc.Collection` of :class:`str`; or + :data:`None`) optionally contains the path separators to normalize. This does + not need to include the POSIX path separator (``"/"``), but including it will + not affect the results. Default is ``None`` for :data:`.NORMALIZE_PATH_SEPS`. + To prevent normalization, pass an empty container (e.g., an empty tuple + ``()``). + + Returns the normalized file path (:class:`str`). + """ + # Normalize path separators. + if separators is None: + separators = NORMALIZE_PATH_SEPS + + # Convert path object to string. 
+ norm_file: str = os.fspath(file) + + for sep in separators: + norm_file = norm_file.replace(sep, posixpath.sep) + + if norm_file.startswith('/'): + # Make path relative. + norm_file = norm_file[1:] + + elif norm_file.startswith('./'): + # Remove current directory prefix. + norm_file = norm_file[2:] + + return norm_file + + +@deprecated(( + "pathspec.util.normalize_files() is deprecated. Use normalize_file() with a " + "loop for better results." +)) +def normalize_files( + files: Iterable[StrPath], + separators: Optional[Collection[str]] = None, +) -> dict[str, list[StrPath]]: + """ + .. version-deprecated:: 0.10.0 + This function is no longer used. Use the :func:`.normalize_file` function + with a loop for better results. + + Normalizes the file paths to use the POSIX path separator. + + *files* (:class:`~collections.abc.Iterable` of :class:`str` or + :class:`os.PathLike`) contains the file paths to be normalized. + + *separators* (:class:`~collections.abc.Collection` of :class:`str`; or + :data:`None`) optionally contains the path separators to normalize. See + :func:`.normalize_file` for more information. + + Returns a :class:`dict` mapping each normalized file path (:class:`str`) to + the original file paths (:class:`list` of :class:`str` or + :class:`os.PathLike`). + """ + norm_files = {} + for path in files: + norm_file = normalize_file(path, separators=separators) + if norm_file in norm_files: + norm_files[norm_file].append(path) + else: + norm_files[norm_file] = [path] + + return norm_files + + +def register_pattern( + name: str, + pattern_factory: Callable[[AnyStr], Pattern], + override: Optional[bool] = None, +) -> None: + """ + Registers the specified pattern factory. + + *name* (:class:`str`) is the name to register the pattern factory under. + + *pattern_factory* (:class:`~collections.abc.Callable`) is used to compile + patterns. It must accept an uncompiled pattern (:class:`str`) and return the + compiled pattern (:class:`.Pattern`). 
+ + *override* (:class:`bool` or :data:`None`) optionally is whether to allow + overriding an already registered pattern under the same name (:data:`True`), + instead of raising an :exc:`.AlreadyRegisteredError` (:data:`False`). Default + is :data:`None` for :data:`False`. + """ + if not isinstance(name, str): + raise TypeError(f"name:{name!r} is not a string.") + + if not callable(pattern_factory): + raise TypeError(f"pattern_factory:{pattern_factory!r} is not callable.") + + if name in _registered_patterns and not override: + raise AlreadyRegisteredError(name, _registered_patterns[name]) + + _registered_patterns[name] = pattern_factory + + +class AlreadyRegisteredError(Exception): + """ + The :exc:`AlreadyRegisteredError` exception is raised when a pattern factory + is registered under a name already in use. + """ + + def __init__( + self, + name: str, + pattern_factory: Callable[[AnyStr], Pattern], + ) -> None: + """ + Initializes the :exc:`AlreadyRegisteredError` instance. + + *name* (:class:`str`) is the name of the registered pattern. + + *pattern_factory* (:class:`~collections.abc.Callable`) is the registered + pattern factory. + """ + super().__init__(name, pattern_factory) + + @property + def message(self) -> str: + """ + *message* (:class:`str`) is the error message. + """ + return ( + f"{self.name!r} is already registered for pattern factory=" + f"{self.pattern_factory!r}." + ) + + @property + def name(self) -> str: + """ + *name* (:class:`str`) is the name of the registered pattern. + """ + return self.args[0] + + @property + def pattern_factory(self) -> Callable[[AnyStr], Pattern]: + """ + *pattern_factory* (:class:`~collections.abc.Callable`) is the registered + pattern factory. + """ + return self.args[1] + + +class RecursionError(Exception): + """ + The :exc:`RecursionError` exception is raised when recursion is detected. 
+ """ + + def __init__( + self, + real_path: str, + first_path: str, + second_path: str, + ) -> None: + """ + Initializes the :exc:`RecursionError` instance. + + *real_path* (:class:`str`) is the real path that recursion was encountered + on. + + *first_path* (:class:`str`) is the first path encountered for *real_path*. + + *second_path* (:class:`str`) is the second path encountered for *real_path*. + """ + super().__init__(real_path, first_path, second_path) + + @property + def first_path(self) -> str: + """ + *first_path* (:class:`str`) is the first path encountered for + :attr:`self.real_path `. + """ + return self.args[1] + + @property + def message(self) -> str: + """ + *message* (:class:`str`) is the error message. + """ + return ( + f"Real path {self.real_path!r} was encountered at {self.first_path!r} " + f"and then {self.second_path!r}." + ) + + @property + def real_path(self) -> str: + """ + *real_path* (:class:`str`) is the real path that recursion was + encountered on. + """ + return self.args[0] + + @property + def second_path(self) -> str: + """ + *second_path* (:class:`str`) is the second path encountered for + :attr:`self.real_path `. + """ + return self.args[2] + + +@dataclass(frozen=True) +class CheckResult(Generic[TStrPath]): + """ + The :class:`CheckResult` class contains information about the file and which + pattern matched it. + """ + + # Make the class dict-less. + __slots__ = ( + 'file', + 'include', + 'index', + ) + + file: TStrPath + """ + *file* (:class:`str` or :class:`os.PathLike`) is the file path. + """ + + include: Optional[bool] + """ + *include* (:class:`bool` or :data:`None`) is whether to include or exclude the + file. If :data:`None`, no pattern matched. + """ + + index: Optional[int] + """ + *index* (:class:`int` or :data:`None`) is the index of the last pattern that + matched. If :data:`None`, no pattern matched. 
+ """ + + +class MatchDetail(object): + """ + The :class:`.MatchDetail` class contains information about + """ + + # Make the class dict-less. + __slots__ = ('patterns',) + + def __init__(self, patterns: Sequence[Pattern]) -> None: + """ + Initialize the :class:`.MatchDetail` instance. + + *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`) + contains the patterns that matched the file in the order they were encountered. + """ + + self.patterns = patterns + """ + *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`) + contains the patterns that matched the file in the order they were + encountered. + """ + + +class TreeEntry(object): + """ + The :class:`TreeEntry` class contains information about a file-system entry. + """ + + # Make the class dict-less. + __slots__ = ('_lstat', 'name', 'path', '_stat') + + def __init__( + self, + name: str, + path: str, + lstat: os.stat_result, + stat: os.stat_result, + ) -> None: + """ + Initialize the :class:`TreeEntry` instance. + + *name* (:class:`str`) is the base name of the entry. + + *path* (:class:`str`) is the relative path of the entry. + + *lstat* (:class:`os.stat_result`) is the stat result of the direct entry. + + *stat* (:class:`os.stat_result`) is the stat result of the entry, + potentially linked. + """ + + self._lstat: os.stat_result = lstat + """ + *_lstat* (:class:`os.stat_result`) is the stat result of the direct entry. + """ + + self.name: str = name + """ + *name* (:class:`str`) is the base name of the entry. + """ + + self.path: str = path + """ + *path* (:class:`str`) is the path of the entry. + """ + + self._stat: os.stat_result = stat + """ + *_stat* (:class:`os.stat_result`) is the stat result of the linked entry. + """ + + def is_dir(self, follow_links: Optional[bool] = None) -> bool: + """ + Get whether the entry is a directory. + + *follow_links* (:class:`bool` or :data:`None`) is whether to follow symbolic + links. 
If this is :data:`True`, a symlink to a directory will result in + :data:`True`. Default is :data:`None` for :data:`True`. + + Returns whether the entry is a directory (:class:`bool`). + """ + if follow_links is None: + follow_links = True + + node_stat = self._stat if follow_links else self._lstat + return stat.S_ISDIR(node_stat.st_mode) + + def is_file(self, follow_links: Optional[bool] = None) -> bool: + """ + Get whether the entry is a regular file. + + *follow_links* (:class:`bool` or :data:`None`) is whether to follow symbolic + links. If this is :data:`True`, a symlink to a regular file will result in + :data:`True`. Default is :data:`None` for :data:`True`. + + Returns whether the entry is a regular file (:class:`bool`). + """ + if follow_links is None: + follow_links = True + + node_stat = self._stat if follow_links else self._lstat + return stat.S_ISREG(node_stat.st_mode) + + def is_symlink(self) -> bool: + """ + Returns whether the entry is a symbolic link (:class:`bool`). + """ + return stat.S_ISLNK(self._lstat.st_mode) + + def stat(self, follow_links: Optional[bool] = None) -> os.stat_result: + """ + Get the cached stat result for the entry. + + *follow_links* (:class:`bool` or :data:`None`) is whether to follow symbolic + links. If this is :data:`True`, the stat result of the linked file will be + returned. Default is :data:`None` for :data:`True`. + + Returns that stat result (:class:`os.stat_result`). + """ + if follow_links is None: + follow_links = True + + return self._stat if follow_links else self._lstat diff --git a/newplan?.md b/newplan?.md new file mode 100644 index 0000000..303abd6 --- /dev/null +++ b/newplan?.md @@ -0,0 +1,14 @@ +1. Dir Scanner + +2. file filter + +3. Hasher + +4. Manifest + + + + + + +1.