scanner code v1 & new libs

This commit is contained in:
2026-03-31 00:43:39 +03:00
parent 67ca2a8b63
commit 734c3bfff1
79 changed files with 6630 additions and 0 deletions

41
core/scanner.py Normal file
View File

@@ -0,0 +1,41 @@
import os
import lib.hashlib as hashlib
from lib.pathlib import Path
from lib.pathspec import PathSpec
class Scanner:
    """Walk a directory tree and record a content hash for every file.

    Attributes:
        base_path: Root directory of the scan, stored as a ``Path``.
        ignore_file: The value returned by ``_load_ignore_spec`` for the
            ignore file name. ``None`` means no ignore rules are applied;
            otherwise it must expose ``match_file(path)`` returning a truthy
            value for paths that should be skipped.
        include_file: The value returned by ``_load_include_spec`` for the
            include file name. It is stored but not consulted by
            ``get_current_state``.
    """

    # Number of bytes read per iteration while hashing, so a large file is
    # never held in memory in one piece.
    _HASH_CHUNK_SIZE = 1024 * 1024

    def __init__(self, base_path, ignore_file=".syncignore", include_file=".syncinclude"):
        """Remember the scan root and load the ignore/include specs.

        Args:
            base_path: Directory to scan (string or path object).
            ignore_file: Name of the ignore-pattern file.
            include_file: Name of the include-pattern file.
        """
        self.base_path = Path(base_path)
        self.ignore_file = self._load_ignore_spec(ignore_file)
        self.include_file = self._load_include_spec(include_file)

    def _load_ignore_spec(self, file):
        """Return the ignore spec for ``file``, or ``None`` when there is none.

        TODO: not implemented yet; always returns ``None``, so nothing is
        ignored until a real loader is written.
        """
        return None

    def _load_include_spec(self, file):
        """Return the include spec for ``file``, or ``None`` when there is none.

        TODO: not implemented yet; always returns ``None``.
        """
        return None

    def _is_ignored(self, rel_path):
        """Tell whether ``rel_path`` (relative to ``base_path``) is ignored.

        A missing ignore spec (``None``) ignores nothing.
        """
        spec = self.ignore_file
        return spec is not None and bool(spec.match_file(rel_path))

    def get_current_state(self):
        """Map every non-ignored file under ``base_path`` to its MD5 digest.

        Returns:
            dict: Keys are file paths relative to ``base_path`` (the ``str()``
            of the relative ``Path``); values are hexadecimal MD5 digests of
            the file contents.
        """
        state = {}
        # str() keeps the walk working even with a Path implementation that
        # does not support the os.PathLike protocol.
        for root, dirs, files in os.walk(str(self.base_path)):
            root_path = Path(root)
            # Prune in place so os.walk does not descend into ignored
            # directories.
            dirs[:] = [
                d for d in dirs
                if not self._is_ignored(
                    str((root_path / d).relative_to(self.base_path))
                )
            ]
            for name in files:
                full_path = root_path / name
                rel_path = str(full_path.relative_to(self.base_path))
                if not self._is_ignored(rel_path):
                    # Hash via the full path: the relative path would be
                    # resolved against the process working directory, not
                    # against base_path.
                    state[rel_path] = self._hash_file(full_path)
        return state

    def _hash_file(self, path):
        """Return the hexadecimal MD5 digest of the file at ``path``.

        The file is read in fixed-size chunks and always closed, even when
        reading fails.

        Args:
            path: Location of the file to hash (string or path object).

        Raises:
            OSError: If the file cannot be opened or read.
        """
        digest = hashlib.md5()
        with open(str(path), "rb") as fh:
            for chunk in iter(lambda: fh.read(self._HASH_CHUNK_SIZE), b""):
                digest.update(chunk)
        return digest.hexdigest()

View File

@@ -0,0 +1 @@
pip

View File

@@ -0,0 +1,19 @@
Copyright (c) 2012-2014 Antoine Pitrou and contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,180 @@
Metadata-Version: 2.1
Name: pathlib
Version: 1.0.1
Summary: Object-oriented filesystem paths
Home-page: https://pathlib.readthedocs.org/
Download-URL: https://pypi.python.org/pypi/pathlib/
Author: Antoine Pitrou
Author-email: solipsis@pitrou.net
License: MIT License
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Classifier: Topic :: Software Development :: Libraries
Classifier: Topic :: System :: Filesystems
License-File: LICENSE.txt
pathlib offers a set of classes to handle filesystem paths. It offers the
following advantages over using string objects:
* No more cumbersome use of os and os.path functions. Everything can be
done easily through operators, attribute accesses, and method calls.
* Embodies the semantics of different path types. For example, comparing
Windows paths ignores casing.
* Well-defined semantics, eliminating any warts or ambiguities (forward vs.
backward slashes, etc.).
Requirements
------------
Python 3.2 or later is recommended, but pathlib is also usable with Python 2.7
and 2.6.
Install
-------
In Python 3.4, pathlib is now part of the standard library. For Python 3.3
and earlier, ``easy_install pathlib`` or ``pip install pathlib`` should do
the trick.
Examples
--------
Importing the module classes::
>>> from pathlib import *
Listing Python source files in a directory::
>>> list(p.glob('*.py'))
[PosixPath('test_pathlib.py'), PosixPath('setup.py'),
PosixPath('pathlib.py')]
Navigating inside a directory tree::
>>> p = Path('/etc')
>>> q = p / 'init.d' / 'reboot'
>>> q
PosixPath('/etc/init.d/reboot')
>>> q.resolve()
PosixPath('/etc/rc.d/init.d/halt')
Querying path properties::
>>> q.exists()
True
>>> q.is_dir()
False
Opening a file::
>>> with q.open() as f: f.readline()
...
'#!/bin/bash\n'
Documentation
-------------
The full documentation can be read at `Read the Docs
<https://pathlib.readthedocs.org/>`_.
Contributing
------------
Main development now takes place in the Python standard library: see
the `Python developer's guide <http://docs.python.org/devguide/>`_, and
report issues on the `Python bug tracker <http://bugs.python.org/>`_.
However, if you find an issue specific to prior versions of Python
(such as 2.7 or 3.2), you can post an issue on the
`BitBucket project page <https://bitbucket.org/pitrou/pathlib/>`_.
History
-------
Version 1.0.1
^^^^^^^^^^^^^
- Pull request #4: Python 2.6 compatibility by eevee.
Version 1.0
^^^^^^^^^^^
This version brings ``pathlib`` up to date with the official Python 3.4
release, and also fixes a couple of 2.7-specific issues.
- Python issue #20765: Add missing documentation for PurePath.with_name()
and PurePath.with_suffix().
- Fix test_mkdir_parents when the working directory has additional bits
set (such as the setgid or sticky bits).
- Python issue #20111: pathlib.Path.with_suffix() now sanity checks the
given suffix.
- Python issue #19918: Fix PurePath.relative_to() under Windows.
- Python issue #19921: When Path.mkdir() is called with parents=True, any
missing parent is created with the default permissions, ignoring the mode
argument (mimicking the POSIX "mkdir -p" command).
- Python issue #19887: Improve the Path.resolve() algorithm to support
certain symlink chains.
- Make pathlib usable under Python 2.7 with unicode pathnames (only pure
ASCII, though).
- Issue #21: fix TypeError under Python 2.7 when using new division.
- Add tox support for easier testing.
Version 0.97
^^^^^^^^^^^^
This version brings ``pathlib`` up to date with the final API specified
in :pep:`428`. The changes are too long to list here, it is recommended
to read the `documentation <https://pathlib.readthedocs.org/>`_.
.. warning::
The API in this version is partially incompatible with pathlib 0.8 and
earlier. Be sure to check your code for possible breakage!
Version 0.8
^^^^^^^^^^^
- Add PurePath.name and PurePath.anchor.
- Add Path.owner and Path.group.
- Add Path.replace().
- Add Path.as_uri().
- Issue #10: when creating a file with Path.open(), don't set the executable
bit.
- Issue #11: fix comparisons with non-Path objects.
Version 0.7
^^^^^^^^^^^
- Add '**' (recursive) patterns to Path.glob().
- Fix openat() support after the API refactoring in Python 3.3 beta1.
- Add a *target_is_directory* argument to Path.symlink_to()
Version 0.6
^^^^^^^^^^^
- Add Path.is_file() and Path.is_symlink()
- Add Path.glob() and Path.rglob()
- Add PurePath.match()
Version 0.5
^^^^^^^^^^^
- Add Path.mkdir().
- Add Python 2.7 compatibility by Michele Lacchia.
- Make parent() raise ValueError when the level is greater than the path
length.

View File

@@ -0,0 +1,9 @@
__pycache__/pathlib.cpython-314.pyc,,
pathlib-1.0.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
pathlib-1.0.1.dist-info/LICENSE.txt,sha256=7FK0RgeHp9ofbbcLvp4eZJxz98sU7rrxDl25_n9-nNk,1080
pathlib-1.0.1.dist-info/METADATA,sha256=NrN3I2q6qf7u2iD0kK0Or6JrsqSP-WXXK41ZAkQznuI,5149
pathlib-1.0.1.dist-info/RECORD,,
pathlib-1.0.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
pathlib-1.0.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
pathlib-1.0.1.dist-info/top_level.txt,sha256=zhefsaNuASYaoZF-ELG3TPFn6S2cuB1fd8PlNFLrMh0,8
pathlib.py,sha256=9MfboQTEcuscgm7ZALBMTDpLwD-2PCTSUyhlA2tcvqw,41481

View File

View File

@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.1)
Root-Is-Purelib: true
Tag: py3-none-any

View File

@@ -0,0 +1 @@
pathlib

1280
lib/pathlib.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
pip

View File

@@ -0,0 +1,356 @@
Metadata-Version: 2.4
Name: pathspec
Version: 1.0.4
Summary: Utility library for gitignore style pattern matching of file paths.
Author-email: "Caleb P. Burns" <cpburnz@gmail.com>
Requires-Python: >=3.9
Description-Content-Type: text/x-rst
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Utilities
License-File: LICENSE
Requires-Dist: hyperscan >=0.7 ; extra == "hyperscan"
Requires-Dist: typing-extensions >=4 ; extra == "optional"
Requires-Dist: google-re2 >=1.1 ; extra == "re2"
Requires-Dist: pytest >=9 ; extra == "tests"
Requires-Dist: typing-extensions >=4.15 ; extra == "tests"
Project-URL: Documentation, https://python-path-specification.readthedocs.io/en/latest/index.html
Project-URL: Issue Tracker, https://github.com/cpburnz/python-pathspec/issues
Project-URL: Source Code, https://github.com/cpburnz/python-pathspec
Provides-Extra: hyperscan
Provides-Extra: optional
Provides-Extra: re2
Provides-Extra: tests
PathSpec
========
*pathspec* is a utility library for pattern matching of file paths. So far this
only includes Git's `gitignore`_ pattern matching.
.. _`gitignore`: http://git-scm.com/docs/gitignore
Tutorial
--------
Say you have a "Projects" directory and you want to back it up, but only
certain files, and ignore others depending on certain conditions::
>>> from pathspec import PathSpec
>>> # The gitignore-style patterns for files to select, but we're including
>>> # instead of ignoring.
>>> spec_text = """
...
... # This is a comment because the line begins with a hash: "#"
...
... # Include several project directories (and all descendants) relative to
... # the current directory. To reference only a directory you must end with a
... # slash: "/"
... /project-a/
... /project-b/
... /project-c/
...
... # Patterns can be negated by prefixing with exclamation mark: "!"
...
... # Ignore temporary files beginning or ending with "~" and ending with
... # ".swp".
... !~*
... !*~
... !*.swp
...
... # These are python projects so ignore compiled python files from
... # testing.
... !*.pyc
...
... # Ignore the build directories but only directly under the project
... # directories.
... !/*/build/
...
... """
The ``PathSpec`` class provides an abstraction around pattern implementations,
and we want to compile our patterns as "gitignore" patterns. You could call it a
wrapper for a list of compiled patterns::
>>> spec = PathSpec.from_lines('gitignore', spec_text.splitlines())
If we wanted to manually compile the patterns, we can use the ``GitIgnoreBasicPattern``
class directly. It is used in the background for "gitignore" which internally
converts patterns to regular expressions::
>>> from pathspec.patterns.gitignore.basic import GitIgnoreBasicPattern
>>> patterns = map(GitIgnoreBasicPattern, spec_text.splitlines())
>>> spec = PathSpec(patterns)
``PathSpec.from_lines()`` is a class method which simplifies that.
If you want to load the patterns from file, you can pass the file object
directly as well::
>>> with open('patterns.list', 'r') as fh:
>>> spec = PathSpec.from_lines('gitignore', fh)
You can perform matching on a whole directory tree with::
>>> matches = set(spec.match_tree_files('path/to/directory'))
Or you can perform matching on a specific set of file paths with::
>>> matches = set(spec.match_files(file_paths))
Or check to see if an individual file matches::
>>> is_matched = spec.match_file(file_path)
There are actually two implementations of "gitignore". The basic implementation is
used by ``PathSpec`` and follows patterns as documented by `gitignore`_.
However, Git's behavior differs from the documented patterns. There's some
edge-cases, and in particular, Git allows including files from excluded
directories which appears to contradict the documentation. ``GitIgnoreSpec``
handles these cases to more closely replicate Git's behavior::
>>> from pathspec import GitIgnoreSpec
>>> spec = GitIgnoreSpec.from_lines(spec_text.splitlines())
You do not specify the style of pattern for ``GitIgnoreSpec`` because it should
always use ``GitIgnoreSpecPattern`` internally.
Performance
-----------
Running lots of regular expression matches against thousands of files in Python
is slow. Alternate regular expression backends can be used to improve
performance. ``PathSpec`` and ``GitIgnoreSpec`` both accept a ``backend``
parameter to control the backend. The default is "best" to automatically choose
the best available backend. There are currently 3 backends.
The "simple" backend is the default and it simply uses Python's ``re.Pattern``
objects that are normally created. This can be the fastest when there's only 1
or 2 patterns.
The "hyperscan" backend uses the `hyperscan`_ library. Hyperscan tends to be at
least 2 times faster than "simple", and generally slower than "re2". This can be
faster than "re2" under the right conditions with pattern counts of 1-25.
The "re2" backend uses the `google-re2`_ library (not to be confused with the
*re2* library on PyPI which is unrelated and abandoned). Google's re2 tends to
be significantly faster than "simple", and 3 times faster than "hyperscan" at
high pattern counts.
See `benchmarks_backends.md`_ for comparisons between native Python regular
expressions and the optional backends.
.. _`benchmarks_backends.md`: https://github.com/cpburnz/python-pathspec/blob/master/benchmarks_backends.md
.. _`google-re2`: https://pypi.org/project/google-re2/
.. _`hyperscan`: https://pypi.org/project/hyperscan/
FAQ
---
1. How do I ignore files like *.gitignore*?
+++++++++++++++++++++++++++++++++++++++++++
``GitIgnoreSpec`` (and ``PathSpec``) positively match files by default. To find
the files to keep, and exclude files like *.gitignore*, you need to set
``negate=True`` to flip the results::
>>> from pathspec import GitIgnoreSpec
>>> spec = GitIgnoreSpec.from_lines([...])
>>> keep_files = set(spec.match_tree_files('path/to/directory', negate=True))
>>> ignore_files = set(spec.match_tree_files('path/to/directory'))
License
-------
*pathspec* is licensed under the `Mozilla Public License Version 2.0`_. See
`LICENSE`_ or the `FAQ`_ for more information.
In summary, you may use *pathspec* with any closed or open source project
without affecting the license of the larger work so long as you:
- give credit where credit is due,
- and release any custom changes made to *pathspec*.
.. _`Mozilla Public License Version 2.0`: http://www.mozilla.org/MPL/2.0
.. _`LICENSE`: LICENSE
.. _`FAQ`: http://www.mozilla.org/MPL/2.0/FAQ.html
Source
------
The source code for *pathspec* is available from the GitHub repo
`cpburnz/python-pathspec`_.
.. _`cpburnz/python-pathspec`: https://github.com/cpburnz/python-pathspec
Installation
------------
*pathspec* is available for install through `PyPI`_::
pip install pathspec
*pathspec* can also be built from source. The following packages will be
required:
- `build`_ (>=0.6.0)
*pathspec* can then be built and installed with::
python -m build
pip install dist/pathspec-*-py3-none-any.whl
The following optional dependencies can be installed:
- `google-re2`_: Enables optional "re2" backend.
- `hyperscan`_: Enables optional "hyperscan" backend.
- `typing-extensions`_: Improves some type hints.
.. _`PyPI`: http://pypi.python.org/pypi/pathspec
.. _`build`: https://pypi.org/project/build/
.. _`typing-extensions`: https://pypi.org/project/typing-extensions/
Documentation
-------------
Documentation for *pathspec* is available on `Read the Docs`_.
The full change history can be found in `CHANGES.rst`_ and `Change History`_.
An upgrade guide is available in `UPGRADING.rst`_ and `Upgrade Guide`_.
.. _`CHANGES.rst`: https://github.com/cpburnz/python-pathspec/blob/master/CHANGES.rst
.. _`Change History`: https://python-path-specification.readthedocs.io/en/stable/changes.html
.. _`Read the Docs`: https://python-path-specification.readthedocs.io
.. _`UPGRADING.rst`: https://github.com/cpburnz/python-pathspec/blob/master/UPGRADING.rst
.. _`Upgrade Guide`: https://python-path-specification.readthedocs.io/en/stable/upgrading.html
Other Languages
---------------
The related project `pathspec-ruby`_ (by *highb*) provides a similar library as
a `Ruby gem`_.
.. _`pathspec-ruby`: https://github.com/highb/pathspec-ruby
.. _`Ruby gem`: https://rubygems.org/gems/pathspec
Change History
==============
1.0.4 (2026-01-26)
------------------
- `Issue #103`_: Using re2 fails if pyre2 is also installed.
.. _`Issue #103`: https://github.com/cpburnz/python-pathspec/issues/103
1.0.3 (2026-01-09)
------------------
Bug fixes:
- `Issue #101`_: pyright strict errors with pathspec >= 1.0.0.
- `Issue #102`_: No module named 'tomllib'.
.. _`Issue #101`: https://github.com/cpburnz/python-pathspec/issues/101
.. _`Issue #102`: https://github.com/cpburnz/python-pathspec/issues/102
1.0.2 (2026-01-07)
------------------
Bug fixes:
- Type hint `collections.abc.Callable` does not properly replace `typing.Callable` until Python 3.9.2.
1.0.1 (2026-01-06)
------------------
Bug fixes:
- `Issue #100`_: ValueError(f"{patterns=!r} cannot be empty.") when using black.
.. _`Issue #100`: https://github.com/cpburnz/python-pathspec/issues/100
1.0.0 (2026-01-05)
------------------
Major changes:
- `Issue #91`_: Dropped support of EoL Python 3.8.
- Added concept of backends to allow for faster regular expression matching. The backend can be controlled using the `backend` argument to `PathSpec()`, `PathSpec.from_lines()`, `GitIgnoreSpec()`, and `GitIgnoreSpec.from_lines()`.
- Renamed "gitwildmatch" pattern back to "gitignore". The "gitignore" pattern behaves slightly differently when used with `PathSpec` (*gitignore* as documented) than with `GitIgnoreSpec` (replicates *Git*'s edge cases).
API changes:
- Breaking: protected method `pathspec.pathspec.PathSpec._match_file()` (with a leading underscore) has been removed and replaced by backends. This does not affect normal usage of `PathSpec` or `GitIgnoreSpec`. Only custom subclasses will be affected. If this breaks your usage, let me know by `opening an issue <https://github.com/cpburnz/python-pathspec/issues>`_.
- Deprecated: "gitwildmatch" is now an alias for "gitignore".
- Deprecated: `pathspec.patterns.GitWildMatchPattern` is now an alias for `pathspec.patterns.gitignore.spec.GitIgnoreSpecPattern`.
- Deprecated: `pathspec.patterns.gitwildmatch` module has been replaced by the `pathspec.patterns.gitignore` package.
- Deprecated: `pathspec.patterns.gitwildmatch.GitWildMatchPattern` is now an alias for `pathspec.patterns.gitignore.spec.GitIgnoreSpecPattern`.
- Deprecated: `pathspec.patterns.gitwildmatch.GitWildMatchPatternError` is now an alias for `pathspec.patterns.gitignore.GitIgnorePatternError`.
- Removed: `pathspec.patterns.gitwildmatch.GitIgnorePattern` has been deprecated since v0.4 (2016-07-15).
- Signature of method `pathspec.pattern.RegexPattern.match_file()` has been changed from `def match_file(self, file: str) -> RegexMatchResult | None` to `def match_file(self, file: AnyStr) -> RegexMatchResult | None` to reflect usage.
- Signature of class method `pathspec.pattern.RegexPattern.pattern_to_regex()` has been changed from `def pattern_to_regex(cls, pattern: str) -> tuple[str, bool]` to `def pattern_to_regex(cls, pattern: AnyStr) -> tuple[AnyStr | None, bool | None]` to reflect usage and documentation.
New features:
- Added optional "hyperscan" backend using `hyperscan`_ library. It will automatically be used when installed. This dependency can be installed with ``pip install 'pathspec[hyperscan]'``.
- Added optional "re2" backend using the `google-re2`_ library. It will automatically be used when installed. This dependency can be installed with ``pip install 'pathspec[re2]'``.
- Added optional dependency on `typing-extensions`_ library to improve some type hints.
Bug fixes:
- `Issue #93`_: Do not remove leading spaces.
- `Issue #95`_: Matching for files inside folder does not seem to behave like .gitignore's.
- `Issue #98`_: UnboundLocalError in RegexPattern when initialized with `pattern=None`.
- Type hint on return value of `pathspec.pattern.RegexPattern.match_file()` to match documentation.
Improvements:
- Mark Python 3.13 and 3.14 as supported.
- No-op patterns are now filtered out when matching files, slightly improving performance.
- Fix performance regression in `iter_tree_files()` from v0.10.
.. _`Issue #38`: https://github.com/cpburnz/python-pathspec/issues/38
.. _`Issue #91`: https://github.com/cpburnz/python-pathspec/issues/91
.. _`Issue #93`: https://github.com/cpburnz/python-pathspec/issues/93
.. _`Issue #95`: https://github.com/cpburnz/python-pathspec/issues/95
.. _`Issue #98`: https://github.com/cpburnz/python-pathspec/issues/98
.. _`google-re2`: https://pypi.org/project/google-re2/
.. _`hyperscan`: https://pypi.org/project/hyperscan/
.. _`typing-extensions`: https://pypi.org/project/typing-extensions/

View File

@@ -0,0 +1,69 @@
pathspec-1.0.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
pathspec-1.0.4.dist-info/METADATA,sha256=pekHVZjpp_VHVlDo7U032-fIhSGEbY_V8jjmYrEgaWM,13755
pathspec-1.0.4.dist-info/RECORD,,
pathspec-1.0.4.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
pathspec-1.0.4.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
pathspec-1.0.4.dist-info/licenses/LICENSE,sha256=-rPda9qyJvHAhjCx3ZF-Efy07F4eAg4sFvg6ChOGPoU,16726
pathspec/__init__.py,sha256=0PnZCecVo4UjsfA0EFGsAUikyz1jSDFmQP9gCoKXW_Y,1408
pathspec/__pycache__/__init__.cpython-314.pyc,,
pathspec/__pycache__/_meta.cpython-314.pyc,,
pathspec/__pycache__/_typing.cpython-314.pyc,,
pathspec/__pycache__/_version.cpython-314.pyc,,
pathspec/__pycache__/backend.cpython-314.pyc,,
pathspec/__pycache__/gitignore.cpython-314.pyc,,
pathspec/__pycache__/pathspec.cpython-314.pyc,,
pathspec/__pycache__/pattern.cpython-314.pyc,,
pathspec/__pycache__/util.cpython-314.pyc,,
pathspec/_backends/__init__.py,sha256=CjgX4uSPMC5UH4iy_IrdFXrcLQ_gwK8MKW5Qbspz_uE,130
pathspec/_backends/__pycache__/__init__.cpython-314.pyc,,
pathspec/_backends/__pycache__/_utils.cpython-314.pyc,,
pathspec/_backends/__pycache__/agg.cpython-314.pyc,,
pathspec/_backends/_utils.py,sha256=mDjbGpndOyVkt9Fue0WDWKTkk-jVqOejof9Bv9pzArE,1066
pathspec/_backends/agg.py,sha256=naHFqYXMR53hwtgHtEHrwNJEBFpbUWbdMbF0zguxHlE,2505
pathspec/_backends/hyperscan/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
pathspec/_backends/hyperscan/__pycache__/__init__.cpython-314.pyc,,
pathspec/_backends/hyperscan/__pycache__/_base.cpython-314.pyc,,
pathspec/_backends/hyperscan/__pycache__/base.cpython-314.pyc,,
pathspec/_backends/hyperscan/__pycache__/gitignore.cpython-314.pyc,,
pathspec/_backends/hyperscan/__pycache__/pathspec.cpython-314.pyc,,
pathspec/_backends/hyperscan/_base.py,sha256=b8E_kClW6Wtkdserr3qZzMPWVomrI4yhfxSlGVYdT3c,1719
pathspec/_backends/hyperscan/base.py,sha256=BclDnsbCH6Fvx58YT6wqxGDcfWKNUQAcy_9jV63WkCI,563
pathspec/_backends/hyperscan/gitignore.py,sha256=OyqtXEoZWrMB3Uh_2xNzY0aGK5UdBBjkFeGAFKQh7Oo,6761
pathspec/_backends/hyperscan/pathspec.py,sha256=74RsGQt9x3nTxjz5S5grEQI34x8eFew78wluiIzhOpw,6500
pathspec/_backends/re2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
pathspec/_backends/re2/__pycache__/__init__.cpython-314.pyc,,
pathspec/_backends/re2/__pycache__/_base.cpython-314.pyc,,
pathspec/_backends/re2/__pycache__/base.cpython-314.pyc,,
pathspec/_backends/re2/__pycache__/gitignore.cpython-314.pyc,,
pathspec/_backends/re2/__pycache__/pathspec.cpython-314.pyc,,
pathspec/_backends/re2/_base.py,sha256=VDThfjwEOnrDOfri_EnPifXH8pOYt71nxq3tUQAScfU,2149
pathspec/_backends/re2/base.py,sha256=0sCZzhDpvyZLg9imO7BdE9KOmy3L0mgyHuzPhHWNbRU,462
pathspec/_backends/re2/gitignore.py,sha256=0RPjCzg1vxE_6qDOL29V4qAyi9UnMKT2bb3k2XDimew,5094
pathspec/_backends/re2/pathspec.py,sha256=aUtY_DdVHQyxHMbMGiovmXTIpuLKgIAeGtZerMVHIhI,4871
pathspec/_backends/simple/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
pathspec/_backends/simple/__pycache__/__init__.cpython-314.pyc,,
pathspec/_backends/simple/__pycache__/gitignore.cpython-314.pyc,,
pathspec/_backends/simple/__pycache__/pathspec.cpython-314.pyc,,
pathspec/_backends/simple/gitignore.py,sha256=45SfH2SM-YF7CppdSrQ15z7A4GUAesFzLWs8QaKdER4,2865
pathspec/_backends/simple/pathspec.py,sha256=Zzebst2evN8-juZr5w6VBwIox7LToYT4K2zD4Jp3M7U,2095
pathspec/_meta.py,sha256=3sxdG_ghfAmwhV7AGeJS9VUZptsmaBFVSPhQqVLpiMk,2937
pathspec/_typing.py,sha256=xega7efBH3B4StmBzxpGvrk-yJWYKnD6Lk5Id0IiHzc,1642
pathspec/_version.py,sha256=iV7XOjXu_8FpfpC966oeh6PC-5XA35XwWlO7oI-p2ys,64
pathspec/backend.py,sha256=QXFus8SgZ1hKH8LZ8eOnZcyGNTO1_YQYwRM_kTkvi2M,1161
pathspec/gitignore.py,sha256=oFWfSgeecaJFSCgI0TwdYxz0jluQxztgf-T779OxIN8,5263
pathspec/pathspec.py,sha256=5JhgxfZTyzUcG0bEUN91xTdcvF_S9sdhXGK59nIpDOY,15151
pathspec/pattern.py,sha256=smqkNSWc9LmPZS1MqYBGjXFXZRteiSpwF8iAy9250DY,6695
pathspec/patterns/__init__.py,sha256=6pfTpyrSIJxN8A12hKWpa9JFvVMTR39FV3QE1HBQbho,404
pathspec/patterns/__pycache__/__init__.cpython-314.pyc,,
pathspec/patterns/__pycache__/gitwildmatch.cpython-314.pyc,,
pathspec/patterns/gitignore/__init__.py,sha256=MaSAZd0DDg0vCH9k1LslaJjBJw5DkX4ty-FuLmB1z_4,422
pathspec/patterns/gitignore/__pycache__/__init__.cpython-314.pyc,,
pathspec/patterns/gitignore/__pycache__/base.cpython-314.pyc,,
pathspec/patterns/gitignore/__pycache__/basic.cpython-314.pyc,,
pathspec/patterns/gitignore/__pycache__/spec.cpython-314.pyc,,
pathspec/patterns/gitignore/base.py,sha256=mkLYm-prSD2SXNDpxnFhL0FRV8FRPAsIBVeXyNOWjCI,4688
pathspec/patterns/gitignore/basic.py,sha256=0pTlzzJt8qMpy-SnGHhozZVWVDH9ErPDy29MV3Q8UOw,9924
pathspec/patterns/gitignore/spec.py,sha256=8jB3Q7Wbb6fLvtIfNax89tEtw2UZgATbAKnpGQleU8Q,10186
pathspec/patterns/gitwildmatch.py,sha256=bF2PUtc9gOFHuFwHJ035x91y3R8An5dIY5oRibylsco,1463
pathspec/py.typed,sha256=wq7wwDeyBungK6DsiV4O-IujgKzARwHz94uQshdpdEU,68
pathspec/util.py,sha256=KbG9seqfTOBLPoSJ8I4CdeDFVof6rDGCMy69cZb4Du4,24728

View File

View File

@@ -0,0 +1,4 @@
Wheel-Version: 1.0
Generator: flit 3.12.0
Root-Is-Purelib: true
Tag: py3-none-any

View File

@@ -0,0 +1,373 @@
Mozilla Public License Version 2.0
==================================
1. Definitions
--------------
1.1. "Contributor"
means each individual or legal entity that creates, contributes to
the creation of, or owns Covered Software.
1.2. "Contributor Version"
means the combination of the Contributions of others (if any) used
by a Contributor and that particular Contributor's Contribution.
1.3. "Contribution"
means Covered Software of a particular Contributor.
1.4. "Covered Software"
means Source Code Form to which the initial Contributor has attached
the notice in Exhibit A, the Executable Form of such Source Code
Form, and Modifications of such Source Code Form, in each case
including portions thereof.
1.5. "Incompatible With Secondary Licenses"
means
(a) that the initial Contributor has attached the notice described
in Exhibit B to the Covered Software; or
(b) that the Covered Software was made available under the terms of
version 1.1 or earlier of the License, but not also under the
terms of a Secondary License.
1.6. "Executable Form"
means any form of the work other than Source Code Form.
1.7. "Larger Work"
means a work that combines Covered Software with other material, in
a separate file or files, that is not Covered Software.
1.8. "License"
means this document.
1.9. "Licensable"
means having the right to grant, to the maximum extent possible,
whether at the time of the initial grant or subsequently, any and
all of the rights conveyed by this License.
1.10. "Modifications"
means any of the following:
(a) any file in Source Code Form that results from an addition to,
deletion from, or modification of the contents of Covered
Software; or
(b) any new file in Source Code Form that contains any Covered
Software.
1.11. "Patent Claims" of a Contributor
means any patent claim(s), including without limitation, method,
process, and apparatus claims, in any patent Licensable by such
Contributor that would be infringed, but for the grant of the
License, by the making, using, selling, offering for sale, having
made, import, or transfer of either its Contributions or its
Contributor Version.
1.12. "Secondary License"
means either the GNU General Public License, Version 2.0, the GNU
Lesser General Public License, Version 2.1, the GNU Affero General
Public License, Version 3.0, or any later versions of those
licenses.
1.13. "Source Code Form"
means the form of the work preferred for making modifications.
1.14. "You" (or "Your")
means an individual or a legal entity exercising rights under this
License. For legal entities, "You" includes any entity that
controls, is controlled by, or is under common control with You. For
purposes of this definition, "control" means (a) the power, direct
or indirect, to cause the direction or management of such entity,
whether by contract or otherwise, or (b) ownership of more than
fifty percent (50%) of the outstanding shares or beneficial
ownership of such entity.
2. License Grants and Conditions
--------------------------------
2.1. Grants
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
(a) under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or
as part of a Larger Work; and
(b) under Patent Claims of such Contributor to make, use, sell, offer
for sale, have made, import, and otherwise transfer either its
Contributions or its Contributor Version.
2.2. Effective Date
The licenses granted in Section 2.1 with respect to any Contribution
become effective for each Contribution on the date the Contributor first
distributes such Contribution.
2.3. Limitations on Grant Scope
The licenses granted in this Section 2 are the only rights granted under
this License. No additional rights or licenses will be implied from the
distribution or licensing of Covered Software under this License.
Notwithstanding Section 2.1(b) above, no patent license is granted by a
Contributor:
(a) for any code that a Contributor has removed from Covered Software;
or
(b) for infringements caused by: (i) Your and any other third party's
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
(c) under Patent Claims infringed by Covered Software in the absence of
its Contributions.
This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
the notice requirements in Section 3.4).
2.4. Subsequent Licenses
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this
License (see Section 10.2) or under the terms of a Secondary License (if
permitted under the terms of Section 3.3).
2.5. Representation
Each Contributor represents that the Contributor believes its
Contributions are its original creation(s) or it has sufficient rights
to grant the rights to its Contributions conveyed by this License.
2.6. Fair Use
This License is not intended to limit any rights You have under
applicable copyright doctrines of fair use, fair dealing, or other
equivalents.
2.7. Conditions
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
in Section 2.1.
3. Responsibilities
-------------------
3.1. Distribution of Source Form
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under
the terms of this License. You must inform recipients that the Source
Code Form of the Covered Software is governed by the terms of this
License, and how they can obtain a copy of this License. You may not
attempt to alter or restrict the recipients' rights in the Source Code
Form.
3.2. Distribution of Executable Form
If You distribute Covered Software in Executable Form then:
(a) such Covered Software must also be made available in Source Code
Form, as described in Section 3.1, and You must inform recipients of
the Executable Form how they can obtain a copy of such Source Code
Form by reasonable means in a timely manner, at a charge no more
than the cost of distribution to the recipient; and
(b) You may distribute such Executable Form under the terms of this
License, or sublicense it under different terms, provided that the
license for the Executable Form does not attempt to limit or alter
the recipients' rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for
the Covered Software. If the Larger Work is a combination of Covered
Software with a work governed by one or more Secondary Licenses, and the
Covered Software is not Incompatible With Secondary Licenses, this
License permits You to additionally distribute such Covered Software
under the terms of such Secondary License(s), so that the recipient of
the Larger Work may, at their option, further distribute the Covered
Software under the terms of either this License or such Secondary
License(s).
3.4. Notices
You may not remove or alter the substance of any license notices
(including copyright notices, patent notices, disclaimers of warranty,
or limitations of liability) contained within the Source Code Form of
the Covered Software, except that You may alter any license notices to
the extent required to remedy known factual inaccuracies.
3.5. Application of Additional Terms
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on
behalf of any Contributor. You must make it absolutely clear that any
such warranty, support, indemnity, or liability obligation is offered by
You alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
4. Inability to Comply Due to Statute or Regulation
---------------------------------------------------
If it is impossible for You to comply with any of the terms of this
License with respect to some or all of the Covered Software due to
statute, judicial order, or regulation then You must: (a) comply with
the terms of this License to the maximum extent possible; and (b)
describe the limitations and the code they affect. Such description must
be placed in a text file included with all distributions of the Covered
Software under this License. Except to the extent prohibited by statute
or regulation, such description must be sufficiently detailed for a
recipient of ordinary skill to be able to understand it.
5. Termination
--------------
5.1. The rights granted under this License will terminate automatically
if You fail to comply with any of its terms. However, if You become
compliant, then the rights granted under this License from a particular
Contributor are reinstated (a) provisionally, unless and until such
Contributor explicitly and finally terminates Your grants, and (b) on an
ongoing basis, if such Contributor fails to notify You of the
non-compliance by some reasonable means prior to 60 days after You have
come back into compliance. Moreover, Your grants from a particular
Contributor are reinstated on an ongoing basis if such Contributor
notifies You of the non-compliance by some reasonable means, this is the
first time You have received notice of non-compliance with this License
from such Contributor, and You become compliant prior to 30 days after
Your receipt of the notice.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions,
counter-claims, and cross-claims) alleging that a Contributor Version
directly or indirectly infringes any patent, then the rights granted to
You by any and all Contributors for the Covered Software under Section
2.1 of this License shall terminate.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
end user license agreements (excluding distributors and resellers) which
have been validly granted by You or Your distributors under this License
prior to termination shall survive termination.
************************************************************************
* *
* 6. Disclaimer of Warranty *
* ------------------------- *
* *
* Covered Software is provided under this License on an "as is" *
* basis, without warranty of any kind, either expressed, implied, or *
* statutory, including, without limitation, warranties that the *
* Covered Software is free of defects, merchantable, fit for a *
* particular purpose or non-infringing. The entire risk as to the *
* quality and performance of the Covered Software is with You. *
* Should any Covered Software prove defective in any respect, You *
* (not any Contributor) assume the cost of any necessary servicing, *
* repair, or correction. This disclaimer of warranty constitutes an *
* essential part of this License. No use of any Covered Software is *
* authorized under this License except under this disclaimer. *
* *
************************************************************************
************************************************************************
* *
* 7. Limitation of Liability *
* -------------------------- *
* *
* Under no circumstances and under no legal theory, whether tort *
* (including negligence), contract, or otherwise, shall any *
* Contributor, or anyone who distributes Covered Software as *
* permitted above, be liable to You for any direct, indirect, *
* special, incidental, or consequential damages of any character *
* including, without limitation, damages for lost profits, loss of *
* goodwill, work stoppage, computer failure or malfunction, or any *
* and all other commercial damages or losses, even if such party *
* shall have been informed of the possibility of such damages. This *
* limitation of liability shall not apply to liability for death or *
* personal injury resulting from such party's negligence to the *
* extent applicable law prohibits such limitation. Some *
* jurisdictions do not allow the exclusion or limitation of *
* incidental or consequential damages, so this exclusion and *
* limitation may not apply to You. *
* *
************************************************************************
8. Litigation
-------------
Any litigation relating to this License may be brought only in the
courts of a jurisdiction where the defendant maintains its principal
place of business and such litigation shall be governed by laws of that
jurisdiction, without reference to its conflict-of-law provisions.
Nothing in this Section shall prevent a party's ability to bring
cross-claims or counter-claims.
9. Miscellaneous
----------------
This License represents the complete agreement concerning the subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the extent
necessary to make it enforceable. Any law or regulation which provides
that the language of a contract shall be construed against the drafter
shall not be used to construe this License against a Contributor.
10. Versions of the License
---------------------------
10.1. New Versions
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
10.2. Effect of New Versions
You may distribute the Covered Software under the terms of the version
of the License under which You originally received the Covered Software,
or under the terms of any subsequent version published by the license
steward.
10.3. Modified Versions
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a
modified version of this License if you rename the license and remove
any references to the name of the license steward (except to note that
such modified license differs from this License).
10.4. Distributing Source Code Form that is Incompatible With Secondary
Licenses
If You choose to distribute Source Code Form that is Incompatible With
Secondary Licenses under the terms of this version of the License, the
notice described in Exhibit B of this License must be attached.
Exhibit A - Source Code Form License Notice
-------------------------------------------
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
file in a relevant directory) where a recipient would be likely to look
for such a notice.
You may add additional accurate notices of copyright ownership.
Exhibit B - "Incompatible With Secondary Licenses" Notice
---------------------------------------------------------
This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.

68
lib/pathspec/__init__.py Normal file
View File

@@ -0,0 +1,68 @@
"""
The *pathspec* package provides pattern matching for file paths. So far this
only includes Git's *gitignore* patterns.
The following classes are imported and made available from the root of the
`pathspec` package:
- :class:`pathspec.gitignore.GitIgnoreSpec`
- :class:`pathspec.pathspec.PathSpec`
- :class:`pathspec.pattern.Pattern`
- :class:`pathspec.pattern.RegexPattern`
- :class:`pathspec.util.RecursionError`
The following functions are also imported:
- :func:`pathspec.util.lookup_pattern`
The following deprecated functions are also imported to maintain backward
compatibility:
- :func:`pathspec.util.iter_tree`
- :func:`pathspec.util.match_files`
"""
from .gitignore import (
GitIgnoreSpec)
from .pathspec import (
PathSpec)
from .pattern import (
Pattern,
RegexPattern)
from .util import (
RecursionError,
iter_tree, # Deprecated since 0.10.0.
lookup_pattern,
match_files) # Deprecated since 0.10.0.
from ._meta import (
__author__,
__copyright__,
__credits__,
__license__)
from ._version import (
__version__)
# Load pattern implementations.
from . import patterns
# Declare private imports as part of the public interface. Deprecated imports
# are deliberately excluded.
__all__ = [
'GitIgnoreSpec',
'PathSpec',
'Pattern',
'RecursionError',
'RegexPattern',
'__author__',
'__copyright__',
'__credits__',
'__license__',
'__version__',
'lookup_pattern',
]

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,4 @@
"""
WARNING: The *pathspec._backends* package is not part of the public API. Its
contents and structure are likely to change.
"""

Binary file not shown.

View File

@@ -0,0 +1,45 @@
"""
This module provides private utility functions for backends.
WARNING: The *pathspec._backends* package is not part of the public API. Its
contents and structure are likely to change.
"""
from collections.abc import (
Iterable)
from typing import (
TypeVar)
from pathspec.pattern import (
Pattern)
TPattern = TypeVar("TPattern", bound=Pattern)
def enumerate_patterns(
    patterns: Iterable[TPattern],
    filter: bool,
    reverse: bool,
) -> list[tuple[int, TPattern]]:
    """
    Pair each pattern with its position in *patterns*.

    *patterns* (:class:`Iterable` of :class:`.Pattern`) contains the patterns.

    *filter* (:class:`bool`) is whether to drop no-op patterns, i.e. those whose
    *include* attribute is :data:`None` (:data:`True`), or keep every pattern
    (:data:`False`).

    *reverse* (:class:`bool`) is whether to reverse the pattern order
    (:data:`True`), or keep the order (:data:`False`).

    Returns the enumerated patterns (:class:`list` of :class:`tuple`). Each
    tuple holds the original index (:class:`int`) and the pattern; indices are
    assigned before filtering, so they always refer to positions in *patterns*.
    """
    indexed: list[tuple[int, TPattern]] = []
    for position, pattern in enumerate(patterns):
        # Skip no-op patterns only when filtering was requested.
        if filter and pattern.include is None:
            continue
        indexed.append((position, pattern))

    return indexed[::-1] if reverse else indexed

View File

@@ -0,0 +1,104 @@
"""
This module provides aggregated private data and utilities functions about the
available backends.
WARNING: The *pathspec._backends* package is not part of the public API. Its
contents and structure are likely to change.
"""
from collections.abc import (
Sequence)
from typing import (
cast)
from pathspec.backend import (
BackendNamesHint,
_Backend)
from pathspec.pattern import (
Pattern,
RegexPattern)
from .hyperscan.base import (
hyperscan_error)
from .hyperscan.gitignore import (
HyperscanGiBackend)
from .hyperscan.pathspec import (
HyperscanPsBackend)
from .re2.base import (
re2_error)
from .re2.gitignore import (
Re2GiBackend)
from .re2.pathspec import (
Re2PsBackend)
from .simple.gitignore import (
SimpleGiBackend)
from .simple.pathspec import (
SimplePsBackend)
# Preference order: re2 first, then hyperscan, then the always-available
# 'simple' backend. A backend is considered usable when its import error is
# None.
_BEST_BACKEND: BackendNamesHint = (
    're2' if re2_error is None
    else 'hyperscan' if hyperscan_error is None
    else 'simple'
)
"""
The best available backend. This is the name substituted when a caller asks for
the 'best' backend.
"""
def make_gitignore_backend(
    name: BackendNamesHint,
    patterns: Sequence[Pattern],
) -> _Backend:
    """
    Build the named backend for :class:`~pathspec.gitignore.GitIgnoreSpec` from
    the supplied patterns.

    *name* (:class:`str`) is the name of the backend: 'hyperscan', 're2',
    'simple', or 'best' to use the best available backend.

    *patterns* (:class:`.Iterable` of :class:`.Pattern`) contains the compiled
    patterns.

    Returns the backend (:class:`._Backend`).

    Raises :class:`ValueError` if *name* is not a recognized backend name.
    """
    # Resolve the 'best' alias to a concrete backend name first.
    name = _BEST_BACKEND if name == 'best' else name

    if name == 'hyperscan':
        return HyperscanGiBackend(cast(Sequence[RegexPattern], patterns))
    if name == 're2':
        return Re2GiBackend(cast(Sequence[RegexPattern], patterns))
    if name == 'simple':
        return SimpleGiBackend(cast(Sequence[RegexPattern], patterns))

    raise ValueError(f"Backend {name=!r} is invalid.")
def make_pathspec_backend(
    name: BackendNamesHint,
    patterns: Sequence[Pattern],
) -> _Backend:
    """
    Build the named backend for :class:`~pathspec.pathspec.PathSpec` from the
    supplied patterns.

    *name* (:class:`str`) is the name of the backend: 'hyperscan', 're2',
    'simple', or 'best' to use the best available backend.

    *patterns* (:class:`Iterable` of :class:`Pattern`) contains the compiled
    patterns.

    Returns the backend (:class:`._Backend`).

    Raises :class:`ValueError` if *name* is not a recognized backend name.
    """
    # Resolve the 'best' alias to a concrete backend name first.
    name = _BEST_BACKEND if name == 'best' else name

    if name == 'hyperscan':
        return HyperscanPsBackend(cast(Sequence[RegexPattern], patterns))
    if name == 're2':
        return Re2PsBackend(cast(Sequence[RegexPattern], patterns))
    if name == 'simple':
        # The simple backend receives the patterns as given (no cast).
        return SimplePsBackend(patterns)

    raise ValueError(f"Backend {name=!r} is invalid.")

View File

@@ -0,0 +1,78 @@
"""
This module provides private data for the base implementation for the
:module:`hyperscan` library.
WARNING: The *pathspec._backends.hyperscan* package is not part of the public
API. Its contents and structure are likely to change.
"""
from __future__ import annotations
from dataclasses import (
dataclass)
from typing import (
Union) # Replaced by `X | Y` in 3.10.
# Start from the "hyperscan unavailable" value; it is replaced below when the
# optional hyperscan library imports successfully.
HS_FLAGS: int = 0
"""
The hyperscan flags to use (0 when hyperscan is not installed):

- HS_FLAG_SINGLEMATCH is needed to ensure the partial patterns only match once.

- HS_FLAG_UTF8 is required to support unicode paths.
"""

try:
    import hyperscan
except ModuleNotFoundError:
    # Keep the module name bound so later code can test `hyperscan is None`.
    hyperscan = None
else:
    HS_FLAGS = hyperscan.HS_FLAG_SINGLEMATCH | hyperscan.HS_FLAG_UTF8
@dataclass(frozen=True)
class HyperscanExprDat:
    """
    The :class:`HyperscanExprDat` class holds the data kept for a single
    expression. Instances are immutable (frozen).
    """

    # Slots are declared by hand because the dataclass `slots` argument is not
    # supported until Python 3.10.
    __slots__ = ['include', 'index', 'is_dir_pattern']

    include: bool
    """
    *include* (:class:`bool`) is whether the matched files should be included
    (:data:`True`), or excluded (:data:`False`).
    """

    index: int
    """
    *index* (:class:`int`) is the pattern index.
    """

    is_dir_pattern: bool
    """
    *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory
    pattern for gitignore.
    """
@dataclass(frozen=True)
class HyperscanExprDebug(HyperscanExprDat):
    """
    The :class:`HyperscanExprDebug` class extends :class:`HyperscanExprDat` with
    extra debug information about an expression.
    """

    # Slots are declared by hand because the dataclass `slots` argument is not
    # supported until Python 3.10. Only the field added by this subclass is
    # listed here.
    __slots__ = ['regex']

    regex: Union[str, bytes]
    """
    *regex* (:class:`str` or :class:`bytes`) is the regular expression.
    """

View File

@@ -0,0 +1,24 @@
"""
This module provides the base implementation for the :module:`hyperscan`
backend.
WARNING: The *pathspec._backends.hyperscan* package is not part of the public
API. Its contents and structure are likely to change.
"""
from __future__ import annotations
from typing import (
Optional)
# hyperscan is optional: record the import failure instead of raising so that
# callers can check `hyperscan_error is None` to see whether it is usable.
try:
    import hyperscan
except ModuleNotFoundError as import_failure:
    hyperscan = None
    hyperscan_error = import_failure
else:
    hyperscan_error = None

hyperscan_error: Optional[ModuleNotFoundError]
"""
*hyperscan_error* (:class:`ModuleNotFoundError` or :data:`None`) is the
hyperscan import error, or :data:`None` when the import succeeded.
"""

View File

@@ -0,0 +1,245 @@
"""
This module provides the :module:`hyperscan` backend for :class:`~pathspec.gitignore.GitIgnoreSpec`.
WARNING: The *pathspec._backends.hyperscan* package is not part of the public
API. Its contents and structure are likely to change.
"""
from __future__ import annotations
from collections.abc import (
Sequence)
from typing import (
Any,
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional, # Replaced by `X | None` in 3.10.
Union) # Replaced by `X | Y` in 3.10.
try:
import hyperscan
except ModuleNotFoundError:
hyperscan = None
from pathspec.pattern import (
RegexPattern)
from pathspec.patterns.gitignore.spec import (
GitIgnoreSpecPattern,
_BYTES_ENCODING,
_DIR_MARK_CG,
_DIR_MARK_OPT)
from pathspec._typing import (
override) # Added in 3.12.
from ._base import (
HS_FLAGS,
HyperscanExprDat,
HyperscanExprDebug)
from .pathspec import (
HyperscanPsBackend)
class HyperscanGiBackend(HyperscanPsBackend):
    """
    The :class:`HyperscanGiBackend` class is the *hyperscan* implementation used
    by :class:`~pathspec.gitignore.GitIgnoreSpec`. The Hyperscan database uses
    block mode for matching files.
    """

    # Change type hint: this backend tracks a priority alongside the match.
    _out: tuple[Optional[bool], int, int]

    def __init__(
        self,
        patterns: Sequence[RegexPattern],
        *,
        _debug_exprs: Optional[bool] = None,
        _test_sort: Optional[Callable[[list], None]] = None,
    ) -> None:
        """
        Initialize the :class:`HyperscanGiBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the
        compiled patterns.

        *_debug_exprs* and *_test_sort* are forwarded unchanged to the parent
        initializer.
        """
        super().__init__(
            patterns,
            _debug_exprs=_debug_exprs,
            _test_sort=_test_sort,
        )

        self._out = (None, -1, 0)
        """
        *_out* (:class:`tuple`) stores the current match:

        - *0* (:class:`bool` or :data:`None`) is the match include.

        - *1* (:class:`int`) is the match index.

        - *2* (:class:`int`) is the match priority.
        """

    @override
    @staticmethod
    def _init_db(
        db: hyperscan.Database,
        debug: bool,
        patterns: list[tuple[int, RegexPattern]],
        sort_ids: Optional[Callable[[list[int]], None]],
    ) -> list[HyperscanExprDat]:
        """
        Compile the given patterns into the Hyperscan database.

        *db* (:class:`hyperscan.Hyperscan`) is the Hyperscan database.

        *debug* (:class:`bool`) is whether to include additional debugging
        information for the expressions.

        *patterns* (:class:`list` of :class:`tuple`) contains each pattern
        index (:class:`int`) paired with its pattern (:class:`.RegexPattern`).

        *sort_ids* (:class:`callable` or :data:`None`) is a function used to
        sort the compiled expression ids. This is used during testing to ensure
        the order of expressions is not accidentally relied on.

        Returns a :class:`list` indexed by expression id (:class:`int`) to its
        data (:class:`HyperscanExprDat`).
        """
        # WARNING: Hyperscan raises a `hyperscan.error` exception when compiled
        # with zero elements.
        assert patterns, patterns

        expr_data: list[HyperscanExprDat] = []
        exprs: list[bytes] = []
        for pattern_index, pattern in patterns:
            assert pattern.include is not None, (pattern_index, pattern)
            assert isinstance(pattern, RegexPattern), pattern
            regex = pattern.regex.pattern

            # Each entry is (regex, is_dir_pattern). One pattern may expand to
            # several expressions.
            variants: list[tuple[Union[str, bytes], bool]] = []
            if isinstance(pattern, GitIgnoreSpecPattern):
                # GitIgnoreSpecPattern uses capture groups for its directory
                # marker but Hyperscan does not support capture groups, so the
                # marker is rewritten on the text form of the regex.
                if isinstance(regex, str):
                    regex_text = regex
                else:
                    assert isinstance(regex, bytes), regex
                    regex_text = regex.decode(_BYTES_ENCODING)

                if _DIR_MARK_CG in regex_text:
                    if regex_text.endswith(_DIR_MARK_OPT):
                        # Optional directory marker: emit one directory variant
                        # and one file variant.
                        stem = regex_text[:-len(_DIR_MARK_OPT)]
                        variants.append((f'{stem}/', True))
                        variants.append((f'{stem}$', False))
                    else:
                        # Replace the capture group with a plain slash.
                        variants.append(
                            (regex_text.replace(_DIR_MARK_CG, '/'), True)
                        )

            if not variants:
                # No special handling applied; use the regex as-is.
                variants.append((regex, False))

            for variant, is_dir_pattern in variants:
                if isinstance(variant, bytes):
                    encoded = variant
                else:
                    assert isinstance(variant, str), variant
                    encoded = variant.encode('utf8')

                if debug:
                    dat = HyperscanExprDebug(
                        include=pattern.include,
                        index=pattern_index,
                        is_dir_pattern=is_dir_pattern,
                        regex=variant,
                    )
                else:
                    dat = HyperscanExprDat(
                        include=pattern.include,
                        index=pattern_index,
                        is_dir_pattern=is_dir_pattern,
                    )

                expr_data.append(dat)
                exprs.append(encoded)

        # Expression ids are positions in *expr_data*. When a sorter is given,
        # reorder the expressions to follow the sorted ids so each expression is
        # still compiled under its own id.
        ids = list(range(len(exprs)))
        if sort_ids is not None:
            sort_ids(ids)
            exprs = [exprs[position] for position in ids]

        # Compile patterns.
        db.compile(
            expressions=exprs,
            ids=ids,
            elements=len(exprs),
            flags=HS_FLAGS,
        )
        return expr_data

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        # NOTICE: According to benchmarking, a method callback is 13% faster
        # than using a closure here.
        database = self._db
        if database is None:
            # Database was not initialized because there were no patterns.
            # Return no match.
            return (None, None)

        # Reset the match state; the callback updates it during the scan.
        self._out = (None, -1, 0)
        database.scan(file.encode('utf8'), match_event_handler=self.__on_match)

        matched_include, matched_index, _priority = self._out
        # An index of -1 means the callback never recorded a match.
        return (matched_include, None if matched_index == -1 else matched_index)

    @override
    def __on_match(
        self,
        expr_id: int,
        _from: int,
        _to: int,
        _flags: int,
        _context: Any,
    ) -> Optional[bool]:
        """
        Called on each match. Updates *_out* when this match should replace the
        currently stored one. Always returns :data:`None`.

        *expr_id* (:class:`int`) is the expression id (index) of the matched
        pattern.
        """
        matched = self._expr_data[expr_id]
        include = matched.include
        index = matched.index
        is_dir_pattern = matched.is_dir_pattern

        # Directory-pattern matches get priority 1, file-pattern matches get
        # priority 2.
        priority = 1 if is_dir_pattern else 2

        # WARNING: Hyperscan does not guarantee matches will be produced in
        # order!
        _, prev_index, prev_priority = self._out

        # The stored match is replaced when any of these hold:
        # - this is an including directory-pattern match with a later index;
        # - the priority is the same and the index is later;
        # - the priority is higher.
        if (
            (include and is_dir_pattern and index > prev_index)
            or (priority == prev_priority and index > prev_index)
            or priority > prev_priority
        ):
            self._out = (include, index, priority)

View File

@@ -0,0 +1,251 @@
"""
This module provides the :module:`hyperscan` backend for :class:`~pathspec.pathspec.PathSpec`.
WARNING: The *pathspec._backends.hyperscan* package is not part of the public
API. Its contents and structure are likely to change.
"""
from __future__ import annotations
from collections.abc import (
Sequence)
from typing import (
Any,
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional) # Replaced by `X | None` in 3.10.
try:
import hyperscan
except ModuleNotFoundError:
hyperscan = None
from pathspec.backend import (
_Backend)
from pathspec.pattern import (
RegexPattern)
from pathspec._typing import (
override) # Added in 3.12.
from .._utils import (
enumerate_patterns)
from .base import (
hyperscan_error)
from ._base import (
HS_FLAGS,
HyperscanExprDat,
HyperscanExprDebug)
class HyperscanPsBackend(_Backend):
    """
    The :class:`HyperscanPsBackend` class is the *hyperscan* implementation used
    by :class:`~pathspec.pathspec.PathSpec` for matching files. The Hyperscan
    database uses block mode for matching files.
    """

    def __init__(
        self,
        patterns: Sequence[RegexPattern],
        *,
        _debug_exprs: Optional[bool] = None,
        _test_sort: Optional[Callable[[list], None]] = None,
    ) -> None:
        """
        Initialize the :class:`HyperscanPsBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the
        compiled patterns.

        Raises the recorded hyperscan import error if hyperscan is unavailable,
        and :class:`TypeError` if the first pattern is not a
        :class:`.RegexPattern`.
        """
        if hyperscan is None:
            raise hyperscan_error

        # Only the first pattern is type-checked here.
        if patterns and not isinstance(patterns[0], RegexPattern):
            raise TypeError(f"{patterns[0]=!r} must be a RegexPattern.")

        # No-op patterns are filtered out; the original order is kept.
        use_patterns = enumerate_patterns(
            patterns, filter=True, reverse=False,
        )
        debug_exprs = bool(_debug_exprs)

        # WARNING: The hyperscan database cannot be initialized with zero
        # patterns, so it is only created when there is something to compile.
        db: Optional[hyperscan.Database] = None
        expr_data: list[HyperscanExprDat] = []
        if use_patterns:
            db = self._make_db()
            expr_data = self._init_db(
                db=db,
                debug=debug_exprs,
                patterns=use_patterns,
                sort_ids=_test_sort,
            )

        self._db: Optional[hyperscan.Database] = db
        """
        *_db* (:class:`hyperscan.Database` or :data:`None`) is the Hyperscan
        database. It is :data:`None` when there were no patterns to compile.
        """

        self._debug_exprs = debug_exprs
        """
        *_debug_exprs* (:class:`bool`) is whether to include additional
        debugging information for the expressions.
        """

        self._expr_data: list[HyperscanExprDat] = expr_data
        """
        *_expr_data* (:class:`list`) maps expression index (:class:`int`) to
        expression data (:class:`HyperscanExprDat`).
        """

        self._out: tuple[Optional[bool], int] = (None, -1)
        """
        *_out* (:class:`tuple`) stores the current match:

        - *0* (:class:`bool` or :data:`None`) is the match include.

        - *1* (:class:`int`) is the match index.
        """

        self._patterns: dict[int, RegexPattern] = dict(use_patterns)
        """
        *_patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`RegexPattern`).
        """

    @staticmethod
    def _init_db(
        db: hyperscan.Database,
        debug: bool,
        patterns: list[tuple[int, RegexPattern]],
        sort_ids: Optional[Callable[[list[int]], None]],
    ) -> list[HyperscanExprDat]:
        """
        Compile the given patterns into the Hyperscan database.

        *db* (:class:`hyperscan.Hyperscan`) is the Hyperscan database.

        *debug* (:class:`bool`) is whether to include additional debugging
        information for the expressions.

        *patterns* (:class:`list` of :class:`tuple`) contains each pattern
        index (:class:`int`) paired with its pattern (:class:`.RegexPattern`).

        *sort_ids* (:class:`callable` or :data:`None`) is a function used to
        sort the compiled expression ids. This is used during testing to ensure
        the order of expressions is not accidentally relied on.

        Returns a :class:`list` indexed by expression id (:class:`int`) to its
        data (:class:`HyperscanExprDat`).
        """
        # WARNING: Hyperscan raises a `hyperscan.error` exception when compiled
        # with zero elements.
        assert patterns, patterns

        expr_data: list[HyperscanExprDat] = []
        exprs: list[bytes] = []
        for pattern_index, pattern in patterns:
            assert pattern.include is not None, (pattern_index, pattern)
            assert isinstance(pattern, RegexPattern), pattern

            # Expressions are collected as bytes; text regexes are encoded.
            regex = pattern.regex.pattern
            if isinstance(regex, bytes):
                encoded = regex
            else:
                assert isinstance(regex, str), regex
                encoded = regex.encode('utf8')

            # This backend never marks an expression as a directory pattern.
            if debug:
                dat = HyperscanExprDebug(
                    include=pattern.include,
                    index=pattern_index,
                    is_dir_pattern=False,
                    regex=regex,
                )
            else:
                dat = HyperscanExprDat(
                    include=pattern.include,
                    index=pattern_index,
                    is_dir_pattern=False,
                )

            expr_data.append(dat)
            exprs.append(encoded)

        # Expression ids are positions in *expr_data*. When a sorter is given,
        # reorder the expressions to follow the sorted ids so each expression is
        # still compiled under its own id.
        ids = list(range(len(exprs)))
        if sort_ids is not None:
            sort_ids(ids)
            exprs = [exprs[position] for position in ids]

        # Compile patterns.
        db.compile(
            expressions=exprs,
            ids=ids,
            elements=len(exprs),
            flags=HS_FLAGS,
        )
        return expr_data

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        # NOTICE: According to benchmarking, a method callback is 20% faster
        # than using a closure here.
        database = self._db
        if database is None:
            # Database was not initialized because there were no patterns.
            # Return no match.
            return (None, None)

        # Reset the match state; the callback updates it during the scan.
        self._out = (None, -1)
        database.scan(file.encode('utf8'), match_event_handler=self.__on_match)

        matched_include, matched_index = self._out
        # An index of -1 means the callback never recorded a match.
        return (matched_include, None if matched_index == -1 else matched_index)

    @staticmethod
    def _make_db() -> hyperscan.Database:
        """
        Create the Hyperscan database in block mode.

        Returns the database (:class:`hyperscan.Database`).
        """
        block_mode = hyperscan.HS_MODE_BLOCK
        return hyperscan.Database(mode=block_mode)

    def __on_match(
        self,
        expr_id: int,
        _from: int,
        _to: int,
        _flags: int,
        _context: Any,
    ) -> Optional[bool]:
        """
        Called on each match. Records the match in *_out* when its pattern index
        is greater than the one currently stored. Always returns :data:`None`.

        *expr_id* (:class:`int`) is the expression id (index) of the matched
        pattern.
        """
        # WARNING: Hyperscan does not guarantee matches will be produced in
        # order! Later expressions have higher priority, so keep whichever match
        # has the greatest pattern index.
        matched = self._expr_data[expr_id]
        if matched.index > self._out[1]:
            self._out = (matched.include, matched.index)

View File

View File

@@ -0,0 +1,95 @@
"""
This module provides private data for the base implementation for the
:module:`re2` library.
WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
contents and structure are likely to change.
"""
from __future__ import annotations
from dataclasses import (
dataclass)
from typing import (
Optional, # Replaced by `X | None` in 3.10.
Union) # Replaced by `X | Y` in 3.10.
# re2 is optional. Any failure, either importing it or configuring its options,
# is recorded in `re2_error` instead of being raised.
re2_error = None
try:
    import re2
except ModuleNotFoundError as import_failure:
    re2 = None
    re2_error = import_failure
    RE2_OPTIONS = None
else:
    # Both the `google-re2` and `pyre2` libraries use the `re2` namespace.
    # `google-re2` is the only one currently supported, so a failure while
    # building the options is treated as re2 being unusable.
    try:
        RE2_OPTIONS = re2.Options()
        RE2_OPTIONS.log_errors = False
        RE2_OPTIONS.never_capture = True
    except Exception as options_failure:
        re2_error = options_failure
        RE2_OPTIONS = None

RE2_OPTIONS: re2.Options
"""
The re2 options to use (:data:`None` when re2 is unusable):

- `log_errors=False` disables logging to stderr.

- `never_capture=True` disables capture groups because they effectively cannot
  be utilized with :class:`re2.Set`.
"""

re2_error: Optional[Exception]
"""
*re2_error* (:class:`Exception` or :data:`None`) is the re2 import error, or
:data:`None` when re2 was imported and configured successfully.
"""
@dataclass(frozen=True)
class Re2RegexDat:
    """
    The :class:`Re2RegexDat` class holds the data kept for a single regular
    expression. Instances are immutable (frozen).
    """

    # Slots are declared by hand because the dataclass `slots` argument is not
    # supported until Python 3.10.
    __slots__ = ['include', 'index', 'is_dir_pattern']

    include: bool
    """
    *include* (:class:`bool`) is whether the matched files should be included
    (:data:`True`), or excluded (:data:`False`).
    """

    index: int
    """
    *index* (:class:`int`) is the pattern index.
    """

    is_dir_pattern: bool
    """
    *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory
    pattern for gitignore.
    """
@dataclass(frozen=True)
class Re2RegexDebug(Re2RegexDat):
    """
    The :class:`Re2RegexDebug` class extends :class:`Re2RegexDat` with extra
    debug information about a regular expression.
    """

    # Slots are declared by hand because the dataclass `slots` argument is not
    # supported until Python 3.10. Only the field added here is listed.
    __slots__ = ['regex']

    regex: Union[str, bytes]
    """
    *regex* (:class:`str` or :class:`bytes`) is the regular expression.
    """

View File

@@ -0,0 +1,18 @@
"""
This module provides the base implementation for the :module:`re2` backend.
WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
contents and structure are likely to change.
"""
from __future__ import annotations
from typing import (
Optional) # Replaced by `X | None` in 3.10.
from ._base import (
re2_error)
re2_error: Optional[Exception]
"""
*re2_error* (:class:`Exception` or :data:`None`) is the re2 import error.
"""

View File

@@ -0,0 +1,179 @@
"""
This module provides the :module:`re2` backend for :class:`~pathspec.gitignore.GitIgnoreSpec`.
WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
contents and structure are likely to change.
"""
from __future__ import annotations
from typing import (
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional, # Replaced by `X | None` in 3.10.
Union) # Replaced by `X | Y` in 3.10.
try:
import re2
except ModuleNotFoundError:
re2 = None
from pathspec.pattern import (
RegexPattern)
from pathspec.patterns.gitignore.spec import (
GitIgnoreSpecPattern,
_BYTES_ENCODING,
_DIR_MARK_CG,
_DIR_MARK_OPT)
from pathspec._typing import (
override) # Added in 3.12.
from ._base import (
Re2RegexDat,
Re2RegexDebug)
from .pathspec import (
Re2PsBackend)
class Re2GiBackend(Re2PsBackend):
    """
    The :class:`Re2GiBackend` class matches files for
    :class:`~pathspec.gitignore.GitIgnoreSpec` using the :module:`re2` library.
    """

    @override
    @staticmethod
    def _init_set(
        debug: bool,
        patterns: dict[int, RegexPattern],
        regex_set: re2.Set,
        sort_indices: Optional[Callable[[list[int]], None]],
    ) -> list[Re2RegexDat]:
        """
        Add every usable pattern to the re2 regex set and compile the set.

        *debug* (:class:`bool`) is whether to record additional debugging
        information (the regex itself) for each regular expression.

        *patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`.RegexPattern`).

        *regex_set* (:class:`re2.Set`) is the regex set to populate.

        *sort_indices* (:class:`callable` or :data:`None`) is a function used to
        sort the pattern indices in place. This is used during testing to ensure
        the order of patterns is not accidentally relied on.

        Returns a :class:`list` indexed by regex id (:class:`int`) to its data
        (:class:`Re2RegexDat`).
        """
        # Decide the order in which patterns are added.
        indices = list(patterns.keys())
        if sort_indices is not None:
            sort_indices(indices)

        regex_data: list[Re2RegexDat] = []
        for pattern_index in indices:
            pattern = patterns[pattern_index]
            if pattern.include is None:
                # Patterns that neither include nor exclude are not added.
                continue

            assert isinstance(pattern, RegexPattern), pattern
            regex = pattern.regex.pattern

            # Each entry is (regex to add, whether it is a directory pattern).
            use_regexes: list[tuple[Union[str, bytes], bool]] = []
            if isinstance(pattern, GitIgnoreSpecPattern):
                # GitIgnoreSpecPattern uses capture groups for its directory
                # marker. Re2 supports capture groups, but they cannot be
                # utilized when using `re2.Set`, so the marker is rewritten.
                regex_str: str
                if not isinstance(regex, str):
                    assert isinstance(regex, bytes), regex
                    regex_str = regex.decode(_BYTES_ENCODING)
                else:
                    regex_str = regex

                if _DIR_MARK_CG in regex_str:
                    if regex_str.endswith(_DIR_MARK_OPT):
                        # The directory marker is optional: register one
                        # directory variant and one file variant.
                        stem = regex_str[:-len(_DIR_MARK_OPT)]
                        use_regexes.append((f'{stem}/', True))
                        use_regexes.append((f'{stem}$', False))
                    else:
                        # Replace the capture group with a plain separator.
                        plain = regex_str.replace(_DIR_MARK_CG, '/')
                        use_regexes.append((plain, True))

            if not use_regexes:
                # No special handling applied: add the regex unchanged.
                use_regexes.append((regex, False))

            for use_regex, is_dir_pattern in use_regexes:
                dat: Re2RegexDat
                if debug:
                    dat = Re2RegexDebug(
                        include=pattern.include,
                        index=pattern_index,
                        is_dir_pattern=is_dir_pattern,
                        regex=use_regex,
                    )
                else:
                    dat = Re2RegexDat(
                        include=pattern.include,
                        index=pattern_index,
                        is_dir_pattern=is_dir_pattern,
                    )

                # The data list and the set grow together so that the list
                # stays indexed by regex id.
                regex_data.append(dat)
                regex_set.Add(use_regex)

        regex_set.Compile()
        return regex_data

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        match_ids: Optional[list[int]] = self._set.Match(file)
        if not match_ids:
            return (None, None)

        best_include: Optional[bool] = None
        best_index: int = -1
        best_priority = -1

        regex_data = self._regex_data
        for regex_id in match_ids:
            dat = regex_data[regex_id]
            include = dat.include
            index = dat.index
            is_dir_pattern = dat.is_dir_pattern

            # A match by a file pattern (2) outranks a match by a directory
            # pattern (1).
            priority = 1 if is_dir_pattern else 2

            # WARNING: According to the documentation on `RE2::Set::Match()`,
            # there is no guarantee matches will be produced in order! Every
            # comparison is therefore made on the pattern index.
            is_later = index > best_index
            if (
                priority > best_priority
                or (is_later and priority == best_priority)
                or (is_later and include and is_dir_pattern)
            ):
                best_include = include
                best_index = index
                best_priority = priority

        assert best_index != -1, (best_index, best_include, best_priority)
        return (best_include, best_index)

View File

@@ -0,0 +1,187 @@
"""
This module provides the :module:`re2` backend for :class:`~pathspec.pathspec.PathSpec`.
WARNING: The *pathspec._backends.re2* package is not part of the public API. Its
contents and structure are likely to change.
"""
from __future__ import annotations
from collections.abc import (
Sequence)
from typing import (
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional) # Replaced by `X | None` in 3.10.
try:
import re2
except ModuleNotFoundError:
re2 = None
from pathspec.backend import (
_Backend)
from pathspec.pattern import (
RegexPattern)
from pathspec._typing import (
override) # Added in 3.12.
from .._utils import (
enumerate_patterns)
from .base import (
re2_error)
from ._base import (
RE2_OPTIONS,
Re2RegexDat,
Re2RegexDebug)
class Re2PsBackend(_Backend):
    """
    The :class:`Re2PsBackend` class matches files for
    :class:`~pathspec.pathspec.PathSpec` using the :module:`re2` library.
    """

    def __init__(
        self,
        patterns: Sequence[RegexPattern],
        *,
        _debug_regex: Optional[bool] = None,
        _test_sort: Optional[Callable[[list], None]] = None,
    ) -> None:
        """
        Initialize the :class:`Re2PsBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the
        compiled patterns.

        Raises the recorded re2 import error if re2 is not usable, and
        :class:`TypeError` if the first pattern is not a :class:`.RegexPattern`.
        """
        if re2_error is not None:
            raise re2_error

        if patterns and not isinstance(patterns[0], RegexPattern):
            raise TypeError(f"{patterns[0]=!r} must be a RegexPattern.")

        indexed_patterns = dict(enumerate_patterns(
            patterns, filter=True, reverse=False,
        ))
        regex_set = self._make_set()

        self._debug_regex = bool(_debug_regex)
        """
        *_debug_regex* (:class:`bool`) is whether additional debugging
        information is kept for the regular expressions.
        """

        self._patterns: dict[int, RegexPattern] = indexed_patterns
        """
        *_patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`RegexPattern`).
        """

        self._regex_data: list[Re2RegexDat] = self._init_set(
            debug=self._debug_regex,
            patterns=indexed_patterns,
            regex_set=regex_set,
            sort_indices=_test_sort,
        )
        """
        *_regex_data* (:class:`list`) maps regex index (:class:`int`) to regex
        data (:class:`Re2RegexDat`).
        """

        self._set: re2.Set = regex_set
        """
        *_set* (:class:`re2.Set`) is the re2 regex set.
        """

    @staticmethod
    def _init_set(
        debug: bool,
        patterns: dict[int, RegexPattern],
        regex_set: re2.Set,
        sort_indices: Optional[Callable[[list[int]], None]],
    ) -> list[Re2RegexDat]:
        """
        Add every usable pattern to the re2 regex set and compile the set.

        *debug* (:class:`bool`) is whether to record additional debugging
        information (the regex itself) for each regular expression.

        *patterns* (:class:`dict`) maps pattern index (:class:`int`) to pattern
        (:class:`.RegexPattern`).

        *regex_set* (:class:`re2.Set`) is the regex set to populate.

        *sort_indices* (:class:`callable` or :data:`None`) is a function used to
        sort the pattern indices in place. This is used during testing to ensure
        the order of patterns is not accidentally relied on.

        Returns a :class:`list` indexed by regex id (:class:`int`) to its data
        (:class:`Re2RegexDat`).
        """
        # Decide the order in which patterns are added.
        indices = list(patterns.keys())
        if sort_indices is not None:
            sort_indices(indices)

        regex_data: list[Re2RegexDat] = []
        for pattern_index in indices:
            pattern = patterns[pattern_index]
            if pattern.include is None:
                # Patterns that neither include nor exclude are not added.
                continue

            assert isinstance(pattern, RegexPattern), pattern
            regex = pattern.regex.pattern

            dat: Re2RegexDat
            if debug:
                dat = Re2RegexDebug(
                    include=pattern.include,
                    index=pattern_index,
                    is_dir_pattern=False,
                    regex=regex,
                )
            else:
                dat = Re2RegexDat(
                    include=pattern.include,
                    index=pattern_index,
                    is_dir_pattern=False,
                )

            # The data list and the set grow together so that the list stays
            # indexed by regex id.
            regex_data.append(dat)
            regex_set.Add(regex)

        regex_set.Compile()
        return regex_data

    @staticmethod
    def _make_set() -> re2.Set:
        """
        Create the re2 regex set using :data:`RE2_OPTIONS`.

        Returns the set (:class:`re2.Set`).
        """
        new_set = re2.Set.SearchSet(RE2_OPTIONS)
        return new_set

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        match_ids: Optional[list[int]] = self._set.Match(file)
        if not match_ids:
            return (None, None)

        # WARNING: According to the documentation on `RE2::Set::Match()`, there
        # is no guarantee matches will be produced in order! Later expressions
        # have higher priority, so the greatest pattern index wins.
        regex_data = self._regex_data
        last_index = max(regex_data[regex_id].index for regex_id in match_ids)
        winner = self._patterns[last_index]
        return (winner.include, last_index)

View File

@@ -0,0 +1,104 @@
"""
This module provides the simple backend for :class:`~pathspec.gitignore.GitIgnoreSpec`.
WARNING: The *pathspec._backends.simple* package is not part of the public API.
Its contents and structure are likely to change.
"""
from collections.abc import (
Sequence)
from typing import (
Optional) # Replaced by `X | None` in 3.10.
from pathspec.pattern import (
RegexPattern)
from pathspec.patterns.gitignore.spec import (
_DIR_MARK)
from pathspec._typing import (
override) # Added in 3.12.
from .pathspec import (
SimplePsBackend)
class SimpleGiBackend(SimplePsBackend):
    """
    The :class:`SimpleGiBackend` class is the default (or simple) backend used
    by :class:`~pathspec.gitignore.GitIgnoreSpec` for matching files.
    """

    # Narrow the type hint inherited from the base class.
    _patterns: list[tuple[int, RegexPattern]]

    def __init__(
        self,
        patterns: Sequence[RegexPattern],
        *,
        no_filter: Optional[bool] = None,
        no_reverse: Optional[bool] = None,
    ) -> None:
        """
        Initialize the :class:`SimpleGiBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.RegexPattern`) contains the
        compiled patterns.

        *no_filter* (:class:`bool`) is whether to keep no-op patterns
        (:data:`True`), or remove them (:data:`False`).

        *no_reverse* (:class:`bool`) is whether to keep the pattern order
        (:data:`True`), or reverse the order (:data:`False`).
        """
        super().__init__(patterns, no_filter=no_filter, no_reverse=no_reverse)

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        is_reversed = self._is_reversed

        best_include: Optional[bool] = None
        best_index: Optional[int] = None
        best_priority = 0

        for index, pattern in self._patterns:
            include = pattern.include
            if include is None:
                # No-op pattern: it is never matched against the file.
                continue

            match = pattern.match_file(file)
            if match is None:
                continue

            # A match by a file pattern (2) outranks a match through the
            # directory marker (1).
            dir_mark = match.match.groupdict().get(_DIR_MARK)
            priority = 1 if dir_mark else 2

            if is_reversed:
                if priority > best_priority:
                    best_include = include
                    best_index = index
                    best_priority = priority

                if priority == 2:
                    # Patterns are being checked in reverse order. The first
                    # pattern that matches with priority 2 takes precedence.
                    break

            elif (include and dir_mark) or priority >= best_priority:
                # Forward order: a later match replaces an earlier one.
                best_include = include
                best_index = index
                best_priority = priority

        return (best_include, best_index)

View File

@@ -0,0 +1,76 @@
"""
This module provides the simple backend for :class:`~pathspec.pathspec.PathSpec`.
WARNING: The *pathspec._backends.simple* package is not part of the public API.
Its contents and structure are likely to change.
"""
from collections.abc import (
Sequence)
from typing import (
Optional) # Replaced by `X | None` in 3.10.
from pathspec.backend import (
_Backend)
from pathspec.pattern import (
Pattern)
from pathspec._typing import (
override) # Added in 3.12.
from pathspec.util import (
check_match_file)
from .._utils import (
enumerate_patterns)
class SimplePsBackend(_Backend):
    """
    The :class:`SimplePsBackend` class is the default (or simple) backend used
    by :class:`~pathspec.pathspec.PathSpec` for matching files.
    """

    def __init__(
        self,
        patterns: Sequence[Pattern],
        *,
        no_filter: Optional[bool] = None,
        no_reverse: Optional[bool] = None,
    ) -> None:
        """
        Initialize the :class:`SimplePsBackend` instance.

        *patterns* (:class:`Sequence` of :class:`.Pattern`) contains the
        compiled patterns.

        *no_filter* (:class:`bool`) is whether to keep no-op patterns
        (:data:`True`), or remove them (:data:`False`).

        *no_reverse* (:class:`bool`) is whether to keep the pattern order
        (:data:`True`), or reverse the order (:data:`False`).
        """
        reverse = not no_reverse
        keep_filter = not no_filter

        self._is_reversed: bool = reverse
        """
        *_is_reversed* (:class:`bool`) is whether the pattern order was
        reversed.
        """

        self._patterns: list[tuple[int, Pattern]] = enumerate_patterns(
            patterns, filter=keep_filter, reverse=reverse,
        )
        """
        *_patterns* (:class:`list` of :class:`tuple`) contains the enumerated
        patterns.
        """

    @override
    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        result = check_match_file(self._patterns, file, self._is_reversed)
        return result

67
lib/pathspec/_meta.py Normal file
View File

@@ -0,0 +1,67 @@
"""
This module contains the project meta-data.
"""
__author__ = "Caleb P. Burns"
__copyright__ = "Copyright © 2013-2026 Caleb P. Burns"
__credits__ = [
"Hong Minhee <https://github.com/dahlia>",
"Brandon High <https://github.com/highb>",
"029xue <https://github.com/029xue>",
"Michael Huynh <https://github.com/mikexstudios>",
"Nick Humrich <https://github.com/nhumrich>",
"David Fraser <https://github.com/davidfraser>",
"Charles Samborski <https://github.com/demurgos>",
"George Hickman <https://github.com/ghickman>",
"Vincent Driessen <https://github.com/nvie>",
"Adrien Vergé <https://github.com/adrienverge>",
"Anders Blomdell <https://github.com/AndersBlomdell>",
"Xavier Thomas <https://github.com/thmxv>",
"Wim Jeantine-Glenn <https://github.com/wimglenn>",
"Hugo van Kemenade <https://github.com/hugovk>",
"Dan Cecile <https://github.com/dcecile>",
"MrOutis <https://github.com/mroutis>",
"Jon Dufresne <https://github.com/jdufresne>",
"Greg Roodt <https://github.com/groodt>",
"Florin T. <https://github.com/ftrofin>",
"Ben Felder <https://github.com/pykong>",
"Nicholas Hollander <https://github.com/nhhollander>",
"KOLANICH <https://github.com/KOLANICH>",
"Jon Hays <https://github.com/JonjonHays>",
"Isaac0616 <https://github.com/Isaac0616>",
"Sebastiaan Zeeff <https://github.com/SebastiaanZ>",
"Roel Adriaans <https://github.com/RoelAdriaans>",
"Ravi Selker <https://github.com/raviselker>",
"Johan Vergeer <https://github.com/johanvergeer>",
"danjer <https://github.com/danjer>",
"Jan-Hein Bührman <https://github.com/jhbuhrman>",
"Wim-Peter Dirks <https://github.com/WPDOrdina>",
"Karthikeyan Singaravelan <https://github.com/tirkarthi>",
"John Vandenberg <https://github.com/jayvdb>",
"John T. Wodder II <https://github.com/jwodder>",
"Tomasz Kłoczko <https://github.com/kloczek>",
"Oren <https://github.com/orens>",
"SP Mohanty <https://github.com/spMohanty>",
"Richard Si <https://github.com/ichard26>",
"Jakub Kuczys <https://github.com/jack1142>",
"Michał Górny <https://github.com/mgorny>",
"Bartłomiej Żak <https://github.com/bzakdd>",
"Matthias <https://github.com/haimat>",
"Avasam <https://github.com/Avasam>",
"Anıl Karagenç <https://github.com/karagenc>",
"Yannic Schröder <https://github.com/yschroeder>",
"axesider <https://github.com/axesider>",
"TomRuk <https://github.com/tomruk>",
"Oleh Prypin <https://github.com/oprypin>",
"Lumina <https://github.com/lumina37>",
"Kurt McKee <https://github.com/kurtmckee>",
"Dobatymo <https://github.com/Dobatymo>",
"Tomoki Nakamaru <https://github.com/tomokinakamaru>",
"Sebastien Eskenazi <https://github.com/SebastienEske>",
"Bar Vered <https://github.com/barv-jfrog>",
"Tzach Shabtay <https://github.com/tzachshabtay>",
"Adam Dangoor <https://github.com/adamtheturtle>",
"Marcel Telka <https://github.com/mtelka>",
"Dmytro Kostochko <https://github.com/Alerion>",
]
__license__ = "MPL 2.0"

64
lib/pathspec/_typing.py Normal file
View File

@@ -0,0 +1,64 @@
"""
This module provides stubs for type hints not supported by all relevant Python
versions.
NOTICE: This project should have zero required dependencies which means it
cannot simply require :module:`typing_extensions`, and I do not want to maintain
a vendored copy of :module:`typing_extensions`.
"""
import functools
import warnings
from typing import (
Any,
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional, # Replaced by `X | None` in 3.10.
TypeVar)
try:
from typing import AnyStr # Removed in 3.18.
except ImportError:
AnyStr = TypeVar('AnyStr', str, bytes)
try:
from typing import Never # Added in 3.11.
except ImportError:
from typing import NoReturn as Never
F = TypeVar('F', bound=Callable[..., Any])

try:
    from warnings import deprecated  # Added in 3.13.
except ImportError:
    try:
        from typing_extensions import deprecated
    except ImportError:
        def deprecated(
            message: str,
            /, *,
            category: Optional[type[Warning]] = DeprecationWarning,
            stacklevel: int = 1,
        ) -> Callable[[F], F]:
            """
            Minimal stand-in for :func:`warnings.deprecated`, used only when
            neither the standard library nor :module:`typing_extensions`
            provides it. Only callables are supported.

            *message* (:class:`str`) is the deprecation message.

            *category* (:class:`type` or :data:`None`) is the warning category
            to emit each time the callable is called. If :data:`None`, no
            warning is emitted at runtime.

            *stacklevel* (:class:`int`) is the stack level of the warning,
            relative to the caller of the deprecated callable.

            Returns the decorator (:class:`~collections.abc.Callable`).
            """
            def decorator(f: F) -> F:
                if category is None:
                    # `warnings.warn(..., category=None)` would fall back to
                    # UserWarning. The real decorator emits nothing in this
                    # case, so the callable is returned unwrapped.
                    f.__deprecated__ = message
                    return f

                @functools.wraps(f)
                def wrapper(*a, **k):
                    # +1 so the warning points at the caller, not the wrapper.
                    warnings.warn(message, category=category, stacklevel=stacklevel+1)
                    return f(*a, **k)

                # Expose the message like the real decorator does.
                wrapper.__deprecated__ = message
                return wrapper
            return decorator
# Resolve `override` from the first available source: the standard library,
# then `typing_extensions`, then a local no-op.
try:
    from typing import override  # Added in 3.12.
except ImportError:
    try:
        from typing_extensions import override
    except ImportError:
        def override(f: F) -> F:
            """
            No-op stand-in for :func:`typing.override`.

            *f* (:class:`~collections.abc.Callable`) is the decorated callable.

            Returns *f* unchanged.
            """
            return f
def assert_unreachable(message: str) -> Never:
    """
    Mark a code path as unreachable by always raising an
    :class:`AssertionError`.

    *message* (:class:`str`) is the error message.
    """
    failure = AssertionError(message)
    raise failure

5
lib/pathspec/_version.py Normal file
View File

@@ -0,0 +1,5 @@
"""
This module defines the version.
"""
__version__ = "1.0.4"

40
lib/pathspec/backend.py Normal file
View File

@@ -0,0 +1,40 @@
"""
This module defines the necessary classes and type hints for exposing the bare
minimum of the internal implementations for the pattern (regular expression)
matching backends. The exact structure of the backends is not solidified and is
subject to change.
"""
from typing import (
Literal,
Optional)
BackendNamesHint = Literal['best', 'hyperscan', 're2', 'simple']
"""
The supported backend values.
"""
class _Backend(object):
    """
    .. warning:: This class is not part of the public API. It is subject to
        change.

    The :class:`_Backend` class is the abstract base class that defines how
    files are matched against patterns.
    """

    def match_file(self, file: str) -> tuple[Optional[bool], Optional[int]]:
        """
        Check the file against the patterns. Subclasses must override this
        method; the base implementation always raises
        :class:`NotImplementedError`.

        *file* (:class:`str`) is the normalized file path to check.

        Returns a :class:`tuple` containing whether to include *file*
        (:class:`bool` or :data:`None`), and the index of the last matched
        pattern (:class:`int` or :data:`None`).
        """
        owner = self.__class__
        # Name the concrete class so the missing override is easy to find.
        raise NotImplementedError(
            f"{owner.__module__}.{owner.__qualname__}.match_file() must be implemented."
        )

165
lib/pathspec/gitignore.py Normal file
View File

@@ -0,0 +1,165 @@
"""
This module provides :class:`.GitIgnoreSpec` which replicates *.gitignore*
behavior, and handles edge-cases where Git's behavior differs from what's
documented. Git allows including files from excluded directories which directly
contradicts the documentation. This uses :class:`.GitIgnoreSpecPattern` to fully
replicate Git's handling.
"""
from __future__ import annotations
from collections.abc import (
Iterable,
Sequence)
from typing import (
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional, # Replaced by `X | None` in 3.10.
TypeVar,
Union, # Replaced by `X | Y` in 3.10.
cast,
overload)
from pathspec.backend import (
BackendNamesHint,
_Backend)
from pathspec._backends.agg import (
make_gitignore_backend)
from pathspec.pathspec import (
PathSpec)
from pathspec.pattern import (
Pattern)
from pathspec.patterns.gitignore.basic import (
GitIgnoreBasicPattern)
from pathspec.patterns.gitignore.spec import (
GitIgnoreSpecPattern)
from pathspec._typing import (
AnyStr, # Removed in 3.18.
override) # Added in 3.12.
from pathspec.util import (
_is_iterable,
lookup_pattern)
Self = TypeVar("Self", bound='GitIgnoreSpec')
"""
:class:`.GitIgnoreSpec` self type hint to support Python v<3.11 using PEP 673
recommendation.
"""
class GitIgnoreSpec(PathSpec):
    """
    The :class:`GitIgnoreSpec` class extends :class:`.PathSpec` to replicate
    *gitignore* behavior. This uses :class:`.GitIgnoreSpecPattern` to fully
    replicate Git's handling.
    """

    def __eq__(self, other: object) -> bool:
        """
        Tests the equality of this gitignore-spec with *other*
        (:class:`.GitIgnoreSpec`) by comparing their
        :attr:`self.patterns <.PathSpec.patterns>` attributes. A
        non-:class:`GitIgnoreSpec` will not compare equal.
        """
        if isinstance(other, GitIgnoreSpec):
            return super().__eq__(other)
        elif isinstance(other, PathSpec):
            # A plain PathSpec never equals a GitIgnoreSpec, even when the
            # patterns are the same.
            return False
        else:
            return NotImplemented

    # Support reversed order of arguments from PathSpec.
    @overload
    @classmethod
    def from_lines(
        cls: type[Self],
        pattern_factory: Union[str, Callable[[AnyStr], Pattern], None],
        lines: Iterable[AnyStr],
        *,
        backend: Union[BackendNamesHint, str, None] = None,
        _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
    ) -> Self:
        ...

    @overload
    @classmethod
    def from_lines(
        cls: type[Self],
        lines: Iterable[AnyStr],
        pattern_factory: Union[str, Callable[[AnyStr], Pattern], None] = None,
        *,
        backend: Union[BackendNamesHint, str, None] = None,
        _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
    ) -> Self:
        ...

    @override
    @classmethod
    def from_lines(
        cls: type[Self],
        lines: Iterable[AnyStr],
        pattern_factory: Union[str, Callable[[AnyStr], Pattern], None] = None,
        *,
        backend: Union[BackendNamesHint, str, None] = None,
        _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
    ) -> Self:
        """
        Compiles the pattern lines.

        *lines* (:class:`~collections.abc.Iterable`) yields each uncompiled
        pattern (:class:`str`). This simply has to yield each line, so it can be
        a :class:`io.TextIOBase` (e.g., from :func:`open` or
        :class:`io.StringIO`) or the result from :meth:`str.splitlines`.

        *pattern_factory* does not need to be set for :class:`GitIgnoreSpec`. If
        set, it should be either ``"gitignore"`` or
        :class:`.GitIgnoreSpecPattern`. There is no guarantee it will work with
        any other pattern class. Default is :data:`None` for
        :class:`.GitIgnoreSpecPattern`.

        *backend* (:class:`str` or :data:`None`) is the pattern (regular
        expression) matching backend to use. Default is :data:`None` for "best"
        to use the best available backend. Priority of backends is: "re2",
        "hyperscan", "simple". The "simple" backend is always available.

        Raises :class:`TypeError` if *pattern_factory* is
        :class:`.GitIgnoreBasicPattern` or a subclass of it.

        Returns the :class:`GitIgnoreSpec` instance.
        """
        if (isinstance(lines, (str, bytes)) or callable(lines)) and _is_iterable(pattern_factory):
            # Support reversed order of arguments from PathSpec.
            pattern_factory, lines = lines, pattern_factory

        if pattern_factory is None or pattern_factory == 'gitignore':
            # Force use of GitIgnoreSpecPattern for "gitignore" to handle
            # edge-cases. This makes usage easier.
            pattern_factory = GitIgnoreSpecPattern

        if isinstance(pattern_factory, str):
            pattern_factory = lookup_pattern(pattern_factory)

        # `issubclass()` raises an unrelated TypeError for a non-class callable
        # (e.g. a function used as the factory), so only classes are checked.
        if isinstance(pattern_factory, type) and issubclass(pattern_factory, GitIgnoreBasicPattern):
            raise TypeError((
                f"{pattern_factory=!r} cannot be {GitIgnoreBasicPattern} because it "
                f"will give unexpected results."
            ))  # TypeError

        self = super().from_lines(pattern_factory, lines, backend=backend, _test_backend_factory=_test_backend_factory)
        return cast(Self, self)

    @override
    @staticmethod
    def _make_backend(
        name: BackendNamesHint,
        patterns: Sequence[Pattern],
    ) -> _Backend:
        """
        .. warning:: This method is not part of the public API. It is subject to
            change.

        Create the backend for the patterns.

        *name* (:class:`str`) is the name of the backend.

        *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
        contains the compiled patterns.

        Returns the backend (:class:`._Backend`).
        """
        return make_gitignore_backend(name, patterns)

460
lib/pathspec/pathspec.py Normal file
View File

@@ -0,0 +1,460 @@
"""
This module provides :class:`.PathSpec` which is an object-oriented interface
for pattern matching of files.
"""
from __future__ import annotations
from collections.abc import (
Collection,
Iterable,
Iterator,
Sequence)
from itertools import (
zip_longest)
from typing import (
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Optional, # Replaced by `X | None` in 3.10.
TypeVar,
Union, # Replaced by `X | Y` in 3.10.
cast)
Self = TypeVar("Self", bound='PathSpec')
"""
:class:`.PathSpec` self type hint to support Python v<3.11 using PEP 673
recommendation.
"""
from pathspec import util
from pathspec.backend import (
_Backend,
BackendNamesHint)
from pathspec._backends.agg import (
make_pathspec_backend)
from pathspec.pattern import (
Pattern)
from pathspec._typing import (
AnyStr, # Removed in 3.18.
deprecated) # Added in 3.13.
from pathspec.util import (
CheckResult,
StrPath,
TStrPath,
TreeEntry,
_is_iterable,
normalize_file)
class PathSpec(object):
"""
The :class:`PathSpec` class is a wrapper around a list of compiled
:class:`.Pattern` instances.
"""
def __init__(
    self,
    patterns: Union[Sequence[Pattern], Iterable[Pattern]],
    *,
    backend: Union[BackendNamesHint, str, None] = None,
    _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
) -> None:
    """
    Initializes the :class:`.PathSpec` instance.

    *patterns* (:class:`~collections.abc.Sequence` or
    :class:`~collections.abc.Iterable`) contains each compiled pattern
    (:class:`.Pattern`). If not a sequence, it will be converted to a
    :class:`list`.

    *backend* (:class:`str` or :data:`None`) is the pattern (regular expression)
    matching backend to use. Default is :data:`None` for "best" to use the best
    available backend. Priority of backends is: "re2", "hyperscan", "simple".
    The "simple" backend is always available.
    """
    # A sequence is stored as given; any other iterable is materialized.
    pattern_seq = patterns if isinstance(patterns, Sequence) else list(patterns)
    backend_name = cast(BackendNamesHint, 'best' if backend is None else backend)

    if _test_backend_factory is None:
        matcher = self._make_backend(backend_name, pattern_seq)
    else:
        # A supplied factory replaces the normal backend construction.
        matcher = _test_backend_factory(pattern_seq)

    self._backend: _Backend = matcher
    """
    *_backend* (:class:`._Backend`) is the pattern (regular expression) matching
    backend.
    """

    self._backend_name: BackendNamesHint = backend_name
    """
    *_backend_name* (:class:`str`) is the name of backend to use.
    """

    self.patterns: Sequence[Pattern] = pattern_seq
    """
    *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
    contains the compiled patterns.
    """
def __add__(self: Self, other: PathSpec) -> Self:
    """
    Combines the :attr:`self.patterns <.PathSpec.patterns>` patterns from two
    :class:`PathSpec` instances.

    Returns a new instance of this instance's class, created with this
    instance's backend name. Returns :data:`NotImplemented` if *other* is not a
    :class:`PathSpec`.
    """
    if not isinstance(other, PathSpec):
        return NotImplemented

    # The two specs may hold different sequence types (e.g. a list and a
    # tuple), which `+` cannot concatenate. Unpacking works for any pair.
    combined = [*self.patterns, *other.patterns]
    return self.__class__(combined, backend=self._backend_name)
def __eq__(self, other: object) -> bool:
"""
Tests the equality of this path-spec with *other* (:class:`PathSpec`) by
comparing their :attr:`self.patterns <.PathSpec.patterns>` attributes.
"""
if isinstance(other, PathSpec):
paired_patterns = zip_longest(self.patterns, other.patterns)
return all(a == b for a, b in paired_patterns)
else:
return NotImplemented
def __iadd__(self: Self, other: PathSpec) -> Self:
"""
Adds the :attr:`self.patterns <.PathSpec.patterns>` from *other*
(:class:`PathSpec`) to this instance.
"""
if isinstance(other, PathSpec):
self.patterns += other.patterns
self._backend = self._make_backend(self._backend_name, self.patterns)
return self
else:
return NotImplemented
def __len__(self) -> int:
"""
Returns the number of :attr:`self.patterns <.PathSpec.patterns>` this
path-spec contains (:class:`int`).
"""
return len(self.patterns)
def check_file(
    self,
    file: TStrPath,
    separators: Optional[Collection[str]] = None,
) -> CheckResult[TStrPath]:
    """
    Check a single file against this path-spec.

    *file* (:class:`str` or :class:`os.PathLike`) is the file path to be matched
    against :attr:`self.patterns <.PathSpec.patterns>`.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. See
    :func:`.normalize_file` for more information.

    Returns the file check result (:class:`.CheckResult`).
    """
    # The backend is given the normalized path; the result keeps the path
    # exactly as the caller supplied it.
    include, index = self._backend.match_file(normalize_file(file, separators))
    return CheckResult(file, include, index)
def check_files(
    self,
    files: Iterable[TStrPath],
    separators: Optional[Collection[str]] = None,
) -> Iterator[CheckResult[TStrPath]]:
    """
    Check the files against this path-spec.

    *files* (:class:`~collections.abc.Iterable` of :class:`str` or
    :class:`os.PathLike`) contains the file paths to be checked against
    :attr:`self.patterns <.PathSpec.patterns>`.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. See
    :func:`.normalize_file` for more information.

    Returns an :class:`~collections.abc.Iterator` yielding each file check
    result (:class:`.CheckResult`). Because this is a generator, the
    :class:`TypeError` for a non-iterable *files* is raised when iteration
    starts.
    """
    if not _is_iterable(files):
        raise TypeError(f"files:{files!r} is not an iterable.")

    for orig_file in files:
        # The backend is given the normalized path; the result keeps the path
        # exactly as the caller supplied it.
        include, index = self._backend.match_file(
            normalize_file(orig_file, separators)
        )
        yield CheckResult(orig_file, include, index)
def check_tree_files(
    self,
    root: StrPath,
    on_error: Optional[Callable[[OSError], None]] = None,
    follow_links: Optional[bool] = None,
) -> Iterator[CheckResult[str]]:
    """
    Walks the specified root path for all files and checks them against this
    path-spec.

    *root* (:class:`str` or :class:`os.PathLike`) is the root directory to
    search for files.

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally
    is the error handler for file-system exceptions. It will be called with the
    exception (:exc:`OSError`). Reraise the exception to abort the walk. Default
    is :data:`None` to ignore file-system exceptions.

    *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk
    symbolic links that resolve to directories. Default is :data:`None` for
    :data:`True`.

    Returns an :class:`~collections.abc.Iterator` yielding each file check
    result (:class:`.CheckResult`).
    """
    # NOTE: Unlike the match_tree_*() methods, this method takes no *negate*
    # argument; a check result reports the include state as-is.
    files = util.iter_tree_files(root, on_error=on_error, follow_links=follow_links)
    yield from self.check_files(files)
@classmethod
def from_lines(
    cls: type[Self],
    pattern_factory: Union[str, Callable[[AnyStr], Pattern]],
    lines: Iterable[AnyStr],
    *,
    backend: Union[BackendNamesHint, str, None] = None,
    _test_backend_factory: Optional[Callable[[Sequence[Pattern]], _Backend]] = None,
) -> Self:
    """
    Builds a path-spec by compiling each pattern line.

    *pattern_factory* is either the name of a registered pattern factory
    (:class:`str`), or a :class:`~collections.abc.Callable` used to compile
    patterns. The callable must accept an uncompiled pattern (:class:`str`)
    and return the compiled pattern (:class:`.Pattern`).

    *lines* (:class:`~collections.abc.Iterable`) yields each uncompiled pattern
    (:class:`str`). Anything that yields lines works, e.g. a
    :class:`io.TextIOBase` (from :func:`open` or :class:`io.StringIO`) or the
    result of :meth:`str.splitlines`. Falsy lines (such as ``""``) are skipped.

    *backend* (:class:`str` or :data:`None`) is the pattern (or regular
    expression) matching backend to use. Default is :data:`None` for "best" to
    use the best available backend. Priority of backends is: "re2", "hyperscan",
    "simple". The "simple" backend is always available.

    Returns the :class:`PathSpec` instance.

    Raises :exc:`TypeError` if the resolved factory is not callable or if
    *lines* is not an iterable.
    """
    factory = pattern_factory
    if isinstance(factory, str):
        # A name was given: resolve it to the registered factory.
        factory = util.lookup_pattern(factory)

    if not callable(factory):
        raise TypeError(f"pattern_factory:{factory!r} is not callable.")

    if not _is_iterable(lines):
        raise TypeError(f"lines:{lines!r} is not an iterable.")

    compiled = []
    for line in lines:
        if line:
            compiled.append(factory(line))

    return cls(compiled, backend=backend, _test_backend_factory=_test_backend_factory)
@staticmethod
def _make_backend(
    name: BackendNamesHint,
    patterns: Sequence[Pattern],
) -> _Backend:
    """
    .. warning:: This method is not part of the public API. It is subject to
        change.

    Creates the matching backend for the given patterns.

    *name* (:class:`str`) is the name of the backend.

    *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
    contains the compiled patterns.

    Returns the matcher (:class:`._Backend`).
    """
    # Thin indirection over the module-level factory.
    backend = make_pathspec_backend(name, patterns)
    return backend
def match_entries(
    self,
    entries: Iterable[TreeEntry],
    separators: Optional[Collection[str]] = None,
    *,
    negate: Optional[bool] = None,
) -> Iterator[TreeEntry]:
    """
    Filters the entries down to those matched by this path-spec.

    *entries* (:class:`~collections.abc.Iterable` of :class:`.TreeEntry`)
    contains the entries to be matched against
    :attr:`self.patterns <.PathSpec.patterns>`.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. See
    :func:`.normalize_file` for more information.

    *negate* (:class:`bool` or :data:`None`) is whether to negate the match
    results of the patterns. If :data:`True`, a pattern matching a file will
    exclude the file rather than include it. Default is :data:`None` for
    :data:`False`.

    Returns the matched entries (:class:`~collections.abc.Iterator` of
    :class:`.TreeEntry`).

    Raises :exc:`TypeError` if *entries* is not an iterable. Because this is a
    generator, the error surfaces when iteration starts.
    """
    if not _is_iterable(entries):
        raise TypeError(f"entries:{entries!r} is not an iterable.")

    for entry in entries:
        include, _index = self._backend.match_file(
            normalize_file(entry.path, separators)
        )
        # With *negate* set, the include state is flipped before filtering.
        selected = (not include) if negate else include
        if selected:
            yield entry
def match_file(
    self,
    file: StrPath,
    separators: Optional[Collection[str]] = None,
) -> bool:
    """
    Tests whether a single file is matched by this path-spec.

    *file* (:class:`str` or :class:`os.PathLike`) is the file path to be matched
    against :attr:`self.patterns <.PathSpec.patterns>`.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`)
    optionally contains the path separators to normalize. See
    :func:`.normalize_file` for more information.

    Returns :data:`True` if *file* matched; otherwise, :data:`False`.
    """
    outcome = self._backend.match_file(normalize_file(file, separators))
    include, _index = outcome
    # Coerce the backend's include state to a strict boolean.
    return bool(include)
def match_files(
    self,
    files: Iterable[StrPath],
    separators: Optional[Collection[str]] = None,
    *,
    negate: Optional[bool] = None,
) -> Iterator[StrPath]:
    """
    Filters the files down to those matched by this path-spec.

    *files* (:class:`~collections.abc.Iterable` of :class:`str` or
    :class:`os.PathLike`) contains the file paths to be matched against
    :attr:`self.patterns <.PathSpec.patterns>`.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. See
    :func:`.normalize_file` for more information.

    *negate* (:class:`bool` or :data:`None`) is whether to negate the match
    results of the patterns. If :data:`True`, a pattern matching a file will
    exclude the file rather than include it. Default is :data:`None` for
    :data:`False`.

    Returns the matched files (:class:`~collections.abc.Iterator` of
    :class:`str` or :class:`os.PathLike`). Each path is yielded exactly as it
    was supplied, not in its normalized form.

    Raises :exc:`TypeError` if *files* is not an iterable. Because this is a
    generator, the error surfaces when iteration starts.
    """
    if not _is_iterable(files):
        raise TypeError(f"files:{files!r} is not an iterable.")

    for given_path in files:
        include, _index = self._backend.match_file(
            normalize_file(given_path, separators)
        )
        # With *negate* set, the include state is flipped before filtering.
        selected = (not include) if negate else include
        if selected:
            yield given_path
def match_tree_entries(
    self,
    root: StrPath,
    on_error: Optional[Callable[[OSError], None]] = None,
    follow_links: Optional[bool] = None,
    *,
    negate: Optional[bool] = None,
) -> Iterator[TreeEntry]:
    """
    Walks the specified root path and yields the entries matched by this
    path-spec.

    *root* (:class:`str` or :class:`os.PathLike`) is the root directory to
    search.

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally
    is the error handler for file-system exceptions. It will be called with the
    exception (:exc:`OSError`). Reraise the exception to abort the walk. Default
    is :data:`None` to ignore file-system exceptions.

    *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk
    symbolic links that resolve to directories. Default is :data:`None` for
    :data:`True`.

    *negate* (:class:`bool` or :data:`None`) is whether to negate the match
    results of the patterns. If :data:`True`, a pattern matching a file will
    exclude the file rather than include it. Default is :data:`None` for
    :data:`False`.

    Returns the matched files (:class:`~collections.abc.Iterator` of
    :class:`.TreeEntry`).
    """
    # Walk lazily, then reuse the entry filter for the actual matching.
    walked = util.iter_tree_entries(
        root,
        on_error=on_error,
        follow_links=follow_links,
    )
    yield from self.match_entries(walked, negate=negate)
# NOTICE: The deprecation warning was only added in 1.0.0 (from 2026-01-05).
@deprecated(
    "PathSpec.match_tree() is deprecated. Use .match_tree_files() instead."
)
def match_tree(self, *args, **kw) -> Iterator[str]:
    """
    .. version-deprecated:: 0.3.2
        This is an alias for the :meth:`self.match_tree_files <.PathSpec.match_tree_files>`
        method.

    All positional and keyword arguments are forwarded unchanged.
    """
    matched = self.match_tree_files(*args, **kw)
    return matched
def match_tree_files(
    self,
    root: StrPath,
    on_error: Optional[Callable[[OSError], None]] = None,
    follow_links: Optional[bool] = None,
    *,
    negate: Optional[bool] = None,
) -> Iterator[str]:
    """
    Walks the specified root path and yields the files matched by this
    path-spec.

    *root* (:class:`str` or :class:`os.PathLike`) is the root directory to
    search for files.

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally
    is the error handler for file-system exceptions. It will be called with the
    exception (:exc:`OSError`). Reraise the exception to abort the walk. Default
    is :data:`None` to ignore file-system exceptions.

    *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk
    symbolic links that resolve to directories. Default is :data:`None` for
    :data:`True`.

    *negate* (:class:`bool` or :data:`None`) is whether to negate the match
    results of the patterns. If :data:`True`, a pattern matching a file will
    exclude the file rather than include it. Default is :data:`None` for
    :data:`False`.

    Returns the matched files (:class:`~collections.abc.Iterable` of :class:`str`).
    """
    # Walk lazily, then reuse the file filter for the actual matching.
    walked = util.iter_tree_files(
        root,
        on_error=on_error,
        follow_links=follow_links,
    )
    yield from self.match_files(walked, negate=negate)

241
lib/pathspec/pattern.py Normal file
View File

@@ -0,0 +1,241 @@
"""
This module provides the base definition for patterns.
"""
from __future__ import annotations
import re
from collections.abc import (
Iterable,
Iterator)
from dataclasses import (
dataclass)
from typing import (
Any,
Optional, # Replaced by `X | None` in 3.10.
TypeVar,
Union) # Replaced by `X | Y` in 3.10.
from ._typing import (
AnyStr, # Removed in 3.18.
deprecated, # Added in 3.13.
override) # Added in 3.12.
# Type variable bound to RegexPattern. It annotates both ``self`` and the
# return value of ``RegexPattern.__copy__`` so that the copy of a subclass is
# typed as that subclass.
RegexPatternSelf = TypeVar("RegexPatternSelf", bound='RegexPattern')
"""
:class:`.RegexPattern` self type hint to support Python v<3.11 using PEP 673
recommendation.
"""
class Pattern:
    """
    The :class:`Pattern` class is the abstract definition of a pattern.
    Subclasses must override :meth:`match_file`.
    """

    # Declaring slots keeps instances dict-less.
    __slots__ = ('include',)

    def __init__(self, include: Optional[bool]) -> None:
        """
        Initializes the :class:`Pattern` instance.

        *include* (:class:`bool` or :data:`None`) is whether the matched files
        should be included (:data:`True`), excluded (:data:`False`), or is a
        null-operation (:data:`None`).
        """
        self.include = include
        """
        *include* (:class:`bool` or :data:`None`) is whether the matched files
        should be included (:data:`True`), excluded (:data:`False`), or is a
        null-operation (:data:`None`).
        """

    @deprecated(
        "Pattern.match() is deprecated. Use Pattern.match_file() with a loop for "
        "similar results."
    )
    def match(self, files: Iterable[str]) -> Iterator[str]:
        """
        .. version-deprecated:: 0.10.0
            This method is no longer used. Use the :meth:`self.match_file <.Pattern.match_file>`
            method with a loop for similar results.

        Matches this pattern against the specified files.

        *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains each
        file relative to the root directory.

        Returns an :class:`~collections.abc.Iterable` yielding each matched file
        path (:class:`str`).
        """
        for candidate in files:
            # Any non-null match result counts as a match.
            if self.match_file(candidate) is None:
                continue
            yield candidate

    def match_file(self, file: str) -> Optional[Any]:
        """
        Matches this pattern against the specified file.

        *file* (:class:`str`) is the normalized file path to match against.

        Returns the match result if *file* matched; otherwise, :data:`None`.

        The base implementation always raises :exc:`NotImplementedError`.
        """
        owner = self.__class__
        raise NotImplementedError(
            f"{owner.__module__}.{owner.__qualname__} must override match_file()."
        )
class RegexPattern(Pattern):
    """
    The :class:`RegexPattern` class is an implementation of a pattern using
    regular expressions.
    """

    # Keep the class dict-less.
    __slots__ = ('pattern', 'regex')

    def __init__(
        self,
        pattern: Union[AnyStr, re.Pattern, None],
        include: Optional[bool] = None,
    ) -> None:
        """
        Initializes the :class:`RegexPattern` instance.

        *pattern* (:class:`str`, :class:`bytes`, :class:`re.Pattern`, or
        :data:`None`) is the pattern to compile into a regular expression.

        *include* (:class:`bool` or :data:`None`) must be :data:`None` unless
        *pattern* is a precompiled regular expression (:class:`re.Pattern`) in which
        case it is whether matched files should be included (:data:`True`), excluded
        (:data:`False`), or is a null operation (:data:`None`).

            .. note:: Subclasses do not need to support the *include* parameter.

        Raises :exc:`TypeError` if *pattern* is none of the supported types.
        """
        if pattern is None:
            # A null pattern is allowed and yields a null-operation.
            assert include is None, (
                f"include:{include!r} must be null when pattern:{pattern!r} is null."
            )
            regex = None

        elif isinstance(pattern, (str, bytes)):
            assert include is None, (
                f"include:{include!r} must be null when pattern:{pattern!r} is a string."
            )
            regex, include = self.pattern_to_regex(pattern)
            # A null *include* marks a null-operation; its regex (possibly
            # null as well) is left uncompiled.
            if include is not None:
                regex = re.compile(regex)

        elif hasattr(pattern, 'match'):
            # Duck-typed as a precompiled regular expression. The *include*
            # given by the caller is used as-is.
            regex = pattern

        else:
            raise TypeError(f"pattern:{pattern!r} is not a string, re.Pattern, or None.")

        super().__init__(include)

        self.pattern: Union[AnyStr, re.Pattern, None] = pattern
        """
        *pattern* (:class:`str`, :class:`bytes`, :class:`re.Pattern`, or
        :data:`None`) is the uncompiled, input pattern. This is for reference.
        """

        self.regex: Optional[re.Pattern] = regex
        """
        *regex* (:class:`re.Pattern` or :data:`None`) is the compiled regular
        expression for the pattern.
        """

    def __copy__(self: RegexPatternSelf) -> RegexPatternSelf:
        """
        Performs a shallow copy of the pattern.

        Returns the copy (:class:`RegexPattern`).
        """
        # Rebuild from the compiled regex so nothing is recompiled, then restore
        # the original input pattern for reference.
        clone = self.__class__(self.regex, self.include)
        clone.pattern = self.pattern
        return clone

    def __eq__(self, other: RegexPattern) -> bool:
        """
        Tests the equality of this regex pattern with *other* (:class:`RegexPattern`)
        by comparing their :attr:`~Pattern.include` and :attr:`~RegexPattern.regex`
        attributes.

        Returns :data:`NotImplemented` when *other* is not a :class:`RegexPattern`.
        """
        if not isinstance(other, RegexPattern):
            return NotImplemented
        return self.include == other.include and self.regex == other.regex

    @override
    def match_file(self, file: AnyStr) -> Optional[RegexMatchResult]:
        """
        Matches this pattern against the specified file.

        *file* (:class:`str` or :class:`bytes`) is the file path relative to the
        root directory (e.g., "relative/path/to/file").

        Returns the match result (:class:`.RegexMatchResult`) if *file* matched;
        otherwise, :data:`None`.
        """
        if self.include is None:
            # Null-operation patterns never match.
            return None

        found = self.regex.search(file)
        if found is None:
            return None
        return RegexMatchResult(found)

    @classmethod
    def pattern_to_regex(
        cls,
        pattern: AnyStr,
    ) -> tuple[Optional[AnyStr], Optional[bool]]:
        """
        Convert the pattern into an uncompiled regular expression.

        *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert into a
        regular expression.

        Returns a :class:`tuple` containing:

            -   *pattern* (:class:`str`, :class:`bytes` or :data:`None`) is the
                uncompiled regular expression.

            -   *include* (:class:`bool` or :data:`None`) is whether matched files
                should be included (:data:`True`), excluded (:data:`False`), or is a
                null-operation (:data:`None`).

            .. note:: The default implementation simply returns *pattern* and
                :data:`True`.
        """
        return (pattern, True)
@dataclass
class RegexMatchResult:
    """
    The :class:`RegexMatchResult` data class is used to return information about
    the matched regular expression.
    """

    # Keep the class dict-less.
    __slots__ = ('match',)

    match: re.Match
    """
    *match* (:class:`re.Match`) is the regex match result.
    """

View File

@@ -0,0 +1,12 @@
"""
The *pathspec.patterns* package contains the pattern matching implementations.
"""
# Load pattern implementations.
from .gitignore import basic as _
from .gitignore import spec as _
# DEPRECATED: Deprecated since 0.11.0 (from 2023-01-24). Expose the
# GitWildMatchPattern class in this module for backward compatibility with
# 0.5.0 (from 2016-08-22).
from .gitwildmatch import GitWildMatchPattern

View File

@@ -0,0 +1,17 @@
"""
The *pathspec.patterns.gitignore* package provides the *gitignore*
implementations.
The following classes are imported and made available from this package:
- :class:`pathspec.patterns.gitignore.base.GitIgnorePatternError`
"""
# Expose the GitIgnorePatternError for convenience.
from .base import (
GitIgnorePatternError)
# Declare imports as part of the public interface. Only the names listed here
# are exported by ``from ... import *``.
__all__ = [
'GitIgnorePatternError',
]

View File

@@ -0,0 +1,176 @@
"""
This module provides common classes for the gitignore patterns.
"""
import re
from pathspec.pattern import (
RegexPattern)
from pathspec._typing import (
AnyStr) # Removed in 3.18.
# Codec used to decode byte-string input to text before processing, and to
# encode the result back to bytes afterwards (see
# ``_GitIgnoreBasePattern.escape``).
_BYTES_ENCODING = 'latin1'
"""
The encoding to use when parsing a byte string pattern.
"""
class _GitIgnoreBasePattern(RegexPattern):
    """
    .. warning:: This class is not part of the public API. It is subject to
        change.

    The :class:`_GitIgnoreBasePattern` class is the base implementation for a
    compiled gitignore pattern. It provides the helpers shared by the concrete
    gitignore pattern classes.
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @staticmethod
    def escape(s: AnyStr) -> AnyStr:
        """
        Escape special characters in the given string.

        *s* (:class:`str` or :class:`bytes`) a filename or a string that you want to
        escape, usually before adding it to a ".gitignore".

        Returns the escaped string (:class:`str` or :class:`bytes`), matching the
        type of *s*.

        Raises :exc:`TypeError` if *s* is neither :class:`str` nor :class:`bytes`.
        """
        if isinstance(s, str):
            is_bytes = False
            text = s
        elif isinstance(s, bytes):
            is_bytes = True
            text = s.decode(_BYTES_ENCODING)
        else:
            raise TypeError(f"s:{s!r} is not a unicode or byte string.")

        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        escaped = ''.join(
            f"\\{char}" if char in '[]!*#?' else char
            for char in text
        )

        return escaped.encode(_BYTES_ENCODING) if is_bytes else escaped

    @staticmethod
    def _translate_segment_glob(pattern: str) -> str:
        """
        Translates the glob pattern to a regular expression. This is used in the
        constructor to translate a path segment glob pattern to its corresponding
        regular expression.

        *pattern* (:class:`str`) is the glob pattern.

        Returns the regular expression (:class:`str`).

        Raises :exc:`ValueError` if *pattern* ends with a dangling escape
        character (a backslash with nothing after it).
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to the
        # POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
        pieces = []
        escaping = False
        pos, length = 0, len(pattern)

        while pos < length:
            # Consume the next character.
            char = pattern[pos]
            pos += 1

            if escaping:
                # The previous character was a backslash: take this one
                # literally.
                escaping = False
                pieces.append(re.escape(char))

            elif char == '\\':
                # Escape character: the next character is a literal.
                escaping = True

            elif char == '*':
                # Multi-character wildcard. Match any string (except slashes),
                # including an empty string.
                pieces.append('[^/]*')

            elif char == '?':
                # Single-character wildcard. Match any single character (except
                # a slash).
                pieces.append('[^/]')

            elif char == '[':
                # Bracket expression wildcard. Apart from a leading exclamation
                # mark, the expression can be used directly as regex, but its
                # end has to be located first.
                # - "[][!]" matches ']', '[' and '!'.
                # - "[]-]" matches ']' and '-'.
                # - "[!]a-]" matches any character except ']', 'a' and '-'.
                close = pos

                # Skip over the negation marker.
                if close < length and pattern[close] in ('!', '^'):
                    close += 1

                # A closing bracket in first position is a literal member.
                if close < length and pattern[close] == ']':
                    close += 1

                # Scan for the real closing bracket.
                while close < length and pattern[close] != ']':
                    close += 1

                if close < length:
                    # Found it. Move *close* one past the closing bracket:
                    #
                    #  [...]
                    #   ^   ^
                    #  pos close
                    #
                    close += 1
                    expr = '['

                    if pattern[pos] in ('!', '^'):
                        # Negated expression. POSIX leaves "[^...]" undefined
                        # in globs and Python's `fnmatch.translate()` treats
                        # the caret as a literal, but Git accepts it as
                        # negation. Stay consistent with Git.
                        expr += '^'
                        pos += 1

                    # Double the backslashes so that regex treats them as
                    # literal backslashes, as defined by POSIX.
                    expr += pattern[pos:close].replace('\\', '\\\\')

                    pieces.append(expr)

                    # Resume just after the closing bracket.
                    pos = close

                else:
                    # No closing bracket: the opening bracket is a literal.
                    pieces.append('\\[')

            else:
                # Regular character, escape it for regex.
                pieces.append(re.escape(char))

        if escaping:
            raise ValueError(
                f"Escape character found with no next character to escape: {pattern!r}"
            )

        return ''.join(pieces)
class GitIgnorePatternError(ValueError):
    """
    The :class:`GitIgnorePatternError` class indicates an invalid gitignore
    pattern. It is a :exc:`ValueError` subclass and adds no behavior of its
    own.
    """

View File

@@ -0,0 +1,317 @@
"""
This module provides :class:`GitIgnoreBasicPattern` which implements Git's
`gitignore`_ patterns as documented. This differs from how Git actually behaves
when including files in excluded directories.
.. _`gitignore`: https://git-scm.com/docs/gitignore
"""
from typing import (
Optional) # Replaced by `X | None` in 3.10.
from pathspec import util
from pathspec._typing import (
AnyStr, # Removed in 3.18.
assert_unreachable,
override) # Added in 3.12.
from .base import (
GitIgnorePatternError,
_BYTES_ENCODING,
_GitIgnoreBasePattern)
class GitIgnoreBasicPattern(_GitIgnoreBasePattern):
    """
    The :class:`GitIgnoreBasicPattern` class represents a compiled gitignore
    pattern as documented. This is registered as "gitignore".
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @staticmethod
    def __normalize_segments(
        is_dir_pattern: bool,
        pattern_segs: list[str],
    ) -> tuple[Optional[list[str]], Optional[str]]:
        """
        Normalize the pattern segments to make processing easier.

        *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory
        pattern (i.e., ends with a slash '/').

        *pattern_segs* (:class:`list` of :class:`str`) contains the pattern
        segments. This may be modified in place.

        Returns a :class:`tuple` containing either:

            -   The normalized segments (:class:`list` of :class:`str`; or :data:`None`).

            -   The regular expression override (:class:`str` or :data:`None`).

        Raises :exc:`ValueError` if no segment remains after normalization.
        """
        first = pattern_segs[0]
        if not first:
            # Leading slash ('/'): the pattern is anchored to the root
            # directory. Drop the empty first segment so the remainder is
            # relative to root.
            del pattern_segs[0]

        elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):
            # A single segment, with or without a trailing slash ('/'), matches
            # at any depth. This is equivalent to "**/{pattern}", so prepend
            # the double-asterisk unless it is already there.
            if first != '**':
                pattern_segs.insert(0, '**')

        # Otherwise the pattern has no leading slash but names at least one
        # directory (e.g., "dir/{pattern}"). It is already relative to the
        # root directory and needs no change.

        if not pattern_segs:
            # Nothing is left, so the pattern must have been invalid.
            raise ValueError("Pattern normalized to nothing.")

        if not pattern_segs[-1]:
            # Trailing slash ('/'): match every descendant of a directory but
            # not a regular file. This is equivalent to "{pattern}/**".
            pattern_segs[-1] = '**'

        # EDGE CASE: Collapse runs of double-asterisks (i.e., '**/**').
        # Walking backwards keeps the not-yet-visited indexes valid while
        # deleting.
        for i in range(len(pattern_segs) - 1, 0, -1):
            if pattern_segs[i - 1] == '**' and pattern_segs[i] == '**':
                del pattern_segs[i]

        if pattern_segs == ['**']:
            if is_dir_pattern:
                # "**/" normalizes to "**", but it should match everything
                # except for files in the root.
                return (None, '/')
            # "**" matches every path.
            return (None, '.')

        if pattern_segs == ['**', '*']:
            # "*" normalizes to "**/*" and matches every path. Special cased
            # for efficiency.
            return (None, '.')

        if pattern_segs == ['**', '*', '**']:
            # "*/" normalizes to "**/*/**" which matches every file not in the
            # root directory. Special cased for efficiency.
            return (None, '/')

        # No regular expression override, return modified pattern segments.
        return (pattern_segs, None)

    @override
    @classmethod
    def pattern_to_regex(
        cls,
        pattern: AnyStr,
    ) -> tuple[Optional[AnyStr], Optional[bool]]:
        """
        Convert the pattern into a regular expression.

        *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert into a
        regular expression.

        Returns a :class:`tuple` containing:

            -   *pattern* (:class:`str`, :class:`bytes` or :data:`None`) is the
                uncompiled regular expression.

            -   *include* (:class:`bool` or :data:`None`) is whether matched files
                should be included (:data:`True`), excluded (:data:`False`), or is a
                null-operation (:data:`None`).

        Raises :exc:`TypeError` if *pattern* is neither :class:`str` nor
        :class:`bytes`, and :exc:`.GitIgnorePatternError` if the pattern is
        invalid.
        """
        if isinstance(pattern, str):
            pattern_str = pattern
            is_bytes = False
        elif isinstance(pattern, bytes):
            pattern_str = pattern.decode(_BYTES_ENCODING)
            is_bytes = True
        else:
            raise TypeError(f"{pattern=!r} is not a unicode or byte string.")

        # Kept untouched for error messages.
        original_pattern = pattern_str

        # EDGE CASE: Only trailing whitespace is stripped (leading spaces are
        # kept), and not even that when the pattern ends with a
        # backslash-escaped space.
        if not pattern_str.endswith('\\ '):
            pattern_str = pattern_str.rstrip()

        if not pattern_str or pattern_str.startswith('#'):
            # A blank pattern, or a comment starting with a hash ('#'), is a
            # null-operation (neither includes nor excludes files). Escape the
            # hash with a backslash to match a literal hash (i.e., '\#').
            return (None, None)

        # A leading exclamation mark ('!') negates the pattern (exclude instead
        # of include). Escape it with a backslash to match a literal
        # exclamation mark (i.e., '\!').
        include = not pattern_str.startswith('!')
        if not include:
            # Remove leading exclamation mark.
            pattern_str = pattern_str[1:]

        # Split pattern into segments.
        pattern_segs = pattern_str.split('/')

        # Whether the pattern is specifically a directory pattern, determined
        # before normalization.
        #
        # EDGE CASE: A single slash ('/') is not addressed by the gitignore
        # documentation. Git treats it as a no-op (does not match any files).
        # The straightforward interpretation is to treat it as a directory and
        # match every descendant path (equivalent to '**'), so it is not
        # flagged as a directory pattern ('**' instead of '**/').
        is_dir_pattern = pattern_str != '/' and not pattern_segs[-1]

        # Normalize pattern to make processing easier.
        try:
            pattern_segs, override_regex = cls.__normalize_segments(
                is_dir_pattern, pattern_segs,
            )
        except ValueError as e:
            raise GitIgnorePatternError(
                f"Invalid git pattern: {original_pattern!r}"
            ) from e

        if override_regex is not None:
            # Use regex override.
            regex = override_regex

        elif pattern_segs is not None:
            # Build regular expression from pattern.
            try:
                regex_parts = cls.__translate_segments(pattern_segs)
            except ValueError as e:
                raise GitIgnorePatternError(
                    f"Invalid git pattern: {original_pattern!r}"
                ) from e

            regex = ''.join(regex_parts)

        else:
            assert_unreachable(
                f"{override_regex=} and {pattern_segs=} cannot both be null."
            )

        # Return the regex in the same type as the input pattern.
        if regex is not None and is_bytes:
            out_regex = regex.encode(_BYTES_ENCODING)
        else:
            out_regex = regex

        return (out_regex, include)

    @classmethod
    def __translate_segments(cls, pattern_segs: list[str]) -> list[str]:
        """
        Translate the pattern segments to regular expressions.

        *pattern_segs* (:class:`list` of :class:`str`) contains the pattern
        segments.

        Returns the regular expression parts (:class:`list` of :class:`str`).
        """
        regex_parts = []
        need_slash = False
        last = len(pattern_segs) - 1

        for index, seg in enumerate(pattern_segs):
            if seg == '**':
                if index == 0:
                    # A normalized pattern beginning with double-asterisks
                    # ('**') will match any leading path segments.
                    # - NOTICE: '(?:^|/)' benchmarks slower using p15
                    #   (sm=0.9382, hs=0.9966, re2=0.9337).
                    regex_parts.append('^(?:.+/)?')

                elif index < last:
                    # Inner double-asterisks ('**') match multiple (or zero)
                    # inner path segments.
                    regex_parts.append('(?:/.+)?')
                    need_slash = True

                else:
                    assert index == last, (index, last)
                    # A normalized pattern ending with double-asterisks ('**')
                    # will match any trailing path segments.
                    regex_parts.append('/')

                continue

            # Match path segment.
            if index == 0:
                # Anchor to root directory.
                regex_parts.append('^')

            if need_slash:
                regex_parts.append('/')

            if seg == '*':
                # Match whole path segment.
                regex_parts.append('[^/]+')
            else:
                # Match segment glob pattern.
                regex_parts.append(cls._translate_segment_glob(seg))

            if index == last:
                if seg == '*':
                    # A pattern ending with an asterisk ('*') will match a file
                    # or directory (without matching descendant paths). E.g.,
                    # "foo/*" matches "foo/test.json", "foo/bar/", but not
                    # "foo/bar/hello.c".
                    regex_parts.append('/?$')
                else:
                    # A pattern ending without a slash ('/') will match a file
                    # or a directory (with paths underneath it). E.g., "foo"
                    # matches "foo", "foo/bar", "foo/bar/baz", etc.
                    regex_parts.append('(?:/|$)')

            need_slash = True

        return regex_parts
# Register GitIgnoreBasicPattern as "gitignore". This runs as an import-time
# side effect of loading this module.
# NOTE(review): presumably this is the name resolved by
# ``util.lookup_pattern()`` when a factory name is passed to
# ``PathSpec.from_lines()`` — confirm against ``util``.
util.register_pattern('gitignore', GitIgnoreBasicPattern)

View File

@@ -0,0 +1,335 @@
"""
This module provides :class:`GitIgnoreSpecPattern` which implements Git's
`gitignore`_ patterns, and handles edge-cases where Git's behavior differs from
what's documented. Git allows including files from excluded directories which
appears to contradict the documentation. This is used by
:class:`~pathspec.gitignore.GitIgnoreSpec` to fully replicate Git's handling.
.. _`gitignore`: https://git-scm.com/docs/gitignore
"""
from typing import (
Optional) # Replaced by `X | None` in 3.10.
from pathspec._typing import (
AnyStr, # Removed in 3.18.
assert_unreachable,
override) # Added in 3.12.
from .base import (
GitIgnorePatternError,
_BYTES_ENCODING,
_GitIgnoreBasePattern)
# Name of the regex capture group that marks a directory match.
_DIR_MARK = 'ps_d'
"""
The regex group name for the directory marker. This is only used by
:class:`GitIgnoreSpec`.
"""
# Named capture group (named by _DIR_MARK) that matches a single slash ('/').
_DIR_MARK_CG = f'(?P<{_DIR_MARK}>/)'
"""
This regular expression matches the directory marker.
"""
# Non-capturing alternation: either the directory marker group above, or the
# end of the string.
_DIR_MARK_OPT = f'(?:{_DIR_MARK_CG}|$)'
"""
This regular expression matches the optional directory marker and sub-path.
"""
class GitIgnoreSpecPattern(_GitIgnoreBasePattern):
    """
    The :class:`GitIgnoreSpecPattern` class represents a compiled gitignore
    pattern with special handling for edge-cases to replicate Git's behavior.

    Compilation happens in three steps, all driven by
    :meth:`pattern_to_regex`: the raw pattern is cleaned up and classified
    (blank, comment, negation), its '/'-separated segments are normalized, and
    the normalized segments are translated into regular expression parts.

    This is registered under the deprecated name "gitwildmatch" for backward
    compatibility with v0.12. The registered name will be removed in a future
    version.
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @staticmethod
    def __normalize_segments(
        is_dir_pattern: bool,
        pattern_segs: list[str],
    ) -> tuple[Optional[list[str]], Optional[str]]:
        """
        Normalize the pattern segments to make processing easier.

        *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory
        pattern (i.e., ends with a slash '/').

        *pattern_segs* (:class:`list` of :class:`str`) contains the pattern
        segments. This may be modified in place.

        Raises :exc:`ValueError` if the pattern normalizes to no segments at
        all.

        Returns a :class:`tuple` containing either:

        - The normalized segments (:class:`list` of :class:`str`; or
          :data:`None`).

        - The regular expression override (:class:`str` or :data:`None`).

        Exactly one of the two elements is not :data:`None`.
        """
        if not pattern_segs[0]:
            # A pattern beginning with a slash ('/') should match relative to
            # the root directory. Remove the empty first segment to make the
            # pattern relative to root.
            del pattern_segs[0]
        elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):
            # A single segment pattern with or without a trailing slash ('/')
            # will match any descendant path. This is equivalent to
            # "**/{pattern}". Prepend double-asterisk segment to make pattern
            # relative to root.
            if pattern_segs[0] != '**':
                pattern_segs.insert(0, '**')
        else:
            # A pattern without a beginning slash ('/') but contains at least
            # one prepended directory (e.g., "dir/{pattern}") should match
            # relative to the root directory. No segment modification is
            # needed.
            pass

        if not pattern_segs:
            # After normalization, we end up with no pattern at all. This must
            # be because the pattern is invalid.
            raise ValueError("Pattern normalized to nothing.")

        if not pattern_segs[-1]:
            # A pattern ending with a slash ('/') will match all descendant
            # paths if it is a directory but not if it is a regular file. This
            # is equivalent to "{pattern}/**". Set empty last segment to a
            # double-asterisk to include all descendants.
            pattern_segs[-1] = '**'

        # EDGE CASE: Collapse duplicate double-asterisk sequences (i.e.,
        # '**/**'). Iterate over the segments in reverse order and remove the
        # duplicate double asterisks as we go. Walking backwards keeps the
        # not-yet-visited indices valid while items are deleted.
        for i in range(len(pattern_segs) - 1, 0, -1):
            prev = pattern_segs[i-1]
            seg = pattern_segs[i]
            if prev == '**' and seg == '**':
                del pattern_segs[i]

        seg_count = len(pattern_segs)
        if seg_count == 1 and pattern_segs[0] == '**':
            if is_dir_pattern:
                # The pattern "**/" will be normalized to "**", but it should
                # match everything except for files in the root. Special case
                # this pattern.
                return (None, _DIR_MARK_CG)
            else:
                # The pattern "**" will match every path. Special case this
                # pattern.
                return (None, '.')

        elif (
            seg_count == 2
            and pattern_segs[0] == '**'
            and pattern_segs[1] == '*'
        ):
            # The pattern "*" will be normalized to "**/*" and will match every
            # path. Special case this pattern for efficiency.
            return (None, '.')

        elif (
            seg_count == 3
            and pattern_segs[0] == '**'
            and pattern_segs[1] == '*'
            and pattern_segs[2] == '**'
        ):
            # The pattern "*/" will be normalized to "**/*/**" which will match
            # every file not in the root directory. Special case this pattern
            # for efficiency.
            if is_dir_pattern:
                return (None, _DIR_MARK_CG)
            else:
                return (None, '/')

        # No regular expression override, return modified pattern segments.
        return (pattern_segs, None)

    @override
    @classmethod
    def pattern_to_regex(
        cls,
        pattern: AnyStr,
    ) -> tuple[Optional[AnyStr], Optional[bool]]:
        """
        Convert the pattern into a regular expression.

        *pattern* (:class:`str` or :class:`bytes`) is the pattern to convert
        into a regular expression.

        Raises :exc:`TypeError` if *pattern* is neither :class:`str` nor
        :class:`bytes`. Raises :exc:`GitIgnorePatternError` if normalizing or
        translating the pattern raises :exc:`ValueError`.

        Returns a :class:`tuple` containing:

        - *pattern* (:class:`str`, :class:`bytes` or :data:`None`) is the
          uncompiled regular expression. It has the same string type as the
          input pattern.

        - *include* (:class:`bool` or :data:`None`) is whether matched files
          should be included (:data:`True`), excluded (:data:`False`), or is a
          null-operation (:data:`None`).
        """
        if isinstance(pattern, str):
            pattern_str = pattern
            return_type = str
        elif isinstance(pattern, bytes):
            pattern_str = pattern.decode(_BYTES_ENCODING)
            return_type = bytes
        else:
            raise TypeError(f"{pattern=!r} is not a unicode or byte string.")

        # Keep the untouched text for error messages, and drop the original
        # name so only the decoded string is used from here on.
        original_pattern = pattern_str
        del pattern

        if pattern_str.endswith('\\ '):
            # EDGE CASE: Spaces can be escaped with backslash. If the pattern
            # ends with a backslash followed by a space, do not strip from the
            # right: the trailing space is part of the pattern.
            pass
        else:
            # EDGE CASE: Leading spaces should be kept (only trailing spaces
            # should be removed). Git does not remove leading spaces.
            pattern_str = pattern_str.rstrip()

        regex: Optional[str]
        include: Optional[bool]

        if not pattern_str:
            # A blank pattern is a null-operation (neither includes nor
            # excludes files).
            return (None, None)

        elif pattern_str.startswith('#'):
            # A pattern starting with a hash ('#') serves as a comment (neither
            # includes nor excludes files). Escape the hash with a backslash to
            # match a literal hash (i.e., '\#').
            return (None, None)

        elif pattern_str == '/':
            # EDGE CASE: According to `git check-ignore` (v2.4.1), a single '/'
            # does not match any file.
            return (None, None)

        if pattern_str.startswith('!'):
            # A pattern starting with an exclamation mark ('!') negates the
            # pattern (exclude instead of include). Escape the exclamation mark
            # with a back slash to match a literal exclamation mark (i.e.,
            # '\!').
            include = False
            # Remove leading exclamation mark.
            pattern_str = pattern_str[1:]
        else:
            include = True

        # Split pattern into segments.
        pattern_segs = pattern_str.split('/')

        # Check whether the pattern is specifically a directory pattern before
        # normalization (normalization rewrites the empty last segment).
        is_dir_pattern = not pattern_segs[-1]

        # Normalize pattern to make processing easier.
        try:
            pattern_segs, override_regex = cls.__normalize_segments(
                is_dir_pattern, pattern_segs,
            )
        except ValueError as e:
            raise GitIgnorePatternError((
                f"Invalid git pattern: {original_pattern!r}"
            )) from e  # GitIgnorePatternError

        if override_regex is not None:
            # Use regex override.
            regex = override_regex

        elif pattern_segs is not None:
            # Build regular expression from pattern.
            try:
                regex_parts = cls.__translate_segments(is_dir_pattern, pattern_segs)
            except ValueError as e:
                raise GitIgnorePatternError((
                    f"Invalid git pattern: {original_pattern!r}"
                )) from e  # GitIgnorePatternError

            regex = ''.join(regex_parts)

        else:
            assert_unreachable((
                f"{override_regex=} and {pattern_segs=} cannot both be null."
            ))  # assert_unreachable

        # Encode regex if needed so the result matches the input string type.
        out_regex: AnyStr
        if regex is not None and return_type is bytes:
            out_regex = regex.encode(_BYTES_ENCODING)
        else:
            out_regex = regex

        return (out_regex, include)

    @classmethod
    def __translate_segments(
        cls,
        is_dir_pattern: bool,
        pattern_segs: list[str],
    ) -> list[str]:
        """
        Translate the pattern segments to regular expressions.

        *is_dir_pattern* (:class:`bool`) is whether the pattern is a directory
        pattern (i.e., ends with a slash '/').

        *pattern_segs* (:class:`list` of :class:`str`) contains the pattern
        segments.

        Returns the regular expression parts (:class:`list` of :class:`str`).
        """
        # Build regular expression from pattern.
        out_parts = []
        # Whether a '/' must be emitted before the next non-'**' segment.
        need_slash = False
        end = len(pattern_segs) - 1
        for i, seg in enumerate(pattern_segs):
            if seg == '**':
                if i == 0:
                    # A normalized pattern beginning with double-asterisks
                    # ('**') will match any leading path segments.
                    out_parts.append('^(?:.+/)?')

                elif i < end:
                    # A pattern with inner double-asterisks ('**') will match
                    # multiple (or zero) inner path segments.
                    out_parts.append('(?:/.+)?')
                    need_slash = True

                else:
                    assert i == end, (i, end)
                    # A normalized pattern ending with double-asterisks ('**')
                    # will match any trailing path segments. Directory patterns
                    # capture the slash in the directory-marker group.
                    if is_dir_pattern:
                        out_parts.append(_DIR_MARK_CG)
                    else:
                        out_parts.append('/')

            else:
                # Match path segment.
                if i == 0:
                    # Anchor to root directory.
                    out_parts.append('^')

                if need_slash:
                    out_parts.append('/')

                if seg == '*':
                    # Match whole path segment.
                    out_parts.append('[^/]+')
                else:
                    # Match segment glob pattern.
                    out_parts.append(cls._translate_segment_glob(seg))

                if i == end:
                    # A pattern ending without a slash ('/') will match a file
                    # or a directory (with paths underneath it). E.g., "foo"
                    # matches "foo", "foo/bar", "foo/bar/baz", etc.
                    out_parts.append(_DIR_MARK_OPT)

                need_slash = True

        return out_parts

View File

@@ -0,0 +1,52 @@
"""
.. version-deprecated:: 1.0.0
This module is superseded by :mod:`pathspec.patterns.gitignore`.
"""
from pathspec import util
from pathspec._typing import (
deprecated, # Added in 3.13.
override) # Added in 3.12.
from .gitignore.spec import (
GitIgnoreSpecPattern)
# DEPRECATED: Deprecated since version 1.0.0. Expose GitWildMatchPatternError
# in this module for backward compatibility.
from .gitignore import (
GitIgnorePatternError as GitWildMatchPatternError)
class GitWildMatchPattern(GitIgnoreSpecPattern):
    """
    .. version-deprecated:: 1.0.0
        This class is superseded by :class:`GitIgnoreSpecPattern` and
        :class:`~pathspec.patterns.gitignore.basic.GitIgnoreBasicPattern`.

    Backward-compatibility subclass of :class:`GitIgnoreSpecPattern`. Both
    overrides below only wrap the parent implementation with the
    ``deprecated`` decorator and forward all arguments unchanged.
    """

    @deprecated((
        "GitWildMatchPattern ('gitwildmatch') is deprecated. Use 'gitignore' for "
        "GitIgnoreBasicPattern or GitIgnoreSpecPattern instead."
    ))
    def __init__(self, *args, **kw) -> None:
        """
        Warn about deprecation.

        All positional and keyword arguments are forwarded to the parent
        initializer.
        """
        super().__init__(*args, **kw)

    @override
    @classmethod
    @deprecated((
        "GitWildMatchPattern ('gitwildmatch') is deprecated. Use 'gitignore' for "
        "GitIgnoreBasicPattern or GitIgnoreSpecPattern instead."
    ))
    def pattern_to_regex(cls, *args, **kw):
        """
        Warn about deprecation.

        All positional and keyword arguments are forwarded to the parent
        :meth:`pattern_to_regex`, and its result is returned as-is.
        """
        return super().pattern_to_regex(*args, **kw)
# DEPRECATED: Deprecated since version 1.0.0. Register GitWildMatchPattern as
# "gitwildmatch" for backward compatibility.
util.register_pattern('gitwildmatch', GitWildMatchPattern)

1
lib/pathspec/py.typed Normal file
View File

@@ -0,0 +1 @@
# Marker file for PEP 561. The pathspec package uses inline types.

847
lib/pathspec/util.py Normal file
View File

@@ -0,0 +1,847 @@
"""
This module provides utility methods for dealing with path-specs.
"""
import os
import os.path
import pathlib
import posixpath
import stat
from collections.abc import (
Collection,
Iterable,
Iterator,
Sequence)
from dataclasses import (
dataclass)
from typing import (
Any,
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Generic,
Optional, # Replaced by `X | None` in 3.10.
TypeVar,
Union) # Replaced by `X | Y` in 3.10.
from .pattern import (
Pattern)
from ._typing import (
AnyStr, # Removed in 3.18.
deprecated) # Added in 3.13.
StrPath = Union[str, os.PathLike[str]]
TStrPath = TypeVar("TStrPath", bound=StrPath)
"""
Type variable for :class:`str` or :class:`os.PathLike`.
"""
NORMALIZE_PATH_SEPS = [
__sep
for __sep in [os.sep, os.altsep]
if __sep and __sep != posixpath.sep
]
"""
*NORMALIZE_PATH_SEPS* (:class:`list` of :class:`str`) contains the path
separators that need to be normalized to the POSIX separator for the current
operating system. The separators are determined by examining :data:`os.sep` and
:data:`os.altsep`.
"""
_registered_patterns = {}
"""
*_registered_patterns* (:class:`dict`) maps a name (:class:`str`) to the
registered pattern factory (:class:`~collections.abc.Callable`).
"""
def append_dir_sep(path: pathlib.Path) -> str:
    """
    Render *path* as a string, marking directories with a trailing separator.

    The trailing :data:`os.sep` lets callers tell directories and files apart
    from the string alone.

    *path* (:class:`pathlib.Path`) is the path to render.

    Returns the path (:class:`str`); it ends with :data:`os.sep` only when
    *path* is an existing directory.
    """
    rendered = str(path)
    return rendered + os.sep if path.is_dir() else rendered
def check_match_file(
    patterns: Iterable[tuple[int, Pattern]],
    file: str,
    is_reversed: Optional[bool] = None,
) -> tuple[Optional[bool], Optional[int]]:
    """
    Check the file against the patterns.

    *patterns* (:class:`~collections.abc.Iterable`) yields each indexed pattern
    (:class:`tuple`) which contains the pattern index (:class:`int`) and actual
    pattern (:class:`.Pattern`).

    *file* (:class:`str`) is the normalized file path to be matched against
    *patterns*.

    *is_reversed* (:class:`bool` or :data:`None`) is whether the order of the
    patterns has been reversed. Default is :data:`None` for :data:`False`.
    Reversing the order of the patterns is an optimization: the first hit can
    be returned immediately instead of scanning every pattern.

    Returns a :class:`tuple` containing whether to include *file* (:class:`bool`
    or :data:`None`), and the index of the deciding pattern (:class:`int` or
    :data:`None`). Both are :data:`None` when no pattern matched.
    """
    decided_include: Optional[bool] = None
    decided_index: Optional[int] = None
    for position, candidate in patterns:
        # Null patterns and non-matching patterns never decide anything.
        if candidate.include is None or candidate.match_file(file) is None:
            continue

        if is_reversed:
            # Reversed order: the first pattern that matches takes precedence.
            return candidate.include, position

        # Forward order: the last pattern that matches takes precedence.
        decided_include = candidate.include
        decided_index = position

    return decided_include, decided_index
def detailed_match_files(
    patterns: Iterable[Pattern],
    files: Iterable[str],
    all_matches: Optional[bool] = None,
) -> dict[str, 'MatchDetail']:
    """
    Matches the files to the patterns, and returns which patterns matched the
    files.

    *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains
    the patterns to use.

    *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains the
    normalized file paths to be matched against *patterns*.

    *all_matches* (:class:`bool` or :data:`None`) is whether to return all
    matched patterns (:data:`True`), or only the last matched pattern
    (:data:`False`). Default is :data:`None` for :data:`False`.

    Returns the matched files (:class:`dict`) which maps each matched file
    (:class:`str`) to the patterns that matched in order (:class:`.MatchDetail`).
    """
    # The files are scanned once per pattern, so a one-shot iterator must be
    # materialized first.
    all_files = files if isinstance(files, Collection) else list(files)
    return_files = {}
    for pattern in patterns:
        if pattern.include is None:
            # Null-operation pattern (e.g., blank line or comment).
            continue

        result_files = pattern.match(all_files)  # TODO: Replace with `.match_file()`.
        if pattern.include:
            # Add files and record pattern.
            for result_file in result_files:
                detail = return_files.get(result_file)
                if detail is None:
                    return_files[result_file] = MatchDetail([pattern])
                elif all_matches:
                    detail.patterns.append(pattern)
                else:
                    detail.patterns[0] = pattern

        else:
            # Remove files. An exclude pattern may match a file that no earlier
            # include pattern added; that is not an error, so discard
            # tolerantly instead of raising KeyError.
            for file in result_files:
                return_files.pop(file, None)

    return return_files
def _filter_check_patterns(
    patterns: Iterable[Pattern],
) -> list[tuple[int, Pattern]]:
    """
    Pair every non-null pattern with its position in *patterns*.

    *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains
    the patterns.

    Returns a :class:`list` containing each indexed pattern (:class:`tuple`) which
    contains the pattern index (:class:`int`) and the actual pattern
    (:class:`.Pattern`). Patterns whose *include* is :data:`None` are dropped,
    but the indices of the remaining patterns still refer to the original
    sequence.
    """
    indexed = []
    for position, candidate in enumerate(patterns):
        if candidate.include is not None:
            indexed.append((position, candidate))
    return indexed
def _is_iterable(value: Any) -> bool:
    """
    Check whether the value is an iterable, treating strings as scalars.

    *value* is the value to check.

    Returns whether *value* is an iterable (:class:`bool`). :class:`str` and
    :class:`bytes` always yield :data:`False`.
    """
    if isinstance(value, (str, bytes)):
        return False
    return isinstance(value, Iterable)
@deprecated((
    "pathspec.util.iter_tree() is deprecated. Use iter_tree_files() instead."
))
def iter_tree(root, on_error=None, follow_links=None):
    """
    .. version-deprecated:: 0.10.0
        This is an alias for the :func:`.iter_tree_files` function.

    All three arguments are forwarded unchanged to :func:`.iter_tree_files`,
    and its result is returned.
    """
    return iter_tree_files(root, on_error=on_error, follow_links=follow_links)
def iter_tree_entries(
    root: StrPath,
    on_error: Optional[Callable[[OSError], None]] = None,
    follow_links: Optional[bool] = None,
) -> Iterator['TreeEntry']:
    """
    Walks the specified directory for all files and directories.

    *root* (:class:`str` or :class:`os.PathLike`) is the root directory to search.

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is
    the error handler for file-system exceptions. It will be called with the
    exception (:exc:`OSError`). Reraise the exception to abort the walk. Default
    is :data:`None` to ignore file-system exceptions.

    *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk
    symbolic links that resolve to directories. Default is :data:`None` for
    :data:`True`.

    Raises :exc:`TypeError` if *on_error* is given but is not callable. Raises
    :exc:`.RecursionError` if recursion is detected. Because this is a
    generator, nothing is raised until iteration starts.

    Returns an :class:`~collections.abc.Iterator` yielding each file or directory
    entry (:class:`.TreeEntry`) relative to *root*.
    """
    if not (on_error is None or callable(on_error)):
        raise TypeError(f"on_error:{on_error!r} is not callable.")

    walk_links = True if follow_links is None else follow_links
    root_full = os.path.abspath(root)

    # Start at the root itself (empty relative path) with a fresh ancestor memo.
    yield from _iter_tree_entries_next(root_full, '', {}, on_error, walk_links)
def _iter_tree_entries_next(
    root_full: str,
    dir_rel: str,
    memo: dict[str, str],
    on_error: Callable[[OSError], None],
    follow_links: bool,
) -> Iterator['TreeEntry']:
    """
    Scan the directory for all descendant files and directories.

    *root_full* (:class:`str`) the absolute path to the root directory.

    *dir_rel* (:class:`str`) the path to the directory to scan relative to
    *root_full*.

    *memo* (:class:`dict`) keeps track of ancestor directories encountered. Maps
    each ancestor real path (:class:`str`) to relative path (:class:`str`).

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is
    the error handler for file-system exceptions. Here it is only consulted
    when stat-ing a child entry fails.

    *follow_links* (:class:`bool`) is whether to walk symbolic links that resolve
    to directories.

    Raises :exc:`.RecursionError` if the real path of this directory is already
    in *memo*.

    Yields each entry (:class:`.TreeEntry`).
    """
    dir_full = os.path.join(root_full, dir_rel)
    dir_real = os.path.realpath(dir_full)

    # Every ancestor directory is remembered by its canonical (real) path.
    # Seeing the same canonical path again means a link loops back to an
    # ancestor.
    if dir_real in memo:
        raise RecursionError(real_path=dir_real, first_path=memo[dir_real], second_path=dir_rel)
    memo[dir_real] = dir_rel

    with os.scandir(dir_full) as scan_iter:
        for child in scan_iter:
            child_rel = os.path.join(dir_rel, child.name)

            # Stat the child itself (without following a link).
            try:
                child_lstat = child.stat(follow_symlinks=False)
            except OSError as e:
                if on_error is not None:
                    on_error(e)
                continue

            # For a link, also stat the target; otherwise both results are the
            # same object.
            child_stat = child_lstat
            if child.is_symlink():
                try:
                    child_stat = child.stat()
                except OSError as e:
                    if on_error is not None:
                        on_error(e)
                    continue

            if child.is_dir(follow_symlinks=follow_links):
                # Directory: yield it, then descend and yield its descendants.
                yield TreeEntry(child.name, child_rel, child_lstat, child_stat)
                yield from _iter_tree_entries_next(root_full, child_rel, memo, on_error, follow_links)

            elif child.is_file() or child.is_symlink():
                # Regular file or unfollowed link: yield it.
                yield TreeEntry(child.name, child_rel, child_lstat, child_stat)

    # NOTE: Forget this directory once it has been fully scanned. The memo only
    # tracks ancestors, so the same directory may legitimately be reached again
    # through another path; keeping it would misreport that as recursion. See
    # <https://github.com/cpburnz/python-path-specification/pull/7>.
    del memo[dir_real]
def iter_tree_files(
    root: StrPath,
    on_error: Optional[Callable[[OSError], None]] = None,
    follow_links: Optional[bool] = None,
) -> Iterator[str]:
    """
    Walks the specified directory for all files.

    *root* (:class:`str` or :class:`os.PathLike`) is the root directory to search
    for files.

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is
    the error handler for file-system exceptions. It will be called with the
    exception (:exc:`OSError`). Reraise the exception to abort the walk. Default
    is :data:`None` to ignore file-system exceptions.

    *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk
    symbolic links that resolve to directories. Default is :data:`None` for
    :data:`True`.

    Raises :exc:`TypeError` if *on_error* is given but is not callable. Raises
    :exc:`.RecursionError` if recursion is detected. Because this is a
    generator, nothing is raised until iteration starts.

    Returns an :class:`~collections.abc.Iterator` yielding the path to each file
    (:class:`str`) relative to *root*.
    """
    if not (on_error is None or callable(on_error)):
        raise TypeError(f"on_error:{on_error!r} is not callable.")

    walk_links = True if follow_links is None else follow_links
    root_full = os.path.abspath(root)

    # Start at the root itself (empty relative path) with a fresh ancestor memo.
    yield from _iter_tree_files_next(root_full, '', {}, on_error, walk_links)
def _iter_tree_files_next(
    root_full: str,
    dir_rel: str,
    memo: dict[str, str],
    on_error: Callable[[OSError], None],
    follow_links: bool,
) -> Iterator[str]:
    """
    Scan the directory for all descendant files.

    *root_full* (:class:`str`) the absolute path to the root directory.

    *dir_rel* (:class:`str`) the path to the directory to scan relative to
    *root_full*.

    *memo* (:class:`dict`) keeps track of ancestor directories encountered. Maps
    each ancestor real path (:class:`str`) to relative path (:class:`str`).

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is
    the error handler for file-system exceptions. NOTE(review): this function
    only passes it on to the recursive call and never invokes it itself.

    *follow_links* (:class:`bool`) is whether to walk symbolic links that resolve
    to directories.

    Raises :exc:`.RecursionError` if the real path of this directory is already
    in *memo*.

    Yields each file path (:class:`str`).
    """
    dir_full = os.path.join(root_full, dir_rel)
    dir_real = os.path.realpath(dir_full)

    # Every ancestor directory is remembered by its canonical (real) path.
    # Seeing the same canonical path again means a link loops back to an
    # ancestor.
    if dir_real in memo:
        raise RecursionError(real_path=dir_real, first_path=memo[dir_real], second_path=dir_rel)
    memo[dir_real] = dir_rel

    with os.scandir(dir_full) as scan_iter:
        for child in scan_iter:
            child_rel = os.path.join(dir_rel, child.name)

            if child.is_dir(follow_symlinks=follow_links):
                # Directory: descend and yield the files found underneath it.
                yield from _iter_tree_files_next(root_full, child_rel, memo, on_error, follow_links)
                continue

            if child.is_file():
                # Regular file (or a link resolving to one).
                yield child_rel
                continue

            if not follow_links and child.is_symlink():
                # Unfollowed link.
                yield child_rel

    # NOTE: Forget this directory once it has been fully scanned. The memo only
    # tracks ancestors, so the same directory may legitimately be reached again
    # through another path; keeping it would misreport that as recursion. See
    # <https://github.com/cpburnz/python-path-specification/pull/7>.
    del memo[dir_real]
def lookup_pattern(name: str) -> Callable[[AnyStr], Pattern]:
    """
    Looks up a registered pattern factory by name.

    *name* (:class:`str`) is the name of the pattern factory.

    Returns the registered pattern factory (:class:`~collections.abc.Callable`).
    If no pattern factory is registered, raises :exc:`KeyError`.
    """
    # Plain dict indexing: an unknown name surfaces as KeyError to the caller.
    return _registered_patterns[name]
def match_file(patterns: Iterable[Pattern], file: str) -> bool:
    """
    Matches the file to the patterns.

    *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains
    the patterns to use.

    *file* (:class:`str`) is the normalized file path to be matched against
    *patterns*.

    Returns :data:`True` if *file* matched; otherwise, :data:`False`. When
    several patterns match, the last one decides.
    """
    verdict = False
    for candidate in patterns:
        if candidate.include is None:
            # Null-operation pattern: never affects the outcome.
            continue
        if candidate.match_file(file) is not None:
            verdict = candidate.include
    return verdict
@deprecated((
    "pathspec.util.match_files() is deprecated. Use match_file() with a loop for "
    "better results."
))
def match_files(
    patterns: Iterable[Pattern],
    files: Iterable[str],
) -> set[str]:
    """
    .. version-deprecated:: 0.10.0
        This function is no longer used. Use the :func:`.match_file` function
        with a loop for better results.

    Matches the files to the patterns.

    *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains
    the patterns to use.

    *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains the
    normalized file paths to be matched against *patterns*.

    Returns the matched files (:class:`set` of :class:`str`).
    """
    # Drop null patterns once up front; they are then not re-examined for
    # every file.
    active_patterns = [__pat for __pat in patterns if __pat.include is not None]
    return {file for file in files if match_file(active_patterns, file)}
def normalize_file(
    file: StrPath,
    separators: Optional[Collection[str]] = None,
) -> str:
    """
    Normalizes the file path to use the POSIX path separator (i.e., ``"/"``), and
    make the paths relative (remove leading ``"/"``).

    *file* (:class:`str` or :class:`os.PathLike`) is the file path.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. This does
    not need to include the POSIX path separator (``"/"``), but including it will
    not affect the results. Default is ``None`` for :data:`.NORMALIZE_PATH_SEPS`.
    To prevent normalization, pass an empty container (e.g., an empty tuple
    ``()``).

    Returns the normalized file path (:class:`str`). At most one prefix is
    removed: a single leading ``"/"``, or else a single leading ``"./"``.
    """
    active_seps = NORMALIZE_PATH_SEPS if separators is None else separators

    # Convert path object to string, then rewrite each separator to "/".
    normalized: str = os.fspath(file)
    for sep in active_seps:
        normalized = normalized.replace(sep, posixpath.sep)

    if normalized.startswith('/'):
        # Make path relative.
        return normalized[1:]

    if normalized.startswith('./'):
        # Remove current directory prefix.
        return normalized[2:]

    return normalized
@deprecated((
    "pathspec.util.normalize_files() is deprecated. Use normalize_file() with a "
    "loop for better results."
))
def normalize_files(
    files: Iterable[StrPath],
    separators: Optional[Collection[str]] = None,
) -> dict[str, list[StrPath]]:
    """
    .. version-deprecated:: 0.10.0
        This function is no longer used. Use the :func:`.normalize_file`
        function with a loop for better results.

    Normalizes the file paths to use the POSIX path separator.

    *files* (:class:`~collections.abc.Iterable` of :class:`str` or
    :class:`os.PathLike`) contains the file paths to be normalized.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. See
    :func:`.normalize_file` for more information.

    Returns a :class:`dict` mapping each normalized file path (:class:`str`) to
    the original file paths (:class:`list` of :class:`str` or
    :class:`os.PathLike`).
    """
    grouped = {}
    for original in files:
        key = normalize_file(original, separators=separators)
        # Several originals may normalize to the same path; keep them all in
        # encounter order.
        grouped.setdefault(key, []).append(original)
    return grouped
def register_pattern(
    name: str,
    pattern_factory: Callable[[AnyStr], Pattern],
    override: Optional[bool] = None,
) -> None:
    """
    Registers the specified pattern factory.

    *name* (:class:`str`) is the name to register the pattern factory under.

    *pattern_factory* (:class:`~collections.abc.Callable`) is used to compile
    patterns. It must accept an uncompiled pattern (:class:`str`) and return the
    compiled pattern (:class:`.Pattern`).

    *override* (:class:`bool` or :data:`None`) optionally is whether to allow
    overriding an already registered pattern under the same name (:data:`True`),
    instead of raising an :exc:`.AlreadyRegisteredError` (:data:`False`). Default
    is :data:`None` for :data:`False`.

    Raises :exc:`TypeError` if *name* is not a string or *pattern_factory* is
    not callable.
    """
    # Validate both arguments before touching the registry.
    if not isinstance(name, str):
        raise TypeError(f"name:{name!r} is not a string.")

    if not callable(pattern_factory):
        raise TypeError(f"pattern_factory:{pattern_factory!r} is not callable.")

    # Refuse to silently replace an existing factory unless explicitly allowed.
    if not override and name in _registered_patterns:
        raise AlreadyRegisteredError(name, _registered_patterns[name])

    _registered_patterns[name] = pattern_factory
class AlreadyRegisteredError(Exception):
    """
    The :exc:`AlreadyRegisteredError` exception is raised when a pattern factory
    is registered under a name already in use.

    The name and the previously registered factory are stored positionally in
    :attr:`args` and exposed through read-only properties.
    """

    def __init__(
        self,
        name: str,
        pattern_factory: Callable[[AnyStr], Pattern],
    ) -> None:
        """
        Initializes the :exc:`AlreadyRegisteredError` instance.

        *name* (:class:`str`) is the name of the registered pattern.

        *pattern_factory* (:class:`~collections.abc.Callable`) is the registered
        pattern factory.
        """
        super().__init__(name, pattern_factory)

    @property
    def name(self) -> str:
        """
        *name* (:class:`str`) is the name of the registered pattern.
        """
        return self.args[0]

    @property
    def pattern_factory(self) -> Callable[[AnyStr], Pattern]:
        """
        *pattern_factory* (:class:`~collections.abc.Callable`) is the registered
        pattern factory.
        """
        return self.args[1]

    @property
    def message(self) -> str:
        """
        *message* (:class:`str`) is the error message.
        """
        taken_name = self.name
        existing_factory = self.pattern_factory
        return (
            f"{taken_name!r} is already registered for pattern factory="
            f"{existing_factory!r}."
        )
class RecursionError(Exception):
    """
    The :exc:`RecursionError` exception is raised when recursion is detected.

    The three paths are stored positionally in :attr:`args` and exposed through
    read-only properties.
    """

    def __init__(
        self,
        real_path: str,
        first_path: str,
        second_path: str,
    ) -> None:
        """
        Initializes the :exc:`RecursionError` instance.

        *real_path* (:class:`str`) is the real path that recursion was encountered
        on.

        *first_path* (:class:`str`) is the first path encountered for *real_path*.

        *second_path* (:class:`str`) is the second path encountered for *real_path*.
        """
        super().__init__(real_path, first_path, second_path)

    @property
    def real_path(self) -> str:
        """
        *real_path* (:class:`str`) is the real path that recursion was
        encountered on.
        """
        return self.args[0]

    @property
    def first_path(self) -> str:
        """
        *first_path* (:class:`str`) is the first path encountered for
        :attr:`self.real_path <RecursionError.real_path>`.
        """
        return self.args[1]

    @property
    def second_path(self) -> str:
        """
        *second_path* (:class:`str`) is the second path encountered for
        :attr:`self.real_path <RecursionError.real_path>`.
        """
        return self.args[2]

    @property
    def message(self) -> str:
        """
        *message* (:class:`str`) is the error message.
        """
        real = self.real_path
        first = self.first_path
        second = self.second_path
        return (
            f"Real path {real!r} was encountered at {first!r} "
            f"and then {second!r}."
        )
@dataclass(frozen=True)
class CheckResult(Generic[TStrPath]):
    """
    The :class:`CheckResult` class contains information about the file and which
    pattern matched it.

    This is a frozen dataclass, generic over the path type, with three fields:
    *file*, *include* and *index*.
    """

    # Make the class dict-less.
    __slots__ = (
        'file',
        'include',
        'index',
    )

    file: TStrPath
    """
    *file* (:class:`str` or :class:`os.PathLike`) is the file path.
    """

    include: Optional[bool]
    """
    *include* (:class:`bool` or :data:`None`) is whether to include or exclude the
    file. If :data:`None`, no pattern matched.
    """

    index: Optional[int]
    """
    *index* (:class:`int` or :data:`None`) is the index of the last pattern that
    matched. If :data:`None`, no pattern matched.
    """
class MatchDetail(object):
    """
    The :class:`.MatchDetail` class contains information about the patterns
    that matched a file.
    """

    # Make the class dict-less.
    __slots__ = ('patterns',)

    def __init__(self, patterns: Sequence[Pattern]) -> None:
        """
        Initialize the :class:`.MatchDetail` instance.

        *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
        contains the patterns that matched the file in the order they were
        encountered.
        """
        # The sequence is stored as given (not copied).
        self.patterns = patterns
        """
        *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
        contains the patterns that matched the file in the order they were
        encountered.
        """
class TreeEntry(object):
    """
    The :class:`TreeEntry` class contains information about a file-system entry.

    Both stat results are supplied by the caller and cached; the query methods
    only inspect those cached results.
    """

    # Make the class dict-less.
    __slots__ = ('_lstat', 'name', 'path', '_stat')

    def __init__(
        self,
        name: str,
        path: str,
        lstat: os.stat_result,
        stat: os.stat_result,
    ) -> None:
        """
        Initialize the :class:`TreeEntry` instance.

        *name* (:class:`str`) is the base name of the entry.

        *path* (:class:`str`) is the relative path of the entry.

        *lstat* (:class:`os.stat_result`) is the stat result of the direct entry.

        *stat* (:class:`os.stat_result`) is the stat result of the entry,
        potentially linked.
        """
        # *name* (:class:`str`) is the base name of the entry.
        self.name: str = name

        # *path* (:class:`str`) is the path of the entry.
        self.path: str = path

        # *_lstat* (:class:`os.stat_result`) is the stat result of the direct
        # entry.
        self._lstat: os.stat_result = lstat

        # *_stat* (:class:`os.stat_result`) is the stat result of the linked
        # entry.
        self._stat: os.stat_result = stat

    def stat(self, follow_links: Optional[bool] = None) -> os.stat_result:
        """
        Get the cached stat result for the entry.

        *follow_links* (:class:`bool` or :data:`None`) is whether to follow symbolic
        links. If this is :data:`True`, the stat result of the linked file will be
        returned. Default is :data:`None` for :data:`True`.

        Returns that stat result (:class:`os.stat_result`).
        """
        if follow_links is None or follow_links:
            return self._stat
        return self._lstat

    def is_dir(self, follow_links: Optional[bool] = None) -> bool:
        """
        Get whether the entry is a directory.

        *follow_links* (:class:`bool` or :data:`None`) is whether to follow symbolic
        links. If this is :data:`True`, a symlink to a directory will result in
        :data:`True`. Default is :data:`None` for :data:`True`.

        Returns whether the entry is a directory (:class:`bool`).
        """
        chosen = self._lstat if (follow_links is not None and not follow_links) else self._stat
        return stat.S_ISDIR(chosen.st_mode)

    def is_file(self, follow_links: Optional[bool] = None) -> bool:
        """
        Get whether the entry is a regular file.

        *follow_links* (:class:`bool` or :data:`None`) is whether to follow symbolic
        links. If this is :data:`True`, a symlink to a regular file will result in
        :data:`True`. Default is :data:`None` for :data:`True`.

        Returns whether the entry is a regular file (:class:`bool`).
        """
        chosen = self._lstat if (follow_links is not None and not follow_links) else self._stat
        return stat.S_ISREG(chosen.st_mode)

    def is_symlink(self) -> bool:
        """
        Returns whether the entry is a symbolic link (:class:`bool`).
        """
        direct_mode = self._lstat.st_mode
        return stat.S_ISLNK(direct_mode)

14
newplan?.md Normal file
View File

@@ -0,0 +1,14 @@
1. Dir Scanner
2. file filter
3. Hasher
4. Manifest
1.