from collections.abc import Callable, Iterable
import fnmatch
import os
from pathlib import Path
5
6from gitignore_parser import ( # type: ignore[import-untyped] # library has no stub
7 parse_gitignore,
8)
9
10from sphinx_codelinks.source_discover.config import (
11 COMMENT_FILETYPE,
12 SourceDiscoverConfig,
13)
14
15
[docs]16# @Source code file discovery with gitignore support, IMPL_DISC_1, impl, [FE_DISCOVERY, FE_CLI_DISCOVER]
class SourceDiscover:
    """Discover source files below the configured source directory.

    Discovery runs once, during construction; the sorted result is stored in
    ``source_paths``. A file is kept when its suffix is one of the file types
    of the configured comment type and it either matches an ``include``
    pattern, or is rejected by neither the root ``.gitignore`` matcher nor an
    ``exclude`` pattern.
    """

    def __init__(self, src_discover_config: SourceDiscoverConfig):
        """Store the configuration, prepare the filters and run the discovery.

        Args:
            src_discover_config: Discovery settings. The attributes read here
                and in ``_discover`` are ``src_dir``, ``gitignore``,
                ``comment_type``, ``include`` and ``exclude``.
        """
        self.src_discover_config = src_discover_config
        # Only gitignore at source root is considered.
        # TODO: Support nested gitignore files
        gitignore_path = self.src_discover_config.src_dir / ".gitignore"
        # is_file() rather than exists(): a directory that happens to be
        # called ".gitignore" cannot be opened for parsing.
        self.gitignore_matcher: Callable[[str], bool] | None = (
            parse_gitignore(gitignore_path)
            if self.src_discover_config.gitignore and gitignore_path.is_file()
            else None
        )
        # normalize the file types to lower case with leading dot
        self.file_types = self._normalize_file_types(
            COMMENT_FILETYPE[src_discover_config.comment_type]
        )

        self.source_paths = self._discover()

    @staticmethod
    def _normalize_file_types(extensions: Iterable[str]) -> set[str]:
        """Return the extensions as lower-case suffixes with one leading dot.

        ``_discover`` compares these against ``Path.suffix.lower()``, so an
        entry that kept upper-case letters or gained a second leading dot
        could never match any file.
        """
        return {f".{ext.lstrip('.').lower()}" for ext in extensions}

    def _discover(self) -> list[Path]:
        """Discover source files recursively in the given directory.

        Returns:
            The accepted file paths, sorted by their normalised,
            case-normalised string form.
        """
        config = self.src_discover_config
        src_dir = config.src_dir
        discovered_files: list[Path] = []
        for filepath in src_dir.rglob("*"):
            # Check the suffix first: it is a plain string comparison, whereas
            # is_file() has to query the filesystem for every entry.
            if self.file_types and filepath.suffix.lower() not in self.file_types:
                continue
            if not filepath.is_file():
                continue
            rel_filepath = str(filepath.relative_to(src_dir))
            if config.include and self._matches_any(rel_filepath, config.include):
                # "includes" has the highest priority over "gitignore" and "excludes"
                discovered_files.append(filepath)
                continue
            # The gitignore matcher is given the absolute path, while the
            # include/exclude patterns are matched against the relative one.
            if self.gitignore_matcher is not None and self.gitignore_matcher(
                str(filepath.absolute())
            ):
                continue
            if config.exclude and self._matches_any(rel_filepath, config.exclude):
                continue
            discovered_files.append(filepath)
        return sorted(
            discovered_files,
            key=lambda path: os.path.normcase(os.path.normpath(path)),
        )

    def _matches_any(self, rel_filepath: str, patterns: list[str]) -> bool:
        """Check if the given file path matches any of the given patterns.

        Patterns are shell-style wildcards evaluated with ``fnmatch.fnmatch``;
        an empty pattern list never matches.
        """
        return any(fnmatch.fnmatch(rel_filepath, pattern) for pattern in patterns)