1from collections.abc import Callable
 2import fnmatch
 3import os
 4from pathlib import Path
 5
 6from gitignore_parser import (  # type: ignore[import-untyped]  # library has no stub
 7    parse_gitignore,
 8)
 9
10from sphinx_codelinks.source_discover.config import (
11    COMMENT_FILETYPE,
12    SourceDiscoverConfig,
13)
14
15
[docs]16# @Source code file discovery with gitignore support, IMPL_DISC_1, impl, [FE_DISCOVERY, FE_CLI_DISCOVER]
class SourceDiscover:
    """Find the source files below ``src_dir`` that pass the configured filters.

    Discovery runs once, while the object is constructed. Afterwards the
    result is available as ``source_paths`` (sorted list of paths),
    ``file_types`` holds the accepted suffixes and ``gitignore_matcher`` is the
    matcher built from the root ``.gitignore`` (or ``None``).
    """

    def __init__(self, src_discover_config: SourceDiscoverConfig):
        """Prepare the filters described by *src_discover_config* and discover files.

        Args:
            src_discover_config: Supplies ``src_dir``, ``gitignore``,
                ``comment_type``, ``include`` and ``exclude``.

        Raises:
            KeyError: If ``comment_type`` is not a key of ``COMMENT_FILETYPE``.
        """
        self.src_discover_config = src_discover_config
        # Only gitignore at source root is considered.
        # TODO: Support nested gitignore files
        gitignore_path = self.src_discover_config.src_dir / ".gitignore"
        # ``is_file`` instead of ``exists``: a directory that happens to be
        # called ".gitignore" must not be handed to the parser.
        self.gitignore_matcher: Callable[[str], bool] | None = (
            parse_gitignore(gitignore_path)
            if self.src_discover_config.gitignore and gitignore_path.is_file()
            else None
        )
        # normalize the file types to lower case with leading dot
        self.file_types = self._normalize_file_types(
            COMMENT_FILETYPE[src_discover_config.comment_type]
        )

        self.source_paths = self._discover()

    @staticmethod
    def _normalize_file_types(extensions: list[str]) -> set[str]:
        """Return *extensions* as lower-case suffixes with exactly one leading dot.

        ``_discover`` compares against ``Path.suffix.lower()``, so the stored
        values have to be lower case as well; otherwise an extension written
        as ``"PY"`` or ``".py"`` could never match any file.
        """
        return {"." + ext.lower().lstrip(".") for ext in extensions}

    def _discover(self) -> list[Path]:
        """Discover source files recursively in the given directory.

        Every regular file below ``src_dir`` goes through these checks, in
        order:

        1. Its lower-cased suffix must be in ``file_types`` (skipped when
           ``file_types`` is empty).
        2. A match against an ``include`` pattern accepts the file right away.
        3. A match by the gitignore matcher rejects it.
        4. A match against an ``exclude`` pattern rejects it.

        Returns:
            The accepted paths, sorted by their normalized, case-normalized
            string form.
        """
        config = self.src_discover_config
        src_dir = config.src_dir
        discovered_files: list[Path] = []
        for filepath in src_dir.rglob("*"):
            if not filepath.is_file():
                continue
            if self.file_types and filepath.suffix.lower() not in self.file_types:
                continue
            # include/exclude patterns are matched against the path relative
            # to the source root; the gitignore matcher gets the absolute path.
            rel_filepath = str(filepath.relative_to(src_dir))
            if config.include and self._matches_any(rel_filepath, config.include):
                # "includes" has the highest priority over "gitignore" and "excludes"
                discovered_files.append(filepath)
                continue
            if self.gitignore_matcher is not None and self.gitignore_matcher(
                str(filepath.absolute())
            ):
                continue
            if config.exclude and self._matches_any(rel_filepath, config.exclude):
                continue
            discovered_files.append(filepath)
        return sorted(
            discovered_files, key=lambda x: os.path.normcase(os.path.normpath(x))
        )

    def _matches_any(self, rel_filepath: str, patterns: list[str]) -> bool:
        """Check if the given file path matches any of the given patterns.

        Matching is done with ``fnmatch.fnmatch``, i.e. shell-style wildcards
        applied to the whole path string.
        """
        return any(fnmatch.fnmatch(rel_filepath, pattern) for pattern in patterns)