1import os
 2from pathlib import Path
 3
 4from ignore import WalkBuilder
 5from ignore.overrides import OverrideBuilder
 6
 7from sphinx_codelinks.source_discover.config import (
 8    COMMENT_FILETYPE,
 9    SourceDiscoverConfig,
10)
11
12
[docs]13# @Source code file discovery with gitignore support, IMPL_DISC_1, impl, [FE_DISCOVERY, FE_CLI_DISCOVER]
14class SourceDiscover:
15    def __init__(self, src_discover_config: SourceDiscoverConfig):
16        self.src_discover_config = src_discover_config
17        # normalize the file types to lower case with leading dot
18        self.file_types = {
19            f".{ext}" for ext in COMMENT_FILETYPE[src_discover_config.comment_type]
20        }
21
22        self.source_paths = self._discover()
23
24    def _build_overrides(self) -> OverrideBuilder | None:
25        """Build an OverrideBuilder for include/exclude patterns.
26
27        Include patterns are added as whitelist globs.
28        Exclude patterns are added as negated globs (prefixed with ``!``).
29        """
30        has_include = bool(self.src_discover_config.include)
31        has_exclude = bool(self.src_discover_config.exclude)
32
33        if not has_include and not has_exclude:
34            return None
35
36        ob = OverrideBuilder(self.src_discover_config.src_dir)
37
38        if has_include:
39            for pattern in self.src_discover_config.include:
40                ob.add(pattern)
41
42        if has_exclude:
43            for pattern in self.src_discover_config.exclude:
44                ob.add(f"!{pattern}")
45
46        return ob
47
48    def _discover(self) -> list[Path]:
49        """Discover source files recursively in the given directory."""
50        src_dir = self.src_discover_config.src_dir
51        if not src_dir.is_dir():
52            return []
53
54        gitignore = self.src_discover_config.gitignore
55
56        builder = WalkBuilder(src_dir)
57        # Replicate the Rust ignore crate's standard_filters(gitignore)
58        # followed by hidden(false), matching ubc_codelinks behaviour.
59        builder.ignore(gitignore)
60        builder.parents(gitignore)
61        builder.git_ignore(gitignore)
62        builder.git_global(gitignore)
63        builder.git_exclude(gitignore)
64        builder.hidden(False)
65        builder.follow_links(self.src_discover_config.follow_links)
66
67        override_builder = self._build_overrides()
68        if override_builder is not None:
69            builder.overrides(override_builder.build())
70
71        discovered_files = []
72        for entry in builder.build():
73            filepath = entry.path()
74            if not filepath.is_file():
75                continue
76            if self.file_types and filepath.suffix.lower() not in self.file_types:
77                continue
78            # resolve() produces canonical absolute paths; follow_links only
79            # controls whether the walker descends into symlinked directories
80            discovered_files.append(filepath.resolve())
81
82        sorted_filepaths = sorted(
83            discovered_files, key=lambda x: os.path.normcase(os.path.normpath(x))
84        )
85        return sorted_filepaths