1import os
  2from pathlib import Path
  3
  4from ignore import WalkBuilder
  5from ignore.overrides import OverrideBuilder
  6
  7from sphinx_codelinks.source_discover.config import (
  8    COMMENT_FILETYPE,
  9    CommentType,
 10    SourceDiscoverConfig,
 11)
 12
 13
 14def _json_starts_with_comment(filepath: Path, sample_size: int = 256) -> bool:
 15    """Return True if a ``.json`` file's first non-whitespace content is a comment.
 16
 17    Used to decide whether a ``.json`` file should be treated as JSONC. Per
 18    https://jsonc.org/#filename-extension a ``.json`` file should only be treated as
 19    JSONC when it opens with a comment (e.g. the mode line ``// -*- mode: jsonc -*-``).
 20    """
 21    try:
 22        with filepath.open("rb") as f:
 23            chunk = f.read(sample_size)
 24    except OSError:
 25        return False
 26    # strip a leading UTF-8 BOM, then leading whitespace
 27    text = chunk.removeprefix(b"\xef\xbb\xbf").lstrip()
 28    return text.startswith((b"//", b"/*"))
 29
 30
[docs] 31# @Source code file discovery with gitignore support, IMPL_DISC_1, impl, [FE_DISCOVERY, FE_CLI_DISCOVER]
 32class SourceDiscover:
 33    def __init__(self, src_discover_config: SourceDiscoverConfig):
 34        self.src_discover_config = src_discover_config
 35        # normalize the file types to lower case with leading dot
 36        self.file_types = {
 37            f".{ext}" for ext in COMMENT_FILETYPE[src_discover_config.comment_type]
 38        }
 39
 40        self.source_paths = self._discover()
 41
 42    def _build_overrides(self) -> OverrideBuilder | None:
 43        """Build an OverrideBuilder for include/exclude patterns.
 44
 45        Include patterns are added as whitelist globs.
 46        Exclude patterns are added as negated globs (prefixed with ``!``).
 47        """
 48        has_include = bool(self.src_discover_config.include)
 49        has_exclude = bool(self.src_discover_config.exclude)
 50
 51        if not has_include and not has_exclude:
 52            return None
 53
 54        ob = OverrideBuilder(self.src_discover_config.src_dir)
 55
 56        if has_include:
 57            for pattern in self.src_discover_config.include:
 58                ob.add(pattern)
 59
 60        if has_exclude:
 61            for pattern in self.src_discover_config.exclude:
 62                ob.add(f"!{pattern}")
 63
 64        return ob
 65
 66    def _discover(self) -> list[Path]:
 67        """Discover source files recursively in the given directory."""
 68        src_dir = self.src_discover_config.src_dir
 69        if not src_dir.is_dir():
 70            return []
 71
 72        gitignore = self.src_discover_config.gitignore
 73
 74        builder = WalkBuilder(src_dir)
 75        # Replicate the Rust ignore crate's standard_filters(gitignore)
 76        # followed by hidden(false), matching ubc_codelinks behaviour.
 77        builder.ignore(gitignore)
 78        builder.parents(gitignore)
 79        builder.git_ignore(gitignore)
 80        builder.git_global(gitignore)
 81        builder.git_exclude(gitignore)
 82        builder.hidden(False)
 83        builder.follow_links(self.src_discover_config.follow_links)
 84
 85        override_builder = self._build_overrides()
 86        if override_builder is not None:
 87            builder.overrides(override_builder.build())
 88
 89        discovered_files = []
 90        for entry in builder.build():
 91            filepath = entry.path()
 92            if not filepath.is_file():
 93                continue
 94            if self.file_types and filepath.suffix.lower() not in self.file_types:
 95                continue
[docs] 96            # @JSONC .json files require a leading comment, IMPL_JSONC_3, impl, [FE_JSONC]
 97            # A plain ``.json`` file is only treated as JSONC when it opens with a
 98            # comment; otherwise it is skipped under the ``jsonc`` comment type.
 99            if (
100                self.src_discover_config.comment_type == CommentType.jsonc
101                and filepath.suffix.lower() == ".json"
102                and not _json_starts_with_comment(filepath)
103            ):
104                continue
105            # resolve() produces canonical absolute paths; follow_links only
106            # controls whether the walker descends into symlinked directories
107            discovered_files.append(filepath.resolve())
108
109        sorted_filepaths = sorted(
110            discovered_files, key=lambda x: os.path.normcase(os.path.normpath(x))
111        )
112        return sorted_filepaths