1import os
2from pathlib import Path
3
4from ignore import WalkBuilder
5from ignore.overrides import OverrideBuilder
6
7from sphinx_codelinks.source_discover.config import (
8 COMMENT_FILETYPE,
9 CommentType,
10 SourceDiscoverConfig,
11)
12
13
def _json_starts_with_comment(filepath: Path, sample_size: int = 256) -> bool:
    """Tell whether a ``.json`` file opens with a ``//`` or ``/*`` comment.

    This is the JSONC detection rule for files carrying a plain ``.json``
    extension: per https://jsonc.org/#filename-extension such a file should only
    be treated as JSONC when its first content is a comment (for example the
    mode line ``// -*- mode: jsonc -*-``).

    Args:
        filepath: File to inspect.
        sample_size: Number of bytes read from the start of the file; nothing
            beyond this prefix is examined.

    Returns:
        ``True`` when the sampled prefix, after dropping one leading UTF-8 BOM
        and any leading whitespace, begins with ``//`` or ``/*``. ``False``
        otherwise, including when the file cannot be opened or read.
    """
    utf8_bom = b"\xef\xbb\xbf"

    try:
        with filepath.open("rb") as stream:
            head = stream.read(sample_size)
    except OSError:
        # Unreadable files are simply not considered JSONC.
        return False

    # Drop a single leading BOM first, then skip leading whitespace.
    if head.startswith(utf8_bom):
        head = head[len(utf8_bom):]
    head = head.lstrip()

    return head.startswith(b"//") or head.startswith(b"/*")
29
30
# @Source code file discovery with gitignore support, IMPL_DISC_1, impl, [FE_DISCOVERY, FE_CLI_DISCOVER]
class SourceDiscover:
    """Discover the source files of one comment type below a source directory.

    Construction runs the discovery immediately: ``src_dir`` from the given
    config is walked and the matching files are stored in ``source_paths`` as a
    sorted list of resolved paths. The accepted suffixes are kept in
    ``file_types``.
    """

    def __init__(self, src_discover_config: SourceDiscoverConfig):
        """Store the config, derive the accepted suffixes and run discovery.

        Args:
            src_discover_config: Discovery settings. This class reads its
                ``comment_type``, ``src_dir``, ``include``, ``exclude``,
                ``gitignore`` and ``follow_links`` attributes.
        """
        self.src_discover_config = src_discover_config
        # normalize the file types to lower case with leading dot so that they
        # compare equal to the lower-cased ``Path.suffix`` used in ``_discover``
        self.file_types = {
            f".{ext.lower()}"
            for ext in COMMENT_FILETYPE[src_discover_config.comment_type]
        }

        self.source_paths = self._discover()

    def _build_overrides(self) -> OverrideBuilder | None:
        """Build an OverrideBuilder for include/exclude patterns.

        Include patterns are added as whitelist globs.
        Exclude patterns are added as negated globs (prefixed with ``!``).

        Returns:
            A builder rooted at ``src_dir`` holding all include patterns
            followed by all negated exclude patterns, or ``None`` when neither
            kind of pattern is configured.
        """
        include = self.src_discover_config.include or []
        exclude = self.src_discover_config.exclude or []

        if not include and not exclude:
            return None

        ob = OverrideBuilder(self.src_discover_config.src_dir)
        for pattern in include:
            ob.add(pattern)
        for pattern in exclude:
            ob.add(f"!{pattern}")

        return ob

    def _discover(self) -> list[Path]:
        """Discover source files recursively in the given directory.

        Returns:
            The resolved paths of all matching files, sorted by their
            ``os.path.normcase(os.path.normpath(...))`` form. An empty list is
            returned when ``src_dir`` is not a directory.
        """
        config = self.src_discover_config
        src_dir = config.src_dir
        if not src_dir.is_dir():
            return []

        gitignore = config.gitignore

        builder = WalkBuilder(src_dir)
        # Replicate the Rust ignore crate's standard_filters(gitignore)
        # followed by hidden(false), matching ubc_codelinks behaviour.
        builder.ignore(gitignore)
        builder.parents(gitignore)
        builder.git_ignore(gitignore)
        builder.git_global(gitignore)
        builder.git_exclude(gitignore)
        builder.hidden(False)
        builder.follow_links(config.follow_links)

        override_builder = self._build_overrides()
        if override_builder is not None:
            builder.overrides(override_builder.build())

        # The comment type does not change during the walk; evaluate it once.
        jsonc_mode = config.comment_type == CommentType.jsonc

        discovered_files = []
        for entry in builder.build():
            filepath = entry.path()
            if not filepath.is_file():
                continue
            suffix = filepath.suffix.lower()
            # An empty ``file_types`` set disables suffix filtering.
            if self.file_types and suffix not in self.file_types:
                continue
            # @JSONC .json files require a leading comment, IMPL_JSONC_3, impl, [FE_JSONC]
            # A plain ``.json`` file is only treated as JSONC when it opens with a
            # comment; otherwise it is skipped under the ``jsonc`` comment type.
            if (
                jsonc_mode
                and suffix == ".json"
                and not _json_starts_with_comment(filepath)
            ):
                continue
            # resolve() produces canonical absolute paths; follow_links only
            # controls whether the walker descends into symlinked directories
            # NOTE(review): a symlink and its target that both pass the filters
            # resolve to the same path and would then appear twice — confirm
            # whether de-duplication is wanted.
            discovered_files.append(filepath.resolve())

        return sorted(
            discovered_files,
            key=lambda path: os.path.normcase(os.path.normpath(path)),
        )