1from dataclasses import dataclass
2from enum import Enum
3
4from sphinx_codelinks.config import ESCAPE, UNIX_NEWLINE, OneLineCommentStyle
5
6
class WarningSubTypeEnum(str, Enum):
    """Closed set of reasons for rejecting a one-line comment.

    Every member is also a ``str`` whose value equals its own name.
    """

    # more fields were given than the configuration defines
    too_many_fields = "too_many_fields"
    # fewer fields were given than the configuration requires
    too_few_fields = "too_few_fields"
    # a ``list[str]`` field lacks "[" or "]"
    missing_square_brackets = "missing_square_brackets"
    # a ``list[str]`` field has brackets, but not as its first and last character
    not_start_or_end_with_square_brackets = "not_start_or_end_with_square_brackets"
    # a field contains a newline character
    newline_in_field = "newline_in_field"
15
16
@dataclass
class OnelineParserInvalidWarning:
    """Describe why a one-line comment was rejected by the parser."""

    # category of the problem
    sub_type: WarningSubTypeEnum
    # human-readable explanation of the problem
    msg: str
23
24
# @One-line comment parser for traceability markers, IMPL_OLP_1, impl, [FE_DEF, FE_CMT]
def oneline_parser(  # noqa: PLR0912, PLR0911 # handle warnings
    oneline: str, oneline_config: OneLineCommentStyle
) -> dict[str, str | list[str] | int] | OnelineParserInvalidWarning | None:
    """
    Parse a custom one-line comment into a dictionary of resolved fields.

    The parsing follows these steps:

    - locate the start sequence (first occurrence) and end sequence (last occurrence)
    - extract and strip the string between them
    - apply custom_split to split the string into fields by `field_split_char`
    - check the number of fields against the required and the maximum count
    - split the fields with `type: list[str]` into a list of stripped strings
    - introduce the default values for those fields which are not given

    :param oneline: the text to search for a one-line comment
    :param oneline_config: the comment style (sequences, split char, field definitions)
    :return: ``None`` if the start or end sequence is not found,
        an ``OnelineParserInvalidWarning`` if the fields are malformed,
        otherwise a dictionary mapping each field name to its value plus
        ``start_column`` (index right after the start sequence) and
        ``end_column`` (index of the end sequence)
    """
    start_idx = oneline.find(oneline_config.start_sequence)
    end_idx = oneline.rfind(oneline_config.end_sequence)
    if start_idx == -1 or end_idx == -1:
        # start or end sequences do not exist
        return None

    # NOTE(review): if the last end sequence lies before the end of the start
    # sequence, the slice below is empty and parsing continues with a single
    # empty field - confirm that this is intended.
    start_idx += len(oneline_config.start_sequence)
    string = oneline[start_idx:end_idx].strip()

    # number of fields which are required
    min_fields = oneline_config.get_cnt_required_fields()
    # positions of the fields which have type: list[str]
    positions_list_str = oneline_config.get_pos_list_str()

    needs_fields = oneline_config.needs_fields
    max_fields = len(needs_fields)

    string_fields = [
        _field.strip(" ")
        for _field in custom_split(
            string, oneline_config.field_split_char, positions_list_str
        )
    ]
    if len(string_fields) < min_fields:
        return OnelineParserInvalidWarning(
            sub_type=WarningSubTypeEnum.too_few_fields,
            msg=f"{len(string_fields)} given fields. They shall be more than {min_fields}",
        )

    if len(string_fields) > max_fields:
        return OnelineParserInvalidWarning(
            sub_type=WarningSubTypeEnum.too_many_fields,
            msg=f"{len(string_fields)} given fields. They shall be less than {max_fields}",
        )

    resolved: dict[str, str | list[str] | int] = {}
    for idx, needs_field in enumerate(needs_fields):
        field_name: str = needs_field["name"]

        if idx >= len(string_fields):
            # field is not given: introduce the default, if there is one
            default = needs_field.get("default")
            if default is None:
                continue
            # copy list defaults so that the result never shares a mutable
            # object with the configuration or with other parsed results
            resolved[field_name] = list(default) if isinstance(default, list) else default
            continue

        string_field = string_fields[idx]
        if is_newline_in_field(string_field):
            # the case where the field contains a new line character
            return OnelineParserInvalidWarning(
                sub_type=WarningSubTypeEnum.newline_in_field,
                msg=f"Field {field_name} has newline character. It is not allowed",
            )

        field_type = needs_field["type"]
        if field_type == "str":
            resolved[field_name] = string_field
        elif field_type == "list[str]":
            # find the indices of "[" and "]"
            list_start_idx = string_field.find("[")
            list_end_idx = string_field.rfind("]")
            if list_start_idx == -1 or list_end_idx == -1:
                # brackets are not found
                return OnelineParserInvalidWarning(
                    sub_type=WarningSubTypeEnum.missing_square_brackets,
                    msg=f"Field {field_name} with 'type': '{field_type}' must be given with '[]' brackets",
                )

            if list_start_idx != 0 or list_end_idx != len(string_field) - 1:
                # brackets are found but not at the beginning and the end
                return OnelineParserInvalidWarning(
                    sub_type=WarningSubTypeEnum.not_start_or_end_with_square_brackets,
                    msg=f"Field {field_name} with 'type': '{field_type}' must start with '[' and end with ']'",
                )

            string_items = string_field[list_start_idx + 1 : list_end_idx]
            if not string_items.strip():
                # the case where the empty string ("") or only spaces between "[" "]"
                resolved[field_name] = []
            else:
                resolved[field_name] = [
                    _item.strip() for _item in custom_split(string_items, ",")
                ]

    resolved["start_column"] = start_idx
    resolved["end_column"] = end_idx
    return resolved
124
125
def custom_split(
    string: str, delimiter: str, positions_list_str: list[int] | None = None
) -> list[str]:
    r"""
    Split a string into fields by the given delimiter, honouring escapes.

    A string shall be split with the following conditions:

    - To use special chars as literals, the escape char ('\') must precede them
    - String shall be split by the given delimiter
    - In a field with `type: str`:
        - Special chars are delimiter, '\', '[' and ']'
    - In a field with `type: list[str]`:
        - Special chars are only '[' and ']'; the escape char is kept as it is,
          and a delimiter between '[' and ']' does not split the field

    An escape char which is not followed by a special char, including one at the
    very end of the string, is kept as a literal.

    When the string is given without any fields with `type: list[str]` (positions_list_str=None),
    it's considered as it is in a field with `type: str`.

    :param string: the string to split
    :param delimiter: the character which separates the fields
    :param positions_list_str: 1-based positions of the fields with `type: list[str]`
    :return: the fields in order; always contains at least one (possibly empty) string
    """
    list_str_positions = positions_list_str or []
    escape_chars = (delimiter, "[", "]", ESCAPE)
    field: list[str] = []  # the characters of the field being built
    fields: list[str] = []  # the completed fields
    leading_escape = False
    expect_closing_bracket = False

    for char in string:
        # +1 because the field positions are 1-based
        is_list_str_field = (len(fields) + 1) in list_str_positions

        if leading_escape:
            if char not in escape_chars:
                # leading escape is considered as a literal
                field.append(ESCAPE)
            field.append(char)
            leading_escape = False
            continue

        if char == ESCAPE and not is_list_str_field:
            leading_escape = True
            continue

        if char == delimiter:
            if is_list_str_field and expect_closing_bracket:
                # delimiter occurs inside the brackets of a type:list[str] field
                field.append(char)
            else:
                fields.append("".join(field))
                field = []
            continue

        if is_list_str_field:
            if char == "[":
                expect_closing_bracket = True
            elif char == "]":
                expect_closing_bracket = False

        field.append(char)

    if leading_escape:
        # a trailing escape has nothing to escape: keep it as a literal
        # instead of silently dropping it
        field.append(ESCAPE)

    # add last field
    fields.append("".join(field))
    return fields
187
188
def is_newline_in_field(field: str) -> bool:
    """
    Tell whether ``UNIX_NEWLINE`` occurs anywhere in the given field.
    """
    newline_position = field.find(UNIX_NEWLINE)
    return newline_position != -1