1from dataclasses import dataclass
2from enum import Enum
3import logging
4
5from sphinx_codelinks.config import ESCAPE, UNIX_NEWLINE, OneLineCommentStyle
6
# Module-level logger named after this module; records below INFO are dropped.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Attach a console handler at import time. StreamHandler() without an explicit
# stream writes to sys.stderr; the handler itself also filters at INFO.
# NOTE(review): the handler is added unconditionally on import, so applications
# embedding this module cannot opt out of console output - confirm this is intended.
console = logging.StreamHandler()
console.setLevel(logging.INFO)
logger.addHandler(console)
14
15
class WarningSubTypeEnum(str, Enum):
    """
    Sub types classifying why a one-line comment was rejected by the parser.

    Members are also ``str`` instances, so they compare equal to their plain
    string values.
    """

    # more fields were given than the configuration defines
    too_many_fields = "too_many_fields"
    # fewer fields were given than the number of required fields
    too_few_fields = "too_few_fields"
    # a ``list[str]`` field lacks the opening "[" or the closing "]"
    missing_square_brackets = "missing_square_brackets"
    # a ``list[str]`` field has brackets, but not as its first and last characters
    not_start_or_end_with_square_brackets = "not_start_or_end_with_square_brackets"
    # a given field contains a newline character
    newline_in_field = "newline_in_field"
24
25
@dataclass
class OnelineParserInvalidWarning:
    """Describes why a one-line comment could not be parsed into fields."""

    # category of the problem
    sub_type: WarningSubTypeEnum
    # human-readable explanation of the problem
    msg: str
32
33
[docs] 34# @One-line comment parser for traceability markers, IMPL_OLP_1, impl, [FE_DEF, FE_CMT]
def oneline_parser(  # noqa: PLR0912, PLR0911 # handle warnings
    oneline: str, oneline_config: OneLineCommentStyle
) -> dict[str, str | list[str] | int] | OnelineParserInvalidWarning | None:
    """
    Extract the fields from a custom one-line comment.

    The parsing is done with the following steps:

    - Locate the start and end sequences
    - extract the string between them
    - apply custom_split to split the strings into a list of fields by `field_split_char`
    - check the number of required fields and the max number of the given fields
    - split the strings located in the field with `type: list[str]` to a list of string
    - introduce the default values to those fields which are not given

    :param oneline: the text which may contain a one-line comment marker
    :param oneline_config: the one-line comment style (start/end sequences,
        field split character and the definition of the fields)
    :return: ``None`` if the start or the end sequence is not found,
        an ``OnelineParserInvalidWarning`` if the comment is malformed,
        otherwise a dict mapping the field names to their values, plus
        ``start_column`` (index right after the start sequence) and
        ``end_column`` (index of the last end sequence)
    """
    # find indices of the start and end sequences
    start_idx = oneline.find(oneline_config.start_sequence)
    end_idx = oneline.rfind(oneline_config.end_sequence)
    if start_idx == -1 or end_idx == -1:
        # start or end sequences do not exist
        return None

    # extract the string wrapped by start and end
    start_idx += len(oneline_config.start_sequence)
    string = oneline[start_idx:end_idx].strip()

    # number of needs_fields which are required
    min_fields = oneline_config.get_cnt_required_fields()
    # positions of the fields which have type: list[str]
    positions_list_str = oneline_config.get_pos_list_str()
    max_fields = len(oneline_config.needs_fields)

    string_fields = [
        _field.strip(" ")
        for _field in custom_split(
            string, oneline_config.field_split_char, positions_list_str
        )
    ]
    cnt_given_fields = len(string_fields)
    if cnt_given_fields < min_fields:
        return OnelineParserInvalidWarning(
            sub_type=WarningSubTypeEnum.too_few_fields,
            msg=f"{cnt_given_fields} given fields. They shall be more than {min_fields}",
        )

    if cnt_given_fields > max_fields:
        return OnelineParserInvalidWarning(
            sub_type=WarningSubTypeEnum.too_many_fields,
            msg=f"{cnt_given_fields} given fields. They shall be less than {max_fields}",
        )

    resolved: dict[str, str | list[str] | int] = {}
    for idx, field_config in enumerate(oneline_config.needs_fields):
        field_name: str = field_config["name"]

        if idx >= cnt_given_fields:
            # for not given fields, introduce the default (if there is one)
            default = field_config.get("default")
            if default is not None:
                resolved[field_name] = default
            continue

        # given fields
        raw_value = string_fields[idx]
        if is_newline_in_field(raw_value):
            # the case where the field contains a new line character
            return OnelineParserInvalidWarning(
                sub_type=WarningSubTypeEnum.newline_in_field,
                msg=f"Field {field_name} has newline character. It is not allowed",
            )

        field_type = field_config["type"]
        if field_type == "str":
            resolved[field_name] = raw_value
        elif field_type == "list[str]":
            parsed = _parse_list_field(raw_value, field_name, field_type)
            if isinstance(parsed, OnelineParserInvalidWarning):
                return parsed
            resolved[field_name] = parsed
        # fields with any other type are not resolved

    resolved["start_column"] = start_idx
    resolved["end_column"] = end_idx
    return resolved


def _parse_list_field(
    raw_value: str, field_name: str, field_type: str
) -> list[str] | OnelineParserInvalidWarning:
    """
    Parse the value of a field with `type: list[str]` (e.g. ``[a, b]``) into its items.

    :param raw_value: the stripped text of the field
    :param field_name: the name of the field, only used in the warning messages
    :param field_type: the configured type of the field, only used in the warning messages
    :return: the list of stripped items (empty if nothing but spaces is between
        the brackets), or a warning if the brackets are missing or misplaced
    """
    # find the indices of "[" and "]"
    list_start_idx = raw_value.find("[")
    list_end_idx = raw_value.rfind("]")
    if list_start_idx == -1 or list_end_idx == -1:
        # brackets are not found
        return OnelineParserInvalidWarning(
            sub_type=WarningSubTypeEnum.missing_square_brackets,
            msg=f"Field {field_name} with 'type': '{field_type}' must be given with '[]' brackets",
        )

    if list_start_idx != 0 or list_end_idx != len(raw_value) - 1:
        # brackets are found but not at the beginning and the end
        return OnelineParserInvalidWarning(
            sub_type=WarningSubTypeEnum.not_start_or_end_with_square_brackets,
            msg=f"Field {field_name} with 'type': '{field_type}' must start with '[' and end with ']'",
        )

    string_items = raw_value[list_start_idx + 1 : list_end_idx]
    if not string_items.strip():
        # the case where the empty string ("") or only spaces between "[" "]"
        return []
    # items are split like a `type: str` field, i.e. "," can be escaped
    return [_item.strip() for _item in custom_split(string_items, ",")]
133
134
def custom_split(
    string: str, delimiter: str, positions_list_str: list[int] | None = None
) -> list[str]:
    r"""
    Split a string into fields by the delimiter, honouring escapes and list brackets.

    A string shall be split with the following conditions:

    - To use special chars in literal, escape ('\') must be used
    - String shall be split by the given delimiter
    - In a field with `type: str`:
        - Special chars are delimiter, '\', '[' and ']'
    - In a field with `type: list[str]`:
        - Special chars are only '[' and ']'

    When the string is given without any fields with `type: list[str]` (positions_list_str=None),
    it's considered as it is in a field with `type: str`.

    An escape which is not followed by a special char (including an escape at
    the very end of the string) is kept as a literal.

    :param string: the text to split
    :param delimiter: the char which separates the fields
    :param positions_list_str: the 1-based positions of the fields with `type: list[str]`
    :return: the list of fields; it always contains at least one (possibly empty) string
    """
    # hoisted to a set: membership is tested once per char
    list_positions = frozenset(positions_list_str or ())
    escape_chars = (delimiter, "[", "]", ESCAPE)
    field: list[str] = []  # the chars of the field being built
    fields: list[str] = []  # the completed fields
    leading_escape = False
    expect_closing_bracket = False

    for char in string:
        # +1 to locate the current field position (positions are 1-based)
        is_list_str_field = (len(fields) + 1) in list_positions

        if leading_escape:
            if char not in escape_chars:
                # leading escape is considered as a literal
                field.append(ESCAPE)
            field.append(char)
            leading_escape = False
            continue

        if char == ESCAPE and not is_list_str_field:
            # escapes are only interpreted outside of list[str] fields
            leading_escape = True
            continue

        if char == delimiter:
            if is_list_str_field and expect_closing_bracket:
                # delimiter occurs inside the brackets of a list[str] field
                field.append(char)
            else:
                fields.append("".join(field))
                field = []
            continue

        if is_list_str_field:
            if char == "[":
                expect_closing_bracket = True
            if char == "]":
                expect_closing_bracket = False

        field.append(char)

    if leading_escape:
        # a trailing escape has nothing to escape: keep it as a literal
        # instead of silently dropping it
        field.append(ESCAPE)

    # add last field
    fields.append("".join(field))
    return fields
196
197
def is_newline_in_field(field: str) -> bool:
    """
    Tell whether the given field contains the newline sequence ``UNIX_NEWLINE``.
    """
    newline_pos = field.find(UNIX_NEWLINE)
    return newline_pos != -1