1from dataclasses import dataclass
  2from enum import Enum
  3
  4from sphinx_codelinks.config import ESCAPE, UNIX_NEWLINE, OneLineCommentStyle
  5
  6
class WarningSubTypeEnum(str, Enum):
    """
    Sub types of the warnings reported for invalid one-line comments.

    The ``str`` mixin makes every member a string as well; each member's value
    is identical to its name.
    """

    # more fields were given than the style's ``needs_fields`` defines
    too_many_fields = "too_many_fields"
    # fewer fields were given than the number of required fields
    too_few_fields = "too_few_fields"
    # a field with ``type: list[str]`` has no "[" or no "]"
    missing_square_brackets = "missing_square_brackets"
    # a field with ``type: list[str]`` has brackets, but not as its first and last characters
    not_start_or_end_with_square_brackets = "not_start_or_end_with_square_brackets"
    # a given field contains a newline character
    newline_in_field = "newline_in_field"
 15
 16
@dataclass
class OnelineParserInvalidWarning:
    """
    Warning describing why a one-line comment is invalid.

    ``oneline_parser`` returns an instance of this class instead of the
    resolved fields when the comment cannot be parsed.
    """

    # category of the problem
    sub_type: WarningSubTypeEnum
    # human-readable description of the problem
    msg: str
 23
 24
# @One-line comment parser for traceability markers, IMPL_OLP_1, impl, [FE_DEF, FE_CMT]
 26def oneline_parser(  # noqa: PLR0912, PLR0911 # handel warnings
 27    oneline: str, oneline_config: OneLineCommentStyle
 28) -> dict[str, str | list[str] | int] | OnelineParserInvalidWarning | None:
 29    """
 30    Extract the string from the custom one-line comment style with the following steps.
 31
 32    - Locate the start and end sequences
 33    - extract the string between them
 34    - apply custom_split to split the strings into a list of fields by `field_split_char`
 35    - check the number of required fields and the max number of the given fields
 36    - split the strings located in the field with `type: list[str]` to a list of string
 37    - introduce the default values to those fields which are not given
 38    """
 39    # find indices start and end char
 40    start_idx = oneline.find(oneline_config.start_sequence)
 41    end_idx = oneline.rfind(oneline_config.end_sequence)
 42    if start_idx == -1 or end_idx == -1:
 43        # start or end sequences do not exist
 44        return None
 45
 46    # extract the string wrapped by start and end
 47    start_idx = start_idx + len(oneline_config.start_sequence)
 48    string = oneline[start_idx:end_idx].strip()
 49
 50    # numbers of needs_fields which are required
 51    cnt_required_fields = oneline_config.get_cnt_required_fields()
 52    # indices of the field which has type:list[str]
 53    positions_list_str = oneline_config.get_pos_list_str()
 54
 55    min_fields = cnt_required_fields
 56    max_fields = len(oneline_config.needs_fields)
 57
 58    string_fields = [
 59        _field.strip(" ")
 60        for _field in custom_split(
 61            string, oneline_config.field_split_char, positions_list_str
 62        )
 63    ]
 64    if len(string_fields) < min_fields:
 65        return OnelineParserInvalidWarning(
 66            sub_type=WarningSubTypeEnum.too_few_fields,
 67            msg=f"{len(string_fields)} given fields. They shall be more than {min_fields}",
 68        )
 69
 70    if len(string_fields) > max_fields:
 71        return OnelineParserInvalidWarning(
 72            sub_type=WarningSubTypeEnum.too_many_fields,
 73            msg=f"{len(string_fields)} given fields. They shall be less than {max_fields}",
 74        )
 75    resolved: dict[str, str | list[str] | int] = {}
 76    for idx in range(len(oneline_config.needs_fields)):
 77        field_name: str = oneline_config.needs_fields[idx]["name"]
 78        if len(string_fields) > idx:
 79            # given fields
 80            if is_newline_in_field(string_fields[idx]):
 81                # the case where the field contains a new line character
 82                return OnelineParserInvalidWarning(
 83                    sub_type=WarningSubTypeEnum.newline_in_field,
 84                    msg=f"Field {field_name} has newline character. It is not allowed",
 85                )
 86            if oneline_config.needs_fields[idx]["type"] == "str":
 87                resolved[field_name] = string_fields[idx]
 88            elif oneline_config.needs_fields[idx]["type"] == "list[str]":
 89                # find the indices of "[" and "]"
 90                list_start_idx = string_fields[idx].find("[")
 91                list_end_idx = string_fields[idx].rfind("]")
 92                if list_start_idx == -1 or list_end_idx == -1:
 93                    # brackets are not  found
 94                    return OnelineParserInvalidWarning(
 95                        sub_type=WarningSubTypeEnum.missing_square_brackets,
 96                        msg=f"Field {field_name} with 'type': '{oneline_config.needs_fields[idx]['type']}' must be given with '[]' brackets",
 97                    )
 98
 99                if list_start_idx != 0 or list_end_idx != len(string_fields[idx]) - 1:
100                    # brackets are found but not at the beginning and the end
101                    return OnelineParserInvalidWarning(
102                        sub_type=WarningSubTypeEnum.not_start_or_end_with_square_brackets,
103                        msg=f"Field {field_name} with 'type': '{oneline_config.needs_fields[idx]['type']}' must start with '[' and end with ']'",
104                    )
105
106                string_items = string_fields[idx][list_start_idx + 1 : list_end_idx]
107
108                if not string_items.strip():
109                    # the case where the empty string ("") or only spaces between "[" "]"
110                    resolved[field_name] = []
111                else:
112                    items = [_item.strip() for _item in custom_split(string_items, ",")]
113                    resolved[field_name] = [item.strip() for item in items]
114        else:
115            # for not given fields, introduce the default
116            default = oneline_config.needs_fields[idx].get("default")
117            if default is None:
118                continue
119            resolved[field_name] = default
120
121    resolved["start_column"] = start_idx
122    resolved["end_column"] = end_idx
123    return resolved
124
125
def custom_split(
    string: str, delimiter: str, positions_list_str: list[int] | None = None
) -> list[str]:
    """
    Split a string into fields by the given delimiter, honouring escapes.

    A string shall be split with the following conditions:

    - To use special chars as literals, the escape char (``ESCAPE``) must precede them
    - String shall be split by the given delimiter
    - In a field with `type: str`:
        - Special chars are the delimiter, the escape char, '[' and ']'
    - In a field with `type: list[str]`:
        - Special chars are only '[' and ']'; the escape char is kept as it is
          and a delimiter between '[' and ']' does not split the field

    An escape char that is not followed by a special char is kept as a literal.
    This also applies to an escape char at the very end of the string.

    When the string is given without any fields with `type: list[str]` (positions_list_str=None),
    it's considered as it is in a field with `type: str`.

    :param string: The string to split.
    :param delimiter: The char separating two fields.
    :param positions_list_str: Positions of the fields with `type: list[str]`.
        They are compared against ``len(fields) + 1``, i.e. treated as 1-based
        (NOTE(review): confirm against the producer of these positions).
    :return: The fields in order; always at least one element.
    """
    if positions_list_str is None:
        positions_list_str = []
    escape_chars = (delimiter, "[", "]", ESCAPE)
    field: list[str] = []  # the chars of the field currently being built
    fields: list[str] = []  # the completed fields
    leading_escape = False
    expect_closing_bracket = False
    # +1 to locate the current field position; it only changes when a field is
    # completed, so it is recomputed there instead of for every char
    is_list_str_field = len(fields) + 1 in positions_list_str

    for char in string:
        if leading_escape:
            if char not in escape_chars:
                # leading escape is considered as a literal
                field.append(ESCAPE)
            field.append(char)
            leading_escape = False
            continue

        if char == ESCAPE and not is_list_str_field:
            leading_escape = True
            continue

        if char == delimiter:
            if is_list_str_field and expect_closing_bracket:
                # delimiter occurs inside the brackets of a field with type: list[str]
                field.append(char)
            else:
                fields.append("".join(field))
                field = []
                is_list_str_field = len(fields) + 1 in positions_list_str
            continue

        if is_list_str_field:
            if char == "[":
                expect_closing_bracket = True
            elif char == "]":
                expect_closing_bracket = False

        field.append(char)

    if leading_escape:
        # the string ends with an escape that escapes nothing: keep it as a
        # literal instead of silently dropping it
        field.append(ESCAPE)

    # add last field
    fields.append("".join(field))
    return fields
187
188
def is_newline_in_field(field: str) -> bool:
    """
    Tell whether ``UNIX_NEWLINE`` occurs anywhere in the given field.

    :param field: Text of a single field.
    :return: ``True`` if the field contains ``UNIX_NEWLINE``, otherwise ``False``.
    """
    return field.find(UNIX_NEWLINE) != -1