Source Code Tracing: oneline_parser.py

  1from dataclasses import dataclass
  2from enum import Enum
  3import logging
  4
  5from sphinx_codelinks.config import ESCAPE, UNIX_NEWLINE, OneLineCommentStyle
  6
# Module-level logger named after this module; records below INFO are dropped.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Attach a stream handler (INFO and above) so records are printed to the console.
# NOTE(review): the handler is attached at import time; if the host application
# also installs a root handler, records may be emitted twice -- confirm intended.
console = logging.StreamHandler()
console.setLevel(logging.INFO)
logger.addHandler(console)
 14
 15
class WarningSubTypeEnum(str, Enum):
    """Sub-types of the warnings reported for an invalid one-line comment.

    Members also derive from ``str``, so each member compares equal to its value.
    """

    # more fields were given than ``needs_fields`` defines
    too_many_fields = "too_many_fields"
    # fewer fields were given than the number of required fields
    too_few_fields = "too_few_fields"
    # a field with ``type: list[str]`` lacks "[" or "]"
    missing_square_brackets = "missing_square_brackets"
    # a field with ``type: list[str]`` has brackets, but not as its first and last characters
    not_start_or_end_with_square_brackets = "not_start_or_end_with_square_brackets"
    # a given field contains a newline character
    newline_in_field = "newline_in_field"
 24
 25
@dataclass
class OnelineParserInvalidWarning:
    """Describe why a one-line comment could not be parsed.

    ``oneline_parser`` returns an instance of this class instead of the
    resolved fields when the comment is malformed.
    """

    # category of the problem
    sub_type: WarningSubTypeEnum
    # human-readable description of the problem
    msg: str
 32
 33
[docs] 34# @One-line comment parser for traceability markers, IMPL_OLP_1, impl, [FE_DEF, FE_CMT]
 35def oneline_parser(  # noqa: PLR0912, PLR0911 # handel warnings
 36    oneline: str, oneline_config: OneLineCommentStyle
 37) -> dict[str, str | list[str] | int] | OnelineParserInvalidWarning | None:
 38    """
 39    Extract the string from the custom one-line comment style with the following steps.
 40
 41    - Locate the start and end sequences
 42    - extract the string between them
 43    - apply custom_split to split the strings into a list of fields by `field_split_char`
 44    - check the number of required fields and the max number of the given fields
 45    - split the strings located in the field with `type: list[str]` to a list of string
 46    - introduce the default values to those fields which are not given
 47    """
 48    # find indices start and end char
 49    start_idx = oneline.find(oneline_config.start_sequence)
 50    end_idx = oneline.rfind(oneline_config.end_sequence)
 51    if start_idx == -1 or end_idx == -1:
 52        # start or end sequences do not exist
 53        return None
 54
 55    # extract the string wrapped by start and end
 56    start_idx = start_idx + len(oneline_config.start_sequence)
 57    string = oneline[start_idx:end_idx].strip()
 58
 59    # numbers of needs_fields which are required
 60    cnt_required_fields = oneline_config.get_cnt_required_fields()
 61    # indices of the field which has type:list[str]
 62    positions_list_str = oneline_config.get_pos_list_str()
 63
 64    min_fields = cnt_required_fields
 65    max_fields = len(oneline_config.needs_fields)
 66
 67    string_fields = [
 68        _field.strip(" ")
 69        for _field in custom_split(
 70            string, oneline_config.field_split_char, positions_list_str
 71        )
 72    ]
 73    if len(string_fields) < min_fields:
 74        return OnelineParserInvalidWarning(
 75            sub_type=WarningSubTypeEnum.too_few_fields,
 76            msg=f"{len(string_fields)} given fields. They shall be more than {min_fields}",
 77        )
 78
 79    if len(string_fields) > max_fields:
 80        return OnelineParserInvalidWarning(
 81            sub_type=WarningSubTypeEnum.too_many_fields,
 82            msg=f"{len(string_fields)} given fields. They shall be less than {max_fields}",
 83        )
 84    resolved: dict[str, str | list[str] | int] = {}
 85    for idx in range(len(oneline_config.needs_fields)):
 86        field_name: str = oneline_config.needs_fields[idx]["name"]
 87        if len(string_fields) > idx:
 88            # given fields
 89            if is_newline_in_field(string_fields[idx]):
 90                # the case where the field contains a new line character
 91                return OnelineParserInvalidWarning(
 92                    sub_type=WarningSubTypeEnum.newline_in_field,
 93                    msg=f"Field {field_name} has newline character. It is not allowed",
 94                )
 95            if oneline_config.needs_fields[idx]["type"] == "str":
 96                resolved[field_name] = string_fields[idx]
 97            elif oneline_config.needs_fields[idx]["type"] == "list[str]":
 98                # find the indices of "[" and "]"
 99                list_start_idx = string_fields[idx].find("[")
100                list_end_idx = string_fields[idx].rfind("]")
101                if list_start_idx == -1 or list_end_idx == -1:
102                    # brackets are not  found
103                    return OnelineParserInvalidWarning(
104                        sub_type=WarningSubTypeEnum.missing_square_brackets,
105                        msg=f"Field {field_name} with 'type': '{oneline_config.needs_fields[idx]['type']}' must be given with '[]' brackets",
106                    )
107
108                if list_start_idx != 0 or list_end_idx != len(string_fields[idx]) - 1:
109                    # brackets are found but not at the beginning and the end
110                    return OnelineParserInvalidWarning(
111                        sub_type=WarningSubTypeEnum.not_start_or_end_with_square_brackets,
112                        msg=f"Field {field_name} with 'type': '{oneline_config.needs_fields[idx]['type']}' must start with '[' and end with ']'",
113                    )
114
115                string_items = string_fields[idx][list_start_idx + 1 : list_end_idx]
116
117                if not string_items.strip():
118                    # the case where the empty string ("") or only spaces between "[" "]"
119                    resolved[field_name] = []
120                else:
121                    items = [_item.strip() for _item in custom_split(string_items, ",")]
122                    resolved[field_name] = [item.strip() for item in items]
123        else:
124            # for not given fields, introduce the default
125            default = oneline_config.needs_fields[idx].get("default")
126            if default is None:
127                continue
128            resolved[field_name] = default
129
130    resolved["start_column"] = start_idx
131    resolved["end_column"] = end_idx
132    return resolved
133
134
135def custom_split(
136    string: str, delimiter: str, positions_list_str: list[int] | None = None
137) -> list[str]:
138    """
139    A string shall be split with the following conditions:
140
141    - To use special chars in literal , escape ('\') must be used
142    - String shall be split by the given delimiter
143    - In a field with `type: str`:
144        - Special chars are delimiter, '\', '[' and ']'
145    - In a field with `type: list[str]`:
146        - Special chars are only '[' and ']'
147
148    When the string is given without any fields with `type: list[str]` (positions_list_str=None),
149    it's considered as it is in a field with `type: str`.
150    """
151    if positions_list_str is None:
152        positions_list_str = []
153    escape_chars = [delimiter, "[", "]", ESCAPE]
154    field = []  # a list of string for a field
155    fields: list[str] = []  # a list of string which contains
156    leading_escape = False
157    expect_closing_bracket = False
158
159    for char in string:
160        # +1 to locate the current field position
161        current_field_idx = len(fields) + 1
162        is_list_str_field = current_field_idx in positions_list_str
163
164        if leading_escape:
165            if char not in escape_chars:
166                # leading escape is considered as a literal
167                field.append(ESCAPE)
168            field.append(char)
169            leading_escape = False
170            continue
171
172        if char == ESCAPE and not is_list_str_field:
173            leading_escape = True
174            continue
175
176        if char == delimiter:
177            if is_list_str_field and expect_closing_bracket:
178                # delimiter occurs in the field with type:list[str]
179                field.append(char)
180            else:
181                fields.append("".join(field))
182                field = []
183            continue
184
185        if is_list_str_field:
186            if char == "[":
187                expect_closing_bracket = True
188            if char == "]":
189                expect_closing_bracket = False
190
191        field.append(char)
192
193    # add last field
194    fields.append("".join(field))
195    return fields
196
197
def is_newline_in_field(field: str) -> bool:
    """
    Tell whether the `UNIX_NEWLINE` sequence occurs anywhere in the field.
    """
    newline_position = field.find(UNIX_NEWLINE)
    return newline_position != -1