Completed
Pull Request — master (#2423)
by
unknown
01:54
created

LineParser.__separate_by_first_occurrence()   D

Complexity

Conditions 8

Size

Total Lines 47

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 8
c 1
b 0
f 0
dl 0
loc 47
rs 4.3478
1
from coala_utils.string_processing.StringConverter import StringConverter
2
from coala_utils.string_processing import (unescape, convert_to_raw,
3
                                           position_is_escaped,
4
                                           unescaped_rstrip)
5
6
7
class LineParser:
    """
    Splits one configuration line into a section name, a list of
    (section_override, key) pairs, a value and a comment.
    """

    def __init__(self,
                 key_value_delimiters=('=',),
                 comment_separators=('#',),
                 key_delimiters=(',', ' '),
                 section_name_surroundings=None,
                 section_override_delimiters=(".",)):
        """
        Creates a new line parser. Please note that no delimiter or separator
        may be an "o" or you may encounter undefined behaviour with the
        escapes.

        :param key_value_delimiters:        Delimiters that delimit a key from
                                            a value
        :param comment_separators:          Used to initiate a comment
        :param key_delimiters:              Delimiters between several keys
        :param section_name_surroundings:   Dictionary, e.g. {"[": "]"} means a
                                            section name is surrounded by [].
                                            If None, {"[": "]"} is used as
                                            default.
        :param section_override_delimiters: Delimiter for a section override.
                                            E.g. "." would mean that
                                            section.key is a possible key that
                                            puts the key into the section
                                            "section" despite of the current
                                            section.
        """
        # None stands in for the default so that no mutable dict is shared
        # between instances through the signature.
        if section_name_surroundings is None:
            section_name_surroundings = {"[": "]"}

        self.key_value_delimiters = key_value_delimiters
        self.comment_separators = comment_separators
        self.key_delimiters = key_delimiters
        self.section_name_surroundings = section_name_surroundings
        self.section_override_delimiters = section_override_delimiters

    def parse(self, line):
        """
        Note that every value in the returned tuple *besides the value* is
        unescaped. This is so since the value is meant to be put into a Setting
        later thus the escapes may be needed there.

        :param line: the line to parse
        :return:     section_name (empty string if it's no section name),
                     [(section_override, key), ...], value, comment
        """
        line, comment = self.__separate_by_first_occurrence(
            line,
            self.comment_separators)
        comment = unescape(comment)
        if line == "":
            # Nothing but (possibly) a comment on this line.
            return '', [], '', comment

        section_name = unescape(self.__get_section_name(line))
        if section_name != '':
            return section_name, [], '', comment

        # Escapes in value might be needed by the bears
        keys, value = self.__extract_keys_and_value(line)

        all_delimiters = self.__collect_delimiters()

        value = convert_to_raw(value, all_delimiters)

        key_tuples = []
        for key in keys:
            key = convert_to_raw(key, all_delimiters)
            # A key without an override delimiter ends up as ("", key)
            # because the unsplit string is returned in the second part.
            section, key = self.__separate_by_first_occurrence(
                key,
                self.section_override_delimiters,
                strip_delim=True,
                return_second_part_nonempty=True)
            key_tuples.append((unescape(section), unescape(key)))

        return '', key_tuples, value, comment

    def __collect_delimiters(self):
        """
        Concatenates every configured delimiter into one string.

        Each collection is joined into a fresh string, so the instance
        attributes are never modified, whatever sequence type the caller
        passed to the constructor (an in-place ``+=`` on an attribute that is
        a list would grow it on every call).

        :return: Key-value delimiters, key delimiters, comment separators,
                 section override delimiters, then the keys and the values of
                 section_name_surroundings, in that order.
        """
        delimiter_groups = (
            self.key_value_delimiters,
            self.key_delimiters,
            self.comment_separators,
            self.section_override_delimiters,
            # section_name_surroundings is stored as a dict
            self.section_name_surroundings.keys(),
            self.section_name_surroundings.values())

        return "".join("".join(group) for group in delimiter_groups)

    @staticmethod
    def __separate_by_first_occurrence(string,
                                       delimiters,
                                       strip_delim=False,
                                       return_second_part_nonempty=False):
        """
        Separates a string by the first of all given delimiters. Any whitespace
        characters will be stripped away from the parts.

        :param string:                      The string to separate.
        :param delimiters:                  The delimiters.
        :param strip_delim:                 Strips the delimiter from the
                                            result if true.
        :param return_second_part_nonempty: If no delimiter is found and this
                                            is true the contents of the string
                                            will be returned in the second part
                                            of the tuple instead of the first
                                            one.
        :return:                            (first_part, second_part)
        """
        # Build a masked copy in which escape sequences cannot match a
        # delimiter: an escaped backslash becomes "oo", then every remaining
        # backslash together with the character after it becomes "oo". This
        # is why "o" must not be used as a delimiter. Positions found in the
        # masked copy are used as indices into ``string``; a lone trailing
        # backslash makes the copy one character longer than ``string``.
        masked = string.replace("\\\\", "oo")
        i = masked.find("\\")
        while i != -1:
            masked = masked[:i] + "oo" + masked[i+2:]
            i = masked.find("\\", i+2)

        # Pick the delimiter whose first unescaped occurrence is leftmost.
        delim_pos = len(string)
        used_delim = ""
        for delim in delimiters:
            pos = masked.find(delim)
            if 0 <= pos < delim_pos:
                delim_pos = pos
                used_delim = delim

        no_delimiter_found = delim_pos == len(string)
        if return_second_part_nonempty and no_delimiter_found:
            return "", string.strip(" \n")

        skip = len(used_delim) if strip_delim else 0
        first_part = string[:delim_pos]
        second_part = string[delim_pos + skip:]

        # NOTE(review): presumably this keeps escaped trailing whitespace in
        # place; confirm against position_is_escaped / unescaped_rstrip.
        if not position_is_escaped(second_part, len(second_part) - 1):
            first_part = unescaped_rstrip(first_part)
            second_part = unescaped_rstrip(second_part)

        return (first_part.lstrip().rstrip("\n"),
                second_part.lstrip().rstrip("\n"))

    def __get_section_name(self, line):
        """
        Extracts the section name if the line is wrapped in one of the
        configured surroundings.

        :param line: The line to inspect.
        :return:     The text between the surrounding strings with spaces and
                     newlines stripped from both ends, or an empty string if
                     no configured surrounding matches.
        """
        for begin, end in self.section_name_surroundings.items():
            if line.startswith(begin) and line.endswith(end):
                return line[len(begin):len(line) - len(end)].strip(" \n")

        return ''

    def __extract_keys_and_value(self, line):
        """
        Splits a line at the first key-value delimiter and breaks the key
        part into individual keys.

        :param line: The line to split.
        :return:     (keys, value) where keys is a list built by iterating a
                     StringConverter over the key part, using key_delimiters
                     as list delimiters and with remove_backslashes=False. If
                     the line has no key-value delimiter the key part is
                     empty and the stripped line is the value.
        """
        key_part, value = self.__separate_by_first_occurrence(
            line,
            self.key_value_delimiters,
            strip_delim=True,
            return_second_part_nonempty=True)
        keys = list(StringConverter(
            key_part,
            list_delimiters=self.key_delimiters).__iter__(
            remove_backslashes=False))

        return keys, value
163