Failed Conditions
Pull Request — master (#1152)
by Lasse
03:36
created

coalib.parsing.translate()   F

Complexity

Conditions 11

Size

Total Lines 41

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 11
dl 0
loc 41
rs 3.1765

How to fix   Complexity   

Complexity

Complex classes like coalib.parsing.translate() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
import os
2
import platform
3
import re
4
5
from coalib.misc.Decorators import yield_once
6
7
8
def _end_of_set_index(string, start_index):
9
    """
10
    Returns the position of the appropriate closing bracket for a glob set in
11
    string.
12
13
    :param string:      Glob string with wildcards
14
    :param start_index: Index at which the set starts, meaning the position
15
                        right behind the opening bracket
16
    :return:            Position of appropriate closing bracket
17
    """
18
    length = len(string)
19
    closing_index = start_index
20
    if closing_index < length and string[closing_index] == '!':
21
        closing_index += 1
22
23
    if closing_index < length:  # the set cannot be closed by a bracket here
24
        closing_index += 1
25
26
    while closing_index < length and string[closing_index] != ']':
27
        closing_index += 1
28
29
    return closing_index
30
31
32
def _position_is_bracketed(string, position):
    """
    Checks whether the character at string[position] lies inside a valid
    pair of brackets (where it has no special meaning).

    :param string:   Glob string with wildcards
    :param position: Position of a char in string
    :return:         Whether or not the char is inside a valid set of brackets
    """
    total = len(string)
    # Slicing maps negative positions to their positive counterpart and
    # clamps positions that lie beyond the end of the string.
    position = len(string[:position])

    cursor = 0
    while cursor < position:
        opens_set = string[cursor] == '['
        cursor += 1
        if not opens_set:
            continue

        set_end = _end_of_set_index(string, cursor)
        if set_end >= total:
            # The bracket is never closed, so nothing behind it is bracketed.
            return False
        if cursor <= position < set_end:
            return True
        # Continue scanning right behind the closed set.
        cursor = set_end + 1

    return False
57
58
59
def _boundary_of_alternatives_indices(pattern):
    """
    Finds where a set of alternatives is located in a glob pattern.
    Alternatives are delimited by a matching pair of non-bracketed
    parentheses.

    :param pattern: Glob pattern with wildcards.
    :return:        Tuple (start, end) for the innermost pair of matching
                    non-bracketed parentheses: start is the index right behind
                    the opening parenthesis, end is the index of the closing
                    one. A missing parenthesis is reported as None.
    """
    # The leftmost closing parenthesis combined with the rightmost opening
    # parenthesis to its left always forms a matching pair, which lets the
    # pattern be resolved from the most deeply nested section outwards.
    unbracketed_closings = (
        found.start()
        for found in re.finditer('\\)', pattern)
        if not _position_is_bracketed(pattern, found.start()))
    end_pos = next(unbracketed_closings, None)  # leftmost one, if any

    # pattern[:None] is the whole pattern, so this also works without a
    # closing parenthesis.
    unbracketed_openings = [
        found.end()
        for found in re.finditer('\\(', pattern[:end_pos])
        if not _position_is_bracketed(pattern, found.start())]
    start_pos = unbracketed_openings[-1] if unbracketed_openings else None

    return start_pos, end_pos
85
86
87
@yield_once
def _iter_choices(pattern):
    """
    Splits the content of an alternative into its choices. The pattern is
    split at every '|' that is not bracketed.

    :param pattern: String of choices separated by '|'s
    :return:        Iterator that yields parts of string separated by
                    non-bracketed '|'s
    """
    # Every '|' is a candidate boundary; the end of the pattern closes the
    # last choice.
    boundaries = [pos for pos, char in enumerate(pattern) if char == '|']
    boundaries.append(len(pattern))

    choice_start = 0
    for boundary in boundaries:
        if _position_is_bracketed(pattern, boundary):
            # A '|' inside a set is an ordinary character of the choice.
            continue
        yield pattern[choice_start:boundary]
        choice_start = boundary + 1
104
105
106
@yield_once
def _iter_alternatives(pattern):
    """
    Iterates through all glob patterns that can be obtained by combining
    all choices of each alternative.

    :param pattern: Glob pattern with wildcards
    :return:        Iterator that yields all glob patterns without alternatives
                    that can be created from the given pattern containing them.
    """
    start_pos, end_pos = _boundary_of_alternatives_indices(pattern)

    if start_pos is None or end_pos is None:
        # No complete pair of parentheses: nothing to expand.
        yield pattern
        return

    # start_pos points right behind '(' and end_pos at ')'; both parentheses
    # are dropped when a choice is substituted.
    before = pattern[:start_pos-1]
    after = pattern[end_pos+1:]

    for choice in _iter_choices(pattern[start_pos:end_pos]):
        # The substituted pattern may contain further alternatives, so it is
        # expanded recursively.
        for expanded in _iter_alternatives(before + choice + after):
            yield expanded
130
131
132
def translate(pattern):
    """
    Translates a glob pattern into a regular expression.

    '**' becomes '.*', a single '*' matches everything but the filesystem
    separator(s), '?' matches any single character and '[seq]' / '[!seq]'
    become regex character sets. An opening bracket that is never closed is
    matched literally. All other characters are escaped.

    :param pattern: Glob pattern with wildcards
    :return:        Regular expression with the same meaning. It starts with
                    the inline flags '(?ms)' and is anchored at the end with
                    '\\Z'.
    """
    # on Windows, '*' matches everything but the filesystem
    # separators '/' and '\'.
    if platform.system() == 'Windows':  # pragma: nocover (Windows)
        single_star = '[^/\\\\]*'
    # on all other (~Unix-) platforms, '*' matches everything but the
    # filesystem separator, most likely '/'.
    else:
        single_star = '[^' + re.escape(os.sep) + ']*'

    index, length = 0, len(pattern)
    parts = []
    while index < length:
        char = pattern[index]
        index += 1
        if char == '*':
            if index < length and pattern[index] == '*':
                # '**' matches everything; consume the second star as well
                # so it is not translated a second time.
                parts.append('.*')
                index += 1
            else:
                parts.append(single_star)
        elif char == '?':
            parts.append('.')
        elif char == '[':
            closing_index = _end_of_set_index(pattern, index)
            if closing_index >= length:
                # unclosed set: the bracket is an ordinary character
                parts.append('\\[')
            else:
                # backslashes have no special meaning in a glob set, so they
                # are doubled to stay literal in the regex set
                sequence = pattern[index:closing_index].replace('\\', '\\\\')
                index = closing_index + 1
                if sequence[0] == '!':
                    # glob negation '!' corresponds to regex negation '^'
                    sequence = '^' + sequence[1:]
                elif sequence[0] == '^':
                    # a leading '^' is a plain member of a glob set
                    sequence = '\\' + sequence
                parts.append('[' + sequence + ']')
        else:
            parts.append(re.escape(char))

    # Global inline flags have to be placed at the very start of the
    # expression; newer Python versions reject them anywhere else.
    return '(?ms)' + ''.join(parts) + '\\Z'
173
174
175
def fnmatch(name, pattern):
    """
    Checks whether name is matched by pattern.

    :param name:    File or directory name
    :param pattern: Glob string with wildcards
    :return:        Boolean: Whether or not name is matched by pattern

    Glob Syntax:
    '[seq]':         Matches any character in seq. Cannot be empty.
                     Any special character loses its special meaning in a set.
    '[!seq]':        Matches any character not in seq. Cannot be empty.
                     Any special character loses its special meaning in a set.
    '(seq_a|seq_b)': Matches either sequence_a or sequence_b as a whole.
                     More than two or just one sequence can be given.
    '?':             Matches any single character.
    '*':             Matches everything but os.sep.
    '**':            Matches everything.
    """
    normalized_name = os.path.normcase(name)
    # The name matches as soon as one of the alternatives matches; any()
    # stops at the first hit.
    return any(
        re.match(translate(os.path.normcase(os.path.expanduser(alternative))),
                 normalized_name) is not None
        for alternative in _iter_alternatives(pattern))
202
203
204
def _absolute_flat_glob(pattern):
205
    """
206
    Glob function for a pattern that do not contain wildcards.
207
208
    :pattern: File or directory path
209
    :return:  Iterator that yields at most one valid file or dir name
210
    """
211
    dirname, basename = os.path.split(pattern)
212
213
    if basename:
214
        if os.path.exists(pattern):
215
            yield pattern
216
    else:
217
        # Patterns ending with a slash should match only directories
218
        if os.path.isdir(dirname):
219
            yield pattern
220
    return
221
222
223
def _iter_relative_dirs(dirname):
224
    """
225
    Recursively iterates subdirectories of all levels from dirname
226
227
    :param dirname: Directory name
228
    :return:        Iterator that yields files and directory from the given dir
229
                    and all it's (recursive) subdirectories
230
    """
231
    if not dirname:
232
        dirname = os.curdir
233
    try:
234
        files_or_dirs = os.listdir(dirname)
235
    except os.error:
236
        return
237
    for file_or_dir in files_or_dirs:
238
        yield file_or_dir
239
        path = os.path.join(dirname, file_or_dir)
240
        for sub_file_or_dir in _iter_relative_dirs(path):
241
            yield os.path.join(file_or_dir, sub_file_or_dir)
242
243
244
def relative_wildcard_glob(dirname, pattern):
    """
    Non-recursive glob for one directory. Accepts wildcards.

    :param dirname: Directory name; an empty value stands for the current
                    directory
    :param pattern: Glob pattern with wildcards
    :return:        List of the entries in the dir of dirname that match the
                    pattern; empty if the directory cannot be listed
    """
    try:
        entries = os.listdir(dirname or os.curdir)
    except OSError:
        return []

    matcher = re.compile(translate(os.path.normcase(pattern)))
    # Entries are compared in normalized form but returned as listed.
    return [entry for entry in entries
            if matcher.match(os.path.normcase(entry))]
265
266
267
def relative_flat_glob(dirname, basename):
    """
    Non-recursive glob for one directory. Does not accept wildcards.

    :param dirname:  Directory name
    :param basename: Basename of a file in dir of dirname
    :return:         List containing basename if the file exists, otherwise
                     an empty list
    """
    candidate = os.path.join(dirname, basename)
    return [basename] if os.path.exists(candidate) else []
278
279
280
def relative_recursive_glob(dirname, pattern):
    """
    Recursive glob for one directory and everything nested below it.
    Accepts only '**' as pattern.

    :param dirname: Directory name
    :param pattern: The recursive wildcard '**'
    :return:        Iterator that yields an empty string (standing for the
                    given dir itself) if dirname is not empty, followed by
                    the relative paths of all (nested) files and
                    subdirectories of the given dir
    """
    assert pattern == '**'
    if dirname:
        # '**' also matches "nothing", i.e. the directory itself.
        yield ''
    for nested_path in _iter_relative_dirs(dirname):
        yield nested_path
295
296
297
# Matches any of the glob wildcard characters '*', '?' and '['.
wildcard_check_pattern = re.compile(r'([*?[])')
298
299
300
def has_wildcard(pattern):
    """
    Tells whether pattern contains at least one wildcard character.

    :param pattern: Glob pattern that may contain wildcards
    :return:        Boolean: Whether or not there are wildcards in pattern
    """
    return wildcard_check_pattern.search(pattern) is not None
309
310
311
def _iglob_without_alternatives(pat):
    """
    Iterates all filesystem paths matched by one single glob pattern whose
    alternatives have already been expanded.

    :param pat: Glob pattern with wildcards, but without alternatives
    :return:    Iterator that yields all file names that match pat
    """
    dirname, basename = os.path.split(pat)
    if not has_wildcard(pat):
        for path in _absolute_flat_glob(pat):
            yield path
        return

    # Pick the strategy for the last path component.
    if basename == '**':
        relative_glob_function = relative_recursive_glob
    elif has_wildcard(basename):
        relative_glob_function = relative_wildcard_glob
    else:
        relative_glob_function = relative_flat_glob

    if not dirname:
        for path in relative_glob_function(dirname, basename):
            yield path
        return

    # Prevent an infinite recursion if a drive or UNC path contains
    # wildcard characters (i.e. r'\\?\C:').
    if dirname != pat and has_wildcard(dirname):
        dirs = iglob(dirname)
    else:
        dirs = [dirname]

    for directory in dirs:
        for name in relative_glob_function(directory, basename):
            yield os.path.join(directory, name)


def iglob(pattern):
    """
    Iterates all filesystem paths that get matched by the glob pattern.
    Syntax is equal to that of fnmatch.

    Every alternative of the pattern is globbed; an alternative without
    wildcards or without a directory part does not end the iteration over
    the remaining alternatives.

    :param pattern: Glob pattern with wildcards
    :return:        Iterator that yields all file names that match pattern
    """
    for pat in _iter_alternatives(pattern):
        pat = os.path.expanduser(pat)
        pat = os.path.normcase(pat)
        for path in _iglob_without_alternatives(pat):
            yield path
350
351
352
def glob(pattern):
    """
    Collects all filesystem paths that get matched by the glob pattern.
    Syntax is equal to that of fnmatch.

    :param pattern: Glob pattern with wildcards
    :return:        List of all file names that match pattern
    """
    matches = list(iglob(pattern))
    return matches
361