1 | import os |
||
2 | import platform |
||
3 | import re |
||
4 | |||
5 | from coalib.misc.Decorators import yield_once |
||
6 | from coalib.misc.Constants import GLOBBING_SPECIAL_CHARS |
||
7 | |||
8 | |||
def _end_of_set_index(string, start_index):
    """
    Locates the closing bracket that terminates a glob set.

    A leading ``!`` and the first member of the set are skipped, so a ``]``
    appearing as the first member does not close the set.

    :param string:      Glob string with wildcards
    :param start_index: Index at which the set starts, meaning the position
                        right behind the opening bracket
    :return:            Position of the closing bracket. If the set is never
                        closed, the first position that was not scanned
                        (``len(string)`` for any start inside the string)
    """
    length = len(string)
    cursor = start_index

    # The negation marker is not a member of the set, step over it.
    if cursor < length and string[cursor] == '!':
        cursor += 1

    # The first member can never close the set, even if it is a ']'.
    if cursor < length:
        cursor += 1

    for candidate in range(cursor, length):
        if string[candidate] == ']':
            return candidate

    # No closing bracket was found behind the cursor.
    return max(cursor, length)
||
31 | |||
32 | |||
def glob_escape(input_string):
    """
    Neutralises glob special characters by wrapping each one in a ``[c]``
    set. Examples:

    >>> from coalib.parsing.Globbing import glob_escape
    >>> glob_escape('test (1)')
    'test [(]1[)]'
    >>> glob_escape('test folder?')
    'test folder[?]'
    >>> glob_escape('test*folder')
    'test[*]folder'

    :param input_string: String that is to be escaped with ``[ ]``.
    :return:             Escaped string in which all the special glob
                         characters ``()[]|?*`` are escaped.
    """
    # One named group that matches any single special character ...
    special_char = "(?P<char>[" + re.escape(GLOBBING_SPECIAL_CHARS) + "])"
    # ... which is put back surrounded by brackets.
    bracketed = "[\\g<char>]"
    return re.sub(special_char, bracketed, input_string)
||
51 | |||
52 | |||
def _position_is_bracketed(string, position):
    """
    Checks whether the char at string[position] lies inside a valid pair of
    brackets, where it loses its special meaning.

    :param string:   Glob string with wildcards
    :param position: Position of a char in string
    :return:         Whether or not the char is inside a valid set of brackets
    """
    # Slicing normalises the position: negative values are resolved and
    # values beyond the end are trimmed to len(string).
    position = len(string[:position])
    length = len(string)

    cursor = 0
    while cursor < position:
        opens_set = string[cursor] == '['
        cursor += 1
        if not opens_set:
            continue

        set_end = _end_of_set_index(string, cursor)
        if set_end >= length:
            # The bracket is never closed, so nothing behind it is bracketed.
            return False
        if cursor <= position < set_end:
            return True
        # Resume scanning right behind the closing bracket.
        cursor = set_end + 1

    return False
||
78 | |||
79 | |||
def _boundary_of_alternatives_indices(pattern):
    """
    Finds where a set of alternatives is located in a glob pattern.
    Alternatives are defined by a matching set of non-bracketed parentheses.

    :param pattern: Glob pattern with wildcards.
    :return:        Indices of the innermost set of matching non-bracketed
                    parentheses in a tuple: the position right behind the
                    opening parenthesis and the position of the closing one.
                    The index of a missing parenthesis will be passed as None.
    """
    # The leftmost closing parenthesis paired with the rightmost opening
    # parenthesis left of it always belong together, which lets the pattern
    # be resolved from the most nested section outwards.
    end_pos = next(
        (closing.start()
         for closing in re.finditer('\\)', pattern)
         if not _position_is_bracketed(pattern, closing.start())),
        None)

    # A missing end_pos makes the slice cover the whole pattern.
    start_pos = None
    for opening in re.finditer('\\(', pattern[:end_pos]):
        if _position_is_bracketed(pattern, opening.start()):
            continue
        # Keep overwriting so that the rightmost candidate wins.
        start_pos = opening.end()

    return start_pos, end_pos
||
106 | |||
107 | |||
@yield_once
def _iter_choices(pattern):
    """
    Walks over the individual choices of an alternative by splitting the
    pattern on every '|' that is not bracketed.

    :param pattern: String of choices separated by '|'s
    :return:        Iterator that yields parts of string separated by
                    non-bracketed '|'s
    """
    # Every '|' is a candidate boundary; the end of the pattern closes the
    # last choice.
    boundaries = [bar.start() for bar in re.finditer('\\|', pattern)]
    boundaries.append(len(pattern))

    choice_start = 0
    for boundary in boundaries:
        if _position_is_bracketed(pattern, boundary):
            # A '|' inside a set is an ordinary character of the choice.
            continue
        yield pattern[choice_start: boundary]
        choice_start = boundary + 1
||
125 | |||
126 | |||
@yield_once
def _iter_alternatives(pattern):
    """
    Iterates through all glob patterns that can be obtained by combining
    every choice of each alternative.

    :param pattern: Glob pattern with wildcards
    :return:        Iterator that yields all glob patterns without alternatives
                    that can be created from the given pattern containing them.
    """
    start_pos, end_pos = _boundary_of_alternatives_indices(pattern)

    if start_pos is None or end_pos is None:
        # No complete pair of parentheses: nothing left to expand.
        yield pattern
        return

    # Text around the parentheses (the parentheses themselves are dropped).
    prefix = pattern[:start_pos-1]
    suffix = pattern[end_pos+1:]

    for choice in _iter_choices(pattern[start_pos: end_pos]):
        # The rebuilt pattern may still hold further alternatives elsewhere,
        # so it is expanded recursively.
        for expanded in _iter_alternatives(prefix + choice + suffix):
            yield expanded
||
151 | |||
152 | |||
def translate(pattern):
    """
    Translates a pattern into a regular expression.

    The result starts with the inline flags ``(?ms)`` and ends with ``\\Z``
    so that it has to match a whole name.

    :param pattern: Glob pattern with wildcards
    :return:        Regular expression with the same meaning
    """
    index, length = 0, len(pattern)
    # Global inline flags have to lead the expression: placing them anywhere
    # else is deprecated since Python 3.6 and an error since Python 3.11.
    parts = ['(?ms)']
    while index < length:
        char = pattern[index]
        index += 1
        if char == '*':
            # '**' matches everything. The second '*' is not consumed here;
            # it is handled as a single '*' in the next iteration, which is
            # redundant behind '.*'.
            if index < length and pattern[index] == '*':
                parts.append('.*')
            # on Windows, '*' matches everything but the filesystem
            # separators '/' and '\'.
            elif platform.system() == 'Windows':  # pragma: nocover (Windows)
                parts.append('[^/\\\\]*')
            # on all other (~Unix-) platforms, '*' matches everything but the
            # filesystem separator, most likely '/'.
            else:
                parts.append('[^' + re.escape(os.sep) + ']*')
        elif char == '?':
            parts.append('.')
        elif char == '[':
            closing_index = _end_of_set_index(pattern, index)
            if closing_index >= length:
                # The set is never closed: treat '[' as a literal character.
                parts.append('\\[')
            else:
                sequence = pattern[index:closing_index].replace('\\', '\\\\')
                index = closing_index + 1
                if sequence[0] == '!':
                    # Glob negation becomes regex negation.
                    sequence = '^' + sequence[1:]
                elif sequence[0] == '^':
                    # A literal '^' must not negate the regex set.
                    sequence = '\\' + sequence
                parts.append('[' + sequence + ']')
        else:
            parts.append(re.escape(char))
    parts.append('\\Z')
    return ''.join(parts)
||
194 | |||
195 | |||
def fnmatch(name, patterns):
    """
    Tests whether name matches at least one of the given patterns. An empty
    list of patterns matches every name.

    :param name:     File or directory name
    :param patterns: Glob string with wildcards or list of globs
    :return:         Boolean: Whether or not name is matched by pattern

    Glob Syntax:

    -  '[seq]':         Matches any character in seq. Cannot be empty. Any
                        special character loses its special meaning in a set.
    -  '[!seq]':        Matches any character not in seq. Cannot be empty. Any
                        special character loses its special meaning in a set.
    -  '(seq_a|seq_b)': Matches either sequence_a or sequence_b as a whole.
                        More than two or just one sequence can be given.
    -  '?':             Matches any single character.
    -  '*':             Matches everything but os.sep.
    -  '**':            Matches everything.
    """
    # A single glob string is handled like a list with one entry.
    if isinstance(patterns, str):
        patterns = [patterns]
    if len(patterns) == 0:
        return True

    normalized_name = os.path.normcase(name)
    for pattern in patterns:
        for alternative in _iter_alternatives(pattern):
            # Expand '~' first, then normalise like the name was normalised.
            expanded = os.path.normcase(os.path.expanduser(alternative))
            if re.match(translate(expanded), normalized_name) is not None:
                return True
    return False
||
230 | |||
231 | |||
def _absolute_flat_glob(pattern):
    """
    Glob function for a pattern that does not contain wildcards.

    :param pattern: File or directory path
    :return:        Iterator that yields at most one valid file or dir name
    """
    dirname, basename = os.path.split(pattern)

    if basename:
        found = os.path.exists(pattern)
    else:
        # Patterns ending with a slash should match only directories
        found = os.path.isdir(dirname)

    if found:
        yield pattern
||
249 | |||
250 | |||
def _iter_relative_dirs(dirname):
    """
    Recursively walks through everything below dirname, on all levels.

    :param dirname: Directory name, an empty value stands for the current
                    directory
    :return:        Iterator that yields files and directories from the given
                    dir and all its (recursive) subdirectories, as paths
                    relative to dirname
    """
    root = dirname if dirname else os.curdir
    try:
        entries = os.listdir(root)
    except OSError:
        # Entries that cannot be listed (plain files among them) simply
        # contribute nothing.
        return

    for entry in entries:
        yield entry
        # Descend into the entry and prefix whatever is found with its name.
        for nested in _iter_relative_dirs(os.path.join(root, entry)):
            yield os.path.join(entry, nested)
||
270 | |||
271 | |||
def relative_wildcard_glob(dirname, pattern):
    """
    Non-recursive glob for one directory. Accepts wildcards.

    :param dirname: Directory name, an empty value stands for the current
                    directory
    :param pattern: Glob pattern with wildcards
    :return:        List of files in the dir of dirname that match the pattern
    """
    try:
        names = os.listdir(dirname if dirname else os.curdir)
    except OSError:
        # A directory that cannot be listed has no matches.
        return []

    # Pattern and names are both case-normalised before they are compared,
    # but the names are returned as they were listed.
    matches = re.compile(translate(os.path.normcase(pattern))).match
    return [name for name in names if matches(os.path.normcase(name))]
||
293 | |||
294 | |||
def relative_flat_glob(dirname, basename):
    """
    Non-recursive glob for one directory. Does not accept wildcards.

    :param dirname:  Directory name
    :param basename: Basename of a file in dir of dirname
    :return:         List containing basename if the file exists, an empty
                     list otherwise
    """
    candidate = os.path.join(dirname, basename)
    return [basename] if os.path.exists(candidate) else []
||
306 | |||
307 | |||
def relative_recursive_glob(dirname, pattern):
    """
    Recursive glob for one directory and everything nested below it.
    Accepts only '**' as pattern.

    :param dirname: Directory name
    :param pattern: The recursive wildcard '**'
    :return:        Iterator that yields an empty string first if dirname is
                    given, followed by the relative paths of all entries
                    below the given dir
    """
    assert pattern == '**'

    if dirname:
        # The empty slice of the pattern is an empty relative path.
        yield pattern[:0]

    for nested_path in _iter_relative_dirs(dirname):
        yield nested_path
||
323 | |||
324 | |||
# Matches any of the characters '*', '?' and '['.
wildcard_check_pattern = re.compile('([*?[])')


def has_wildcard(pattern):
    """
    Tells whether pattern contains any wildcard character, that is one of
    '*', '?' or '['.

    :param pattern: Glob pattern that may contain wildcards
    :return:        Boolean: Whether or not there are wildcards in pattern
    """
    return wildcard_check_pattern.search(pattern) is not None
||
337 | |||
338 | |||
def iglob(pattern):
    """
    Iterates all filesystem paths that get matched by the glob pattern.
    Syntax is equal to that of fnmatch.

    Every alternative of the pattern is evaluated, including those that
    follow an alternative without wildcards or without a directory part.

    :param pattern: Glob pattern with wildcards
    :return:        Iterator that yields all file names that match pattern
    """
    for pat in _iter_alternatives(pattern):
        pat = os.path.expanduser(pat)
        pat = os.path.normcase(pat)
        dirname, basename = os.path.split(pat)

        if not has_wildcard(pat):
            for path in _absolute_flat_glob(pat):
                yield path
            # Only this alternative is finished. Returning here would drop
            # all remaining alternatives.
            continue

        # Pick the strategy for the last path component.
        if basename == '**':
            relative_glob_function = relative_recursive_glob
        elif has_wildcard(basename):
            relative_glob_function = relative_wildcard_glob
        else:
            relative_glob_function = relative_flat_glob

        if not dirname:
            # No directory part: results are relative to the current dir.
            for path in relative_glob_function(dirname, basename):
                yield path
            continue

        # Prevent an infinite recursion if a drive or UNC path contains
        # wildcard characters (i.e. r'\\?\C:').
        if dirname != pat and has_wildcard(dirname):
            dirs = iglob(dirname)
        else:
            dirs = [dirname]

        for parent in dirs:
            for name in relative_glob_function(parent, basename):
                yield os.path.join(parent, name)
||
378 | |||
379 | |||
def glob(pattern):
    """
    Collects all filesystem paths that get matched by the glob pattern.
    Syntax is equal to that of fnmatch.

    :param pattern: Glob pattern with wildcards
    :return:        List of all file names that match pattern
    """
    # iglob is lazy; exhaust it so that callers get a complete list.
    matches = iglob(pattern)
    return list(matches)
||
389 |