Completed
Push — 0.5.3 ( c2ca4c...28aba1 )
by Felipe A.
01:05
created

GlobTransformBase   A

Complexity

Total Complexity 30

Size/Duplication

Total Lines 204
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 30
c 1
b 0
f 0
dl 0
loc 204
rs 10

14 Methods

Rating   Name   Duplication   Size   Complexity  
A transform_text() 0 2 1
A transform_posix_collating_symbol() 0 5 1
A transform_posix_equivalence_class() 0 5 1
A __init__() 0 6 1
A transform_range_ignore() 0 2 1
A transform_literal() 0 2 1
A transform() 0 9 3
B transform_group() 0 10 6
A transform_wildcard() 0 6 3
A transform_sep() 0 2 1
A transform_range() 0 7 3
A transform_range_sep() 0 2 1
A transform_posix_character_class() 0 12 4
A transform_range_close() 0 6 2
1
2
import os
3
import warnings
4
5
from unicategories import categories as unicat, RangeGroup as ranges
6
7
from ..compat import re_escape, chr
8
from . import StateMachine
9
10
11
class GlobTransformBase(StateMachine):
    """
    State machine rewriting a glob pattern into regular-expression source.

    Every ``transform_<state>`` method receives the chunk of pattern consumed
    while in ``<state>`` and returns the regex fragment that replaces it.
    State switching itself is driven by :class:`StateMachine` (defined
    elsewhere) using the ``jumps`` table below.
    """

    # state name -> {mark found in the pattern: state to switch to}.
    # An empty mark ('') is used by states that hand control back right away.
    jumps = {
        'text': {
            '*': 'wildcard',
            '**': 'wildcard',
            '?': 'wildcard',
            '[': 'range',
            '[!': 'range',
            '[]': 'range',
            '{': 'group',
            ',': 'group',
            '}': 'group',
            '\\': 'literal',
            '/': 'sep',
            },
        'sep': {
            '': 'text',
            },
        'literal': {
            c: 'text' for c in '\\*?[{'
            },
        'wildcard': {
            '': 'text',
            },
        'range': {
            '/': 'range_sep',
            ']': 'range_close',
            '[.': 'posix_collating_symbol',
            '[:': 'posix_character_class',
            '[=': 'posix_equivalence_class',
            },
        'range_sep': {
            '': 'range',
            },
        'range_ignore': {
            '': 'range',
            },
        'range_close': {
            '': 'text',
            },
        'posix_collating_symbol': {
            '.]': 'range_ignore',
            },
        'posix_character_class': {
            ':]': 'range_ignore',
            },
        'posix_equivalence_class': {
            '=]': 'range_ignore',
            },
        'group': {
            '': 'text',
            },
        }

    # POSIX character class name -> group of code point ranges.
    # Ranges are half-open ``(start, end)`` pairs: ``end`` is excluded (see
    # how transform_posix_character_class renders them with ``end - 1``).
    character_classes = {
        'alnum': (
            # [\p{L}\p{Nl}\p{Nd}]
            unicat['L'] + unicat['Nl'] + unicat['Nd']
            ),
        'alpha': (
            # \p{L}\p{Nl}
            unicat['L'] + unicat['Nl']
            ),
        'ascii': (
            # [\x00-\x7F] (end is exclusive, hence 0x80)
            ranges(((0, 0x80),))
            ),
        'blank': (
            # [\p{Zs}\t]
            unicat['Zs'] + ranges(((9, 10),))
            ),
        'cntrl': (
            # \p{Cc}
            unicat['Cc']
            ),
        'digit': (
            # \p{Nd}
            unicat['Nd']
            ),
        'graph': (
            # [^\p{Z}\p{C}]
            unicat['M'] + unicat['L'] + unicat['N'] + unicat['P'] + unicat['S']
            ),
        'lower': (
            # \p{Ll}
            unicat['Ll']
            ),
        'print': (
            # \P{C}: every major category except C (other/control)
            unicat['L'] + unicat['M'] + unicat['N'] + unicat['P'] +
            unicat['S'] + unicat['Z']
            ),
        'punct': (
            # \p{P}
            unicat['P']
            ),
        'space': (
            # [\p{Z}\t\r\n\v\f]
            unicat['Z'] + ranges(((9, 14),))
            ),
        'upper': (
            # \p{Lu}
            unicat['Lu']
            ),
        'word': (
            # [\p{L}\p{Nl}\p{Nd}\p{Pc}]
            unicat['L'] + unicat['Nl'] + unicat['Nd'] + unicat['Pc']
            ),
        'xdigit': (
            # [0-9A-Fa-f]
            ranges(((48, 58), (65, 71), (97, 103)))
            ),
        }

    # Initial state.
    current = 'text'
    # True while inside a bracket expression: output is buffered, not emitted.
    deferred = False

    def __init__(self, data, sep=os.sep, base=None):
        """
        :param data: glob pattern to process
        :param sep: path separator emitted wherever the pattern has ``/``
        :param base: base path, stored as an empty string when falsy
        """
        self.sep = sep
        self.base = base or ''
        # Fragments buffered while ``deferred`` is set (may contain None).
        self.deferred_data = []
        # Current nesting depth of ``{...}`` groups.
        self.deep = 0
        super(GlobTransformBase, self).__init__(data)

    def transform(self, data, mark, next):
        """
        Run the parent transform and apply output buffering.

        While ``deferred`` is set, every fragment is stored in
        ``deferred_data`` and an empty string is emitted instead. Once it is
        unset, any stored fragments are emitted in front of the current one.
        """
        data = super(GlobTransformBase, self).transform(data, mark, next)
        if self.deferred:
            self.deferred_data.append(data)
            data = ''
        elif self.deferred_data:
            data = ''.join(self.deferred_data) + data
            self.deferred_data[:] = ()
        return data

    def transform_posix_collating_symbol(self, data, mark, next):
        """
        Warn about an unsupported ``[.x.]`` expression.

        :returns: None, which makes transform_range_close discard the whole
                  bracket expression
        """
        warnings.warn(
            'Posix collating symbols (like %s%s) are not supported.'
            % (data, mark))
        return None

    def transform_posix_character_class(self, data, mark, next):
        """
        Expand a ``[:name:]`` expression into regex character ranges.

        :returns: ranges to place inside the enclosing bracket expression, or
                  None (after a warning) when the class name is unknown
        """
        # Drop the mark which opened this state, leaving only the class name.
        name = data[len(self.start):]
        if name not in self.character_classes:
            warnings.warn(
                'Posix character class %s is not supported.'
                % name)
            return None
        # Endpoints are escaped because this output lands inside ``[...]``,
        # where characters such as ``]``, ``\``, ``^`` or ``-`` are special.
        pieces = []
        for start, end in self.character_classes[name]:
            first = re_escape(chr(start))
            if end - start == 1:
                pieces.append(first)
            else:
                pieces.append('%s-%s' % (first, re_escape(chr(end - 1))))
        return ''.join(pieces)

    def transform_posix_equivalence_class(self, data, mark, next):
        """
        Warn about an unsupported ``[=x=]`` expression.

        :returns: None, which makes transform_range_close discard the whole
                  bracket expression
        """
        warnings.warn(
            'Posix equivalence class expresions (like %s%s) are not supported.'
            % (data, mark))
        return None

    def transform_wildcard(self, data, mark, next):
        """
        Translate ``**``, ``*`` and ``?`` wildcards.

        ``**`` matches anything, ``*`` matches anything but the separator and
        any other wildcard mark (``?``) matches a single character.
        """
        if self.start == '**':
            return '.*'
        if self.start == '*':
            return '[^%s]*' % re_escape(self.sep)
        return '.'

    def transform_text(self, data, mark, next):
        """Emit plain pattern text as an escaped regex literal."""
        return re_escape(data)

    def transform_sep(self, data, mark, next):
        """Emit the configured path separator, escaped."""
        return re_escape(self.sep)

    def transform_literal(self, data, mark, next):
        """
        Emit backslash-escaped pattern data without its leading mark.

        NOTE(review): the result is not passed through ``re_escape``; confirm
        against StateMachine whether it can contain regex metacharacters.
        """
        return data[len(self.start):]

    def transform_range(self, data, mark, next):
        """
        Open (or continue) a bracket expression and start buffering output.

        ``[!`` becomes a negated class and ``[]`` a class starting with a
        literal ``]``; any other data is passed through untouched.
        """
        self.deferred = True
        if self.start == '[!':
            return '[^%s' % data[2:]
        if self.start == '[]':
            return '[\\]%s' % data[2:]
        return data

    def transform_range_sep(self, data, mark, next):
        """Emit the configured path separator inside a bracket expression."""
        return re_escape(self.sep)

    def transform_range_close(self, data, mark, next):
        """
        Close a bracket expression and stop buffering output.

        If any buffered fragment is None (an unsupported POSIX expression was
        found), the whole bracket expression is dropped and replaced by ``.``.
        """
        self.deferred = False
        if None in self.deferred_data:
            self.deferred_data[:] = ()
            return '.'
        return data

    def transform_range_ignore(self, data, mark, next):
        """Emit nothing for the closing mark of a POSIX expression."""
        return ''

    def transform_group(self, data, mark, next):
        """
        Translate ``{a,b}`` alternation into a regex group.

        ``,`` and ``}`` are only translated while inside a group; otherwise
        the data is returned untouched.
        """
        if self.start == '{':
            self.deep += 1
            return '('
        if self.start == ',' and self.deep:
            return '|'
        if self.start == '}' and self.deep:
            self.deep -= 1
            return ')'
        return data
215
216
217
class GlobTransform(GlobTransformBase):
    """
    Glob transform with an extra initial ``start`` state.

    The ``start`` state decides how the resulting expression begins and
    :meth:`flush` appends a trailing "separator or end of string" group.
    """

    # Shallow copy is enough: only a new top-level state is added, the
    # per-state dictionaries shared with the parent are left untouched.
    jumps = GlobTransformBase.jumps.copy()
    jumps.update({
        'start': {
            '': 'text',
            '/': 'sep',
            },
        })
    current = 'start'

    def transform_start(self, data, mark, next):
        """
        Emit the beginning of the regular expression.

        When the mark leaving the ``start`` state is ``/`` the expression is
        anchored with ``^`` followed by the escaped base path; otherwise an
        escaped separator is emitted.
        """
        if mark == '/':
            return '^%s' % re_escape(self.base)
        return re_escape(self.sep)

    def flush(self):
        """
        Return the parent's flushed output plus the closing group.

        The appended ``(sep|$)`` group matches either the escaped separator
        or the end of the string.
        """
        # super() must name this class: naming GlobTransformBase would start
        # the lookup after it and skip any flush() it defines.
        return '%s(%s|$)' % (
            super(GlobTransform, self).flush(),
            re_escape(self.sep),
            )
237
238
239
def translate(data, sep=os.sep, base=None):
    """
    Build the regular-expression source for a glob pattern.

    :param data: glob pattern
    :param sep: path separator handed to GlobTransform
    :param base: base path handed to GlobTransform
    :returns: all fragments yielded by the transform, joined into one string
    """
    fragments = GlobTransform(data, sep, base)
    return ''.join(fragments)
242