Completed
Push — master ( b1b981...5a9ec2 )
by Felipe A.
01:40 queued 35s
created

GlobTransform.__init__()   A

Complexity

Conditions 1

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 5
Bugs 3 Features 0
Metric Value
cc 1
c 5
b 3
f 0
dl 0
loc 6
rs 9.4285
1
2
import os
3
import warnings
4
5
from unicategories import categories as unicat, RangeGroup as ranges
6
7
from ..compat import re_escape, chr
8
from . import StateMachine
9
10
11
class GlobTransform(StateMachine):
    '''
    State machine which rewrites a glob pattern as regular expression source.

    Iterating an instance yields regular expression fragments; joining them
    gives the full expression. Supported glob syntax, as declared in
    :attr:`jumps`: ``*``, ``**``, ``?``, bracket ranges (including ``[!``
    negation and POSIX ``[:class:]`` expressions), ``{a,b}`` groups and
    backslash escapes.
    '''

    # Transition table: state name -> {mark: next state name}. How marks
    # are located in the input is up to StateMachine (defined elsewhere).
    jumps = {
        'start': {
            '': 'text',
            '/': 'sep',
            },
        'text': {
            '*': 'wildcard',
            '**': 'wildcard',
            '?': 'wildcard',
            '[': 'range',
            '[!': 'range',
            '[]': 'range',
            '{': 'group',
            ',': 'group',
            '}': 'group',
            '\\': 'literal',
            '/': 'sep',
            },
        'sep': {
            '': 'text',
            },
        'literal': {
            c: 'text' for c in ('\\', '*', '?', '[', '{', '}', ',', '/', '')
            },
        'wildcard': {
            '': 'text',
            },
        'range': {
            '/': 'range_sep',
            ']': 'range_close',
            '[.': 'posix_collating_symbol',
            '[:': 'posix_character_class',
            '[=': 'posix_equivalence_class',
            },
        'range_sep': {
            '': 'range',
            },
        'range_ignore': {
            '': 'range',
            },
        'range_close': {
            '': 'text',
            },
        'posix_collating_symbol': {
            '.]': 'range_ignore',
            },
        'posix_character_class': {
            ':]': 'range_ignore',
            },
        'posix_equivalence_class': {
            '=]': 'range_ignore',
            },
        'group': {
            '': 'text',
            },
        }

    # POSIX character class name -> group of (start, end) code point
    # ranges, where end is exclusive (see transform_posix_character_class).
    character_classes = {
        'alnum': (
            # [\p{L}\p{Nl}\p{Nd}]
            unicat['L'] + unicat['Nl'] + unicat['Nd']
            ),
        'alpha': (
            # \p{L}\p{Nl}
            unicat['L'] + unicat['Nl']
            ),
        'ascii': (
            # [\x00-\x7F]
            ranges(((0, 0x80),))
            ),
        'blank': (
            # [\p{Zs}\t]
            unicat['Zs'] + ranges(((9, 10),))
            ),
        'cntrl': (
            # \p{Cc}
            unicat['Cc']
            ),
        'digit': (
            # \p{Nd}
            unicat['Nd']
            ),
        'graph': (
            # [^\p{Z}\p{C}]
            unicat['M'] + unicat['L'] + unicat['N'] + unicat['P'] + unicat['S']
            ),
        'lower': (
            # \p{Ll}
            unicat['Ll']
            ),
        'print': (
            # \P{C}: every major category except C (the complement, not
            # C itself, which would select the non-printable characters).
            unicat['L'] + unicat['M'] + unicat['N'] +
            unicat['P'] + unicat['S'] + unicat['Z']
            ),
        'punct': (
            # \p{P}
            unicat['P']
            ),
        'space': (
            # [\p{Z}\t\n\v\f\r]
            unicat['Z'] + ranges(((9, 14),))
            ),
        'upper': (
            # \p{Lu}
            unicat['Lu']
            ),
        'word': (
            # [\p{L}\p{Nl}\p{Nd}\p{Pc}]
            unicat['L'] + unicat['Nl'] + unicat['Nd'] + unicat['Pc']
            ),
        'xdigit': (
            # [0-9A-Fa-f]
            ranges(((48, 58), (65, 71), (97, 103)))
            ),
        }

    current = 'start'  # initial state
    deferred = False  # True while output is being buffered (inside a range)

    def __init__(self, data, sep=os.sep, base=None):
        '''
        :param data: glob pattern to transform
        :param sep: path separator, defaults to :data:`os.sep`
        :param base: prefix emitted (escaped) after ``^`` when the pattern
                     starts with ``/``; falsy values become ``''``
        '''
        self.sep = sep
        self.base = base or ''
        self.deferred_data = []  # chunks buffered while deferred is set
        self.deep = 0  # count of currently open ``{`` groups
        super(GlobTransform, self).__init__(data)

    def transform(self, data, mark, next):
        '''
        Apply the base class transform, buffering output while deferred.

        While :attr:`deferred` is set, every chunk is appended to
        :attr:`deferred_data` and an empty string is returned instead. On
        the first call after deferral ends, buffered chunks are prepended
        to the current chunk and the buffer is emptied.

        :returns: transformed chunk (empty string while deferring)
        '''
        data = super(GlobTransform, self).transform(data, mark, next)
        if self.deferred:
            self.deferred_data.append(data)
            data = ''
        elif self.deferred_data:
            data = ''.join(self.deferred_data) + data
            self.deferred_data[:] = ()
        return data

    def transform_posix_collating_symbol(self, data, mark, next):
        '''
        Warn about an unsupported collating symbol.

        :returns: None, which makes :meth:`transform_range_close` discard
                  the whole enclosing range
        '''
        warnings.warn(
            'Posix collating symbols (like %s%s) are not supported.'
            % (data, mark))
        return None

    def transform_posix_character_class(self, data, mark, next):
        '''
        Expand a POSIX character class name into bracket expression content.

        The class name is whatever follows the opening mark in ``data``.
        Every emitted character goes through ``re_escape`` so members such
        as ``[``, ``]``, ``\\`` or ``-`` cannot terminate or alter the
        enclosing bracket expression.

        :returns: bracket expression content, or None (after a warning)
                  when the class name is unknown
        '''
        name = data[len(self.start):]
        if name not in self.character_classes:
            warnings.warn(
                'Posix character class %s is not supported.'
                % name)
            return None
        return ''.join(
            # range ends are exclusive: a width of one is a single character
            re_escape(chr(start))
            if 1 == end - start else
            '%s-%s' % (re_escape(chr(start)), re_escape(chr(end - 1)))
            for start, end in self.character_classes[name]
            )

    def transform_posix_equivalence_class(self, data, mark, next):
        '''
        Warn about an unsupported equivalence class expression.

        :returns: None, which makes :meth:`transform_range_close` discard
                  the whole enclosing range
        '''
        warnings.warn(
            'Posix equivalence class expresions (like %s%s) are not supported.'
            % (data, mark))
        return None

    def transform_wildcard(self, data, mark, next):
        '''
        Translate ``**``, ``*`` and ``?`` wildcards.

        ``**`` matches anything, ``*`` matches any run of characters other
        than the separator and anything else (``?``) matches exactly one
        character other than the separator.
        '''
        if self.start == '**':
            return '.*'
        if self.start == '*':
            return '[^%s]*' % re_escape(self.sep)
        return '[^%s]' % re_escape(self.sep)

    def transform_text(self, data, mark, next):
        '''
        Escape plain text.

        When there is no next state (``next`` is None) the text must be
        followed by either a separator or the end of the string.
        '''
        if next is None:
            return '%s(%s|$)' % (re_escape(data), re_escape(self.sep))
        return re_escape(data)

    def transform_sep(self, data, mark, next):
        '''
        Emit the escaped path separator.
        '''
        return re_escape(self.sep)

    def transform_literal(self, data, mark, next):
        '''
        Drop the mark which started the literal state, keeping the rest.
        '''
        return data[len(self.start):]

    def transform_range(self, data, mark, next):
        '''
        Start (or continue) a bracket range, deferring output until closed.

        ``[!`` is rewritten as regular expression negation (``[^``) and
        ``[]`` as a range whose first member is an escaped ``]``.
        '''
        self.deferred = True
        if self.start == '[!':
            return '[^%s' % data[2:]
        if self.start == '[]':
            return '[\\]%s' % data[2:]
        return data

    def transform_range_sep(self, data, mark, next):
        '''
        Emit the escaped path separator for a ``/`` found inside a range.
        '''
        return re_escape(self.sep)

    def transform_range_close(self, data, mark, next):
        '''
        Close a bracket range and stop deferring output.

        If any buffered chunk is None (an unsupported POSIX expression was
        found), the buffered range is dropped and ``.`` is emitted instead.
        '''
        self.deferred = False
        if None in self.deferred_data:
            self.deferred_data[:] = ()
            return '.'
        return data

    def transform_range_ignore(self, data, mark, next):
        '''
        Discard data (used for the closing mark of POSIX expressions).
        '''
        return ''

    def transform_group(self, data, mark, next):
        '''
        Translate ``{``, ``,`` and ``}`` into ``(``, ``|`` and ``)``.

        ``,`` and ``}`` are only translated while at least one group is
        open (:attr:`deep` is non-zero); otherwise data is returned as is.
        '''
        if self.start == '{':
            self.deep += 1
            return '('
        if self.start == ',' and self.deep:
            return '|'
        if self.start == '}' and self.deep:
            self.deep -= 1
            return ')'
        return data

    def transform_start(self, data, mark, next):
        '''
        Emit the expression prefix.

        When the pattern starts with ``/`` the expression is anchored at
        the beginning (``^``) followed by the escaped :attr:`base`;
        otherwise it starts with the escaped separator.
        '''
        if mark == '/':
            return '^%s' % re_escape(self.base)
        return re_escape(self.sep)
226
227
228
def translate(data, sep=os.sep, base=None):
    '''
    Convert a glob pattern into regular expression source.

    :param data: glob pattern
    :param sep: path separator, defaults to :data:`os.sep`
    :param base: base path handed over to :class:`GlobTransform`
    :returns: every chunk yielded by :class:`GlobTransform`, concatenated
    '''
    chunks = GlobTransform(data, sep, base)
    return ''.join(chunks)
231