Completed
Push — 0.5.3 ( fe5108...c2ca4c )
by Felipe A.
01:07
created

GlobTransformBase.__init__()   A

Complexity

Conditions 1

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 6
rs 9.4285
c 0
b 0
f 0
cc 1
1
2
import os
3
import warnings
4
5
from unicategories import categories as unicat, RangeGroup as ranges
6
7
from ..compat import re_escape, chr
8
from . import StateMachine
9
10
11
class GlobTransformBase(StateMachine):
    """
    State machine turning the pieces of a glob pattern into regex source.

    ``jumps`` maps each state name to the marks that leave that state and
    the state every mark leads to.  Each ``transform_<state>`` method
    returns the regex text for the chunk consumed while in that state.

    NOTE(review): the parsing/dispatching logic, and the ``start``
    attribute read by several methods (it looks like the mark that opened
    the current state), come from ``StateMachine``, which is not visible
    in this file -- confirm the details there.
    """

    # state -> {mark: next state}
    jumps = {
        'text': {
            '*': 'wildcard',
            '**': 'wildcard',
            '?': 'wildcard',
            '[': 'range',
            '[!': 'range',
            '[]': 'range',
            '{': 'group',
            '\\': 'literal',
            '/': 'sep',
            },
        'sep': {
            '': 'text',
            },
        'literal': {
            c: 'text' for c in '\\*?[{'
            },
        'wildcard': {
            '': 'text',
            },
        'range': {
            '/': 'range_sep',
            ']': 'range_close',
            '[.': 'posix_collating_symbol',
            '[:': 'posix_character_class',
            '[=': 'posix_equivalence_class',
            },
        'range_sep': {
            '': 'range',
            },
        'range_ignore': {
            '': 'range',
            },
        'range_close': {
            '': 'text',
            },
        'posix_collating_symbol': {
            '.]': 'range_ignore',
            },
        'posix_character_class': {
            ':]': 'range_ignore',
            },
        'posix_equivalence_class': {
            '=]': 'range_ignore',
            },
        'group': {
            '}': 'group_close',
            },
        'group_close': {
            '': 'text',
            }
        }

    # POSIX character class name -> code point ranges.  Ranges are
    # (start, end) pairs with an exclusive end, which is how
    # transform_posix_character_class consumes them.
    character_classes = {
        'alnum': (
            # [\p{L}\p{Nl}\p{Nd}]
            unicat['L'] + unicat['Nl'] + unicat['Nd']
            ),
        'alpha': (
            # \p{L}\p{Nl}
            unicat['L'] + unicat['Nl']
            ),
        'ascii': (
            # [\x00-\x7F] (end is exclusive, hence 0x80)
            ranges(((0, 0x80),))
            ),
        'blank': (
            # [\p{Zs}\t]
            unicat['Zs'] + ranges(((9, 10),))
            ),
        'cntrl': (
            # \p{Cc}
            unicat['Cc']
            ),
        'digit': (
            # \p{Nd}
            unicat['Nd']
            ),
        'graph': (
            # [^\p{Z}\p{C}]
            unicat['M'] + unicat['L'] + unicat['N'] + unicat['P'] + unicat['S']
            ),
        'lower': (
            # \p{Ll}
            unicat['Ll']
            ),
        'print': (
            # \P{C}: every major category except C
            unicat['L'] + unicat['M'] + unicat['N'] + unicat['P'] +
            unicat['S'] + unicat['Z']
            ),
        'punct': (
            # \p{P}
            unicat['P']
            ),
        'space': (
            # [\p{Z}\t\r\n\v\f]
            unicat['Z'] + ranges(((9, 14),))
            ),
        'upper': (
            # \p{Lu}
            unicat['Lu']
            ),
        'word': (
            # [\p{L}\p{Nl}\p{Nd}\p{Pc}]
            unicat['L'] + unicat['Nl'] + unicat['Nd'] + unicat['Pc']
            ),
        'xdigit': (
            # [0-9A-Fa-f]
            ranges(((48, 58), (65, 71), (97, 103)))
            ),
        }

    # initial state name
    current = 'text'
    # True while inside a bracket expression: output is buffered until the
    # range is closed (see transform and transform_range_close)
    deferred = False

    def __init__(self, data, sep=os.sep, base=None):
        """
        Set up the transformer for the glob pattern ``data``.

        :param data: glob pattern, handed over to ``StateMachine.__init__``
        :param sep: path separator emitted (escaped) by the sep states
        :param base: base path stored on the instance, '' when falsy
        """
        self.sep = sep
        self.base = base or ''
        self.deferred_data = []
        # instance-level shallow copy of the class jump table
        self.jumps = dict(self.jumps)
        super(GlobTransformBase, self).__init__(data)

    def transform(self, data, mark, next):
        """
        Run the parent transform, buffering its output inside ranges.

        While ``deferred`` is set the result is stored in ``deferred_data``
        and an empty string is returned; the first non-deferred result gets
        the buffered pieces prepended and the buffer is emptied.
        """
        data = super(GlobTransformBase, self).transform(data, mark, next)
        if self.deferred:
            self.deferred_data.append(data)
            data = ''
        elif self.deferred_data:
            data = ''.join(self.deferred_data) + data
            self.deferred_data[:] = ()
        return data

    def transform_posix_collating_symbol(self, data, mark, next):
        """Warn that collating symbols are unsupported and return None."""
        warnings.warn(
            'Posix collating symbols (like %s%s) are not supported.'
            % (data, mark))
        return None

    def transform_posix_character_class(self, data, mark, next):
        """
        Expand a POSIX character class name into bracket-expression content.

        Returns None (after a warning) for names missing from
        ``character_classes``.  Otherwise every range becomes either a
        single character or a ``first-last`` pair.  Characters are passed
        through ``re_escape`` so that members such as ``]``, ``\\``, ``^``
        or ``-`` (all present in 'punct') cannot terminate or alter the
        surrounding regex character class.
        """
        name = data[len(self.start):]
        if name not in self.character_classes:
            warnings.warn(
                'Posix character class %s is not supported.'
                % name)
            return None
        return ''.join(
            re_escape(chr(start))
            if 1 == end - start else
            '%s-%s' % (re_escape(chr(start)), re_escape(chr(end - 1)))
            for start, end in self.character_classes[name]
            )

    def transform_posix_equivalence_class(self, data, mark, next):
        """Warn that equivalence classes are unsupported and return None."""
        warnings.warn(
            'Posix equivalence class expresions (like %s%s) are not supported.'
            % (data, mark))
        return None

    def transform_wildcard(self, data, mark, next):
        """
        Translate a wildcard: ``**`` matches anything, ``*`` anything but
        the separator, and any other wildcard start a single character.
        """
        if self.start == '**':
            return '.*'
        if self.start == '*':
            return '[^%s]*' % re_escape(self.sep)
        return '.'

    def transform_text(self, data, mark, next):
        """Return plain text escaped for use in a regex."""
        return re_escape(data)

    def transform_sep(self, data, mark, next):
        """Return the escaped path separator."""
        return re_escape(self.sep)

    def transform_literal(self, data, mark, next):
        """Return the chunk without its leading start mark, unescaped."""
        return data[len(self.start):]

    def transform_range(self, data, mark, next):
        """
        Translate the opening of a bracket expression and start deferring.

        ``[!`` becomes a negated class and ``[]`` a class starting with an
        escaped ``]``; anything else is passed through unchanged.
        """
        self.deferred = True
        if self.start == '[!':
            return '[^%s' % data[2:]
        if self.start == '[]':
            return '[\\]%s' % data[2:]
        return data

    def transform_range_sep(self, data, mark, next):
        """Return the escaped path separator found inside a range."""
        return re_escape(self.sep)

    def transform_range_close(self, data, mark, next):
        """
        Close the bracket expression and stop deferring.

        A None in the buffered pieces marks an unsupported POSIX
        expression; in that case the buffer is dropped and the whole range
        is replaced by ``.``.
        """
        self.deferred = False
        if None in self.deferred_data:
            self.deferred_data[:] = ()
            return '.'
        return data

    def transform_range_ignore(self, data, mark, next):
        """Emit nothing for the closing part of a POSIX expression."""
        return ''

    def transform_group(self, data, mark, next):
        """Open a regex group, turning comma separated options into ``|``."""
        return '(%s' % ('|'.join(data[len(self.start):].split(',')))

    def transform_group_close(self, data, mark, next):
        """Close the regex group opened by transform_group."""
        return ')'
211
212
213
class GlobTransform(GlobTransformBase):
    """
    Glob transformer adding an initial 'start' state and a closing suffix.

    The 'start' state decides how the pattern begins (see transform_start)
    and ``flush`` appends an alternative requiring the match to be followed
    by a separator or the end of the string.
    """

    # own copy of the base table so the update below does not touch it
    jumps = GlobTransformBase.jumps.copy()
    jumps.update({
        'start': {
            '': 'text',
            '/': 'sep',
            },
        })
    current = 'start'

    def transform_start(self, data, mark, next):
        """
        Emit the regex prefix for the pattern.

        When the start state is left through ``/`` the regex is anchored
        with ``^`` followed by the escaped base; otherwise the escaped
        separator is emitted.
        """
        if mark == '/':
            return '^%s' % re_escape(self.base)
        return re_escape(self.sep)

    def flush(self):
        """
        Return the parent's flushed output followed by ``(<sep>|$)``.

        ``super`` is given this class (not the base) so that a ``flush``
        defined on GlobTransformBase would not be skipped in the MRO.
        """
        return '%s(%s|$)' % (
            super(GlobTransform, self).flush(),
            re_escape(self.sep),
            )
233
234
235
def translate(data, sep=os.sep, base=None):
    """
    Translate the glob pattern ``data`` into regex source.

    :param data: glob pattern
    :param sep: path separator, defaults to ``os.sep``
    :param base: base path forwarded to GlobTransform
    :returns: concatenation of every chunk yielded by GlobTransform
    """
    chunks = GlobTransform(data, sep=sep, base=base)
    return ''.join(chunks)
238