Completed
Push — 0.5.3 ( 25f4bc...c617a3 )
by Felipe A.
01:02
created

GlobTransform   A

Complexity

Total Complexity 27

Size/Duplication

Total Lines 199
Duplicated Lines 0 %

Importance

Changes 2
Bugs 2 Features 0
Metric Value
wmc 27
c 2
b 2
f 0
dl 0
loc 199
rs 10

15 Methods

Rating   Name   Duplication   Size   Complexity  
A transform_start() 0 4 2
A transform_literal() 0 2 1
A transform_range_ignore() 0 2 1
A transform_text() 0 2 1
A transform_group_close() 0 2 1
A transform_posix_character_class() 0 13 4
A transform_range() 0 7 3
A transform_posix_collating_symbol() 0 5 1
A flush() 0 2 1
A __init__() 0 7 1
A transform_posix_equivalence_class() 0 5 1
A transform_wildcard() 0 6 3
A transform_range_close() 0 6 2
A transform_group() 0 2 1
A transform() 0 9 3
1
2
import os
3
import warnings
4
5
from unicategories import categories as unicat, RangeGroup as ranges
6
7
from ..compat import re_escape
8
from . import StateMachine
9
10
11
class GlobTransform(StateMachine):
    """
    State machine that rewrites a glob pattern as a regular expression.

    The ``jumps`` table maps, for every state, the marks found in the input
    to the state they lead to; each state has a matching
    ``transform_<state>`` method producing the regex fragment for the data
    consumed in that state.

    NOTE(review): mark matching, ``self.start`` and the dispatch to the
    ``transform_<state>`` methods are implemented by ``StateMachine``, which
    is defined outside this file; the comments below assume ``self.start``
    is the mark that opened the current state -- confirm against the base
    class.
    """

    jumps = {
        'start': {
            '': 'text',  # edit on __init__
            },
        'text': {
            '*': 'wildcard',
            '**': 'wildcard',
            '?': 'wildcard',
            '[': 'range',
            '[!': 'range',
            '[]': 'range',
            '{': 'group',
            '\\': 'literal',
            },
        'literal': {
            c: 'text' for c in '\\*?[{'
            },
        'wildcard': {
            '': 'text',
            },
        'range': {
            ']': 'range_close',
            '[.': 'posix_collating_symbol',
            '[:': 'posix_character_class',
            '[=': 'posix_equivalence_class',
            },
        'range_ignore': {
            '': 'range',
            },
        'range_close': {
            '': 'text',
            },
        'posix_collating_symbol': {
            '.]': 'range_ignore',
            },
        'posix_character_class': {
            ':]': 'range_ignore',
            },
        'posix_equivalence_class': {
            '=]': 'range_ignore',
            },
        'group': {
            '}': 'group_close',
            },
        'group_close': {
            '': 'text',
            }
        }

    # POSIX character class name -> group of code point ranges.
    # Ranges are (start, end) pairs with an exclusive end, as relied upon by
    # transform_posix_character_class below.
    character_classes = {
        'alnum': (
            # [\p{L}\p{Nl}\p{Nd}]
            unicat['L'] + unicat['Nl'] + unicat['Nd']
            ),
        'alpha': (
            # \p{L}\p{Nl}
            unicat['L'] + unicat['Nl']
            ),
        'ascii': (
            # [\x00-\x7F] (end is exclusive, so 0x80 is needed to keep DEL)
            ranges(((0, 0x80),))
            ),
        'blank': (
            # [\p{Zs}\t]
            unicat['Zs'] + ranges(((9, 10),))
            ),
        'cntrl': (
            # \p{Cc}
            unicat['Cc']
            ),
        'digit': (
            # \p{Nd}
            unicat['Nd']
            ),
        'graph': (
            # [^\p{Z}\p{C}]
            unicat['M'] + unicat['L'] + unicat['N'] + unicat['P'] + unicat['S']
            ),
        'lower': (
            # \p{Ll}
            unicat['Ll']
            ),
        'print': (
            # \P{C}: everything outside the "Other" category, that is,
            # the graphical categories plus separators
            unicat['M'] + unicat['L'] + unicat['N'] + unicat['P'] +
            unicat['S'] + unicat['Z']
            ),
        'punct': (
            # \p{P}
            unicat['P']
            ),
        'space': (
            # [\p{Z}\t\r\n\v\f]
            unicat['Z'] + ranges(((9, 14),))
            ),
        'upper': (
            # \p{Lu}
            unicat['Lu']
            ),
        'word': (
            # [\p{L}\p{Nl}\p{Nd}\p{Pc}]
            unicat['L'] + unicat['Nl'] + unicat['Nd'] + unicat['Pc']
            ),
        'xdigit': (
            # [0-9A-Fa-f]
            ranges(((48, 58), (65, 71), (97, 103)))
            ),
        }

    # initial state
    current = 'start'
    # True while inside a bracket expression: output is buffered instead of
    # being emitted (see transform and transform_range_close)
    deferred = False

    def __init__(self, data, sep=os.sep):
        """
        Prepare the machine for the given glob pattern.

        :param data: glob pattern, handed over to StateMachine
        :param sep: path separator the pattern is written for
        """
        self.sep = sep
        self.deferred_data = []
        # copy both the table and its 'start' entry so registering the
        # separator as a mark never mutates the class-level table
        self.jumps = dict(self.jumps)
        self.jumps['start'] = dict(self.jumps['start'])
        self.jumps['start'][sep] = 'text'
        super(GlobTransform, self).__init__(data)

    def flush(self):
        """
        Return whatever the base class flushes, followed by an ``$`` anchor.
        """
        return '%s$' % super(GlobTransform, self).flush()

    def transform(self, data, mark, next):
        """
        Apply the base class transform, buffering output while in a range.

        While ``self.deferred`` is set, fragments are stored on
        ``self.deferred_data`` and an empty string is returned; once it is
        unset, the buffered fragments are emitted before the current one.
        """
        data = super(GlobTransform, self).transform(data, mark, next)
        if self.deferred:
            self.deferred_data.append(data)
            data = ''
        elif self.deferred_data:
            data = ''.join(self.deferred_data) + data
            self.deferred_data[:] = ()
        return data

    def transform_posix_collating_symbol(self, data, mark, next):
        """
        Warn about an unsupported collating symbol and return None.

        The None ends up on the deferred buffer, which makes
        transform_range_close discard the whole bracket expression.
        """
        warnings.warn(
            'Posix collating symbols (like %s%s) are not supported.'
            % (data, mark))
        return None

    def transform_posix_character_class(self, data, mark, next):
        """
        Expand a POSIX character class name into regex set items.

        :returns: escaped characters and ``a-b`` spans for a known class, or
                  None (after issuing a warning) for an unknown one
        """
        name = data[len(self.start):]
        if name not in self.character_classes:
            warnings.warn(
                'Posix character class %s is not supported.'
                % name)
            return None
        # Endpoints are escaped because the result is embedded inside a
        # regex bracket expression, where characters such as ']', '\\', '^'
        # and '-' would otherwise terminate or alter the set.
        return ''.join(
            re_escape(chr(start))
            if 1 == end - start else
            '%s-%s' % (re_escape(chr(start)), re_escape(chr(end - 1)))
            for start, end in self.character_classes[name]
            )

    def transform_posix_equivalence_class(self, data, mark, next):
        """
        Warn about an unsupported equivalence class and return None.

        The None ends up on the deferred buffer, which makes
        transform_range_close discard the whole bracket expression.
        """
        warnings.warn(
            'Posix equivalence class expresions (like %s%s) are not supported.'
            % (data, mark))
        return None

    def transform_start(self, data, mark, next):
        """
        Emit the beginning of the expression.

        :returns: ``^`` when the pattern starts with the separator, the
                  escaped separator otherwise
        """
        if mark == self.sep:
            return '^'
        return self.transform_text(self.sep, mark, next)

    def transform_wildcard(self, data, mark, next):
        """
        Translate ``**``, ``*`` and ``?`` wildcards.

        :returns: ``.*`` for ``**``, a separator-excluding ``[^sep]*`` for
                  ``*`` and ``.`` for anything else
        """
        if self.start == '**':
            return '.*'
        if self.start == '*':
            # the separator is escaped as it may be a regex-special
            # character (a backslash would otherwise escape the closing
            # bracket and yield an invalid expression)
            return '[^%s]*' % re_escape(self.sep)
        return '.'

    def transform_text(self, data, mark, next):
        """
        Return given plain text escaped for literal matching.
        """
        return re_escape(data)

    def transform_literal(self, data, mark, next):
        """
        Return given data without its leading ``len(self.start)`` characters.
        """
        return data[len(self.start):]

    def transform_range(self, data, mark, next):
        """
        Open a bracket expression and start buffering its output.

        ``[!`` is rewritten as the regex negation ``[^`` and ``[]`` as a set
        starting with an escaped ``]``; anything else is returned unchanged.
        """
        self.deferred = True
        if self.start == '[!':
            return '[^%s' % data[2:]
        if self.start == '[]':
            return '[\\]%s' % data[2:]
        return data

    def transform_range_close(self, data, mark, next):
        """
        Close a bracket expression and stop buffering.

        :returns: ``.`` (dropping the buffered range) when any fragment of
                  the range was None, given data otherwise
        """
        self.deferred = False
        if None in self.deferred_data:
            self.deferred_data[:] = ()
            return '.'
        return data

    def transform_range_ignore(self, data, mark, next):
        """
        Emit nothing for data consumed in the ``range_ignore`` state.
        """
        return ''

    def transform_group(self, data, mark, next):
        """
        Open a regex group, turning comma-separated options into ``|``.
        """
        return '(%s' % ('|'.join(data[len(self.start):].split(',')))

    def transform_group_close(self, data, mark, next):
        """
        Close the regex group opened by transform_group.
        """
        return ')'
210
211
212
def translate(data, sep=os.sep):
    """
    Translate a glob pattern into a regular expression string.

    :param data: glob pattern
    :param sep: path separator the pattern is written for, forwarded to
                GlobTransform (it was previously accepted but ignored, so
                the platform default was always used)
    :returns: the fragments yielded by GlobTransform, joined together
    """
    return ''.join(GlobTransform(data, sep))
215