|
1
|
|
|
|
|
2
|
|
|
|
|
3
|
|
|
class StateMachine(object): |
|
4
|
|
|
''' |
|
5
|
|
|
Abstract character-driven finite state machine implementation, used to |
|
6
|
|
|
chop down and transform strings. |
|
7
|
|
|
|
|
8
|
|
|
Useful for implementig simple transpilators, compressors and so on. |
|
9
|
|
|
|
|
10
|
|
|
Important: when implementing this class, you must set the :attr:`current` |
|
11
|
|
|
attribute to a key defined in :attr:`jumps` dict. |
|
12
|
|
|
''' |
|
13
|
|
|
jumps = {} # finite state machine jumps |
|
14
|
|
|
start = '' # character which started current state |
|
15
|
|
|
current = '' # current state (an initial value must be set) |
|
16
|
|
|
pending = '' # unprocessed remaining data |
|
17
|
|
|
streaming = False # stream mode toggle |
|
18
|
|
|
|
|
19
|
|
|
@property |
|
20
|
|
|
def nearest(self): |
|
21
|
|
|
''' |
|
22
|
|
|
Get the next state jump. |
|
23
|
|
|
|
|
24
|
|
|
The next jump is calculated looking at :attr:`current` state |
|
25
|
|
|
and its possible :attr:`jumps` to find the nearest and bigger |
|
26
|
|
|
option in :attr:`pending` data. |
|
27
|
|
|
|
|
28
|
|
|
If none is found, the returned next state label will be None. |
|
29
|
|
|
|
|
30
|
|
|
:returns: tuple with index, substring and next state label |
|
31
|
|
|
:rtype: tuple |
|
32
|
|
|
''' |
|
33
|
|
|
try: |
|
34
|
|
|
options = self.jumps[self.current] |
|
35
|
|
|
except KeyError: |
|
36
|
|
|
raise KeyError( |
|
37
|
|
|
'Current state %r not defined in %s.jumps.' |
|
38
|
|
|
% (self.current, self.__class__) |
|
39
|
|
|
) |
|
40
|
|
|
offset = len(self.start) |
|
41
|
|
|
index = len(self.pending) |
|
42
|
|
|
if self.streaming: |
|
43
|
|
|
index -= max(map(len, options)) |
|
44
|
|
|
key = (index, 1) |
|
45
|
|
|
result = (index, '', None) |
|
46
|
|
|
for amark, anext in options.items(): |
|
47
|
|
|
asize = len(amark) |
|
48
|
|
|
aindex = self.pending.find(amark, offset, index + asize) |
|
49
|
|
|
if aindex > -1: |
|
50
|
|
|
index = aindex |
|
51
|
|
|
akey = (aindex, -asize) |
|
52
|
|
|
if akey < key: |
|
53
|
|
|
key = akey |
|
54
|
|
|
result = (aindex, amark, anext) |
|
55
|
|
|
return result |
|
56
|
|
|
|
|
57
|
|
|
def __init__(self, data=''): |
|
58
|
|
|
''' |
|
59
|
|
|
:param data: content will be added to pending data |
|
60
|
|
|
:type data: str |
|
61
|
|
|
''' |
|
62
|
|
|
self.pending += data |
|
63
|
|
|
|
|
64
|
|
|
def __iter__(self): |
|
65
|
|
|
''' |
|
66
|
|
|
Yield over result chunks, consuming :attr:`pending` data. |
|
67
|
|
|
|
|
68
|
|
|
On :attr:`streaming` mode, yield only finished states. |
|
69
|
|
|
|
|
70
|
|
|
On non :attr:`streaming` mode, yield last state's result chunk |
|
71
|
|
|
even if unfinished, consuming all pending data. |
|
72
|
|
|
|
|
73
|
|
|
:yields: transformation result chunka |
|
74
|
|
|
:ytype: str |
|
75
|
|
|
''' |
|
76
|
|
|
index, mark, next = self.nearest |
|
77
|
|
|
while next is not None: |
|
78
|
|
|
data = self.transform(self.pending[:index], mark, next) |
|
79
|
|
|
self.start = mark |
|
80
|
|
|
self.current = next |
|
81
|
|
|
self.pending = self.pending[index:] |
|
82
|
|
|
if data: |
|
83
|
|
|
yield data |
|
84
|
|
|
index, mark, next = self.nearest |
|
85
|
|
|
if not self.streaming: |
|
86
|
|
|
data = self.transform(self.pending, mark, next) |
|
87
|
|
|
self.start = '' |
|
88
|
|
|
self.pending = '' |
|
89
|
|
|
if data: |
|
90
|
|
|
yield data |
|
91
|
|
|
|
|
92
|
|
|
def transform(self, data, mark, next): |
|
93
|
|
|
''' |
|
94
|
|
|
Apply the appropriate transformation function on current state data, |
|
95
|
|
|
which is supposed to end at this point. |
|
96
|
|
|
|
|
97
|
|
|
It is expected transformation logic makes use of :attr:`start`, |
|
98
|
|
|
:attr:`current` and :attr:`streaming` instance attributes to |
|
99
|
|
|
bettee know the state is being left. |
|
100
|
|
|
|
|
101
|
|
|
:param data: string to transform (includes start) |
|
102
|
|
|
:type data: str |
|
103
|
|
|
:param mark: string producing the new state jump |
|
104
|
|
|
:type mark: str |
|
105
|
|
|
:param next: state is about to star, None on finish |
|
106
|
|
|
:type next: str or None |
|
107
|
|
|
|
|
108
|
|
|
:returns: transformed data |
|
109
|
|
|
:rtype: str |
|
110
|
|
|
''' |
|
111
|
|
|
method = getattr(self, 'transform_%s' % self.current, None) |
|
112
|
|
|
return method(data, mark, next) if method else data |
|
113
|
|
|
|
|
114
|
|
|
def feed(self, data=''): |
|
115
|
|
|
''' |
|
116
|
|
|
Optionally add pending data, switch into streaming mode, and yield |
|
117
|
|
|
result chunks. |
|
118
|
|
|
|
|
119
|
|
|
:yields: result chunks |
|
120
|
|
|
:ytype: str |
|
121
|
|
|
''' |
|
122
|
|
|
self.streaming = True |
|
123
|
|
|
self.pending += data |
|
124
|
|
|
for i in self: |
|
125
|
|
|
yield i |
|
126
|
|
|
|
|
127
|
|
|
def finish(self, data=''): |
|
128
|
|
|
''' |
|
129
|
|
|
Optionally add pending data, turn off streaming mode, and yield |
|
130
|
|
|
result chunks, which implies all pending data will be consumed. |
|
131
|
|
|
|
|
132
|
|
|
:yields: result chunks |
|
133
|
|
|
:ytype: str |
|
134
|
|
|
''' |
|
135
|
|
|
self.pending += data |
|
136
|
|
|
self.streaming = False |
|
137
|
|
|
for i in self: |
|
138
|
|
|
yield i |
|
139
|
|
|
|