1
|
|
|
|
2
|
|
|
|
3
|
|
|
class StateMachine(object): |
4
|
|
|
''' |
5
|
|
|
Abstract character-driven finite state machine implementation, used to |
6
|
|
|
chop down and transform strings. |
7
|
|
|
|
8
|
|
|
Useful for implementig simple transpilators, compressors and so on. |
9
|
|
|
|
10
|
|
|
Important: when implementing this class, you must set the :attr:`current` |
11
|
|
|
attribute to a key defined in :attr:`jumps` dict. |
12
|
|
|
''' |
13
|
|
|
jumps = {} # finite state machine jumps |
14
|
|
|
start = '' # character which started current state |
15
|
|
|
current = '' # current state (an initial value must be set) |
16
|
|
|
pending = '' # unprocessed remaining data |
17
|
|
|
streaming = False # stream mode toggle |
18
|
|
|
|
19
|
|
|
@property |
20
|
|
|
def nearest(self): |
21
|
|
|
''' |
22
|
|
|
Get the next state jump. |
23
|
|
|
|
24
|
|
|
The next jump is calculated looking at :attr:`current` state |
25
|
|
|
and its possible :attr:`jumps` to find the nearest and bigger |
26
|
|
|
option in :attr:`pending` data. |
27
|
|
|
|
28
|
|
|
If none is found, the returned next state label will be None. |
29
|
|
|
|
30
|
|
|
:returns: tuple with index, substring and next state label |
31
|
|
|
:rtype: tuple |
32
|
|
|
''' |
33
|
|
|
try: |
34
|
|
|
options = self.jumps[self.current] |
35
|
|
|
except KeyError: |
36
|
|
|
raise KeyError( |
37
|
|
|
'Current state %r not defined in %s.jumps.' |
38
|
|
|
% (self.current, self.__class__) |
39
|
|
|
) |
40
|
|
|
offset = len(self.start) |
41
|
|
|
index = len(self.pending) |
42
|
|
|
if self.streaming: |
43
|
|
|
index -= max(map(len, options)) |
44
|
|
|
key = (index, 1) |
45
|
|
|
result = (index, '', None) |
46
|
|
|
for amark, anext in options.items(): |
47
|
|
|
asize = len(amark) |
48
|
|
|
aindex = self.pending.find(amark, offset, index + asize) |
49
|
|
|
if aindex > -1: |
50
|
|
|
index = aindex |
51
|
|
|
akey = (aindex, -asize) |
52
|
|
|
if akey < key: |
53
|
|
|
key = akey |
54
|
|
|
result = (aindex, amark, anext) |
55
|
|
|
return result |
56
|
|
|
|
57
|
|
|
def __init__(self, data=''): |
58
|
|
|
''' |
59
|
|
|
:param data: content will be added to pending data |
60
|
|
|
:type data: str |
61
|
|
|
''' |
62
|
|
|
self.pending += data |
63
|
|
|
|
64
|
|
|
def __iter__(self): |
65
|
|
|
''' |
66
|
|
|
Yield over result chunks, consuming :attr:`pending` data. |
67
|
|
|
|
68
|
|
|
On :attr:`streaming` mode, yield only finished states. |
69
|
|
|
|
70
|
|
|
On non :attr:`streaming` mode, yield last state's result chunk |
71
|
|
|
even if unfinished, consuming all pending data. |
72
|
|
|
|
73
|
|
|
:yields: transformation result chunka |
74
|
|
|
:ytype: str |
75
|
|
|
''' |
76
|
|
|
index, mark, next = self.nearest |
77
|
|
|
while next is not None: |
78
|
|
|
data = self.transform(self.pending[:index], mark, next) |
79
|
|
|
self.start = mark |
80
|
|
|
self.current = next |
81
|
|
|
self.pending = self.pending[index:] |
82
|
|
|
if data: |
83
|
|
|
yield data |
84
|
|
|
index, mark, next = self.nearest |
85
|
|
|
if not self.streaming: |
86
|
|
|
data = self.transform(self.pending, mark, next) |
87
|
|
|
self.start = '' |
88
|
|
|
self.pending = '' |
89
|
|
|
if data: |
90
|
|
|
yield data |
91
|
|
|
|
92
|
|
|
def transform(self, data, mark, next): |
93
|
|
|
''' |
94
|
|
|
Apply the appropriate transformation function on current state data, |
95
|
|
|
which is supposed to end at this point. |
96
|
|
|
|
97
|
|
|
It is expected transformation logic makes use of :attr:`start`, |
98
|
|
|
:attr:`current` and :attr:`streaming` instance attributes to |
99
|
|
|
bettee know the state is being left. |
100
|
|
|
|
101
|
|
|
:param data: string to transform (includes start) |
102
|
|
|
:type data: str |
103
|
|
|
:param mark: string producing the new state jump |
104
|
|
|
:type mark: str |
105
|
|
|
:param next: state is about to star, None on finish |
106
|
|
|
:type next: str or None |
107
|
|
|
|
108
|
|
|
:returns: transformed data |
109
|
|
|
:rtype: str |
110
|
|
|
''' |
111
|
|
|
method = getattr(self, 'transform_%s' % self.current, None) |
112
|
|
|
return method(data, mark, next) if method else data |
113
|
|
|
|
114
|
|
|
def feed(self, data=''): |
115
|
|
|
''' |
116
|
|
|
Optionally add pending data, switch into streaming mode, and yield |
117
|
|
|
result chunks. |
118
|
|
|
|
119
|
|
|
:yields: result chunks |
120
|
|
|
:ytype: str |
121
|
|
|
''' |
122
|
|
|
self.streaming = True |
123
|
|
|
self.pending += data |
124
|
|
|
for i in self: |
125
|
|
|
yield i |
126
|
|
|
|
127
|
|
|
def finish(self, data=''): |
128
|
|
|
''' |
129
|
|
|
Optionally add pending data, turn off streaming mode, and yield |
130
|
|
|
result chunks, which implies all pending data will be consumed. |
131
|
|
|
|
132
|
|
|
:yields: result chunks |
133
|
|
|
:ytype: str |
134
|
|
|
''' |
135
|
|
|
self.pending += data |
136
|
|
|
self.streaming = False |
137
|
|
|
for i in self: |
138
|
|
|
yield i |
139
|
|
|
|