Passed
Push — master ( 9253d7...e42fcd )
by Sebastian
03:09
created

Mailcode_Parser_Statement_Tokenizer   A

Complexity

Total Complexity 31

Size/Duplication

Total Lines 304
Duplicated Lines 0 %

Importance

Changes 4
Bugs 0 Features 0
Metric Value
eloc 95
c 4
b 0
f 0
dl 0
loc 304
rs 9.92
wmc 31

16 Methods

Rating   Name   Duplication   Size   Complexity  
A tokenize() 0 16 2
A appendToken() 0 9 1
A getFirstUnknown() 0 10 2
A onTokensChanged() 0 5 2
A appendKeyword() 0 15 2
A generateID() 0 25 4
A __construct() 0 5 1
A getTokens() 0 3 1
A getNormalized() 0 15 3
A createProcessor() 0 18 2
A removeToken() 0 18 3
A hasUnknown() 0 5 1
A createToken() 0 7 1
A getUnknown() 0 13 3
A hasTokens() 0 3 1
A triggerTokensChanged() 0 5 2
1
<?php
2
/**
3
 * File containing the {@see Mailcode_Parser_Statement_Tokenizer} class.
4
 *
5
 * @package Mailcode
6
 * @subpackage Parser
7
 * @see Mailcode_Parser_Statement_Tokenizer
8
 */
9
10
declare(strict_types=1);
11
12
namespace Mailcode;
13
14
/**
 * Mailcode statement tokenizer: parses a mailcode statement
 * into its logical parts.
 *
 * On construction, the statement string is run through a fixed
 * chain of processors (see the tokenCategories property). Each
 * processor receives the statement string and token list left
 * behind by the previous one. The resulting token list can be
 * inspected and modified afterwards; listeners registered via
 * onTokensChanged() are notified of every modification.
 *
 * @package Mailcode
 * @subpackage Parser
 * @author Sebastian Mordziol <[email protected]>
 */
class Mailcode_Parser_Statement_Tokenizer
{
    const ERROR_TOKENIZE_METHOD_MISSING = 49801;
    const ERROR_INVALID_TOKEN_CREATED = 49802;

    /**
     * Suffixes of the processor classes to run, in execution order.
     * Each entry is appended to the processor class name prefix
     * in createProcessor().
     *
     * @var string[]
     */
    protected $tokenCategories = array(
        'Variables',
        'NormalizeQuotes',
        'EscapedQuotes',
        'StringLiterals',
        'Keywords',
        'Numbers',
        'Operands',
        'ExtractTokens'
    );

    /**
     * The statement this tokenizer was created for.
     *
     * @var Mailcode_Parser_Statement
     */
    protected $statement;

    /**
     * The statement string as returned by the last processor
     * in the chain.
     *
     * @var string
     */
    protected $tokenized;

    /**
     * The current token list, in the order the tokens were found
     * or appended.
     *
     * @var Mailcode_Parser_Statement_Tokenizer_Token[]
     */
    protected $tokensOrdered = array();

    /**
     * All token IDs generated so far. Static, so the IDs are
     * unique across all tokenizer instances.
     *
     * @var string[]
     */
    protected static $ids = array();

    /**
     * Callbacks to invoke whenever the token list is modified.
     *
     * @var callable[]
     */
    protected $changeHandlers = array();

    /**
     * Stores the statement and immediately tokenizes its
     * statement string.
     *
     * @param Mailcode_Parser_Statement $statement
     * @throws Mailcode_Exception
     */
    public function __construct(Mailcode_Parser_Statement $statement)
    {
        $this->statement = $statement;

        $this->tokenize($statement->getStatementString());
    }

    /**
     * Retrieves all tokens detected in the statement string, in
     * the order they were found.
     *
     * @return Mailcode_Parser_Statement_Tokenizer_Token[]
     */
    public function getTokens()
    {
        return $this->tokensOrdered;
    }

    /**
     * Whether the token list contains at least one token.
     *
     * @return bool
     */
    public function hasTokens() : bool
    {
        return !empty($this->tokensOrdered);
    }

    /**
     * Whether there were any unknown tokens in the statement.
     *
     * @return bool
     */
    public function hasUnknown() : bool
    {
        $unknown = $this->getUnknown();

        return !empty($unknown);
    }

    /**
     * Retrieves all unknown content tokens, if any.
     *
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Unknown[]
     */
    public function getUnknown()
    {
        $result = array();

        foreach($this->tokensOrdered as $token)
        {
            if($token instanceof Mailcode_Parser_Statement_Tokenizer_Token_Unknown)
            {
                $result[] = $token;
            }
        }

        return $result;
    }

    /**
     * Retrieves the first unknown content token in the token
     * list, or null if there are none.
     *
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Unknown|NULL
     */
    public function getFirstUnknown() : ?Mailcode_Parser_Statement_Tokenizer_Token_Unknown
    {
        $unknown = $this->getUnknown();

        if(!empty($unknown))
        {
            return array_shift($unknown);
        }

        return null;
    }

    /**
     * Builds the normalized statement string: the normalized
     * form of every token, joined with single spaces. Tokens
     * whose normalized form is empty are left out.
     *
     * @return string
     */
    public function getNormalized() : string
    {
        $parts = array();

        foreach($this->tokensOrdered as $token)
        {
            $string = $token->getNormalized();

            if($string != '')
            {
                $parts[] = $string;
            }
        }

        return implode(' ', $parts);
    }

    /**
     * Goes through all tokenization processors, in the order that
     * they are defined in the tokenCategories property. This filters
     * the statement string, and extracts the tokens contained within.
     *
     * @param string $statement
     * @throws Mailcode_Exception
     *
     * @see Mailcode_Parser_Statement_Tokenizer_Process
     */
    protected function tokenize(string $statement) : void
    {
        $statement = trim($statement);
        $tokens = array();

        foreach($this->tokenCategories as $tokenCategory)
        {
            $processor = $this->createProcessor($tokenCategory, $statement, $tokens);
            $processor->process();

            // Feed this processor's output into the next one.
            $statement = $processor->getStatement();
            $tokens = $processor->getTokens();
        }

        $this->tokenized = $statement;
        $this->tokensOrdered = $tokens;
    }

    /**
     * Instantiates the processor class for the specified category.
     *
     * @param string $id Category name, used as class name suffix.
     * @param string $statement
     * @param Mailcode_Parser_Statement_Tokenizer_Token[] $tokens
     * @return Mailcode_Parser_Statement_Tokenizer_Process
     * @throws Mailcode_Exception {@see Mailcode_Parser_Statement_Tokenizer::ERROR_TOKENIZE_METHOD_MISSING}
     *         if the class does not exist, or is not a processor.
     */
    protected function createProcessor(string $id, string $statement, array $tokens) : Mailcode_Parser_Statement_Tokenizer_Process
    {
        $class = 'Mailcode\Mailcode_Parser_Statement_Tokenizer_Process_'.$id;

        // Check for the class before instantiating it: calling "new"
        // on an unknown class name raises a generic PHP Error, which
        // would bypass the exception below.
        if(class_exists($class))
        {
            $instance = new $class($this, $statement, $tokens);

            if($instance instanceof Mailcode_Parser_Statement_Tokenizer_Process)
            {
                return $instance;
            }
        }

        throw new Mailcode_Exception(
            'Unknown statement token.',
            sprintf(
                'The tokenize class [%s] is not present.',
                $class
            ),
            self::ERROR_TOKENIZE_METHOD_MISSING
        );
    }

    /**
     * Creates a new token of the specified type, with a freshly
     * generated unique ID. The token is not added to the token list.
     *
     * The class name is derived from the type; no check is made
     * here that the class exists.
     *
     * @param string $type Token class name suffix, e.g. "Keyword".
     * @param string $matchedText
     * @param mixed $subject Passed on to the token's constructor as is.
     * @return Mailcode_Parser_Statement_Tokenizer_Token
     */
    public function createToken(string $type, string $matchedText, $subject=null) : Mailcode_Parser_Statement_Tokenizer_Token
    {
        $tokenID = $this->generateID();

        $class = '\Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_'.$type;

        return new $class($tokenID, $matchedText, $subject);
    }

    /**
     * Appends a keyword token to the end of the token list,
     * and notifies the change listeners.
     *
     * @param string $name Keyword name, with or without trailing colon.
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Keyword
     * @throws Mailcode_Exception {@see Mailcode_Parser_Statement_Tokenizer::ERROR_INVALID_TOKEN_CREATED}
     *         if the created token is not a keyword token.
     */
    public function appendKeyword(string $name) : Mailcode_Parser_Statement_Tokenizer_Token_Keyword
    {
        // Ensure the name ends with exactly one colon.
        $name = rtrim($name, ':').':';

        $token = $this->appendToken('Keyword', $name);

        if($token instanceof Mailcode_Parser_Statement_Tokenizer_Token_Keyword)
        {
            return $token;
        }

        throw new Mailcode_Exception(
            'Invalid token created',
            '',
            self::ERROR_INVALID_TOKEN_CREATED
        );
    }

    /**
     * Removes every token with the same ID as the specified token
     * from the token list, then notifies the change listeners.
     * The listeners are notified even if no token was removed.
     *
     * @param Mailcode_Parser_Statement_Tokenizer_Token $token
     * @return Mailcode_Parser_Statement_Tokenizer
     */
    public function removeToken(Mailcode_Parser_Statement_Tokenizer_Token $token) : Mailcode_Parser_Statement_Tokenizer
    {
        $keep = array();
        $tokenID = $token->getID();

        foreach ($this->tokensOrdered as $checkToken)
        {
            if($checkToken->getID() !== $tokenID)
            {
                $keep[] = $checkToken;
            }
        }

        $this->tokensOrdered = $keep;

        $this->triggerTokensChanged();

        return $this;
    }

    /**
     * Creates a token, adds it to the end of the token list,
     * and notifies the change listeners.
     *
     * @param string $type
     * @param string $matchedText
     * @param mixed $subject
     * @return Mailcode_Parser_Statement_Tokenizer_Token
     */
    protected function appendToken(string $type, string $matchedText, $subject=null) : Mailcode_Parser_Statement_Tokenizer_Token
    {
        $token = $this->createToken($type, $matchedText, $subject);

        $this->tokensOrdered[] = $token;

        $this->triggerTokensChanged();

        return $token;
    }

    /**
     * Generates a unique alphabet-based ID without numbers
     * to use as token name, to avoid conflicts with the
     * numbers detection.
     *
     * The ID consists of 12 random uppercase letters, and is
     * registered in the static ID list so that it is never
     * handed out twice.
     *
     * @return string
     */
    protected function generateID() : string
    {
        static $alphas;

        if(!isset($alphas))
        {
            $alphas = range('A', 'Z');
        }

        $amount = 12;

        // Draw new candidates until one is found that has not
        // been handed out yet.
        do
        {
            $result = '';

            for($i=0; $i < $amount; $i++)
            {
                $result .= $alphas[array_rand($alphas)];
            }
        }
        while(in_array($result, self::$ids, true));

        self::$ids[] = $result;

        return $result;
    }

    /**
     * Registers a callback that is invoked every time the token
     * list is modified. It receives the tokenizer instance as
     * its only argument.
     *
     * Values that are not callable are silently ignored.
     *
     * @param callable $callback
     */
    public function onTokensChanged($callback) : void
    {
        if(is_callable($callback))
        {
            $this->changeHandlers[] = $callback;
        }
    }

    /**
     * Invokes all registered change callbacks, in the order
     * they were registered.
     */
    protected function triggerTokensChanged() : void
    {
        foreach ($this->changeHandlers as $callback)
        {
            $callback($this);
        }
    }
}
329