Passed
Push — master ( 75ee28...758f58 )
by Sebastian
04:00
created

Mailcode_Parser_Statement_Tokenizer   B

Complexity

Total Complexity 47

Size/Duplication

Total Lines 394
Duplicated Lines 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
eloc 134
c 3
b 0
f 0
dl 0
loc 394
rs 8.64
wmc 47

23 Methods

Rating   Name   Duplication   Size   Complexity  
A tokenize() 0 22 3
A registerToken() 0 3 1
A getFirstUnknown() 0 10 2
A appendToken() 0 7 1
A tokenize_normalize_quotes() 0 3 1
A tokenize_escaped_quotes() 0 3 1
A appendKeyword() 0 15 2
A tokenize_string_literals() 0 8 2
A __construct() 0 5 1
A generateID() 0 25 4
A getNormalized() 0 15 3
A getTokens() 0 3 1
A tokenize_operands() 0 7 3
A tokenize_variables() 0 7 2
A removeToken() 0 16 3
A tokenize_numbers() 0 8 2
A hasUnknown() 0 5 1
A createToken() 0 13 1
A tokenize_extract_tokens() 0 20 3
A getUnknown() 0 13 3
A hasTokens() 0 3 1
A tokenize_keywords() 0 7 3
A getTokenByID() 0 11 3

How to fix   Complexity   

Complex Class

Complex classes like Mailcode_Parser_Statement_Tokenizer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use Mailcode_Parser_Statement_Tokenizer, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * File containing the {@see Mailcode_Parser_Statement_Tokenizer} class.
4
 *
5
 * @package Mailcode
6
 * @subpackage Parser
7
 * @see Mailcode_Parser_Statement_Tokenizer
8
 */
9
10
declare(strict_types=1);
11
12
namespace Mailcode;
13
14
/**
15
 * Mailcode statement tokenizer: parses a mailcode statement
16
 * into its logical parts.
17
 *
18
 * @package Mailcode
19
 * @subpackage Parser
20
 * @author Sebastian Mordziol <[email protected]>
21
 */
22
class Mailcode_Parser_Statement_Tokenizer
{
    public const ERROR_TOKENIZE_METHOD_MISSING = 49801;
    public const ERROR_INVALID_TOKEN_CREATED = 49802;

   /**
    * Operand strings searched for in the statement.
    *
    * They are processed in this order, so the two-character
    * operands are replaced before the single characters they
    * contain (e.g. `==` before `=`).
    *
    * @var string[]
    */
    protected $operands = [
        '==',
        '<=',
        '>=',
        '!=',
        '=',
        '+',
        '-',
        '/',
        '*',
        '>',
        '<',
    ];

   /**
    * Keyword strings searched for in the statement.
    *
    * @var string[]
    */
    protected $keywords = [
        'in:',
        'insensitive:',
        'urlencode:',
    ];

   /**
    * String placed on both sides of a token ID when the ID is
    * inserted into the working string as a placeholder.
    *
    * @var string
    */
    protected $delimiter = '§§';

   /**
    * Suffixes of the `tokenize_*` methods, in the order in
    * which {@see tokenize()} calls them.
    *
    * @var string[]
    */
    protected $tokenCategories = [
        'variables',
        'normalize_quotes',
        'escaped_quotes',
        'string_literals',
        'keywords',
        'numbers',
        'operands',
        'extract_tokens',
    ];

   /**
    * @var Mailcode_Parser_Statement
    */
    protected $statement;

   /**
    * Working copy of the statement string, in which matched
    * text is progressively replaced by token placeholders.
    *
    * @var string
    */
    protected $tokenized;

   /**
    * Tokens registered by the individual `tokenize_*` steps,
    * in registration order (not statement order).
    *
    * @var Mailcode_Parser_Statement_Tokenizer_Token[]
    */
    protected $tokensTemporary = [];

   /**
    * Final token list, in the order the tokens appear in the
    * statement, followed by any appended tokens.
    *
    * @var Mailcode_Parser_Statement_Tokenizer_Token[]
    */
    protected $tokensOrdered = [];

   /**
    * All token IDs generated so far, shared by all instances.
    *
    * @var string[]
    */
    protected static $ids = [];

   /**
    * Stores the statement and tokenizes its statement string
    * right away.
    *
    * @param Mailcode_Parser_Statement $statement
    * @throws Mailcode_Exception {@see self::ERROR_TOKENIZE_METHOD_MISSING}
    */
    public function __construct(Mailcode_Parser_Statement $statement)
    {
        $this->statement = $statement;

        $this->tokenize($statement->getStatementString());
    }

   /**
    * Retrieves all tokens detected in the statement string, in
    * the order they were found.
    *
    * @return Mailcode_Parser_Statement_Tokenizer_Token[]
    */
    public function getTokens()
    {
        return $this->tokensOrdered;
    }

   /**
    * Whether the ordered token list contains at least one token.
    *
    * @return bool
    */
    public function hasTokens() : bool
    {
        return count($this->tokensOrdered) > 0;
    }

   /**
    * Whether there were any unknown tokens in the statement.
    *
    * @return bool
    */
    public function hasUnknown() : bool
    {
        return $this->getFirstUnknown() !== null;
    }

   /**
    * Retrieves all unknown content tokens, if any.
    *
    * @return \Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_Unknown[]
    */
    public function getUnknown()
    {
        $unknown = [];

        foreach($this->tokensOrdered as $candidate)
        {
            if(!$candidate instanceof Mailcode_Parser_Statement_Tokenizer_Token_Unknown)
            {
                continue;
            }

            $unknown[] = $candidate;
        }

        return $unknown;
    }

   /**
    * Retrieves the first unknown content token in statement
    * order, or null if there is none.
    *
    * @return Mailcode_Parser_Statement_Tokenizer_Token_Unknown|NULL
    */
    public function getFirstUnknown() : ?Mailcode_Parser_Statement_Tokenizer_Token_Unknown
    {
        // Stop at the first hit instead of collecting the
        // full list of unknown tokens first.
        foreach($this->tokensOrdered as $candidate)
        {
            if($candidate instanceof Mailcode_Parser_Statement_Tokenizer_Token_Unknown)
            {
                return $candidate;
            }
        }

        return null;
    }

   /**
    * Joins the normalized strings of all ordered tokens with
    * single spaces, leaving out tokens whose normalized string
    * is empty.
    *
    * @return string
    */
    public function getNormalized() : string
    {
        $parts = [];

        foreach($this->tokensOrdered as $token)
        {
            $normalized = $token->getNormalized();

            if($normalized != '')
            {
                $parts[] = $normalized;
            }
        }

        return implode(' ', $parts);
    }

   /**
    * Runs every step listed in {@see self::$tokenCategories}
    * against the trimmed statement string.
    *
    * @param string $statement
    * @throws Mailcode_Exception {@see self::ERROR_TOKENIZE_METHOD_MISSING}
    *         if a listed category has no matching `tokenize_*` method.
    */
    protected function tokenize(string $statement) : void
    {
        $this->tokenized = trim($statement);

        foreach($this->tokenCategories as $category)
        {
            $method = 'tokenize_'.$category;

            if(!method_exists($this, $method))
            {
                throw new Mailcode_Exception(
                    'Unknown statement token.',
                    sprintf(
                        'The tokenize method [%s] is not present in class [%s].',
                        $method,
                        get_class($this)
                    ),
                    self::ERROR_TOKENIZE_METHOD_MISSING
                );
            }

            $this->$method();
        }
    }

   /**
    * Registers a token to add in the statement string.
    *
    * The token is only stored in the temporary list; it reaches
    * the ordered list in {@see tokenize_extract_tokens()}.
    *
    * @param string $type
    * @param string $matchedText
    * @param mixed $subject
    */
    protected function registerToken(string $type, string $matchedText, $subject=null) : void
    {
        $this->tokensTemporary[] = $this->createToken($type, $matchedText, $subject);
    }

   /**
    * Creates a token of the given type and replaces the matched
    * text in the working string with the token's placeholder
    * (delimiter + ID + delimiter).
    *
    * NOTE: every occurrence of the matched text is replaced by
    * the same placeholder, so text that is repeated in the
    * statement ends up sharing a single token instance.
    *
    * @param string $type Suffix of the token class name, e.g. `Keyword`.
    * @param string $matchedText
    * @param mixed $subject
    * @return Mailcode_Parser_Statement_Tokenizer_Token
    */
    protected function createToken(string $type, string $matchedText, $subject=null) : Mailcode_Parser_Statement_Tokenizer_Token
    {
        $tokenID = $this->generateID();
        $placeholder = $this->delimiter.$tokenID.$this->delimiter;

        $this->tokenized = str_replace($matchedText, $placeholder, $this->tokenized);

        $class = '\Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_'.$type;

        return new $class($tokenID, $matchedText, $subject);
    }

   /**
    * Appends a keyword token to the end of the ordered token
    * list. The name may be given with or without the trailing
    * colon.
    *
    * @param string $name
    * @return Mailcode_Parser_Statement_Tokenizer_Token_Keyword
    * @throws Mailcode_Exception {@see self::ERROR_INVALID_TOKEN_CREATED}
    */
    public function appendKeyword(string $name) : Mailcode_Parser_Statement_Tokenizer_Token_Keyword
    {
        // Normalize to exactly one trailing colon.
        $name = rtrim($name, ':').':';

        $token = $this->appendToken('Keyword', $name);

        if(!$token instanceof Mailcode_Parser_Statement_Tokenizer_Token_Keyword)
        {
            throw new Mailcode_Exception(
                'Invalid token created',
                '',
                self::ERROR_INVALID_TOKEN_CREATED
            );
        }

        return $token;
    }

   /**
    * Removes every entry with the same ID as the given token
    * from the ordered token list.
    *
    * @param Mailcode_Parser_Statement_Tokenizer_Token $token
    * @return Mailcode_Parser_Statement_Tokenizer
    */
    public function removeToken(Mailcode_Parser_Statement_Tokenizer_Token $token) : Mailcode_Parser_Statement_Tokenizer
    {
        $tokenID = $token->getID();
        $remaining = [];

        foreach($this->tokensOrdered as $existing)
        {
            if($existing->getID() === $tokenID)
            {
                continue;
            }

            $remaining[] = $existing;
        }

        $this->tokensOrdered = $remaining;

        return $this;
    }

   /**
    * Creates a token and adds it directly to the end of the
    * ordered token list.
    *
    * @param string $type
    * @param string $matchedText
    * @param mixed $subject
    * @return Mailcode_Parser_Statement_Tokenizer_Token
    */
    protected function appendToken(string $type, string $matchedText, $subject=null) : Mailcode_Parser_Statement_Tokenizer_Token
    {
        $token = $this->createToken($type, $matchedText, $subject);

        $this->tokensOrdered[] = $token;

        return $token;
    }

   /**
    * Looks up a registered (temporary) token by its ID.
    *
    * @param string $tokenID
    * @return Mailcode_Parser_Statement_Tokenizer_Token|NULL
    */
    protected function getTokenByID(string $tokenID) : ?Mailcode_Parser_Statement_Tokenizer_Token
    {
        foreach($this->tokensTemporary as $candidate)
        {
            if($candidate->getID() === $tokenID)
            {
                return $candidate;
            }
        }

        return null;
    }

   /**
    * Some WYSIWYG editors like using pretty quotes instead
    * of the usual double quotes. This simply replaces all
    * occurrences with the regular variant.
    */
    protected function tokenize_normalize_quotes() : void
    {
        $this->tokenized = str_replace(array('“', '”'), '"', $this->tokenized);
    }

   /**
    * Replaces backslash-escaped double quotes with the
    * `__QUOTE__` marker, so that they are not treated as
    * string literal boundaries by the following step.
    */
    protected function tokenize_escaped_quotes() : void
    {
        $this->tokenized = str_replace('\"', '__QUOTE__', $this->tokenized);
    }

   /**
    * Registers a keyword token for every known keyword that
    * is present in the working string.
    */
    protected function tokenize_keywords() : void
    {
        foreach($this->keywords as $keyword)
        {
            // strpos() instead of the truthiness of strstr():
            // strstr() returns the falsy string "0" when the
            // match is a lone "0" at the end of the haystack.
            if(strpos($this->tokenized, $keyword) !== false)
            {
                $this->registerToken('Keyword', $keyword);
            }
        }
    }

   /**
    * Builds the ordered token list from the working string.
    */
    protected function tokenize_extract_tokens() : void
    {
        // split the string by the delimiters: this gives an
        // array with tokenIDs, and any content that may be left
        // over that could not be tokenized.
        $parts = \AppUtils\ConvertHelper::explodeTrim($this->delimiter, $this->tokenized);

        foreach($parts as $part)
        {
            $token = $this->getTokenByID($part);

            // anything that is not a known token ID is added
            // as an unknown token.
            if(!$token)
            {
                $token = new Mailcode_Parser_Statement_Tokenizer_Token_Unknown($this->generateID(), $part);
            }

            $this->tokensOrdered[] = $token;
        }
    }

   /**
    * Registers a variable token for each variable found in
    * the working string, as grouped by hash by the variables
    * collection.
    */
    protected function tokenize_variables() : void
    {
        $vars = Mailcode::create()->findVariables($this->tokenized)->getGroupedByHash();

        foreach($vars as $var)
        {
            $this->registerToken('Variable', $var->getMatchedText(), $var);
        }
    }

   /**
    * Registers an operand token for every known operand that
    * is present in the working string.
    */
    protected function tokenize_operands() : void
    {
        foreach($this->operands as $operand)
        {
            // See tokenize_keywords() for why strpos() is used.
            if(strpos($this->tokenized, $operand) !== false)
            {
                $this->registerToken('Operand', $operand);
            }
        }
    }

   /**
    * Registers a string literal token for every double-quoted
    * section of the working string (quotes included).
    */
    protected function tokenize_string_literals() : void
    {
        $matches = array();
        preg_match_all('/"(.*)"/sxU', $this->tokenized, $matches, PREG_PATTERN_ORDER);

        foreach($matches[0] as $literal)
        {
            $this->registerToken('StringLiteral', $literal);
        }
    }

   /**
    * Registers a number token for every integer or decimal
    * number (dot or comma separated, optionally negative)
    * found in the working string.
    */
    protected function tokenize_numbers() : void
    {
        $matches = array();
        preg_match_all('/-*[0-9]+\s*[.,]\s*[0-9]+|-*[0-9]+/sx', $this->tokenized, $matches, PREG_PATTERN_ORDER);

        // Registering a token replaces ALL occurrences of its
        // text, so a short number must not be handled before a
        // longer number that contains it: "1 > 12" would turn
        // the "12" into a placeholder followed by a stray "2".
        // Each distinct number is registered once, longest first.
        $numbers = array_unique($matches[0]);

        usort(
            $numbers,
            static function(string $a, string $b) : int
            {
                return strlen($b) <=> strlen($a);
            }
        );

        foreach($numbers as $number)
        {
            $this->registerToken('Number', $number);
        }
    }

   /**
    * Generates a unique alphabet-based ID without numbers
    * to use as token name, to avoid conflicts with the
    * numbers detection.
    *
    * The ID consists of 12 random uppercase letters, and is
    * recorded in the static ID list so it is not issued again.
    *
    * @return string
    */
    protected function generateID() : string
    {
        static $alphas;

        if(!isset($alphas))
        {
            $alphas = range('A', 'Z');
        }

        $amount = 12;

        // Retry in a loop (rather than recursively) until an
        // ID is found that has not been issued before.
        do
        {
            $result = '';

            for($i=0; $i < $amount; $i++)
            {
                $result .= $alphas[array_rand($alphas)];
            }
        }
        while(in_array($result, self::$ids, true));

        self::$ids[] = $result;

        return $result;
    }
}
418