Mailcode_Parser_Statement_Tokenizer::tokenize()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 16
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 9
c 1
b 0
f 0
nc 2
nop 1
dl 0
loc 16
rs 9.9666
1
<?php
2
/**
3
 * @package Mailcode
4
 * @subpackage Parser
5
 */
6
7
declare(strict_types=1);
8
9
namespace Mailcode;
10
11
use AppUtils\ClassHelper;
12
use AppUtils\ClassHelper\BaseClassHelperException;
13
use Mailcode\Parser\Statement\Tokenizer\EventHandler;
14
use Mailcode\Parser\Statement\Tokenizer\SpecialChars;
15
16
/**
 * Mailcode statement tokenizer: parses a mailcode statement
 * into its logical parts.
 *
 * The statement string is run through a fixed chain of processor
 * classes (see {@see self::$tokenClasses}) when the tokenizer is
 * constructed. The resulting ordered token list can then be read,
 * and modified via the append/prepend/remove/insert methods.
 *
 * @package Mailcode
 * @subpackage Parser
 * @author Sebastian Mordziol <[email protected]>
 */
class Mailcode_Parser_Statement_Tokenizer
{
    public const ERROR_TOKENIZE_METHOD_MISSING = 49801;
    public const ERROR_INVALID_TOKEN_CREATED = 49802;
    public const ERROR_INVALID_TOKEN_CLASS = 49803;
    public const ERROR_TARGET_INSERT_TOKEN_NOT_FOUND = 49804;

    /**
     * Class names of the tokenization processors, in the order
     * in which {@see self::tokenize()} runs them.
     *
     * @var string[]
     */
    protected array $tokenClasses = array(
        Mailcode_Parser_Statement_Tokenizer_Process_LegacySyntaxConversion::class,
        Mailcode_Parser_Statement_Tokenizer_Process_Variables::class,
        Mailcode_Parser_Statement_Tokenizer_Process_NormalizeQuotes::class,
        Mailcode_Parser_Statement_Tokenizer_Process_EncodeSpecialChars::class,
        Mailcode_Parser_Statement_Tokenizer_Process_Keywords::class,
        // Must be before named parameters to exclude equal signs in strings
        Mailcode_Parser_Statement_Tokenizer_Process_StringLiterals::class,
        // Must be before numbers, because named parameters can contain numbers
        Mailcode_Parser_Statement_Tokenizer_Process_NamedParameters::class,
        Mailcode_Parser_Statement_Tokenizer_Process_Numbers::class,
        Mailcode_Parser_Statement_Tokenizer_Process_Operands::class,
        Mailcode_Parser_Statement_Tokenizer_Process_ExtractTokens::class,
        // Must be at the end when all tokens have been determined
        Mailcode_Parser_Statement_Tokenizer_Process_SetNames::class,
    );

    /**
     * The statement this tokenizer was created for.
     *
     * @var Mailcode_Parser_Statement
     */
    protected Mailcode_Parser_Statement $statement;

    /**
     * The statement string as returned by the last processor
     * in the chain.
     *
     * @var string
     */
    protected string $tokenized = '';

    /**
     * The current token list, in statement order.
     *
     * @var Mailcode_Parser_Statement_Tokenizer_Token[]
     */
    protected array $tokensOrdered = array();

    /**
     * All token IDs handed out so far. Static, so IDs are unique
     * across all tokenizer instances.
     *
     * @var string[]
     */
    protected static array $ids = array();

    /**
     * Callbacks registered via {@see self::onTokensChanged()}.
     *
     * @var callable[]
     */
    protected array $changeHandlers = array();

    private EventHandler $eventHandler;

    /**
     * Stores the statement, creates the event handler, and
     * immediately tokenizes the statement's string.
     *
     * @param Mailcode_Parser_Statement $statement
     * @throws Mailcode_Parser_Exception
     */
    public function __construct(Mailcode_Parser_Statement $statement)
    {
        $this->statement = $statement;
        $this->eventHandler = new EventHandler($this);

        $this->tokenize($statement->getStatementString());
    }

    /**
     * Retrieves the source command as reported by the statement.
     *
     * @return Mailcode_Commands_Command|null
     */
    public function getSourceCommand() : ?Mailcode_Commands_Command
    {
        return $this->statement->getSourceCommand();
    }

    /**
     * Retrieves all tokens detected in the statement string, in
     * the order they were found.
     *
     * @return Mailcode_Parser_Statement_Tokenizer_Token[]
     */
    public function getTokens() : array
    {
        return $this->tokensOrdered;
    }

    /**
     * Whether the token list contains at least one token.
     *
     * @return bool
     */
    public function hasTokens() : bool
    {
        return !empty($this->tokensOrdered);
    }

    /**
     * Whether there were any unknown tokens in the statement.
     *
     * @return bool
     */
    public function hasUnknown() : bool
    {
        $unknown = $this->getUnknown();

        return !empty($unknown);
    }

    /**
     * Retrieves all unknown content tokens, if any.
     *
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Unknown[]
     */
    public function getUnknown() : array
    {
        $result = array();

        foreach($this->tokensOrdered as $token)
        {
            if($token instanceof Mailcode_Parser_Statement_Tokenizer_Token_Unknown)
            {
                $result[] = $token;
            }
        }

        return $result;
    }

    /**
     * Retrieves the first unknown content token, or null if
     * there are none.
     *
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Unknown|null
     */
    public function getFirstUnknown() : ?Mailcode_Parser_Statement_Tokenizer_Token_Unknown
    {
        $unknown = $this->getUnknown();

        if(!empty($unknown))
        {
            return array_shift($unknown);
        }

        return null;
    }

    /**
     * Builds the normalized statement string by concatenating
     * the normalized form of every token. Tokens that normalize
     * to an empty string are skipped.
     *
     * @return string
     */
    public function getNormalized() : string
    {
        $parts = array();

        foreach($this->tokensOrdered as $token)
        {
            $string = $token->getNormalized();

            if($string === '') {
                continue;
            }

            // Only add spaces between tokens if they require spacing
            if($token->hasSpacing()) {
                $string .= ' ';
            }

            $parts[] = $string;
        }

        // Trimming removes the separator appended after the last token.
        return trim(implode('', $parts));
    }

    /**
     * Goes through all tokenization processors, in the order that
     * they are defined in the tokenClasses property. Each processor
     * receives the statement string and token list produced by the
     * previous one; the final results are stored in the tokenizer.
     *
     * @param string $statement
     *
     * @throws Mailcode_Parser_Exception
     *
     * @see Mailcode_Parser_Statement_Tokenizer_Process
     */
    protected function tokenize(string $statement) : void
    {
        $statement = trim($statement);
        $tokens = array();

        foreach($this->tokenClasses as $tokenClass)
        {
            $processor = $this->createProcessor($tokenClass, $statement, $tokens);
            $processor->process();

            // Feed this processor's output into the next one.
            $statement = $processor->getStatement();
            $tokens = $processor->getTokens();
        }

        $this->tokenized = $statement;
        $this->tokensOrdered = $tokens;
    }

    /**
     * Instantiates a tokenization processor.
     *
     * The class name is validated before instantiation, so that an
     * unknown or unrelated class is reported through the parser
     * exception instead of being constructed first.
     *
     * @param string $className
     * @param string $statement
     * @param Mailcode_Parser_Statement_Tokenizer_Token[] $tokens
     * @return Mailcode_Parser_Statement_Tokenizer_Process
     * @throws Mailcode_Parser_Exception {@see self::ERROR_TOKENIZE_METHOD_MISSING}
     */
    protected function createProcessor(string $className, string $statement, array $tokens) : Mailcode_Parser_Statement_Tokenizer_Process
    {
        // is_a() with allow_string=true also returns false for classes that cannot be loaded.
        if(!is_a($className, Mailcode_Parser_Statement_Tokenizer_Process::class, true))
        {
            throw new Mailcode_Parser_Exception(
                'Unknown statement token.',
                sprintf(
                    'The tokenize class [%s] does not extend the base process class.',
                    $className
                ),
                self::ERROR_TOKENIZE_METHOD_MISSING
            );
        }

        return new $className($this, $statement, $tokens);
    }

    /**
     * Creates a token of the specified type, with a freshly
     * generated unique ID. The token class name is the base token
     * class name, followed by an underscore and the type.
     *
     * The token is only created, not added to the token list.
     *
     * @param string $type Class name suffix of the token type, e.g. "Keyword".
     * @param string $matchedText
     * @param mixed $subject
     * @return Mailcode_Parser_Statement_Tokenizer_Token
     * @throws Mailcode_Parser_Exception {@see self::ERROR_INVALID_TOKEN_CLASS}
     */
    public function createToken(string $type, string $matchedText, $subject=null) : Mailcode_Parser_Statement_Tokenizer_Token
    {
        $class = Mailcode_Parser_Statement_Tokenizer_Token::class.'_'.$type;

        // Validate before instantiating: an unknown type must not raise a
        // raw PHP error, and an unrelated class must not be constructed.
        if(!is_a($class, Mailcode_Parser_Statement_Tokenizer_Token::class, true))
        {
            throw new Mailcode_Parser_Exception(
                'Invalid token class',
                sprintf(
                    'The class [%s] does not extend the base token class.',
                    $class
                ),
                self::ERROR_INVALID_TOKEN_CLASS
            );
        }

        $tokenID = $this->generateID();

        return new $class($tokenID, $matchedText, $subject, $this->getSourceCommand());
    }

    /**
     * Creates a variable token for the specified variable, using
     * its dollarized full name as matched text.
     *
     * @param Mailcode_Variables_Variable $variable
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Variable
     */
    private function createVariable(Mailcode_Variables_Variable $variable) : Mailcode_Parser_Statement_Tokenizer_Token_Variable
    {
        return ClassHelper::requireObjectInstanceOf(
            Mailcode_Parser_Statement_Tokenizer_Token_Variable::class,
            $this->createToken('Variable', dollarize($variable->getFullName()), $variable)
        );
    }

    /**
     * Creates a keyword token. The name is normalized to end
     * with exactly one colon.
     *
     * @param string $name Keyword name, with or without trailing colon.
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Keyword
     * @throws Mailcode_Parser_Exception {@see self::ERROR_INVALID_TOKEN_CREATED}
     */
    private function createKeyword(string $name) : Mailcode_Parser_Statement_Tokenizer_Token_Keyword
    {
        $name = rtrim($name, ':').':';

        $token = $this->createToken('Keyword', $name);

        if($token instanceof Mailcode_Parser_Statement_Tokenizer_Token_Keyword)
        {
            return $token;
        }

        throw new Mailcode_Parser_Exception(
            'Invalid token created',
            '',
            self::ERROR_INVALID_TOKEN_CREATED
        );
    }

    /**
     * Creates a keyword token and adds it at the end of the
     * token list.
     *
     * @param string $name
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Keyword
     * @throws Mailcode_Parser_Exception
     */
    public function appendKeyword(string $name) : Mailcode_Parser_Statement_Tokenizer_Token_Keyword
    {
        $token = $this->createKeyword($name);

        $this->appendToken($token);

        return $token;
    }

    /**
     * Creates a string literal token. The text is passed through
     * the special characters encoding before the token is created.
     *
     * @param string $text
     * @return Mailcode_Parser_Statement_Tokenizer_Token_StringLiteral
     * @throws Mailcode_Parser_Exception {@see self::ERROR_INVALID_TOKEN_CREATED}
     */
    private function createStringLiteral(string $text) : Mailcode_Parser_Statement_Tokenizer_Token_StringLiteral
    {
        $token = $this->createToken('StringLiteral', SpecialChars::encodeAll($text));

        if($token instanceof Mailcode_Parser_Statement_Tokenizer_Token_StringLiteral)
        {
            return $token;
        }

        throw new Mailcode_Parser_Exception(
            'Invalid token created',
            '',
            self::ERROR_INVALID_TOKEN_CREATED
        );
    }

    /**
     * Creates a number token from the specified numeric string.
     *
     * @param string $number
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Number
     * @throws Mailcode_Parser_Exception {@see self::ERROR_INVALID_TOKEN_CREATED}
     */
    private function createNumber(string $number) : Mailcode_Parser_Statement_Tokenizer_Token_Number
    {
        $token = $this->createToken('Number', $number);

        if($token instanceof Mailcode_Parser_Statement_Tokenizer_Token_Number)
        {
            return $token;
        }

        throw new Mailcode_Parser_Exception(
            'Invalid token created',
            '',
            self::ERROR_INVALID_TOKEN_CREATED
        );
    }

    /**
     * Creates a string literal token and adds it at the end
     * of the token list.
     *
     * @param string $text
     * @return Mailcode_Parser_Statement_Tokenizer_Token_StringLiteral
     * @throws Mailcode_Parser_Exception
     */
    public function appendStringLiteral(string $text) : Mailcode_Parser_Statement_Tokenizer_Token_StringLiteral
    {
        $token = $this->createStringLiteral($text);

        $this->appendToken($token);

        return $token;
    }

    /**
     * Creates a number token and adds it at the end of the
     * token list.
     *
     * @param string $number
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Number
     * @throws Mailcode_Parser_Exception
     */
    public function appendNumber(string $number) : Mailcode_Parser_Statement_Tokenizer_Token_Number
    {
        $token = $this->createNumber($number);

        $this->appendToken($token);

        return $token;
    }

    /**
     * Creates a string literal token and adds it at the
     * beginning of the token list.
     *
     * @param string $text
     * @return Mailcode_Parser_Statement_Tokenizer_Token_StringLiteral
     * @throws Mailcode_Parser_Exception
     */
    public function prependStringLiteral(string $text) : Mailcode_Parser_Statement_Tokenizer_Token_StringLiteral
    {
        $token = $this->createStringLiteral($text);

        $this->prependToken($token);

        return $token;
    }

    /**
     * Removes the token from the token list, matching it by ID.
     * If the token is directly preceded by a parameter name token,
     * that name token is removed first.
     *
     * The event handler is only notified if the token was actually
     * present in the list.
     *
     * @param Mailcode_Parser_Statement_Tokenizer_Token $token
     * @return $this
     */
    public function removeToken(Mailcode_Parser_Statement_Tokenizer_Token $token) : Mailcode_Parser_Statement_Tokenizer
    {
        // Remove the parameter name attached to the token, if any.
        $name = $this->findNameToken($token);
        if($name !== null) {
            $this->removeToken($name);
        }

        $keep = array();
        $tokenID = $token->getID();
        $removed = false;

        foreach ($this->tokensOrdered as $checkToken)
        {
            if($checkToken->getID() === $tokenID)
            {
                $removed = true;
                continue;
            }

            $keep[] = $checkToken;
        }

        // The rebuilt list is re-indexed from zero, which
        // findNameToken() relies on for its previous-token lookup.
        $this->tokensOrdered = $keep;

        if($removed)
        {
            $this->eventHandler->handleTokenRemoved($token);
        }

        return $this;
    }

    /**
     * Adds the token at the end of the token list, and
     * notifies the event handler.
     *
     * @param Mailcode_Parser_Statement_Tokenizer_Token $token
     * @return $this
     */
    protected function appendToken(Mailcode_Parser_Statement_Tokenizer_Token $token) : self
    {
        $this->tokensOrdered[] = $token;

        $this->eventHandler->handleTokenAppended($token);

        return $this;
    }

    /**
     * Adds the token at the beginning of the token list, and
     * notifies the event handler.
     *
     * @param Mailcode_Parser_Statement_Tokenizer_Token $token
     * @return $this
     */
    protected function prependToken(Mailcode_Parser_Statement_Tokenizer_Token $token) : self
    {
        array_unshift($this->tokensOrdered, $token);

        $this->eventHandler->handleTokenPrepended($token);

        return $this;
    }

    /**
     * Generates a unique alphabet-based ID without numbers
     * to use as token name, to avoid conflicts with the
     * numbers detection.
     *
     * The ID consists of 12 random uppercase letters. Candidates
     * are regenerated until one is found that has not been handed
     * out before; the result is recorded in the static ID list.
     *
     * @return string
     */
    protected function generateID() : string
    {
        static $alphas;

        if(!isset($alphas))
        {
            $alphas = range('A', 'Z');
        }

        $amount = 12;

        // Retry in a loop rather than recursively, and compare
        // strictly so only identical strings count as a collision.
        do
        {
            $result = '';

            for($i=0; $i < $amount; $i++)
            {
                $result .= $alphas[array_rand($alphas)];
            }
        }
        while(in_array($result, self::$ids, true));

        self::$ids[] = $result;

        return $result;
    }

    /**
     * Registers a callback in the list of change handlers.
     *
     * NOTE(review): nothing in this class invokes the registered
     * callbacks; presumably they are dispatched elsewhere - confirm.
     *
     * @param callable $callback
     */
    public function onTokensChanged(callable $callback) : void
    {
        $this->changeHandlers[] = $callback;
    }

    /**
     * @return EventHandler
     */
    public function getEventHandler() : EventHandler
    {
        return $this->eventHandler;
    }

    /**
     * Creates a variable token and adds it at the end of the
     * token list.
     *
     * @param Mailcode_Variables_Variable $variable
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Variable
     */
    public function appendVariable(Mailcode_Variables_Variable $variable) : Mailcode_Parser_Statement_Tokenizer_Token_Variable
    {
        $token = $this->createVariable($variable);
        $this->appendToken($token);
        return $token;
    }

    /**
     * Looks up the parameter name token that directly precedes
     * the target token in the token list.
     *
     * @param Mailcode_Parser_Statement_Tokenizer_Token $targetToken
     * @return Mailcode_Parser_Statement_Tokenizer_Token_ParamName|null Null if the target is not
     *         in the list, or is not preceded by a parameter name token.
     */
    public function findNameToken(Mailcode_Parser_Statement_Tokenizer_Token $targetToken) : ?Mailcode_Parser_Statement_Tokenizer_Token_ParamName
    {
        $targetID = $targetToken->getID();

        foreach($this->tokensOrdered as $idx => $token)
        {
            if($token->getID() === $targetID)
            {
                // For the first token the index is -1, which does not exist.
                $prev = $this->tokensOrdered[$idx-1] ?? null;
                if($prev instanceof Mailcode_Parser_Statement_Tokenizer_Token_ParamName)
                {
                    return $prev;
                }
            }
        }

        return null;
    }

    /**
     * Injects a parameter name token into the statement, before
     * the target token. Existing parameter names are replaced.
     *
     * The matched text of the new token is the name followed by
     * an equals sign. The new name token is registered with the
     * target token.
     *
     * @param Mailcode_Parser_Statement_Tokenizer_Token $targetToken
     * @param string $name
     * @return Mailcode_Parser_Statement_Tokenizer_Token_ParamName
     *
     * @throws Mailcode_Parser_Exception {@see self::ERROR_TARGET_INSERT_TOKEN_NOT_FOUND}
     * @throws BaseClassHelperException
     */
    public function injectParamName(Mailcode_Parser_Statement_Tokenizer_Token $targetToken, string $name) : Mailcode_Parser_Statement_Tokenizer_Token_ParamName
    {
        $existing = $this->findNameToken($targetToken);
        if($existing !== null) {
            $this->removeToken($existing);
        }

        $nameToken = ClassHelper::requireObjectInstanceOf(
            Mailcode_Parser_Statement_Tokenizer_Token_ParamName::class,
            $this->createToken('ParamName', $name.'=')
        );

        $this->insertBefore($targetToken, $nameToken);
        $targetToken->registerNameToken($nameToken);

        return $nameToken;
    }

    /**
     * Inserts a token into the token list, directly before the
     * target token (matched by ID). The list is left untouched
     * if the target token is not found.
     *
     * NOTE(review): unlike append, prepend and remove, no event
     * handler method is called here - confirm this is intended.
     *
     * @param Mailcode_Parser_Statement_Tokenizer_Token $targetToken
     * @param Mailcode_Parser_Statement_Tokenizer_Token $newToken
     * @return $this
     *
     * @throws Mailcode_Parser_Exception {@see self::ERROR_TARGET_INSERT_TOKEN_NOT_FOUND}
     */
    public function insertBefore(Mailcode_Parser_Statement_Tokenizer_Token $targetToken, Mailcode_Parser_Statement_Tokenizer_Token $newToken) : self
    {
        $targetID = $targetToken->getID();
        $tokens = array();
        $found = false;

        foreach($this->tokensOrdered as $token)
        {
            if($token->getID() === $targetID)
            {
                $tokens[] = $newToken;
                $found = true;
            }

            $tokens[] = $token;
        }

        if($found) {
            $this->tokensOrdered = $tokens;

            return $this;
        }

        throw new Mailcode_Parser_Exception(
            'Could not find target token for insertion.',
            sprintf(
                'The token [%s] was not found in the statement [%s].',
                $targetToken->getNormalized(),
                $this->getNormalized()
            ),
            self::ERROR_TARGET_INSERT_TOKEN_NOT_FOUND
        );
    }
}
549