Passed
Push — master ( 83b5cd...0e7983 )
by Sebastian
02:43
created

tokenize_normalize_quotes()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * File containing the {@see Mailcode_Parser_Statement_Tokenizer} class.
4
 *
5
 * @package Mailcode
6
 * @subpackage Parser
7
 * @see Mailcode_Parser_Statement_Tokenizer
8
 */
9
10
declare(strict_types=1);
11
12
namespace Mailcode;
13
14
/**
15
 * Mailcode statement tokenizer: parses a mailcode statement
16
 * into its logical parts.
17
 *
18
 * @package Mailcode
19
 * @subpackage Parser
20
 * @author Sebastian Mordziol <[email protected]>
21
 */
22
class Mailcode_Parser_Statement_Tokenizer
23
{
24
    /**
     * Error code of the exception thrown by tokenize() when a token
     * category has no matching `tokenize_*` method in the class.
     */
    public const ERROR_TOKENIZE_METHOD_MISSING = 49801;

    /**
     * Operands recognized in a statement. tokenize_operands() replaces
     * them in this order; the two-character operands are listed before
     * the single characters they contain.
     *
     * @var string[]
     */
    protected $operands = [
        '==',
        '<=',
        '>=',
        '!=',
        '=',
        '+',
        '-',
        '/',
        '*',
        '>',
        '<',
    ];

    /**
     * Keywords recognized in a statement, including the trailing colon.
     *
     * @var string[]
     */
    protected $keywords = [
        'in:',
        'insensitive:',
    ];

    /**
     * Marker placed on both sides of a token ID when a matched text is
     * replaced in the tokenized string.
     *
     * @var string
     */
    protected $delimiter = '§§';

    /**
     * Tokenizing steps. For each entry, tokenize() calls the method
     * named `tokenize_` + entry, in the order given here.
     *
     * @var string[]
     */
    protected $tokenCategories = [
        'variables',
        'normalize_quotes',
        'escaped_quotes',
        'string_literals',
        'keywords',
        'numbers',
        'operands',
        'extract_tokens',
    ];

    /**
     * The statement passed to the constructor.
     *
     * @var Mailcode_Parser_Statement
     */
    protected $statement;

    /**
     * Working copy of the statement string, in which recognized parts
     * are progressively replaced by delimited token IDs.
     *
     * @var string
     */
    protected $tokenized;

    /**
     * Tokens in the order they were registered by registerToken().
     *
     * @var Mailcode_Parser_Statement_Tokenizer_Token[]
     */
    protected $tokensTemporary = [];

    /**
     * Tokens in the order they occur in the statement string; filled
     * by tokenize_extract_tokens().
     *
     * @var Mailcode_Parser_Statement_Tokenizer_Token[]
     */
    protected $tokensOrdered = [];

    /**
     * All IDs handed out by generateID() so far. Static, so the list
     * is shared by every tokenizer instance.
     *
     * @var string[]
     */
    protected static $ids = [];
94
    
95
    /**
     * Stores the statement and immediately tokenizes its statement string.
     *
     * @param Mailcode_Parser_Statement $statement
     */
    public function __construct(Mailcode_Parser_Statement $statement)
    {
        $this->statement = $statement;

        $statementString = $statement->getStatementString();

        $this->tokenize($statementString);
    }
101
102
   /**
    * Returns the tokens in the order in which they occur in the
    * statement string. Content that could not be matched to a
    * registered token is included as an unknown token.
    *
    * @return Mailcode_Parser_Statement_Tokenizer_Token[]
    */
    public function getTokens()
    {
        $ordered = $this->tokensOrdered;

        return $ordered;
    }
112
    
113
    /**
     * Whether tokenizing the statement produced at least one token.
     *
     * @return bool
     */
    public function hasTokens() : bool
    {
        return count($this->tokensOrdered) > 0;
    }
117
    
118
   /**
    * Checks whether the statement contains content that could
    * not be matched to any known token type.
    *
    * @return bool
    */
    public function hasUnknown() : bool
    {
        return count($this->getUnknown()) > 0;
    }
129
    
130
   /**
    * Collects the unknown content tokens from the ordered token
    * list, keeping their order. The result is empty when every
    * part of the statement was recognized.
    *
    * @return \Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_Unknown[]
    */
    public function getUnknown()
    {
        $unknown = array_filter(
            $this->tokensOrdered,
            static function($token) : bool {
                return $token instanceof Mailcode_Parser_Statement_Tokenizer_Token_Unknown;
            }
        );

        // array_filter preserves keys: reindex to get a plain list.
        return array_values($unknown);
    }
149
    
150
    /**
     * Returns the first unknown content token of the statement,
     * or null when there is none.
     *
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Unknown|NULL
     */
    public function getFirstUnknown() : ?Mailcode_Parser_Statement_Tokenizer_Token_Unknown
    {
        foreach($this->getUnknown() as $first)
        {
            // Leave on the very first entry: only that one is of interest.
            return $first;
        }

        return null;
    }
161
    
162
    /**
     * Builds the normalized statement string: the normalized form
     * of every token in statement order, joined with single spaces.
     * Tokens whose normalized form is empty are left out.
     *
     * @return string
     */
    public function getNormalized() : string
    {
        $normalized = array();

        foreach($this->tokensOrdered as $token)
        {
            $text = $token->getNormalized();

            // Loose comparison is intentional, to skip any value
            // that compares equal to an empty string.
            if($text == '')
            {
                continue;
            }

            $normalized[] = $text;
        }

        return implode(' ', $normalized);
    }
178
    
179
    /**
     * Runs all tokenizing steps on the trimmed statement string.
     *
     * Each entry of the token categories list is mapped to a
     * `tokenize_*` method of this class, and these are called
     * in the order of the list.
     *
     * @param string $statement
     * @throws Mailcode_Exception With code ERROR_TOKENIZE_METHOD_MISSING
     *         when a category has no matching method.
     */
    protected function tokenize(string $statement) : void
    {
        $this->tokenized = trim($statement);

        foreach($this->tokenCategories as $category)
        {
            $method = 'tokenize_'.$category;

            if(method_exists($this, $method))
            {
                $this->$method();
                continue;
            }

            $details = sprintf(
                'The tokenize method [%s] is not present in class [%s].',
                $method,
                get_class($this)
            );

            throw new Mailcode_Exception(
                'Unknown statement token.',
                $details,
                self::ERROR_TOKENIZE_METHOD_MISSING
            );
        }
    }
203
   
204
   /**
    * Registers a token for a text found in the statement string.
    *
    * Every occurrence of the matched text in the tokenized string
    * is replaced by a freshly generated token ID wrapped in the
    * delimiter, and a token instance of the matching class is
    * stored in the temporary tokens list.
    *
    * @param string $type Suffix of the token class name, e.g. "Number".
    * @param string $matchedText The text to replace in the tokenized string.
    * @param mixed $subject Optional value handed on to the token's constructor.
    */
    protected function registerToken(string $type, string $matchedText, $subject=null) : void
    {
        $tokenID = $this->generateID();
        $placeholder = $this->delimiter.$tokenID.$this->delimiter;

        $this->tokenized = str_replace($matchedText, $placeholder, $this->tokenized);

        $class = '\Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_'.$type;
        $token = new $class($tokenID, $matchedText, $subject);

        $this->tokensTemporary[] = $token;
    }
225
    
226
    /**
     * Looks up a registered token by its ID in the temporary
     * tokens list.
     *
     * @param string $tokenID
     * @return Mailcode_Parser_Statement_Tokenizer_Token|NULL The token, or null if no token has this ID.
     */
    protected function getTokenByID(string $tokenID) : ?Mailcode_Parser_Statement_Tokenizer_Token
    {
        foreach($this->tokensTemporary as $candidate)
        {
            if($candidate->getID() !== $tokenID)
            {
                continue;
            }

            return $candidate;
        }

        return null;
    }
238
    
239
   /**
    * Replaces typographic double quotes (opening and closing)
    * in the tokenized string with the regular double quote.
    * Some WYSIWYG editors insert these instead of plain quotes.
    */
    protected function tokenize_normalize_quotes() : void
    {
        $prettyQuotes = array('“', '”');
        $regularQuote = '"';

        $this->tokenized = str_replace($prettyQuotes, $regularQuote, $this->tokenized);
    }
248
    
249
    /**
     * Replaces backslash-escaped double quotes in the tokenized
     * string with the `__QUOTE__` placeholder. This step runs
     * before the string literal detection in the categories list.
     */
    protected function tokenize_escaped_quotes() : void
    {
        $escapedQuote = '\"';
        $placeholder = '__QUOTE__';

        $this->tokenized = str_replace($escapedQuote, $placeholder, $this->tokenized);
    }
253
    
254
    /**
     * Registers a keyword token for each known keyword that
     * is present in the tokenized string.
     */
    protected function tokenize_keywords() : void
    {
        foreach($this->keywords as $keyword)
        {
            // The tokenized string changes with every registered
            // token, so it is checked anew on each pass.
            if(strpos($this->tokenized, $keyword) === false)
            {
                continue;
            }

            $this->registerToken('Keyword', $keyword);
        }
    }
264
    
265
    /**
     * Builds the ordered tokens list from the tokenized string.
     *
     * The string is split at the delimiter. Each resulting part is
     * either the ID of a registered token, or leftover content that
     * no tokenizing step recognized; the latter is added as an
     * unknown token with a newly generated ID.
     */
    protected function tokenize_extract_tokens() : void
    {
        // NOTE(review): explodeTrim presumably trims the parts and
        // drops empty ones - confirm against the AppUtils library.
        $parts = \AppUtils\ConvertHelper::explodeTrim($this->delimiter, $this->tokenized);

        foreach($parts as $part)
        {
            $token = $this->getTokenByID($part);

            // Not a known token ID: keep the content as an unknown token.
            if($token === null)
            {
                $token = new Mailcode_Parser_Statement_Tokenizer_Token_Unknown($this->generateID(), $part);
            }

            $this->tokensOrdered[] = $token;
        }
    }
288
        
289
    /**
     * Registers a variable token for each distinct variable
     * found in the tokenized string.
     *
     * The variables are registered longest matched text first.
     * registerToken() replaces every occurrence of the matched
     * text, so registering a variable whose text is the beginning
     * of a longer variable's text first would cut the longer one
     * apart and leave its remainder behind as unknown content.
     */
    protected function tokenize_variables() : void
    {
        $vars = Mailcode::create()->findVariables($this->tokenized)->getGroupedByHash();

        // Copy into a plain list, so sorting works regardless
        // of the keys used in the grouped collection.
        $sorted = array();

        foreach($vars as $var)
        {
            $sorted[] = $var;
        }

        usort(
            $sorted,
            static function($a, $b) : int {
                return strlen($b->getMatchedText()) <=> strlen($a->getMatchedText());
            }
        );

        foreach($sorted as $var)
        {
            $this->registerToken('Variable', $var->getMatchedText(), $var);
        }
    }
298
    
299
    /**
     * Registers an operand token for each known operand that is
     * present in the tokenized string, going through the operands
     * in the order of the operands list.
     */
    protected function tokenize_operands() : void
    {
        foreach($this->operands as $operand)
        {
            // Checked anew on each pass: registering an operand
            // removes its occurrences from the tokenized string.
            if(strpos($this->tokenized, $operand) === false)
            {
                continue;
            }

            $this->registerToken('Operand', $operand);
        }
    }
309
    
310
    /**
     * Registers a string literal token for each double-quoted
     * text in the tokenized string. The match is ungreedy, so
     * each literal ends at the next double quote; the quotes
     * themselves are part of the matched text.
     */
    protected function tokenize_string_literals() : void
    {
        $found = array();
        preg_match_all('/"(.*)"/sxU', $this->tokenized, $found, PREG_PATTERN_ORDER);

        // Index 0 holds the full matches, including the quotes.
        $literals = $found[0];

        foreach($literals as $literal)
        {
            $this->registerToken('StringLiteral', $literal);
        }
    }
320
    
321
    /**
     * Registers a number token for each distinct number found in
     * the tokenized string: integers and decimals with a dot or
     * comma separator, optionally preceded by minus signs.
     *
     * The numbers are de-duplicated and registered longest first.
     * registerToken() replaces every occurrence of the matched
     * text, so registering "1" before "10" would turn the latter
     * into a token followed by a stray "0". Registering the same
     * number twice would only add a token that is never used.
     */
    protected function tokenize_numbers() : void
    {
        $matches = array();
        preg_match_all('/-*[0-9]+\s*[.,]\s*[0-9]+|-*[0-9]+/sx', $this->tokenized, $matches, PREG_PATTERN_ORDER);

        // Compared as strings, so "1" and "1.0" stay distinct.
        $numbers = array_unique($matches[0] ?? array());

        usort(
            $numbers,
            static function(string $a, string $b) : int {
                return strlen($b) <=> strlen($a);
            }
        );

        foreach($numbers as $number)
        {
            $this->registerToken('Number', $number);
        }
    }
331
    
332
   /**
    * Generates an ID made of 12 random uppercase letters that
    * has not been handed out before. Digits are left out on
    * purpose, so that token IDs in the tokenized string cannot
    * be picked up by the numbers detection.
    *
    * The generated ID is added to the static list of known IDs.
    *
    * @return string
    */
    protected function generateID() : string
    {
        static $alphabet = null;

        if($alphabet === null)
        {
            $alphabet = range('A', 'Z');
        }

        $length = 12;

        // Draw new IDs until one is found that is not in use yet.
        do
        {
            $id = '';

            for($i = 0; $i < $length; $i++)
            {
                $id .= $alphabet[array_rand($alphabet)];
            }
        }
        while(in_array($id, self::$ids, true));

        self::$ids[] = $id;

        return $id;
    }
365
}
366