Passed
Push — master ( 2f714f...1c67eb )
by Sebastian
02:15
created

tokenize_keywords()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 7
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
eloc 3
c 1
b 0
f 0
nc 3
nop 0
dl 0
loc 7
rs 10
1
<?php
2
/**
3
 * File containing the {@see Mailcode_Parser_Statement_Tokenizer} class.
4
 *
5
 * @package Mailcode
6
 * @subpackage Parser
7
 * @see Mailcode_Parser_Statement_Tokenizer
8
 */
9
10
declare(strict_types=1);
11
12
namespace Mailcode;
13
14
/**
 * Mailcode statement tokenizer: parses a mailcode statement
 * into its logical parts.
 *
 * How it works: the statement string is processed by a series of
 * `tokenize_*` steps (see {@see Mailcode_Parser_Statement_Tokenizer::$tokenCategories}).
 * Each step replaces the text it recognizes with a placeholder made of
 * the delimiter, a generated token ID, and the delimiter again. The
 * final step splits the resulting string by the delimiter: parts that
 * match a registered token ID become that token, anything else becomes
 * an "unknown" token.
 *
 * @package Mailcode
 * @subpackage Parser
 * @author Sebastian Mordziol
 */
class Mailcode_Parser_Statement_Tokenizer
{
    public const ERROR_TOKENIZE_METHOD_MISSING = 49801;

    /**
     * Operand strings to detect. Multi-character operands are listed
     * before the single characters they contain, so that `==` is
     * replaced before `=` gets a chance to match part of it.
     *
     * @var string[]
     */
    protected $operands = array(
        '==',
        '<=',
        '>=',
        '!=',
        '=',
        '+',
        '-',
        '/',
        '*',
        '>',
        '<'
    );

    /**
     * Keyword strings to detect.
     *
     * @var string[]
     */
    protected $keywords = array(
        'in:'
    );

    /**
     * String placed before and after every token ID in the
     * working copy of the statement.
     *
     * @var string
     */
    protected $delimiter = '§§';

    /**
     * Names of the tokenize steps, in execution order. Each entry
     * maps to a method called `tokenize_<entry>`.
     *
     * @var string[]
     */
    protected $tokenCategories = array(
        'variables',
        'escaped_quotes',
        'string_literals',
        'keywords',
        'numbers',
        'operands',
        'extract_tokens'
    );

    /**
     * @var Mailcode_Parser_Statement
     */
    protected $statement;

    /**
     * Working copy of the statement string, in which recognized
     * text is progressively replaced by token placeholders.
     *
     * @var string
     */
    protected $tokenized;

    /**
     * Tokens registered by the tokenize steps, in registration order.
     *
     * @var Mailcode_Parser_Statement_Tokenizer_Token[]
     */
    protected $tokensTemporary = array();

    /**
     * Tokens in the order in which they appear in the statement.
     *
     * @var Mailcode_Parser_Statement_Tokenizer_Token[]
     */
    protected $tokensOrdered = array();

    /**
     * All token IDs generated so far, shared by all tokenizer instances.
     *
     * @var string[]
     */
    protected static $ids = array();

    /**
     * Stores the statement and immediately tokenizes its statement string.
     *
     * @param Mailcode_Parser_Statement $statement
     */
    public function __construct(Mailcode_Parser_Statement $statement)
    {
        $this->statement = $statement;

        $this->tokenize($statement->getStatementString());
    }

    /**
     * Retrieves all tokens detected in the statement string, in
     * the order they were found.
     *
     * @return Mailcode_Parser_Statement_Tokenizer_Token[]
     */
    public function getTokens()
    {
        return $this->tokensOrdered;
    }

    /**
     * Whether any tokens (known or unknown) were found in the statement.
     *
     * @return bool
     */
    public function hasTokens() : bool
    {
        return !empty($this->tokensOrdered);
    }

    /**
     * Whether there were any unknown tokens in the statement.
     *
     * @return bool
     */
    public function hasUnknown() : bool
    {
        return count($this->getUnknown()) > 0;
    }

    /**
     * Retrieves all unknown content tokens, if any.
     *
     * @return \Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_Unknown[]
     */
    public function getUnknown()
    {
        $result = array();

        foreach($this->tokensOrdered as $token)
        {
            if($token instanceof Mailcode_Parser_Statement_Tokenizer_Token_Unknown)
            {
                $result[] = $token;
            }
        }

        return $result;
    }

    /**
     * Retrieves the first unknown token in the statement.
     *
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Unknown|NULL NULL if there are no unknown tokens.
     */
    public function getFirstUnknown() : ?Mailcode_Parser_Statement_Tokenizer_Token_Unknown
    {
        $unknown = $this->getUnknown();

        // getUnknown() builds a sequential list, so index 0 is the first entry.
        return $unknown[0] ?? null;
    }

    /**
     * Builds the normalized statement string: the normalized text of
     * every token, joined by single spaces. Tokens whose normalized
     * text is an empty string are left out.
     *
     * @return string
     */
    public function getNormalized() : string
    {
        $parts = array();

        foreach($this->tokensOrdered as $token)
        {
            $string = $token->getNormalized();

            // Compare against the empty string explicitly: empty() would
            // also discard the string "0", which is a valid number token.
            if($string !== '')
            {
                $parts[] = $string;
            }
        }

        return implode(' ', $parts);
    }

    /**
     * Runs all tokenize steps on the statement string, in the
     * order defined by the token categories.
     *
     * @param string $statement
     * @throws Mailcode_Exception If a category has no matching tokenize method.
     *
     * @see Mailcode_Parser_Statement_Tokenizer::ERROR_TOKENIZE_METHOD_MISSING
     */
    protected function tokenize(string $statement) : void
    {
        $this->tokenized = trim($statement);

        foreach($this->tokenCategories as $category)
        {
            $method = 'tokenize_'.$category;

            if(!method_exists($this, $method))
            {
                throw new Mailcode_Exception(
                    'Unknown statement token.',
                    sprintf(
                        'The tokenize method [%s] is not present in class [%s].',
                        $method,
                        get_class($this)
                    ),
                    self::ERROR_TOKENIZE_METHOD_MISSING
                );
            }

            $this->$method();
        }
    }

    /**
     * Registers a token to add in the statement string.
     *
     * Every occurrence of the matched text in the working string is
     * replaced by the same placeholder, so a single registered token
     * stands in for all identical occurrences.
     *
     * @param string $type Suffix of the token class name to instantiate.
     * @param string $matchedText
     * @param mixed $subject Passed through to the token's constructor.
     */
    protected function registerToken(string $type, string $matchedText, $subject=null) : void
    {
        $tokenID = $this->generateID();

        $this->tokenized = str_replace(
            $matchedText,
            $this->delimiter.$tokenID.$this->delimiter,
            $this->tokenized
        );

        $class = '\Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_'.$type;

        $this->tokensTemporary[] = new $class($tokenID, $matchedText, $subject);
    }

    /**
     * Looks up a registered token by its ID.
     *
     * @param string $tokenID
     * @return Mailcode_Parser_Statement_Tokenizer_Token|NULL NULL if no token has this ID.
     */
    protected function getTokenByID(string $tokenID) : ?Mailcode_Parser_Statement_Tokenizer_Token
    {
        foreach($this->tokensTemporary as $token)
        {
            if($token->getID() === $tokenID)
            {
                return $token;
            }
        }

        return null;
    }

    /**
     * Replaces escaped quotes with a placeholder, so they are not
     * mistaken for string literal boundaries by the next step.
     */
    protected function tokenize_escaped_quotes() : void
    {
        $this->tokenized = str_replace('\"', '__QUOTE__', $this->tokenized);
    }

    /**
     * Registers a token for each known keyword present in the statement.
     */
    protected function tokenize_keywords() : void
    {
        foreach($this->keywords as $keyword)
        {
            if(strpos($this->tokenized, $keyword) !== false)
            {
                $this->registerToken('Keyword', $keyword);
            }
        }
    }

    /**
     * Final step: turns the working string into the ordered token list.
     */
    protected function tokenize_extract_tokens() : void
    {
        // split the string by the delimiters: this gives an
        // array with tokenIDs, and any content that may be left
        // over that could not be tokenized.
        $parts = \AppUtils\ConvertHelper::explodeTrim($this->delimiter, $this->tokenized);

        foreach($parts as $part)
        {
            $token = $this->getTokenByID($part);

            // anything that is not a token ID is added as an unknown token.
            if($token === null)
            {
                $token = new Mailcode_Parser_Statement_Tokenizer_Token_Unknown($this->generateID(), $part);
            }

            $this->tokensOrdered[] = $token;
        }
    }

    /**
     * Registers a token for each distinct variable found in the statement.
     */
    protected function tokenize_variables() : void
    {
        $vars = Mailcode::create()->findVariables($this->tokenized)->getGroupedByHash();

        foreach($vars as $var)
        {
            $this->registerToken('Variable', $var->getMatchedText(), $var);
        }
    }

    /**
     * Registers a token for each known operand present in the statement.
     */
    protected function tokenize_operands() : void
    {
        foreach($this->operands as $operand)
        {
            if(strpos($this->tokenized, $operand) !== false)
            {
                $this->registerToken('Operand', $operand);
            }
        }
    }

    /**
     * Registers a token for each distinct double-quoted string literal.
     */
    protected function tokenize_string_literals() : void
    {
        $matches = array();

        // The quantifier is lazy so that each literal ends at its own
        // closing quote: a greedy match would swallow everything between
        // the first and the last quote of the statement as one literal.
        preg_match_all('/"(.*?)"/sx', $this->tokenized, $matches, PREG_PATTERN_ORDER);

        // registerToken() replaces all occurrences at once, so identical
        // literals only need to be registered a single time.
        foreach(array_unique($matches[0]) as $match)
        {
            $this->registerToken('StringLiteral', $match);
        }
    }

    /**
     * Registers a token for each distinct number (integer or decimal,
     * optionally negative) found in the statement.
     */
    protected function tokenize_numbers() : void
    {
        $matches = array();
        preg_match_all('/-*[0-9]+\s*[.,]\s*[0-9]+|-*[0-9]+/sx', $this->tokenized, $matches, PREG_PATTERN_ORDER);

        $numbers = array_unique($matches[0]);

        // Longest first: registerToken() replaces every occurrence of the
        // matched text, so registering "1" before "12" would also replace
        // the "1" inside "12" and leave a stray "2" behind.
        usort($numbers, static function(string $a, string $b) : int
        {
            return strlen($b) <=> strlen($a);
        });

        foreach($numbers as $number)
        {
            $this->registerToken('Number', $number);
        }
    }

    /**
     * Generates a unique alphabet-based ID without numbers
     * to use as token name, to avoid conflicts with the
     * numbers detection.
     *
     * @return string 12 uppercase letters, not handed out before.
     */
    protected function generateID() : string
    {
        static $alphas;

        if(!isset($alphas))
        {
            $alphas = range('A', 'Z');
        }

        $amount = 12;

        // Draw again in the unlikely case the ID was already handed out.
        do
        {
            $result = '';

            for($i=0; $i < $amount; $i++)
            {
                $result .= $alphas[array_rand($alphas)];
            }
        }
        while(in_array($result, self::$ids, true));

        self::$ids[] = $result;

        return $result;
    }
}
354