Mailcode_Parser_Statement_Tokenizer - Code Metrics - Inspection of "Added testsuite for the collection class." - Mistralys/mailcode - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 75ee28...758f58 )

by Sebastian

created 2020-11-13 09:52 UTC

Mailcode_Parser_Statement_Tokenizer B

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	394
Duplicated Lines	0 %

Importance

Changes	3
Bugs	0	Features	0

Metric	Value
eloc	134
c	3
b	0
f	0
dl	0
loc	394
rs	8.64
wmc	47

23 Methods

Rating	Name	Size	Complexity
A	tokenize()	22	3
A	registerToken()	3	1
A	getFirstUnknown()	10	2
A	appendToken()	7	1
A	tokenize_normalize_quotes()	3	1
A	tokenize_escaped_quotes()	3	1
A	appendKeyword()	15	2
A	tokenize_string_literals()	8	2
A	__construct()	5	1
A	generateID()	25	4
A	getNormalized()	15	3
A	getTokens()	3	1
A	tokenize_operands()	7	3
A	tokenize_variables()	7	2
A	removeToken()	16	3
A	tokenize_numbers()	8	2
A	hasUnknown()	5	1
A	createToken()	13	1
A	tokenize_extract_tokens()	20	3
A	getUnknown()	13	3
A	hasTokens()	3	1
A	tokenize_keywords()	7	3
A	getTokenByID()	11	3

How to fix Complexity

<?php
/**
 * File containing the {@see Mailcode_Parser_Statement_Tokenizer} class.
 *
 * @package Mailcode
 * @subpackage Parser
 * @see Mailcode_Parser_Statement_Tokenizer
 */

declare(strict_types=1);

namespace Mailcode;

/**
 * Mailcode statement tokenizer: parses a mailcode statement
 * into its logical parts.
 *
 * @package Mailcode
 * @subpackage Parser
 * @author Sebastian Mordziol <[email protected]>
 */
class Mailcode_Parser_Statement_Tokenizer
{
    // Error codes passed to Mailcode_Exception: the first is thrown by
    // tokenize() when a tokenize_* method is missing, the second by
    // appendKeyword() when the created token is not a keyword token.
    const ERROR_TOKENIZE_METHOD_MISSING = 49801;
    const ERROR_INVALID_TOKEN_CREATED = 49802;
    
   /**
    * Operand strings detected by tokenize_operands().
    *
    * They are checked in this order, and every hit is replaced in
    * the working string right away. Multi-character operands must
    * therefore stay listed before the single characters they
    * contain (`==` before `=`, `<=` before `<`).
    *
    * @var string[]
    */
    protected $operands = array(
        '==',
        '<=',
        '>=',
        '!=',
        '=',
        '+',
        '-',
        '/',
        '*',
        '>',
        '<'
    );
    
   /**
    * Keyword strings detected by tokenize_keywords(), including
    * their trailing colon.
    *
    * @var string[]
    */
    protected $keywords = array(
        'in:',
        'insensitive:',
        'urlencode:'
    );
    
   /**
    * Marker placed on both sides of a token ID when matched text
    * is replaced in the working string. tokenize_extract_tokens()
    * splits the working string on it.
    *
    * @var string
    */
    protected $delimiter = '§§';
    
    /**
     * Suffixes of the tokenize_* methods that tokenize() calls,
     * in exactly this order. `extract_tokens` comes last because
     * it reads the token IDs the earlier steps have inserted.
     *
     * @var string[]
     */
    protected $tokenCategories = array(
        'variables',
        'normalize_quotes',
        'escaped_quotes',
        'string_literals',
        'keywords',
        'numbers',
        'operands',
        'extract_tokens'
    );
    
   /**
    * The statement passed to the constructor.
    *
    * @var Mailcode_Parser_Statement
    */
    protected $statement;
    
   /**
    * Working copy of the statement string, in which matched text
    * is progressively replaced by delimited token IDs.
    *
    * @var string
    */
    protected $tokenized;
    
   /**
    * Tokens registered by the tokenize_* steps, in registration
    * order. getTokenByID() searches this list.
    *
    * @var Mailcode_Parser_Statement_Tokenizer_Token[]
    */
    protected $tokensTemporary = array();
    
    /**
     * The token list returned by getTokens(). Filled by
     * tokenize_extract_tokens() and extended by appendToken().
     *
     * @var Mailcode_Parser_Statement_Tokenizer_Token[]
     */
    protected $tokensOrdered = array();
    
   /**
    * Every ID handed out by generateID() so far. Static, so the
    * list is shared by all tokenizer instances.
    *
    * @var string[]
    */
    protected static $ids = array();
    
    /**
     * Stores the statement and tokenizes its string form right away.
     *
     * @param Mailcode_Parser_Statement $statement
     * @throws Mailcode_Exception If a configured tokenize method is missing.
     */
    public function __construct(Mailcode_Parser_Statement $statement)
    {
        $source = $statement->getStatementString();

        $this->statement = $statement;
        $this->tokenize($source);
    }

   /**
    * Returns the token list: the tokens of the statement string
    * in the order they occur in it, followed by any tokens that
    * were appended afterwards.
    *
    * @return Mailcode_Parser_Statement_Tokenizer_Token[]
    */
    public function getTokens()
    {
        $tokens = $this->tokensOrdered;

        return $tokens;
    }
    
    /**
     * Whether the token list contains at least one token.
     *
     * @return bool
     */
    public function hasTokens() : bool
    {
        return count($this->tokensOrdered) > 0;
    }
    
   /**
    * Tells whether the token list contains at least one
    * unknown content token.
    *
    * @return bool
    */
    public function hasUnknown() : bool
    {
        return count($this->getUnknown()) > 0;
    }
    
   /**
    * Collects the unknown content tokens from the token list,
    * keeping their relative order.
    *
    * @return \Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_Unknown[] Empty when there are none.
    */
    public function getUnknown()
    {
        $unknown = array_filter(
            $this->tokensOrdered,
            static function($candidate) : bool
            {
                return $candidate instanceof Mailcode_Parser_Statement_Tokenizer_Token_Unknown;
            }
        );

        // array_filter keeps the original keys: re-index to get a plain list.
        return array_values($unknown);
    }
    
    /**
     * Returns the first unknown content token of the token list.
     *
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Unknown|NULL NULL when there is no unknown token.
     */
    public function getFirstUnknown() : ?Mailcode_Parser_Statement_Tokenizer_Token_Unknown
    {
        $candidates = $this->getUnknown();

        if(empty($candidates))
        {
            return null;
        }

        return reset($candidates);
    }
    
    /**
     * Builds the normalized statement string: the normalized form
     * of every token in the token list, joined with single spaces.
     * Tokens whose normalized form is empty are left out.
     *
     * @return string
     */
    public function getNormalized() : string
    {
        $normalized = array();

        foreach($this->tokensOrdered as $token)
        {
            $text = $token->getNormalized();

            // loose comparison on purpose, as any empty-like value is skipped
            if($text == '')
            {
                continue;
            }

            $normalized[] = $text;
        }

        return implode(' ', $normalized);
    }
    
    /**
     * Runs all tokenizing steps on the statement string.
     *
     * The trimmed statement becomes the working string, then the
     * `tokenize_*` method of every entry in the token categories
     * list is called, in list order.
     *
     * @param string $statement
     * @throws Mailcode_Exception {@see self::ERROR_TOKENIZE_METHOD_MISSING} when a category has no matching method.
     */
    protected function tokenize(string $statement) : void
    {
        $this->tokenized = trim($statement);

        foreach($this->tokenCategories as $category)
        {
            $method = 'tokenize_'.$category;

            if(method_exists($this, $method))
            {
                $this->$method();
                continue;
            }

            $details = sprintf(
                'The tokenize method [%s] is not present in class [%s].',
                $method,
                get_class($this)
            );

            throw new Mailcode_Exception(
                'Unknown statement token.',
                $details,
                self::ERROR_TOKENIZE_METHOD_MISSING
            );
        }
    }

   /**
    * Creates a token for the matched text and stores it in the
    * temporary token list, from which it can be fetched by ID.
    * Creating the token also replaces the matched text in the
    * working string with the token's delimited ID.
    * 
    * @param string $type Suffix of the token class name, e.g. `Keyword`.
    * @param string $matchedText
    * @param mixed $subject Passed through to the token.
    */
    protected function registerToken(string $type, string $matchedText, $subject=null) : void
    {
        $token = $this->createToken($type, $matchedText, $subject);

        $this->tokensTemporary[] = $token;
    }

    /**
     * Instantiates a token of the given type with a fresh ID, and
     * replaces every occurrence of the matched text in the working
     * string with that ID wrapped in the delimiter.
     *
     * @param string $type Suffix appended to `Mailcode_Parser_Statement_Tokenizer_Token_` to get the class name.
     * @param string $matchedText
     * @param mixed $subject Passed through to the token's constructor.
     * @return Mailcode_Parser_Statement_Tokenizer_Token
     */
    protected function createToken(string $type, string $matchedText, $subject=null) : Mailcode_Parser_Statement_Tokenizer_Token
    {
        $id = $this->generateID();
        $placeholder = $this->delimiter.$id.$this->delimiter;
        $tokenClass = '\Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_'.$type;

        $this->tokenized = str_replace($matchedText, $placeholder, $this->tokenized);

        return new $tokenClass($id, $matchedText, $subject);
    }

    /**
     * Appends a keyword token to the end of the token list.
     *
     * @param string $name The keyword, with or without its trailing colon.
     * @return Mailcode_Parser_Statement_Tokenizer_Token_Keyword
     * @throws Mailcode_Exception {@see self::ERROR_INVALID_TOKEN_CREATED} when the created token is not a keyword token.
     */
    public function appendKeyword(string $name) : Mailcode_Parser_Statement_Tokenizer_Token_Keyword
    {
        // ensure exactly one trailing colon, whatever the caller passed
        $keyword = rtrim($name, ':').':';

        $token = $this->appendToken('Keyword', $keyword);

        if(!$token instanceof Mailcode_Parser_Statement_Tokenizer_Token_Keyword)
        {
            throw new Mailcode_Exception(
                'Invalid token created',
                '',
                self::ERROR_INVALID_TOKEN_CREATED
            );
        }

        return $token;
    }

    /**
     * Removes every entry of the token list that has the same ID
     * as the given token. The remaining tokens keep their order.
     *
     * @param Mailcode_Parser_Statement_Tokenizer_Token $token
     * @return Mailcode_Parser_Statement_Tokenizer This instance, for chaining.
     */
    public function removeToken(Mailcode_Parser_Statement_Tokenizer_Token $token) : Mailcode_Parser_Statement_Tokenizer
    {
        $removeID = $token->getID();

        $remaining = array_filter(
            $this->tokensOrdered,
            static function($existing) use ($removeID) : bool
            {
                return $existing->getID() !== $removeID;
            }
        );

        // re-index, so the list has no gaps
        $this->tokensOrdered = array_values($remaining);

        return $this;
    }

    /**
     * Creates a token and adds it at the end of the token list.
     *
     * @param string $type Suffix of the token class name, e.g. `Keyword`.
     * @param string $matchedText
     * @param mixed $subject Passed through to the token.
     * @return Mailcode_Parser_Statement_Tokenizer_Token The token that was appended.
     */
    protected function appendToken(string $type, string $matchedText, $subject=null) : Mailcode_Parser_Statement_Tokenizer_Token
    {
        $created = $this->createToken($type, $matchedText, $subject);

        array_push($this->tokensOrdered, $created);

        return $created;
    }
    
    /**
     * Searches the temporary token list for a token with the
     * given ID.
     *
     * @param string $tokenID
     * @return Mailcode_Parser_Statement_Tokenizer_Token|NULL The first match, or NULL if no token has this ID.
     */
    protected function getTokenByID(string $tokenID) : ?Mailcode_Parser_Statement_Tokenizer_Token
    {
        foreach($this->tokensTemporary as $candidate)
        {
            if($candidate->getID() !== $tokenID)
            {
                continue;
            }

            return $candidate;
        }

        return null;
    }
    
   /**
    * Replaces typographic double quotes in the working string
    * with the plain double quote character. WYSIWYG editors
    * tend to insert the typographic variants.
    */
    protected function tokenize_normalize_quotes() : void
    {
        $prettyQuotes = array('“', '”');

        $this->tokenized = str_replace($prettyQuotes, '"', $this->tokenized);
    }
    
    /**
     * Replaces every backslash-escaped double quote in the working
     * string with the `__QUOTE__` placeholder. This step is listed
     * before the string literal detection, presumably so that an
     * escaped quote does not end a literal early.
     */
    protected function tokenize_escaped_quotes() : void
    {
        $placeholder = '__QUOTE__';

        $this->tokenized = str_replace('\"', $placeholder, $this->tokenized);
    }
    
    /**
     * Registers a Keyword token for every known keyword that
     * occurs in the working string.
     */
    protected function tokenize_keywords() : void
    {
        foreach($this->keywords as $keyword)
        {
            if(!strstr($this->tokenized, $keyword))
            {
                continue;
            }

            $this->registerToken('Keyword', $keyword);
        }
    }
    
    /**
     * Builds the token list from the working string.
     *
     * The working string is split on the delimiter. A part that is
     * the ID of a registered token is resolved to that token, and
     * every other part is leftover content that could not be
     * tokenized, which is wrapped in a new Unknown token.
     */
    protected function tokenize_extract_tokens() : void
    {
        $segments = \AppUtils\ConvertHelper::explodeTrim($this->delimiter, $this->tokenized);

        foreach($segments as $segment)
        {
            // The right-hand side only runs for segments that are not
            // a known token ID, so no ID is generated needlessly.
            $this->tokensOrdered[] = $this->getTokenByID($segment)
                ?? new Mailcode_Parser_Statement_Tokenizer_Token_Unknown($this->generateID(), $segment);
        }
    }
        
    /**
     * Looks up the variables in the working string, grouped by
     * hash, and registers a Variable token for each of them. The
     * variable object is handed to the token as its subject.
     */
    protected function tokenize_variables() : void
    {
        $collection = Mailcode::create()->findVariables($this->tokenized);

        foreach($collection->getGroupedByHash() as $variable)
        {
            $matchedText = $variable->getMatchedText();

            $this->registerToken('Variable', $matchedText, $variable);
        }
    }
    
    /**
     * Registers an Operand token for every known operand that
     * occurs in the working string.
     *
     * The operands are checked one after the other against the
     * current working string, so a check sees the replacements
     * made for the operands before it.
     */
    protected function tokenize_operands() : void
    {
        foreach($this->operands as $operand)
        {
            if(!strstr($this->tokenized, $operand))
            {
                continue;
            }

            $this->registerToken('Operand', $operand);
        }
    }
    
    /**
     * Registers a StringLiteral token for every double-quoted
     * string in the working string. The match is ungreedy and
     * the registered text includes the surrounding quotes.
     */
    protected function tokenize_string_literals() : void
    {
        $found = array();
        preg_match_all('/"(.*)"/sxU', $this->tokenized, $found, PREG_PATTERN_ORDER);

        // index 0 holds the full matches, quotes included
        $literals = $found[0];

        foreach($literals as $literal)
        {
            $this->registerToken('StringLiteral', $literal);
        }
    }
    
    /**
     * Registers a Number token for every number in the working
     * string: integers, and decimals with a dot or comma as
     * separator. Minus signs directly in front of the digits are
     * treated as part of the number.
     *
     * Registering a token replaces every occurrence of its text in
     * the working string. Each distinct number is therefore
     * registered only once, longest first. Otherwise a short
     * number would also be replaced inside a longer one that
     * contains it (`1` inside `11`), corrupting the longer number.
     */
    protected function tokenize_numbers() : void
    {
        $matches = array();
        preg_match_all('/-*[0-9]+\s*[.,]\s*[0-9]+|-*[0-9]+/sx', $this->tokenized, $matches, PREG_PATTERN_ORDER);

        // One registration already replaces all occurrences of a number.
        $numbers = array_values(array_unique($matches[0]));

        // Longest first. The order among equally long numbers is
        // irrelevant, since none of them can contain another.
        usort(
            $numbers,
            static function(string $a, string $b) : int
            {
                return strlen($b) <=> strlen($a);
            }
        );

        foreach($numbers as $number)
        {
            $this->registerToken('Number', $number);
        }
    }
    
   /**
    * Generates a unique ID of 12 random uppercase letters to use
    * as token name. It contains no digits, to avoid conflicts
    * with the numbers detection.
    * 
    * Generated IDs are recorded in the static ID list, so an ID
    * is never handed out twice, whichever instance asks for it.
    * 
    * @return string
    */
    protected function generateID() : string
    {
        static $alphas;
        
        if(!isset($alphas))
        {
            $alphas = range('A', 'Z');
        }
        
        $amount = 12;
        
        // Retry in a loop rather than by recursion, so that repeated
        // collisions cannot grow the call stack.
        do
        {
            $result = '';
            
            for($i=0; $i < $amount; $i++)
            {
                $result .= $alphas[array_rand($alphas)];
            }
        }
        while(in_array($result, self::$ids, true));
        
        self::$ids[] = $result;
        
        return $result;
    }
}


1			<?php
2			/**
3			* File containing the {@see Mailcode_Parser_Statement_Tokenizer} class.
4			*
5			* @package Mailcode
6			* @subpackage Parser
7			* @see Mailcode_Parser_Statement_Tokenizer
8			*/
9
10			declare(strict_types=1);
11
12			namespace Mailcode;
13
14			/**
15			* Mailcode statement tokenizer: parses a mailcode statement
16			* into its logical parts.
17			*
18			* @package Mailcode
19			* @subpackage Parser
20			* @author Sebastian Mordziol <[email protected]>
21			*/
22			class Mailcode_Parser_Statement_Tokenizer
23			{
24			const ERROR_TOKENIZE_METHOD_MISSING = 49801;
25			const ERROR_INVALID_TOKEN_CREATED = 49802;
26
27			/**
28			* @var string[]
29			*/
30			protected $operands = array(
31			'==',
32			'<=',
33			'>=',
34			'!=',
35			'=',
36			'+',
37			'-',
38			'/',
39			'*',
40			'>',
41			'<'
42			);
43
44			/**
45			* @var string[]
46			*/
47			protected $keywords = array(
48			'in:',
49			'insensitive:',
50			'urlencode:'
51			);
52
53			/**
54			* @var string
55			*/
56			protected $delimiter = '§§';
57
58			/**
59			* @var string[]
60			*/
61			protected $tokenCategories = array(
62			'variables',
63			'normalize_quotes',
64			'escaped_quotes',
65			'string_literals',
66			'keywords',
67			'numbers',
68			'operands',
69			'extract_tokens'
70			);
71
72			/**
73			* @var Mailcode_Parser_Statement
74			*/
75			protected $statement;
76
77			/**
78			* @var string
79			*/
80			protected $tokenized;
81
82			/**
83			* @var Mailcode_Parser_Statement_Tokenizer_Token[]
84			*/
85			protected $tokensTemporary = array();
86
87			/**
88			* @var Mailcode_Parser_Statement_Tokenizer_Token[]
89			*/
90			protected $tokensOrdered = array();
91
92			/**
93			* @var string[]
94			*/
95			protected static $ids = array();
96
97			public function __construct(Mailcode_Parser_Statement $statement)
98			{
99			$this->statement = $statement;
100
101			$this->tokenize($statement->getStatementString());
102			}
103
104			/**
105			* Retrieves all tokens detected in the statement string, in
106			* the order they were found.
107			*
108			* @return Mailcode_Parser_Statement_Tokenizer_Token[]
109			*/
110			public function getTokens()
111			{
112			return $this->tokensOrdered;
113			}
114
115			public function hasTokens() : bool
116			{
117			return !empty($this->tokensOrdered);
118			}
119
120			/**
121			* Whether there were any unknown tokens in the statement.
122			*
123			* @return bool
124			*/
125			public function hasUnknown() : bool
126			{
127			$unknown = $this->getUnknown();
128
129			return !empty($unknown);
130			}
131
132			/**
133			* Retrieves all unknown content tokens, if any.
134			*
135			* @return \Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_Unknown[]
136			*/
137			public function getUnknown()
138			{
139			$result = array();
140
141			foreach($this->tokensOrdered as $token)
142			{
143			if($token instanceof Mailcode_Parser_Statement_Tokenizer_Token_Unknown)
144			{
145			$result[] = $token;
146			}
147			}
148
149			return $result;
150			}
151
152			public function getFirstUnknown() : ?Mailcode_Parser_Statement_Tokenizer_Token_Unknown
153			{
154			$unknown = $this->getUnknown();
155
156			if(!empty($unknown))
157			{
158			return array_shift($unknown);
159			}
160
161			return null;
162			}
163
164			public function getNormalized() : string
165			{
166			$parts = array();
167
168			foreach($this->tokensOrdered as $token)
169			{
170			$string = $token->getNormalized();
171
172			if($string != '')
173			{
174			$parts[] = $string;
175			}
176			}
177
178			return implode(' ', $parts);
179			}
180
181			protected function tokenize(string $statement) : void
182			{
183			$this->tokenized = trim($statement);
184
185			foreach($this->tokenCategories as $token)
186			{
187			$method = 'tokenize_'.$token;
188
189			if(!method_exists($this, $method))
190			{
191			throw new Mailcode_Exception(
192			'Unknown statement token.',
193			sprintf(
194			'The tokenize method [%s] is not present in class [%s].',
195			$method,
196			get_class($this)
197			),
198			self::ERROR_TOKENIZE_METHOD_MISSING
199			);
200			}
201
202			$this->$method();
203			}
204			}
205
206			/**
207			* Registers a token to add in the statement string.
208			*
209			* @param string $type
210			* @param string $matchedText
211			* @param mixed $subject
212			*/
213			protected function registerToken(string $type, string $matchedText, $subject=null) : void
214			{
215			$this->tokensTemporary[] = $this->createToken($type, $matchedText, $subject);
216			}
217
218			protected function createToken(string $type, string $matchedText, $subject=null) : Mailcode_Parser_Statement_Tokenizer_Token
219			{
220			$tokenID = $this->generateID();
221
222			$this->tokenized = str_replace(
223			$matchedText,
224			$this->delimiter.$tokenID.$this->delimiter,
225			$this->tokenized
226			);
227
228			$class = '\Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_'.$type;
229
230			return new $class($tokenID, $matchedText, $subject);
231			}
232
233			public function appendKeyword(string $name) : Mailcode_Parser_Statement_Tokenizer_Token_Keyword
234			{
235			$name = rtrim($name, ':').':';
236
237			$token = $this->appendToken('Keyword', $name);
238
239			if($token instanceof Mailcode_Parser_Statement_Tokenizer_Token_Keyword)
240			{
241			return $token;
242			}
243
244			throw new Mailcode_Exception(
245			'Invalid token created',
246			'',
247			self::ERROR_INVALID_TOKEN_CREATED
248			);
249			}
250
251			public function removeToken(Mailcode_Parser_Statement_Tokenizer_Token $token) : Mailcode_Parser_Statement_Tokenizer
252			{
253			$keep = array();
254			$tokenID = $token->getID();
255
256			foreach ($this->tokensOrdered as $checkToken)
257			{
258			if($checkToken->getID() !== $tokenID)
259			{
260			$keep[] = $checkToken;
261			}
262			}
263
264			$this->tokensOrdered = $keep;
265
266			return $this;
267			}
268
269			protected function appendToken(string $type, string $matchedText, $subject=null) : Mailcode_Parser_Statement_Tokenizer_Token
270			{
271			$token = $this->createToken($type, $matchedText, $subject);
272
273			$this->tokensOrdered[] = $token;
274
275			return $token;
276			}
277
278			protected function getTokenByID(string $tokenID) : ?Mailcode_Parser_Statement_Tokenizer_Token
279			{
280			foreach($this->tokensTemporary as $token)
281			{
282			if($token->getID() === $tokenID)
283			{
284			return $token;
285			}
286			}
287
288			return null;
289			}
290
291			/**
292			* Some WYSIWYG editors like using pretty quotes instead
293			* of the usual double quotes. This simply replaces all
294			* occurrences with the regular variant.
295			*/
296			protected function tokenize_normalize_quotes() : void
297			{
298			$this->tokenized = str_replace(array('“', '”'), '"', $this->tokenized);
299			}
300
301			protected function tokenize_escaped_quotes() : void
302			{
303			$this->tokenized = str_replace('\"', '__QUOTE__', $this->tokenized);
304			}
305
306			protected function tokenize_keywords() : void
307			{
308			foreach($this->keywords as $keyword)
309			{
310			if(strstr($this->tokenized, $keyword))
311			{
312			$this->registerToken('Keyword', $keyword);
313			}
314			}
315			}
316
317			protected function tokenize_extract_tokens() : void
318			{
319			// split the string by the delimiters: this gives an
320			// array with tokenIDs, and any content that may be left
321			// over that could not be tokenized.
322			$parts = \AppUtils\ConvertHelper::explodeTrim($this->delimiter, $this->tokenized);
323
324			foreach($parts as $part)
325			{
326			$token = $this->getTokenByID($part);
327
328			// if the entry is a token, simply add it.
329			if($token)
330			{
331			$this->tokensOrdered[] = $token;
332			}
333			// anything else is added as an unknown token.
334			else
335			{
336			$this->tokensOrdered[] = new Mailcode_Parser_Statement_Tokenizer_Token_Unknown($this->generateID(), $part);
337			}
338			}
339			}
340
341			protected function tokenize_variables() : void
342			{
343			$vars = Mailcode::create()->findVariables($this->tokenized)->getGroupedByHash();
344
345			foreach($vars as $var)
346			{
347			$this->registerToken('Variable', $var->getMatchedText(), $var);
348			}
349			}
350
351			protected function tokenize_operands() : void
352			{
353			foreach($this->operands as $operand)
354			{
355			if(strstr($this->tokenized, $operand))
356			{
357			$this->registerToken('Operand', $operand);
358			}
359			}
360			}
361
362			protected function tokenize_string_literals() : void
363			{
364			$matches = array();
365			preg_match_all('/"(.*)"/sxU', $this->tokenized, $matches, PREG_PATTERN_ORDER);
366
367			foreach($matches[0] as $match)
368			{
369			$this->registerToken('StringLiteral', $match);
370			}
371			}
372
373			protected function tokenize_numbers() : void
374			{
375			$matches = array();
376			preg_match_all('/-*[0-9]+\s*[.,]\s*[0-9]+|-*[0-9]+/sx', $this->tokenized, $matches, PREG_PATTERN_ORDER);
377
378			foreach($matches[0] as $match)
379			{
380			$this->registerToken('Number', $match);
381			}
382			}
383
384			/**
385			* Generates a unique alphabet-based ID without numbers
386			* to use as token name, to avoid conflicts with the
387			* numbers detection.
388			*
389			* @return string
390			*/
391			protected function generateID() : string
392			{
393			static $alphas;
394
395			if(!isset($alphas))
396			{
397			$alphas = range('A', 'Z');
398			}
399
400			$amount = 12;
401
402			$result = '';
403
404			for($i=0; $i < $amount; $i++)
405			{
406			$result .= $alphas[array_rand($alphas)];
407			}
408
409			if(!in_array($result, self::$ids))
410			{
411			self::$ids[] = $result;
412			return $result;
413			}
414
415			return $this->generateID();
416			}
417			}
418

Mistralys / mailcode

Push — master ( 75ee28...758f58 )

Mailcode_Parser_Statement_Tokenizer B

Complexity

Size/Duplication

Importance

23 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like