Mailcode_Parser_Statement_Tokenizer - Code Metrics - Inspection of "Normalizing pretty quotes in commands." - Mistralys/mailcode - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 83b5cd...0e7983 )

by Sebastian

created 2020-07-27 05:50 UTC

Mailcode_Parser_Statement_Tokenizer A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	342
Duplicated Lines	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
eloc	113
dl	0
loc	342
rs	9.2
c	1
b	0
f	0
wmc	40

19 Methods

Rating	Name	Size	Complexity
A	tokenize()	22	3
A	registerToken()	13	1
A	getFirstUnknown()	10	2
A	tokenize_normalize_quotes()	3	1
A	tokenize_escaped_quotes()	3	1
A	tokenize_string_literals()	8	2
A	generateID()	25	4
A	__construct()	5	1
A	getNormalized()	15	3
A	getTokens()	3	1
A	tokenize_operands()	7	3
A	tokenize_variables()	7	2
A	tokenize_numbers()	8	2
A	hasUnknown()	5	1
A	tokenize_extract_tokens()	20	3
A	getUnknown()	13	3
A	hasTokens()	3	1
A	tokenize_keywords()	7	3
A	getTokenByID()	11	3

How to fix Complexity

<?php
/**
 * File containing the {@see Mailcode_Parser_Statement_Tokenizer} class.
 *
 * @package Mailcode
 * @subpackage Parser
 * @see Mailcode_Parser_Statement_Tokenizer
 */

declare(strict_types=1);

namespace Mailcode;

/**
 * Mailcode statement tokenizer: parses a mailcode statement
 * into its logical parts.
 *
 * @package Mailcode
 * @subpackage Parser
 * @author Sebastian Mordziol <[email protected]>
 */
class Mailcode_Parser_Statement_Tokenizer
{
    const ERROR_TOKENIZE_METHOD_MISSING = 49801;
    
   /**
    * Operand strings recognized in a statement. Multi-character
    * operands are listed before the single characters they
    * contain: every operand found is replaced throughout the
    * working string right away, so `==` has to be consumed
    * before `=` gets its turn.
    *
    * @var string[]
    */
    protected $operands = array(
        '==',
        '<=',
        '>=',
        '!=',
        '=',
        '+',
        '-',
        '/',
        '*',
        '>',
        '<'
    );
    
   /**
    * Keyword strings recognized in a statement.
    *
    * @var string[]
    */
    protected $keywords = array(
        'in:',
        'insensitive:'
    );
    
   /**
    * Marker placed on both sides of a token ID when the matched
    * text is replaced in the working string.
    *
    * @var string
    */
    protected $delimiter = '§§';
    
    /**
     * Suffixes of the `tokenize_*` methods, in the order in
     * which {@see tokenize()} calls them.
     *
     * @var string[]
     */
    protected $tokenCategories = array(
        'variables',
        'normalize_quotes',
        'escaped_quotes',
        'string_literals',
        'keywords',
        'numbers',
        'operands',
        'extract_tokens'
    );
    
   /**
    * @var Mailcode_Parser_Statement
    */
    protected $statement;
    
   /**
    * Working copy of the statement string, in which recognized
    * parts are progressively replaced by delimited token IDs.
    *
    * @var string
    */
    protected $tokenized;
    
   /**
    * Tokens in the order they were registered, which is not
    * necessarily their order in the statement.
    *
    * @var Mailcode_Parser_Statement_Tokenizer_Token[]
    */
    protected $tokensTemporary = array();
    
    /**
     * Tokens in the order they appear in the statement,
     * including unknown tokens for leftover content.
     *
     * @var Mailcode_Parser_Statement_Tokenizer_Token[]
     */
    protected $tokensOrdered = array();
    
   /**
    * All token IDs generated so far. Static, so the list is
    * shared by every tokenizer instance.
    *
    * @var string[]
    */
    protected static $ids = array();
    
   /**
    * Stores the statement and tokenizes its string right away.
    *
    * @param Mailcode_Parser_Statement $statement
    * @throws Mailcode_Exception If a tokenize method is missing.
    */
    public function __construct(Mailcode_Parser_Statement $statement)
    {
        $this->statement = $statement;
        
        $this->tokenize($statement->getStatementString());
    }

   /**
    * Retrieves all tokens detected in the statement string, in 
    * the order they were found.
    * 
    * @return Mailcode_Parser_Statement_Tokenizer_Token[]
    */
    public function getTokens()
    {
        return $this->tokensOrdered;
    }
    
   /**
    * Whether the statement yielded any tokens at all
    * (unknown tokens included).
    *
    * @return bool
    */
    public function hasTokens() : bool
    {
        return !empty($this->tokensOrdered);
    }
    
   /**
    * Whether there were any unknown tokens in the statement.
    * 
    * @return bool
    */
    public function hasUnknown() : bool
    {
        $unknown = $this->getUnknown();
        
        return !empty($unknown);
    }
    
   /**
    * Retrieves all unknown content tokens, if any.
    * 
    * @return \Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_Unknown[]
    */
    public function getUnknown()
    {
        $result = array();
        
        foreach($this->tokensOrdered as $token)
        {
            if($token instanceof Mailcode_Parser_Statement_Tokenizer_Token_Unknown)
            {
                $result[] = $token;
            }
        }
        
        return $result;
    }
    
   /**
    * Retrieves the first unknown token in statement order.
    *
    * @return Mailcode_Parser_Statement_Tokenizer_Token_Unknown|NULL NULL if there are no unknown tokens.
    */
    public function getFirstUnknown() : ?Mailcode_Parser_Statement_Tokenizer_Token_Unknown
    {
        $unknown = $this->getUnknown();
        
        if(empty($unknown))
        {
            return null;
        }
        
        return array_shift($unknown);
    }
    
   /**
    * Joins the normalized form of every token with single
    * spaces, skipping tokens whose normalized form is empty.
    *
    * @return string
    */
    public function getNormalized() : string
    {
        $parts = array();
        
        foreach($this->tokensOrdered as $token)
        {
            $string = $token->getNormalized();
            
            if($string != '')
            {
                $parts[] = $string;
            }
        }
        
        return implode(' ', $parts);
    }
    
   /**
    * Runs every tokenize step listed in the token categories,
    * in order, on the trimmed statement string.
    *
    * @param string $statement
    * @throws Mailcode_Exception {@see Mailcode_Parser_Statement_Tokenizer::ERROR_TOKENIZE_METHOD_MISSING}
    */
    protected function tokenize(string $statement) : void
    {
        $this->tokenized = trim($statement);
        
        foreach($this->tokenCategories as $token)
        {
            $method = 'tokenize_'.$token;
            
            if(!method_exists($this, $method))
            {
                throw new Mailcode_Exception(
                    'Unknown statement token.',
                    sprintf(
                        'The tokenize method [%s] is not present in class [%s].',
                        $method,
                        get_class($this)
                    ),
                    self::ERROR_TOKENIZE_METHOD_MISSING
                );
            }
            
            $this->$method();
        }
    }
   
   /**
    * Registers a token to add in the statement string: every
    * occurrence of the matched text in the working string is
    * replaced by the delimited ID of the new token.
    * 
    * @param string $type Suffix of the token class name to instantiate.
    * @param string $matchedText
    * @param mixed $subject
    */
    protected function registerToken(string $type, string $matchedText, $subject=null) : void
    {
        $tokenID = $this->generateID();
        
        $this->tokenized = str_replace(
            $matchedText,
            $this->delimiter.$tokenID.$this->delimiter,
            $this->tokenized
        );
        
        $class = '\Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_'.$type;
        
        $this->tokensTemporary[] = new $class($tokenID, $matchedText, $subject);
    }
    
   /**
    * Looks up a registered token by its ID.
    *
    * @param string $tokenID
    * @return Mailcode_Parser_Statement_Tokenizer_Token|NULL NULL if no registered token has this ID.
    */
    protected function getTokenByID(string $tokenID) : ?Mailcode_Parser_Statement_Tokenizer_Token
    {
        foreach($this->tokensTemporary as $token)
        {
            if($token->getID() === $tokenID)
            {
                return $token;
            }
        }
        
        return null;
    }
    
   /**
    * Some WYSIWYG editors like using pretty quotes instead
    * of the usual double quotes. This simply replaces all
    * occurrences with the regular variant.
    */
    protected function tokenize_normalize_quotes() : void
    {
        $this->tokenized = str_replace(array('“', '”'), '"', $this->tokenized);
    }
    
   /**
    * Replaces escaped double quotes with a placeholder, so
    * the string literal detection that runs afterwards does
    * not end a literal at an escaped quote. The placeholder
    * is not converted back by this class.
    */
    protected function tokenize_escaped_quotes() : void
    {
        $this->tokenized = str_replace('\"', '__QUOTE__', $this->tokenized);
    }
    
   /**
    * Registers a keyword token for every known keyword
    * present in the working string.
    */
    protected function tokenize_keywords() : void
    {
        foreach($this->keywords as $keyword)
        {
            // explicit comparison instead of relying on the
            // truthiness of the substring strstr() returns.
            if(strpos($this->tokenized, $keyword) !== false)
            {
                $this->registerToken('Keyword', $keyword);
            }
        }
    }
    
   /**
    * Builds the ordered token list from the working string,
    * turning any content that was not tokenized into
    * unknown tokens.
    */
    protected function tokenize_extract_tokens() : void
    {
        // split the string by the delimiters: this gives an
        // array with tokenIDs, and any content that may be left
        // over that could not be tokenized.
        $parts = \AppUtils\ConvertHelper::explodeTrim($this->delimiter, $this->tokenized);

        foreach($parts as $part)
        {
            $token = $this->getTokenByID($part);
            
            // if the entry is a token, simply add it.
            if($token)
            {
                $this->tokensOrdered[] = $token;
            }
            // anything else is added as an unknown token.
            else 
            {
                $this->tokensOrdered[] = new Mailcode_Parser_Statement_Tokenizer_Token_Unknown($this->generateID(), $part);
            }
        }
    }
        
   /**
    * Registers a variable token for each variable group
    * found in the working string.
    */
    protected function tokenize_variables() : void
    {
        $vars = Mailcode::create()->findVariables($this->tokenized)->getGroupedByHash();
        
        foreach($vars as $var)
        {
            $this->registerToken('Variable', $var->getMatchedText(), $var);
        }
    }
    
   /**
    * Registers an operand token for every known operand
    * present in the working string, in the order of the
    * operands list.
    */
    protected function tokenize_operands() : void
    {
        foreach($this->operands as $operand)
        {
            if(strpos($this->tokenized, $operand) !== false)
            {
                $this->registerToken('Operand', $operand);
            }
        }
    }
    
   /**
    * Registers a string literal token for every double
    * quoted string in the working string.
    */
    protected function tokenize_string_literals() : void
    {
        $matches = array();
        preg_match_all('/"(.*)"/sxU', $this->tokenized, $matches, PREG_PATTERN_ORDER);
        
        foreach($matches[0] as $match)
        {
            $this->registerToken('StringLiteral', $match);
        }
    }
    
   /**
    * Registers a number token for every integer or decimal
    * number (dot or comma separated, optionally negative)
    * in the working string.
    */
    protected function tokenize_numbers() : void
    {
        $matches = array();
        preg_match_all('/-*[0-9]+\s*[.,]\s*[0-9]+|-*[0-9]+/sx', $this->tokenized, $matches, PREG_PATTERN_ORDER);
        
        // Registering a token replaces every occurrence of the
        // matched text. Handling the longest matches first keeps
        // a short number like "1" from eating into a longer one
        // like "12" or "1.5"; duplicates need a single token only.
        $numbers = array_unique($matches[0]);
        
        usort($numbers, static function(string $a, string $b) : int
        {
            return strlen($b) <=> strlen($a);
        });
        
        foreach($numbers as $number)
        {
            $this->registerToken('Number', $number);
        }
    }
    
   /**
    * Generates a unique alphabet-based ID without numbers
    * to use as token name, to avoid conflicts with the
    * numbers detection. Candidates are drawn at random until
    * one is found that has not been handed out before.
    * 
    * @return string 12 uppercase letters.
    */
    protected function generateID() : string
    {
        static $alphas;
        
        if(!isset($alphas))
        {
            $alphas = range('A', 'Z');
        }
        
        $amount = 12;
        
        do
        {
            $result = '';
            
            for($i=0; $i < $amount; $i++)
            {
                $result .= $alphas[array_rand($alphas)];
            }
        }
        while(in_array($result, self::$ids, true));
        
        self::$ids[] = $result;
        
        return $result;
    }
}


1			<?php
2			/**
3			* File containing the {@see Mailcode_Parser_Statement_Tokenizer} class.
4			*
5			* @package Mailcode
6			* @subpackage Parser
7			* @see Mailcode_Parser_Statement_Tokenizer
8			*/
9
10			declare(strict_types=1);
11
12			namespace Mailcode;
13
14			/**
15			* Mailcode statement tokenizer: parses a mailcode statement
16			* into its logical parts.
17			*
18			* @package Mailcode
19			* @subpackage Parser
20			* @author Sebastian Mordziol <[email protected]>
21			*/
22			class Mailcode_Parser_Statement_Tokenizer
23			{
24			const ERROR_TOKENIZE_METHOD_MISSING = 49801;
25
26			/**
27			* @var string[]
28			*/
29			protected $operands = array(
30			'==',
31			'<=',
32			'>=',
33			'!=',
34			'=',
35			'+',
36			'-',
37			'/',
38			'*',
39			'>',
40			'<'
41			);
42
43			/**
44			* @var string[]
45			*/
46			protected $keywords = array(
47			'in:',
48			'insensitive:'
49			);
50
51			/**
52			* @var string
53			*/
54			protected $delimiter = '§§';
55
56			/**
57			* @var string[]
58			*/
59			protected $tokenCategories = array(
60			'variables',
61			'normalize_quotes',
62			'escaped_quotes',
63			'string_literals',
64			'keywords',
65			'numbers',
66			'operands',
67			'extract_tokens'
68			);
69
70			/**
71			* @var Mailcode_Parser_Statement
72			*/
73			protected $statement;
74
75			/**
76			* @var string
77			*/
78			protected $tokenized;
79
80			/**
81			* @var Mailcode_Parser_Statement_Tokenizer_Token[]
82			*/
83			protected $tokensTemporary = array();
84
85			/**
86			* @var Mailcode_Parser_Statement_Tokenizer_Token[]
87			*/
88			protected $tokensOrdered = array();
89
90			/**
91			* @var string[]
92			*/
93			protected static $ids = array();
94
95			public function __construct(Mailcode_Parser_Statement $statement)
96			{
97			$this->statement = $statement;
98
99			$this->tokenize($statement->getStatementString());
100			}
101
102			/**
103			* Retrieves all tokens detected in the statement string, in
104			* the order they were found.
105			*
106			* @return Mailcode_Parser_Statement_Tokenizer_Token[]
107			*/
108			public function getTokens()
109			{
110			return $this->tokensOrdered;
111			}
112
113			public function hasTokens() : bool
114			{
115			return !empty($this->tokensOrdered);
116			}
117
118			/**
119			* Whether there were any unknown tokens in the statement.
120			*
121			* @return bool
122			*/
123			public function hasUnknown() : bool
124			{
125			$unknown = $this->getUnknown();
126
127			return !empty($unknown);
128			}
129
130			/**
131			* Retrieves all unknown content tokens, if any.
132			*
133			* @return \Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_Unknown[]
134			*/
135			public function getUnknown()
136			{
137			$result = array();
138
139			foreach($this->tokensOrdered as $token)
140			{
141			if($token instanceof Mailcode_Parser_Statement_Tokenizer_Token_Unknown)
142			{
143			$result[] = $token;
144			}
145			}
146
147			return $result;
148			}
149
150			public function getFirstUnknown() : ?Mailcode_Parser_Statement_Tokenizer_Token_Unknown
151			{
152			$unknown = $this->getUnknown();
153
154			if(!empty($unknown))
155			{
156			return array_shift($unknown);
157			}
158
159			return null;
160			}
161
162			public function getNormalized() : string
163			{
164			$parts = array();
165
166			foreach($this->tokensOrdered as $token)
167			{
168			$string = $token->getNormalized();
169
170			if($string != '')
171			{
172			$parts[] = $string;
173			}
174			}
175
176			return implode(' ', $parts);
177			}
178
179			protected function tokenize(string $statement) : void
180			{
181			$this->tokenized = trim($statement);
182
183			foreach($this->tokenCategories as $token)
184			{
185			$method = 'tokenize_'.$token;
186
187			if(!method_exists($this, $method))
188			{
189			throw new Mailcode_Exception(
190			'Unknown statement token.',
191			sprintf(
192			'The tokenize method [%s] is not present in class [%s].',
193			$method,
194			get_class($this)
195			),
196			self::ERROR_TOKENIZE_METHOD_MISSING
197			);
198			}
199
200			$this->$method();
201			}
202			}
203
204			/**
205			* Registers a token to add in the statement string.
206			*
207			* @param string $type
208			* @param string $matchedText
209			* @param mixed $subject
210			*/
211			protected function registerToken(string $type, string $matchedText, $subject=null) : void
212			{
213			$tokenID = $this->generateID();
214
215			$this->tokenized = str_replace(
216			$matchedText,
217			$this->delimiter.$tokenID.$this->delimiter,
218			$this->tokenized
219			);
220
221			$class = '\Mailcode\Mailcode_Parser_Statement_Tokenizer_Token_'.$type;
222
223			$this->tokensTemporary[] = new $class($tokenID, $matchedText, $subject);
224			}
225
226			protected function getTokenByID(string $tokenID) : ?Mailcode_Parser_Statement_Tokenizer_Token
227			{
228			foreach($this->tokensTemporary as $token)
229			{
230			if($token->getID() === $tokenID)
231			{
232			return $token;
233			}
234			}
235
236			return null;
237			}
238
239			/**
240			* Some WYSIWYG editors like using pretty quotes instead
241			* of the usual double quotes. This simply replaces all
242			* occurrences with the regular variant.
243			*/
244			protected function tokenize_normalize_quotes() : void
245			{
246			$this->tokenized = str_replace(array('“', '”'), '"', $this->tokenized);
247			}
248
249			protected function tokenize_escaped_quotes() : void
250			{
251			$this->tokenized = str_replace('\"', '__QUOTE__', $this->tokenized);
252			}
253
254			protected function tokenize_keywords() : void
255			{
256			foreach($this->keywords as $keyword)
257			{
258			if(strstr($this->tokenized, $keyword))
259			{
260			$this->registerToken('Keyword', $keyword);
261			}
262			}
263			}
264
265			protected function tokenize_extract_tokens() : void
266			{
267			// split the string by the delimiters: this gives an
268			// array with tokenIDs, and any content that may be left
269			// over that could not be tokenized.
270			$parts = \AppUtils\ConvertHelper::explodeTrim($this->delimiter, $this->tokenized);
271
272			foreach($parts as $part)
273			{
274			$token = $this->getTokenByID($part);
275
276			// if the entry is a token, simply add it.
277			if($token)
278			{
279			$this->tokensOrdered[] = $token;
280			}
281			// anything else is added as an unknown token.
282			else
283			{
284			$this->tokensOrdered[] = new Mailcode_Parser_Statement_Tokenizer_Token_Unknown($this->generateID(), $part);
285			}
286			}
287			}
288
289			protected function tokenize_variables() : void
290			{
291			$vars = Mailcode::create()->findVariables($this->tokenized)->getGroupedByHash();
292
293			foreach($vars as $var)
294			{
295			$this->registerToken('Variable', $var->getMatchedText(), $var);
296			}
297			}
298
299			protected function tokenize_operands() : void
300			{
301			foreach($this->operands as $operand)
302			{
303			if(strstr($this->tokenized, $operand))
304			{
305			$this->registerToken('Operand', $operand);
306			}
307			}
308			}
309
310			protected function tokenize_string_literals() : void
311			{
312			$matches = array();
313			preg_match_all('/"(.*)"/sxU', $this->tokenized, $matches, PREG_PATTERN_ORDER);
314
315			foreach($matches[0] as $match)
316			{
317			$this->registerToken('StringLiteral', $match);
318			}
319			}
320
321			protected function tokenize_numbers() : void
322			{
323			$matches = array();
324			preg_match_all('/-*[0-9]+\s*[.,]\s*[0-9]+|-*[0-9]+/sx', $this->tokenized, $matches, PREG_PATTERN_ORDER);
325
326			foreach($matches[0] as $match)
327			{
328			$this->registerToken('Number', $match);
329			}
330			}
331
332			/**
333			* Generates a unique alphabet-based ID without numbers
334			* to use as token name, to avoid conflicts with the
335			* numbers detection.
336			*
337			* @return string
338			*/
339			protected function generateID() : string
340			{
341			static $alphas;
342
343			if(!isset($alphas))
344			{
345			$alphas = range('A', 'Z');
346			}
347
348			$amount = 12;
349
350			$result = '';
351
352			for($i=0; $i < $amount; $i++)
353			{
354			$result .= $alphas[array_rand($alphas)];
355			}
356
357			if(!in_array($result, self::$ids))
358			{
359			self::$ids[] = $result;
360			return $result;
361			}
362
363			return $this->generateID();
364			}
365			}
366

Mistralys / mailcode

Push — master ( 83b5cd...0e7983 )

Mailcode_Parser_Statement_Tokenizer A

Complexity

Size/Duplication

Importance

19 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like