Completed
Push — master ( d91fed...fd66aa )
by Josh
17:36
created

Optimizer   F

Complexity

Total Complexity 64

Size/Duplication

Total Lines 420
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Importance

Changes 0
Metric Value
wmc 64
lcom 1
cbo 1
dl 0
loc 420
rs 3.28
c 0
b 0
f 0

15 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
B optimize() 0 54 7
A isBetweenHtmlspecialcharCalls() 0 8 5
B isHtmlspecialcharSafeVar() 0 10 7
A isOutputAssignment() 0 7 4
A isPrecededByOutputVar() 0 6 3
B mergeConcatenatedHtmlSpecialChars() 0 54 9
A mergeConcatenatedStrings() 0 22 4
A optimizeOutConcatEqual() 0 32 5
A optimizeConcatenations() 0 8 3
A optimizeHtmlspecialchars() 0 13 4
A removeHtmlspecialcharsSafeVar() 0 19 2
A replaceHtmlspecialcharsLiteral() 0 29 5
A skipPast() 0 4 2
A skipTo() 0 12 3

How to fix   Complexity   

Complex Class

Complex classes like Optimizer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use Optimizer, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2018 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Configurator\RendererGenerators\PHP;
9
10
/**
11
* This class optimizes the code produced by the PHP renderer. It is not meant to be used on general
12
* purpose code
13
*/
14
class Optimizer
15
{
16
	/**
17
	* @var BranchOutputOptimizer
18
	*/
19
	public $branchOutputOptimizer;
20
21
	/**
22
	* @var integer Number of tokens in $this->tokens
23
	*/
24
	protected $cnt;
25
26
	/**
27
	* @var integer Current token index
28
	*/
29
	protected $i;
30
31
	/**
32
	* @var integer Maximum number of iterations over the optimization passes
33
	*/
34
	public $maxLoops = 10;
35
36
	/**
37
	* @var array Array of tokens from token_get_all()
38
	*/
39
	protected $tokens;
40
41
	/**
	* Constructor
	*
	* Creates the BranchOutputOptimizer instance stored in $this->branchOutputOptimizer
	*/
	public function __construct()
	{
		$this->branchOutputOptimizer = new BranchOutputOptimizer();
	}
48
49
	/**
	* Optimize the code generated by the PHP renderer generator
	*
	* The source is tokenized, then every optimization pass is run in order. Whenever a pass changes
	* the number of tokens the token list is reindexed and another round is scheduled, up to
	* $this->maxLoops rounds. The resulting tokens are then handed to the branch output optimizer,
	* whose return value is returned
	*
	* @param  string $php Original code
	* @return string      Optimized code
	*/
	public function optimize($php)
	{
		$this->tokens = token_get_all('<?php ' . $php);
		$this->cnt    = count($this->tokens);
		$this->i      = 0;

		// Remove line numbers from tokens so that array tokens can be compared to [type, content]
		foreach ($this->tokens as &$token)
		{
			if (is_array($token))
			{
				unset($token[2]);
			}
		}
		unset($token);

		// Optimization passes, in order of execution
		$passes = [
			'optimizeOutConcatEqual',
			'optimizeConcatenations',
			'optimizeHtmlspecialchars'
		];

		// Limit the number of loops, in case something would make it loop indefinitely
		$remainingLoops = $this->maxLoops;
		do
		{
			$continue = false;

			foreach ($passes as $pass)
			{
				// Run the pass
				$this->$pass();

				// If the array was modified, reset the keys and keep going
				$cnt = count($this->tokens);
				if ($this->cnt !== $cnt)
				{
					$this->tokens = array_values($this->tokens);
					$this->cnt    = $cnt;
					$continue     = true;
				}
			}
		}
		// Compare against 0 explicitly: with $maxLoops set to 0 or a negative value the decremented
		// counter is a negative (truthy) integer, which would otherwise never stop the loop
		while ($continue && --$remainingLoops > 0);

		// Optimize common output expressions in if-else-elseif conditionals
		$php = $this->branchOutputOptimizer->optimize($this->tokens);

		// Reclaim some memory
		unset($this->tokens);

		return $php;
	}
109
110
	/**
	* Test whether current token is between two htmlspecialchars() calls
	*
	* The token must be followed by "htmlspecialchars(" and preceded by a comma, an integer
	* literal and a closing parenthesis, in that order
	*
	* @return bool
	*/
	protected function isBetweenHtmlspecialcharCalls()
	{
		$pos = $this->i;

		// What follows must be the start of an htmlspecialchars() call
		if ($this->tokens[$pos + 1] !== [T_STRING, 'htmlspecialchars']
		 || $this->tokens[$pos + 2] !== '(')
		{
			return false;
		}

		// What precedes must be the end of a call whose last argument is an integer literal
		return ($this->tokens[$pos - 1]    === ')'
		     && $this->tokens[$pos - 2][0] === T_LNUMBER
		     && $this->tokens[$pos - 3]    === ',');
	}
123
124
	/**
	* Test whether current token is at the beginning of an htmlspecialchars()-safe var
	*
	* Matches $node->localName or $node->nodeName, immediately followed by a comma, an integer
	* literal and a closing parenthesis
	*
	* @return bool
	*/
	protected function isHtmlspecialcharSafeVar()
	{
		$pos = $this->i;

		// The expression must start with $node->
		if ($this->tokens[$pos]     !== [T_VARIABLE,        '$node']
		 || $this->tokens[$pos + 1] !== [T_OBJECT_OPERATOR, '->'])
		{
			return false;
		}

		// Only those two properties are considered safe
		$property = $this->tokens[$pos + 2];
		if ($property !== [T_STRING, 'localName'] && $property !== [T_STRING, 'nodeName'])
		{
			return false;
		}

		// The property must be the whole first argument of a two-argument call
		return ($this->tokens[$pos + 3]    === ','
		     && $this->tokens[$pos + 4][0] === T_LNUMBER
		     && $this->tokens[$pos + 5]    === ')');
	}
141
142
	/**
	* Test whether the cursor is at the beginning of an output assignment
	*
	* An output assignment is the sequence of four tokens: $this -> out .=
	*
	* @return bool
	*/
	protected function isOutputAssignment()
	{
		// Expected tokens, keyed by their offset from the cursor
		$expected = [
			[T_VARIABLE,        '$this'],
			[T_OBJECT_OPERATOR, '->'],
			[T_STRING,          'out'],
			[T_CONCAT_EQUAL,    '.=']
		];

		foreach ($expected as $offset => $token)
		{
			if ($this->tokens[$this->i + $offset] !== $token)
			{
				return false;
			}
		}

		return true;
	}
154
155
	/**
	* Test whether the cursor is immediately after the output variable
	*
	* The three tokens before the cursor must be: $this -> out
	*
	* @return bool
	*/
	protected function isPrecededByOutputVar()
	{
		// Expected tokens, keyed by their distance before the cursor
		$expected = [
			1 => [T_STRING,          'out'],
			2 => [T_OBJECT_OPERATOR, '->'],
			3 => [T_VARIABLE,        '$this']
		];

		foreach ($expected as $distance => $token)
		{
			if ($this->tokens[$this->i - $distance] !== $token)
			{
				return false;
			}
		}

		return true;
	}
166
167
	/**
	* Merge concatenated htmlspecialchars() calls together
	*
	* Must be called when the cursor is at the concatenation operator
	*
	* Two calls are merged only if the second call has a second argument that is the same integer
	* literal as the escape mode of the first call. When they are merged, the tail of the first
	* call and the head of the second call are removed so that both first arguments end up
	* concatenated inside a single call. On success the cursor is left right after the removed
	* tokens. On failure the cursor may have been moved forward inside the second call
	*
	* @return bool Whether calls were merged
	*/
	protected function mergeConcatenatedHtmlSpecialChars()
	{
		if (!$this->isBetweenHtmlspecialcharCalls())
		{
			 return false;
		}

		// Save the escape mode of the first call
		$escapeMode = $this->tokens[$this->i - 2][1];

		// Save the index of the comma that comes after the first argument of the first call
		$startIndex = $this->i - 3;

		// Save the index of the parenthesis that follows the second htmlspecialchars
		$endIndex = $this->i + 2;

		// Move the cursor to the first comma of the second call
		$this->i    = $endIndex;
		$parens     = 0;
		$foundComma = false;
		while (++$this->i < $this->cnt)
		{
			$token = $this->tokens[$this->i];
			if ($token === ',' && !$parens)
			{
				$foundComma = true;
				break;
			}

			if ($token === '(')
			{
				++$parens;
			}
			elseif ($token === ')')
			{
				if (!$parens)
				{
					// This parenthesis closes the second call, which means it has no second
					// argument. Stop here rather than match a comma that belongs to other code
					break;
				}
				--$parens;
			}
		}

		// Bail if the second call has no second argument or if its escape mode differs
		if (!$foundComma
		 || !isset($this->tokens[$this->i + 1])
		 || $this->tokens[$this->i + 1] !== [T_LNUMBER, $escapeMode])
		{
			return false;
		}

		// Replace the first comma of the first call with a concatenator operator
		$this->tokens[$startIndex] = '.';

		// Move the cursor back to the first comma then advance it and delete everything up to the
		// parenthesis of the second call, included
		$this->i = $startIndex;
		while (++$this->i <= $endIndex)
		{
			unset($this->tokens[$this->i]);
		}

		return true;
	}
228
229
	/**
	* Merge concatenated strings together
	*
	* Must be called when the cursor is at the concatenation operator
	*
	* Both operands must be string literals that use the same quote character. The merged literal
	* replaces the right operand while the left operand and the operator are removed, and the
	* cursor is moved to the merged literal
	*
	* @return bool Whether strings were merged
	*/
	protected function mergeConcatenatedStrings()
	{
		$left = $this->tokens[$this->i - 1];
		if ($left[0] !== T_CONSTANT_ENCAPSED_STRING)
		{
			return false;
		}

		$right = $this->tokens[$this->i + 1];
		if ($right[0] !== T_CONSTANT_ENCAPSED_STRING)
		{
			return false;
		}

		// The first character of each literal is its opening quote
		if ($left[1][0] !== $right[1][0])
		{
			return false;
		}

		// Drop the closing quote of the left string and the opening quote of the right string, then
		// store the result in the right string
		$this->tokens[$this->i + 1][1] = substr($left[1], 0, -1) . substr($right[1], 1);

		// Remove the left string and the concatenation operator
		unset($this->tokens[$this->i - 1], $this->tokens[$this->i]);

		// Move the cursor to the merged string
		++$this->i;

		return true;
	}
258
259
	/**
	* Optimize T_CONCAT_EQUAL assignments in an array of PHP tokens
	*
	* Will only optimize $this->out.= assignments. Consecutive assignments are turned into a single
	* assignment by replacing the semicolon that separates them with a concatenation operator and
	* removing the second "$this->out.="
	*
	* @return void
	*/
	protected function optimizeOutConcatEqual()
	{
		// skipTo() advances the cursor before testing a token, so the first token examined is at
		// index 4 and isPrecededByOutputVar() never looks further back than index 1
		$this->i = 3;

		while ($this->skipTo([T_CONCAT_EQUAL, '.=']))
		{
			// Ignore any T_CONCAT_EQUAL that is not preceded with $this->out
			if (!$this->isPrecededByOutputVar())
			{
				continue;
			}

			// Keep going for as long as the next semicolon is followed by another $this->out.=
			while ($this->skipPast(';') && $this->isOutputAssignment())
			{
				// Replace the semicolon between assignments with a concatenation operator
				$this->tokens[$this->i - 1] = '.';

				// Remove the four tokens of the following $this->out.= and move the cursor past them
				for ($removed = 0; $removed < 4; ++$removed)
				{
					unset($this->tokens[$this->i]);
					++$this->i;
				}
			}
		}
	}
298
299
	/**
	* Optimize concatenations in an array of PHP tokens
	*
	* - Will precompute the result of the concatenation of constant strings
	* - Will replace the concatenation of two compatible htmlspecialchars() calls with one call to
	*   htmlspecialchars() on the concatenation of their first arguments
	*
	* @return void
	*/
	protected function optimizeConcatenations()
	{
		$this->i = 1;
		while ($this->skipTo('.'))
		{
			// Try merging strings first, then fall back to merging htmlspecialchars() calls
			if (!$this->mergeConcatenatedStrings())
			{
				$this->mergeConcatenatedHtmlSpecialChars();
			}
		}
	}
316
317
	/**
	* Optimize htmlspecialchars() calls
	*
	* - The result of htmlspecialchars() on literals is precomputed
	* - By default, the generator escapes all values, including variables that cannot contain
	*   special characters such as $node->localName. This pass removes those calls
	*
	* @return void
	*/
	protected function optimizeHtmlspecialchars()
	{
		$this->i = 0;

		while ($this->skipPast([T_STRING, 'htmlspecialchars']))
		{
			// Only consider the name if it's followed by an opening parenthesis
			if ($this->tokens[$this->i] !== '(')
			{
				continue;
			}

			// Move the cursor to the first argument of the call
			++$this->i;

			// Try precomputing a literal first, then fall back to removing a redundant call
			if (!$this->replaceHtmlspecialcharsLiteral())
			{
				$this->removeHtmlspecialcharsSafeVar();
			}
		}
	}
339
340
	/**
	* Remove htmlspecialchars() calls on variables that are known to be safe
	*
	* Must be called when the cursor is at the first argument of the call
	*
	* @return bool Whether the call was removed
	*/
	protected function removeHtmlspecialcharsSafeVar()
	{
		if ($this->isHtmlspecialcharSafeVar())
		{
			$pos = $this->i;

			// Remove the function name and opening parenthesis that precede the variable, as well as
			// the comma, escape mode and closing parenthesis that follow its three tokens
			unset(
				$this->tokens[$pos - 2],
				$this->tokens[$pos - 1],
				$this->tokens[$pos + 3],
				$this->tokens[$pos + 4],
				$this->tokens[$pos + 5]
			);

			// Move the cursor past the call
			$this->i = $pos + 6;

			return true;
		}

		return false;
	}
366
367
	/**
	* Precompute the result of a htmlspecialchars() call on a string literal
	*
	* Must be called when the cursor is at the first argument of the call
	*
	* The call is only replaced if it consists of exactly one string literal followed by one integer
	* literal. The literal is decoded, escaped using the integer literal as flags and exported back
	* as a PHP literal with var_export(). On success the cursor is left at the position of the
	* closing parenthesis of the removed call
	*
	* @return bool Whether the call was replaced
	*/
	protected function replaceHtmlspecialcharsLiteral()
	{
		// Test whether a constant string is being escaped
		if ($this->tokens[$this->i    ][0] !== T_CONSTANT_ENCAPSED_STRING
		 || $this->tokens[$this->i + 1]    !== ','
		 || $this->tokens[$this->i + 2][0] !== T_LNUMBER
		 || $this->tokens[$this->i + 3]    !== ')')
		{
			return false;
		}

		// Decode the content of the literal
		$literal = $this->tokens[$this->i][1];
		$content = substr($literal, 1, -1);
		if ($literal[0] === "'")
		{
			// In a single-quoted literal only \\ and \' are escape sequences, any other backslash
			// is literal and must be preserved
			$content = preg_replace("/\\\\([\\\\'])/", '$1', $content);
		}
		else
		{
			// NOTE: stripslashes() does not interpret sequences such as \n or \x41 that can appear
			//       in double-quoted literals, it only removes the backslashes
			$content = stripslashes($content);
		}

		// Escape the content of the T_CONSTANT_ENCAPSED_STRING token
		$this->tokens[$this->i][1] = var_export(
			htmlspecialchars($content, $this->tokens[$this->i + 2][1]),
			true
		);

		// Remove the htmlspecialchars() call, except for the T_CONSTANT_ENCAPSED_STRING token
		unset($this->tokens[$this->i - 2]);
		unset($this->tokens[$this->i - 1]);
		unset($this->tokens[++$this->i]);
		unset($this->tokens[++$this->i]);
		unset($this->tokens[++$this->i]);

		return true;
	}
403
404
	/**
	* Move the cursor past given token
	*
	* If the token is not found, the cursor is left wherever skipTo() stopped
	*
	* @param  array|string $token Target token
	* @return bool                Whether a matching token was found and the cursor is within bounds
	*/
	protected function skipPast($token)
	{
		if (!$this->skipTo($token))
		{
			return false;
		}

		// Step over the matching token
		++$this->i;

		return ($this->i < $this->cnt);
	}
414
415
	/**
	* Move the cursor until it reaches given token
	*
	* The cursor is advanced before the first comparison, which means that the token at the
	* current position is never a match
	*
	* @param  array|string $token Target token
	* @return bool                Whether a matching token was found
	*/
	protected function skipTo($token)
	{
		for (++$this->i; $this->i < $this->cnt; ++$this->i)
		{
			if ($this->tokens[$this->i] === $token)
			{
				return true;
			}
		}

		return false;
	}
433
}