Completed
Branch development (b1b115)
by Johannes
10:28
created

CSS::tokenize()   F

Complexity

Conditions 86
Paths > 20000

Size

Total Lines 464

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 464
rs 0
cc 86
nc 1630512
nop 1

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

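Commonly applied refactorings include Extract Method; if the method has many parameters or temporary variables, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object can also help.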

1
<?php
2
/**
3
 * Tokenizes CSS code.
4
 *
5
 * @author    Greg Sherwood <[email protected]>
6
 * @copyright 2006-2015 Squiz Pty Ltd (ABN 77 084 670 600)
7
 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
8
 */
9
10
namespace PHP_CodeSniffer\Tokenizers;
11
12
use PHP_CodeSniffer\Util;
13
use PHP_CodeSniffer\Config;
14
use PHP_CodeSniffer\Exceptions\TokenizerException;
15
16
class CSS extends PHP
17
{
18
19
20
    /**
     * Initialise the tokenizer.
     *
     * Pre-checks the content to see if it looks minified and refuses to
     * process it if so; otherwise hands over to the parent tokenizer.
     *
     * NOTE(review): the default for $eolChar is written in single quotes, so it
     * is the two-character string backslash + "n" rather than a newline. It is
     * left untouched here to keep the signature unchanged - confirm that
     * callers always pass the real EOL char.
     *
     * @param string                  $content The content to tokenize.
     * @param \PHP_CodeSniffer\Config $config  The config data for the run.
     * @param string                  $eolChar The EOL char used in the content.
     *
     * @return void
     * @throws TokenizerException If the file appears to be minified.
     */
    public function __construct($content, Config $config, $eolChar='\n')
    {
        if ($this->isMinifiedContent($content, $eolChar) === true) {
            throw new TokenizerException('File appears to be minified and cannot be processed');
        }

        // Constructors do not return a value, so simply delegate to the parent.
        parent::__construct($content, $config, $eolChar);

    }//end __construct()
41
42
43
    /**
     * Creates an array of tokens when given some CSS code.
     *
     * Uses the PHP tokenizer to do all the tricky work, then rewrites the
     * resulting token stream in two passes:
     *
     * - 1st pass: restores embedded PHP blocks, splits goto labels back into
     *   a string and a colon, converts selected PHP keyword tokens to strings
     *   and re-tokenizes "//" and "#" comments, which are not comments in CSS.
     * - 2nd pass: tracks style definitions and at-rules, joins hyphenated
     *   names and negative numbers, marks style names as T_STYLE and merges
     *   the contents of url() into a single T_URL token.
     *
     * @param string $string The string to tokenize.
     *
     * @return array
     */
    public function tokenize($string)
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** START CSS TOKENIZING 1ST PASS ***".PHP_EOL;
        }

        // If the content doesn't have an EOL char on the end, add one so
        // the open and close tags we add are parsed correctly.
        $eolAdded = false;
        if (substr($string, (strlen($this->eolChar) * -1)) !== $this->eolChar) {
            $string  .= $this->eolChar;
            $eolAdded = true;
        }

        // Hide any real PHP tags behind placeholders so the PHP tokenizer
        // treats the whole file as one block of PHP code.
        $string = str_replace('<?php', '^PHPCS_CSS_T_OPEN_TAG^', $string);
        $string = str_replace('?>', '^PHPCS_CSS_T_CLOSE_TAG^', $string);
        $tokens = parent::tokenize('<?php '.$string.'?>');

        $finalTokens    = [];
        $finalTokens[0] = [
            'code'    => T_OPEN_TAG,
            'type'    => 'T_OPEN_TAG',
            'content' => '',
        ];

        $newStackPtr      = 1;
        $numTokens        = count($tokens);
        $multiLineComment = false;
        for ($stackPtr = 1; $stackPtr < $numTokens; $stackPtr++) {
            $token = $tokens[$stackPtr];

            // CSS files don't have lists, breaks etc, so convert these to
            // standard strings early so they can be converted into T_STYLE
            // tokens and joined with other strings if needed.
            if ($token['code'] === T_BREAK
                || $token['code'] === T_LIST
                || $token['code'] === T_DEFAULT
                || $token['code'] === T_SWITCH
                || $token['code'] === T_FOR
                || $token['code'] === T_FOREACH
                || $token['code'] === T_WHILE
                || $token['code'] === T_DEC
            ) {
                $token['type'] = 'T_STRING';
                $token['code'] = T_STRING;
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $type    = $token['type'];
                $content = Util\Common::prepareForOutput($token['content']);
                echo "\tProcess token $stackPtr: $type => $content".PHP_EOL;
            }

            if ($token['code'] === T_BITWISE_XOR
                && $tokens[($stackPtr + 1)]['content'] === 'PHPCS_CSS_T_OPEN_TAG'
            ) {
                // Rebuild the embedded PHP block. The placeholder was tokenized
                // as "^", the placeholder name and "^", so skip all three.
                $content = '<?php';
                for ($stackPtr = ($stackPtr + 3); $stackPtr < $numTokens; $stackPtr++) {
                    if ($tokens[$stackPtr]['code'] === T_BITWISE_XOR
                        && $tokens[($stackPtr + 1)]['content'] === 'PHPCS_CSS_T_CLOSE_TAG'
                    ) {
                        // Add the end tag and ignore the ^ we put at the end.
                        $content  .= '?>';
                        $stackPtr += 2;
                        break;
                    } else {
                        $content .= $tokens[$stackPtr]['content'];
                    }
                }

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t=> Found embedded PHP code: ";
                    $cleanContent = Util\Common::prepareForOutput($content);
                    echo $cleanContent.PHP_EOL;
                }

                $finalTokens[$newStackPtr] = [
                    'type'    => 'T_EMBEDDED_PHP',
                    'code'    => T_EMBEDDED_PHP,
                    'content' => $content,
                ];

                $newStackPtr++;
                continue;
            }//end if

            if ($token['code'] === T_GOTO_LABEL) {
                // Convert these back to T_STRING followed by T_COLON so we can
                // more easily process style definitions.
                $finalTokens[$newStackPtr] = [
                    'type'    => 'T_STRING',
                    'code'    => T_STRING,
                    'content' => substr($token['content'], 0, -1),
                ];
                $newStackPtr++;
                $finalTokens[$newStackPtr] = [
                    'type'    => 'T_COLON',
                    'code'    => T_COLON,
                    'content' => ':',
                ];
                $newStackPtr++;
                continue;
            }

            if ($token['code'] === T_FUNCTION) {
                // There are no functions in CSS, so convert this to a string.
                $finalTokens[$newStackPtr] = [
                    'type'    => 'T_STRING',
                    'code'    => T_STRING,
                    'content' => $token['content'],
                ];

                $newStackPtr++;
                continue;
            }

            if ($token['code'] === T_COMMENT
                && substr($token['content'], 0, 2) === '/*'
            ) {
                // Multi-line comment. Record it so we can ignore other
                // comment tags until we get out of this one.
                $multiLineComment = true;
            }

            if ($token['code'] === T_COMMENT
                && $multiLineComment === false
                && (substr($token['content'], 0, 2) === '//'
                || $token['content'][0] === '#')
            ) {
                // "//" and "#" do not start comments in CSS, so strip the
                // marker and tokenize the rest of the line as normal code.
                $content = ltrim($token['content'], '#/');

                // Guard against PHP7+ syntax errors by temporarily prefixing
                // content that starts with a zero with a 1, so it doesn't
                // look like an invalid int.
                $leadingZero = false;
                if ($content[0] === '0') {
                    $content     = '1'.$content;
                    $leadingZero = true;
                }

                $commentTokens = parent::tokenize('<?php '.$content.'?>');

                // The first and last tokens are the open/close tags.
                array_shift($commentTokens);
                array_pop($commentTokens);

                if ($leadingZero === true) {
                    // Remove the temporary 1 again, from both the first token
                    // and our copy of the content.
                    $commentTokens[0]['content'] = substr($commentTokens[0]['content'], 1);
                    $content = substr($content, 1);
                }

                if ($token['content'][0] === '#') {
                    // The # character is not a comment in CSS files, so
                    // determine what it means in this context.
                    $firstContent = $commentTokens[0]['content'];

                    // If the first content is just a number, it is probably a
                    // colour like 8FB7DB, which PHP splits into 8 and FB7DB.
                    if (($commentTokens[0]['code'] === T_LNUMBER
                        || $commentTokens[0]['code'] === T_DNUMBER)
                        && $commentTokens[1]['code'] === T_STRING
                    ) {
                        $firstContent .= $commentTokens[1]['content'];
                        array_shift($commentTokens);
                    }

                    // If the first content looks like a colour and not a class
                    // definition, join the tokens together.
                    if (preg_match('/^[ABCDEF0-9]+$/i', $firstContent) === 1
                        && $commentTokens[1]['content'] !== '-'
                    ) {
                        array_shift($commentTokens);
                        // Work out what we trimmed off above and remember to re-add it.
                        $trimmed = substr($token['content'], 0, (strlen($token['content']) - strlen($content)));
                        $finalTokens[$newStackPtr] = [
                            'type'    => 'T_COLOUR',
                            'code'    => T_COLOUR,
                            'content' => $trimmed.$firstContent,
                        ];
                    } else {
                        $finalTokens[$newStackPtr] = [
                            'type'    => 'T_HASH',
                            'code'    => T_HASH,
                            'content' => '#',
                        ];
                    }
                } else {
                    $finalTokens[$newStackPtr] = [
                        'type'    => 'T_STRING',
                        'code'    => T_STRING,
                        'content' => '//',
                    ];
                }//end if

                $newStackPtr++;

                // Replace the comment token with the tokens found inside it
                // and step back so the first of them is processed next.
                array_splice($tokens, $stackPtr, 1, $commentTokens);
                $numTokens = count($tokens);
                $stackPtr--;
                continue;
            }//end if

            if ($token['code'] === T_COMMENT
                && substr($token['content'], -2) === '*/'
            ) {
                // Multi-line comment is done.
                $multiLineComment = false;
            }

            $finalTokens[$newStackPtr] = $token;
            $newStackPtr++;
        }//end for

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** END CSS TOKENIZING 1ST PASS ***".PHP_EOL;
            echo "\t*** START CSS TOKENIZING 2ND PASS ***".PHP_EOL;
        }

        // A flag to indicate if we are inside a style definition,
        // which is defined using curly braces.
        $inStyleDef = false;

        // A flag to indicate if an At-rule like "@media" is used, which will result
        // in nested curly brackets.
        $asperandStart = false;

        $numTokens = count($finalTokens);
        for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
            $token = $finalTokens[$stackPtr];

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $type    = $token['type'];
                $content = Util\Common::prepareForOutput($token['content']);
                echo "\tProcess token $stackPtr: $type => $content".PHP_EOL;
            }

            switch ($token['code']) {
            case T_OPEN_CURLY_BRACKET:
                // Opening curly brackets for an At-rule do not start a style
                // definition. We also reset the asperand flag here because the next
                // opening curly bracket could be indeed the start of a style
                // definition.
                if ($asperandStart === true) {
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        if ($inStyleDef === true) {
                            echo "\t\t* style definition closed *".PHP_EOL;
                        }

                        if ($asperandStart === true) {
                            echo "\t\t* at-rule definition closed *".PHP_EOL;
                        }
                    }

                    $inStyleDef    = false;
                    $asperandStart = false;
                } else {
                    $inStyleDef = true;
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* style definition opened *".PHP_EOL;
                    }
                }
                break;
            case T_CLOSE_CURLY_BRACKET:
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    if ($inStyleDef === true) {
                        echo "\t\t* style definition closed *".PHP_EOL;
                    }

                    if ($asperandStart === true) {
                        echo "\t\t* at-rule definition closed *".PHP_EOL;
                    }
                }

                $inStyleDef    = false;
                $asperandStart = false;
                break;
            case T_MINUS:
                // Minus signs are often used instead of spaces inside
                // class names, IDs and styles.
                if ($finalTokens[($stackPtr + 1)]['code'] === T_STRING) {
                    if ($finalTokens[($stackPtr - 1)]['code'] === T_STRING) {
                        // string-string: fold both earlier tokens into the next one.
                        $newContent = $finalTokens[($stackPtr - 1)]['content'].'-'.$finalTokens[($stackPtr + 1)]['content'];

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            echo "\t\t* token is a string joiner; ignoring this and previous token".PHP_EOL;
                            $old = Util\Common::prepareForOutput($finalTokens[($stackPtr + 1)]['content']);
                            $new = Util\Common::prepareForOutput($newContent);
                            echo "\t\t=> token ".($stackPtr + 1)." content changed from \"$old\" to \"$new\"".PHP_EOL;
                        }

                        $finalTokens[($stackPtr + 1)]['content'] = $newContent;
                        unset($finalTokens[$stackPtr]);
                        unset($finalTokens[($stackPtr - 1)]);
                    } else {
                        // -string: only the minus is folded into the next token.
                        $newContent = '-'.$finalTokens[($stackPtr + 1)]['content'];

                        $finalTokens[($stackPtr + 1)]['content'] = $newContent;
                        unset($finalTokens[$stackPtr]);
                    }
                } else if ($finalTokens[($stackPtr + 1)]['code'] === T_LNUMBER) {
                    // They can also be used to provide negative numbers.
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* token is part of a negative number; adding content to next token and ignoring *".PHP_EOL;
                        $content = Util\Common::prepareForOutput($finalTokens[($stackPtr + 1)]['content']);
                        echo "\t\t=> token ".($stackPtr + 1)." content changed from \"$content\" to \"-$content\"".PHP_EOL;
                    }

                    $finalTokens[($stackPtr + 1)]['content'] = '-'.$finalTokens[($stackPtr + 1)]['content'];
                    unset($finalTokens[$stackPtr]);
                }//end if

                break;
            case T_COLON:
                // Only interested in colons that are defining styles.
                if ($inStyleDef === false) {
                    break;
                }

                // Walk back to the previous non-empty token, which is
                // the name of the style being defined.
                for ($x = ($stackPtr - 1); $x >= 0; $x--) {
                    if (isset(Util\Tokens::$emptyTokens[$finalTokens[$x]['code']]) === false) {
                        break;
                    }
                }

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $type = $finalTokens[$x]['type'];
                    echo "\t\t=> token $x changed from $type to T_STYLE".PHP_EOL;
                }

                $finalTokens[$x]['type'] = 'T_STYLE';
                $finalTokens[$x]['code'] = T_STYLE;
                break;
            case T_STRING:
                if (strtolower($token['content']) === 'url') {
                    // Find the next content.
                    for ($x = ($stackPtr + 1); $x < $numTokens; $x++) {
                        if (isset(Util\Tokens::$emptyTokens[$finalTokens[$x]['code']]) === false) {
                            break;
                        }
                    }

                    // Needs to be in the format "url(" for it to be a URL.
                    // "continue 2" targets the enclosing for loop; a bare
                    // "continue" would target the switch.
                    if ($finalTokens[$x]['code'] !== T_OPEN_PARENTHESIS) {
                        continue 2;
                    }

                    // Make sure the content isn't empty.
                    for ($y = ($x + 1); $y < $numTokens; $y++) {
                        if (isset(Util\Tokens::$emptyTokens[$finalTokens[$y]['code']]) === false) {
                            break;
                        }
                    }

                    if ($finalTokens[$y]['code'] === T_CLOSE_PARENTHESIS) {
                        continue 2;
                    }

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        for ($i = ($stackPtr + 1); $i <= $y; $i++) {
                            $type    = $finalTokens[$i]['type'];
                            $content = Util\Common::prepareForOutput($finalTokens[$i]['content']);
                            echo "\tProcess token $i: $type => $content".PHP_EOL;
                        }

                        echo "\t\t* token starts a URL *".PHP_EOL;
                    }

                    // Join all the content together inside the url() statement.
                    $newContent = '';
                    for ($i = ($x + 2); $i < $numTokens; $i++) {
                        if ($finalTokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
                            break;
                        }

                        $newContent .= $finalTokens[$i]['content'];
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $content = Util\Common::prepareForOutput($finalTokens[$i]['content']);
                            echo "\t\t=> token $i added to URL string and ignored: $content".PHP_EOL;
                        }

                        unset($finalTokens[$i]);
                    }

                    // Resume the outer loop after the tokens consumed above.
                    $stackPtr = $i;

                    // If the content inside the "url()" is in double quotes
                    // there will only be one token and so we don't have to do
                    // anything except change its type. If it is not empty,
                    // we need to do some token merging.
                    $finalTokens[($x + 1)]['type'] = 'T_URL';
                    $finalTokens[($x + 1)]['code'] = T_URL;

                    if ($newContent !== '') {
                        $finalTokens[($x + 1)]['content'] .= $newContent;
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $content = Util\Common::prepareForOutput($finalTokens[($x + 1)]['content']);
                            echo "\t\t=> token content changed to: $content".PHP_EOL;
                        }
                    }
                } else if ($finalTokens[$stackPtr]['content'][0] === '-'
                    && $finalTokens[($stackPtr + 1)]['code'] === T_STRING
                ) {
                    // A string starting with a minus that is directly followed
                    // by another string is joined onto that next string.
                    if (isset($finalTokens[($stackPtr - 1)]) === true
                        && $finalTokens[($stackPtr - 1)]['code'] === T_STRING
                    ) {
                        $newContent = $finalTokens[($stackPtr - 1)]['content'].$finalTokens[$stackPtr]['content'].$finalTokens[($stackPtr + 1)]['content'];

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            echo "\t\t* token is a string joiner; ignoring this and previous token".PHP_EOL;
                            $old = Util\Common::prepareForOutput($finalTokens[($stackPtr + 1)]['content']);
                            $new = Util\Common::prepareForOutput($newContent);
                            echo "\t\t=> token ".($stackPtr + 1)." content changed from \"$old\" to \"$new\"".PHP_EOL;
                        }

                        $finalTokens[($stackPtr + 1)]['content'] = $newContent;
                        unset($finalTokens[$stackPtr]);
                        unset($finalTokens[($stackPtr - 1)]);
                    } else {
                        $newContent = $finalTokens[$stackPtr]['content'].$finalTokens[($stackPtr + 1)]['content'];

                        $finalTokens[($stackPtr + 1)]['content'] = $newContent;
                        unset($finalTokens[$stackPtr]);
                    }
                }//end if

                break;
            case T_ASPERAND:
                $asperandStart = true;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* at-rule definition opened *".PHP_EOL;
                }
                break;
            default:
                // Nothing special to be done with this token.
                break;
            }//end switch
        }//end for

        // Reset the array keys to avoid gaps.
        $finalTokens = array_values($finalTokens);
        $numTokens   = count($finalTokens);

        // Blank out the content of the end tag.
        $finalTokens[($numTokens - 1)]['content'] = '';

        if ($eolAdded === true) {
            // Strip off the extra EOL char we added for tokenizing.
            $finalTokens[($numTokens - 2)]['content'] = substr(
                $finalTokens[($numTokens - 2)]['content'],
                0,
                (strlen($this->eolChar) * -1)
            );

            // Drop the token entirely if the EOL char was all it contained.
            if ($finalTokens[($numTokens - 2)]['content'] === '') {
                unset($finalTokens[($numTokens - 2)]);
                $finalTokens = array_values($finalTokens);
                $numTokens   = count($finalTokens);
            }
        }

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** END CSS TOKENIZING 2ND PASS ***".PHP_EOL;
        }

        return $finalTokens;

    }//end tokenize()
518
519
520
    /**
     * Performs additional processing after main tokenizing.
     *
     * Intentionally does nothing for CSS files.
     *
     * @return void
     */
    public function processAdditional()
    {
        // Deliberately empty: this override stops the PHP version of this
        // method from running during CSS processing, where it would only
        // be wasted processing time.

    }//end processAdditional()
533
534
535
}//end class
536