GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — develop ( baac3d...439f66 )
by gyeong-won
17:54
created

HTMLPurifier_Lexer_DirectLex::substrCount()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 12
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 9
nc 4
nop 4
dl 0
loc 12
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * Our in-house implementation of a parser.
5
 *
6
 * A pure PHP parser, DirectLex has absolutely no dependencies, making
7
 * it a reasonably good default for PHP4.  Written with efficiency in mind,
8
 * it can be four times faster than HTMLPurifier_Lexer_PEARSax3, although it
9
 * pales in comparison to HTMLPurifier_Lexer_DOMLex.
10
 *
11
 * @todo Reread XML spec and document differences.
12
 */
13
class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
14
{
15
16
    public $tracksLineNumbers = true;
17
18
    /**
19
     * Whitespace characters for str(c)spn.
20
     */
21
    protected $_whitespace = "\x20\x09\x0D\x0A";
22
23
    /**
     * Callback for the script CDATA fudge applied in tokenizeHTML().
     *
     * Escapes the contents of a script element while leaving its opening
     * and closing tags untouched.
     *
     * @param $matches Regex match array: index 1 is the opening tag,
     *                 index 2 the contents, index 3 the closing tag.
     * @return Opening tag, escaped contents and closing tag, concatenated.
     */
    protected function scriptCallback($matches) {
        // Last argument (double_encode) is false so that entities already
        // present in the script body are not escaped a second time.
        $escaped_contents = htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false);
        return $matches[1] . $escaped_contents . $matches[3];
    }
30
31
    /**
     * Tokenizes a string of HTML into a flat, ordered array of tokens.
     *
     * The string is scanned left to right. Text between tags becomes
     * HTMLPurifier_Token_Text, "<!-- ... -->" becomes
     * HTMLPurifier_Token_Comment, "</x>" becomes HTMLPurifier_Token_End,
     * and other tags become HTMLPurifier_Token_Start or, when explicitly
     * self-closing, HTMLPurifier_Token_Empty. Malformed markup (a "<" not
     * followed by a letter, a tag without a closing ">", an unclosed
     * comment) is turned into text/comment tokens and, when error
     * collection is on, reported to the error collector.
     *
     * While running, 'CurrentLine' and 'CurrentCol' are registered in
     * $context; both are destroyed again before returning.
     *
     * @param $html String of HTML to tokenize.
     * @param $config Configuration object; read via get() for HTML.Trusted,
     *                Core.MaintainLineNumbers, Core.CollectErrors and
     *                Core.DirectLexLineNumberSyncInterval.
     * @param $context Context object; used for CurrentLine/CurrentCol and
     *                 to fetch 'ErrorCollector' when error collection is on.
     * @return Array of HTMLPurifier_Token objects in document order.
     */
    public function tokenizeHTML($html, $config, $context) {

        // special normalization for script tags without any armor
        // our "armor" heuristic is a < sign any number of whitespaces after
        // the first script tag
        if ($config->get('HTML.Trusted')) {
            $html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
                array($this, 'scriptCallback'), $html);
        }

        $html = $this->normalize($html, $config, $context);

        $cursor = 0; // our location in the text
        $inside_tag = false; // whether or not we're parsing the inside of a tag
        $array = array(); // result array

        // This is also treated to mean maintain *column* numbers too
        $maintain_line_numbers = $config->get('Core.MaintainLineNumbers');

        if ($maintain_line_numbers === null) {
            // automatically determine line numbering by checking
            // if error collection is on
            $maintain_line_numbers = $config->get('Core.CollectErrors');
        }

        if ($maintain_line_numbers) {
            $current_line = 1;
            $current_col  = 0;
            $length = strlen($html);
        } else {
            $current_line = false;
            $current_col  = false;
            $length = false;
        }
        $context->register('CurrentLine', $current_line);
        $context->register('CurrentCol',  $current_col);
        $nl = "\n";
        // how often to manually recalculate. This will ALWAYS be right,
        // but it's pretty wasteful. Set to 0 to turn off
        $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval');

        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        // for testing synchronization
        $loops = 0;

        // Every branch of the loop body ends in either continue or break,
        // so control never falls off the end of an iteration.
        while(++$loops) {

            // $cursor is either at the start of a token, or inside of
            // a tag (i.e. there was a < immediately before it), as indicated
            // by $inside_tag

            if ($maintain_line_numbers) {

                // $rcursor, however, is always at the start of a token.
                $rcursor = $cursor - (int) $inside_tag;

                // Column number is cheap, so we calculate it every round.
                // We're interested at the *end* of the newline string, so
                // we need to add strlen($nl) == 1 to $nl_pos before subtracting it
                // from our "rcursor" position.
                // The negative offset makes strrpos() search backwards from
                // $rcursor, i.e. it finds the last newline before the token.
                $nl_pos = strrpos($html, $nl, $rcursor - $length);
                $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);

                // recalculate lines
                if (
                    $synchronize_interval &&  // synchronization is on
                    $cursor > 0 &&            // cursor is further than zero
                    $loops % $synchronize_interval === 0 // time to synchronize!
                ) {
                    $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
                }

            }

            $position_next_lt = strpos($html, '<', $cursor);
            $position_next_gt = strpos($html, '>', $cursor);

            // triggers on "<b>asdf</b>" but not "asdf <b></b>"
            // special case to set up context
            if ($position_next_lt === $cursor) {
                $inside_tag = true;
                $cursor++;
            }

            if (!$inside_tag && $position_next_lt !== false) {
                // We are not inside tag and there still is another tag to parse
                $token = new
                    HTMLPurifier_Token_Text(
                        $this->parseData(
                            substr(
                                $html, $cursor, $position_next_lt - $cursor
                            )
                        )
                    );
                if ($maintain_line_numbers) {
                    $token->rawPosition($current_line, $current_col);
                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
                }
                $array[] = $token;
                $cursor  = $position_next_lt + 1;
                $inside_tag = true;
                continue;
            } elseif (!$inside_tag) {
                // We are not inside tag but there are no more tags
                // If we're already at the end, break
                if ($cursor === strlen($html)) break;
                // Create Text of rest of string
                $token = new
                    HTMLPurifier_Token_Text(
                        $this->parseData(
                            substr(
                                $html, $cursor
                            )
                        )
                    );
                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
                $array[] = $token;
                break;
            } elseif ($inside_tag && $position_next_gt !== false) {
                // We are in tag and it is well formed
                // Grab the internals of the tag
                $strlen_segment = $position_next_gt - $cursor;

                if ($strlen_segment < 1) {
                    // there's nothing to process! ("<>"): skip the ">".
                    // The original code built a Text('<') token here but
                    // never appended it; that dead assignment is dropped.
                    // NOTE(review): no token is emitted and $inside_tag
                    // stays true, exactly as before - confirm whether
                    // "<>" was meant to be emitted as text instead.
                    $cursor++;
                    continue;
                }

                $segment = substr($html, $cursor, $strlen_segment);

                if ($segment === false) {
                    // somehow, we attempted to access beyond the end of
                    // the string, defense-in-depth, reported by Nate Abele
                    break;
                }

                // Check if it's a comment
                if (
                    substr($segment, 0, 3) === '!--'
                ) {
                    // re-determine segment length, looking for -->
                    $position_comment_end = strpos($html, '-->', $cursor);
                    if ($position_comment_end === false) {
                        // uh oh, we have a comment that extends to
                        // infinity. Can't be helped: set comment
                        // end position to end of string
                        if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment');
                        $position_comment_end = strlen($html);
                        $end = true;
                    } else {
                        $end = false;
                    }
                    $strlen_segment = $position_comment_end - $cursor;
                    $segment = substr($html, $cursor, $strlen_segment);
                    $token = new
                        HTMLPurifier_Token_Comment(
                            substr(
                                $segment, 3, $strlen_segment - 3
                            )
                        );
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
                        $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
                    }
                    $array[] = $token;
                    // skip the three characters of "-->" unless the comment
                    // ran to the end of the string
                    $cursor = $end ? $position_comment_end : $position_comment_end + 3;
                    $inside_tag = false;
                    continue;
                }

                // Check if it's an end tag
                $is_end_tag = (strpos($segment,'/') === 0);
                if ($is_end_tag) {
                    $type = substr($segment, 1);
                    $token = new HTMLPurifier_Token_End($type);
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    $inside_tag = false;
                    $cursor = $position_next_gt + 1;
                    continue;
                }

                // Check leading character is alphabetic, if not, we may
                // have accidentally grabbed an emoticon. Translate into
                // text and go our merry way
                if (!ctype_alpha($segment[0])) {
                    // XML:  $segment[0] !== '_' && $segment[0] !== ':'
                    if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
                    $token = new HTMLPurifier_Token_Text('<');
                    if ($maintain_line_numbers) {
                        // Only the "<" itself is emitted and $cursor is not
                        // advanced, so the text after it is tokenized (and
                        // its newlines counted) on the next iteration.
                        // Counting them here as well, as the code used to,
                        // made the line number drift until the next sync.
                        $token->rawPosition($current_line, $current_col);
                    }
                    $array[] = $token;
                    $inside_tag = false;
                    continue;
                }

                // Check if it is explicitly self closing, if so, remove
                // trailing slash. Remember, we could have a tag like <br>, so
                // any later token processing scripts must convert improperly
                // classified EmptyTags from StartTags.
                $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
                if ($is_self_closing) {
                    $strlen_segment--;
                    $segment = substr($segment, 0, $strlen_segment);
                }

                // Check if there are any attributes
                $position_first_space = strcspn($segment, $this->_whitespace);

                if ($position_first_space >= $strlen_segment) {
                    // no whitespace in the segment: it is just a tag name
                    if ($is_self_closing) {
                        $token = new HTMLPurifier_Token_Empty($segment);
                    } else {
                        $token = new HTMLPurifier_Token_Start($segment);
                    }
                    if ($maintain_line_numbers) {
                        $token->rawPosition($current_line, $current_col);
                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    $inside_tag = false;
                    $cursor = $position_next_gt + 1;
                    continue;
                }

                // Grab out all the data
                $type = substr($segment, 0, $position_first_space);
                $attribute_string =
                    trim(
                        substr(
                            $segment, $position_first_space
                        )
                    );
                if ($attribute_string) {
                    $attr = $this->parseAttributeString(
                                    $attribute_string
                                  , $config, $context
                              );
                } else {
                    $attr = array();
                }

                if ($is_self_closing) {
                    $token = new HTMLPurifier_Token_Empty($type, $attr);
                } else {
                    $token = new HTMLPurifier_Token_Start($type, $attr);
                }
                if ($maintain_line_numbers) {
                    $token->rawPosition($current_line, $current_col);
                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
                }
                $array[] = $token;
                $cursor = $position_next_gt + 1;
                $inside_tag = false;
                continue;
            } else {
                // inside tag, but there's no ending > sign
                if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
                $token = new
                    HTMLPurifier_Token_Text(
                        '<' .
                        $this->parseData(
                            substr($html, $cursor)
                        )
                    );
                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
                // no cursor scroll? Hmm...
                $array[] = $token;
                break;
            }
        }

        $context->destroy('CurrentLine');
        $context->destroy('CurrentCol');
        return $array;
    }
319
320
    /**
     * PHP 5.0.x compatible substr_count that implements offset and length.
     *
     * @param $haystack String to search in.
     * @param $needle Substring to count.
     * @param $offset Position in $haystack at which counting starts.
     * @param $length Number of characters after $offset to search.
     * @return Result of substr_count() restricted to the given window.
     */
    protected function substrCount($haystack, $needle, $offset, $length) {
        // The version probe is evaluated once and remembered across calls.
        static $needs_emulation;
        if ($needs_emulation === null) {
            $needs_emulation = version_compare(PHP_VERSION, '5.1', '<');
        }

        if (!$needs_emulation) {
            // native substr_count() understands offset and length
            return substr_count($haystack, $needle, $offset, $length);
        }

        // older runtime: cut the window out first, then count inside it
        $window = substr($haystack, $offset, $length);
        return substr_count($window, $needle);
    }
335
336
    /**
     * Takes the inside of an HTML tag and makes an assoc array of attributes.
     *
     * Two fast paths handle a string without any whitespace: a lone
     * boolean attribute ("disabled") and a single key=value pair. Anything
     * else is scanned attribute by attribute. Boolean attributes map to
     * their own name; values are passed through parseData(). Problems are
     * sent to the error collector when Core.CollectErrors is on.
     *
     * @param $string Inside of tag excluding name.
     * @param $config Configuration object; only get('Core.CollectErrors')
     *                is read here.
     * @param $context Context object; 'ErrorCollector' is fetched from it
     *                 when error collection is on.
     * @return Assoc array of attributes.
     */
    public function parseAttributeString($string, $config, $context) {
        $string = (string) $string; // quick typecast

        if ($string == '') return array(); // no attributes

        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        // let's see if we can abort as quickly as possible
        // one equal sign, no spaces => one attribute
        $num_equal = substr_count($string, '=');
        // Look for ANY whitespace character the main loop treats as a
        // separator, at ANY position (including index 0). Checking only for
        // ' ' made "a\tb" a single bogus attribute, and a lone leading
        // space was mistaken for "no space" because strpos() returned 0.
        $has_space = strcspn($string, $this->_whitespace) < strlen($string);
        if ($num_equal === 0 && !$has_space) {
            // bool attribute
            return array($string => $string);
        } elseif ($num_equal === 1 && !$has_space) {
            // only one attribute
            list($key, $quoted_value) = explode('=', $string);
            $quoted_value = trim($quoted_value);
            if (!$key) {
                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
                return array();
            }
            // strict comparison: the value "0" is a real value, not empty
            if ($quoted_value === '') return array($key => '');
            // $quoted_value is non-empty here, so both offsets exist
            $first_char = $quoted_value[0];
            $last_char  = $quoted_value[strlen($quoted_value)-1];

            $same_quote = ($first_char == $last_char);
            $open_quote = ($first_char == '"' || $first_char == "'");

            if ( $same_quote && $open_quote) {
                // well behaved
                $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
            } else {
                // not well behaved
                if ($open_quote) {
                    if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote');
                    $value = substr($quoted_value, 1);
                } else {
                    $value = $quoted_value;
                }
            }
            // substr() may return false instead of '' on older PHP versions
            if ($value === false) $value = '';
            return array($key => $this->parseData($value));
        }

        // setup loop environment
        $array  = array(); // return assoc array of attributes
        $cursor = 0; // current position in string (moves forward)
        $size   = strlen($string); // size of the string (stays the same)

        // if we have unquoted attributes, the parser expects a terminating
        // space, so let's guarantee that there's always a terminating space.
        $string .= ' ';

        while(true) {

            if ($cursor >= $size) {
                break;
            }

            // skip leading whitespace
            $cursor += strspn($string, $this->_whitespace, $cursor);
            // grab the key

            $key_begin = $cursor; //we're currently at the start of the key

            // scroll past all characters that are the key (not whitespace or =)
            $cursor += strcspn($string, $this->_whitespace . '=', $cursor);

            $key_end = $cursor; // now at the end of the key

            $key = substr($string, $key_begin, $key_end - $key_begin);

            if (!$key) {
                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
                // Always step over the offending character (typically a
                // stray "=") plus whatever non-whitespace follows it.
                // Without the "1 +", input such as "= a" advanced the
                // cursor by zero and looped forever.
                $cursor += 1 + strcspn($string, $this->_whitespace, $cursor + 1);
                continue; // empty key
            }

            // scroll past all whitespace
            $cursor += strspn($string, $this->_whitespace, $cursor);

            if ($cursor >= $size) {
                $array[$key] = $key;
                break;
            }

            // if the next character is an equal sign, we've got a regular
            // pair, otherwise, it's a bool attribute
            $first_char = $string[$cursor]; // in range: $cursor < $size

            if ($first_char == '=') {
                // key="value"

                $cursor++;
                $cursor += strspn($string, $this->_whitespace, $cursor);

                // Nothing but whitespace after the "=": empty value.
                // (This used to test $cursor === false, which can never be
                // true after "+=", so the code went on to read past the
                // end of the string.)
                if ($cursor >= $size) {
                    $array[$key] = '';
                    break;
                }

                // we might be in front of a quote right now

                $char = $string[$cursor]; // in range: $cursor < $size

                if ($char == '"' || $char == "'") {
                    // it's quoted, end bound is $char
                    $cursor++;
                    $value_begin = $cursor;
                    // becomes false when the closing quote is missing
                    $cursor = strpos($string, $char, $cursor);
                    $value_end = $cursor;
                } else {
                    // it's not quoted, end bound is whitespace
                    $value_begin = $cursor;
                    $cursor += strcspn($string, $this->_whitespace, $cursor);
                    $value_end = $cursor;
                }

                // we reached a premature end
                if ($cursor === false) {
                    $cursor = $size;
                    $value_end = $cursor;
                }

                $value = substr($string, $value_begin, $value_end - $value_begin);
                if ($value === false) $value = '';
                $array[$key] = $this->parseData($value);
                // step over the closing quote / terminating whitespace
                $cursor++;

            } else {
                // boolattr ($key is known to be non-empty at this point)
                $array[$key] = $key;
            }
        }
        return $array;
    }
487
488
}
489
490
// vim: et sw=4 sts=4
491