Completed
Push — master ( afd04b...3b4c22 )
by Colin
10s
created

src/Cursor.php (3 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
/*
4
 * This file is part of the league/commonmark package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace League\CommonMark;
13
14
/**
 * A read cursor over a single line of text.
 *
 * Positions and lengths are counted in characters of the detected encoding
 * (via the mb_* functions), not bytes. Columns are tracked separately from
 * positions because a tab advances the column to the next multiple of 4.
 */
class Cursor
{
    const INDENT_LEVEL = 4;

    /**
     * @var string
     */
    private $line;

    /**
     * Length of $line in characters of $encoding.
     *
     * @var int
     */
    private $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end.  In this state, any character-returning method MUST return null.
     */
    private $currentPosition = 0;

    /**
     * @var int
     */
    private $column = 0;

    /**
     * Columns between the current column and the next non-space character,
     * as last computed by getNextNonSpacePosition().
     *
     * @var int
     */
    private $indent = 0;

    /**
     * @var int
     */
    private $previousPosition = 0;

    /**
     * Memoised result of getNextNonSpacePosition(); null means "not computed".
     *
     * @var int|null
     */
    private $nextNonSpaceCache;

    /**
     * True when a column-wise advance stopped part-way through a tab.
     *
     * @var bool
     */
    private $partiallyConsumedTab = false;

    /**
     * @var string
     */
    private $encoding;

    /**
     * @param string $line
     */
    public function __construct($line)
    {
        $this->line = $line;
        // Strict detection yields false for anything that is neither ASCII nor valid UTF-8;
        // such input is then treated as ISO-8859-1.
        $this->encoding = mb_detect_encoding($line, 'ASCII,UTF-8', true) ?: 'ISO-8859-1';
        $this->length = mb_strlen($line, $this->encoding);
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * @deprecated Use getNextNonSpacePosition() instead
     *
     * @return int
     */
    public function getFirstNonSpacePosition()
    {
        // The @ is intentional: the deprecation notice is raised but silenced by default.
        @trigger_error('Cursor::getFirstNonSpacePosition() will be removed in a future 0.x release.  Use getNextNonSpacePosition() instead. See https://github.com/thephpleague/commonmark/issues/280', E_USER_DEPRECATED);

        return $this->getNextNonSpacePosition();
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * The result is cached until the cursor moves; the indent is recomputed
     * as a side effect whenever the cache is refreshed.
     *
     * @return int
     */
    public function getNextNonSpacePosition()
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $i = $this->currentPosition;
        $cols = $this->column;

        while (($c = $this->getCharacter($i)) !== null) {
            if ($c === ' ') {
                $i++;
                $cols++;
            } elseif ($c === "\t") {
                $i++;
                $cols += self::columnsToNextTabStop($cols);
            } else {
                break;
            }
        }

        // $c is null only when the scan ran off the end of the line
        $nextNonSpace = ($c === null) ? $this->length : $i;
        $this->indent = $cols - $this->column;

        return $this->nextNonSpaceCache = $nextNonSpace;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @deprecated Use getNextNonSpaceCharacter() instead
     *
     * @return string|null
     */
    public function getFirstNonSpaceCharacter()
    {
        // The @ is intentional: the deprecation notice is raised but silenced by default.
        @trigger_error('Cursor::getFirstNonSpaceCharacter() will be removed in a future 0.x release.  Use getNextNonSpaceCharacter() instead. See https://github.com/thephpleague/commonmark/issues/280', E_USER_DEPRECATED);

        return $this->getNextNonSpaceCharacter();
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @return string|null Null when only spaces/tabs remain
     */
    public function getNextNonSpaceCharacter()
    {
        return $this->getCharacter($this->getNextNonSpacePosition());
    }

    /**
     * Calculates the current indent (number of spaces after current position)
     *
     * @return int
     */
    public function getIndent()
    {
        // Called for its side effect: it refreshes $this->indent when the cache is stale
        $this->getNextNonSpacePosition();

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     *
     * @return bool
     */
    public function isIndented()
    {
        return $this->getIndent() >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given position, or at the current position when omitted
     *
     * @param int|null $index
     *
     * @return string|null Null when the index is out of bounds
     */
    public function getCharacter($index = null)
    {
        if ($index === null) {
            $index = $this->currentPosition;
        }

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        return mb_substr($this->line, $index, 1, $this->encoding);
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     *
     * @param int $offset
     *
     * @return string|null
     */
    public function peek($offset = 1)
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     *
     * @return bool
     */
    public function isBlank()
    {
        return $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards
     */
    public function advance()
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * The position before the move is always recorded as the previous position,
     * even when $characters is 0.
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     */
    public function advanceBy($characters, $advanceByColumns = false)
    {
        $this->previousPosition = $this->currentPosition;

        if ($characters === 0) {
            return;
        }

        $this->nextNonSpaceCache = null;

        $nextFewChars = mb_substr($this->line, $this->currentPosition, $characters, $this->encoding);

        // Optimization to avoid tab handling logic if we have no tabs
        if (strpos($nextFewChars, "\t") === false) {
            $length = min($characters, $this->length - $this->currentPosition);
            $this->partiallyConsumedTab = false;
            $this->currentPosition += $length;
            $this->column += $length;

            return;
        }

        // $nextFewChars contains a tab here, so it is never empty; a single
        // requested character needs no splitting.
        $asArray = ($characters === 1) ? [$nextFewChars] : $this->splitCharacters($nextFewChars);

        foreach ($asArray as $c) {
            if ($c === "\t") {
                $charsToTab = self::columnsToNextTabStop($this->column);
                if ($advanceByColumns) {
                    // The tab is only fully consumed if the remaining budget reaches the tab stop
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance = min($characters, $charsToTab);
                    $this->column += $charsToAdvance;
                    $this->currentPosition += $this->partiallyConsumedTab ? 0 : 1;
                    $characters -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances the cursor by a single space or tab, if present
     *
     * @return bool
     */
    public function advanceBySpaceOrTab()
    {
        $character = $this->getCharacter();

        if ($character === ' ' || $character === "\t") {
            $this->advanceBy(1, true);

            return true;
        }

        return false;
    }

    /**
     * Advances the cursor while the given character is matched
     *
     * @param string   $character                  Character to match
     * @param int|null $maximumCharactersToAdvance Maximum number of characters to advance before giving up
     *
     * @return int Number of positions moved (0 if unsuccessful)
     *
     * @deprecated Use match() instead
     */
    public function advanceWhileMatches($character, $maximumCharactersToAdvance = null)
    {
        // The @ is intentional: the deprecation notice is raised but silenced by default.
        @trigger_error('Cursor::advanceWhileMatches() will be removed in a future 0.x release.  Use match() instead.', E_USER_DEPRECATED);

        // Calculate how far to advance
        $start = $this->currentPosition;
        $newIndex = $start;
        if ($maximumCharactersToAdvance === null) {
            $maximumCharactersToAdvance = $this->length;
        }

        $max = min($start + $maximumCharactersToAdvance, $this->length);

        while ($newIndex < $max && $this->getCharacter($newIndex) === $character) {
            ++$newIndex;
        }

        if ($newIndex <= $start) {
            return 0;
        }

        $this->advanceBy($newIndex - $start);

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * @deprecated Use advanceToNextNonSpaceOrNewline() instead
     *
     * @return int Number of positions moved
     */
    public function advanceToFirstNonSpace()
    {
        // The @ is intentional: the deprecation notice is raised but silenced by default.
        @trigger_error('Cursor::advanceToFirstNonSpace() will be removed in a future 0.x release.  Use advanceToNextNonSpaceOrTab() or advanceToNextNonSpaceOrNewline() instead. See https://github.com/thephpleague/commonmark/issues/280', E_USER_DEPRECATED);

        return $this->advanceToNextNonSpaceOrNewline();
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab()
    {
        $newPosition = $this->getNextNonSpacePosition();
        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * When nothing matches, the cursor (including the previous position) is left untouched.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline()
    {
        $matches = [];
        // The pattern is anchored and every part is optional, so it always matches at offset 0
        preg_match('/^ *(?:\n *)?/', $this->getRemainder(), $matches);

        // Only spaces and "\n" can match, so the byte length equals the character count
        $increment = strlen($matches[0]);

        if ($increment === 0) {
            return 0;
        }

        $this->advanceBy($increment);

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd()
    {
        $this->previousPosition = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns the text from the current position to the end of the line
     *
     * If a tab has been partially consumed, the tab itself is skipped and the
     * columns still owed up to its tab stop are returned as leading spaces.
     *
     * @return string
     */
    public function getRemainder()
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $prefix = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            $position++;
            $prefix = str_repeat(' ', self::columnsToNextTabStop($this->column));
        }

        return $prefix . mb_substr($this->line, $position, null, $this->encoding);
    }

    /**
     * @return string
     */
    public function getLine()
    {
        return $this->line;
    }

    /**
     * @return bool
     */
    public function isAtEnd()
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     *
     * @param string $regex
     *
     * @return string|null Null when the expression does not match the remainder
     */
    public function match($regex)
    {
        $subject = $this->getRemainder();

        $matches = [];
        if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
        $offset = mb_strlen(mb_strcut($subject, 0, $matches[0][1], $this->encoding), $this->encoding);

        // [0][0] contains the matched text
        // [0][1] contains the index of that match
        $this->advanceBy($offset + mb_strlen($matches[0][0], $this->encoding));

        return $matches[0][0];
    }

    /**
     * Captures every field of the cursor so it can be restored later with restoreState()
     *
     * @return CursorState
     */
    public function saveState()
    {
        return new CursorState(
            $this->line,
            $this->length,
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
            $this->encoding
        );
    }

    /**
     * Overwrites every field of the cursor with the values held by the given state
     *
     * @param CursorState $state
     */
    public function restoreState(CursorState $state)
    {
        $this->line = $state->getLine();
        $this->length = $state->getLength();
        $this->currentPosition = $state->getCurrentPosition();
        $this->previousPosition = $state->getPreviousPosition();
        $this->nextNonSpaceCache = $state->getNextNonSpaceCache();
        $this->column = $state->getColumn();
        $this->indent = $state->getIndent();
        $this->partiallyConsumedTab = $state->getPartiallyConsumedTab();
        $this->encoding = $state->getEncoding();
    }

    /**
     * @return int
     */
    public function getPosition()
    {
        return $this->currentPosition;
    }

    /**
     * Returns the text between the previous position and the current position
     *
     * @return string
     */
    public function getPreviousText()
    {
        return mb_substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition, $this->encoding);
    }

    /**
     * @return int
     */
    public function getColumn()
    {
        return $this->column;
    }

    /**
     * Number of columns from the given column to the next tab stop (tab stops fall every 4 columns)
     *
     * @param int $column
     *
     * @return int Always between 1 and 4
     */
    private static function columnsToNextTabStop($column)
    {
        return 4 - ($column % 4);
    }

    /**
     * Splits text into a list of single characters according to the cursor's encoding
     *
     * @param string $text
     *
     * @return string[]
     */
    private function splitCharacters($text)
    {
        // -1 means "no limit"; passing null here is deprecated as of PHP 8.1
        $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars !== false) {
            return $chars;
        }

        // The /u modifier rejects text that is not valid UTF-8 (e.g. the ISO-8859-1
        // fallback chosen by the constructor), so split by encoding-aware offsets instead.
        $chars = [];
        $count = mb_strlen($text, $this->encoding);
        for ($i = 0; $i < $count; $i++) {
            $chars[] = mb_substr($text, $i, 1, $this->encoding);
        }

        return $chars;
    }
}
522