Passed
Push — 2.0 ( d95bff...f1b31a )
by Colin
07:53 queued 05:47
created

Cursor::getCurrentCharacter()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 15
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 4

Importance

Changes 0
Metric Value
eloc 7
dl 0
loc 15
ccs 8
cts 8
cp 1
rs 10
c 0
b 0
f 0
cc 4
nc 4
nop 0
crap 4
1
<?php
2
3
declare(strict_types=1);
4
5
/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <colinodell@gmail.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
13
14
namespace League\CommonMark\Parser;
15
16
use League\CommonMark\Exception\UnexpectedEncodingException;
17
18
/**
 * A forward-moving cursor over a single line of ASCII or UTF-8 text.
 *
 * Positions are measured in characters (not bytes), while columns additionally
 * account for tab characters expanding to the next multiple-of-4 tab stop.
 */
class Cursor
{
    public const INDENT_LEVEL = 4;

    /**
     * The line being parsed
     *
     * @var string
     *
     * @psalm-readonly
     */
    private $line;

    /**
     * Length of the line in characters (not bytes)
     *
     * @var int
     *
     * @psalm-readonly
     */
    private $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end.  In this state, any character-returning method MUST return null.
     */
    private $currentPosition = 0;

    /**
     * Current column; differs from the position once a tab has been (partially) consumed
     *
     * @var int
     */
    private $column = 0;

    /**
     * Number of columns between the current column and the next non-space character,
     * as last computed by getNextNonSpacePosition()
     *
     * @var int
     */
    private $indent = 0;

    /**
     * Position the cursor was at before the most recent advance
     *
     * @var int
     */
    private $previousPosition = 0;

    /**
     * Cached result of getNextNonSpacePosition(); null means it must be recomputed
     *
     * @var int|null
     */
    private $nextNonSpaceCache;

    /**
     * Whether the tab at the current position has only been partially consumed (by columns)
     *
     * @var bool
     */
    private $partiallyConsumedTab = false;

    /**
     * Whether the line contains at least one tab character
     *
     * @var bool
     *
     * @psalm-readonly
     */
    private $lineContainsTabs;

    /**
     * Whether the line's character length differs from its byte length
     *
     * @var bool
     *
     * @psalm-readonly
     */
    private $isMultibyte;

    /**
     * Characters already extracted from a multibyte line, keyed by character index
     *
     * @var array<int, string>
     */
    private $charCache = [];

    /**
     * @param string $line The line being parsed (ASCII or UTF-8)
     *
     * @throws UnexpectedEncodingException if the line is not valid UTF-8
     */
    public function __construct(string $line)
    {
        if (! \mb_check_encoding($line, 'UTF-8')) {
            throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
        }

        $this->line             = $line;
        $this->length           = \mb_strlen($line, 'UTF-8') ?: 0;
        $this->isMultibyte      = $this->length !== \strlen($line);
        $this->lineContainsTabs = \strpos($line, "\t") !== false;
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * As a side effect this refreshes the cached indent, which is why getIndent()
     * and isIndented() call this method before reading it.
     */
    public function getNextNonSpacePosition(): int
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $i    = $this->currentPosition;
        $cols = $this->column;

        // $c is always assigned by the loop condition before it is read below
        while (($c = $this->getCharacter($i)) !== null) {
            if ($c === ' ') {
                $i++;
                $cols++;
            } elseif ($c === "\t") {
                $i++;
                // A tab advances the column to the next multiple of 4
                $cols += 4 - ($cols % 4);
            } else {
                break;
            }
        }

        // Running off the end of the line means only whitespace remained
        $nextNonSpace = $c === null ? $this->length : $i;
        $this->indent = $cols - $this->column;

        return $this->nextNonSpaceCache = $nextNonSpace;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     */
    public function getNextNonSpaceCharacter(): ?string
    {
        return $this->getCharacter($this->getNextNonSpacePosition());
    }

    /**
     * Calculates the current indent (number of spaces after current position)
     */
    public function getIndent(): int
    {
        if ($this->nextNonSpaceCache === null) {
            $this->getNextNonSpacePosition();
        }

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     */
    public function isIndented(): bool
    {
        $this->getNextNonSpacePosition();

        return $this->indent >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given position, or null if that position is out of bounds
     *
     * @param int|null $index Character index to read; defaults to the current position
     */
    public function getCharacter(?int $index = null): ?string
    {
        if ($index === null) {
            $index = $this->currentPosition;
        }

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        if ($this->isMultibyte) {
            if (isset($this->charCache[$index])) {
                return $this->charCache[$index];
            }

            return $this->charCache[$index] = \mb_substr($this->line, $index, 1, 'UTF-8');
        }

        return $this->line[$index];
    }

    /**
     * Slightly-optimized version of getCharacter(null)
     */
    public function getCurrentCharacter(): ?string
    {
        if ($this->currentPosition >= $this->length) {
            return null;
        }

        if ($this->isMultibyte) {
            if (isset($this->charCache[$this->currentPosition])) {
                return $this->charCache[$this->currentPosition];
            }

            return $this->charCache[$this->currentPosition] = \mb_substr($this->line, $this->currentPosition, 1, 'UTF-8');
        }

        return $this->line[$this->currentPosition];
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     */
    public function peek(int $offset = 1): ?string
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     */
    public function isBlank(): bool
    {
        return $this->nextNonSpaceCache === $this->length || $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards
     */
    public function advance(): void
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     */
    public function advanceBy(int $characters, bool $advanceByColumns = false): void
    {
        $this->previousPosition = $this->currentPosition;

        if ($characters === 0) {
            return;
        }

        $this->nextNonSpaceCache = null;

        // Optimization to avoid tab handling logic if the line has no tabs at all
        if (! $this->lineContainsTabs) {
            $this->advanceWithoutTabCharacters($characters);

            return;
        }

        $nextFewChars = $this->isMultibyte ?
            \mb_substr($this->line, $this->currentPosition, $characters, 'UTF-8') :
            \substr($this->line, $this->currentPosition, $characters);

        if ($nextFewChars === '') {
            return;
        }

        // Optimization to avoid tab handling logic if the characters being consumed contain no tabs
        if (\strpos($nextFewChars, "\t") === false) {
            $this->advanceWithoutTabCharacters($characters);

            return;
        }

        if ($characters === 1) {
            $asArray = [$nextFewChars];
        } elseif ($this->isMultibyte) {
            /** @var string[] $asArray */
            $asArray = \preg_split('//u', $nextFewChars, -1, \PREG_SPLIT_NO_EMPTY);
        } else {
            $asArray = \str_split($nextFewChars);
        }

        foreach ($asArray as $c) {
            if ($c === "\t") {
                // Number of columns remaining until the next tab stop
                $charsToTab = 4 - ($this->column % 4);
                if ($advanceByColumns) {
                    // The tab is only stepped over once all of its columns have been consumed
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance             = $charsToTab > $characters ? $characters : $charsToTab;
                    $this->column              += $charsToAdvance;
                    $this->currentPosition     += $this->partiallyConsumedTab ? 0 : 1;
                    $characters                -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column              += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances position and column by the same amount, stopping at the end of the line
     */
    private function advanceWithoutTabCharacters(int $characters): void
    {
        $length                     = \min($characters, $this->length - $this->currentPosition);
        $this->partiallyConsumedTab = false;
        $this->currentPosition     += $length;
        $this->column              += $length;
    }

    /**
     * Advances the cursor by a single space or tab, if present
     */
    public function advanceBySpaceOrTab(): bool
    {
        $character = $this->getCurrentCharacter();

        if ($character === ' ' || $character === "\t") {
            $this->advanceBy(1, true);

            return true;
        }

        return false;
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab(): int
    {
        $newPosition = $this->getNextNonSpacePosition();
        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline(): int
    {
        $remainder = $this->getRemainder();

        // Optimization: Avoid the regex if we know there are no spaces or newlines
        if ($remainder === '' || ($remainder[0] !== ' ' && $remainder[0] !== "\n")) {
            $this->previousPosition = $this->currentPosition;

            return 0;
        }

        $matches = [];
        \preg_match('/^ *(?:\n *)?/', $remainder, $matches, \PREG_OFFSET_CAPTURE);

        // [0][0] contains the matched text
        // [0][1] contains the index of that match
        $increment = $matches[0][1] + \strlen($matches[0][0]);

        $this->advanceBy($increment);

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd(): int
    {
        $this->previousPosition  = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns the text from the current position to the end of the line
     *
     * If the tab at the current position was only partially consumed, that tab is
     * skipped and replaced by the spaces needed to reach the next tab stop.
     */
    public function getRemainder(): string
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $prefix   = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            $position++;
            $charsToTab = 4 - ($this->column % 4);
            $prefix     = \str_repeat(' ', $charsToTab);
        }

        $subString = $this->isMultibyte ?
            \mb_substr($this->line, $position, null, 'UTF-8') :
            \substr($this->line, $position);

        return $prefix . $subString;
    }

    /**
     * Returns the full line this cursor was created with
     */
    public function getLine(): string
    {
        return $this->line;
    }

    /**
     * Whether the cursor has reached (or passed) the end of the line
     */
    public function isAtEnd(): bool
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     */
    public function match(string $regex): ?string
    {
        $subject = $this->getRemainder();

        if (! \preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0][0] contains the matched text
        // $matches[0][1] contains the index of that match

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
            $offset      = \mb_strlen(\substr($subject, 0, $matches[0][1]), 'UTF-8');
            $matchLength = \mb_strlen($matches[0][0], 'UTF-8');
        } else {
            $offset      = $matches[0][1];
            $matchLength = \strlen($matches[0][0]);
        }

        // Skip everything before the match as well as the match itself
        $this->advanceBy($offset + $matchLength);

        return $matches[0][0];
    }

    /**
     * Encapsulates the current state of this cursor in case you need to rollback later.
     *
     * WARNING: Do not parse or use the return value for ANYTHING except for
     * passing it back into restoreState(), as the number of values and their
     * contents may change in any future release without warning.
     */
    public function saveState(): CursorState
    {
        return new CursorState([
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ]);
    }

    /**
     * Restore the cursor to a previous state.
     *
     * Pass in the value previously obtained by calling saveState().
     */
    public function restoreState(CursorState $state): void
    {
        // The order here must mirror the array built in saveState()
        [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ] = $state->toArray();
    }

    /**
     * Returns the current position, in characters
     */
    public function getPosition(): int
    {
        return $this->currentPosition;
    }

    /**
     * Returns the text between the previous position and the current position
     */
    public function getPreviousText(): string
    {
        $length = $this->currentPosition - $this->previousPosition;

        if ($this->isMultibyte) {
            return \mb_substr($this->line, $this->previousPosition, $length, 'UTF-8');
        }

        // Byte offsets equal character offsets when the line is not multibyte
        return \substr($this->line, $this->previousPosition, $length);
    }

    /**
     * Returns part of the line, using character (not byte) offsets
     *
     * @param int      $start  Character index to start from
     * @param int|null $length Number of characters to return; null means through the end of the line
     */
    public function getSubstring(int $start, ?int $length = null): string
    {
        if ($this->isMultibyte) {
            return \mb_substr($this->line, $start, $length, 'UTF-8');
        }

        if ($length !== null) {
            return \substr($this->line, $start, $length);
        }

        return \substr($this->line, $start);
    }

    /**
     * Returns the current column
     */
    public function getColumn(): int
    {
        return $this->column;
    }
}
503