Passed
Push — main ( a26734...cf2261 )
by Colin
04:28 queued 02:25
created

Cursor::getLine()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 1
dl 0
loc 3
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <[email protected]>
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace League\CommonMark\Parser;
15
16
use League\CommonMark\Exception\UnexpectedEncodingException;
17
18
class Cursor
{
    public const INDENT_LEVEL = 4;

    /**
     * The full line being parsed
     *
     * @psalm-readonly
     */
    private string $line;

    /**
     * Length of the line in characters (not bytes)
     *
     * @psalm-readonly
     */
    private int $length;

    /**
     * Character index of the cursor within the line.
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end.  In this state, any character-returning method MUST return null.
     */
    private int $currentPosition = 0;

    /**
     * Column of the cursor; a tab moves the column forward to the next multiple of 4
     */
    private int $column = 0;

    /**
     * Number of columns between the cursor and the next non-space character.
     *
     * Only refreshed by getNextNonSpacePosition() (and reset by advanceToNextNonSpaceOrTab()).
     */
    private int $indent = 0;

    /**
     * Position the cursor was at before the most recent advance
     */
    private int $previousPosition = 0;

    /**
     * Memoized result of getNextNonSpacePosition(); null means it must be recomputed
     */
    private ?int $nextNonSpaceCache = null;

    /**
     * Whether a column-based advance stopped part-way through the tab at the current position
     */
    private bool $partiallyConsumedTab = false;

    /**
     * Whether the line contains at least one tab character
     *
     * @psalm-readonly
     */
    private bool $lineContainsTabs;

    /**
     * Whether the character length differs from the byte length (i.e. multibyte functions are needed)
     *
     * @psalm-readonly
     */
    private bool $isMultibyte;

    /**
     * Characters already extracted from a multibyte line, keyed by character index
     *
     * @var array<int, string>
     */
    private array $charCache = [];

    /**
     * @param string $line The line being parsed (ASCII or UTF-8)
     *
     * @throws UnexpectedEncodingException if the line is not valid UTF-8
     */
    public function __construct(string $line)
    {
        if (! \mb_check_encoding($line, 'UTF-8')) {
            throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
        }

        $this->line             = $line;
        $this->length           = \mb_strlen($line, 'UTF-8') ?: 0;
        $this->isMultibyte      = $this->length !== \strlen($line);
        $this->lineContainsTabs = \strpos($line, "\t") !== false;
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * As a side effect this also recalculates the indent and caches the result
     * until the cursor is moved again.
     */
    public function getNextNonSpacePosition(): int
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $i    = $this->currentPosition;
        $cols = $this->column;

        while (($c = $this->getCharacter($i)) !== null) {
            if ($c === ' ') {
                $i++;
                $cols++;
            } elseif ($c === "\t") {
                $i++;
                // A tab always moves forward to the next multiple of 4 columns
                $cols += 4 - ($cols % 4);
            } else {
                break;
            }
        }

        // $c is null only when the scan ran off the end of the line
        $nextNonSpace = $c === null ? $this->length : $i;
        $this->indent = $cols - $this->column;

        return $this->nextNonSpaceCache = $nextNonSpace;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @return string|null The character, or null if only spaces/tabs remain
     */
    public function getNextNonSpaceCharacter(): ?string
    {
        return $this->getCharacter($this->getNextNonSpacePosition());
    }

    /**
     * Calculates the current indent (number of spaces after current position)
     */
    public function getIndent(): int
    {
        if ($this->nextNonSpaceCache === null) {
            // The indent is only up-to-date once the next non-space position has been computed
            $this->getNextNonSpacePosition();
        }

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     */
    public function isIndented(): bool
    {
        if ($this->nextNonSpaceCache === null) {
            // The indent is only up-to-date once the next non-space position has been computed
            $this->getNextNonSpacePosition();
        }

        return $this->indent >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given index
     *
     * @param int|null $index Character index to read; null means the current position
     *
     * @return string|null The character, or null if the index is out-of-bounds
     */
    public function getCharacter(?int $index = null): ?string
    {
        if ($index === null) {
            $index = $this->currentPosition;
        }

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        if ($this->isMultibyte) {
            return $this->charCache[$index] ??= \mb_substr($this->line, $index, 1, 'UTF-8');
        }

        // Single-byte line: byte offsets and character offsets are the same
        return $this->line[$index];
    }

    /**
     * Slightly-optimized version of getCharacter(null)
     *
     * @return string|null The character at the current position, or null if at the end
     */
    public function getCurrentCharacter(): ?string
    {
        if ($this->currentPosition >= $this->length) {
            return null;
        }

        if ($this->isMultibyte) {
            return $this->charCache[$this->currentPosition] ??= \mb_substr($this->line, $this->currentPosition, 1, 'UTF-8');
        }

        return $this->line[$this->currentPosition];
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     *
     * @param int $offset Number of characters past the current position to look at
     */
    public function peek(int $offset = 1): ?string
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     */
    public function isBlank(): bool
    {
        return $this->nextNonSpaceCache === $this->length || $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards
     */
    public function advance(): void
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     */
    public function advanceBy(int $characters, bool $advanceByColumns = false): void
    {
        $this->previousPosition = $this->currentPosition;

        $this->nextNonSpaceCache = null;

        // Optimization to avoid tab handling logic if we have no tabs
        if (! $this->lineContainsTabs) {
            $this->advanceWithoutTabCharacters($characters);

            return;
        }

        $nextFewChars = $this->isMultibyte ?
            \mb_substr($this->line, $this->currentPosition, $characters, 'UTF-8') :
            \substr($this->line, $this->currentPosition, $characters);

        if ($nextFewChars === '') {
            return;
        }

        // Optimization to avoid tab handling logic if we have no tabs
        if (\strpos($nextFewChars, "\t") === false) {
            $this->advanceWithoutTabCharacters($characters);

            return;
        }

        if ($characters === 1) {
            $asArray = [$nextFewChars];
        } elseif ($this->isMultibyte) {
            /** @var string[] $asArray */
            $asArray = \mb_str_split($nextFewChars, 1, 'UTF-8');
        } else {
            $asArray = \str_split($nextFewChars);
        }

        foreach ($asArray as $c) {
            if ($c === "\t") {
                // Number of columns needed to reach the next tab stop
                $charsToTab = 4 - ($this->column % 4);
                if ($advanceByColumns) {
                    // The tab may be wider than what is left to consume; in that case the
                    // column moves but the position stays on the (partially consumed) tab
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance             = $charsToTab > $characters ? $characters : $charsToTab;
                    $this->column              += $charsToAdvance;
                    $this->currentPosition     += $this->partiallyConsumedTab ? 0 : 1;
                    $characters                -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column              += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances position and column together, never moving past the end of the line
     */
    private function advanceWithoutTabCharacters(int $characters): void
    {
        $length                     = \min($characters, $this->length - $this->currentPosition);
        $this->partiallyConsumedTab = false;
        $this->currentPosition     += $length;
        $this->column              += $length;
    }

    /**
     * Advances the cursor by a single space or tab, if present
     *
     * @return bool Whether the current character was a space or tab
     */
    public function advanceBySpaceOrTab(): bool
    {
        $character = $this->getCurrentCharacter();

        if ($character === ' ' || $character === "\t") {
            $this->advanceBy(1, true);

            return true;
        }

        return false;
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab(): int
    {
        $newPosition = $this->nextNonSpaceCache ?? $this->getNextNonSpacePosition();
        if ($newPosition === $this->currentPosition) {
            return 0;
        }

        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        // We've just advanced to where that non-space is,
        // so any subsequent calls to find the next one will
        // always return the current position.
        $this->nextNonSpaceCache = $this->currentPosition;
        $this->indent            = 0;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline(): int
    {
        $remainder = $this->getRemainder();

        // Optimization: Avoid the regex if we know there are no spaces or newlines
        if ($remainder === '' || ($remainder[0] !== ' ' && $remainder[0] !== "\n")) {
            $this->previousPosition = $this->currentPosition;

            return 0;
        }

        $matches = [];
        \preg_match('/^ *(?:\n *)?/', $remainder, $matches, \PREG_OFFSET_CAPTURE);

        // [0][0] contains the matched text
        // [0][1] contains the index of that match
        $increment = $matches[0][1] + \strlen($matches[0][0]);

        $this->advanceBy($increment);

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd(): int
    {
        $this->previousPosition  = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns the text from the current position to the end of the line
     *
     * If the tab at the current position was partially consumed, the tab itself is
     * replaced by the spaces still needed to reach its tab stop.
     */
    public function getRemainder(): string
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $prefix   = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            // Skip over the tab character and emit its unconsumed columns as spaces
            $position++;
            $charsToTab = 4 - ($this->column % 4);
            $prefix     = \str_repeat(' ', $charsToTab);
        }

        $subString = $this->isMultibyte ?
            \mb_substr($this->line, $position, null, 'UTF-8') :
            \substr($this->line, $position);

        return $prefix . $subString;
    }

    /**
     * Returns the full line this cursor was created with
     */
    public function getLine(): string
    {
        return $this->line;
    }

    /**
     * Whether the cursor has reached (or passed) the end of the line
     */
    public function isAtEnd(): bool
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     *
     * @param string $regex Pattern which is applied to the remainder of the line
     *
     * @return string|null The matched text, or null if the pattern did not match
     */
    public function match(string $regex): ?string
    {
        $subject = $this->getRemainder();

        if (! \preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0][0] contains the matched text
        // $matches[0][1] contains the index of that match

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
            $offset      = \mb_strlen(\substr($subject, 0, $matches[0][1]), 'UTF-8');
            $matchLength = \mb_strlen($matches[0][0], 'UTF-8');
        } else {
            $offset      = $matches[0][1];
            $matchLength = \strlen($matches[0][0]);
        }

        // Skip everything before the match as well as the match itself
        $this->advanceBy($offset + $matchLength);

        return $matches[0][0];
    }

    /**
     * Encapsulates the current state of this cursor in case you need to rollback later.
     *
     * WARNING: Do not parse or use the return value for ANYTHING except for
     * passing it back into restoreState(), as the number of values and their
     * contents may change in any future release without warning.
     */
    public function saveState(): CursorState
    {
        return new CursorState([
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ]);
    }

    /**
     * Restore the cursor to a previous state.
     *
     * Pass in the value previously obtained by calling saveState().
     */
    public function restoreState(CursorState $state): void
    {
        // The order here must mirror the order used by saveState()
        [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ] = $state->toArray();
    }

    /**
     * Returns the current position (character index) of the cursor
     */
    public function getPosition(): int
    {
        return $this->currentPosition;
    }

    /**
     * Returns the text between the previous position and the current position
     */
    public function getPreviousText(): string
    {
        return \mb_substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition, 'UTF-8');
    }

    /**
     * Returns part of the line, measured in characters
     *
     * @param int      $start  Character index to start from
     * @param int|null $length Number of characters to return; null means everything up to the end
     */
    public function getSubstring(int $start, ?int $length = null): string
    {
        if ($this->isMultibyte) {
            return \mb_substr($this->line, $start, $length, 'UTF-8');
        }

        if ($length !== null) {
            return \substr($this->line, $start, $length);
        }

        return \substr($this->line, $start);
    }

    /**
     * Returns the current column of the cursor
     */
    public function getColumn(): int
    {
        return $this->column;
    }
}
482