Cursor::advanceBy()   C
last analyzed

Complexity

Conditions 14
Paths 80

Size

Total Lines 56
Code Lines 40

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 38
CRAP Score 14

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 14
eloc 40
c 2
b 0
f 0
nc 80
nop 2
dl 0
loc 56
ccs 38
cts 38
cp 1
crap 14
rs 6.2666

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <[email protected]>
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace League\CommonMark\Parser;
15
16
use League\CommonMark\Exception\UnexpectedEncodingException;
17
18
/**
 * A forward-only reading position over a single line of ASCII or UTF-8 text.
 *
 * Besides the character position, the cursor tracks the visual column (tabs
 * advance to the next tab stop) and remembers the position it had before the
 * most recent move, so callers can ask for the text that was just consumed.
 *
 * All positions and lengths are expressed in characters, not bytes.
 */
class Cursor
{
    public const INDENT_LEVEL = 4;

    /** Number of columns between tab stops, used whenever a tab is measured or consumed */
    private const TAB_STOP = 4;

    /** @psalm-readonly */
    private string $line;

    /** @psalm-readonly */
    private int $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end.  In this state, any character-returning method MUST return null.
     */
    private int $currentPosition = 0;

    /** Visual column of the current position (a tab may span several columns) */
    private int $column = 0;

    /** Columns of whitespace found by the last scan in getNextNonSpacePosition() */
    private int $indent = 0;

    /** Position the cursor had before the most recent advance */
    private int $previousPosition = 0;

    /** Memoized result of getNextNonSpacePosition(); null when it must be recomputed */
    private ?int $nextNonSpaceCache = null;

    /** True when only some of the columns of the tab at the current position were consumed */
    private bool $partiallyConsumedTab = false;

    /**
     * @var int|false
     *
     * @psalm-readonly
     */
    private $lastTabPosition;

    /** @psalm-readonly */
    private bool $isMultibyte;

    /** @var array<int, string> */
    private array $charCache = [];

    /**
     * @param string $line The line being parsed (ASCII or UTF-8)
     *
     * @throws UnexpectedEncodingException if the line is not valid UTF-8
     */
    public function __construct(string $line)
    {
        if (! \mb_check_encoding($line, 'UTF-8')) {
            throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
        }

        $this->line   = $line;
        $this->length = \mb_strlen($line, 'UTF-8') ?: 0;

        // If the character count equals the byte count, every character is a
        // single byte and the cheaper byte-oriented string functions can be used.
        $this->isMultibyte     = $this->length !== \strlen($line);
        $this->lastTabPosition = $this->isMultibyte ? \mb_strrpos($line, "\t", 0, 'UTF-8') : \strrpos($line, "\t");
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * As a side effect this recomputes the indent (in columns) between the
     * current position and that character. The result is cached until the
     * cursor moves.
     */
    public function getNextNonSpacePosition(): int
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        if ($this->currentPosition >= $this->length) {
            // Nothing is left to scan, so there is no indentation either. Reset it
            // explicitly: otherwise getIndent()/isIndented() would keep reporting
            // the value computed at some earlier position.
            $this->indent = 0;

            return $this->nextNonSpaceCache = $this->length;
        }

        $cols = $this->column;

        for ($i = $this->currentPosition; $i < $this->length; $i++) {
            // This if-else was copied out of getCharacter() for performance reasons
            if ($this->isMultibyte) {
                $c = $this->charCache[$i] ??= \mb_substr($this->line, $i, 1, 'UTF-8');
            } else {
                $c = $this->line[$i];
            }

            if ($c === ' ') {
                $cols++;
            } elseif ($c === "\t") {
                // A tab moves to the next tab stop rather than by a fixed width
                $cols += self::TAB_STOP - ($cols % self::TAB_STOP);
            } else {
                break;
            }
        }

        $this->indent = $cols - $this->column;

        return $this->nextNonSpaceCache = $i;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @return string|null The character, or null if only whitespace remains
     */
    public function getNextNonSpaceCharacter(): ?string
    {
        $index = $this->getNextNonSpacePosition();
        if ($index >= $this->length) {
            return null;
        }

        if ($this->isMultibyte) {
            return $this->charCache[$index] ??= \mb_substr($this->line, $index, 1, 'UTF-8');
        }

        return $this->line[$index];
    }

    /**
     * Calculates the current indent (number of spaces after current position)
     */
    public function getIndent(): int
    {
        if ($this->nextNonSpaceCache === null) {
            $this->getNextNonSpacePosition();
        }

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     */
    public function isIndented(): bool
    {
        if ($this->nextNonSpaceCache === null) {
            $this->getNextNonSpacePosition();
        }

        return $this->indent >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given position without moving the cursor
     *
     * @param int|null $index Character position to read; null means the current position
     *
     * @return string|null The character, or null if the position is outside the line
     */
    public function getCharacter(?int $index = null): ?string
    {
        if ($index === null) {
            $index = $this->currentPosition;
        }

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        if ($this->isMultibyte) {
            return $this->charCache[$index] ??= \mb_substr($this->line, $index, 1, 'UTF-8');
        }

        return $this->line[$index];
    }

    /**
     * Slightly-optimized version of getCharacter(null)
     */
    public function getCurrentCharacter(): ?string
    {
        if ($this->currentPosition >= $this->length) {
            return null;
        }

        if ($this->isMultibyte) {
            return $this->charCache[$this->currentPosition] ??= \mb_substr($this->line, $this->currentPosition, 1, 'UTF-8');
        }

        return $this->line[$this->currentPosition];
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     *
     * @param int $offset Distance from the current position, in characters
     */
    public function peek(int $offset = 1): ?string
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     */
    public function isBlank(): bool
    {
        return $this->nextNonSpaceCache === $this->length || $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards
     */
    public function advance(): void
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * The position before the move is remembered as the previous position,
     * even when the cursor ends up not moving at all.
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     */
    public function advanceBy(int $characters, bool $advanceByColumns = false): void
    {
        $this->previousPosition  = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        if ($this->currentPosition >= $this->length || $characters === 0) {
            return;
        }

        // Optimization to avoid tab handling logic if we have no tabs
        if ($this->lastTabPosition === false || $this->currentPosition > $this->lastTabPosition) {
            $length                     = \min($characters, $this->length - $this->currentPosition);
            $this->partiallyConsumedTab = false;
            $this->currentPosition     += $length;
            $this->column              += $length;

            return;
        }

        $this->advanceThroughTabs($characters, $advanceByColumns);
    }

    /**
     * Tab-aware slow path of advanceBy(): walks the upcoming characters one by one
     *
     * @param int  $characters       Number of characters (or columns) still to consume; must not be 0
     * @param bool $advanceByColumns When true, a tab is consumed column by column and may be left partially consumed
     */
    private function advanceThroughTabs(int $characters, bool $advanceByColumns): void
    {
        $nextFewChars = $this->isMultibyte ?
            \mb_substr($this->line, $this->currentPosition, $characters, 'UTF-8') :
            \substr($this->line, $this->currentPosition, $characters);

        if ($characters === 1) {
            $asArray = [$nextFewChars];
        } elseif ($this->isMultibyte) {
            /** @var string[] $asArray */
            $asArray = \mb_str_split($nextFewChars, 1, 'UTF-8');
        } else {
            $asArray = \str_split($nextFewChars);
        }

        foreach ($asArray as $c) {
            if ($c === "\t") {
                $charsToTab = self::TAB_STOP - ($this->column % self::TAB_STOP);
                if ($advanceByColumns) {
                    // The tab may be wider than what is left to consume. In that case
                    // only the column moves and the position stays on the tab.
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance             = $charsToTab > $characters ? $characters : $charsToTab;
                    $this->column              += $charsToAdvance;
                    $this->currentPosition     += $this->partiallyConsumedTab ? 0 : 1;
                    $characters                -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column              += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances the cursor by a single space or tab, if present
     *
     * @return bool Whether the cursor was advanced
     */
    public function advanceBySpaceOrTab(): bool
    {
        $character = $this->getCurrentCharacter();

        if ($character === ' ' || $character === "\t") {
            $this->advanceBy(1, true);

            return true;
        }

        return false;
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab(): int
    {
        $newPosition = $this->nextNonSpaceCache ?? $this->getNextNonSpacePosition();
        if ($newPosition === $this->currentPosition) {
            return 0;
        }

        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        // We've just advanced to where that non-space is,
        // so any subsequent calls to find the next one will
        // always return the current position.
        $this->nextNonSpaceCache = $this->currentPosition;
        $this->indent            = 0;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline(): int
    {
        $currentCharacter = $this->getCurrentCharacter();

        // Optimization: Avoid the regex if we know there are no spaces or newlines
        if ($currentCharacter !== ' ' && $currentCharacter !== "\n") {
            $this->previousPosition = $this->currentPosition;

            return 0;
        }

        $matches = [];
        \preg_match('/^ *(?:\n *)?/', $this->getRemainder(), $matches, \PREG_OFFSET_CAPTURE);

        // [0][0] contains the matched text
        // [0][1] contains the index of that match
        \assert(isset($matches[0]));

        // The pattern only matches spaces and newlines (one byte each), so the
        // byte length of the match equals its character count.
        $increment = $matches[0][1] + \strlen($matches[0][0]);

        $this->advanceBy($increment);

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd(): int
    {
        $this->previousPosition  = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns the rest of the line, starting at the current position
     *
     * If the tab at the current position was only partially consumed, the tab
     * itself is left out and replaced by the spaces needed to reach the next
     * tab stop.
     *
     * @return string The remaining text, or an empty string at the end of the line
     */
    public function getRemainder(): string
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $prefix   = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            $position++;
            $charsToTab = self::TAB_STOP - ($this->column % self::TAB_STOP);
            $prefix     = \str_repeat(' ', $charsToTab);
        }

        $subString = $this->isMultibyte ?
            \mb_substr($this->line, $position, null, 'UTF-8') :
            \substr($this->line, $position);

        return $prefix . $subString;
    }

    /**
     * Returns the complete line this cursor was created with
     */
    public function getLine(): string
    {
        return $this->line;
    }

    /**
     * Whether the cursor has reached (or passed) the end of the line
     */
    public function isAtEnd(): bool
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     *
     * The expression is applied to the remainder of the line. Any text between
     * the current position and the start of the match is skipped as well.
     *
     * @psalm-param non-empty-string $regex
     *
     * @return string|null The matched text, or null (cursor untouched) if nothing matched
     */
    public function match(string $regex): ?string
    {
        $subject = $this->getRemainder();

        if (! \preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0][0] contains the matched text
        // $matches[0][1] contains the index of that match

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
            $offset      = \mb_strlen(\substr($subject, 0, $matches[0][1]), 'UTF-8');
            $matchLength = \mb_strlen($matches[0][0], 'UTF-8');
        } else {
            $offset      = $matches[0][1];
            $matchLength = \strlen($matches[0][0]);
        }

        $this->advanceBy($offset + $matchLength);

        return $matches[0][0];
    }

    /**
     * Encapsulates the current state of this cursor in case you need to rollback later.
     *
     * WARNING: Do not parse or use the return value for ANYTHING except for
     * passing it back into restoreState(), as the number of values and their
     * contents may change in any future release without warning.
     */
    public function saveState(): CursorState
    {
        return new CursorState([
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ]);
    }

    /**
     * Restore the cursor to a previous state.
     *
     * Pass in the value previously obtained by calling saveState().
     */
    public function restoreState(CursorState $state): void
    {
        [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ] = $state->toArray();
    }

    /**
     * Returns the current position, in characters from the start of the line
     */
    public function getPosition(): int
    {
        return $this->currentPosition;
    }

    /**
     * Returns the text between the previous position and the current position
     */
    public function getPreviousText(): string
    {
        if ($this->isMultibyte) {
            return \mb_substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition, 'UTF-8');
        }

        return \substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition);
    }

    /**
     * Returns part of the line, independent of the cursor position
     *
     * @param int      $start  Character position to start at
     * @param int|null $length Number of characters to return; null means up to the end of the line
     */
    public function getSubstring(int $start, ?int $length = null): string
    {
        if ($this->isMultibyte) {
            return \mb_substr($this->line, $start, $length, 'UTF-8');
        }

        // Kept as two separate calls: the explicit null is never handed to \substr()
        if ($length !== null) {
            return \substr($this->line, $start, $length);
        }

        return \substr($this->line, $start);
    }

    /**
     * Returns the visual column of the current position
     */
    public function getColumn(): int
    {
        return $this->column;
    }
}
495