Cursor::advanceBy()   C
last analyzed

Complexity

Conditions 14
Paths 84

Size

Total Lines 68
Code Lines 44

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 39
CRAP Score 14.0713

Importance

Changes 0
Metric Value
eloc 44
dl 0
loc 68
ccs 39
cts 42
cp 0.9286
rs 6.2666
c 0
b 0
f 0
cc 14
nc 84
nop 2
crap 14.0713

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

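Commonly applied refactorings include Extract Method: move a self-contained group of statements out of the long method into a new, well-named method.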

1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <[email protected]>
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace League\CommonMark\Parser;
15
16
use League\CommonMark\Exception\UnexpectedEncodingException;
17
18
/**
 * A forward-moving read position over a single line of ASCII or UTF-8 text.
 *
 * Positions are counted in characters (not bytes), while columns additionally
 * account for tab characters, which advance to the next tab stop.
 */
class Cursor
{
    public const INDENT_LEVEL = 4;

    /**
     * Distance between tab stops, in columns: a tab always moves the column
     * to the next multiple of this value.
     */
    private const TAB_STOP_WIDTH = 4;

    /**
     * @var string
     *
     * @psalm-readonly
     */
    private $line;

    /**
     * Length of the line in characters (not bytes)
     *
     * @var int
     *
     * @psalm-readonly
     */
    private $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end.  In this state, any character-returning method MUST return null.
     */
    private $currentPosition = 0;

    /** @var int */
    private $column = 0;

    /** @var int */
    private $indent = 0;

    /** @var int */
    private $previousPosition = 0;

    /**
     * Memoized result of getNextNonSpacePosition(); reset to null whenever the cursor moves
     *
     * @var int|null
     */
    private $nextNonSpaceCache;

    /**
     * True when a column-based advance stopped part-way through a tab character
     *
     * @var bool
     */
    private $partiallyConsumedTab = false;

    /**
     * @var bool
     *
     * @psalm-readonly
     */
    private $lineContainsTabs;

    /**
     * True when the line contains at least one multi-byte character
     *
     * @var bool
     *
     * @psalm-readonly
     */
    private $isMultibyte;

    /**
     * Characters already extracted from a multi-byte line, keyed by character index
     *
     * @var array<int, string>
     */
    private $charCache = [];

    /**
     * @param string $line The line being parsed (ASCII or UTF-8)
     *
     * @throws UnexpectedEncodingException if the line is not valid UTF-8
     */
    public function __construct(string $line)
    {
        if (! \mb_check_encoding($line, 'UTF-8')) {
            throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
        }

        $this->line             = $line;
        $this->length           = \mb_strlen($line, 'UTF-8') ?: 0;
        $this->isMultibyte      = $this->length !== \strlen($line);
        $this->lineContainsTabs = \strpos($line, "\t") !== false;
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * As a side effect this also computes the indent, i.e. the number of
     * columns spanned by the skipped whitespace. The result is cached until
     * the cursor moves.
     */
    public function getNextNonSpacePosition(): int
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $i    = $this->currentPosition;
        $cols = $this->column;

        while (($c = $this->getCharacter($i)) !== null) {
            if ($c === ' ') {
                $cols++;
            } elseif ($c === "\t") {
                // A tab jumps to the next tab stop
                $cols += self::TAB_STOP_WIDTH - ($cols % self::TAB_STOP_WIDTH);
            } else {
                break;
            }

            $i++;
        }

        $this->indent = $cols - $this->column;

        // $c is null only when the scan ran off the end of the line
        return $this->nextNonSpaceCache = $c === null ? $this->length : $i;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     */
    public function getNextNonSpaceCharacter(): ?string
    {
        return $this->getCharacter($this->getNextNonSpacePosition());
    }

    /**
     * Calculates the current indent (number of spaces after current position)
     */
    public function getIndent(): int
    {
        if ($this->nextNonSpaceCache === null) {
            // The indent is computed as a side effect of locating the next non-space
            $this->getNextNonSpacePosition();
        }

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     */
    public function isIndented(): bool
    {
        return $this->getIndent() >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given index, or at the current position when no index is given
     *
     * @param int|null $index Character index into the line; null means the current position
     *
     * @return string|null The character, or null if the index is out of bounds
     */
    public function getCharacter(?int $index = null): ?string
    {
        if ($index === null) {
            $index = $this->currentPosition;
        }

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        if (! $this->isMultibyte) {
            // Single-byte line: character index equals byte index
            return $this->line[$index];
        }

        if (isset($this->charCache[$index])) {
            return $this->charCache[$index];
        }

        return $this->charCache[$index] = \mb_substr($this->line, $index, 1, 'UTF-8');
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     */
    public function peek(int $offset = 1): ?string
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     */
    public function isBlank(): bool
    {
        return $this->nextNonSpaceCache === $this->length || $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards
     */
    public function advance(): void
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * The position before the move is always recorded as the previous
     * position, even when $characters is 0.
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     */
    public function advanceBy(int $characters, bool $advanceByColumns = false): void
    {
        $this->previousPosition = $this->currentPosition;

        if ($characters === 0) {
            return;
        }

        $this->nextNonSpaceCache = null;

        // Optimization to avoid tab handling logic if the line has no tabs at all
        if (! $this->lineContainsTabs) {
            $this->advanceWithoutTabCharacters($characters);

            return;
        }

        $nextFewChars = $this->isMultibyte ?
            \mb_substr($this->line, $this->currentPosition, $characters, 'UTF-8') :
            \substr($this->line, $this->currentPosition, $characters);

        // Optimization to avoid tab handling logic if the upcoming characters have no tabs.
        // This also covers an empty chunk, which by definition contains no tab.
        if (\strpos($nextFewChars, "\t") === false) {
            $this->advanceWithoutTabCharacters($characters);

            return;
        }

        $this->advanceAcrossTabs(
            $this->splitIntoCharacters($nextFewChars, $characters),
            $characters,
            $advanceByColumns
        );
    }

    /**
     * Splits the chunk that is about to be consumed into individual characters
     *
     * @param string $chunk      The upcoming characters, known to contain a tab
     * @param int    $characters The number of characters that were requested
     *
     * @return string[]
     */
    private function splitIntoCharacters(string $chunk, int $characters): array
    {
        if ($characters === 1) {
            // A single requested character needs no splitting
            return [$chunk];
        }

        if ($this->isMultibyte) {
            // The line was validated as UTF-8 in the constructor, so a split failure is not expected
            return \preg_split('//u', $chunk, -1, \PREG_SPLIT_NO_EMPTY) ?: [];
        }

        return \str_split($chunk);
    }

    /**
     * Walks over the given characters, updating position and column with tab-stop awareness
     *
     * @param string[] $chars            The characters to walk over
     * @param int      $characters       Remaining budget: columns when a tab is consumed in
     *                                   column mode, otherwise one unit per character
     * @param bool     $advanceByColumns Whether a tab may be consumed only partially
     */
    private function advanceAcrossTabs(array $chars, int $characters, bool $advanceByColumns): void
    {
        foreach ($chars as $c) {
            if ($c !== "\t") {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            } elseif ($advanceByColumns) {
                $charsToTab                 = self::TAB_STOP_WIDTH - ($this->column % self::TAB_STOP_WIDTH);
                $charsToAdvance             = \min($characters, $charsToTab);
                $this->partiallyConsumedTab = $charsToTab > $characters;
                $this->column              += $charsToAdvance;
                // Only step past the tab once all of its columns have been consumed
                $this->currentPosition     += $this->partiallyConsumedTab ? 0 : 1;
                $characters                -= $charsToAdvance;
            } else {
                $this->partiallyConsumedTab = false;
                $this->column              += self::TAB_STOP_WIDTH - ($this->column % self::TAB_STOP_WIDTH);
                $this->currentPosition++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances position and column by the same amount, never moving past the end of the line
     */
    private function advanceWithoutTabCharacters(int $characters): void
    {
        $length                     = \min($characters, $this->length - $this->currentPosition);
        $this->partiallyConsumedTab = false;
        $this->currentPosition     += $length;
        $this->column              += $length;
    }

    /**
     * Advances the cursor by a single space or tab, if present
     *
     * @return bool Whether the cursor was advanced
     */
    public function advanceBySpaceOrTab(): bool
    {
        $character = $this->getCharacter();

        if ($character === ' ' || $character === "\t") {
            $this->advanceBy(1, true);

            return true;
        }

        return false;
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab(): int
    {
        $newPosition = $this->getNextNonSpacePosition();
        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline(): int
    {
        $remainder = $this->getRemainder();

        // Optimization: Avoid the regex if we know there are no spaces or newlines
        if ($remainder === '' || ($remainder[0] !== ' ' && $remainder[0] !== "\n")) {
            $this->previousPosition = $this->currentPosition;

            return 0;
        }

        $matches = [];
        \preg_match('/^ *(?:\n *)?/', $remainder, $matches);

        // The pattern is anchored at the start, so the match length is the distance to move.
        // Only spaces and newlines can match, so bytes and characters coincide.
        $this->advanceBy(\strlen($matches[0]));

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd(): int
    {
        $this->previousPosition  = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns the text from the current position to the end of the line
     *
     * If a tab has been partially consumed, the tab itself is skipped and its
     * remaining columns are returned as leading spaces instead.
     */
    public function getRemainder(): string
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $prefix   = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            $position++;
            $charsToTab = self::TAB_STOP_WIDTH - ($this->column % self::TAB_STOP_WIDTH);
            $prefix     = \str_repeat(' ', $charsToTab);
        }

        $subString = $this->isMultibyte ?
            \mb_substr($this->line, $position, null, 'UTF-8') :
            \substr($this->line, $position);

        return $prefix . $subString;
    }

    /**
     * Returns the full line this cursor was created with
     */
    public function getLine(): string
    {
        return $this->line;
    }

    /**
     * Whether the cursor has reached (or passed) the end of the line
     */
    public function isAtEnd(): bool
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     *
     * @return string|null The matched text, or null if the expression did not match
     */
    public function match(string $regex): ?string
    {
        $subject = $this->getRemainder();

        if (! \preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0] holds the matched text and the byte index where it starts
        [$matchedText, $byteOffset] = $matches[0];

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
            $offset      = \mb_strlen(\substr($subject, 0, $byteOffset), 'UTF-8');
            $matchLength = \mb_strlen($matchedText, 'UTF-8');
        } else {
            $offset      = $byteOffset;
            $matchLength = \strlen($matchedText);
        }

        // Skip everything before the match as well as the match itself
        $this->advanceBy($offset + $matchLength);

        return $matchedText;
    }

    /**
     * Encapsulates the current state of this cursor in case you need to rollback later.
     *
     * WARNING: Do not parse or use the return value for ANYTHING except for
     * passing it back into restoreState(), as the number of values and their
     * contents may change in any future release without warning.
     */
    public function saveState(): CursorState
    {
        // The order of these values must mirror the destructuring in restoreState()
        return new CursorState([
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ]);
    }

    /**
     * Restore the cursor to a previous state.
     *
     * Pass in the value previously obtained by calling saveState().
     */
    public function restoreState(CursorState $state): void
    {
        [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ] = $state->toArray();
    }

    /**
     * Returns the current position, as a character index into the line
     */
    public function getPosition(): int
    {
        return $this->currentPosition;
    }

    /**
     * Returns the text between the previous position and the current position
     */
    public function getPreviousText(): string
    {
        $length = $this->currentPosition - $this->previousPosition;

        if ($this->isMultibyte) {
            return \mb_substr($this->line, $this->previousPosition, $length, 'UTF-8');
        }

        // Single-byte line: the cheaper byte-wise function yields the same result
        return \substr($this->line, $this->previousPosition, $length);
    }

    /**
     * Returns part of the line, addressed by character index
     *
     * @param int      $start  Character index to start from
     * @param int|null $length Number of characters to return; null means up to the end of the line
     */
    public function getSubstring(int $start, ?int $length = null): string
    {
        if ($this->isMultibyte) {
            return \mb_substr($this->line, $start, $length, 'UTF-8');
        }

        if ($length !== null) {
            return \substr($this->line, $start, $length);
        }

        return \substr($this->line, $start);
    }

    /**
     * Returns the current column, which (unlike the position) accounts for tab stops
     */
    public function getColumn(): int
    {
        return $this->column;
    }
}
486