Completed
Push — master ( a80598...8e1550 )
by Colin
06:03
created

Cursor::advanceBy()   C

Complexity

Conditions 15
Paths 84

Size

Total Lines 71

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 40
CRAP Score 15.0765

Importance

Changes 0
Metric Value
dl 0
loc 71
ccs 40
cts 43
cp 0.9302
rs 5.9166
c 0
b 0
f 0
cc 15
nc 84
nop 2
crap 15.0765

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <[email protected]>
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace League\CommonMark\Parser;
15
16
class Cursor
17
{
18
    public const INDENT_LEVEL = 4;
19
20
    /**
21
     * @var string
22
     *
23
     * @psalm-readonly
24
     */
25
    private $line;
26
27
    /**
28
     * @var int
29
     *
30
     * @psalm-readonly
31
     */
32
    private $length;
33
34
    /**
35
     * @var int
36
     *
37
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
38
     * reached the end.  In this state, any character-returning method MUST return null.
39
     */
40
    private $currentPosition = 0;
41
42
    /** @var int */
43
    private $column = 0;
44
45
    /** @var int */
46
    private $indent = 0;
47
48
    /** @var int */
49
    private $previousPosition = 0;
50
51
    /** @var int|null */
52
    private $nextNonSpaceCache;
53
54
    /** @var bool */
55
    private $partiallyConsumedTab = false;
56
57
    /**
58
     * @var bool
59
     *
60
     * @psalm-readonly
61
     */
62
    private $lineContainsTabs;
63
64
    /**
65
     * @var bool
66
     *
67
     * @psalm-readonly
68
     */
69
    private $isMultibyte;
70
71
    /** @var array<int, string> */
72
    private $charCache = [];
73
74
    /**
     * Creates a cursor positioned at the start of the given line.
     *
     * @param string $line The line being parsed (ASCII or UTF-8)
     */
    public function __construct(string $line)
    {
        $characterCount = \mb_strlen($line, 'UTF-8');
        $byteCount      = \strlen($line);

        $this->line             = $line;
        $this->length           = $characterCount ?: 0;
        $this->lineContainsTabs = \strpos($line, "\t") !== false;
        // When the character count differs from the byte count, the mb_* code paths are used
        $this->isMultibyte      = $this->length !== $byteCount;
    }
84
85
    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * The result is memoized in $nextNonSpaceCache. As a side effect, $indent is
     * set to the number of columns between the current column and that character,
     * with each tab advancing to the next multiple of 4 columns.
     */
    public function getNextNonSpacePosition(): int
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $i    = $this->currentPosition;
        $cols = $this->column;

        while (($c = $this->getCharacter($i)) !== null) {
            if ($c === ' ') {
                $i++;
                $cols++;
            } elseif ($c === "\t") {
                $i++;
                // Jump to the next tab stop
                $cols += 4 - ($cols % 4);
            } else {
                break;
            }
        }

        // $c is null only when the scan ran past the last character of the line
        $nextNonSpace = $c === null ? $this->length : $i;
        $this->indent = $cols - $this->column;

        return $this->nextNonSpaceCache = $nextNonSpace;
    }
115
116
    /**
     * Returns the first upcoming character that is neither a space nor a tab,
     * or null when no such character remains on the line.
     */
    public function getNextNonSpaceCharacter(): ?string
    {
        $position = $this->getNextNonSpacePosition();

        return $this->getCharacter($position);
    }
123
124
    /**
     * Returns the current indent: the number of columns of whitespace between
     * the current column and the next non-space character.
     */
    public function getIndent(): int
    {
        // Populates $indent when the cache is empty; returns immediately
        // without touching any state when the cache is already filled.
        $this->getNextNonSpacePosition();

        return $this->indent;
    }
135
136
    /**
     * Tells whether the current indent is at least INDENT_LEVEL columns wide.
     */
    public function isIndented(): bool
    {
        $indent = $this->getIndent();

        return $indent >= self::INDENT_LEVEL;
    }
143
144 2973
    /**
     * Returns the character at the given position.
     *
     * @param int|null $index Character position to read; null means the current position
     *
     * @return string|null The character, or null when the position is outside the line
     */
    public function getCharacter(?int $index = null): ?string
    {
        $position = $index ?? $this->currentPosition;

        // Index out-of-bounds, or we're at the end
        if ($position < 0 || $position >= $this->length) {
            return null;
        }

        // Single-byte lines can be indexed directly
        if (! $this->isMultibyte) {
            return $this->line[$position];
        }

        // Multi-byte lookups are memoized per position
        if (! isset($this->charCache[$position])) {
            $this->charCache[$position] = \mb_substr($this->line, $position, 1, 'UTF-8');
        }

        return $this->charCache[$position];
    }
165
166
    /**
     * Looks at the character $offset positions away from the current position
     * without moving the cursor; returns null if that position is outside the line.
     */
    public function peek(int $offset = 1): ?string
    {
        $target = $this->currentPosition + $offset;

        return $this->getCharacter($target);
    }
173
174
    /**
     * Tells whether everything from the current position to the end of the
     * line is spaces/tabs (or nothing at all).
     */
    public function isBlank(): bool
    {
        // Fast path: the cached answer already points at the end of the line
        if ($this->nextNonSpaceCache === $this->length) {
            return true;
        }

        return $this->getNextNonSpacePosition() === $this->length;
    }
181
182
    /**
     * Moves the cursor forwards by a single character.
     */
    public function advance(): void
    {
        $this->advanceBy(1, false);
    }
189
190
    /**
     * Move the cursor forwards
     *
     * The starting position is always recorded as the previous position. Tabs
     * move the column to the next multiple of 4; when advancing by columns a
     * tab may be only partially consumed, in which case the position stays on
     * the tab and $partiallyConsumedTab is set.
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     */
    public function advanceBy(int $characters, bool $advanceByColumns = false): void
    {
        $this->previousPosition = $this->currentPosition;

        // Nothing to move, so the cached next-non-space position remains valid
        if ($characters === 0) {
            return;
        }

        $this->nextNonSpaceCache = null;

        // Optimization to avoid tab handling logic if the line has no tabs at all
        if (! $this->lineContainsTabs) {
            $this->advanceWithoutTabCharacters($characters);

            return;
        }

        $nextFewChars = $this->isMultibyte ?
            \mb_substr($this->line, $this->currentPosition, $characters, 'UTF-8') :
            \substr($this->line, $this->currentPosition, $characters);

        // Optimization to avoid tab handling logic if the characters being skipped contain no tabs.
        // An empty $nextFewChars also takes this path, so no separate empty-string check is needed.
        if (\strpos($nextFewChars, "\t") === false) {
            $this->advanceWithoutTabCharacters($characters);

            return;
        }

        if ($characters === 1) {
            $asArray = [$nextFewChars];
        } elseif ($this->isMultibyte) {
            /** @var string[] $asArray */
            $asArray = \preg_split('//u', $nextFewChars, -1, \PREG_SPLIT_NO_EMPTY);
        } else {
            $asArray = \str_split($nextFewChars);
        }

        foreach ($asArray as $c) {
            if ($c === "\t") {
                $charsToTab = 4 - ($this->column % 4);
                if ($advanceByColumns) {
                    // The tab is only partially consumed when fewer columns remain than it spans
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance             = $charsToTab > $characters ? $characters : $charsToTab;
                    $this->column              += $charsToAdvance;
                    $this->currentPosition     += $this->partiallyConsumedTab ? 0 : 1;
                    $characters                -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column              += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }
267
268 2880
    /**
     * Moves position and column forwards together, for spans that contain no tabs.
     *
     * The step is limited to the number of characters left on the line.
     */
    private function advanceWithoutTabCharacters(int $characters): void
    {
        $remaining = $this->length - $this->currentPosition;
        $step      = \min($characters, $remaining);

        $this->currentPosition     += $step;
        $this->column              += $step;
        $this->partiallyConsumedTab = false;
    }
277
278
    /**
     * Consumes one space or tab at the current position, if there is one.
     *
     * @return bool True when the cursor was advanced, false otherwise
     */
    public function advanceBySpaceOrTab(): bool
    {
        $current = $this->getCharacter();

        if ($current !== ' ' && $current !== "\t") {
            return false;
        }

        // Advance by one column, so a tab may be only partially consumed
        $this->advanceBy(1, true);

        return true;
    }
293
294
    /**
     * Skips over any run of spaces and tabs at the current position.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab(): int
    {
        $distance = $this->getNextNonSpacePosition() - $this->currentPosition;

        $this->advanceBy($distance);
        $this->partiallyConsumedTab = false;

        return $this->currentPosition - $this->previousPosition;
    }
307
308
    /**
     * Skips zero or more spaces, optionally followed by one newline and more spaces.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline(): int
    {
        $remainder = $this->getRemainder();
        $first     = $remainder === '' ? null : $remainder[0];

        // Optimization: Avoid the regex if we know there are no spaces or newlines
        if ($first !== ' ' && $first !== "\n") {
            $this->previousPosition = $this->currentPosition;

            return 0;
        }

        $matches = [];
        \preg_match('/^ *(?:\n *)?/', $remainder, $matches, \PREG_OFFSET_CAPTURE);

        // Each capture is a [matched text, offset of that match] pair
        [$matchedText, $matchOffset] = $matches[0];

        $this->advanceBy($matchOffset + \strlen($matchedText));

        return $this->currentPosition - $this->previousPosition;
    }
337
338
    /**
     * Jumps the position to the end of the line.
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd(): int
    {
        $start = $this->currentPosition;

        $this->previousPosition  = $start;
        $this->nextNonSpaceCache = null;
        $this->currentPosition   = $this->length;

        return $this->length - $start;
    }
352
353 2847
    /**
     * Returns the text from the current position to the end of the line.
     *
     * When a tab has been partially consumed, the tab itself is skipped and
     * replaced by the spaces needed to reach the next multiple of 4 columns.
     */
    public function getRemainder(): string
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $start   = $this->currentPosition;
        $padding = '';

        if ($this->partiallyConsumedTab) {
            $padding = \str_repeat(' ', 4 - ($this->column % 4));
            $start++;
        }

        if ($this->isMultibyte) {
            return $padding . \mb_substr($this->line, $start, null, 'UTF-8');
        }

        return $padding . \substr($this->line, $start);
    }
373
374 1965
    /**
     * Returns the full line this cursor was constructed with.
     */
    public function getLine(): string
    {
        return $this->line;
    }
378
379 2367
    /**
     * Tells whether the current position is at (or past) the end of the line.
     */
    public function isAtEnd(): bool
    {
        return ! ($this->currentPosition < $this->length);
    }
383
384
    /**
     * Try to match a regular expression against the remainder of the line
     *
     * On success the cursor is advanced to the end of the match.
     *
     * @return string|null The matched text, or null when the expression does not match
     */
    public function match(string $regex): ?string
    {
        $subject = $this->getRemainder();
        $matches = [];

        $found = \preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE);
        if (! $found) {
            return null;
        }

        // Each capture is a [matched text, byte offset of that match] pair
        [$matchedText, $byteOffset] = $matches[0];

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE reports byte offsets, so convert to character counts
            $offset      = \mb_strlen(\substr($subject, 0, $byteOffset), 'UTF-8');
            $matchLength = \mb_strlen($matchedText, 'UTF-8');
        } else {
            $offset      = $byteOffset;
            $matchLength = \strlen($matchedText);
        }

        $this->advanceBy($offset + $matchLength);

        return $matchedText;
    }
415
416
    /**
     * Captures the cursor's current state so it can be rolled back later.
     *
     * WARNING: Treat the return value as opaque. Its only supported use is
     * being handed back to restoreState(); the number of values and their
     * contents may change in any future release without warning.
     */
    public function saveState(): CursorState
    {
        // Order must match the destructuring in restoreState()
        $snapshot = [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ];

        return new CursorState($snapshot);
    }
434
435
    /**
     * Rolls the cursor back to a previously saved state.
     *
     * Pass in the value previously obtained by calling saveState().
     */
    public function restoreState(CursorState $state): void
    {
        $snapshot = $state->toArray();

        // Order must match the array built in saveState()
        [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ] = $snapshot;
    }
451
452 735
    /**
     * Returns the current character position within the line.
     */
    public function getPosition(): int
    {
        return $this->currentPosition;
    }
456
457 399
    /**
     * Returns the text lying between the previous position and the current position.
     */
    public function getPreviousText(): string
    {
        $consumed = $this->currentPosition - $this->previousPosition;

        return \mb_substr($this->line, $this->previousPosition, $consumed, 'UTF-8');
    }
461
462 426
    /**
     * Returns part of the line, addressed by character position.
     *
     * @param int      $start  Position of the first character to return
     * @param int|null $length Maximum number of characters; null means through the end of the line
     */
    public function getSubstring(int $start, ?int $length = null): string
    {
        if ($this->isMultibyte) {
            return \mb_substr($this->line, $start, $length, 'UTF-8');
        }

        // The two-argument form is used when no length is given, so null is never passed to substr()
        return $length === null
            ? \substr($this->line, $start)
            : \substr($this->line, $start, $length);
    }
474
475 285
    /**
     * Returns the current column.
     */
    public function getColumn(): int
    {
        return $this->column;
    }
479
}
480