Completed
Push — master ( d4d1b7...0de0ea )
by Colin
01:02
created

Cursor::advanceBySpaceOrTab()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 3

Importance

Changes 0
Metric Value
dl 0
loc 12
ccs 6
cts 6
cp 1
rs 9.8666
c 0
b 0
f 0
cc 3
nc 2
nop 0
crap 3
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <[email protected]>
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace League\CommonMark\Parser;
15
16
/**
 * Character-based read cursor over a single line of text (ASCII or UTF-8).
 *
 * Positions are counted in characters, not bytes. Alongside the position the
 * cursor tracks a "column" in which a tab advances to the next multiple of
 * four, so that a tab can be consumed partially when advancing by columns.
 */
class Cursor
{
    public const INDENT_LEVEL = 4;

    /** Number of columns between tab stops */
    private const TAB_STOP_WIDTH = 4;

    /** @var string */
    private $line;

    /** @var int Length of the line in characters */
    private $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end.  In this state, any character-returning method MUST return null.
     */
    private $currentPosition = 0;

    /** @var int */
    private $column = 0;

    /** @var int */
    private $indent = 0;

    /** @var int */
    private $previousPosition = 0;

    /** @var int|null */
    private $nextNonSpaceCache;

    /** @var bool */
    private $partiallyConsumedTab = false;

    /** @var bool */
    private $lineContainsTabs;

    /** @var bool */
    private $isMultibyte;

    /** @var array<int, string> */
    private $charCache = [];

    /**
     * @param string $line The line being parsed (ASCII or UTF-8)
     */
    public function __construct(string $line)
    {
        $this->line             = $line;
        $this->length           = \mb_strlen($line, 'UTF-8') ?: 0;
        // A character count that differs from the byte count means multibyte characters are present
        $this->isMultibyte      = $this->length !== \strlen($line);
        $this->lineContainsTabs = \strpos($line, "\t") !== false;
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * The result is cached until the cursor moves; the indent is computed as a side effect.
     */
    public function getNextNonSpacePosition(): int
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $position = $this->currentPosition;
        $column   = $this->column;

        while (($char = $this->getCharacter($position)) !== null) {
            if ($char === ' ') {
                $column++;
            } elseif ($char === "\t") {
                // A tab moves the column forward to the next tab stop
                $column += self::TAB_STOP_WIDTH - ($column % self::TAB_STOP_WIDTH);
            } else {
                break;
            }

            $position++;
        }

        $this->indent = $column - $this->column;

        return $this->nextNonSpaceCache = $char === null ? $this->length : $position;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @return string|null The character, or null if only spaces/tabs remain
     */
    public function getNextNonSpaceCharacter(): ?string
    {
        return $this->getCharacter($this->getNextNonSpacePosition());
    }

    /**
     * Calculates the current indent (number of columns of spaces/tabs after the current position)
     */
    public function getIndent(): int
    {
        // Populates $this->indent when the cache is empty; returns immediately otherwise
        $this->getNextNonSpacePosition();

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     */
    public function isIndented(): bool
    {
        return $this->getIndent() >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given position
     *
     * @param int|null $index Character position; defaults to the current position
     *
     * @return string|null The character, or null if the position is out of bounds
     */
    public function getCharacter(?int $index = null): ?string
    {
        $index = $index ?? $this->currentPosition;

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        if (! $this->isMultibyte) {
            // Bytes and characters coincide, so direct string offset access is safe
            return $this->line[$index];
        }

        if (! isset($this->charCache[$index])) {
            $this->charCache[$index] = \mb_substr($this->line, $index, 1, 'UTF-8');
        }

        return $this->charCache[$index];
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     *
     * @param int $offset Number of characters ahead of the current position to look
     */
    public function peek(int $offset = 1): ?string
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     */
    public function isBlank(): bool
    {
        return $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards by one character
     */
    public function advance(): void
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     */
    public function advanceBy(int $characters, bool $advanceByColumns = false): void
    {
        $this->previousPosition = $this->currentPosition;

        if ($characters === 0) {
            return;
        }

        $this->nextNonSpaceCache = null;

        // The upcoming slice is only needed (and only extracted) when the line has tabs at all
        $nextFewChars = null;
        if ($this->lineContainsTabs) {
            $nextFewChars = $this->isMultibyte
                ? \mb_substr($this->line, $this->currentPosition, $characters, 'UTF-8')
                : \substr($this->line, $this->currentPosition, $characters);
        }

        // Optimization to avoid tab handling logic if we have no tabs
        if ($nextFewChars === null || \strpos($nextFewChars, "\t") === false) {
            $length                     = \min($characters, $this->length - $this->currentPosition);
            $this->partiallyConsumedTab = false;
            $this->currentPosition     += $length;
            $this->column              += $length;

            return;
        }

        // From here on the slice is known to contain a tab, so a one-character slice is that tab
        if ($characters === 1) {
            $asArray = [$nextFewChars];
        } elseif ($this->isMultibyte) {
            /** @var string[] $asArray */
            $asArray = \preg_split('//u', $nextFewChars, -1, \PREG_SPLIT_NO_EMPTY);
        } else {
            $asArray = \str_split($nextFewChars);
        }

        foreach ($asArray as $c) {
            if ($c === "\t") {
                $charsToTab = self::TAB_STOP_WIDTH - ($this->column % self::TAB_STOP_WIDTH);
                if ($advanceByColumns) {
                    // The tab may be wider than what is left to consume; in that case stay on it
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance             = $this->partiallyConsumedTab ? $characters : $charsToTab;
                    $this->column              += $charsToAdvance;
                    $this->currentPosition     += $this->partiallyConsumedTab ? 0 : 1;
                    $characters                -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column              += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances the cursor by a single space or tab, if present
     *
     * @return bool Whether the cursor was advanced
     */
    public function advanceBySpaceOrTab(): bool
    {
        $character = $this->getCharacter();

        if ($character !== ' ' && $character !== "\t") {
            return false;
        }

        $this->advanceBy(1, true);

        return true;
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab(): int
    {
        $newPosition = $this->getNextNonSpacePosition();
        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline(): int
    {
        $remainder = $this->getRemainder();

        // Optimization: Avoid the regex if we know there are no spaces or newlines
        if ($remainder === '' || ($remainder[0] !== ' ' && $remainder[0] !== "\n")) {
            $this->previousPosition = $this->currentPosition;

            return 0;
        }

        $matches = [];
        \preg_match('/^ *(?:\n *)?/', $remainder, $matches, \PREG_OFFSET_CAPTURE);

        // [0][0] contains the matched text
        // [0][1] contains the index of that match
        $increment = $matches[0][1] + \strlen($matches[0][0]);

        $this->advanceBy($increment);

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd(): int
    {
        $this->previousPosition  = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns the text from the current position to the end of the line
     *
     * If a tab has been partially consumed, the unconsumed part of that tab is
     * returned as spaces in front of the text that follows it.
     */
    public function getRemainder(): string
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $prefix   = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            // Skip the tab itself and stand in spaces for the columns it still covers
            $position++;
            $prefix = \str_repeat(' ', self::TAB_STOP_WIDTH - ($this->column % self::TAB_STOP_WIDTH));
        }

        $subString = $this->isMultibyte ?
            \mb_substr($this->line, $position, null, 'UTF-8') :
            \substr($this->line, $position);

        return $prefix . $subString;
    }

    /**
     * Returns the full line this cursor was created with
     */
    public function getLine(): string
    {
        return $this->line;
    }

    /**
     * Whether the cursor has reached (or passed) the end of the line
     */
    public function isAtEnd(): bool
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     *
     * @return string|null The matched text, or null if the expression did not match
     */
    public function match(string $regex): ?string
    {
        $subject = $this->getRemainder();

        if (! \preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0][0] contains the matched text
        // $matches[0][1] contains the index of that match

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
            $offset      = \mb_strlen(\substr($subject, 0, $matches[0][1]), 'UTF-8');
            $matchLength = \mb_strlen($matches[0][0], 'UTF-8');
        } else {
            $offset      = $matches[0][1];
            $matchLength = \strlen($matches[0][0]);
        }

        $this->advanceBy($offset + $matchLength);

        return $matches[0][0];
    }

    /**
     * Encapsulates the current state of this cursor in case you need to rollback later.
     *
     * WARNING: Do not parse or use the return value for ANYTHING except for
     * passing it back into restoreState(), as the number of values and their
     * contents may change in any future release without warning.
     */
    public function saveState(): CursorState
    {
        return new CursorState([
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ]);
    }

    /**
     * Restore the cursor to a previous state.
     *
     * Pass in the value previously obtained by calling saveState().
     */
    public function restoreState(CursorState $state): void
    {
        // The order here must mirror the order used in saveState()
        [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ] = $state->toArray();
    }

    /**
     * Returns the current character position
     */
    public function getPosition(): int
    {
        return $this->currentPosition;
    }

    /**
     * Returns the text between the previous position and the current position
     */
    public function getPreviousText(): string
    {
        return \mb_substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition, 'UTF-8');
    }

    /**
     * Returns part of the line, addressed in characters
     *
     * @param int      $start  Character position to start from
     * @param int|null $length Number of characters to return; null means "to the end of the line"
     */
    public function getSubstring(int $start, ?int $length = null): string
    {
        if ($this->isMultibyte) {
            return \mb_substr($this->line, $start, $length, 'UTF-8');
        }

        // Before PHP 8.0 substr() returns false for an out-of-range start, which would violate
        // the declared string return type under strict_types; the cast normalizes that to ''
        if ($length !== null) {
            return (string) \substr($this->line, $start, $length);
        }

        return (string) \substr($this->line, $start);
    }

    /**
     * Returns the current column (tabs counted up to the next tab stop)
     */
    public function getColumn(): int
    {
        return $this->column;
    }
}
446