Completed
Push — master ( b42489...8df37e )
by Colin
14s queued 11s
created

Cursor::advanceBySpaceOrTab()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 3

Importance

Changes 0
Metric Value
dl 0
loc 12
ccs 6
cts 6
cp 1
rs 9.8666
c 0
b 0
f 0
cc 3
nc 2
nop 0
crap 3
1
<?php
2
3
declare(strict_types=1);
4
5
/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <colinodell@gmail.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
13
14
namespace League\CommonMark;
15
16
/**
 * A read position that moves over a single line of text.
 *
 * Positions are counted in characters (UTF-8 aware), while columns are counted
 * with tab characters expanded to the next multiple of four. A tab may be
 * "partially consumed" when advancing by columns; in that state the cursor
 * still points at the tab but the column has already moved into it.
 */
class Cursor
{
    public const INDENT_LEVEL = 4;

    /**
     * Width of a tab stop, in columns.
     */
    private const TAB_STOP = 4;

    /**
     * @var string
     */
    private $line;

    /**
     * Length of the line in characters (not bytes).
     *
     * @var int
     */
    private $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end.  In this state, any character-returning method MUST return null.
     */
    private $currentPosition = 0;

    /**
     * @var int
     */
    private $column = 0;

    /**
     * Columns of whitespace between the current column and the next non-space
     * character; refreshed by getNextNonSpacePosition().
     *
     * @var int
     */
    private $indent = 0;

    /**
     * @var int
     */
    private $previousPosition = 0;

    /**
     * Memoized result of getNextNonSpacePosition(); null when it must be recomputed.
     *
     * @var int|null
     */
    private $nextNonSpaceCache;

    /**
     * True when the column has moved into the tab at the current position
     * without moving past it.
     *
     * @var bool
     */
    private $partiallyConsumedTab = false;

    /**
     * @var bool
     */
    private $lineContainsTabs;

    /**
     * True when the character length differs from the byte length.
     *
     * @var bool
     */
    private $isMultibyte;

    /**
     * Characters already extracted from a multibyte line, keyed by character index.
     *
     * @var array<int, string>
     */
    private $charCache = [];

    /**
     * @param string $line The line being parsed (ASCII or UTF-8)
     */
    public function __construct(string $line)
    {
        $this->line = $line;
        $this->length = \mb_strlen($line, 'UTF-8') ?: 0;
        $this->isMultibyte = $this->length !== \strlen($line);
        $this->lineContainsTabs = false !== \strpos($line, "\t");
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * The result is memoized until the cursor moves; as a side effect the
     * indent (in columns) is recalculated.
     *
     * @return int
     */
    public function getNextNonSpacePosition(): int
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $i = $this->currentPosition;
        $cols = $this->column;

        while (($c = $this->getCharacter($i)) !== null) {
            if ($c === ' ') {
                $i++;
                $cols++;
            } elseif ($c === "\t") {
                $i++;
                // A tab moves to the next tab stop
                $cols += self::TAB_STOP - ($cols % self::TAB_STOP);
            } else {
                break;
            }
        }

        // $c is null only when the scan ran off the end of the line
        $nextNonSpace = ($c === null) ? $this->length : $i;
        $this->indent = $cols - $this->column;

        return $this->nextNonSpaceCache = $nextNonSpace;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @return string|null Null when only spaces/tabs (or nothing) remain
     */
    public function getNextNonSpaceCharacter(): ?string
    {
        return $this->getCharacter($this->getNextNonSpacePosition());
    }

    /**
     * Calculates the current indent (number of whitespace columns after the
     * current column, with tabs expanded to tab stops)
     *
     * @return int
     */
    public function getIndent(): int
    {
        if ($this->nextNonSpaceCache === null) {
            $this->getNextNonSpacePosition();
        }

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     *
     * @return bool
     */
    public function isIndented(): bool
    {
        return $this->getIndent() >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given index, or at the current position
     * when no index is given
     *
     * @param int|null $index
     *
     * @return string|null Null when the index is outside the line
     */
    public function getCharacter(?int $index = null): ?string
    {
        if ($index === null) {
            $index = $this->currentPosition;
        }

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        if ($this->isMultibyte) {
            if (isset($this->charCache[$index])) {
                return $this->charCache[$index];
            }

            return $this->charCache[$index] = \mb_substr($this->line, $index, 1, 'UTF-8');
        }

        // Single-byte line: byte offsets and character offsets coincide
        return $this->line[$index];
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     *
     * @param int $offset
     *
     * @return string|null
     */
    public function peek(int $offset = 1): ?string
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     *
     * @return bool
     */
    public function isBlank(): bool
    {
        return $this->nextNonSpaceCache === $this->length || $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards
     */
    public function advance()
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces;
     *                               when true a tab may be only partially consumed
     */
    public function advanceBy(int $characters, bool $advanceByColumns = false)
    {
        $this->previousPosition = $this->currentPosition;

        if ($characters === 0) {
            return;
        }

        $this->nextNonSpaceCache = null;

        // Only inspect the upcoming characters when the line has tabs at all
        $nextFewChars = '';
        if ($this->lineContainsTabs) {
            $nextFewChars = (string) ($this->isMultibyte
                ? \mb_substr($this->line, $this->currentPosition, $characters, 'UTF-8')
                : \substr($this->line, $this->currentPosition, $characters));
        }

        // Optimization to avoid tab handling logic if we have no tabs
        if (\strpos($nextFewChars, "\t") === false) {
            $length = \min($characters, $this->length - $this->currentPosition);
            $this->partiallyConsumedTab = false;
            $this->currentPosition += $length;
            $this->column += $length;

            return;
        }

        if ($characters === 1) {
            // A one-character window that contains a tab is exactly that tab
            $asArray = [$nextFewChars];
        } elseif ($this->isMultibyte) {
            /** @var string[] $asArray */
            $asArray = \preg_split('//u', $nextFewChars, -1, \PREG_SPLIT_NO_EMPTY) ?: [];
        } else {
            $asArray = \str_split($nextFewChars);
        }

        foreach ($asArray as $c) {
            if ($c === "\t") {
                $charsToTab = self::TAB_STOP - ($this->column % self::TAB_STOP);
                if ($advanceByColumns) {
                    // The tab is only partially consumed when it spans more
                    // columns than we still have to advance
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance = $this->partiallyConsumedTab ? $characters : $charsToTab;
                    $this->column += $charsToAdvance;
                    $this->currentPosition += $this->partiallyConsumedTab ? 0 : 1;
                    $characters -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances the cursor by a single space or tab, if present
     *
     * @return bool Whether the cursor moved
     */
    public function advanceBySpaceOrTab(): bool
    {
        $character = $this->getCharacter();

        if ($character === ' ' || $character === "\t") {
            $this->advanceBy(1, true);

            return true;
        }

        return false;
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab(): int
    {
        $newPosition = $this->getNextNonSpacePosition();
        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline(): int
    {
        $remainder = $this->getRemainder();

        // Optimization: Avoid the regex if we know there are no spaces or newlines
        if ($remainder === '' || ($remainder[0] !== ' ' && $remainder[0] !== "\n")) {
            $this->previousPosition = $this->currentPosition;

            return 0;
        }

        $matches = [];
        // Anchored at the start, so the match always begins at offset 0; it
        // contains only spaces and "\n", so byte length equals character length
        \preg_match('/^ *(?:\n *)?/', $remainder, $matches);

        $this->advanceBy(\strlen($matches[0]));

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd(): int
    {
        $this->previousPosition = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns the text from the current position to the end of the line
     *
     * When a tab is partially consumed, its remaining columns are returned as
     * spaces in place of the tab character.
     *
     * @return string
     */
    public function getRemainder(): string
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $prefix = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            $position++;
            $charsToTab = self::TAB_STOP - ($this->column % self::TAB_STOP);
            $prefix = \str_repeat(' ', $charsToTab);
        }

        $subString = $this->isMultibyte ?
            \mb_substr($this->line, $position, null, 'UTF-8') :
            \substr($this->line, $position);

        return $prefix . $subString;
    }

    /**
     * @return string
     */
    public function getLine(): string
    {
        return $this->line;
    }

    /**
     * @return bool
     */
    public function isAtEnd(): bool
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     *
     * @param string $regex
     *
     * @return string|null
     */
    public function match(string $regex): ?string
    {
        $subject = $this->getRemainder();

        if (!\preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0][0] contains the matched text
        // $matches[0][1] contains the index of that match

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
            $offset = \mb_strlen(\mb_strcut($subject, 0, $matches[0][1], 'UTF-8'), 'UTF-8');
            $matchLength = \mb_strlen($matches[0][0], 'UTF-8');
        } else {
            $offset = $matches[0][1];
            $matchLength = \strlen($matches[0][0]);
        }

        $this->advanceBy($offset + $matchLength);

        return $matches[0][0];
    }

    /**
     * Encapsulates the current state of this cursor in case you need to rollback later.
     *
     * WARNING: Do not parse or use the return value for ANYTHING except for
     * passing it back into restoreState(), as the number of values and their
     * contents may change in any future release without warning.
     *
     * @return array
     */
    public function saveState()
    {
        return [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ];
    }

    /**
     * Restore the cursor to a previous state.
     *
     * Pass in the value previously obtained by calling saveState().
     *
     * @param array $state
     */
    public function restoreState($state)
    {
        [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ] = $state;
    }

    /**
     * @return int
     */
    public function getPosition(): int
    {
        return $this->currentPosition;
    }

    /**
     * Returns the text between the previous position and the current position
     *
     * @return string
     */
    public function getPreviousText(): string
    {
        return \mb_substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition, 'UTF-8');
    }

    /**
     * @param int      $start
     * @param int|null $length
     *
     * @return string
     */
    public function getSubstring(int $start, ?int $length = null): string
    {
        if ($this->isMultibyte) {
            return \mb_substr($this->line, $start, $length, 'UTF-8');
        } elseif ($length !== null) {
            return \substr($this->line, $start, $length);
        }

        // substr() is called without a length here rather than being passed null
        return \substr($this->line, $start);
    }

    /**
     * @return int
     */
    public function getColumn(): int
    {
        return $this->column;
    }
}
514