Completed
Push — master ( 040f68...6483aa )
by Colin
14s queued 10s
created

Cursor::getColumn()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <colinodell@gmail.com>
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace League\CommonMark;
15
16
/**
 * A forward-moving read cursor over a single line of text.
 *
 * Positions are measured in characters (not bytes). Columns additionally
 * account for tab expansion: a tab advances the column to the next multiple
 * of the tab stop width, so the column may run ahead of the position.
 */
class Cursor
{
    public const INDENT_LEVEL = 4;

    /**
     * Distance, in columns, between consecutive tab stops.
     */
    private const TAB_STOP_WIDTH = 4;

    /**
     * @var string
     */
    private $line;

    /**
     * @var int Length of the line in characters (not bytes)
     */
    private $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end.  In this state, any character-returning method MUST return null.
     */
    private $currentPosition = 0;

    /**
     * @var int
     */
    private $column = 0;

    /**
     * @var int Indent (in columns) computed by the last getNextNonSpacePosition() scan
     */
    private $indent = 0;

    /**
     * @var int Position the cursor was at before the most recent advance
     */
    private $previousPosition = 0;

    /**
     * @var int|null Memoized result of getNextNonSpacePosition(); null when stale
     */
    private $nextNonSpaceCache;

    /**
     * @var bool Whether the cursor currently sits in the middle of an expanded tab
     */
    private $partiallyConsumedTab = false;

    /**
     * @var string 'UTF-8' when the line contains multibyte characters, 'ASCII' otherwise
     */
    private $encoding;

    /**
     * @var bool
     */
    private $lineContainsTabs;

    /**
     * @var bool
     */
    private $isMultibyte;

    /**
     * @var array<int, string> Characters already extracted, keyed by character index
     */
    private $charCache = [];

    /**
     * @param string $line The line being parsed (ASCII or UTF-8)
     */
    public function __construct(string $line)
    {
        $this->line = $line;
        // mb_strlen() may return false on older PHP versions; treat that as an empty line
        $this->length = \mb_strlen($line, 'UTF-8') ?: 0;
        // If the character count differs from the byte count, multibyte-safe functions are required
        $this->isMultibyte = $this->length !== \strlen($line);
        $this->encoding = $this->isMultibyte ? 'UTF-8' : 'ASCII';
        $this->lineContainsTabs = false !== \strpos($line, "\t");
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * As a side effect this also recomputes the indent reported by getIndent().
     * The result is memoized until the cursor moves.
     *
     * @return int
     */
    public function getNextNonSpacePosition(): int
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $i = $this->currentPosition;
        $cols = $this->column;

        while (($c = $this->getCharacter($i)) !== null) {
            if ($c === ' ') {
                $i++;
                $cols++;
            } elseif ($c === "\t") {
                $i++;
                $cols += $this->columnsToNextTabStop($cols);
            } else {
                break;
            }
        }

        // Running off the end of the line means everything remaining was whitespace
        $nextNonSpace = ($c === null) ? $this->length : $i;
        $this->indent = $cols - $this->column;

        return $this->nextNonSpaceCache = $nextNonSpace;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @return string|null The character, or null if only whitespace remains
     */
    public function getNextNonSpaceCharacter(): ?string
    {
        return $this->getCharacter($this->getNextNonSpacePosition());
    }

    /**
     * Calculates the current indent (number of spaces after current position)
     *
     * Tabs count as the number of columns needed to reach the next tab stop.
     *
     * @return int
     */
    public function getIndent(): int
    {
        if ($this->nextNonSpaceCache === null) {
            // The scan computes the indent as a side effect
            $this->getNextNonSpacePosition();
        }

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     *
     * @return bool
     */
    public function isIndented(): bool
    {
        return $this->getIndent() >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given index, or at the current position if no index is given
     *
     * @param int|null $index
     *
     * @return string|null The character, or null if the index is out of bounds
     */
    public function getCharacter(?int $index = null): ?string
    {
        if ($index === null) {
            $index = $this->currentPosition;
        }

        if (isset($this->charCache[$index])) {
            return $this->charCache[$index];
        }

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        return $this->charCache[$index] = $this->sliceLine($index, 1);
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     *
     * @param int $offset
     *
     * @return string|null
     */
    public function peek(int $offset = 1): ?string
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     *
     * @return bool
     */
    public function isBlank(): bool
    {
        // Check the memoized value first to avoid a method call on the hot path
        return $this->nextNonSpaceCache === $this->length || $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards
     *
     * @return void
     */
    public function advance()
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * The previous position is always refreshed, even when $characters is 0,
     * so getPreviousText() reflects only this call.
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     *
     * @return void
     */
    public function advanceBy(int $characters, bool $advanceByColumns = false)
    {
        $this->previousPosition = $this->currentPosition;

        if ($characters === 0) {
            return;
        }

        $this->nextNonSpaceCache = null;

        // Optimization to avoid tab handling logic if we have no tabs
        $nextFewChars = $this->lineContainsTabs ? $this->sliceLine($this->currentPosition, $characters) : '';
        if (false === \strpos($nextFewChars, "\t")) {
            // Never move past the end of the line
            $length = \min($characters, $this->length - $this->currentPosition);
            $this->partiallyConsumedTab = false;
            $this->currentPosition += $length;
            $this->column += $length;

            return;
        }

        if ($characters === 1) {
            // A one-character slice containing a tab can only be the tab itself
            $asArray = [$nextFewChars];
        } elseif ($this->isMultibyte) {
            $asArray = \preg_split('//u', $nextFewChars, -1, \PREG_SPLIT_NO_EMPTY);
        } else {
            $asArray = \str_split($nextFewChars);
        }

        foreach ($asArray as $c) {
            if ($c === "\t") {
                $charsToTab = $this->columnsToNextTabStop($this->column);
                if ($advanceByColumns) {
                    // The tab may span more columns than we were asked to consume; in that
                    // case stay on the tab character and remember it is partially consumed.
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance = $charsToTab > $characters ? $characters : $charsToTab;
                    $this->column += $charsToAdvance;
                    $this->currentPosition += $this->partiallyConsumedTab ? 0 : 1;
                    $characters -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances the cursor by a single space or tab, if present
     *
     * @return bool Whether the cursor moved
     */
    public function advanceBySpaceOrTab(): bool
    {
        $character = $this->getCharacter();

        if ($character === ' ' || $character === "\t") {
            // Advance by one column so a tab is only partially consumed where appropriate
            $this->advanceBy(1, true);

            return true;
        }

        return false;
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab(): int
    {
        $newPosition = $this->getNextNonSpacePosition();
        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline(): int
    {
        $matches = [];
        \preg_match('/^ *(?:\n *)?/', $this->getRemainder(), $matches, \PREG_OFFSET_CAPTURE);

        // [0][0] contains the matched text
        // [0][1] contains the index of that match
        $increment = $matches[0][1] + \strlen($matches[0][0]);

        // Always delegate, even for a zero-length match: advanceBy(0) refreshes the
        // previous position so getPreviousText() does not report text from an earlier move.
        $this->advanceBy($increment);

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd(): int
    {
        $this->previousPosition = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns the text from the current position to the end of the line
     *
     * If the cursor sits inside a partially-consumed tab, the unconsumed part of
     * that tab is returned as the equivalent number of spaces.
     *
     * @return string
     */
    public function getRemainder(): string
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $prefix = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            // Skip the tab character itself and emit its remaining columns as spaces
            $position++;
            $prefix = \str_repeat(' ', $this->columnsToNextTabStop($this->column));
        }

        $subString = $this->isMultibyte ?
            \mb_substr($this->line, $position, null, $this->encoding) :
            (string) \substr($this->line, $position);

        return $prefix . $subString;
    }

    /**
     * @return string The full line this cursor was created with
     */
    public function getLine(): string
    {
        return $this->line;
    }

    /**
     * @return bool Whether the cursor has reached (or passed) the end of the line
     */
    public function isAtEnd(): bool
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null if the expression did not match
     */
    public function match(string $regex): ?string
    {
        $subject = $this->getRemainder();

        if (!\preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0][0] contains the matched text
        // $matches[0][1] contains the index of that match

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
            $offset = \mb_strlen(\mb_strcut($subject, 0, $matches[0][1], $this->encoding), $this->encoding);
            $matchLength = \mb_strlen($matches[0][0], $this->encoding);
        } else {
            $offset = $matches[0][1];
            $matchLength = \strlen($matches[0][0]);
        }

        // Skip anything before the match as well as the match itself
        $this->advanceBy($offset + $matchLength);

        return $matches[0][0];
    }

    /**
     * Encapsulates the current state of this cursor in case you need to rollback later.
     *
     * WARNING: Do not parse or use the return value for ANYTHING except for
     * passing it back into restoreState(), as the number of values and their
     * contents may change in any future release without warning.
     *
     * @return array
     */
    public function saveState()
    {
        return [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ];
    }

    /**
     * Restore the cursor to a previous state.
     *
     * Pass in the value previously obtained by calling saveState().
     *
     * @param array $state
     *
     * @return void
     */
    public function restoreState($state)
    {
        // Order must mirror saveState()
        [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ] = $state;
    }

    /**
     * @return int The current position, in characters
     */
    public function getPosition(): int
    {
        return $this->currentPosition;
    }

    /**
     * @return string The text covered by the most recent advance
     */
    public function getPreviousText(): string
    {
        return \mb_substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition, $this->encoding);
    }

    /**
     * Returns part of the line, measured in characters
     *
     * @param int      $start
     * @param int|null $length Number of characters, or null for everything up to the end
     *
     * @return string
     */
    public function getSubstring(int $start, ?int $length = null): string
    {
        if ($this->isMultibyte) {
            return \mb_substr($this->line, $start, $length, $this->encoding);
        }

        // substr() returns false (not '') for an out-of-range start on PHP 7, which
        // would violate the declared return type; normalize it to an empty string.
        if ($length !== null) {
            return (string) \substr($this->line, $start, $length);
        }

        return (string) \substr($this->line, $start);
    }

    /**
     * @return int The current column, with tabs expanded to tab stops
     */
    public function getColumn(): int
    {
        return $this->column;
    }

    /**
     * Number of columns between the given column and the next tab stop.
     */
    private function columnsToNextTabStop(int $column): int
    {
        return self::TAB_STOP_WIDTH - ($column % self::TAB_STOP_WIDTH);
    }

    /**
     * Extracts $length characters of the line starting at $start, using the
     * cheaper byte-wise function when the line has no multibyte characters.
     */
    private function sliceLine(int $start, int $length): string
    {
        return $this->isMultibyte ?
            \mb_substr($this->line, $start, $length, $this->encoding) :
            (string) \substr($this->line, $start, $length);
    }
}
512