Completed
Push — master ( d6425f...46570e )
by Colin
15s queued 10s
created

Cursor::advanceBySpaceOrTab()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 3

Importance

Changes 0
Metric Value
dl 0
loc 12
ccs 6
cts 6
cp 1
rs 9.8666
c 0
b 0
f 0
cc 3
nc 2
nop 0
crap 3
1
<?php
2
3
/*
4
 * This file is part of the league/commonmark package.
5
 *
6
 * (c) Colin O'Dell <colinodell@gmail.com>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace League\CommonMark;
13
14
/**
 * A forward-moving cursor over a single line of text.
 *
 * Positions are counted in characters of the configured encoding (not bytes),
 * while columns additionally expand each tab to the next multiple of four.
 */
class Cursor
{
    /**
     * Indent (in columns) at which isIndented() starts returning true.
     */
    public const INDENT_LEVEL = 4;

    /**
     * Width of a tab stop, in columns.
     */
    private const TAB_STOP = 4;

    /**
     * @var string
     */
    private $line;

    /**
     * @var int Length of the line, in characters of $encoding
     */
    private $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end.  In this state, any character-returning method MUST return null.
     */
    private $currentPosition = 0;

    /**
     * @var int
     */
    private $column = 0;

    /**
     * @var int
     */
    private $indent = 0;

    /**
     * @var int
     */
    private $previousPosition = 0;

    /**
     * @var int|null
     */
    private $nextNonSpaceCache;

    /**
     * @var bool
     */
    private $partiallyConsumedTab = false;

    /**
     * @var string
     */
    private $encoding;

    /**
     * @var bool
     */
    private $lineContainsTabs;

    /**
     * @var bool
     */
    private $isMultibyte;

    /**
     * @var array<int, string>
     */
    private $charCache = [];

    /**
     * @param string $line     The line being parsed
     * @param string $encoding The encoding of that line
     */
    public function __construct(string $line, string $encoding = 'UTF-8')
    {
        $this->line = $line;
        $this->encoding = $encoding;
        $this->length = \mb_strlen($line, $this->encoding);
        // If the character count differs from the byte count, at least one character is multibyte
        $this->isMultibyte = $this->length !== \strlen($line);
        $this->lineContainsTabs = \strpos($line, "\t") !== false;
    }

    /**
     * @return string The encoding passed to the constructor
     */
    public function getEncoding(): string
    {
        return $this->encoding;
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * The result is cached until the cursor moves; the indent is computed as a side effect.
     *
     * @return int
     */
    public function getNextNonSpacePosition(): int
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $i = $this->currentPosition;
        $cols = $this->column;

        while (($c = $this->getCharacter($i)) !== null) {
            if ($c === ' ') {
                $i++;
                $cols++;
            } elseif ($c === "\t") {
                $i++;
                // A tab moves to the next tab stop
                $cols += self::TAB_STOP - ($cols % self::TAB_STOP);
            } else {
                break;
            }
        }

        // Running off the end of the line means everything that remains is whitespace
        $nextNonSpace = ($c === null) ? $this->length : $i;
        $this->indent = $cols - $this->column;

        return $this->nextNonSpaceCache = $nextNonSpace;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @return string|null The character, or null if only spaces/tabs remain
     */
    public function getNextNonSpaceCharacter(): ?string
    {
        return $this->getCharacter($this->getNextNonSpacePosition());
    }

    /**
     * Calculates the current indent (number of columns of spaces/tabs after the current position)
     *
     * @return int
     */
    public function getIndent(): int
    {
        if ($this->nextNonSpaceCache === null) {
            // The indent is calculated as a side effect of locating the next non-space
            $this->getNextNonSpacePosition();
        }

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     *
     * @return bool
     */
    public function isIndented(): bool
    {
        return $this->getIndent() >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given position
     *
     * @param int|null $index Character position; defaults to the current position
     *
     * @return string|null The character, or null if the index is out of bounds
     */
    public function getCharacter(?int $index = null): ?string
    {
        if ($index === null) {
            $index = $this->currentPosition;
        }

        if (isset($this->charCache[$index])) {
            return $this->charCache[$index];
        }

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        return $this->charCache[$index] = \mb_substr($this->line, $index, 1, $this->encoding);
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     *
     * @param int $offset
     *
     * @return string|null
     */
    public function peek(int $offset = 1): ?string
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     *
     * @return bool
     */
    public function isBlank(): bool
    {
        return $this->nextNonSpaceCache === $this->length || $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards
     */
    public function advance()
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     */
    public function advanceBy(int $characters, bool $advanceByColumns = false)
    {
        $this->previousPosition = $this->currentPosition;

        if ($characters === 0) {
            return;
        }

        $this->nextNonSpaceCache = null;

        // Optimization to avoid tab handling logic if we have no tabs
        if (!$this->lineContainsTabs) {
            $this->advanceWithoutTabs($characters);

            return;
        }

        $nextFewChars = \mb_substr($this->line, $this->currentPosition, $characters, $this->encoding);
        if (\strpos($nextFewChars, "\t") === false) {
            $this->advanceWithoutTabs($characters);

            return;
        }

        // A single requested character that contains a tab can only be that tab; no need to split
        $asArray = $characters === 1 ? [$nextFewChars] : $this->splitIntoCharacters($nextFewChars);

        foreach ($asArray as $c) {
            if ($c === "\t") {
                $charsToTab = self::TAB_STOP - ($this->column % self::TAB_STOP);
                if ($advanceByColumns) {
                    // The tab is only partially consumed when it is wider than what is left to advance
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance = $this->partiallyConsumedTab ? $characters : $charsToTab;
                    $this->column += $charsToAdvance;
                    $this->currentPosition += $this->partiallyConsumedTab ? 0 : 1;
                    $characters -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances position and column in lock-step when no tab lies in the way.
     *
     * @param int $characters Number of characters requested; clamped to what is left of the line
     */
    private function advanceWithoutTabs(int $characters): void
    {
        $length = \min($characters, $this->length - $this->currentPosition);
        $this->partiallyConsumedTab = false;
        $this->currentPosition += $length;
        $this->column += $length;
    }

    /**
     * Splits text into an array of single characters.
     *
     * @param string $text
     *
     * @return string[]
     */
    private function splitIntoCharacters(string $text): array
    {
        // -1 means "no limit"; passing null here is deprecated as of PHP 8.1
        $chars = \preg_split('//u', $text, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars !== false) {
            return $chars;
        }

        // preg_split() fails when the text is not valid UTF-8; fall back to the cursor's own encoding
        $chars = [];
        $count = \mb_strlen($text, $this->encoding);
        for ($i = 0; $i < $count; $i++) {
            $chars[] = \mb_substr($text, $i, 1, $this->encoding);
        }

        return $chars;
    }

    /**
     * Advances the cursor by a single space or tab, if present
     *
     * @return bool Whether the cursor was advanced
     */
    public function advanceBySpaceOrTab(): bool
    {
        $character = $this->getCharacter();

        if ($character === ' ' || $character === "\t") {
            $this->advanceBy(1, true);

            return true;
        }

        return false;
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab(): int
    {
        $newPosition = $this->getNextNonSpacePosition();
        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline(): int
    {
        $matches = [];
        \preg_match('/^ *(?:\n *)?/', $this->getRemainder(), $matches);

        // The pattern is anchored at the start and only matches single-byte characters,
        // so the byte length of the match is the number of characters to skip.
        $increment = \strlen($matches[0]);

        if ($increment === 0) {
            return 0;
        }

        $this->advanceBy($increment);

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd(): int
    {
        $this->previousPosition = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns the text from the current position to the end of the line
     *
     * A partially-consumed tab is replaced by the spaces still needed to reach its tab stop.
     *
     * @return string
     */
    public function getRemainder(): string
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $prefix = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            // Skip the tab itself and emit the columns of it that have not been consumed yet
            $position++;
            $charsToTab = self::TAB_STOP - ($this->column % self::TAB_STOP);
            $prefix = \str_repeat(' ', $charsToTab);
        }

        return $prefix . \mb_substr($this->line, $position, null, $this->encoding);
    }

    /**
     * @return string The full line passed to the constructor
     */
    public function getLine(): string
    {
        return $this->line;
    }

    /**
     * @return bool Whether the current position is at (or past) the end of the line
     */
    public function isAtEnd(): bool
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null if the remainder does not match
     */
    public function match(string $regex): ?string
    {
        $subject = $this->getRemainder();

        if (!\preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // With PREG_OFFSET_CAPTURE each entry is a [matched text, byte offset] pair
        [$matchedText, $byteOffset] = $matches[0];

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
            $offset = \mb_strlen(\mb_strcut($subject, 0, $byteOffset, $this->encoding), $this->encoding);
            $matchLength = \mb_strlen($matchedText, $this->encoding);
        } else {
            $offset = $byteOffset;
            $matchLength = \strlen($matchedText);
        }

        $this->advanceBy($offset + $matchLength);

        return $matchedText;
    }

    /**
     * Encapsulates the current state of this cursor in case you need to rollback later.
     *
     * WARNING: Do not parse or use the return value for ANYTHING except for
     * passing it back into restoreState(), as the number of values and their
     * contents may change in any future release without warning.
     *
     * @return array
     */
    public function saveState()
    {
        // The order here must mirror the destructuring in restoreState()
        return [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ];
    }

    /**
     * Restore the cursor to a previous state.
     *
     * Pass in the value previously obtained by calling saveState().
     *
     * @param array $state
     */
    public function restoreState($state)
    {
        // The order here must mirror the array built by saveState()
        [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ] = $state;
    }

    /**
     * @return int The current position, in characters
     */
    public function getPosition(): int
    {
        return $this->currentPosition;
    }

    /**
     * @return string The text between the previous position and the current position
     */
    public function getPreviousText(): string
    {
        return \mb_substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition, $this->encoding);
    }

    /**
     * @return int The current column (tabs count as the width up to their tab stop)
     */
    public function getColumn(): int
    {
        return $this->column;
    }
}
487