Completed
Push — master ( c30c99...290a7f )
by Colin
24s queued 10s
created

Cursor::getEncoding()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <[email protected]>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
11
12
namespace League\CommonMark;
13
14
/**
 * A forward-moving, character-oriented cursor over a single line of text.
 *
 * Positions are measured in characters (not bytes); columns are measured in
 * visual columns, where a tab advances to the next multiple of four.
 */
class Cursor
{
    const INDENT_LEVEL = 4;

    /**
     * The full line this cursor walks over.
     *
     * @var string
     */
    private $line;

    /**
     * Length of the line in characters (not bytes).
     *
     * @var int
     */
    private $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end.  In this state, any character-returning method MUST return null.
     */
    private $currentPosition = 0;

    /**
     * Visual column of the cursor; differs from the position once tabs are crossed.
     *
     * @var int
     */
    private $column = 0;

    /**
     * Number of columns between the cursor and the next non-space character.
     * Only refreshed by getNextNonSpacePosition().
     *
     * @var int
     */
    private $indent = 0;

    /**
     * Position the cursor had before the most recent advance.
     *
     * @var int
     */
    private $previousPosition = 0;

    /**
     * Memoized result of getNextNonSpacePosition(); null means "not computed".
     *
     * @var int|null
     */
    private $nextNonSpaceCache;

    /**
     * True when a column-based advance stopped in the middle of a tab.
     *
     * @var bool
     */
    private $partiallyConsumedTab = false;

    /**
     * Encoding handed to the mb_* functions: 'UTF-8' or 'ASCII'.
     *
     * @var string
     */
    private $encoding;

    /**
     * @var bool
     */
    private $lineContainsTabs;

    /**
     * True when the character count differs from the byte count.
     *
     * @var bool
     */
    private $isMultibyte;

    /**
     * Characters already extracted by getCharacter(), keyed by position.
     *
     * @var array<int, string>
     */
    private $charCache = [];

    /**
     * @param string $line The line being parsed (ASCII or UTF-8)
     */
    public function __construct(string $line)
    {
        $this->line = $line;
        $this->length = \mb_strlen($line, 'UTF-8') ?: 0;
        $this->isMultibyte = $this->length !== \strlen($line);
        // Pure single-byte lines are handled as ASCII in every mb_* call
        $this->encoding = $this->isMultibyte ? 'UTF-8' : 'ASCII';
        $this->lineContainsTabs = \strpos($line, "\t") !== false;
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * As a side effect this refreshes the indent (in columns) and caches the
     * result until the cursor moves again.
     *
     * @return int
     */
    public function getNextNonSpacePosition(): int
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $i = $this->currentPosition;
        $cols = $this->column;

        while (($c = $this->getCharacter($i)) !== null) {
            if ($c === ' ') {
                $i++;
                $cols++;
            } elseif ($c === "\t") {
                $i++;
                // A tab jumps to the next multiple of four columns
                $cols += 4 - ($cols % 4);
            } else {
                break;
            }
        }

        // Running off the end of the line reports the line length
        $nextNonSpace = ($c === null) ? $this->length : $i;
        $this->indent = $cols - $this->column;

        return $this->nextNonSpaceCache = $nextNonSpace;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @return string|null Null when only spaces/tabs remain
     */
    public function getNextNonSpaceCharacter(): ?string
    {
        return $this->getCharacter($this->getNextNonSpacePosition());
    }

    /**
     * Calculates the current indent (number of columns of whitespace after current position)
     *
     * @return int
     */
    public function getIndent(): int
    {
        if ($this->nextNonSpaceCache === null) {
            // The indent is computed as a side effect of locating the next non-space
            $this->getNextNonSpacePosition();
        }

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     *
     * @return bool
     */
    public function isIndented(): bool
    {
        return $this->getIndent() >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given position, or at the current position when omitted.
     *
     * @param int|null $index
     *
     * @return string|null Null when the position is outside the line
     */
    public function getCharacter(?int $index = null): ?string
    {
        if ($index === null) {
            $index = $this->currentPosition;
        }

        if (isset($this->charCache[$index])) {
            return $this->charCache[$index];
        }

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        return $this->charCache[$index] = \mb_substr($this->line, $index, 1, $this->encoding);
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     *
     * @param int $offset
     *
     * @return string|null
     */
    public function peek(int $offset = 1): ?string
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     *
     * @return bool
     */
    public function isBlank(): bool
    {
        // Check the cached value first to skip the method call when possible
        return $this->nextNonSpaceCache === $this->length || $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards
     */
    public function advance()
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * The position before the move is always remembered as the previous
     * position, even when $characters is zero.
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     */
    public function advanceBy(int $characters, bool $advanceByColumns = false)
    {
        $this->previousPosition = $this->currentPosition;

        if ($characters === 0) {
            return;
        }

        $this->nextNonSpaceCache = null;

        // Optimization to avoid tab handling logic if we have no tabs
        if (!$this->lineContainsTabs) {
            $this->advanceWithoutTabs($characters);

            return;
        }

        $nextFewChars = \mb_substr($this->line, $this->currentPosition, $characters, $this->encoding);
        if (\strpos($nextFewChars, "\t") === false) {
            $this->advanceWithoutTabs($characters);

            return;
        }

        // A single-character slice that contains a tab can only be that tab,
        // so splitting is unnecessary. The limit must be -1 rather than null:
        // null is deprecated for this parameter as of PHP 8.1.
        $asArray = ($characters === 1)
            ? [$nextFewChars]
            : \preg_split('//u', $nextFewChars, -1, \PREG_SPLIT_NO_EMPTY);

        foreach ($asArray as $c) {
            if ($c === "\t") {
                $charsToTab = 4 - ($this->column % 4);
                if ($advanceByColumns) {
                    // The tab is only stepped over once all of its columns are used up
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance = \min($characters, $charsToTab);
                    $this->column += $charsToAdvance;
                    $this->currentPosition += $this->partiallyConsumedTab ? 0 : 1;
                    $characters -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances over a stretch that contains no tabs, so one character equals one column.
     *
     * @param int $characters Requested distance; clamped to what is left of the line
     */
    private function advanceWithoutTabs(int $characters): void
    {
        $length = \min($characters, $this->length - $this->currentPosition);
        $this->partiallyConsumedTab = false;
        $this->currentPosition += $length;
        $this->column += $length;
    }

    /**
     * Advances the cursor by a single space or tab, if present
     *
     * A tab is consumed one column at a time, so it may remain under the cursor.
     *
     * @return bool
     */
    public function advanceBySpaceOrTab(): bool
    {
        $character = $this->getCharacter();

        if ($character === ' ' || $character === "\t") {
            $this->advanceBy(1, true);

            return true;
        }

        return false;
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab(): int
    {
        $newPosition = $this->getNextNonSpacePosition();
        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline(): int
    {
        $matches = [];
        \preg_match('/^ *(?:\n *)?/', $this->getRemainder(), $matches, \PREG_OFFSET_CAPTURE);

        // [0][0] contains the matched text
        // [0][1] contains the index of that match
        // The matched text is spaces/newline only, so bytes equal characters here.
        $increment = $matches[0][1] + \strlen($matches[0][0]);

        // Always go through advanceBy(), even for a zero-length match, so the
        // previous position is reset and getPreviousText() never reports text
        // from an earlier move.
        $this->advanceBy($increment);

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd(): int
    {
        $this->previousPosition = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns everything from the cursor to the end of the line.
     *
     * When the cursor sits inside a partially consumed tab, the unconsumed
     * columns of that tab are returned as spaces in place of the tab itself.
     *
     * @return string
     */
    public function getRemainder(): string
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $prefix = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            $position++;
            $charsToTab = 4 - ($this->column % 4);
            $prefix = \str_repeat(' ', $charsToTab);
        }

        return $prefix . \mb_substr($this->line, $position, null, $this->encoding);
    }

    /**
     * @return string
     */
    public function getLine(): string
    {
        return $this->line;
    }

    /**
     * @return bool
     */
    public function isAtEnd(): bool
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     *
     * @param string $regex
     *
     * @return string|null Null when the expression does not match the remainder
     */
    public function match(string $regex): ?string
    {
        $subject = $this->getRemainder();

        if (!\preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0][0] contains the matched text
        // $matches[0][1] contains the index of that match

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
            $offset = \mb_strlen(\mb_strcut($subject, 0, $matches[0][1], $this->encoding), $this->encoding);
            $matchLength = \mb_strlen($matches[0][0], $this->encoding);
        } else {
            $offset = $matches[0][1];
            $matchLength = \strlen($matches[0][0]);
        }

        // Skip whatever precedes the match as well as the match itself
        $this->advanceBy($offset + $matchLength);

        return $matches[0][0];
    }

    /**
     * Encapsulates the current state of this cursor in case you need to rollback later.
     *
     * WARNING: Do not parse or use the return value for ANYTHING except for
     * passing it back into restoreState(), as the number of values and their
     * contents may change in any future release without warning.
     *
     * @return array
     */
    public function saveState()
    {
        return [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ];
    }

    /**
     * Restore the cursor to a previous state.
     *
     * Pass in the value previously obtained by calling saveState().
     *
     * @param array $state
     */
    public function restoreState($state)
    {
        // Order must mirror saveState()
        [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ] = $state;
    }

    /**
     * @return int
     */
    public function getPosition(): int
    {
        return $this->currentPosition;
    }

    /**
     * Returns the text covered by the most recent advance.
     *
     * @return string
     */
    public function getPreviousText(): string
    {
        return \mb_substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition, $this->encoding);
    }

    /**
     * @return int
     */
    public function getColumn(): int
    {
        return $this->column;
    }
}
481