Completed
Push — master ( afd04b...3b4c22 )
by Colin
10s
created

Cursor::getNextNonSpacePosition()   B

Complexity

Conditions 6
Paths 9

Size

Total Lines 26
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 18
CRAP Score 6

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 26
ccs 18
cts 18
cp 1
rs 8.439
cc 6
eloc 17
nc 9
nop 0
crap 6
1
<?php
2
3
/*
4
 * This file is part of the league/commonmark package.
5
 *
6
 * (c) Colin O'Dell <colinodell@gmail.com>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace League\CommonMark;
13
14
/**
 * A forward-moving cursor over a single line of text.
 *
 * Positions are character offsets (computed with the mb_* functions using the
 * detected encoding), while the column counter additionally expands tabs to
 * tab stops that are 4 columns wide.
 */
class Cursor
{
    const INDENT_LEVEL = 4;

    /**
     * The line being scanned, exactly as given to the constructor.
     *
     * @var string
     */
    private $line;

    /**
     * Length of the line in characters (not bytes).
     *
     * @var int
     */
    private $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end.  In this state, any character-returning method MUST return null.
     */
    private $currentPosition = 0;

    /**
     * Current column; differs from the position once a tab has been consumed.
     *
     * @var int
     */
    private $column = 0;

    /**
     * Number of columns between the current column and the next non-space
     * character, as computed by getNextNonSpacePosition().
     *
     * @var int
     */
    private $indent = 0;

    /**
     * Position the cursor had before the most recent advance.
     *
     * @var int
     */
    private $previousPosition = 0;

    /**
     * Memoized result of getNextNonSpacePosition(); null when it must be recomputed.
     *
     * @var int|null
     */
    private $nextNonSpaceCache;

    /**
     * True when a column-wise advance stopped part-way through a tab.
     *
     * @var bool
     */
    private $partiallyConsumedTab = false;

    /**
     * Encoding used for all mb_* calls: 'ASCII', 'UTF-8' or the 'ISO-8859-1' fallback.
     *
     * @var string
     */
    private $encoding;

    /**
     * @var bool
     */
    private $lineContainsTabs;

    /**
     * Whether the character length differs from the byte length.
     *
     * @var bool
     */
    private $isMultibyte;

    /**
     * @param string $line
     */
    public function __construct($line)
    {
        $this->line = $line;
        // Strict detection; anything that is neither ASCII nor valid UTF-8 is treated as ISO-8859-1.
        $this->encoding = mb_detect_encoding($line, 'ASCII,UTF-8', true) ?: 'ISO-8859-1';
        $this->length = mb_strlen($line, $this->encoding);
        $this->isMultibyte = $this->length !== strlen($line);
        $this->lineContainsTabs = strpos($line, "\t") !== false;
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * The result is cached until the cursor moves. As a side effect the
     * indent (in columns) is recomputed.
     *
     * @return int
     */
    public function getNextNonSpacePosition()
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $i = $this->currentPosition;
        $cols = $this->column;

        while (($c = $this->getCharacter($i)) !== null) {
            if ($c === ' ') {
                $i++;
                $cols++;
            } elseif ($c === "\t") {
                $i++;
                // Jump to the next tab stop
                $cols += (4 - ($cols % 4));
            } else {
                break;
            }
        }

        $nextNonSpace = ($c === null) ? $this->length : $i;
        $this->indent = $cols - $this->column;

        return $this->nextNonSpaceCache = $nextNonSpace;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @return string|null Null when only spaces/tabs (or nothing) remain
     */
    public function getNextNonSpaceCharacter()
    {
        return $this->getCharacter($this->getNextNonSpacePosition());
    }

    /**
     * Calculates the current indent (number of spaces after current position)
     *
     * @return int
     */
    public function getIndent()
    {
        $this->getNextNonSpacePosition();

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     *
     * @return bool
     */
    public function isIndented()
    {
        $this->getNextNonSpacePosition();

        return $this->indent >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given position, or at the current position
     * when no index is given.
     *
     * @param int|null $index
     *
     * @return string|null
     */
    public function getCharacter($index = null)
    {
        if ($index === null) {
            $index = $this->currentPosition;
        }

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        return mb_substr($this->line, $index, 1, $this->encoding);
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     *
     * @param int $offset
     *
     * @return string|null
     */
    public function peek($offset = 1)
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     *
     * @return bool
     */
    public function isBlank()
    {
        return $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards
     */
    public function advance()
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     */
    public function advanceBy($characters, $advanceByColumns = false)
    {
        if ($characters === 0) {
            $this->previousPosition = $this->currentPosition;

            return;
        }

        $this->previousPosition = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $nextFewChars = mb_substr($this->line, $this->currentPosition, $characters, $this->encoding);

        // Optimization to avoid tab handling logic if we have no tabs
        if (!$this->lineContainsTabs || strpos($nextFewChars, "\t") === false) {
            $length = min($characters, $this->length - $this->currentPosition);
            $this->partiallyConsumedTab = false;
            $this->currentPosition += $length;
            $this->column += $length;

            return;
        }

        // From here on the chunk is known to contain at least one tab, so it is never empty.
        if ($characters === 1) {
            $asArray = [$nextFewChars];
        } elseif ($this->isMultibyte) {
            // A multibyte line was detected as valid UTF-8, so the /u split cannot fail here.
            $asArray = preg_split('//u', $nextFewChars, -1, PREG_SPLIT_NO_EMPTY);
        } else {
            // One byte per character (ASCII or the ISO-8859-1 fallback). A /u split would
            // return false on non-UTF-8 bytes and leave the cursor stuck.
            $asArray = str_split($nextFewChars);
        }

        foreach ($asArray as $c) {
            if ($c === "\t") {
                $charsToTab = 4 - ($this->column % 4);
                if ($advanceByColumns) {
                    // Stop inside the tab when fewer columns were requested than it spans
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance = $charsToTab > $characters ? $characters : $charsToTab;
                    $this->column += $charsToAdvance;
                    $this->currentPosition += $this->partiallyConsumedTab ? 0 : 1;
                    $characters -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances the cursor by a single space or tab, if present
     *
     * @return bool
     */
    public function advanceBySpaceOrTab()
    {
        $character = $this->getCharacter();

        if ($character === ' ' || $character === "\t") {
            $this->advanceBy(1, true);

            return true;
        }

        return false;
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab()
    {
        $newPosition = $this->getNextNonSpacePosition();
        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline()
    {
        $matches = [];
        preg_match('/^ *(?:\n *)?/', $this->getRemainder(), $matches, PREG_OFFSET_CAPTURE);

        // [0][0] contains the matched text
        // [0][1] contains the index of that match
        // The match consists of spaces and newlines only, so bytes equal characters here.
        $increment = $matches[0][1] + strlen($matches[0][0]);

        if ($increment === 0) {
            return 0;
        }

        $this->advanceBy($increment);

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd()
    {
        $this->previousPosition = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns the text from the current position to the end of the line.
     *
     * When a tab has been partially consumed, the rest of that tab is
     * returned as the equivalent number of spaces.
     *
     * @return string
     */
    public function getRemainder()
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $prefix = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            $position++;
            $charsToTab = 4 - ($this->column % 4);
            $prefix = str_repeat(' ', $charsToTab);
        }

        return $prefix . mb_substr($this->line, $position, null, $this->encoding);
    }

    /**
     * @return string
     */
    public function getLine()
    {
        return $this->line;
    }

    /**
     * @return bool
     */
    public function isAtEnd()
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     *
     * @param string $regex
     *
     * @return string|null
     */
    public function match($regex)
    {
        $subject = $this->getRemainder();

        if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0][0] contains the matched text
        // $matches[0][1] contains the index of that match

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
            $offset = mb_strlen(mb_strcut($subject, 0, $matches[0][1], $this->encoding), $this->encoding);
        } else {
            $offset = $matches[0][1];
        }

        $this->advanceBy($offset + mb_strlen($matches[0][0], $this->encoding));

        return $matches[0][0];
    }

    /**
     * Encapsulates the current state of this cursor in case you need to rollback later.
     *
     * WARNING: Do not parse or use the return value for ANYTHING except for
     * passing it back into restoreState(), as the number of values and their
     * contents may change in any future release without warning.
     *
     * @return array
     */
    public function saveState()
    {
        return [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ];
    }

    /**
     * Restore the cursor to a previous state.
     *
     * Pass in the value previously obtained by calling saveState().
     *
     * @param array $state
     */
    public function restoreState($state)
    {
        list(
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
          ) = $state;
    }

    /**
     * @return int
     */
    public function getPosition()
    {
        return $this->currentPosition;
    }

    /**
     * Returns the text between the previous position and the current position.
     *
     * @return string
     */
    public function getPreviousText()
    {
        return mb_substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition, $this->encoding);
    }

    /**
     * @return int
     */
    public function getColumn()
    {
        return $this->column;
    }
}
472