Completed
Push — optimizations ( 75e26b )
by Colin
03:14
created

Cursor   B

Complexity

Total Complexity 52

Size/Duplication

Total Lines 467
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 0

Test Coverage

Coverage 100%

Importance

Changes 4
Bugs 0 Features 0
Metric Value
wmc 52
lcom 1
cbo 0
dl 0
loc 467
ccs 163
cts 163
cp 1
rs 7.9487
c 4
b 0
f 0

23 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 8 1
B getNextNonSpacePosition() 0 26 6
A getNextNonSpaceCharacter() 0 4 1
A getIndent() 0 8 2
A isIndented() 0 4 1
B getCharacter() 0 17 5
A peek() 0 4 1
A isBlank() 0 4 2
A advance() 0 4 1
C advanceBy() 0 54 12
A advanceBySpaceOrTab() 0 12 3
A advanceToNextNonSpaceOrTab() 0 8 1
A advanceToNextNonSpaceOrNewline() 0 17 2
A advanceToEnd() 0 9 1
A getRemainder() 0 16 3
A getLine() 0 4 1
A isAtEnd() 0 4 1
B match() 0 26 3
A saveState() 0 11 1
A restoreState() 0 11 1
A getPosition() 0 4 1
A getPreviousText() 0 4 1
A getColumn() 0 4 1

How to fix   Complexity   

Complex Class

Complex classes like Cursor often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use Cursor, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/*
4
 * This file is part of the league/commonmark package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace League\CommonMark;
13
14
/**
 * A pointer into a single line of text.
 *
 * The cursor tracks two related coordinates: the character position within
 * the line and the tab-expanded column (tabs advance the column to the next
 * multiple of 4).  All positions are measured in characters of the configured
 * encoding, not in bytes.
 */
class Cursor
{
    const INDENT_LEVEL = 4;

    /**
     * The full line this cursor operates on.
     *
     * @var string
     */
    private $line;

    /**
     * Length of the line, in characters (not bytes).
     *
     * @var int
     */
    private $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end.  In this state, any character-returning method MUST return null.
     */
    private $currentPosition = 0;

    /**
     * Tab-expanded column of the current position.
     *
     * @var int
     */
    private $column = 0;

    /**
     * Number of columns between the current column and the next non-space
     * character; refreshed by getNextNonSpacePosition().
     *
     * @var int
     */
    private $indent = 0;

    /**
     * Position the cursor had before the most recent advance.
     *
     * @var int
     */
    private $previousPosition = 0;

    /**
     * Memoized result of getNextNonSpacePosition(); null when stale.
     *
     * @var int|null
     */
    private $nextNonSpaceCache;

    /**
     * Whether the cursor currently sits part-way through the columns of a tab.
     *
     * @var bool
     */
    private $partiallyConsumedTab = false;

    /**
     * Character encoding passed to the mb_* functions.
     *
     * @var string
     */
    private $encoding;

    /**
     * Whether the line contains at least one tab character.
     *
     * @var bool
     */
    private $lineContainsTabs;

    /**
     * Whether the line's character length differs from its byte length.
     *
     * @var bool
     */
    private $isMultibyte;

    /**
     * Characters already extracted by getCharacter(), keyed by position.
     *
     * @var string[]
     */
    private $charCache = [];

    /**
     * @param string $line     The line of text to move through
     * @param string $encoding Character encoding of $line
     */
    public function __construct($line, $encoding = 'UTF-8')
    {
        $this->line = $line;
        $this->encoding = $encoding;
        $this->length = mb_strlen($line, $this->encoding);
        $this->isMultibyte = $this->length !== strlen($line);
        // A plain substring search is enough here; no regex engine needed.
        $this->lineContainsTabs = strpos($line, "\t") !== false;
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * As a side effect this recomputes the indent and memoizes the result
     * until the cursor moves again.
     *
     * @return int
     */
    public function getNextNonSpacePosition()
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $i = $this->currentPosition;
        $cols = $this->column;

        while (($c = $this->getCharacter($i)) !== null) {
            if ($c === ' ') {
                $i++;
                $cols++;
            } elseif ($c === "\t") {
                $i++;
                // A tab moves the column forward to the next multiple of 4
                $cols += (4 - ($cols % 4));
            } else {
                break;
            }
        }

        // Running off the end of the line means everything left was whitespace
        $nextNonSpace = ($c === null) ? $this->length : $i;
        $this->indent = $cols - $this->column;

        return $this->nextNonSpaceCache = $nextNonSpace;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @return string|null Null if only spaces/tabs remain on the line
     */
    public function getNextNonSpaceCharacter()
    {
        return $this->getCharacter($this->getNextNonSpacePosition());
    }

    /**
     * Calculates the current indent (number of columns of whitespace after the current position)
     *
     * @return int
     */
    public function getIndent()
    {
        if ($this->nextNonSpaceCache === null) {
            // The indent is computed as a by-product of locating the next non-space
            $this->getNextNonSpacePosition();
        }

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     *
     * @return bool
     */
    public function isIndented()
    {
        return $this->getIndent() >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given position (the current position by default)
     *
     * @param int|null $index
     *
     * @return string|null Null if the index is out of bounds
     */
    public function getCharacter($index = null)
    {
        if ($index === null) {
            $index = $this->currentPosition;
        }

        if (isset($this->charCache[$index])) {
            return $this->charCache[$index];
        }

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        return $this->charCache[$index] = mb_substr($this->line, $index, 1, $this->encoding);
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     *
     * @param int $offset
     *
     * @return string|null
     */
    public function peek($offset = 1)
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     *
     * @return bool
     */
    public function isBlank()
    {
        return $this->nextNonSpaceCache === $this->length || $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards by one character
     */
    public function advance()
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     */
    public function advanceBy($characters, $advanceByColumns = false)
    {
        if ($characters === 0) {
            $this->previousPosition = $this->currentPosition;

            return;
        }

        $this->previousPosition = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        // Only extract the upcoming characters when the line has tabs at all
        $nextFewChars = $this->lineContainsTabs
            ? mb_substr($this->line, $this->currentPosition, $characters, $this->encoding)
            : '';

        // Optimization to avoid tab handling logic if we have no tabs
        if (strpos($nextFewChars, "\t") === false) {
            $length = min($characters, $this->length - $this->currentPosition);
            $this->partiallyConsumedTab = false;
            $this->currentPosition += $length;
            $this->column += $length;

            return;
        }

        // From here on $nextFewChars is known to contain at least one tab,
        // so a one-character window is exactly that tab and needs no split.
        if ($characters === 1) {
            $asArray = [$nextFewChars];
        } else {
            // -1 means "no limit"; passing null here is deprecated since PHP 8.1
            $asArray = preg_split('//u', $nextFewChars, -1, PREG_SPLIT_NO_EMPTY);
        }

        foreach ($asArray as $c) {
            if ($c === "\t") {
                $charsToTab = 4 - ($this->column % 4);
                if ($advanceByColumns) {
                    // In column mode $characters counts columns, so a tab may
                    // only be partially consumed; the position then stays put.
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance = $charsToTab > $characters ? $characters : $charsToTab;
                    $this->column += $charsToAdvance;
                    $this->currentPosition += $this->partiallyConsumedTab ? 0 : 1;
                    $characters -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances the cursor by a single space or tab, if present
     *
     * The advance is made in column mode, so a tab may be left partially consumed.
     *
     * @return bool Whether the cursor moved
     */
    public function advanceBySpaceOrTab()
    {
        $character = $this->getCharacter();

        if ($character === ' ' || $character === "\t") {
            $this->advanceBy(1, true);

            return true;
        }

        return false;
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab()
    {
        $newPosition = $this->getNextNonSpacePosition();
        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline()
    {
        $matches = [];
        preg_match('/^ *(?:\n *)?/', $this->getRemainder(), $matches, PREG_OFFSET_CAPTURE);

        // [0][0] contains the matched text
        // [0][1] contains the index of that match
        // The pattern only matches spaces and "\n", so byte length equals character length
        $increment = $matches[0][1] + strlen($matches[0][0]);

        if ($increment === 0) {
            return 0;
        }

        $this->advanceBy($increment);

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * Only the position is moved; the column is left unchanged.
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd()
    {
        $this->previousPosition = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns the text from the current position to the end of the line
     *
     * If a tab has been partially consumed, its remaining columns are
     * returned as spaces in place of the tab character.
     *
     * @return string
     */
    public function getRemainder()
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $prefix = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            // Skip the tab itself and substitute the columns still owed by it
            $position++;
            $charsToTab = 4 - ($this->column % 4);
            $prefix = str_repeat(' ', $charsToTab);
        }

        return $prefix . mb_substr($this->line, $position, null, $this->encoding);
    }

    /**
     * Returns the full line this cursor was created with
     *
     * @return string
     */
    public function getLine()
    {
        return $this->line;
    }

    /**
     * Whether the cursor has reached (or passed) the end of the line
     *
     * @return bool
     */
    public function isAtEnd()
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     *
     * @param string $regex
     *
     * @return string|null Null if the expression did not match the remainder
     */
    public function match($regex)
    {
        $subject = $this->getRemainder();

        if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0][0] contains the matched text
        // $matches[0][1] contains the index of that match

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
            $offset = mb_strlen(mb_strcut($subject, 0, $matches[0][1], $this->encoding), $this->encoding);
            $matchLength = mb_strlen($matches[0][0], $this->encoding);
        } else {
            $offset = $matches[0][1];
            $matchLength = strlen($matches[0][0]);
        }

        // Skip everything up to and including the match
        $this->advanceBy($offset + $matchLength);

        return $matches[0][0];
    }

    /**
     * Encapsulates the current state of this cursor in case you need to rollback later.
     *
     * WARNING: Do not parse or use the return value for ANYTHING except for
     * passing it back into restoreState(), as the number of values and their
     * contents may change in any future release without warning.
     *
     * @return array
     */
    public function saveState()
    {
        return [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ];
    }

    /**
     * Restore the cursor to a previous state.
     *
     * Pass in the value previously obtained by calling saveState().
     *
     * @param array $state
     */
    public function restoreState($state)
    {
        list(
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ) = $state;
    }

    /**
     * Returns the current character position
     *
     * @return int
     */
    public function getPosition()
    {
        return $this->currentPosition;
    }

    /**
     * Returns the text between the previous position and the current position
     *
     * @return string
     */
    public function getPreviousText()
    {
        return mb_substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition, $this->encoding);
    }

    /**
     * Returns the current tab-expanded column
     *
     * @return int
     */
    public function getColumn()
    {
        return $this->column;
    }
}
481