1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <[email protected]>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
11
|
|
|
|
12
|
|
|
namespace League\CommonMark; |
13
|
|
|
|
14
|
|
|
/**
 * A forward-moving read cursor over a single line of UTF-8 text.
 *
 * The cursor tracks two related coordinates:
 *  - the character position (an index into the line, counted in UTF-8 characters), and
 *  - the visual column (the position after expanding tabs to 4-column tab stops).
 *
 * All positions and lengths exposed by this class are measured in characters, not bytes.
 */
class Cursor
{
    const INDENT_LEVEL = 4;

    /**
     * The line of text being traversed
     *
     * @var string
     */
    private $line;

    /**
     * Length of the line, in UTF-8 characters
     *
     * @var int
     */
    private $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end. In this state, any character-returning method MUST return null.
     */
    private $currentPosition = 0;

    /**
     * Visual column of the current position (tabs advance to the next multiple of 4)
     *
     * @var int
     */
    private $column = 0;

    /**
     * Columns of whitespace between the current position and the next non-space character.
     * Only refreshed by getNextNonSpacePosition().
     *
     * @var int
     */
    private $indent = 0;

    /**
     * Position the cursor was at before the most recent advance
     *
     * @var int
     */
    private $previousPosition = 0;

    /**
     * Memoized result of getNextNonSpacePosition(); null when it must be recomputed
     *
     * @var int|null
     */
    private $nextNonSpaceCache;

    /**
     * True when a column-based advance stopped part-way through a tab character
     *
     * @var bool
     */
    private $partiallyConsumedTab = false;

    /**
     * @param string $line
     */
    public function __construct($line)
    {
        $this->line = $line;
        $this->length = mb_strlen($line, 'utf-8');
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * @deprecated Use getNextNonSpacePosition() instead
     *
     * @return int
     */
    public function getFirstNonSpacePosition()
    {
        // Silenced on purpose: the notice only reaches error handlers that opt in to deprecations
        @trigger_error('Cursor::getFirstNonSpacePosition() will be removed in a future 0.x release. Use getNextNonSpacePosition() instead. See https://github.com/thephpleague/commonmark/issues/280', E_USER_DEPRECATED);

        return $this->getNextNonSpacePosition();
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * If only spaces/tabs remain, the line length is returned. As a side effect this
     * refreshes the indent value and caches the result until the cursor moves again.
     *
     * @return int
     */
    public function getNextNonSpacePosition()
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $i = $this->currentPosition;
        $cols = $this->column;

        while (($c = $this->getCharacter($i)) !== null) {
            if ($c === ' ') {
                $i++;
                $cols++;
            } elseif ($c === "\t") {
                $i++;
                $cols += self::columnsToNextTabStop($cols);
            } else {
                break;
            }
        }

        // $c is null only when the scan ran off the end of the line
        $nextNonSpace = ($c === null) ? $this->length : $i;
        $this->indent = $cols - $this->column;

        return $this->nextNonSpaceCache = $nextNonSpace;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @deprecated Use getNextNonSpaceCharacter() instead
     *
     * @return string|null
     */
    public function getFirstNonSpaceCharacter()
    {
        // Silenced on purpose: the notice only reaches error handlers that opt in to deprecations
        @trigger_error('Cursor::getFirstNonSpaceCharacter() will be removed in a future 0.x release. Use getNextNonSpaceCharacter() instead. See https://github.com/thephpleague/commonmark/issues/280', E_USER_DEPRECATED);

        return $this->getNextNonSpaceCharacter();
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @return string|null Null when only spaces/tabs remain on the line
     */
    public function getNextNonSpaceCharacter()
    {
        return $this->getCharacter($this->getNextNonSpacePosition());
    }

    /**
     * Calculates the current indent (number of whitespace columns after the current position)
     *
     * @return int
     */
    public function getIndent()
    {
        // Called for its side effect: it (re)computes $this->indent when the cache is stale
        $this->getNextNonSpacePosition();

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     *
     * @return bool
     */
    public function isIndented()
    {
        return $this->getIndent() >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given position
     *
     * @param int|null $index Character position to read; defaults to the current position
     *
     * @return string|null Null when the index is out of bounds or the cursor is at the end
     */
    public function getCharacter($index = null)
    {
        if ($index === null) {
            $index = $this->currentPosition;
        }

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        return mb_substr($this->line, $index, 1, 'utf-8');
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     *
     * @param int $offset Distance from the current position, in characters
     *
     * @return string|null
     */
    public function peek($offset = 1)
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     *
     * @return bool
     */
    public function isBlank()
    {
        return $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards by one character
     */
    public function advance()
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * The position never moves past the end of the line. Calling this with 0 is a no-op
     * apart from resetting the "previous position" marker.
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     */
    public function advanceBy($characters, $advanceByColumns = false)
    {
        // Every call, including a zero-length one, resets the previous-position marker
        $this->previousPosition = $this->currentPosition;

        if ($characters === 0) {
            return;
        }

        $this->nextNonSpaceCache = null;

        $nextFewChars = mb_substr($this->line, $this->currentPosition, $characters, 'utf-8');

        // Optimization to avoid tab handling logic if we have no tabs
        if (strpos($nextFewChars, "\t") === false) {
            $length = min($characters, $this->length - $this->currentPosition);
            $this->partiallyConsumedTab = false;
            $this->currentPosition += $length;
            $this->column += $length;

            return;
        }

        if ($characters === 1) {
            // The one-character slice is known to be the tab itself; no need to split
            $asArray = [$nextFewChars];
        } else {
            // Split into individual UTF-8 characters (-1 means "no limit")
            $asArray = preg_split('//u', $nextFewChars, -1, PREG_SPLIT_NO_EMPTY);
        }

        foreach ($asArray as $c) {
            if ($c === "\t") {
                $charsToTab = self::columnsToNextTabStop($this->column);
                if ($advanceByColumns) {
                    // The tab may span more columns than we were asked to consume; if so,
                    // move the column forward but stay positioned on the tab character
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance = min($characters, $charsToTab);
                    $this->column += $charsToAdvance;
                    $this->currentPosition += $this->partiallyConsumedTab ? 0 : 1;
                    $characters -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances the cursor by a single space or tab, if present
     *
     * A tab is consumed one column at a time, so it may end up only partially consumed.
     *
     * @return bool Whether the cursor moved
     */
    public function advanceBySpaceOrTab()
    {
        $character = $this->getCharacter();

        if ($character === ' ' || $character === "\t") {
            $this->advanceBy(1, true);

            return true;
        }

        return false;
    }

    /**
     * Advances the cursor while the given character is matched
     *
     * @param string   $character                 Character to match
     * @param int|null $maximumCharactersToAdvance Maximum number of characters to advance before giving up
     *
     * @return int Number of positions moved (0 if unsuccessful)
     */
    public function advanceWhileMatches($character, $maximumCharactersToAdvance = null)
    {
        // Calculate how far to advance
        $start = $this->currentPosition;
        $newIndex = $start;
        if ($maximumCharactersToAdvance === null) {
            $maximumCharactersToAdvance = $this->length;
        }

        $max = min($start + $maximumCharactersToAdvance, $this->length);

        while ($newIndex < $max && $this->getCharacter($newIndex) === $character) {
            ++$newIndex;
        }

        if ($newIndex <= $start) {
            return 0;
        }

        $this->advanceBy($newIndex - $start);

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * @deprecated Use advanceToNextNonSpaceOrTab() or advanceToNextNonSpaceOrNewline() instead
     *
     * @return int Number of positions moved
     */
    public function advanceToFirstNonSpace()
    {
        // Silenced on purpose: the notice only reaches error handlers that opt in to deprecations
        @trigger_error('Cursor::advanceToFirstNonSpace() will be removed in a future 0.x release. Use advanceToNextNonSpaceOrTab() or advanceToNextNonSpaceOrNewline() instead. See https://github.com/thephpleague/commonmark/issues/280', E_USER_DEPRECATED);

        return $this->advanceToNextNonSpaceOrNewline();
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab()
    {
        $newPosition = $this->getNextNonSpacePosition();
        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline()
    {
        $matches = [];
        preg_match('/^ *(?:\n *)?/', $this->getRemainder(), $matches);

        // The pattern is anchored and may match the empty string, so it always matches at
        // offset 0. It can only match ASCII spaces and "\n", so bytes equal characters here.
        $increment = strlen($matches[0]);

        if ($increment === 0) {
            return 0;
        }

        $this->advanceBy($increment);

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * Only the position is moved; the column counter is left as it was.
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd()
    {
        $this->previousPosition = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns the text from the current position to the end of the line
     *
     * When a tab has been partially consumed, its unconsumed columns are returned as spaces.
     *
     * @return string
     */
    public function getRemainder()
    {
        if ($this->isAtEnd()) {
            return '';
        }

        $prefix = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            // Skip the tab itself and stand in spaces for the columns it still covers
            $position++;
            $charsToTab = self::columnsToNextTabStop($this->column);
            $prefix = str_repeat(' ', $charsToTab);
        }

        return $prefix . mb_substr($this->line, $position, null, 'utf-8');
    }

    /**
     * Returns the full line, regardless of the cursor position
     *
     * @return string
     */
    public function getLine()
    {
        return $this->line;
    }

    /**
     * Whether every character of the line has been consumed
     *
     * @return bool
     */
    public function isAtEnd()
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null (cursor untouched) when nothing matches
     */
    public function match($regex)
    {
        $subject = $this->getRemainder();

        $matches = [];
        if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
        $offset = mb_strlen(mb_strcut($subject, 0, $matches[0][1], 'utf-8'), 'utf-8');

        // [0][0] contains the matched text
        // [0][1] contains the index of that match
        $this->advanceBy($offset + mb_strlen($matches[0][0], 'utf-8'));

        return $matches[0][0];
    }

    /**
     * Captures the complete cursor state so it can later be passed to restoreState()
     *
     * @return CursorState
     */
    public function saveState()
    {
        return new CursorState(
            $this->line,
            $this->length,
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab
        );
    }

    /**
     * Resets the cursor to a state previously captured by saveState()
     *
     * @param CursorState $state
     */
    public function restoreState(CursorState $state)
    {
        $this->line = $state->getLine();
        $this->length = $state->getLength();
        $this->currentPosition = $state->getCurrentPosition();
        $this->previousPosition = $state->getPreviousPosition();
        $this->nextNonSpaceCache = $state->getNextNonSpaceCache();
        $this->column = $state->getColumn();
        $this->indent = $state->getIndent();
        $this->partiallyConsumedTab = $state->getPartiallyConsumedTab();
    }

    /**
     * Returns the current character position
     *
     * @return int
     */
    public function getPosition()
    {
        return $this->currentPosition;
    }

    /**
     * Returns the text between the previous position and the current position
     *
     * @return string
     */
    public function getPreviousText()
    {
        return mb_substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition, 'utf-8');
    }

    /**
     * Returns the current visual column
     *
     * @return int
     */
    public function getColumn()
    {
        return $this->column;
    }

    /**
     * Number of columns a tab occupies when it starts at the given column
     *
     * Tab stops sit at every multiple of 4 columns, so the result is always between 1 and 4.
     *
     * @param int $column
     *
     * @return int
     */
    private static function columnsToNextTabStop($column)
    {
        return 4 - ($column % 4);
    }
}
510
|
|
|
|
If you suppress an error, we recommend checking for the error condition explicitly: