1 | <?php |
||||
2 | |||||
3 | declare(strict_types=1); |
||||
4 | |||||
5 | /* |
||||
6 | * This file is part of the league/commonmark package. |
||||
7 | * |
||||
8 | * (c) Colin O'Dell <[email protected]> |
||||
9 | * |
||||
10 | * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js) |
||||
11 | * - (c) John MacFarlane |
||||
12 | * |
||||
13 | * For the full copyright and license information, please view the LICENSE |
||||
14 | * file that was distributed with this source code. |
||||
15 | */ |
||||
16 | |||||
17 | namespace League\CommonMark\Reference; |
||||
18 | |||||
19 | use League\CommonMark\Parser\Cursor; |
||||
20 | use League\CommonMark\Util\LinkParserHelper; |
||||
21 | |||||
/**
 * Incremental parser for link reference definitions (`[label]: destination "title"`)
 * found at the start of a paragraph.
 *
 * Lines are fed one at a time through parse(). The parser is a small state machine:
 * every line is appended to the paragraph buffer, and each time a definition is
 * confirmed the buffer is cleared, so getParagraphContent() only returns text that
 * was not consumed by a definition. Once a line cannot be part of a definition the
 * parser enters the PARAGRAPH state and ignores the structure of all further lines.
 *
 * getReferences() finalizes any pending definition, so it is meant to be called once
 * all lines of the paragraph have been passed to parse().
 */
final class ReferenceParser
{
    // Looking for the start of a definition, i.e. `[`
    private const START_DEFINITION = 0;
    // Looking for and parsing the label, i.e. `foo` within `[foo]`
    private const LABEL = 1;
    // Parsing the destination, i.e. `/url` in `[foo]: /url`
    private const DESTINATION = 2;
    // Looking for the start of a title, i.e. the first `"` in `[foo]: /url "title"`
    private const START_TITLE = 3;
    // Parsing the content of the title, i.e. `title` in `[foo]: /url "title"`
    private const TITLE = 4;
    // End state, no matter what kind of lines we add, they won't be references
    private const PARAGRAPH = 5;

    /**
     * Text fed to parse() that has not (yet) been consumed by a confirmed definition.
     *
     * @var string
     *
     * @psalm-readonly-allow-private-mutation
     */
    private $paragraph = '';

    /**
     * Definitions completed so far, in the order they were finished.
     *
     * @var array<int, ReferenceInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private $references = [];

    /**
     * One of the state constants declared above.
     *
     * @var int
     *
     * @psalm-readonly-allow-private-mutation
     */
    private $state = self::START_DEFINITION;

    /**
     * Label collected for the definition in progress; null when none is in progress.
     *
     * @var string|null
     *
     * @psalm-readonly-allow-private-mutation
     */
    private $label;

    /**
     * Destination of the definition in progress; null until one has been parsed.
     *
     * @var string|null
     *
     * @psalm-readonly-allow-private-mutation
     */
    private $destination;

    /**
     * Title text collected for the definition in progress ('' when there is none).
     *
     * @var string
     *
     * @psalm-readonly-allow-private-mutation
     */
    private $title = '';

    /**
     * Character that closes the title being parsed (`"`, `'` or `)`), or null.
     *
     * @var string|null
     *
     * @psalm-readonly-allow-private-mutation
     */
    private $titleDelimiter;

    /**
     * True once the definition in progress is known to be valid but has not yet been
     * turned into a Reference by finishReference().
     *
     * @var bool
     *
     * @psalm-readonly-allow-private-mutation
     */
    private $referenceValid = false;

    /**
     * Returns the lines (joined with "\n") that were not consumed by a definition.
     */
    public function getParagraphContent(): string
    {
        return $this->paragraph;
    }

    /**
     * Finalizes any pending definition and returns everything found so far.
     *
     * @return ReferenceInterface[]
     */
    public function getReferences(): iterable
    {
        if ($this->state === self::TITLE) {
            // The closing delimiter never arrived, so whatever was collected belongs to the
            // paragraph (it is still in the paragraph buffer) and must not become a title.
            $this->title = '';
        }

        $this->finishReference();

        return $this->references;
    }

    /**
     * Tells whether at least one definition has been found.
     *
     * A definition that is already known to be valid but has not been finalized yet
     * counts as well, so the answer matches what getReferences() would return.
     */
    public function hasReferences(): bool
    {
        return $this->references !== [] || $this->referenceValid;
    }

    /**
     * Feeds the next line of the paragraph to the parser.
     *
     * @param string $line A single line, without its line terminator
     */
    public function parse(string $line): void
    {
        if ($this->paragraph !== '') {
            $this->paragraph .= "\n";
        }

        $this->paragraph .= $line;

        $cursor = new Cursor($line);
        while (! $cursor->isAtEnd()) {
            $result = false;
            switch ($this->state) {
                case self::PARAGRAPH:
                    // We're in a paragraph now. Link reference definitions can only appear at the beginning, so once
                    // we're in a paragraph, there's no going back.
                    return;
                case self::START_DEFINITION:
                    $result = $this->parseStartDefinition($cursor);
                    break;
                case self::LABEL:
                    $result = $this->parseLabel($cursor);
                    break;
                case self::DESTINATION:
                    $result = $this->parseDestination($cursor);
                    break;
                case self::START_TITLE:
                    $result = $this->parseStartTitle($cursor);
                    break;
                case self::TITLE:
                    $result = $this->parseTitle($cursor);
                    break;
                default:
                    // this should never happen
                    break;
            }

            if (! $result) {
                // The current line cannot be part of a definition: everything from here on is paragraph text.
                $this->state = self::PARAGRAPH;

                return;
            }
        }
    }

    /**
     * Expects `[` (after optional spaces/tabs) and switches to the LABEL state.
     *
     * @return bool False when the line does not start a definition
     */
    private function parseStartDefinition(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd() || $cursor->getCharacter() !== '[') {
            return false;
        }

        $this->state = self::LABEL;
        $this->label = '';

        $cursor->advance();
        if ($cursor->isAtEnd()) {
            // `[` was the last character on the line; the label starts on the next one
            $this->label .= "\n";
        }

        return true;
    }

    /**
     * Collects (part of) the label and, when `]:` is reached, validates it and
     * switches to the DESTINATION state.
     *
     * @return bool False when the text cannot be a valid label
     */
    private function parseLabel(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $partialLabel = LinkParserHelper::parsePartialLinkLabel($cursor);
        if ($partialLabel === null) {
            return false;
        }

        \assert($this->label !== null);
        $this->label .= $partialLabel;

        if ($cursor->isAtEnd()) {
            // label might continue on next line
            $this->label .= "\n";

            return true;
        }

        if ($cursor->getCharacter() !== ']') {
            return false;
        }

        $cursor->advance();

        // end of label
        if ($cursor->getCharacter() !== ':') {
            return false;
        }

        $cursor->advance();

        // spec: A link label can have at most 999 characters inside the square brackets
        if (\mb_strlen($this->label, 'utf-8') > 999) {
            return false;
        }

        // spec: A link label must contain at least one non-whitespace character
        if (\trim($this->label) === '') {
            return false;
        }

        $cursor->advanceToNextNonSpaceOrTab();

        $this->state = self::DESTINATION;

        return true;
    }

    /**
     * Parses the destination and switches to the START_TITLE state.
     *
     * @return bool False when there is no valid destination, or when it is directly
     *              followed by more text without separating whitespace
     */
    private function parseDestination(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $destination = LinkParserHelper::parseLinkDestination($cursor);
        if ($destination === null) {
            return false;
        }

        $this->destination = $destination;

        // Compared against 0 below to detect a missing separator (presumably the number of characters skipped)
        $advanced = $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            // Destination was at end of line, so this is a valid reference for sure (and maybe a title).
            // If not at end of line, wait for title to be valid first.
            $this->referenceValid = true;
            $this->paragraph      = '';
        } elseif ($advanced === 0) {
            // spec: The title must be separated from the link destination by whitespace
            return false;
        }

        $this->state = self::START_TITLE;

        return true;
    }

    /**
     * Looks for an opening title delimiter (`"`, `'` or `(`).
     *
     * When there is none, the pending definition (if valid) is finalized and the same
     * character is retried as the start of a new definition.
     *
     * @return bool Always true; the decision is deferred to the next state
     */
    private function parseStartTitle(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            $this->state = self::START_DEFINITION;

            return true;
        }

        $this->titleDelimiter = null;
        switch ($c = $cursor->getCharacter()) {
            case '"':
            case "'":
                $this->titleDelimiter = $c;
                break;
            case '(':
                $this->titleDelimiter = ')';
                break;
            default:
                // no title delimiter found
                break;
        }

        if ($this->titleDelimiter !== null) {
            $this->state = self::TITLE;
            $cursor->advance();
            if ($cursor->isAtEnd()) {
                // The opening delimiter was the last character on the line; the title starts on the next one
                $this->title .= "\n";
            }
        } else {
            $this->finishReference();
            // There might be another reference instead, try that for the same character.
            $this->state = self::START_DEFINITION;
        }

        return true;
    }

    /**
     * Collects (part of) the title; when the closing delimiter is found and nothing
     * but whitespace follows, the definition is finalized.
     *
     * @return bool False when the title is invalid; any title text collected so far is
     *              discarded in that case
     */
    private function parseTitle(Cursor $cursor): bool
    {
        \assert($this->titleDelimiter !== null);
        $title = LinkParserHelper::parsePartialLinkTitle($cursor, $this->titleDelimiter);

        if ($title === null) {
            // Invalid title, stop. A definition whose destination ended its line is still valid and will be
            // finalized later, so the text collected so far must not end up as its title.
            $this->title = '';

            return false;
        }

        // Did we find the end delimiter?
        $endDelimiterFound = false;
        if (\substr($title, -1) === $this->titleDelimiter) {
            $endDelimiterFound = true;
            // Chop it off
            $title = \substr($title, 0, -1);
        }

        $this->title .= $title;

        if (! $endDelimiterFound && $cursor->isAtEnd()) {
            // Title still going, continue on next line
            $this->title .= "\n";

            return true;
        }

        // We either hit the end delimiter or some extra whitespace
        $cursor->advanceToNextNonSpaceOrTab();
        if (! $cursor->isAtEnd()) {
            // spec: No further non-whitespace characters may occur on the line.
            // The would-be title is ordinary paragraph text; drop it so that a definition which
            // was already valid without a title is not finalized with it.
            $this->title = '';

            return false;
        }

        $this->referenceValid = true;
        $this->finishReference();
        $this->paragraph = '';

        // See if there's another definition
        $this->state = self::START_DEFINITION;

        return true;
    }

    /**
     * Turns the pending definition into a Reference (when it is valid) and resets the
     * per-definition fields. Does nothing when no valid definition is pending.
     */
    private function finishReference(): void
    {
        if (! $this->referenceValid) {
            return;
        }

        // referenceValid is only set after both a label and a destination have been parsed
        \assert($this->label !== null);
        \assert($this->destination !== null);

        $this->references[] = new Reference($this->label, $this->destination, $this->title);

        $this->label          = null;
        $this->referenceValid = false;
        $this->destination    = null;
        $this->title          = '';
        $this->titleDelimiter = null;
    }
}
||||
349 |