1 | <?php |
||||
2 | |||||
3 | declare(strict_types=1); |
||||
4 | |||||
5 | /* |
||||
6 | * This file is part of the league/commonmark package. |
||||
7 | * |
||||
8 | * (c) Colin O'Dell <[email protected]> |
||||
9 | * |
||||
10 | * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js) |
||||
11 | * - (c) John MacFarlane |
||||
12 | * |
||||
13 | * For the full copyright and license information, please view the LICENSE |
||||
14 | * file that was distributed with this source code. |
||||
15 | */ |
||||
16 | |||||
17 | namespace League\CommonMark\Reference; |
||||
18 | |||||
19 | use League\CommonMark\Parser\Cursor; |
||||
20 | use League\CommonMark\Util\LinkParserHelper; |
||||
21 | |||||
/**
 * Incremental parser for link reference definitions at the start of a paragraph.
 *
 * Lines are fed one at a time through parse(). A small state machine walks through the
 * label, destination and optional title of each definition; every completed definition is
 * collected as a Reference and its text is dropped from the buffered paragraph content.
 * As soon as a line cannot be part of a definition the parser switches to the PARAGRAPH
 * state and merely buffers all further lines.
 */
final class ReferenceParser
{
    // Looking for the start of a definition, i.e. `[`
    private const START_DEFINITION = 0;
    // Looking for and parsing the label, i.e. `foo` in `[foo]: /url`
    private const LABEL = 1;
    // Parsing the destination, i.e. `/url` in `[foo]: /url`
    private const DESTINATION = 2;
    // Looking for the start of a title, i.e. the first `"` in `[foo]: /url "title"`
    private const START_TITLE = 3;
    // Parsing the content of the title, i.e. `title` in `[foo]: /url "title"`
    private const TITLE = 4;
    // End state, no matter what kind of lines we add, they won't be references
    private const PARAGRAPH = 5;

    /**
     * Text received so far that has not been consumed by a completed definition.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private string $paragraph = '';

    /**
     * Definitions completed so far, in the order they were finished.
     *
     * @var array<int, ReferenceInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $references = [];

    /**
     * Current state; one of the class constants above.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private int $state = self::START_DEFINITION;

    /**
     * Label of the definition being parsed, or null when no definition is in progress.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ?string $label = null;

    /**
     * Destination of the definition being parsed, or null if none has been parsed yet.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ?string $destination = null;

    /**
     * Title text collected so far for the definition being parsed ('' when there is none).
     *
     * @psalm-readonly-allow-private-mutation
     */
    private string $title = '';

    /**
     * Character that closes the title being parsed, or null when no title has been opened.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ?string $titleDelimiter = null;

    /**
     * Whether the label/destination parsed so far already form a complete definition
     * that finishReference() may store.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private bool $referenceValid = false;

    /**
     * Returns the buffered text that is not part of any completed definition.
     */
    public function getParagraphContent(): string
    {
        return $this->paragraph;
    }

    /**
     * Stores any pending valid definition and returns every definition found.
     *
     * @return ReferenceInterface[]
     */
    public function getReferences(): iterable
    {
        if ($this->referenceValid && $this->state === self::TITLE) {
            // A title was opened on a later line but its closing delimiter never arrived.
            // That text is ordinary paragraph content (it is still in the paragraph buffer),
            // so it must not be attached to the definition that is about to be stored.
            $this->title = '';
        }

        $this->finishReference();

        return $this->references;
    }

    /**
     * Tells whether at least one definition has been stored so far.
     *
     * A pending definition that has not been stored yet is not counted.
     */
    public function hasReferences(): bool
    {
        return $this->references !== [];
    }

    /**
     * Feeds the next line of the paragraph into the parser.
     *
     * The line is always appended to the paragraph buffer; the state handlers clear that
     * buffer again whenever the text received so far turns out to be a complete definition.
     */
    public function parse(string $line): void
    {
        if ($this->paragraph !== '') {
            $this->paragraph .= "\n";
        }

        $this->paragraph .= $line;

        $cursor = new Cursor($line);
        while (! $cursor->isAtEnd()) {
            $result = false;
            switch ($this->state) {
                case self::PARAGRAPH:
                    // We're in a paragraph now. Link reference definitions can only appear at the beginning, so once
                    // we're in a paragraph, there's no going back.
                    return;
                case self::START_DEFINITION:
                    $result = $this->parseStartDefinition($cursor);
                    break;
                case self::LABEL:
                    $result = $this->parseLabel($cursor);
                    break;
                case self::DESTINATION:
                    $result = $this->parseDestination($cursor);
                    break;
                case self::START_TITLE:
                    $result = $this->parseStartTitle($cursor);
                    break;
                case self::TITLE:
                    $result = $this->parseTitle($cursor);
                    break;
                default:
                    // this should never happen
                    break;
            }

            if (! $result) {
                // The line cannot be (part of) a definition: everything from here on is paragraph text.
                $this->state = self::PARAGRAPH;

                return;
            }
        }
    }

    /**
     * Expects the opening `[` of a definition and starts a fresh label.
     *
     * @return bool false if the next non-blank character is not `[`
     */
    private function parseStartDefinition(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd() || $cursor->getCurrentCharacter() !== '[') {
            return false;
        }

        $this->state = self::LABEL;
        $this->label = '';

        $cursor->advance();
        if ($cursor->isAtEnd()) {
            // `[` was the last character of the line; the label continues on the next one
            $this->label .= "\n";
        }

        return true;
    }

    /**
     * Collects label text and, once the closing `]:` is reached, validates the label.
     *
     * @return bool false if the text cannot be a valid label followed by a colon
     */
    private function parseLabel(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $partialLabel = LinkParserHelper::parsePartialLinkLabel($cursor);
        if ($partialLabel === null) {
            return false;
        }

        \assert($this->label !== null);
        $this->label .= $partialLabel;

        if ($cursor->isAtEnd()) {
            // label might continue on next line
            $this->label .= "\n";

            return true;
        }

        if ($cursor->getCurrentCharacter() !== ']') {
            return false;
        }

        $cursor->advance();

        // end of label
        if ($cursor->getCurrentCharacter() !== ':') {
            return false;
        }

        $cursor->advance();

        // spec: A link label can have at most 999 characters inside the square brackets
        if (\mb_strlen($this->label, 'UTF-8') > 999) {
            return false;
        }

        // spec: A link label must contain at least one non-whitespace character
        if (\trim($this->label) === '') {
            return false;
        }

        $cursor->advanceToNextNonSpaceOrTab();

        $this->state = self::DESTINATION;

        return true;
    }

    /**
     * Parses the link destination that follows the label.
     *
     * @return bool false if no destination could be parsed, or if it is directly followed by
     *              further characters without separating whitespace
     */
    private function parseDestination(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $destination = LinkParserHelper::parseLinkDestination($cursor);
        if ($destination === null) {
            return false;
        }

        $this->destination = $destination;

        $advanced = $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            // Destination was at end of line, so this is a valid reference for sure (and maybe a title).
            // If not at end of line, wait for title to be valid first.
            $this->referenceValid = true;
            $this->paragraph      = '';
        } elseif ($advanced === 0) {
            // spec: The title must be separated from the link destination by whitespace
            return false;
        }

        $this->state = self::START_TITLE;

        return true;
    }

    /**
     * Looks for a title's opening delimiter (`"`, `'` or `(`).
     *
     * If none is found, the pending definition (if valid) is stored and the same character
     * is retried as the possible start of another definition.
     *
     * @return bool always true; an unsuitable character is handled by the next state
     */
    private function parseStartTitle(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            $this->state = self::START_DEFINITION;

            return true;
        }

        $this->titleDelimiter = null;
        switch ($c = $cursor->getCurrentCharacter()) {
            case '"':
            case "'":
                $this->titleDelimiter = $c;
                break;
            case '(':
                $this->titleDelimiter = ')';
                break;
            default:
                // no title delimiter found
                break;
        }

        if ($this->titleDelimiter !== null) {
            $this->state = self::TITLE;
            $cursor->advance();
            if ($cursor->isAtEnd()) {
                // The opening delimiter ended the line; the title text starts on the next one
                $this->title .= "\n";
            }
        } else {
            $this->finishReference();
            // There might be another reference instead, try that for the same character.
            $this->state = self::START_DEFINITION;
        }

        return true;
    }

    /**
     * Collects title text up to the closing delimiter and completes the definition.
     *
     * Whenever the title is rejected, the text collected so far is discarded. Without
     * that, a definition already made valid by a destination at the end of the previous
     * line would later be stored with the rejected text as its title.
     *
     * @return bool false if the title is invalid or is followed by other text on the line
     */
    private function parseTitle(Cursor $cursor): bool
    {
        \assert($this->titleDelimiter !== null);
        $title = LinkParserHelper::parsePartialLinkTitle($cursor, $this->titleDelimiter);

        if ($title === null) {
            // Invalid title, stop. Title collected so far must not be used.
            $this->title = '';

            return false;
        }

        // Did we find the end delimiter?
        $endDelimiterFound = false;
        if (\substr($title, -1) === $this->titleDelimiter) {
            $endDelimiterFound = true;
            // Chop it off
            $title = \substr($title, 0, -1);
        }

        $this->title .= $title;

        if (! $endDelimiterFound && $cursor->isAtEnd()) {
            // Title still going, continue on next line
            $this->title .= "\n";

            return true;
        }

        // We either hit the end delimiter or some extra whitespace
        $cursor->advanceToNextNonSpaceOrTab();
        if (! $cursor->isAtEnd()) {
            // spec: No further non-whitespace characters may occur on the line.
            // Title collected so far must not be used.
            $this->title = '';

            return false;
        }

        $this->referenceValid = true;
        $this->finishReference();
        $this->paragraph = '';

        // See if there's another definition
        $this->state = self::START_DEFINITION;

        return true;
    }

    /**
     * Stores the pending definition, if it is valid, and resets the per-definition fields.
     *
     * Does nothing when no valid definition is pending.
     */
    private function finishReference(): void
    {
        if (! $this->referenceValid) {
            return;
        }

        // referenceValid is only set after both the label and the destination were parsed
        \assert($this->label !== null && $this->destination !== null);

        $this->references[] = new Reference($this->label, $this->destination, $this->title);

        $this->label          = null;
        $this->referenceValid = false;
        $this->destination    = null;
        $this->title          = '';
        $this->titleDelimiter = null;
    }
}
||||
325 |