1 | <?php |
||||
2 | |||||
3 | declare(strict_types=1); |
||||
4 | |||||
5 | /* |
||||
6 | * This file is part of the league/commonmark package. |
||||
7 | * |
||||
8 | * (c) Colin O'Dell <[email protected]> |
||||
9 | * |
||||
10 | * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js) |
||||
11 | * - (c) John MacFarlane |
||||
12 | * |
||||
13 | * For the full copyright and license information, please view the LICENSE |
||||
14 | * file that was distributed with this source code. |
||||
15 | */ |
||||
16 | |||||
17 | namespace League\CommonMark\Reference; |
||||
18 | |||||
19 | use League\CommonMark\Parser\Cursor; |
||||
20 | use League\CommonMark\Util\LinkParserHelper; |
||||
21 | |||||
/**
 * Incrementally extracts link reference definitions from the lines of a paragraph.
 *
 * Lines are fed one at a time to parse(). The class is a small state machine: each line is handed to
 * the handler for the current state, repeatedly, until the line is exhausted or a handler rejects it.
 * A rejection moves the parser into the terminal PARAGRAPH state, after which further lines are only
 * appended to the paragraph content.
 *
 * Every line is appended to the paragraph content first; that content is cleared each time a
 * definition is known to be complete, so getParagraphContent() returns only the text that was not
 * consumed by a definition.
 */
final class ReferenceParser
{
    // Looking for the start of a definition, i.e. `[`
    private const START_DEFINITION = 0;
    // Parsing the label and the `]:` that closes it, i.e. `foo]:` in `[foo]: /url`
    private const LABEL = 1;
    // Parsing the destination, i.e. `/url` in `[foo]: /url`
    private const DESTINATION = 2;
    // Looking for the start of a title, i.e. the first `"` in `[foo]: /url "title"`
    private const START_TITLE = 3;
    // Parsing the content of the title, i.e. `title` in `[foo]: /url "title"`
    private const TITLE = 4;
    // End state, no matter what kind of lines we add, they won't be references
    private const PARAGRAPH = 5;

    /**
     * Text received so far that has not been consumed by a completed definition.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private string $paragraph = '';

    /**
     * Definitions completed so far, in the order they were finished.
     *
     * @var array<int, ReferenceInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $references = [];

    /**
     * One of the state constants declared above.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private int $state = self::START_DEFINITION;

    /**
     * Label of the definition in progress; null when no definition has been started.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ?string $label = null;

    /**
     * Destination of the definition in progress; null until one has been parsed.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ?string $destination = null;

    /**
     * Title text collected so far for the definition in progress ('' when there is none).
     *
     * @psalm-readonly-allow-private-mutation
     */
    private string $title = '';

    /**
     * Character that closes the title in progress (`"`, `'` or `)`); null when no title was opened.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ?string $titleDelimiter = null;

    /**
     * True once the definition in progress is complete enough to be recorded by finishReference().
     *
     * @psalm-readonly-allow-private-mutation
     */
    private bool $referenceValid = false;

    /**
     * Returns the text that was fed to parse() but not consumed by a completed definition.
     */
    public function getParagraphContent(): string
    {
        return $this->paragraph;
    }

    /**
     * Records the definition in progress (if it is valid) and returns all definitions found.
     *
     * Note that this finalises the pending definition and resets its working state, so it is meant
     * to be called once no more lines will be parsed.
     *
     * @return ReferenceInterface[]
     */
    public function getReferences(): iterable
    {
        $this->finishReference();

        return $this->references;
    }

    /**
     * Tells whether at least one definition has been recorded so far.
     *
     * A valid definition that is still pending (not yet passed through finishReference()) is not
     * counted.
     */
    public function hasReferences(): bool
    {
        return $this->references !== [];
    }

    /**
     * Feeds the next line of the paragraph to the parser.
     *
     * The line is always appended to the paragraph content (joined with "\n"); the content is
     * cleared by the state handlers whenever a definition is completed.
     */
    public function parse(string $line): void
    {
        if ($this->paragraph !== '') {
            $this->paragraph .= "\n";
        }

        $this->paragraph .= $line;

        $cursor = new Cursor($line);
        while (! $cursor->isAtEnd()) {
            switch ($this->state) {
                case self::PARAGRAPH:
                    // We're in a paragraph now. Link reference definitions can only appear at the beginning, so once
                    // we're in a paragraph, there's no going back.
                    return;
                case self::START_DEFINITION:
                    $result = $this->parseStartDefinition($cursor);
                    break;
                case self::LABEL:
                    $result = $this->parseLabel($cursor);
                    break;
                case self::DESTINATION:
                    $result = $this->parseDestination($cursor);
                    break;
                case self::START_TITLE:
                    $result = $this->parseStartTitle($cursor);
                    break;
                case self::TITLE:
                    $result = $this->parseTitle($cursor);
                    break;
                default:
                    // This should never happen; treat an unknown state as a rejection.
                    $result = false;
                    break;
            }

            if (! $result) {
                // The current line cannot be part of a definition: everything from here on is paragraph text.
                $this->state = self::PARAGRAPH;

                return;
            }
        }
    }

    /**
     * Consumes the opening `[` of a definition.
     *
     * @return bool false when the next non-space character is not `[`
     */
    private function parseStartDefinition(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd() || $cursor->getCurrentCharacter() !== '[') {
            return false;
        }

        $this->state = self::LABEL;
        $this->label = '';

        $cursor->advance();
        if ($cursor->isAtEnd()) {
            // The `[` was the last character of the line, so the label starts with a line break.
            $this->label .= "\n";
        }

        return true;
    }

    /**
     * Consumes (part of) the label and, when it is closed on this line, the `]:` that follows it.
     *
     * @return bool false when the text cannot be a label, is not followed by `]:`, or breaks a spec limit
     */
    private function parseLabel(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $partialLabel = LinkParserHelper::parsePartialLinkLabel($cursor);
        if ($partialLabel === null) {
            return false;
        }

        \assert($this->label !== null);
        $this->label .= $partialLabel;

        if ($cursor->isAtEnd()) {
            // label might continue on next line
            $this->label .= "\n";

            return true;
        }

        if ($cursor->getCurrentCharacter() !== ']') {
            return false;
        }

        $cursor->advance();

        // end of label
        if ($cursor->getCurrentCharacter() !== ':') {
            return false;
        }

        $cursor->advance();

        // spec: A link label can have at most 999 characters inside the square brackets
        if (\mb_strlen($this->label, 'UTF-8') > 999) {
            return false;
        }

        // spec: A link label must contain at least one non-whitespace character
        if (\trim($this->label) === '') {
            return false;
        }

        $cursor->advanceToNextNonSpaceOrTab();

        $this->state = self::DESTINATION;

        return true;
    }

    /**
     * Consumes the link destination.
     *
     * @return bool false when no destination can be parsed, or when it is directly followed by other text
     */
    private function parseDestination(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $destination = LinkParserHelper::parseLinkDestination($cursor);
        if ($destination === null) {
            return false;
        }

        $this->destination = $destination;

        // Number of whitespace positions skipped after the destination (0 means something is glued to it).
        $advanced = $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            // Destination was at end of line, so this is a valid reference for sure (and maybe a title).
            // If not at end of line, wait for title to be valid first.
            $this->referenceValid = true;
            $this->paragraph      = '';
        } elseif ($advanced === 0) {
            // spec: The title must be separated from the link destination by whitespace
            return false;
        }

        $this->state = self::START_TITLE;

        return true;
    }

    /**
     * Looks for the character that opens a title.
     *
     * When the next character does not open a title, the pending definition is finished (if valid)
     * and the same character is retried as the possible start of another definition.
     *
     * @return bool always true; rejection is left to the state that handles the character next
     */
    private function parseStartTitle(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            $this->state = self::START_DEFINITION;

            return true;
        }

        $this->titleDelimiter = null;
        switch ($c = $cursor->getCurrentCharacter()) {
            case '"':
            case "'":
                // Quoted titles are closed by the same character that opened them.
                $this->titleDelimiter = $c;
                break;
            case '(':
                $this->titleDelimiter = ')';
                break;
            default:
                // no title delimiter found
                break;
        }

        if ($this->titleDelimiter !== null) {
            $this->state = self::TITLE;
            $cursor->advance();
            if ($cursor->isAtEnd()) {
                // The opening delimiter was the last character of the line, so the title starts with a line break.
                $this->title .= "\n";
            }
        } else {
            $this->finishReference();
            // There might be another reference instead, try that for the same character.
            $this->state = self::START_DEFINITION;
        }

        return true;
    }

    /**
     * Consumes (part of) the title and, when it is closed on this line, completes the definition.
     *
     * When the title is rejected, the text collected for it is discarded. The definition may already
     * be valid without a title (destination at the end of its line), and the rejected text stays in
     * the paragraph content, so it must not also be attached to the reference.
     *
     * @return bool false when the title is invalid or is followed by other text on the same line
     */
    private function parseTitle(Cursor $cursor): bool
    {
        \assert($this->titleDelimiter !== null);
        $title = LinkParserHelper::parsePartialLinkTitle($cursor, $this->titleDelimiter);

        if ($title === null) {
            // Invalid title, stop. Title collected so far must not be used.
            $this->title = '';

            return false;
        }

        // Did we find the end delimiter?
        $endDelimiterFound = false;
        if (\substr($title, -1) === $this->titleDelimiter) {
            $endDelimiterFound = true;
            // Chop it off
            $title = \substr($title, 0, -1);
        }

        $this->title .= $title;

        if (! $endDelimiterFound && $cursor->isAtEnd()) {
            // Title still going, continue on next line
            $this->title .= "\n";

            return true;
        }

        // We either hit the end delimiter or some extra whitespace
        $cursor->advanceToNextNonSpaceOrTab();
        if (! $cursor->isAtEnd()) {
            // spec: No further non-whitespace characters may occur on the line.
            // Title collected so far must not be used.
            $this->title = '';

            return false;
        }

        // See if there's another definition. The state is changed before finishing because
        // finishReference() treats the TITLE state as "title was never closed".
        $this->state = self::START_DEFINITION;

        $this->referenceValid = true;
        $this->finishReference();
        $this->paragraph = '';

        return true;
    }

    /**
     * Records the definition in progress, if it is valid, and resets the per-definition state.
     *
     * Does nothing when no valid definition is pending.
     */
    private function finishReference(): void
    {
        if (! $this->referenceValid) {
            return;
        }

        if ($this->state === self::TITLE) {
            // We are being finished while a title is still open (its closing delimiter never came).
            // The definition is valid without it, and that text is still part of the paragraph.
            $this->title = '';
        }

        // A reference is only marked valid after both the label and the destination were parsed.
        \assert($this->label !== null);
        \assert($this->destination !== null);

        $this->references[] = new Reference($this->label, $this->destination, $this->title);

        $this->label          = null;
        $this->referenceValid = false;
        $this->destination    = null;
        $this->title          = '';
        $this->titleDelimiter = null;
    }
}
||||
325 |