Completed
Push — refactor-parsing ( adbb6b...fbe6de )
by Colin
08:21 queued 07:01
created

ReferenceParser::finishReference()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 14
ccs 10
cts 10
cp 1
rs 9.7998
c 0
b 0
f 0
cc 2
nc 2
nop 0
crap 2
1
<?php
2
3
/*
4
 * This file is part of the league/commonmark package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
9
 *  - (c) John MacFarlane
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace League\CommonMark\Reference;
16
17
use League\CommonMark\Parser\Cursor;
18
use League\CommonMark\Util\LinkParserHelper;
19
20
/**
 * Incremental parser for link reference definitions found at the start of a paragraph.
 *
 * Lines are fed one at a time through parse(). Internally this is a small state machine
 * (see the state constants below) that walks through `[label]: destination "title"`,
 * where the label and the title may each span several lines. Every line is also appended
 * to the paragraph buffer; the buffer is emptied whenever a definition is completed, so
 * getParagraphContent() returns only the text that did not become a reference.
 */
final class ReferenceParser
{
    // Looking for the start of a definition, i.e. `[`
    private const START_DEFINITION = 0;
    // Looking for and parsing the label, i.e. `[foo]` within `[foo]`
    private const LABEL = 1;
    // Parsing the destination, i.e. `/url` in `[foo]: /url`
    private const DESTINATION = 2;
    // Looking for the start of a title, i.e. the first `"` in `[foo]: /url "title"`
    private const START_TITLE = 3;
    // Parsing the content of the title, i.e. `title` in `[foo]: /url "title"`
    private const TITLE = 4;
    // End state, no matter what kind of lines we add, they won't be references
    private const PARAGRAPH = 5;

    /** @var string Lines received so far that have not been consumed by a completed definition */
    private $paragraph = '';

    /** @var array<int, ReferenceInterface> Definitions completed so far */
    private $references = [];

    /** @var int One of the state constants above */
    private $state = self::START_DEFINITION;

    /** @var string|null Label of the definition currently being parsed */
    private $label;

    /** @var string|null Destination of the definition currently being parsed */
    private $destination;

    /** @var string Title of the definition currently being parsed ('' when there is none) */
    private $title = '';

    /** @var string|null Character that closes the title currently being parsed */
    private $titleDelimiter;

    /** @var bool Whether the definition currently being parsed is already known to be valid */
    private $referenceValid = false;

    /**
     * Returns the text that is left over as regular paragraph content.
     */
    public function getParagraphContent(): string
    {
        return $this->paragraph;
    }

    /**
     * Returns every definition parsed so far, flushing a pending valid one first.
     *
     * @return ReferenceInterface[]
     */
    public function getReferences(): iterable
    {
        $this->finishReference();

        return $this->references;
    }

    /**
     * Tells whether at least one definition has been completed.
     *
     * Unlike getReferences(), this does not flush a pending definition.
     */
    public function hasReferences(): bool
    {
        return $this->references !== [];
    }

    /**
     * Feeds the next line of the paragraph to the parser.
     *
     * The line is appended to the paragraph buffer (joined with "\n") and then consumed
     * by the state handlers until the line is exhausted or one of them rejects it, in
     * which case the parser switches to the terminal PARAGRAPH state.
     */
    public function parse(string $line): void
    {
        if ($this->paragraph !== '') {
            $this->paragraph .= "\n";
        }

        $this->paragraph .= $line;

        $cursor = new Cursor($line);
        while (!$cursor->isAtEnd()) {
            $result = false;
            switch ($this->state) {
                case self::PARAGRAPH:
                    // We're in a paragraph now. Link reference definitions can only appear at the beginning, so once
                    // we're in a paragraph, there's no going back.
                    return;
                case self::START_DEFINITION:
                    $result = $this->parseStartDefinition($cursor);
                    break;
                case self::LABEL:
                    $result = $this->parseLabel($cursor);
                    break;
                case self::DESTINATION:
                    $result = $this->parseDestination($cursor);
                    break;
                case self::START_TITLE:
                    $result = $this->parseStartTitle($cursor);
                    break;
                case self::TITLE:
                    $result = $this->parseTitle($cursor);
                    break;
            }

            if (!$result) {
                $this->state = self::PARAGRAPH;

                return;
            }
        }
    }

    /**
     * Expects the opening `[` of a definition (after optional spaces/tabs).
     *
     * @return bool false when the line cannot start a definition
     */
    private function parseStartDefinition(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd() || $cursor->getCharacter() !== '[') {
            return false;
        }

        $this->state = self::LABEL;
        $this->label = '';

        $cursor->advance();
        if ($cursor->isAtEnd()) {
            // The label starts on the next line; keep the line break as part of it
            $this->label .= "\n";
        }

        return true;
    }

    /**
     * Collects (part of) the label and, once `]:` is reached, validates it.
     *
     * @return bool false when the text cannot be a valid label
     */
    private function parseLabel(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $partialLabel = LinkParserHelper::parsePartialLinkLabel($cursor);
        if ($partialLabel === null) {
            return false;
        }

        \assert($this->label !== null);
        $this->label .= $partialLabel;

        if ($cursor->isAtEnd()) {
            // label might continue on next line
            $this->label .= "\n";

            return true;
        }

        if ($cursor->getCharacter() !== ']') {
            return false;
        }

        $cursor->advance();

        // end of label
        if ($cursor->getCharacter() !== ':') {
            return false;
        }

        $cursor->advance();

        // spec: A link label can have at most 999 characters inside the square brackets
        if (\mb_strlen($this->label, 'utf-8') > 999) {
            return false;
        }

        // spec: A link label must contain at least one non-whitespace character
        if (\trim($this->label) === '') {
            return false;
        }

        $cursor->advanceToNextNonSpaceOrTab();

        $this->state = self::DESTINATION;

        return true;
    }

    /**
     * Parses the link destination that follows `[label]:`.
     *
     * @return bool false when no destination could be parsed, or when it is directly
     *              followed by other characters without separating whitespace
     */
    private function parseDestination(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $destination = LinkParserHelper::parseLinkDestination($cursor);
        if ($destination === null) {
            return false;
        }

        $this->destination = $destination;

        $advanced = $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            // Destination was at end of line, so this is a valid reference for sure (and maybe a title).
            // If not at end of line, wait for title to be valid first.
            $this->referenceValid = true;
            $this->paragraph      = '';
        } elseif ($advanced === 0) {
            // spec: The title must be separated from the link destination by whitespace
            return false;
        }

        $this->state = self::START_TITLE;

        return true;
    }

    /**
     * Looks for the opening delimiter of a title (`"`, `'` or `(`).
     *
     * When there is none, the pending definition (if valid) is finished and the same
     * character is retried as the possible start of another definition.
     *
     * @return bool always true; rejecting the input is left to the following state
     */
    private function parseStartTitle(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            // Flush a definition that is already valid before looking for a new one;
            // otherwise the next `[` would reset the label it still needs. This is a
            // no-op when nothing is pending.
            $this->finishReference();
            $this->state = self::START_DEFINITION;

            return true;
        }

        $this->titleDelimiter = null;
        switch ($c = $cursor->getCharacter()) {
            case '"':
            case "'":
                $this->titleDelimiter = $c;
                break;
            case '(':
                $this->titleDelimiter = ')';
                break;
        }

        if ($this->titleDelimiter !== null) {
            $this->state = self::TITLE;
            $cursor->advance();
            if ($cursor->isAtEnd()) {
                // The title starts on the next line; keep the line break as part of it
                $this->title .= "\n";
            }
        } else {
            $this->finishReference();
            // There might be another reference instead, try that for the same character.
            $this->state = self::START_DEFINITION;
        }

        return true;
    }

    /**
     * Collects (part of) the title and completes the definition once it is closed.
     *
     * @return bool false when the title is invalid; any title text collected so far is
     *              discarded in that case
     */
    private function parseTitle(Cursor $cursor): bool
    {
        \assert($this->titleDelimiter !== null);
        $title = LinkParserHelper::parsePartialLinkTitle($cursor, $this->titleDelimiter);

        if ($title === null) {
            // Invalid title, stop. A definition whose destination ended the previous line
            // is still valid and will be emitted later, so the title text collected so
            // far must not be attached to it.
            $this->title = '';

            return false;
        }

        // Did we find the end delimiter?
        $endDelimiterFound = false;
        if (\substr($title, -1) === $this->titleDelimiter) {
            // Chop it off
            $title             = \substr($title, 0, -1);
            $endDelimiterFound = true;
        }

        $this->title .= $title;

        if (!$endDelimiterFound && $cursor->isAtEnd()) {
            // Title still going, continue on next line
            $this->title .= "\n";

            return true;
        }

        // We either hit the end delimiter or some extra whitespace
        $cursor->advanceToNextNonSpaceOrTab();
        if (!$cursor->isAtEnd()) {
            // spec: No further non-whitespace characters may occur on the line.
            // The text is therefore paragraph content, not a title: discard it so that a
            // definition already validated by its destination is emitted without a title.
            $this->title = '';

            return false;
        }

        $this->referenceValid = true;
        $this->finishReference();
        $this->paragraph = '';

        // See if there's another definition
        $this->state = self::START_DEFINITION;

        return true;
    }

    /**
     * Stores the pending definition, if it is valid, and resets the per-definition state.
     *
     * Does nothing when no valid definition is pending.
     */
    private function finishReference(): void
    {
        if (!$this->referenceValid) {
            return;
        }

        // referenceValid is only set after both the label and the destination were parsed
        \assert($this->label !== null && $this->destination !== null);

        $this->references[] = new Reference($this->label, $this->destination, $this->title);

        $this->label          = null;
        $this->referenceValid = false;
        $this->destination    = null;
        $this->title          = '';
        $this->titleDelimiter = null;
    }
}
302