Completed
Push — master ( fbe6de...9292e6 )
by Colin
03:57 queued 02:54
created

ReferenceParser::parseTitle()   B

Complexity

Conditions 6
Paths 7

Size

Total Lines 43

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 20
CRAP Score 6.0038

Importance

Changes 0
Metric Value
dl 0
loc 43
ccs 20
cts 21
cp 0.9524
rs 8.6097
c 0
b 0
f 0
cc 6
nc 7
nop 1
crap 6.0038
1
<?php
2
3
/*
4
 * This file is part of the league/commonmark package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
9
 *  - (c) John MacFarlane
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace League\CommonMark\Reference;
16
17
use League\CommonMark\Parser\Cursor;
18
use League\CommonMark\Util\LinkParserHelper;
19
20
/**
 * Line-by-line state machine that extracts link reference definitions
 * (e.g. `[foo]: /url "title"`) from the lines fed to parse().
 *
 * Every line passed to parse() is also accumulated as paragraph content; that
 * accumulated content is cleared each time a definition is confirmed valid, so
 * getParagraphContent() returns only the text not consumed by definitions.
 */
final class ReferenceParser
{
    // Looking for the start of a definition, i.e. `[`
    private const START_DEFINITION = 0;
    // Looking for and parsing the label, i.e. `[foo]` within `[foo]`
    private const LABEL = 1;
    // Parsing the destination, i.e. `/url` in `[foo]: /url`
    private const DESTINATION = 2;
    // Looking for the start of a title, i.e. the first `"` in `[foo]: /url "title"`
    private const START_TITLE = 3;
    // Parsing the content of the title, i.e. `title` in `[foo]: /url "title"`
    private const TITLE = 4;
    // End state, no matter what kind of lines we add, they won't be references
    private const PARAGRAPH = 5;

    /** @var string */
    private $paragraph = '';

    /** @var array<int, ReferenceInterface> */
    private $references = [];

    /** @var int */
    private $state = self::START_DEFINITION;

    /** @var string|null */
    private $label;

    /** @var string|null */
    private $destination;

    /** @var string */
    private $title = '';

    /** @var string|null */
    private $titleDelimiter;

    /** @var bool */
    private $referenceValid = false;

    /**
     * Returns the accumulated line content that has not been consumed by a
     * confirmed reference definition.
     */
    public function getParagraphContent(): string
    {
        return $this->paragraph;
    }

    /**
     * Finalizes any pending valid reference and returns everything collected so far.
     *
     * @return ReferenceInterface[]
     */
    public function getReferences(): iterable
    {
        $this->finishReference();

        return $this->references;
    }

    /**
     * Whether at least one reference has been finalized.
     *
     * Note: this does not finalize a pending reference; only getReferences() does.
     */
    public function hasReferences(): bool
    {
        return $this->references !== [];
    }

    /**
     * Feeds one more line into the state machine.
     *
     * The line is always appended to the paragraph content (newline-separated).
     * Parsing stops for good once the PARAGRAPH state is reached.
     */
    public function parse(string $line): void
    {
        if ($this->paragraph !== '') {
            $this->paragraph .= "\n";
        }

        $this->paragraph .= $line;

        $cursor = new Cursor($line);
        while (!$cursor->isAtEnd()) {
            $result = false;
            switch ($this->state) {
                case self::PARAGRAPH:
                    // We're in a paragraph now. Link reference definitions can only appear at the beginning, so once
                    // we're in a paragraph, there's no going back.
                    return;
                case self::START_DEFINITION:
                    $result = $this->parseStartDefinition($cursor);
                    break;
                case self::LABEL:
                    $result = $this->parseLabel($cursor);
                    break;
                case self::DESTINATION:
                    $result = $this->parseDestination($cursor);
                    break;
                case self::START_TITLE:
                    $result = $this->parseStartTitle($cursor);
                    break;
                case self::TITLE:
                    $result = $this->parseTitle($cursor);
                    break;
                default:
                    // this should never happen
                    break;
            }

            if (!$result) {
                // Whatever we were parsing is not a definition; the rest is plain paragraph text.
                $this->state = self::PARAGRAPH;

                return;
            }
        }
    }

    /**
     * Expects `[` (after optional spaces/tabs) and switches to the LABEL state.
     *
     * @return bool false when the line does not start a definition
     */
    private function parseStartDefinition(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd() || $cursor->getCharacter() !== '[') {
            return false;
        }

        $this->state = self::LABEL;
        $this->label = '';

        $cursor->advance();
        if ($cursor->isAtEnd()) {
            // `[` was the last character on the line; the label starts on the next line
            $this->label .= "\n";
        }

        return true;
    }

    /**
     * Consumes (part of) the label, and once `]:` is reached validates it and
     * switches to the DESTINATION state.
     *
     * @return bool false when the input cannot be a valid label
     */
    private function parseLabel(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $partialLabel = LinkParserHelper::parsePartialLinkLabel($cursor);
        if ($partialLabel === null) {
            return false;
        }

        \assert($this->label !== null);
        $this->label .= $partialLabel;

        if ($cursor->isAtEnd()) {
            // label might continue on next line
            $this->label .= "\n";

            return true;
        }

        if ($cursor->getCharacter() !== ']') {
            return false;
        }

        $cursor->advance();

        // end of label
        if ($cursor->getCharacter() !== ':') {
            return false;
        }

        $cursor->advance();

        // spec: A link label can have at most 999 characters inside the square brackets
        if (\mb_strlen($this->label, 'utf-8') > 999) {
            return false;
        }

        // spec: A link label must contain at least one non-whitespace character
        if (\trim($this->label) === '') {
            return false;
        }

        $cursor->advanceToNextNonSpaceOrTab();

        $this->state = self::DESTINATION;

        return true;
    }

    /**
     * Consumes the link destination and switches to the START_TITLE state.
     *
     * @return bool false when no destination could be parsed, or when it is
     *              directly followed by non-whitespace
     */
    private function parseDestination(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $destination = LinkParserHelper::parseLinkDestination($cursor);
        if ($destination === null) {
            return false;
        }

        $this->destination = $destination;

        $advanced = $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            // Destination was at end of line, so this is a valid reference for sure (and maybe a title).
            // If not at end of line, wait for title to be valid first.
            $this->referenceValid = true;
            $this->paragraph = '';
        } elseif ($advanced === 0) {
            // spec: The title must be separated from the link destination by whitespace
            return false;
        }

        $this->state = self::START_TITLE;

        return true;
    }

    /**
     * Detects an opening title delimiter (`"`, `'` or `(`) and switches to the
     * TITLE state; without one, the pending reference is finished and parsing
     * restarts at START_DEFINITION on the same character.
     *
     * @return bool always true
     */
    private function parseStartTitle(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            $this->state = self::START_DEFINITION;

            return true;
        }

        $this->titleDelimiter = null;
        switch ($c = $cursor->getCharacter()) {
            case '"':
            case "'":
                $this->titleDelimiter = $c;
                break;
            case '(':
                $this->titleDelimiter = ')';
                break;
            default:
                // no title delimiter found
                break;
        }

        if ($this->titleDelimiter !== null) {
            $this->state = self::TITLE;
            $cursor->advance();
            if ($cursor->isAtEnd()) {
                // Opening delimiter was the last character; the title starts on the next line
                $this->title .= "\n";
            }
        } else {
            $this->finishReference();
            // There might be another reference instead, try that for the same character.
            $this->state = self::START_DEFINITION;
        }

        return true;
    }

    /**
     * Consumes (part of) the title. When the closing delimiter is found and
     * nothing but whitespace follows, the reference is finalized and the state
     * returns to START_DEFINITION.
     *
     * On failure the partially collected title is discarded: the reference may
     * already be valid (its destination ended the previous line), and it must
     * then be emitted without the rejected title text.
     *
     * @return bool false when the title is invalid
     */
    private function parseTitle(Cursor $cursor): bool
    {
        \assert($this->titleDelimiter !== null);
        $title = LinkParserHelper::parsePartialLinkTitle($cursor, $this->titleDelimiter);

        if ($title === null) {
            // Invalid title, stop. Title collected so far must not be used.
            $this->title = '';

            return false;
        }

        // Did we find the end delimiter?
        $endDelimiterFound = false;
        if (\substr($title, -1) === $this->titleDelimiter) {
            // Chop it off
            $title = \substr($title, 0, -1);
            $endDelimiterFound = true;
        }

        $this->title .= $title;

        if (!$endDelimiterFound && $cursor->isAtEnd()) {
            // Title still going, continue on next line
            $this->title .= "\n";

            return true;
        }

        // We either hit the end delimiter or some extra whitespace
        $cursor->advanceToNextNonSpaceOrTab();
        if (!$cursor->isAtEnd()) {
            // spec: No further non-whitespace characters may occur on the line.
            // Title collected so far must not be used.
            $this->title = '';

            return false;
        }

        $this->referenceValid = true;
        $this->finishReference();
        $this->paragraph = '';

        // See if there's another definition
        $this->state = self::START_DEFINITION;

        return true;
    }

    /**
     * Stores the pending reference (if it has been marked valid) and resets
     * all per-reference state. Does nothing when no valid reference is pending.
     */
    private function finishReference(): void
    {
        if (!$this->referenceValid) {
            return;
        }

        $this->references[] = new Reference($this->label, $this->destination, $this->title);

        $this->label = null;
        $this->referenceValid = false;
        $this->destination = null;
        $this->title = '';
        $this->titleDelimiter = null;
    }
}
308