Passed
Push — 2.0 ( b7ed7b...934f6b )
by Colin
35:11 queued 31:18
created

ReferenceParser::getParagraphContent()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 1
dl 0
loc 3
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <[email protected]>
9
 *
10
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
11
 *  - (c) John MacFarlane
12
 *
13
 * For the full copyright and license information, please view the LICENSE
14
 * file that was distributed with this source code.
15
 */
16
17
namespace League\CommonMark\Reference;
18
19
use League\CommonMark\Parser\Cursor;
20
use League\CommonMark\Util\LinkParserHelper;
21
22
final class ReferenceParser
23
{
24
    // Looking for the start of a definition, i.e. `[`
25
    private const START_DEFINITION = 0;
26
    // Looking for and parsing the label, i.e. `[foo]` within `[foo]`
27
    private const LABEL = 1;
28
    // Parsing the destination, i.e. `/url` in `[foo]: /url`
29
    private const DESTINATION = 2;
30
    // Looking for the start of a title, i.e. the first `"` in `[foo]: /url "title"`
31
    private const START_TITLE = 3;
32
    // Parsing the content of the title, i.e. `title` in `[foo]: /url "title"`
33
    private const TITLE = 4;
34
    // End state, no matter what kind of lines we add, they won't be references
35
    private const PARAGRAPH = 5;
36
37
    /** @psalm-readonly-allow-private-mutation */
38
    private string $paragraph = '';
39
40
    /**
41
     * @var array<int, ReferenceInterface>
42
     *
43
     * @psalm-readonly-allow-private-mutation
44
     */
45
    private array $references = [];
46
47
    /** @psalm-readonly-allow-private-mutation */
48
    private int $state = self::START_DEFINITION;
49
50
    /** @psalm-readonly-allow-private-mutation */
51
    private ?string $label = null;
52
53
    /** @psalm-readonly-allow-private-mutation */
54
    private ?string $destination = null;
55
56
    /**
57
     * @var string Title text collected so far; may span several lines
58
     *
59
     * @psalm-readonly-allow-private-mutation
60
     */
61
    private string $title = '';
62
63
    /** @psalm-readonly-allow-private-mutation */
64
    private ?string $titleDelimiter = null;
65
66
    /** @psalm-readonly-allow-private-mutation */
67
    private bool $referenceValid = false;
68
69 2721
    /**
     * Returns the line content that is still considered regular paragraph text.
     *
     * Every line given to parse() is appended to this buffer, and the buffer is
     * emptied again whenever a reference definition is recognised as valid, so
     * what remains is the text following the last recognised definition.
     */
    public function getParagraphContent(): string
    {
        $remainingText = $this->paragraph;

        return $remainingText;
    }
73
74
    /**
     * Returns every link reference definition collected so far.
     *
     * A definition that is already known to be valid but has not been stored yet
     * (for example one whose destination ended its line and that was not followed
     * by a title) is finalized first.
     *
     * NOTE: this assumes callers ask for the references only after the last line
     * has been passed to parse(); a title that is still open at that point is
     * treated as never having been closed.
     *
     * @return ReferenceInterface[]
     */
    public function getReferences(): iterable
    {
        // Still being in the TITLE state means the closing delimiter was never
        // found. The text gathered so far is therefore not a title (it is kept in
        // the paragraph buffer instead) and must not be attached to a reference
        // that is valid on the strength of its label and destination alone.
        if ($this->state === self::TITLE) {
            $this->title = '';
        }

        $this->finishReference();

        return $this->references;
    }
83
84 2601
    /**
     * Tells whether at least one reference definition has been stored.
     *
     * Only finalized references are counted: a definition that is valid but
     * still pending (not yet moved into the list) is not taken into account,
     * and this method does not finalize it.
     */
    public function hasReferences(): bool
    {
        $nothingStored = $this->references === [];

        return ! $nothingStored;
    }
88
89 2721
    /**
     * Feeds one line of a paragraph into the reference definition state machine.
     *
     * The line is always appended to the paragraph buffer (separated from the
     * previous content by a newline). It is then consumed piece by piece by the
     * handler belonging to the current state until the line is exhausted or a
     * handler rejects it, in which case the parser switches to the terminal
     * PARAGRAPH state.
     *
     * @param string $line A single line of input
     */
    public function parse(string $line): void
    {
        $this->paragraph = $this->paragraph === ''
            ? $line
            : $this->paragraph . "\n" . $line;

        $cursor = new Cursor($line);

        while (! $cursor->isAtEnd()) {
            if ($this->state === self::PARAGRAPH) {
                // Link reference definitions can only appear at the beginning of a paragraph,
                // so once regular paragraph text has been seen there is no way back.
                return;
            }

            if ($this->state === self::START_DEFINITION) {
                $accepted = $this->parseStartDefinition($cursor);
            } elseif ($this->state === self::LABEL) {
                $accepted = $this->parseLabel($cursor);
            } elseif ($this->state === self::DESTINATION) {
                $accepted = $this->parseDestination($cursor);
            } elseif ($this->state === self::START_TITLE) {
                $accepted = $this->parseStartTitle($cursor);
            } elseif ($this->state === self::TITLE) {
                $accepted = $this->parseTitle($cursor);
            } else {
                // Unknown state; this should never happen
                $accepted = false;
            }

            if ($accepted) {
                continue;
            }

            // The current piece cannot be part of a definition: everything from here on is paragraph text.
            $this->state = self::PARAGRAPH;

            return;
        }
    }
132
133 2721
    /**
     * Looks for the `[` that opens a reference definition.
     *
     * Leading spaces and tabs are skipped. On success the cursor is moved past
     * the bracket, a fresh label is started and the parser switches to the
     * LABEL state.
     *
     * @return bool false when the next non-blank character is not `[`
     */
    private function parseStartDefinition(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $opensDefinition = ! $cursor->isAtEnd() && $cursor->getCurrentCharacter() === '[';
        if (! $opensDefinition) {
            return false;
        }

        $cursor->advance();

        $this->state = self::LABEL;
        // When the bracket is the last character of the line, the line break becomes part of the label
        $this->label = $cursor->isAtEnd() ? "\n" : '';

        return true;
    }
150
151 462
    /**
     * Collects (part of) the label and, once it is closed, validates it.
     *
     * The label may be spread over several lines; when the line ends before the
     * closing `]` a newline is appended and parsing continues with the next line.
     * A complete label must be followed directly by `:`; after that the parser
     * switches to the DESTINATION state.
     *
     * @return bool false when the input cannot be (the continuation of) a valid label
     */
    private function parseLabel(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $chunk = LinkParserHelper::parsePartialLinkLabel($cursor);
        if ($chunk === null) {
            return false;
        }

        \assert($this->label !== null);
        $this->label .= $chunk;

        if ($cursor->isAtEnd()) {
            // The label might continue on the next line
            $this->label .= "\n";

            return true;
        }

        // The label has to be closed by `]` ...
        if ($cursor->getCurrentCharacter() !== ']') {
            return false;
        }

        $cursor->advance();

        // ... which in turn has to be followed immediately by `:`
        if ($cursor->getCurrentCharacter() !== ':') {
            return false;
        }

        $cursor->advance();

        // spec: A link label can have at most 999 characters inside the square brackets
        $exceedsLimit = \mb_strlen($this->label, 'utf-8') > 999;
        // spec: A link label must contain at least one non-whitespace character
        $isBlank = \trim($this->label) === '';

        if ($exceedsLimit || $isBlank) {
            return false;
        }

        $cursor->advanceToNextNonSpaceOrTab();
        $this->state = self::DESTINATION;

        return true;
    }
199
200 252
    /**
     * Parses the link destination that follows the label.
     *
     * When the destination is the last thing on the line the reference is valid
     * right away (a title may still follow on the next line), so it is flagged
     * as such and the paragraph buffer is emptied. Otherwise validity depends on
     * the title that has to follow on the same line.
     *
     * @return bool false when there is no destination, or when it is directly
     *              followed by other characters without separating whitespace
     */
    private function parseDestination(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $parsed = LinkParserHelper::parseLinkDestination($cursor);
        if ($parsed === null) {
            return false;
        }

        $this->destination = $parsed;

        $skipped      = $cursor->advanceToNextNonSpaceOrTab();
        $lineFinished = $cursor->isAtEnd();

        if (! $lineFinished && $skipped === 0) {
            // spec: The title must be separated from the link destination by whitespace
            return false;
        }

        if ($lineFinished) {
            // Destination was at end of line, so this is a valid reference for sure (and maybe a title).
            // If not at end of line, wait for the title to be valid first.
            $this->referenceValid = true;
            $this->paragraph      = '';
        }

        $this->state = self::START_TITLE;

        return true;
    }
226
227 141
    /**
     * Checks whether a title starts at the current position.
     *
     * A title is opened by `"`, `'` or `(`; the matching closing delimiter is
     * remembered and the parser switches to the TITLE state. Any other character
     * means there is no title: the pending reference (if valid) is finalized and
     * the same character is tried again as the start of another definition.
     *
     * @return bool always true; rejection is left to the state that follows
     */
    private function parseStartTitle(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            $this->state = self::START_DEFINITION;

            return true;
        }

        $opening = $cursor->getCurrentCharacter();
        if ($opening === '"' || $opening === "'") {
            // Quoted titles are closed by the same character that opened them
            $closing = $opening;
        } elseif ($opening === '(') {
            $closing = ')';
        } else {
            // No title delimiter found
            $closing = null;
        }

        $this->titleDelimiter = $closing;

        if ($closing === null) {
            $this->finishReference();
            // There might be another reference instead, try that for the same character.
            $this->state = self::START_DEFINITION;

            return true;
        }

        $this->state = self::TITLE;
        $cursor->advance();
        if ($cursor->isAtEnd()) {
            // The title starts with a line break
            $this->title .= "\n";
        }

        return true;
    }
264
265 117
    /**
     * Collects (part of) the title and finalizes the reference once it is closed.
     *
     * The title may span several lines; as long as the closing delimiter has not
     * been found and the line is exhausted, a newline is appended and parsing
     * continues with the next line. After the closing delimiter only spaces and
     * tabs may follow on the line.
     *
     * When the title turns out to be invalid, the text collected so far is
     * discarded: a reference that was already valid because its destination
     * ended the line is then finalized without a title instead of with the
     * rejected text.
     *
     * @return bool false when the title is invalid
     */
    private function parseTitle(Cursor $cursor): bool
    {
        \assert($this->titleDelimiter !== null);
        $title = LinkParserHelper::parsePartialLinkTitle($cursor, $this->titleDelimiter);

        if ($title === null) {
            // Invalid title, stop. Title collected so far must not be used.
            $this->title = '';

            return false;
        }

        // Did we find the end delimiter?
        $endDelimiterFound = false;
        if (\substr($title, -1) === $this->titleDelimiter) {
            $endDelimiterFound = true;
            // Chop it off
            $title = \substr($title, 0, -1);
        }

        $this->title .= $title;

        if (! $endDelimiterFound && $cursor->isAtEnd()) {
            // Title still going, continue on next line
            $this->title .= "\n";

            return true;
        }

        // We either hit the end delimiter or some extra whitespace
        $cursor->advanceToNextNonSpaceOrTab();
        if (! $cursor->isAtEnd()) {
            // spec: No further non-whitespace characters may occur on the line.
            // Title collected so far must not be used.
            $this->title = '';

            return false;
        }

        $this->referenceValid = true;
        $this->finishReference();
        $this->paragraph = '';

        // See if there's another definition
        $this->state = self::START_DEFINITION;

        return true;
    }
308
309 2721
    /**
     * Stores the pending reference, provided it has been flagged as valid, and
     * resets all per-definition fields so the next definition starts clean.
     *
     * Does nothing when there is no valid pending reference.
     */
    private function finishReference(): void
    {
        if (! $this->referenceValid) {
            return;
        }

        // A reference is only ever flagged as valid after its label and destination
        // have been parsed, so neither can be null here. Stating the invariant
        // explicitly narrows the nullable property types for static analysis.
        \assert($this->label !== null);
        \assert($this->destination !== null);

        $this->references[] = new Reference($this->label, $this->destination, $this->title);

        $this->label          = null;
        $this->referenceValid = false;
        $this->destination    = null;
        $this->title          = '';
        $this->titleDelimiter = null;
    }
324
}
325