Completed
Push — master ( 83739a...a13ee4 )
by Colin
14s queued 11s
created

DocParser::assertValidUTF8()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 4
cts 4
cp 1
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 1
crap 2
1
<?php
2
3
/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <[email protected]>
 *
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
 *  - (c) John MacFarlane
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
14
15
namespace League\CommonMark;
16
17
use League\CommonMark\Block\Element\AbstractBlock;
18
use League\CommonMark\Block\Element\AbstractStringContainerBlock;
19
use League\CommonMark\Block\Element\Document;
20
use League\CommonMark\Block\Element\Paragraph;
21
use League\CommonMark\Block\Element\StringContainerInterface;
22
use League\CommonMark\Event\DocumentParsedEvent;
23
use League\CommonMark\Exception\UnexpectedEncodingException;
24
25
final class DocParser implements DocParserInterface
{
    /**
     * @var EnvironmentInterface
     */
    private $environment;

    /**
     * @var InlineParserEngine
     */
    private $inlineParserEngine;

    /**
     * @var int|float
     */
    private $maxNestingLevel;

    /**
     * @param EnvironmentInterface $environment
     */
    public function __construct(EnvironmentInterface $environment)
    {
        $this->environment = $environment;
        $this->inlineParserEngine = new InlineParserEngine($environment);
        // \INF is passed as the fallback value for the 'max_nesting_level' setting
        $this->maxNestingLevel = $environment->getConfig('max_nesting_level', \INF);
    }

    /**
     * Splits the raw input into individual lines.
     *
     * Lines are separated on "\r\n", "\n" or "\r". A single empty element at
     * the end of the result (produced by a trailing newline) is dropped.
     *
     * @param string $input
     *
     * @throws \RuntimeException if the input could not be split into lines
     *
     * @return string[]
     */
    private function preProcessInput(string $input): array
    {
        /** @var string[]|false $lines */
        $lines = \preg_split('/\r\n|\n|\r/', $input);

        // preg_split() returns false on failure; fail loudly here rather than
        // letting the declared array return type raise an unrelated TypeError.
        if ($lines === false) {
            throw new \RuntimeException('Failed to split the input into lines');
        }

        // Remove any newline which appears at the very end of the string.
        // We've already split the document by newlines, so we can simply drop
        // any empty element which appears on the end.
        if (\end($lines) === '') {
            \array_pop($lines);
        }

        return $lines;
    }

    /**
     * Parses the given input into a Document.
     *
     * The input is validated as UTF-8, split into lines and fed line by line
     * into the block parsing logic. Any blocks still open afterwards are
     * finalized, inline parsing is run, and a DocumentParsedEvent is
     * dispatched through the environment before the document is returned.
     *
     * @param string $input
     *
     * @throws UnexpectedEncodingException if the input is not valid UTF-8
     * @throws \RuntimeException
     *
     * @return Document
     */
    public function parse(string $input): Document
    {
        $document = new Document();
        $context = new Context($document, $this->environment);

        $this->assertValidUTF8($input);
        $lines = $this->preProcessInput($input);
        foreach ($lines as $line) {
            $context->setNextLine($line);
            $this->incorporateLine($context);
        }

        // Finalize every block that is still open, passing the total line count
        $lineCount = \count($lines);
        while ($tip = $context->getTip()) {
            $tip->finalize($context, $lineCount);
        }

        $this->processInlines($context);

        $this->environment->dispatch(new DocumentParsedEvent($document));

        return $document;
    }

    /**
     * Incorporates the context's current line into the block structure.
     *
     * @param ContextInterface $context
     */
    private function incorporateLine(ContextInterface $context)
    {
        $context->getBlockCloser()->resetTip();
        $context->setBlocksParsed(false);

        $cursor = new Cursor($context->getLine());

        $this->resetContainer($context, $cursor);
        $context->getBlockCloser()->setLastMatchedContainer($context->getContainer());

        $this->parseBlocks($context, $cursor);

        // What remains at the offset is a text line.  Add the text to the appropriate container.
        // First check for a lazy paragraph continuation:
        if ($this->handleLazyParagraphContinuation($context, $cursor)) {
            return;
        }

        // not a lazy continuation
        // finalize any blocks not matched
        $context->getBlockCloser()->closeUnmatchedBlocks();

        // Determine whether the last line is blank, updating parents as needed
        $this->setAndPropagateLastLineBlank($context, $cursor);

        // Handle any remaining cursor contents
        if ($context->getContainer() instanceof StringContainerInterface) {
            $context->getContainer()->handleRemainingContents($context, $cursor);
        } elseif (!$cursor->isBlank()) {
            // Create paragraph container for line
            $p = new Paragraph();
            $context->addBlock($p);
            $cursor->advanceToNextNonSpaceOrTab();
            $p->addLine($cursor->getRemainder());
        }
    }

    /**
     * Walks the document and runs the inline parser engine on every
     * AbstractStringContainerBlock as it is entered.
     *
     * @param ContextInterface $context
     */
    private function processInlines(ContextInterface $context)
    {
        $walker = $context->getDocument()->walker();

        while ($event = $walker->next()) {
            // Only act when entering a node, so each node is handled once
            if (!$event->isEntering()) {
                continue;
            }

            $node = $event->getNode();
            if ($node instanceof AbstractStringContainerBlock) {
                $this->inlineParserEngine->parse($node, $context->getDocument()->getReferenceMap());
            }
        }
    }

    /**
     * Sets the container to the last open child (or its parent)
     *
     * @param ContextInterface $context
     * @param Cursor           $cursor
     */
    private function resetContainer(ContextInterface $context, Cursor $cursor)
    {
        $container = $context->getDocument();

        while ($lastChild = $container->lastChild()) {
            if (!($lastChild instanceof AbstractBlock)) {
                break;
            }

            if (!$lastChild->isOpen()) {
                break;
            }

            $container = $lastChild;
            if (!$container->matchesNextLine($cursor)) {
                $container = $container->parent(); // back up to the last matching block
                break;
            }
        }

        $context->setContainer($container);
    }

    /**
     * Parse blocks
     *
     * Repeatedly offers the cursor to the environment's block parsers until
     * the container is a code container, no parser matches, the container is
     * a string container, or the tip's depth reaches the maximum nesting level.
     *
     * @param ContextInterface $context
     * @param Cursor           $cursor
     */
    private function parseBlocks(ContextInterface $context, Cursor $cursor)
    {
        while (!$context->getContainer()->isCode() && !$context->getBlocksParsed()) {
            $parsed = false;
            foreach ($this->environment->getBlockParsers() as $parser) {
                // The first parser that reports success wins for this pass
                if ($parser->parse($context, $cursor)) {
                    $parsed = true;
                    break;
                }
            }

            if (!$parsed || $context->getContainer() instanceof StringContainerInterface || (($tip = $context->getTip()) && $tip->getDepth() >= $this->maxNestingLevel)) {
                $context->setBlocksParsed(true);
                break;
            }
        }
    }

    /**
     * Appends the remainder of the line to the tip when it qualifies as a
     * lazy paragraph continuation.
     *
     * @param ContextInterface $context
     * @param Cursor           $cursor
     *
     * @return bool True if the line was added to the tip paragraph, false otherwise
     */
    private function handleLazyParagraphContinuation(ContextInterface $context, Cursor $cursor): bool
    {
        $tip = $context->getTip();

        if ($tip instanceof Paragraph &&
            !$context->getBlockCloser()->areAllClosed() &&
            !$cursor->isBlank() &&
            \count($tip->getStrings()) > 0) {

            // lazy paragraph continuation
            $tip->addLine($cursor->getRemainder());

            return true;
        }

        return false;
    }

    /**
     * Updates the "last line blank" flag on the current container's last
     * child and on the container itself, then propagates it up through the
     * parents for as long as their current state differs.
     *
     * @param ContextInterface $context
     * @param Cursor           $cursor
     */
    private function setAndPropagateLastLineBlank(ContextInterface $context, Cursor $cursor)
    {
        $container = $context->getContainer();

        if ($cursor->isBlank() && ($lastChild = $container->lastChild())) {
            if ($lastChild instanceof AbstractBlock) {
                $lastChild->setLastLineBlank(true);
            }
        }

        $lastLineBlank = $container->shouldLastLineBeBlank($cursor, $context->getLineNumber());

        // Propagate lastLineBlank up through parents:
        while ($container instanceof AbstractBlock && $container->endsWithBlankLine() !== $lastLineBlank) {
            $container->setLastLineBlank($lastLineBlank);
            $container = $container->parent();
        }
    }

    /**
     * Ensures the input is valid UTF-8.
     *
     * @param string $input
     *
     * @throws UnexpectedEncodingException if mb_check_encoding() rejects the input as UTF-8
     */
    private function assertValidUTF8(string $input)
    {
        if (!\mb_check_encoding($input, 'UTF-8')) {
            throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
        }
    }
}
263