Completed
Push — master ( 2b1b39...0164a1 )
by Colin
35:09 queued 10s
created

DocParser::assertValidUTF8()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 0
cts 0
cp 0
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 1
crap 6
1
<?php
2
3
/*
4
 * This file is part of the league/commonmark package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
9
 *  - (c) John MacFarlane
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
namespace League\CommonMark;
16
17
use League\CommonMark\Block\Element\AbstractBlock;
18
use League\CommonMark\Block\Element\AbstractStringContainerBlock;
19
use League\CommonMark\Block\Element\Document;
20
use League\CommonMark\Block\Element\Paragraph;
21
use League\CommonMark\Block\Element\StringContainerInterface;
22
use League\CommonMark\Event\DocumentParsedEvent;
23
use League\CommonMark\Exception\UnexpectedEncodingException;
24
25
final class DocParser implements DocParserInterface
26
{
27
    /**
     * Environment handed to the constructor. This class reads configuration
     * and the block parser list from it, passes it to each new Context, and
     * dispatches the DocumentParsedEvent through it.
     *
     * @var EnvironmentInterface
     */
30
    private $environment;
31
32
    /**
     * Inline parser built from the environment in the constructor; used by
     * processInlines() on every AbstractStringContainerBlock in the document.
     *
     * @var InlineParserEngine
     */
35
    private $inlineParserEngine;
36
37
    /**
     * Value of the 'max_nesting_level' configuration entry (requested with
     * \INF as the fallback argument). parseBlocks() stops parsing further
     * blocks on a line once the tip's depth reaches this value.
     *
     * @var int|float
     */
40
    private $maxNestingLevel;
41
42
    /**
     * Stores the environment and builds the collaborators derived from it.
     *
     * @param EnvironmentInterface $environment Provides configuration, block parsers and event dispatching
     */
    public function __construct(EnvironmentInterface $environment)
    {
        $this->environment = $environment;

        // The inline engine is created from the very same environment instance
        $this->inlineParserEngine = new InlineParserEngine($this->environment);

        // \INF is passed as the second argument to getConfig() - presumably the
        // fallback used when 'max_nesting_level' is not configured
        $this->maxNestingLevel = $this->environment->getConfig('max_nesting_level', \INF);
    }
51
52
    /**
     * Breaks the raw input into individual lines.
     *
     * Lines are separated on "\r\n", "\n" or "\r". A single empty element at
     * the end of the result (produced when the input finishes with a line
     * break, or is empty) is discarded.
     *
     * @param string $input
     *
     * @return string[]
     */
    private function preProcessInput(string $input): array
    {
        /** @var string[] $segments */
        $segments = \preg_split('/\r\n|\n|\r/', $input);

        // Splitting already consumed every line break, so a trailing line
        // break shows up as one empty string in the final position.
        $finalSegment = \end($segments);
        if ('' === $finalSegment) {
            \array_pop($segments);
        }

        return $segments;
    }
71
72
    /**
     * Parses the given text into a Document tree.
     *
     * The input is validated as UTF-8, split into lines and fed line by line
     * into the block structure. Afterwards every block still open is
     * finalized, inline content is parsed, and a DocumentParsedEvent is
     * dispatched through the environment.
     *
     * @param string $input
     *
     * @throws \RuntimeException
     * @throws UnexpectedEncodingException if the input is not valid UTF-8
     *
     * @return Document
     */
    public function parse(string $input): Document
    {
        $root = new Document();
        $state = new Context($root, $this->environment);

        $this->assertValidUTF8($input);

        $rows = $this->preProcessInput($input);
        foreach ($rows as $row) {
            $state->setNextLine($row);
            $this->incorporateLine($state);
        }

        // Finalize open blocks until the context reports no tip any more
        $totalRows = \count($rows);
        for ($open = $state->getTip(); $open; $open = $state->getTip()) {
            $open->finalize($state, $totalRows);
        }

        $this->processInlines($state);

        $parsedEvent = new DocumentParsedEvent($root);
        $this->environment->dispatch($parsedEvent);

        return $root;
    }
102 2070
103
    /**
     * Folds the context's current line into the block structure.
     *
     * Steps: reset the block closer and the "blocks parsed" flag, find the
     * container the line belongs to, run the block parsers, then place the
     * remaining text - as a lazy paragraph continuation, as content of a
     * string container, or as a new paragraph.
     *
     * @param ContextInterface $context
     */
    private function incorporateLine(ContextInterface $context)
    {
        $context->getBlockCloser()->resetTip();
        $context->setBlocksParsed(false);

        $lineCursor = new Cursor($context->getLine());

        $this->resetContainer($context, $lineCursor);

        $blockCloser = $context->getBlockCloser();
        $blockCloser->setLastMatchedContainer($context->getContainer());

        $this->parseBlocks($context, $lineCursor);

        // Whatever is left at the cursor is plain text. A lazy paragraph
        // continuation consumes it completely, so nothing else needs doing.
        $wasLazyContinuation = $this->handleLazyParagraphContinuation($context, $lineCursor);
        if ($wasLazyContinuation) {
            return;
        }

        // Not a lazy continuation: finalize the blocks that were not matched
        $context->getBlockCloser()->closeUnmatchedBlocks();

        // Record whether the last line is blank and update the parents
        $this->setAndPropagateLastLineBlank($context, $lineCursor);

        // A string container takes care of the remaining contents itself
        if ($context->getContainer() instanceof StringContainerInterface) {
            $context->getContainer()->handleRemainingContents($context, $lineCursor);

            return;
        }

        // A blank remainder does not start a paragraph
        if ($lineCursor->isBlank()) {
            return;
        }

        // Otherwise the text opens a new paragraph
        $paragraph = new Paragraph();
        $context->addBlock($paragraph);
        $lineCursor->advanceToNextNonSpaceOrTab();
        $paragraph->addLine($lineCursor->getRemainder());
    }
139
140 2070
    /**
     * Walks the document and runs the inline parser engine on every
     * AbstractStringContainerBlock, at the moment the walker enters it.
     *
     * @param ContextInterface $context
     */
    private function processInlines(ContextInterface $context)
    {
        $treeWalker = $context->getDocument()->walker();

        for ($step = $treeWalker->next(); $step; $step = $treeWalker->next()) {
            // Only "entering" events are of interest; others are skipped
            if ($step->isEntering()) {
                $current = $step->getNode();

                if ($current instanceof AbstractStringContainerBlock) {
                    $this->inlineParserEngine->parse($current, $context->getDocument()->getReferenceMap());
                }
            }
        }
    }
155
156
    /**
     * Points the context's container at the last open child (or its parent).
     *
     * Starting from the document, the chain of last children is followed for
     * as long as each one is an open AbstractBlock that matches the next
     * line. The first block that does not match ends the descent, and its
     * parent becomes the container.
     *
     * @param ContextInterface $context
     * @param Cursor           $cursor
     */
    private function resetContainer(ContextInterface $context, Cursor $cursor)
    {
        $deepest = $context->getDocument();

        while (true) {
            $candidate = $deepest->lastChild();

            // Stop when there is no child, or the child is not an open block
            if (!$candidate || !($candidate instanceof AbstractBlock) || !$candidate->isOpen()) {
                break;
            }

            if (!$candidate->matchesNextLine($cursor)) {
                // back up to the last matching block
                $deepest = $candidate->parent();
                break;
            }

            $deepest = $candidate;
        }

        $context->setContainer($deepest);
    }
184
185
    /**
     * Runs the environment's block parsers against the current line.
     *
     * Each round tries the parsers in order and stops at the first one that
     * reports success. Rounds repeat until the container is a code container,
     * the context says blocks are parsed, no parser matched, the container is
     * a string container, or the tip's depth reached the maximum nesting level.
     *
     * @param ContextInterface $context
     * @param Cursor           $cursor
     */
    private function parseBlocks(ContextInterface $context, Cursor $cursor)
    {
        for (;;) {
            if ($context->getContainer()->isCode() || $context->getBlocksParsed()) {
                return;
            }

            $matched = false;
            foreach ($this->environment->getBlockParsers() as $blockParser) {
                if ($blockParser->parse($context, $cursor)) {
                    $matched = true;
                    break;
                }
            }

            // The three stop conditions are checked in order, each one only
            // when the previous ones did not already apply.
            $finished = !$matched;

            if (!$finished) {
                $finished = $context->getContainer() instanceof StringContainerInterface;
            }

            if (!$finished) {
                $tip = $context->getTip();
                $finished = $tip && $tip->getDepth() >= $this->maxNestingLevel;
            }

            if ($finished) {
                $context->setBlocksParsed(true);

                return;
            }
        }
    }
208
209
    /**
     * Appends the rest of the line to the tip paragraph when the line is a
     * lazy paragraph continuation.
     *
     * That is the case when the tip is a Paragraph that already holds at
     * least one string, the block closer reports that not all blocks are
     * closed, and the cursor is not blank.
     *
     * @param ContextInterface $context
     * @param Cursor           $cursor
     *
     * @return bool true if the line was added to the paragraph, false otherwise
     */
    private function handleLazyParagraphContinuation(ContextInterface $context, Cursor $cursor): bool
    {
        $tip = $context->getTip();

        if (!($tip instanceof Paragraph)) {
            return false;
        }

        if ($context->getBlockCloser()->areAllClosed()) {
            return false;
        }

        if ($cursor->isBlank()) {
            return false;
        }

        if (\count($tip->getStrings()) <= 0) {
            return false;
        }

        // lazy paragraph continuation
        $tip->addLine($cursor->getRemainder());

        return true;
    }
232
233 2070
    /**
     * Updates the "last line blank" flag on the current container and pushes
     * it up through the ancestors.
     *
     * @param ContextInterface $context
     * @param Cursor           $cursor
     */
    private function setAndPropagateLastLineBlank(ContextInterface $context, Cursor $cursor)
    {
        $current = $context->getContainer();

        // On a blank line the container's last child block is flagged as well
        if ($cursor->isBlank()) {
            $trailingChild = $current->lastChild();
            if ($trailingChild instanceof AbstractBlock) {
                $trailingChild->setLastLineBlank(true);
            }
        }

        $shouldBeBlank = $current->shouldLastLineBeBlank($cursor, $context->getLineNumber());

        // Walk upwards through the parents; stop at the first one that is not
        // an AbstractBlock or that already carries the wanted value.
        for ($ancestor = $current; $ancestor instanceof AbstractBlock; $ancestor = $ancestor->parent()) {
            if ($ancestor->endsWithBlankLine() === $shouldBeBlank) {
                break;
            }

            $ancestor->setLastLineBlank($shouldBeBlank);
        }
    }
255
256
    /**
     * Verifies that the input is valid UTF-8.
     *
     * @param string $input
     *
     * @throws UnexpectedEncodingException if mb_check_encoding() rejects the input as UTF-8
     */
    private function assertValidUTF8(string $input)
    {
        if (\mb_check_encoding($input, 'UTF-8')) {
            return;
        }

        throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
    }
262
}
263