Completed
Push — master ( 63fd31...35d47f )
by Carsten
01:49
created

Parser::parseBlock()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 8
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 8
ccs 3
cts 3
cp 1
rs 9.4285
cc 1
eloc 3
nc 1
nop 2
crap 1
1
<?php
2
/**
3
 * @copyright Copyright (c) 2014 Carsten Brandt
4
 * @license https://github.com/cebe/markdown/blob/master/LICENSE
5
 * @link https://github.com/cebe/markdown#readme
6
 */
7
8
namespace cebe\markdown;
9
use ReflectionMethod;
10
11
/**
12
 * A generic parser for markdown-like languages.
13
 *
14
 * @author Carsten Brandt <[email protected]>
15
 */
16
abstract class Parser
{
	/**
	 * @var integer the maximum nesting level for language elements.
	 * When this depth is reached, `parseBlocks()` and `parseInline()` stop parsing and return
	 * their input as a single plain text element.
	 */
	public $maximumNestingLevel = 32;

	/**
	 * @var array stack describing the current context the parser is in.
	 * `renderAbsy()` pushes the type of the block being rendered and `parseInline()` pushes the
	 * name of the inline handler being executed; both remove their entry again afterwards.
	 * The most recent entry is at index 0.
	 * TODO remove in favor of absy
	 */
	protected $context = [];
	/**
	 * @var array these are "escapeable" characters. When using one of these prefixed with a
	 * backslash, the character will be outputted without the backslash and is not interpreted
	 * as markdown.
	 */
	protected $escapeCharacters = [
		'\\', // backslash
	];

	/**
	 * @var integer current nesting depth of `parseBlocks()` / `parseInline()` calls.
	 */
	private $_depth = 0;


	/**
	 * Parses the given text considering the full language.
	 *
	 * This includes parsing block elements as well as inline elements.
	 *
	 * @param string $text the text to parse
	 * @return string parsed markup
	 */
	public function parse($text)
	{
		return $this->runParser($text, true);
	}

	/**
	 * Parses a paragraph without block elements (block elements are ignored).
	 *
	 * @param string $text the text to parse
	 * @return string parsed markup
	 */
	public function parseParagraph($text)
	{
		return $this->runParser($text, false);
	}

	/**
	 * Shared implementation of `parse()` and `parseParagraph()`.
	 *
	 * Calls `prepare()`, normalizes line endings to "\n", selects the inline markers that occur
	 * in the text, builds the abstract syntax tree, renders it and finally calls `cleanup()`.
	 *
	 * @param string $text the text to parse
	 * @param boolean $withBlocks whether block elements should be parsed (`true`) or only
	 * inline elements (`false`).
	 * @return string parsed markup, an empty string if `$text` contains only whitespace.
	 */
	private function runParser($text, $withBlocks)
	{
		$this->prepare();

		if (ltrim($text) === '') {
			// every prepare() call is paired with a cleanup() call, even if there is nothing to parse
			$this->cleanup();
			return '';
		}

		$text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

		$this->prepareMarkers($text);

		if ($withBlocks) {
			$absy = $this->parseBlocks(explode("\n", $text));
		} else {
			$absy = $this->parseInline($text);
		}
		$markup = $this->renderAbsy($absy);

		$this->cleanup();
		return $markup;
	}

	/**
	 * This method will be called before `parse()` and `parseParagraph()`.
	 * You can override it to do some initialization work.
	 */
	protected function prepare()
	{
	}

	/**
	 * This method will be called after `parse()` and `parseParagraph()`.
	 * You can override it to do cleanup.
	 */
	protected function cleanup()
	{
	}


	// block parsing

	/**
	 * @var array|null sorted list of block type names, filled on first call of `blockTypes()`.
	 */
	private $_blockTypes;

	/**
	 * Returns the block types that have an `identify*` method.
	 *
	 * The list is built once via reflection from all protected methods whose name starts with
	 * `identify`; the remainder of the method name in lower case is the block type.
	 * The result is sorted alphabetically and cached.
	 *
	 * @return array a list of block element types available.
	 */
	protected function blockTypes()
	{
		if ($this->_blockTypes === null) {
			// detect block types via "identify" functions
			$reflection = new \ReflectionClass($this);
			// array_filter() drops the `false` entries of methods that are not identify functions
			$this->_blockTypes = array_filter(array_map(function($method) {
				$name = $method->getName();
				return strncmp($name, 'identify', 8) === 0 ? strtolower(substr($name, 8)) : false;
			}, $reflection->getMethods(ReflectionMethod::IS_PROTECTED)));

			sort($this->_blockTypes);
		}
		return $this->_blockTypes;
	}

	/**
	 * Given a set of lines and an index of a current line it uses the registered block types to
	 * detect the type of this line.
	 *
	 * The `identify*` methods are tried in the order returned by `blockTypes()`, the first one
	 * returning a truthy value wins.
	 *
	 * @param array $lines
	 * @param integer $current
	 * @return string name of the block type in lower case
	 */
	protected function detectLineType($lines, $current)
	{
		$line = $lines[$current];
		foreach ($this->blockTypes() as $blockType) {
			if ($this->{'identify' . $blockType}($line, $lines, $current)) {
				return $blockType;
			}
		}
		// consider the line a normal paragraph if no other block type matches
		return 'paragraph';
	}

	/**
	 * Parse block elements by calling `detectLineType()` to identify them
	 * and call consume function afterwards.
	 *
	 * @param array $lines the lines to convert into blocks
	 * @return array list of block elements. If the maximum nesting level is reached, a single
	 * `text` element containing the unparsed lines is returned.
	 */
	protected function parseBlocks($lines)
	{
		if ($this->_depth >= $this->maximumNestingLevel) {
			// maximum depth is reached, do not parse input
			return [['text', implode("\n", $lines)]];
		}
		$this->_depth++;

		$blocks = [];

		// convert lines to blocks
		for ($i = 0, $count = count($lines); $i < $count; $i++) {
			if (rtrim($lines[$i]) === '') {
				// skip empty lines
				continue;
			}
			// identify a blocks beginning and parse the content,
			// $i is moved to the last line that was consumed by the block
			list($block, $i) = $this->parseBlock($lines, $i);
			if ($block !== false) {
				$blocks[] = $block;
			}
		}

		$this->_depth--;

		return $blocks;
	}

	/**
	 * Parses the block at current line by identifying the block type and parsing the content
	 * @param $lines
	 * @param $current
	 * @return array Array of two elements, the first element contains the block,
	 * the second contains the next line index to be parsed.
	 */
	protected function parseBlock($lines, $current)
	{
		// identify block type for this line
		$blockType = $this->detectLineType($lines, $current);

		// call consume method for the detected block type to consume further lines
		return $this->{'consume' . $blockType}($lines, $current);
	}

	/**
	 * Renders a list of elements of the abstract syntax tree.
	 *
	 * For each element the method `render` followed by the element type (index 0 of the element)
	 * is called with the element. While it runs, the element type is on top of the context stack.
	 *
	 * @param array $blocks the elements to render
	 * @return string the concatenated output of all render methods
	 */
	protected function renderAbsy($blocks)
	{
		$output = '';
		foreach ($blocks as $block) {
			array_unshift($this->context, $block[0]);
			$output .= $this->{'render' . $block[0]}($block);
			array_shift($this->context);
		}
		return $output;
	}

	/**
	 * Consume lines for a paragraph
	 *
	 * Lines are consumed starting at `$current` until a line that contains only whitespace
	 * or the end of input is reached.
	 *
	 * @param $lines
	 * @param $current
	 * @return array the paragraph block and the index of the last line that was consumed.
	 */
	protected function consumeParagraph($lines, $current)
	{
		// consume until newline
		$content = [];
		for ($i = $current, $count = count($lines); $i < $count; $i++) {
			if (ltrim($lines[$i]) === '') {
				break;
			}
			$content[] = $lines[$i];
		}
		$block = [
			'paragraph',
			'content' => $this->parseInline(implode("\n", $content)),
		];
		// $i points to the first line that is not part of the paragraph
		return [$block, --$i];
	}

	/**
	 * Render a paragraph block
	 *
	 * @param $block
	 * @return string
	 */
	protected function renderParagraph($block)
	{
		return '<p>' . $this->renderAbsy($block['content']) . "</p>\n";
	}


	// inline parsing


	/**
	 * @var array the set of inline markers to use in different contexts.
	 * It is indexed by the first character of the markers, each entry maps the markers starting
	 * with this character to their handler method, longest marker first.
	 */
	private $_inlineMarkers = [];

	/**
	 * Returns a map of inline markers to the corresponding parser methods.
	 *
	 * This array defines handler methods for inline markdown markers.
	 * When a marker is found in the text, the handler method is called with the text
	 * starting at the position of the marker.
	 *
	 * Note that markers starting with whitespace may slow down the parser,
	 * you may want to use [[renderText]] to deal with them.
	 *
	 * You may override this method to define a set of markers and parsing methods.
	 * The default implementation looks for protected methods starting with `parse` that
	 * also have an `@marker` annotation in PHPDoc.
	 *
	 * @return array a map of markers to parser methods
	 */
	protected function inlineMarkers()
	{
		$markers = [];
		// detect "parse" functions
		$reflection = new \ReflectionClass($this);
		foreach ($reflection->getMethods(ReflectionMethod::IS_PROTECTED) as $method) {
			$methodName = $method->getName();
			if (strncmp($methodName, 'parse', 5) !== 0) {
				continue;
			}
			// getDocComment() returns false for methods without a doc comment
			preg_match_all('/@marker ([^\s]+)/', (string) $method->getDocComment(), $matches);
			foreach ($matches[1] as $match) {
				$markers[$match] = $methodName;
			}
		}
		return $markers;
	}

	/**
	 * Prepare markers that are used in the text to parse
	 *
	 * Add all markers that are present in markdown.
	 * Check is done to avoid iterations in parseInline(), good for huge markdown files
	 *
	 * Markers are grouped by their first character. Within a group the markers are ordered by
	 * descending length, so that the longest matching marker is always tried first.
	 *
	 * @param string $text
	 */
	protected function prepareMarkers($text)
	{
		$this->_inlineMarkers = [];
		foreach ($this->inlineMarkers() as $marker => $method) {
			// PHP converts numeric array keys like '0' to integers, make sure to work on a string
			$marker = (string) $marker;
			if ($marker === '' || strpos($text, $marker) === false) {
				continue;
			}
			$this->_inlineMarkers[$marker[0]][$marker] = $method;
		}

		// put the longest marker first
		foreach ($this->_inlineMarkers as $char => $group) {
			if (count($group) < 2) {
				continue;
			}
			uksort($group, function($a, $b) {
				return strlen((string) $b) - strlen((string) $a);
			});
			$this->_inlineMarkers[$char] = $group;
		}
	}

	/**
	 * Parses inline elements of the language.
	 *
	 * The text is scanned for the first characters of the prepared inline markers. Text between
	 * markers is added as `text` element, for each marker the corresponding handler method is
	 * called. A handler returns an array of the parsed element and the number of bytes of input
	 * it has consumed. A marker character that no marker matches is added as `text` element.
	 *
	 * @param string $text the inline text to parse.
	 * @return array list of inline elements. If the maximum nesting level is reached, a single
	 * `text` element containing the unparsed text is returned.
	 */
	protected function parseInline($text)
	{
		if ($this->_depth >= $this->maximumNestingLevel) {
			// maximum depth is reached, do not parse input
			return [['text', $text]];
		}
		$this->_depth++;

		$markers = implode('', array_keys($this->_inlineMarkers));

		$paragraph = [];

		// compare with '' instead of using empty(), which would treat the marker set "0" as empty
		while ($markers !== '' && ($found = strpbrk($text, $markers)) !== false) {

			// $found is the rest of $text starting at the first marker character
			$pos = strlen($text) - strlen($found);

			// add the text up to next marker to the paragraph
			if ($pos !== 0) {
				$paragraph[] = ['text', substr($text, 0, $pos)];
			}
			$text = $found;

			$parsed = false;
			foreach ($this->_inlineMarkers[$text[0]] as $marker => $method) {
				$marker = (string) $marker;
				if (strncmp($text, $marker, strlen($marker)) === 0) {
					// parse the marker
					array_unshift($this->context, $method);
					list($output, $offset) = $this->$method($text);
					array_shift($this->context);

					$paragraph[] = $output;
					$text = substr($text, $offset);
					$parsed = true;
					break;
				}
			}
			if (!$parsed) {
				// no marker matched, treat the character as normal text
				$paragraph[] = ['text', substr($text, 0, 1)];
				$text = substr($text, 1);
			}
		}

		$paragraph[] = ['text', $text];

		$this->_depth--;

		return $paragraph;
	}

	/**
	 * Parses escaped special characters.
	 * @marker \
	 */
	protected function parseEscape($text)
	{
		if (isset($text[1]) && in_array($text[1], $this->escapeCharacters, true)) {
			// drop the backslash and output the escaped character only
			return [['text', $text[1]], 2];
		}
		// not an escape sequence, keep the backslash
		return [['text', $text[0]], 1];
	}

	/**
	 * This function renders plain text sections in the markdown text.
	 * It can be used to work on normal text sections for example to highlight keywords or
	 * do special escaping.
	 *
	 * @param array $block the text element, the text is stored at index 1.
	 * @return string the text, returned unchanged by this implementation.
	 */
	protected function renderText($block)
	{
		return $block[1];
	}
}
390