Completed
Push — master ( 9ffe58...cd3227 )
by Carsten
03:21 queued 01:21
created

Parser::parseBlocks()   B

Complexity

Conditions 6
Paths 5

Size

Total Lines 26
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 6.0087

Importance

Changes 2
Bugs 0 Features 1
Metric Value
c 2
b 0
f 1
dl 0
loc 26
ccs 15
cts 16
cp 0.9375
rs 8.439
cc 6
eloc 13
nc 5
nop 1
crap 6.0087
1
<?php
2
/**
3
 * @copyright Copyright (c) 2014 Carsten Brandt
4
 * @license https://github.com/cebe/markdown/blob/master/LICENSE
5
 * @link https://github.com/cebe/markdown#readme
6
 */
7
8
namespace cebe\markdown;
9
use ReflectionMethod;
10
11
/**
12
 * A generic parser for markdown-like languages.
13
 *
14
 * @author Carsten Brandt <mail@cebe.cc>
15
 */
16
abstract class Parser
{
	/**
	 * @var integer the maximum nesting level for language elements.
	 * When this depth is reached, `parseBlocks()` and `parseInline()` return their input as plain text.
	 */
	public $maximumNestingLevel = 32;

	/**
	 * @var array stack describing the current context the parser is in.
	 * `renderAbsy()` pushes the type of the block being rendered and `parseInline()` pushes the
	 * name of the inline parser method being called; the innermost entry is the first element.
	 * TODO remove in favor of absy
	 */
	protected $context = [];
	/**
	 * @var array these are "escapeable" characters. When using one of these prefixed with a
	 * backslash, the character will be outputted without the backslash and is not interpreted
	 * as markdown.
	 */
	protected $escapeCharacters = [
		'\\', // backslash
	];

	/**
	 * @var integer current nesting depth, shared by `parseBlocks()` and `parseInline()`.
	 */
	private $_depth = 0;


	/**
	 * Parses the given text considering the full language.
	 *
	 * This includes parsing block elements as well as inline elements.
	 *
	 * @param string $text the text to parse
	 * @return string parsed markup
	 */
	public function parse($text)
	{
		return $this->runParser($text, true);
	}

	/**
	 * Parses a paragraph without block elements (block elements are ignored).
	 *
	 * @param string $text the text to parse
	 * @return string parsed markup
	 */
	public function parseParagraph($text)
	{
		return $this->runParser($text, false);
	}

	/**
	 * Shared implementation of `parse()` and `parseParagraph()`.
	 *
	 * Calls `prepare()`, normalizes line endings to "\n", collects the inline markers present
	 * in the text, builds the syntax tree, renders it and finally calls `cleanup()`.
	 * For input that is empty after `ltrim()` an empty string is returned right after
	 * `prepare()`; `cleanup()` is not called in that case.
	 *
	 * @param string $text the text to parse
	 * @param boolean $blockLevel whether to split the text into lines and parse block elements
	 * (`parseBlocks()`), or to treat the whole text as inline content (`parseInline()`).
	 * @return string parsed markup
	 */
	private function runParser($text, $blockLevel)
	{
		$this->prepare();

		if (ltrim($text) === '') {
			return '';
		}

		$text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

		$this->prepareMarkers($text);

		if ($blockLevel) {
			$absy = $this->parseBlocks(explode("\n", $text));
		} else {
			$absy = $this->parseInline($text);
		}
		$markup = $this->renderAbsy($absy);

		$this->cleanup();
		return $markup;
	}

	/**
	 * This method will be called before `parse()` and `parseParagraph()`.
	 * You can override it to do some initialization work.
	 */
	protected function prepare()
	{
	}

	/**
	 * This method will be called after `parse()` and `parseParagraph()`.
	 * You can override it to do cleanup.
	 */
	protected function cleanup()
	{
	}


	// block parsing

	/**
	 * @var array|null cached result of `blockTypes()`, `null` until first use.
	 */
	private $_blockTypes;

	/**
	 * Returns the block element types, detected from the protected methods of this object
	 * whose name starts with `identify`. The result is computed once and cached.
	 *
	 * @return array a list of block element types available.
	 */
	protected function blockTypes()
	{
		if ($this->_blockTypes === null) {
			// detect block types via "identify" functions
			$reflection = new \ReflectionClass($this);
			$this->_blockTypes = array_filter(array_map(function($method) {
				$name = $method->getName();
				// the block type is the lower-cased part of the name after "identify";
				// other methods map to false and are dropped by array_filter()
				return strncmp($name, 'identify', 8) === 0 ? strtolower(substr($name, 8)) : false;
			}, $reflection->getMethods(ReflectionMethod::IS_PROTECTED)));

			// sorting also re-indexes the list after filtering
			sort($this->_blockTypes);
		}
		return $this->_blockTypes;
	}

	/**
	 * Given a set of lines and an index of a current line it uses the registered block types to
	 * detect the type of this line.
	 *
	 * The `identify<Type>` methods are tried in the order returned by `blockTypes()`;
	 * the first one returning a truthy value wins.
	 *
	 * @param array $lines
	 * @param integer $current
	 * @return string name of the block type in lower case
	 */
	protected function detectLineType($lines, $current)
	{
		$line = $lines[$current];
		$blockTypes = $this->blockTypes();
		foreach($blockTypes as $blockType) {
			if ($this->{'identify' . $blockType}($line, $lines, $current)) {
				return $blockType;
			}
		}
		// consider the line a normal paragraph if no other block type matches
		return 'paragraph';
	}

	/**
	 * Parse block elements by calling `detectLineType()` to identify them
	 * and call consume function afterwards.
	 *
	 * @param array $lines the lines to parse
	 * @return array list of blocks. When the maximum nesting level is reached, a single
	 * `text` block holding the unparsed lines is returned instead.
	 */
	protected function parseBlocks($lines)
	{
		if ($this->_depth >= $this->maximumNestingLevel) {
			// maximum depth is reached, do not parse input
			return [['text', implode("\n", $lines)]];
		}
		$this->_depth++;

		$blocks = [];

		// convert lines to blocks
		for ($i = 0, $count = count($lines); $i < $count; $i++) {
			$line = $lines[$i];
			if ($line !== '' && rtrim($line) !== '') { // skip empty lines
				// identify a blocks beginning and parse the content;
				// $i is moved forward to the index returned by the consume method
				list($block, $i) = $this->parseBlock($lines, $i);
				if ($block !== false) {
					$blocks[] = $block;
				}
			}
		}

		$this->_depth--;

		return $blocks;
	}

	/**
	 * Parses the block at current line by identifying the block type and parsing the content
	 * @param $lines
	 * @param $current
	 * @return array Array of two elements, the first element contains the block,
	 * the second contains the next line index to be parsed.
	 */
	protected function parseBlock($lines, $current)
	{
		// identify block type for this line
		$blockType = $this->detectLineType($lines, $current);

		// call consume method for the detected block type to consume further lines
		return $this->{'consume' . $blockType}($lines, $current);
	}

	/**
	 * Renders a list of blocks by calling the `render<Type>` method named after the first
	 * element of each block and concatenating the results.
	 *
	 * While a block is rendered its type is the first element of `$context`.
	 *
	 * @param array $blocks the blocks to render
	 * @return string the rendered markup
	 */
	protected function renderAbsy($blocks)
	{
		$output = '';
		foreach ($blocks as $block) {
			array_unshift($this->context, $block[0]);
			$output .= $this->{'render' . $block[0]}($block);
			array_shift($this->context);
		}
		return $output;
	}

	/**
	 * Consume lines for a paragraph
	 *
	 * Lines are collected starting at `$current` until a line that is empty after `ltrim()`
	 * or the end of input is reached.
	 *
	 * @param $lines
	 * @param $current
	 * @return array the paragraph block and the index of the last consumed line
	 */
	protected function consumeParagraph($lines, $current)
	{
		// consume until newline
		$content = [];
		for ($i = $current, $count = count($lines); $i < $count; $i++) {
			if (ltrim($lines[$i]) !== '') {
				$content[] = $lines[$i];
			} else {
				break;
			}
		}
		$block = [
			'paragraph',
			'content' => $this->parseInline(implode("\n", $content)),
		];
		// $i points at the first line that was not consumed, step back to the last consumed one
		return [$block, --$i];
	}

	/**
	 * Render a paragraph block
	 *
	 * @param $block
	 * @return string
	 */
	protected function renderParagraph($block)
	{
		return '<p>' . $this->renderAbsy($block['content']) . "</p>\n";
	}


	// inline parsing


	/**
	 * @var array the inline markers found in the text that is currently parsed, filled by
	 * `prepareMarkers()`. Indexed by the first character of the marker, each entry maps
	 * markers to the name of their parser method.
	 */
	private $_inlineMarkers = [];

	/**
	 * Returns a map of inline markers to the corresponding parser methods.
	 *
	 * This array defines handler methods for inline markdown markers.
	 * When a marker is found in the text, the handler method is called with the text
	 * starting at the position of the marker.
	 *
	 * Note that markers starting with whitespace may slow down the parser,
	 * you may want to use [[renderText]] to deal with them.
	 *
	 * You may override this method to define a set of markers and parsing methods.
	 * The default implementation looks for protected methods starting with `parse` that
	 * also have an `@marker` annotation in PHPDoc.
	 *
	 * @return array a map of markers to parser methods
	 */
	protected function inlineMarkers()
	{
		$markers = [];
		// detect "parse" functions
		$reflection = new \ReflectionClass($this);
		foreach($reflection->getMethods(ReflectionMethod::IS_PROTECTED) as $method) {
			$methodName = $method->getName();
			if (strncmp($methodName, 'parse', 5) === 0) {
				// a method may declare several markers, one annotation each
				preg_match_all('/@marker ([^\s]+)/', $method->getDocComment(), $matches);
				foreach($matches[1] as $match) {
					$markers[$match] = $methodName;
				}
			}
		}
		return $markers;
	}

	/**
	 * Prepare markers that are used in the text to parse
	 *
	 * Add all markers that are present in markdown.
	 * Check is done to avoid iterations in parseInline(), good for huge markdown files
	 * @param string $text
	 */
	protected function prepareMarkers($text)
	{
		$this->_inlineMarkers = [];
		foreach ($this->inlineMarkers() as $marker => $method) {
			if (strpos($text, $marker) !== false) {
				// markers are grouped by their first character
				$m = $marker[0];
				// put the longest marker first
				if (isset($this->_inlineMarkers[$m])) {
					reset($this->_inlineMarkers[$m]);
					if (strlen($marker) > strlen(key($this->_inlineMarkers[$m]))) {
						$this->_inlineMarkers[$m] = array_merge([$marker => $method], $this->_inlineMarkers[$m]);
						continue;
					}
				}
				$this->_inlineMarkers[$m][$marker] = $method;
			}
		}
	}

	/**
	 * Parses inline elements of the language.
	 *
	 * The text is scanned for the first characters of the markers collected by
	 * `prepareMarkers()`. At each hit the parser method of the first matching marker is
	 * called; it must return an array of the parsed element and the number of bytes consumed.
	 * Text between markers, and marker characters no parser method matches, become `text` elements.
	 *
	 * @param string $text the inline text to parse.
	 * @return array list of parsed elements. When the maximum nesting level is reached, a single
	 * `text` element holding the unparsed text is returned instead.
	 */
	protected function parseInline($text)
	{
		if ($this->_depth >= $this->maximumNestingLevel) {
			// maximum depth is reached, do not parse input
			return [['text', $text]];
		}
		$this->_depth++;

		$markers = implode('', array_keys($this->_inlineMarkers));

		$paragraph = [];

		// compare with '' explicitly: empty('0') is true, which would disable inline parsing
		// when "0" is the only first character of the markers in use
		while ($markers !== '' && ($found = strpbrk($text, $markers)) !== false) {

			// $found is the tail of $text starting at the first marker character,
			// so its offset is the difference of the lengths
			$pos = strlen($text) - strlen($found);

			// add the text up to next marker to the paragraph
			if ($pos !== 0) {
				$paragraph[] = ['text', substr($text, 0, $pos)];
			}
			$text = $found;

			$parsed = false;
			foreach ($this->_inlineMarkers[$text[0]] as $marker => $method) {
				if (strncmp($text, $marker, strlen($marker)) === 0) {
					// parse the marker
					array_unshift($this->context, $method);
					list($output, $offset) = $this->$method($text);
					array_shift($this->context);

					$paragraph[] = $output;
					$text = substr($text, $offset);
					$parsed = true;
					break;
				}
			}
			if (!$parsed) {
				// no marker matched at this position, emit the character as plain text
				$paragraph[] = ['text', substr($text, 0, 1)];
				$text = substr($text, 1);
			}
		}

		$paragraph[] = ['text', $text];

		$this->_depth--;

		return $paragraph;
	}

	/**
	 * Parses escaped special characters.
	 * @marker \
	 */
	protected function parseEscape($text)
	{
		// a backslash followed by an escapeable character yields that character only
		if (isset($text[1]) && in_array($text[1], $this->escapeCharacters, true)) {
			return [['text', $text[1]], 2];
		}
		// otherwise the backslash itself is kept as text
		return [['text', $text[0]], 1];
	}

	/**
	 * This function renders plain text sections in the markdown text.
	 * It can be used to work on normal text sections for example to highlight keywords or
	 * do special escaping.
	 *
	 * @param array $block the text element, the text is stored at index 1
	 * @return string the text, returned unchanged by this implementation
	 */
	protected function renderText($block)
	{
		return $block[1];
	}
}
390