Completed
Push — master ( af7bae...87972f )
by Carsten
01:23
created

Parser.php (2 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * @copyright Copyright (c) 2014 Carsten Brandt
4
 * @license https://github.com/cebe/markdown/blob/master/LICENSE
5
 * @link https://github.com/cebe/markdown#readme
6
 */
7
8
namespace cebe\markdown;
9
use ReflectionMethod;
10
11
/**
12
 * A generic parser for markdown-like languages.
13
 *
14
 * @author Carsten Brandt <mail@cebe.cc>
15
 */
16
abstract class Parser
{
	/**
	 * @var integer the maximum nesting level for language elements.
	 * When this depth is reached, `parseBlocks()` and `parseInline()` stop parsing and
	 * return their input as a single plain text element.
	 */
	public $maximumNestingLevel = 32;

	/**
	 * @var array stack of the contexts the parser is currently in. The innermost context is
	 * the first element. Entries are block/element type names (pushed while rendering) and
	 * inline parser method names (pushed while parsing inline markers).
	 * TODO remove in favor of absy
	 */
	protected $context = [];
	/**
	 * @var array these are "escapable" characters. When using one of these prefixed with a
	 * backslash, the character will be outputted without the backslash and is not interpreted
	 * as markdown.
	 */
	protected $escapeCharacters = [
		'\\', // backslash
	];

	/**
	 * @var integer the current nesting depth of `parseBlocks()`/`parseInline()` calls.
	 */
	private $_depth = 0;


	/**
	 * Parses the given text considering the full language.
	 *
	 * This includes parsing block elements as well as inline elements.
	 *
	 * @param string $text the text to parse
	 * @return string parsed markup
	 */
	public function parse($text)
	{
		return $this->processText($text, true);
	}

	/**
	 * Parses a paragraph without block elements (block elements are ignored).
	 *
	 * @param string $text the text to parse
	 * @return string parsed markup
	 */
	public function parseParagraph($text)
	{
		return $this->processText($text, false);
	}

	/**
	 * Shared implementation of `parse()` and `parseParagraph()`.
	 *
	 * Runs the `prepare()` hook, normalizes line endings to "\n", collects the inline markers
	 * present in the text, builds the abstract syntax tree, renders it and finally runs the
	 * `cleanup()` hook. `cleanup()` is also called when the input is blank, so that every
	 * `prepare()` call is matched by exactly one `cleanup()` call.
	 *
	 * @param string $text the text to parse
	 * @param boolean $withBlocks whether block elements should be parsed (`true`) or only
	 * inline elements (`false`).
	 * @return string parsed markup, an empty string if the text contains only whitespace.
	 */
	private function processText($text, $withBlocks)
	{
		$this->prepare();

		if (ltrim($text) === '') {
			// nothing to parse, but keep the prepare()/cleanup() hooks balanced
			$this->cleanup();
			return '';
		}

		$text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

		$this->prepareMarkers($text);

		if ($withBlocks) {
			$absy = $this->parseBlocks(explode("\n", $text));
		} else {
			$absy = $this->parseInline($text);
		}
		$markup = $this->renderAbsy($absy);

		$this->cleanup();
		return $markup;
	}

	/**
	 * This method will be called before `parse()` and `parseParagraph()`.
	 * You can override it to do some initialization work.
	 */
	protected function prepare()
	{
	}

	/**
	 * This method will be called after `parse()` and `parseParagraph()`.
	 * You can override it to do cleanup.
	 */
	protected function cleanup()
	{
	}


	// block parsing

	/**
	 * @var array|null cached result of `blockTypes()`, `null` until first use.
	 */
	private $_blockTypes;

	/**
	 * Detects the available block types by looking for protected methods whose name starts
	 * with `identify`. The remainder of the method name, in lower case, is the block type.
	 * The list is sorted and cached for the lifetime of the parser instance.
	 *
	 * @return array a list of block element types available.
	 */
	protected function blockTypes()
	{
		if ($this->_blockTypes === null) {
			// detect block types via "identify" functions
			$reflection = new \ReflectionClass($this);
			$types = [];
			foreach ($reflection->getMethods(\ReflectionMethod::IS_PROTECTED) as $method) {
				$name = $method->getName();
				// a method named exactly "identify" has no type suffix and is skipped
				if (strlen($name) > 8 && strncmp($name, 'identify', 8) === 0) {
					$types[] = strtolower(substr($name, 8));
				}
			}
			sort($types);

			$this->_blockTypes = $types;
		}
		return $this->_blockTypes;
	}

	/**
	 * Given a set of lines and an index of a current line it uses the registered block types to
	 * detect the type of this line.
	 *
	 * Each `identify<Type>()` method is called with the current line, all lines and the current
	 * index; the first one returning a truthy value determines the type.
	 *
	 * @param array $lines
	 * @param integer $current
	 * @return string name of the block type in lower case, `paragraph` if no type matches.
	 */
	protected function detectLineType($lines, $current)
	{
		$line = $lines[$current];
		foreach ($this->blockTypes() as $blockType) {
			if ($this->{'identify' . $blockType}($line, $lines, $current)) {
				return $blockType;
			}
		}
		// consider the line a normal paragraph if no other block type matches
		return 'paragraph';
	}

	/**
	 * Parse block elements by calling `detectLineType()` to identify them
	 * and call consume function afterwards.
	 *
	 * @param array $lines the lines to parse
	 * @return array list of block elements. If the maximum nesting level is reached, a single
	 * `text` element containing the unparsed lines is returned.
	 */
	protected function parseBlocks($lines)
	{
		if ($this->_depth >= $this->maximumNestingLevel) {
			// maximum depth is reached, do not parse input
			return [['text', implode("\n", $lines)]];
		}
		$this->_depth++;

		$blocks = [];

		// convert lines to blocks
		for ($i = 0, $count = count($lines); $i < $count; $i++) {
			$line = $lines[$i];
			if ($line !== '' && rtrim($line) !== '') { // skip empty lines
				// identify a blocks beginning and parse the content;
				// $i is moved to the last line consumed by the block
				list($block, $i) = $this->parseBlock($lines, $i);
				if ($block !== false) {
					$blocks[] = $block;
				}
			}
		}

		$this->_depth--;

		return $blocks;
	}

	/**
	 * Parses the block at current line by identifying the block type and parsing the content
	 * @param $lines
	 * @param $current
	 * @return array Array of two elements, the first element contains the block,
	 * the second contains the next line index to be parsed.
	 */
	protected function parseBlock($lines, $current)
	{
		// identify block type for this line
		$blockType = $this->detectLineType($lines, $current);

		// call consume method for the detected block type to consume further lines
		return $this->{'consume' . $blockType}($lines, $current);
	}

	/**
	 * Renders a list of elements by calling the `render<Type>()` method of each element,
	 * where the type is the element's first item, and concatenating the results.
	 *
	 * While an element is rendered its type is the first entry of `$context`.
	 *
	 * @param array $blocks the elements to render
	 * @return string the concatenated markup
	 */
	protected function renderAbsy($blocks)
	{
		$output = '';
		foreach ($blocks as $block) {
			array_unshift($this->context, $block[0]);
			$output .= $this->{'render' . $block[0]}($block);
			array_shift($this->context);
		}
		return $output;
	}

	/**
	 * Consume lines for a paragraph
	 *
	 * Lines are consumed until the first line that is empty or contains only whitespace.
	 *
	 * @param $lines
	 * @param $current
	 * @return array the paragraph block and the index of the last consumed line.
	 */
	protected function consumeParagraph($lines, $current)
	{
		// consume until newline
		$content = [];
		for ($i = $current, $count = count($lines); $i < $count; $i++) {
			if (ltrim($lines[$i]) === '') {
				break;
			}
			$content[] = $lines[$i];
		}
		$block = [
			'paragraph',
			'content' => $this->parseInline(implode("\n", $content)),
		];
		// $i points behind the paragraph, step back to its last line
		return [$block, --$i];
	}

	/**
	 * Render a paragraph block
	 *
	 * @param $block
	 * @return string
	 */
	protected function renderParagraph($block)
	{
		return '<p>' . $this->renderAbsy($block['content']) . "</p>\n";
	}


	// inline parsing


	/**
	 * @var array the inline markers found in the current text, grouped by their first
	 * character. Each group maps markers to parser method names, longest marker first.
	 */
	private $_inlineMarkers = [];

	/**
	 * Returns a map of inline markers to the corresponding parser methods.
	 *
	 * This array defines handler methods for inline markdown markers.
	 * When a marker is found in the text, the handler method is called with the text
	 * starting at the position of the marker.
	 *
	 * Note that markers starting with whitespace may slow down the parser,
	 * you may want to use [[renderText]] to deal with them.
	 *
	 * You may override this method to define a set of markers and parsing methods.
	 * The default implementation looks for protected methods starting with `parse` that
	 * also have an `@marker` annotation in PHPDoc.
	 *
	 * @return array a map of markers to parser methods
	 */
	protected function inlineMarkers()
	{
		$markers = [];
		// detect "parse" functions
		$reflection = new \ReflectionClass($this);
		foreach ($reflection->getMethods(\ReflectionMethod::IS_PROTECTED) as $method) {
			$methodName = $method->getName();
			if (strncmp($methodName, 'parse', 5) === 0) {
				preg_match_all('/@marker ([^\s]+)/', $method->getDocComment(), $matches);
				foreach ($matches[1] as $match) {
					$markers[$match] = $methodName;
				}
			}
		}
		return $markers;
	}

	/**
	 * Prepare markers that are used in the text to parse
	 *
	 * Add all markers that are present in markdown.
	 * Check is done to avoid iterations in parseInline(), good for huge markdown files
	 *
	 * Markers sharing the same first character are ordered by length, longest first, so that
	 * a short marker can never shadow a longer one starting with the same characters.
	 *
	 * @param string $text
	 */
	protected function prepareMarkers($text)
	{
		$this->_inlineMarkers = [];
		foreach ($this->inlineMarkers() as $marker => $method) {
			// numeric markers are turned into integer array keys by PHP, restore the string
			$marker = (string) $marker;
			if ($marker === '' || strpos($text, $marker) === false) {
				continue;
			}
			$this->_inlineMarkers[$marker[0]][$marker] = $method;
		}

		// put the longest marker first
		foreach ($this->_inlineMarkers as $first => $group) {
			uksort($group, function ($a, $b) {
				return strlen((string) $b) - strlen((string) $a);
			});
			$this->_inlineMarkers[$first] = $group;
		}
	}

	/**
	 * Parses inline elements of the language.
	 *
	 * The text is scanned for the first characters of the markers collected by
	 * `prepareMarkers()`. Text between markers becomes `text` elements; at each marker the
	 * matching handler method is called with the remaining text and has to return an array
	 * of the parsed element and the number of bytes it consumed. A marker character that no
	 * marker matches is emitted as plain text.
	 *
	 * @param string $text the inline text to parse.
	 * @return array list of inline elements. If the maximum nesting level is reached, a single
	 * `text` element containing the unparsed text is returned.
	 */
	protected function parseInline($text)
	{
		if ($this->_depth >= $this->maximumNestingLevel) {
			// maximum depth is reached, do not parse input
			return [['text', $text]];
		}
		$this->_depth++;

		// all characters a marker present in the text can start with
		$markers = implode('', array_keys($this->_inlineMarkers));

		$paragraph = [];

		// compare with '' instead of using empty(), which would treat the marker set "0" as empty
		while ($markers !== '' && ($found = strpbrk($text, $markers)) !== false) {

			// $found is the tail of $text starting at the first marker character
			$pos = strlen($text) - strlen($found);

			// add the text up to next marker to the paragraph
			if ($pos !== 0) {
				$paragraph[] = ['text', substr($text, 0, $pos)];
			}
			$text = $found;

			$parsed = false;
			foreach ($this->_inlineMarkers[$text[0]] as $marker => $method) {
				$marker = (string) $marker;
				if (strncmp($text, $marker, strlen($marker)) === 0) {
					// parse the marker
					array_unshift($this->context, $method);
					list($output, $offset) = $this->$method($text);
					array_shift($this->context);

					$paragraph[] = $output;
					// substr() returns false instead of '' at the end of the string before PHP 8
					$text = (string) substr($text, $offset);
					$parsed = true;
					break;
				}
			}
			if (!$parsed) {
				// no marker matched, treat the character as normal text
				$paragraph[] = ['text', substr($text, 0, 1)];
				$text = (string) substr($text, 1);
			}
		}

		$paragraph[] = ['text', $text];

		$this->_depth--;

		return $paragraph;
	}

	/**
	 * Parses escaped special characters.
	 *
	 * If the backslash is followed by one of the `$escapeCharacters`, that character is
	 * returned without the backslash and two bytes are consumed. Otherwise the backslash
	 * itself is returned as text and one byte is consumed.
	 *
	 * @marker \
	 */
	protected function parseEscape($text)
	{
		if (isset($text[1]) && in_array($text[1], $this->escapeCharacters, true)) {
			return [['text', $text[1]], 2];
		}
		return [['text', $text[0]], 1];
	}

	/**
	 * This function renders plain text sections in the markdown text.
	 * It can be used to work on normal text sections for example to highlight keywords or
	 * do special escaping.
	 *
	 * @param array $block the text element, the text is stored at index 1.
	 * @return string the text, returned unchanged by the default implementation.
	 */
	protected function renderText($block)
	{
		return $block[1];
	}
}
390