Test Failed
Pull Request — lib (#328)
by
unknown
02:43
created

Markdown::_doLists_callback()   B

Complexity

Conditions 8
Paths 32

Size

Total Lines 34

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 21
CRAP Score 8

Importance

Changes 0
Metric Value
dl 0
loc 34
ccs 21
cts 21
cp 1
rs 8.1315
c 0
b 0
f 0
cc 8
nc 32
nop 1
crap 8
1
<?php
2
/**
3
 * Markdown  -  A text-to-HTML conversion tool for web writers
4
 *
5
 * @package   php-markdown
6
 * @author    Michel Fortin <[email protected]>
7
 * @copyright 2004-2019 Michel Fortin <https://michelf.com/projects/php-markdown/>
8
 * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
9
 */
10
11
namespace Michelf;
12
13
/**
14
 * Markdown Parser Class
15
 */
16
class Markdown implements MarkdownInterface {
17
	/**
18
	 * Define the package version
19
	 * @var string
20
	 */
21
	const MARKDOWNLIB_VERSION = "1.9.0";
22
23
	/**
24
	 * Simple function interface - Initialize the parser and return the result
25
	 * of its transform method. This will work fine for derived classes too.
26
	 *
27
	 * @api
28
	 *
29
	 * @param  string $text
30
	 * @return string
31
	 */
32 107
	public static function defaultTransform($text) {
		// Parsers are cached in a function-static list keyed by the class this
		// method was called on, so a parser is only built when none is cached.
		static $parser_list;

		$parser_class = \get_called_class();

		// Create the parser if it is not already set.
		if (empty($parser_list[$parser_class])) {
			$parser_list[$parser_class] = new $parser_class;
		}

		// Transform text using the cached parser.
		return $parser_list[$parser_class]->transform($text);
	}
48
49
	/**
50
	 * Configuration variables
51
	 */
52
53
	/**
54
	 * Change to ">" for HTML output.
55
	 * @var string
56
	 */
57
	public $empty_element_suffix = " />";
58
59
	/**
60
	 * The width of indentation of the output markup
61
	 * @var int
62
	 */
63
	public $tab_width = 4;
64
65
	/**
66
	 * Change to `true` to disallow markup or entities.
67
	 * @var boolean
68
	 */
69
	public $no_markup   = false;
70
	public $no_entities = false;
71
72
73
	/**
74
	 * Change to `true` to enable line breaks on \n without two trailing spaces
75
	 * @var boolean
76
	 */
77
	public $hard_wrap = false;
78
79
	/**
80
	 * Predefined URLs and titles for reference links and images.
81
	 * @var array
82
	 */
83
	public $predef_urls   = array();
84
	public $predef_titles = array();
85
86
	/**
87
	 * Optional filter function for URLs
88
	 * @var callable|null
89
	 */
90
	public $url_filter_func = null;
91
92
	/**
93
	 * Optional header id="" generation callback function.
94
	 * @var callable|null
95
	 */
96
	public $header_id_func = null;
97
98
	/**
99
	 * Optional function for converting code block content to HTML
100
	 * @var callable|null
101
	 */
102
	public $code_block_content_func = null;
103
104
	/**
105
	 * Optional function for converting code span content to HTML.
106
	 * @var callable|null
107
	 */
108
	public $code_span_content_func = null;
109
110
	/**
111
	 * Class attribute to toggle "enhanced ordered list" behaviour
112
	 * setting this to true will allow ordered lists to start from the index
113
	 * number that is defined first.
114
	 *
115
	 * For example:
116
	 * 2. List item two
117
	 * 3. List item three
118
	 *
119
	 * Becomes:
120
	 * <ol start="2">
121
	 * <li>List item two</li>
122
	 * <li>List item three</li>
123
	 * </ol>
124
	 *
125
	 * @var bool
126
	 */
127
	public $enhanced_ordered_list = false;
128
129
	/**
130
	 * Parser implementation
131
	 */
132
133
	/**
134
	 * Regex to match balanced [brackets].
135
	 * Needed to insert a maximum bracket depth while converting to PHP.
136
	 * @var int
137
	 */
138
	protected $nested_brackets_depth = 6;
139
	protected $nested_brackets_re;
140
141
	protected $nested_url_parenthesis_depth = 4;
142
	protected $nested_url_parenthesis_re;
143
144
	/**
145
	 * Table of hash values for escaped characters:
146
	 * @var string
147
	 */
148
	protected $escape_chars = '\`*_{}[]()>#+-.!';
149
	protected $escape_chars_re;
150
151
	/**
152
	 * Constructor function. Initialize appropriate member variables.
153
	 * @return void
0 ignored issues
show
Comprehensibility Best Practice introduced by
Adding a @return annotation to constructors is generally not recommended as a constructor does not have a meaningful return value.

Adding a @return annotation to a constructor is not recommended, since a constructor does not have a meaningful return value.

Please refer to the PHP core documentation on constructors.

Loading history...
154
	 */
155 5
	public function __construct() {
		$this->_initDetab();
		$this->prepareItalicsAndBold();

		// Balanced [brackets]: repeat the opening half of the pattern once per
		// nesting level, then the closing half the same number of times.
		$bracket_depth = $this->nested_brackets_depth;
		$this->nested_brackets_re =
			str_repeat('(?>[^\[\]]+|\[', $bracket_depth)
			. str_repeat('\])*', $bracket_depth);

		// Same construction for balanced (parentheses) inside URLs.
		$paren_depth = $this->nested_url_parenthesis_depth;
		$this->nested_url_parenthesis_re =
			str_repeat('(?>[^()\s]+|\(', $paren_depth)
			. str_repeat('(?>\)))*', $paren_depth);

		// Character class matching any one of the escapable characters.
		$this->escape_chars_re = '[' . preg_quote($this->escape_chars) . ']';

		// Sort document, block, and span gamut in ascending priority order.
		asort($this->document_gamut);
		asort($this->block_gamut);
		asort($this->span_gamut);
	}
174
175
176
	/**
177
	 * Internal hashes used during transformation.
178
	 * @var array
179
	 */
180
	protected $urls        = array();
181
	protected $titles      = array();
182
	protected $html_hashes = array();
183
184
	/**
185
	 * Status flag to avoid invalid nesting.
186
	 * @var boolean
187
	 */
188
	protected $in_anchor = false;
189
190
	/**
191
	 * Status flag to avoid invalid nesting.
192
	 * @var boolean
193
	 */
194
	protected $in_emphasis_processing = false;
195
196
	/**
197
	 * Called before the transformation process starts to setup parser states.
198
	 * @return void
199
	 */
200 110
	protected function setup() {
		// Reset both nesting guards.
		$this->in_emphasis_processing = false;
		$this->in_anchor              = false;

		// The HTML hash table starts empty; the link tables start from the
		// predefined URLs and titles.
		$this->html_hashes = array();
		$this->titles      = $this->predef_titles;
		$this->urls        = $this->predef_urls;
	}
208
209
	/**
210
	 * Called after the transformation process to clear any variable which may
211
	 * be taking up memory unnecessarily.
212
	 * @return void
213
	 */
214 102
	protected function teardown() {
		// Reset each per-document table to an empty array.
		foreach (array('urls', 'titles', 'html_hashes') as $table) {
			$this->$table = array();
		}
	}
219
220
	/**
221
	 * Main function. Performs some preprocessing on the input text and passes
222
	 * it through the document gamut.
223
	 *
224
	 * @api
225
	 *
226
	 * @param  string $text
227
	 * @return string
228
	 */
229 110
	public function transform($text) {
		$this->setup();

		// Normalise the raw input before any parsing happens:
		//  - remove a leading UTF-8 BOM and any \x1A marker character,
		//  - turn DOS (\r\n) and Mac (\r) line endings into Unix (\n),
		//  - make sure the text ends with a couple of newlines.
		$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
		$text = preg_replace('{\r\n?}', "\n", $text) . "\n\n";

		// Convert all tabs to spaces, then turn block-level HTML blocks into
		// hash entries.
		$text = $this->hashHTMLBlocks($this->detab($text));

		// Strip any lines consisting only of spaces. This makes subsequent
		// regexen easier to write, because consecutive blank lines can be
		// matched with /\n+/ instead of something like /[ ]*\n+/ .
		$text = preg_replace('/^[ ]+$/m', '', $text);

		// Run the document gamut methods in their stored order.
		foreach (array_keys($this->document_gamut) as $method) {
			$text = $this->$method($text);
		}

		$this->teardown();

		return $text . "\n";
	}
263
264
	/**
265
	 * Define the document gamut
266
	 * @var array
267
	 */
268
	protected $document_gamut = array(
269
		// Strip link definitions, store in hashes.
270
		"stripLinkDefinitions" => 20,
271
		"runBasicBlockGamut"   => 30,
272
	);
273
274
	/**
275
	 * Strips link definitions from text, stores the URLs and titles in
276
	 * hash references
277
	 * @param  string $text
278
	 * @return string
279
	 */
280 48 View Code Duplication
	protected function stripLinkDefinitions($text) {
		// A definition may be indented by at most (tab_width - 1) spaces.
		$max_indent = $this->tab_width - 1;

		// Link defs are in the form: ^[id]: url "optional title"
		$definition_re = '{
							^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?:	# id = $1
							  [ ]*
							  \n?				# maybe *one* newline
							  [ ]*
							(?:
							  <(.+?)>			# url = $2
							|
							  (\S+?)			# url = $3
							)
							  [ ]*
							  \n?				# maybe one newline
							  [ ]*
							(?:
								(?<=\s)			# lookbehind for whitespace
								["(]
								(.*?)			# title = $4
								[")]
								[ ]*
							)?	# title is optional
							(?:\n+|\Z)
			}xm';

		// The callback records each definition and returns its replacement.
		return preg_replace_callback(
			$definition_re,
			array($this, '_stripLinkDefinitions_callback'),
			$text
		);
	}
312
313
	/**
314
	 * The callback to strip link definitions
315
	 * @param  array $matches
316
	 * @return string
317
	 */
318 9
	protected function _stripLinkDefinitions_callback($matches) {
		// Definitions are stored under the lower-cased id.
		$id = strtolower($matches[1]);

		// The URL is captured by group 2 (<bracketed>) or by group 3 (bare).
		if ($matches[2] == '') {
			$this->urls[$id] = $matches[3];
		} else {
			$this->urls[$id] = $matches[2];
		}

		// Group 4 is absent when the definition has no title; a null entry
		// is stored in that case, so isset() on it reports false.
		$this->titles[$id] = isset($matches[4]) ? $matches[4] : null;

		// String that will replace the block
		return '';
	}
325
326
	/**
327
	 * Hashify HTML blocks
328
	 * @param  string $text
329
	 * @return string
330
	 */
331 48
	protected function hashHTMLBlocks($text) {
		// With markup disabled, nothing is hashed and the text is returned as is.
		if ($this->no_markup) {
			return $text;
		}

		// A block tag may be indented by at most (tab_width - 1) spaces.
		$less_than_tab = $this->tab_width - 1;

		/**
		 * Hashify HTML blocks:
		 *
		 * We only want to do this for block-level HTML tags, such as headers,
		 * lists, and tables. That's because we still want to wrap <p>s around
		 * "paragraphs" that are wrapped in non-block-level tags, such as
		 * anchors, phrase emphasis, and spans. The list of tags we're looking
		 * for is hard-coded:
		 *
		 * *  List "a" is made of tags which can be both inline or block-level.
		 *    These will be treated block-level when the start tag is alone on
		 *    its line, otherwise they're not matched here and will be taken as
		 *    inline later.
		 * *  List "b" is made of tags which are always block-level;
		 */
		$block_tags_a_re = 'ins|del';
		$block_tags_b_re =
			'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'
			. 'script|noscript|style|form|fieldset|iframe|math|svg|'
			. 'article|section|nav|aside|hgroup|header|footer|'
			. 'figure';

		// Regular expression for the optional attributes of a tag.
		$attr = '
			(?>				# optional tag attributes
			  \s			# starts with whitespace
			  (?>
				[^>"/]+		# text outside quotes
			  |
				/+(?!>)		# slash not followed by ">"
			  |
				"[^"]*"		# text inside double quotes (tolerate ">")
			  |
				\'[^\']*\'	# text inside single quotes (tolerate ">")
			  )*
			)?
			';

		// Regular expression for the content of a block tag. The opening half
		// is repeated once per supported nesting level, followed by the
		// innermost content, then the closing half the same number of times.
		$nested_tags_level = 4;
		$nested_open = '
				(?>
				  [^<]+			# content without tag
				|
				  <\2			# nested opening tag
					'.$attr.'	# attributes
					(?>
					  />
					|
					  >';		// end of opening tag
		$nested_close = '
					  </\2\s*>	# closing nested tag
					)
				  |
					<(?!/\2\s*>	# other tags with a different name
				  )
				)*';
		$content =
			str_repeat($nested_open, $nested_tags_level)
			. '.*?'				// last level nested tag content
			. str_repeat($nested_close, $nested_tags_level);

		// Same content pattern for list "a", whose tag name is group 3.
		$content2 = str_replace('\2', '\3', $content);

		/**
		 * First, look for nested blocks, e.g.:
		 * 	<div>
		 * 		<div>
		 * 		tags for inner block must be indented.
		 * 		</div>
		 * 	</div>
		 *
		 * The outermost tags must start at the left margin for this to match,
		 * and the inner nested divs must be indented.
		 * We need to do this before the next, more liberal match, because the
		 * next match will start at the first `<div>` and stop at the
		 * first `</div>`.
		 *
		 * Capture groups of the combined pattern: $1 = whole block,
		 * $2 = list "b" tag, $3 = list "a" tag, $4 = hr, $5 = the `?` or `%`
		 * of a processing instruction. The processing-instruction branch must
		 * therefore close with \5: a backreference to \2 refers to a group
		 * that is unset in that branch and would never match.
		 */
		$text = preg_replace_callback('{(?>
			(?>
				(?<=\n)			# Starting on its own line
				|				# or
				\A\n?			# the at beginning of the doc
			)
			(						# save in $1

			  # Match from `\n<tag>` to `</tag>\n`, handling nested tags
			  # in between.

						[ ]{0,'.$less_than_tab.'}
						<('.$block_tags_b_re.')# start tag = $2
						'.$attr.'>			# attributes followed by > and \n
						'.$content.'		# content, support nesting
						</\2>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special version for tags of group a.

						[ ]{0,'.$less_than_tab.'}
						<('.$block_tags_a_re.')# start tag = $3
						'.$attr.'>[ ]*\n	# attributes followed by >
						'.$content2.'		# content, support nesting
						</\3>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special case just for <hr />. It was easier to make a special
			  # case than to make the other regex more complicated.

						[ ]{0,'.$less_than_tab.'}
						<(hr)				# start tag = $2
						'.$attr.'			# attributes
						/?>					# the matching end tag
						[ ]*
						(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # Special case for standalone HTML comments:

					[ ]{0,'.$less_than_tab.'}
					(?s:
						<!-- .*? -->
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # PHP and ASP-style processor instructions (<? and <%)

					[ ]{0,'.$less_than_tab.'}
					(?s:
						<([?%])			# $5
						.*?
						\5>
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			)
			)}Sxmi',
			array($this, '_hashHTMLBlocks_callback'),
			$text
		);

		return $text;
	}
478
479
	/**
480
	 * The callback for hashing HTML blocks
481
	 * @param  array $matches
482
	 * @return string
483
	 */
484 9
	protected function _hashHTMLBlocks_callback($matches) {
		// Group 1 is the whole HTML block; swap it for its hash token,
		// surrounded by blank lines.
		return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
	}
489
490
	/**
491
	 * Called whenever a tag must be hashed when a function inserts an atomic
492
	 * element in the text stream. Passing $text through this function gives
493
	 * a unique text-token which will be reverted back when calling unhash.
494
	 *
495
	 * The $boundary argument specifies what character should be used to surround
496
	 * the token. By convention, "B" is used for block elements that need not
497
	 * to be wrapped into paragraph tags at the end, ":" is used for elements
498
	 * that are word separators and "X" is used in the general case.
499
	 *
500
	 * @param  string $text
501
	 * @param  string $boundary
502
	 * @return string
503
	 */
504 110
	protected function hashPart($text, $boundary = 'X') {
		// Counter shared by every call; it makes each token unique.
		static $counter = 0;

		// Swap back any tag hash found in $text so we do not have to `unhash`
		// multiple times at the end.
		$restored = $this->unhash($text);

		// Build the token: boundary, \x1A marker, sequence number, boundary.
		$counter++;
		$token = $boundary . "\x1A" . $counter . $boundary;

		$this->html_hashes[$token] = $restored;

		// String that will replace the tag.
		return $token;
	}
515
516
	/**
517
	 * Shortcut function for hashPart with block-level boundaries.
518
	 * @param  string $text
519
	 * @return string
520
	 */
521 110
	protected function hashBlock($text) {
		// 'B' is the boundary character used for block-level tokens.
		$token = $this->hashPart($text, 'B');
		return $token;
	}
524
525
	/**
526
	 * Define the block gamut - these are all the transformations that form
527
	 * block-level tags like paragraphs, headers, and list items.
528
	 * @var array
529
	 */
530
	protected $block_gamut = array(
531
		"doHeaders"         => 10,
532
		"doHorizontalRules" => 20,
533
		"doLists"           => 40,
534
		"doCodeBlocks"      => 50,
535
		"doBlockQuotes"     => 60,
536
	);
537
538
	/**
539
	 * Run block gamut transformations.
540
	 *
541
	 * We need to escape raw HTML in Markdown source before doing anything
542
	 * else. This needs to be done for each block, and not only at the
543
	 * beginning in the Markdown function since hashed blocks can be part of
544
	 * list items and could have been indented. Indented blocks would have
545
	 * been seen as a code block in a previous pass of hashHTMLBlocks.
546
	 *
547
	 * @param  string $text
548
	 * @return string
549
	 */
550 20
	protected function runBlockGamut($text) {
		// Hash raw HTML blocks first, then apply the basic block gamut.
		return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
	}
554
555
	/**
556
	 * Run block gamut transformations, without hashing HTML blocks. This is
557
	 * useful when HTML blocks are known to be already hashed, like in the first
558
	 * whole-document pass.
559
	 *
560
	 * @param  string $text
561
	 * @return string
562
	 */
563 110
	protected function runBasicBlockGamut($text) {
		// Apply every block gamut method in its stored order.
		foreach (array_keys($this->block_gamut) as $method) {
			$text = $this->$method($text);
		}

		// Finally form paragraphs and restore hashed blocks.
		return $this->formParagraphs($text);
	}
574
575
	/**
576
	 * Convert horizontal rules
577
	 * @param  string $text
578
	 * @return string
579
	 */
580 110
	protected function doHorizontalRules($text) {
		// A rule is a line made of three or more identical markers (-, * or _),
		// optionally separated by up to two spaces.
		$rule_re = '{
				^[ ]{0,3}	# Leading space
				([-*_])		# $1: First marker
				(?>			# Repeated marker group
					[ ]{0,2}	# Zero, one, or two spaces.
					\1			# Marker character
				){2,}		# Group repeated at least twice
				[ ]*		# Tailing spaces
				$			# End of line.
			}mx';

		// The <hr> tag is hashed once, before matching, and the resulting
		// token is used as the replacement for every rule found.
		$replacement = "\n" . $this->hashBlock("<hr$this->empty_element_suffix") . "\n";

		return preg_replace($rule_re, $replacement, $text);
	}
596
597
	/**
598
	 * These are all the transformations that occur *within* block-level
599
	 * tags like paragraphs, headers, and list items.
600
	 * @var array
601
	 */
602
	protected $span_gamut = array(
603
		// Process character escapes, code spans, and inline HTML
604
		// in one shot.
605
		"parseSpan"           => -30,
606
		// Process anchor and image tags. Images must come first,
607
		// because ![foo][f] looks like an anchor.
608
		"doImages"            =>  10,
609
		"doAnchors"           =>  20,
610
		// Make links out of things like `<https://example.com/>`
611
		// Must come after doAnchors, because you can use < and >
612
		// delimiters in inline links like [this](<url>).
613
		"doAutoLinks"         =>  30,
614
		"encodeAmpsAndAngles" =>  40,
615
		"doItalicsAndBold"    =>  50,
616
		"doHardBreaks"        =>  60,
617
	);
618
619
	/**
620
	 * Run span gamut transformations
621
	 * @param  string $text
622
	 * @return string
623
	 */
624 109
	protected function runSpanGamut($text) {
		// Apply every span gamut method in its stored order.
		foreach (array_keys($this->span_gamut) as $method) {
			$text = $this->$method($text);
		}

		return $text;
	}
631
632
	/**
633
	 * Do hard breaks
634
	 * @param  string $text
635
	 * @return string
636
	 */
637 108
	protected function doHardBreaks($text) {
		// With hard_wrap on, every newline (with any trailing spaces) is a
		// break; otherwise at least two trailing spaces are required.
		$break_re = $this->hard_wrap ? '/ *\n/' : '/ {2,}\n/';

		return preg_replace_callback(
			$break_re,
			array($this, '_doHardBreaks_callback'),
			$text
		);
	}
646
647
	/**
648
	 * Trigger part hashing for the hard break (callback method)
649
	 * @param  array $matches
650
	 * @return string
651
	 */
652 3
	protected function _doHardBreaks_callback($matches) {
		// $matches is supplied by preg_replace_callback but is not needed:
		// every match is replaced by the same hashed <br> tag.
		$break = "<br$this->empty_element_suffix\n";
		return $this->hashPart($break);
	}
655
656
	/**
657
	 * Turn Markdown link shortcuts into XHTML <a> tags.
658
	 * @param  string $text
659
	 * @return string
660
	 */
661 47 View Code Duplication
	protected function doAnchors($text) {
		// in_anchor is the guard against invalid nesting: while an anchor is
		// already being processed, the text is returned untouched.
		if ($this->in_anchor) {
			return $text;
		}
		$this->in_anchor = true;

		$reference_callback = array($this, '_doAnchors_reference_callback');
		$inline_callback    = array($this, '_doAnchors_inline_callback');

		// First, handle reference-style links: [link text] [id]
		$reference_re = '{
			(					# wrap whole match in $1
			  \[
				('.$this->nested_brackets_re.')	# link text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]
			)
			}xs';
		$text = preg_replace_callback($reference_re, $reference_callback, $text);

		// Next, inline-style links: [link text](url "optional title")
		$inline_re = '{
			(				# wrap whole match in $1
			  \[
				('.$this->nested_brackets_re.')	# link text = $2
			  \]
			  \(			# literal paren
				[ \n]*
				(?:
					<(.+?)>	# href = $3
				|
					('.$this->nested_url_parenthesis_re.')	# href = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# Title = $7
				  \6		# matching quote
				  [ \n]*	# ignore any spaces/tabs between closing quote and )
				)?			# title is optional
			  \)
			)
			}xs';
		$text = preg_replace_callback($inline_re, $inline_callback, $text);

		// Last, handle reference-style shortcuts: [link text]
		// These must come last in case you've also got [link text][1]
		// or [link text](/foo)
		$shortcut_re = '{
			(					# wrap whole match in $1
			  \[
				([^\[\]]+)		# link text = $2; can\'t contain [ or ]
			  \]
			)
			}xs';
		$text = preg_replace_callback($shortcut_re, $reference_callback, $text);

		$this->in_anchor = false;
		return $text;
	}
724
725
	/**
726
	 * Callback method to parse referenced anchors
727
	 * @param  array $matches
728
	 * @return string
729
	 */
730 9
	protected function _doAnchors_reference_callback($matches) {
		$link_text = $matches[2];

		// Group 3 is missing for the [text] shortcut form and empty for
		// [text][]; in both cases the link text doubles as the id.
		$link_id = isset($matches[3]) ? $matches[3] : '';
		if ($link_id == "") {
			$link_id = $link_text;
		}

		// lower-case and turn embedded newlines into spaces
		$link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

		// No definition stored for this id: leave the source text intact.
		if (!isset($this->urls[$link_id])) {
			return $matches[1];
		}

		$href = $this->encodeURLAttribute($this->urls[$link_id]);
		$tag  = "<a href=\"$href\"";

		// The title attribute is only emitted when a title was stored.
		if (isset($this->titles[$link_id])) {
			$title_att = $this->encodeAttribute($this->titles[$link_id]);
			$tag .= " title=\"$title_att\"";
		}

		$tag .= ">" . $this->runSpanGamut($link_text) . "</a>";

		// The finished anchor is hashed so later passes leave it alone.
		return $this->hashPart($tag);
	}
763
764
	/**
765
	 * Callback method to parse inline anchors
766
	 * @param  array $matches
767
	 * @return string
768
	 */
769 10
	protected function _doAnchors_inline_callback($matches) {
		$link_text = $this->runSpanGamut($matches[2]);

		// Group 3 holds a <bracketed> URL, group 4 a bare one.
		if ($matches[3] === '') {
			$url = $matches[4];
		} else {
			$url = $matches[3];
		}

		// Group 7 is missing when the link carries no title.
		$title = isset($matches[7]) ? $matches[7] : null;

		// If the URL was of the form <s p a c e s> it got caught by the HTML
		// tag parser and hashed. Need to reverse the process before using
		// the URL.
		$unhashed = $this->unhash($url);
		if ($unhashed !== $url) {
			$url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
		}

		$url = $this->encodeURLAttribute($url);

		$tag = "<a href=\"$url\"";
		if ($title !== null) {
			$title = $this->encodeAttribute($title);
			$tag .= " title=\"$title\"";
		}

		// NOTE(review): the link text already went through the span gamut
		// above, while the reference callback applies it only once. The
		// second pass is kept to preserve behaviour - confirm whether it is
		// intentional.
		$link_text = $this->runSpanGamut($link_text);
		$tag .= ">$link_text</a>";

		return $this->hashPart($tag);
	}
794
795
	/**
796
	 * Turn Markdown image shortcuts into <img> tags.
797
	 * @param  string $text
798
	 * @return string
799
	 */
800 47 View Code Duplication
	protected function doImages($text) {
		// First, handle reference-style labeled images: ![alt text][id]
		$reference_re = '{
			(				# wrap whole match in $1
			  !\[
				('.$this->nested_brackets_re.')		# alt text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]

			)
			}xs';
		$text = preg_replace_callback(
			$reference_re,
			array($this, '_doImages_reference_callback'),
			$text
		);

		// Next, handle inline images:  ![alt text](url "optional title")
		// Don't forget: encode * and _
		$inline_re = '{
			(				# wrap whole match in $1
			  !\[
				('.$this->nested_brackets_re.')		# alt text = $2
			  \]
			  \s?			# One optional whitespace character
			  \(			# literal paren
				[ \n]*
				(?:
					<(\S*)>	# src url = $3
				|
					('.$this->nested_url_parenthesis_re.')	# src url = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# title = $7
				  \6		# matching quote
				  [ \n]*
				)?			# title is optional
			  \)
			)
			}xs';

		return preg_replace_callback(
			$inline_re,
			array($this, '_doImages_inline_callback'),
			$text
		);
	}
848
849
	/**
850
	 * Callback to parse reference-style image tags
851
	 * @param  array $matches
852
	 * @return string
853
	 */
854 1
	protected function _doImages_reference_callback($matches) {
		// The id is lower-cased; an empty id (shortcut form ![this][]) falls
		// back to the lower-cased alt text.
		$link_id = strtolower($matches[3]);
		if ($link_id == "") {
			$link_id = strtolower($matches[2]);
		}

		$alt_text = $this->encodeAttribute($matches[2]);

		// If there's no such link ID, leave intact:
		if (!isset($this->urls[$link_id])) {
			return $matches[1];
		}

		$src = $this->encodeURLAttribute($this->urls[$link_id]);
		$tag = "<img src=\"$src\" alt=\"$alt_text\"";

		// The title attribute is only emitted when a title was stored.
		if (isset($this->titles[$link_id])) {
			$title_att = $this->encodeAttribute($this->titles[$link_id]);
			$tag .= " title=\"$title_att\"";
		}

		$tag .= $this->empty_element_suffix;

		return $this->hashPart($tag);
	}
881
882
	/**
883
	 * Callback to parse inline image tags
884
	 * @param  array $matches
885
	 * @return string
886
	 */
887 2
	protected function _doImages_inline_callback($matches) {
		// Group 2 is the alt text; group 3 holds a <bracketed> URL and
		// group 4 a bare one. Group 1 (the whole match) is not needed here.
		$alt_text = $matches[2];
		$url      = $matches[3] == '' ? $matches[4] : $matches[3];

		// Group 7 is missing when the image has no title; binding by
		// reference leaves $title null in that case, so isset() below is false.
		$title =& $matches[7];

		$alt_text = $this->encodeAttribute($alt_text);
		$url      = $this->encodeURLAttribute($url);

		$result = "<img src=\"$url\" alt=\"$alt_text\"";
		if (isset($title)) {
			$title = $this->encodeAttribute($title);
			$result .= " title=\"$title\"";
		}
		$result .= $this->empty_element_suffix;

		// The finished tag is hashed so later passes leave it alone.
		return $this->hashPart($result);
	}
904
905
	/**
906
	 * Parse Markdown heading elements to HTML
907
	 * @param  string $text
908
	 * @return string
909
	 */
910 48
	protected function doHeaders($text) {
		/**
		 * Setext-style headers:
		 *	  Header 1
		 *	  ========
		 *
		 *	  Header 2
		 *	  --------
		 */
		$setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';
		$text = preg_replace_callback(
			$setext_re,
			array($this, '_doHeaders_callback_setext'),
			$text
		);

		/**
		 * atx-style headers:
		 *   # Header 1
		 *   ## Header 2
		 *   ## Header 2 with closing hashes ##
		 *   ...
		 *   ###### Header 6
		 */
		$atx_re = '{
				^(\#{1,6})	# $1 = string of #\'s
				[ ]*
				(.+?)		# $2 = Header text
				[ ]*
				\#*			# optional closing #\'s (not counted)
				\n+
			}xm';

		return preg_replace_callback(
			$atx_re,
			array($this, '_doHeaders_callback_atx'),
			$text
		);
	}
942
943
	/**
944
	 * Setext header parsing callback
945
	 * @param  array $matches
946
	 * @return string
947
	 */
948 5
	protected function _doHeaders_callback_setext($matches) {
		$title = $matches[1];
		$underline = $matches[2];

		// Terrible hack: when the underline is a single "-" and the title
		// line itself is "-" or starts with "- ", this is an empty list
		// item rather than a header, so the match is returned untouched.
		if ($underline == '-' && preg_match('{^-(?: |$)}', $title)) {
			return $matches[0];
		}

		// "=" underlines produce <h1>, "-" underlines produce <h2>.
		$level = 2;
		if ($underline[0] == '=') {
			$level = 1;
		}

		// ID attribute generation
		$idAtt = $this->_generateIdFromHeaderValue($title);

		$html = "<h$level$idAtt>" . $this->runSpanGamut($title) . "</h$level>";
		return "\n" . $this->hashBlock($html) . "\n\n";
	}
962
963
	/**
964
	 * ATX header parsing callback
965
	 * @param  array $matches
966
	 * @return string
967
	 */
968 8
	protected function _doHeaders_callback_atx($matches) {
		$hashes = $matches[1];
		$title = $matches[2];

		// ID attribute generation
		$idAtt = $this->_generateIdFromHeaderValue($title);

		// The heading level is the number of leading "#" characters.
		$level = strlen($hashes);
		$html = "<h$level$idAtt>" . $this->runSpanGamut($title) . "</h$level>";

		return "\n" . $this->hashBlock($html) . "\n\n";
	}
976
977
	/**
978
	 * If a header_id_func property is set, we can use it to automatically
979
	 * generate an id attribute.
980
	 *
981
	 * This method returns a string in the form id="foo", or an empty string
982
	 * otherwise.
983
	 * @param  string $headerValue
984
	 * @return string
985
	 */
986 9
	protected function _generateIdFromHeaderValue($headerValue) {
		// Produces ' id="..."' (with a leading space) when header_id_func is
		// callable and returns a truthy value; an empty string otherwise.
		$generator = $this->header_id_func;

		if (is_callable($generator)) {
			$idValue = call_user_func($generator, $headerValue);
			if ($idValue) {
				return ' id="' . $this->encodeAttribute($idValue) . '"';
			}
		}

		return "";
	}
998
999
	/**
1000
	 * Form HTML ordered (numbered) and unordered (bulleted) lists.
1001
	 * @param  string $text
1002
	 * @return string
1003
	 */
1004 110
	protected function doLists($text) {
		$less_than_tab = $this->tab_width - 1;

		// Re-usable patterns to match list item bullets and number markers:
		$marker_ul_re  = '[*+-]';
		$marker_ol_re  = '\d+[\.]';

		// Each marker kind is paired with the other kind, which is what
		// terminates a list of the first kind at the same indentation.
		$markers_relist = array(
			$marker_ul_re => $marker_ol_re,
			$marker_ol_re => $marker_ul_re,
			);

		foreach ($markers_relist as $marker_re => $other_marker_re) {
			// Re-usable pattern to match any entire ul or ol list:
			$whole_list_re = '
				(								# $1 = whole list
				  (								# $2
					([ ]{0,'.$less_than_tab.'})	# $3 = number of spaces
					('.$marker_re.')			# $4 = first list item marker
					[ ]+
				  )
				  (?s:.+?)
				  (								# $5
					  \z
					|
					  \n{2,}
					  (?=\S)
					  (?!						# Negative lookahead for another list item marker
						[ ]*
						'.$marker_re.'[ ]+
					  )
					|
					  (?=						# Lookahead for another kind of list
					    \n
						\3						# Must have the same indentation
						'.$other_marker_re.'[ ]+
					  )
				  )
				)
			'; // mx

			// We use a different prefix before nested lists than top-level
			// lists: inside a list (list_level is non-zero) any line start
			// may open a list, while at top level the list must follow a
			// blank line or the start of the text.
			// See the extended comment in processListItems().
			$prefix_re = $this->list_level
				? '^'
				: '(?:(?<=\n)\n|\A\n?) # Must eat the newline';

			$list_re = '{
						'.$prefix_re.'
						'.$whole_list_re.'
					}mx';

			$text = preg_replace_callback($list_re,
				array($this, '_doLists_callback'), $text);
		}

		return $text;
	}
1065
1066
	/**
1067
	 * List parsing callback
1068
	 * @param  array $matches
1069
	 * @return string
1070
	 */
1071 25
	protected function _doLists_callback($matches) {
		// Re-usable patterns to match list item bullets and number markers:
		$marker_ul_re  = '[*+-]';
		$marker_ol_re  = '\d+[\.]';
		$marker_ol_start_re = '[0-9]+';

		// $matches[1] is the whole list, $matches[4] its first item marker.
		$list = $matches[1];
		$list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";

		// Items are split using only the marker kind of the first item.
		$marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );

		$list .= "\n";
		$result = $this->processListItems($list, $marker_any_re);

		// Get the start number for an ordered list; it is only honoured
		// when the enhanced_ordered_list option is enabled.
		$ol_start = 1;
		if ($this->enhanced_ordered_list && $list_type == 'ol') {
			$ol_start_array = array();
			$ol_start_check = preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array);
			if ($ol_start_check) {
				$ol_start = $ol_start_array[0];
			}
		}

		// A start attribute is emitted only for ordered lists that begin
		// above 1.
		if ($ol_start > 1 && $list_type == 'ol') {
			$result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>");
		} else {
			$result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
		}
		return "\n". $result ."\n\n";
	}
1105
1106
	/**
1107
	 * Nesting tracker for list levels
1108
	 * @var integer
1109
	 */
1110
	protected $list_level = 0;
1111
1112
	/**
1113
	 * Process the contents of a single ordered or unordered list, splitting it
1114
	 * into individual list items.
1115
	 * @param  string $list_str
1116
	 * @param  string $marker_any_re
1117
	 * @return string
1118
	 */
1119 25
	protected function processListItems($list_str, $marker_any_re) {
		// $this->list_level tracks list nesting: it is incremented on entry
		// and decremented on exit, so zero means "not inside a list".
		//
		// This matters because outside a list, text such as
		//
		//		I recommend upgrading to version
		//		8. Oops, now this line is treated
		//		as a sub-list.
		//
		// must stay a single paragraph even though its second line starts
		// with a digit-period-space sequence, whereas inside a list (or
		// sub-list) the same line starts a sub-list. It is a kludge, but
		// this part of Markdown's syntax is hard to parse perfectly.
		$this->list_level++;

		// Trim trailing blank lines:
		$trimmed = preg_replace("/\n{2,}\\z/", "\n", $list_str);

		$item_re = '{
			(\n)?							# leading line = $1
			(^[ ]*)							# leading whitespace = $2
			('.$marker_any_re.'				# list marker and space = $3
				(?:[ ]+|(?=\n))	# space only required if item is not empty
			)
			((?s:.*?))						# list item text   = $4
			(?:(\n+(?=\n))|\n)				# tailing blank line = $5
			(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
			}xm';

		$items_html = preg_replace_callback($item_re,
			array($this, '_processListItems_callback'), $trimmed);

		$this->list_level--;
		return $items_html;
	}
1162
1163
	/**
1164
	 * List item parsing callback
1165
	 * @param  array $matches
1166
	 * @return string
1167
	 */
1168 25
	protected function _processListItems_callback($matches) {
		$item = $matches[4];
		$marker_space = $matches[3];
		$leading_space = isset($matches[2]) ? $matches[2] : '';

		// An item gets block-level processing when a blank line precedes or
		// follows it, or when it contains a blank line itself.
		$needs_block_gamut = !empty($matches[1]) || !empty($matches[5])
			|| preg_match('/\n{2,}/', $item);

		if ($needs_block_gamut) {
			// Replace marker with the appropriate whitespace indentation
			$padding = str_repeat(' ', strlen($marker_space));
			$item = $leading_space . $padding . $item;
			$item = $this->runBlockGamut($this->outdent($item)."\n");
		} else {
			// Recursion for sub-lists:
			$item = $this->doLists($this->outdent($item));
			$item = $this->formParagraphs($item, false);
		}

		return "<li>" . $item . "</li>\n";
	}
1189
1190
	/**
1191
	 * Process Markdown `<pre><code>` blocks.
1192
	 * @param  string $text
1193
	 * @return string
1194
	 */
1195 109
	protected function doCodeBlocks($text) {
		// A code block is a run of lines indented by tab_width spaces that
		// follows a blank line (or the start of the text). It ends before
		// a less-indented non-blank line or at the end of the document.
		$code_block_re = '{
				(?:\n\n|\A\n?)
				(	            # $1 = the code block -- one or more lines, starting with a space/tab
				  (?>
					[ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
					.*\n+
				  )+
				)
				((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
			}xm';

		return preg_replace_callback($code_block_re,
			array($this, '_doCodeBlocks_callback'), $text);
	}
1210
1211
	/**
1212
	 * Code block parsing callback
1213
	 * @param  array $matches
1214
	 * @return string
1215
	 */
1216 34
	protected function _doCodeBlocks_callback($matches) {
		$raw = $this->outdent($matches[1]);

		// A user-supplied content function, when callable, replaces the
		// default HTML escaping; its second argument is an empty string.
		$codeblock = is_callable($this->code_block_content_func)
			? call_user_func($this->code_block_content_func, $raw, "")
			: htmlspecialchars($raw, ENT_NOQUOTES);

		// trim leading newlines and trailing newlines
		$codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);

		$html = "<pre><code>$codeblock\n</code></pre>";
		return "\n\n" . $this->hashBlock($html) . "\n\n";
	}
1232
1233
	/**
1234
	 * Create a code span markup for $code. Called from handleSpanToken.
1235
	 * @param  string $code
1236
	 * @return string
1237
	 */
1238 20
	protected function makeCodeSpan($code) {
		// A callable code_span_content_func takes over the encoding;
		// otherwise the code is trimmed and HTML-escaped (quotes untouched).
		$code = is_callable($this->code_span_content_func)
			? call_user_func($this->code_span_content_func, $code)
			: htmlspecialchars(trim($code), ENT_NOQUOTES);

		return $this->hashPart("<code>$code</code>");
	}
1246
1247
	/**
1248
	 * Define the emphasis operators with their regex matches
1249
	 * @var array
1250
	 */
1251
	protected $em_relist = array(
1252
		''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
1253
		'*' => '(?<![\s*])\*(?!\*)',
1254
		'_' => '(?<![\s_])_(?!_)',
1255
	);
1256
1257
	/**
1258
	 * Define the strong operators with their regex matches
1259
	 * @var array
1260
	 */
1261
	protected $strong_relist = array(
1262
		''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
1263
		'**' => '(?<![\s*])\*\*(?!\*)',
1264
		'__' => '(?<![\s_])__(?!_)',
1265
	);
1266
1267
	/**
1268
	 * Define the emphasis + strong operators with their regex matches
1269
	 * @var array
1270
	 */
1271
	protected $em_strong_relist = array(
1272
		''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
1273
		'***' => '(?<![\s*])\*\*\*(?!\*)',
1274
		'___' => '(?<![\s_])___(?!_)',
1275
	);
1276
1277
	/**
1278
	 * Container for prepared regular expressions
1279
	 * @var array
1280
	 */
1281
	protected $em_strong_prepared_relist;
1282
1283
	/**
1284
	 * Prepare regular expressions for searching emphasis tokens in any
1285
	 * context.
1286
	 * @return void
1287
	 */
1288 5
	protected function prepareItalicsAndBold() {
		// Build one token regex for every (em state, strong state) pair,
		// keyed by the concatenation of the two state strings.
		foreach ($this->em_relist as $em => $em_re) {
			foreach ($this->strong_relist as $strong => $strong_re) {
				$state = "$em$strong";

				// Construct list of allowed token expressions; the combined
				// em+strong expression, when defined, comes first.
				$token_relist = isset($this->em_strong_relist[$state])
					? array($this->em_strong_relist[$state], $em_re, $strong_re)
					: array($em_re, $strong_re);

				// Construct master expression from list.
				$this->em_strong_prepared_relist[$state] =
					'{(' . implode('|', $token_relist) . ')}';
			}
		}
	}
1305
1306
	/**
1307
	 * Convert Markdown italics (emphasis) and bold (strong) to HTML
1308
	 * @param  string $text
1309
	 * @return string
1310
	 */
1311 108
	protected function doItalicsAndBold($text) {
		// Avoid reentrancy: this method runs runSpanGamut() on the spans it
		// emits, so a call made while one is in progress returns the text
		// unchanged.
		if ($this->in_emphasis_processing) {
			return $text;
		}
		$this->in_emphasis_processing = true;

		// Parallel stacks: the open markers, and the text collected since
		// each of them. Index 0 is the top of both stacks.
		$token_stack = array('');
		$text_stack = array('');
		$em = '';
		$strong = '';
		$three_char_em = false;

		while (true) {
			// Get prepared regular expression for searching emphasis tokens
			// in the current context.
			$token_re = $this->em_strong_prepared_relist["$em$strong"];

			// Each loop iteration searches for the next emphasis token.
			$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
			$text_stack[0] .= $parts[0];
			$token = isset($parts[1]) ? $parts[1] : null;
			$text = isset($parts[2]) ? $parts[2] : null;

			if (empty($token)) {
				// Reached end of text span: empty the stack without emitting
				// any more emphasis; unclosed markers go back as plain text.
				while ($token_stack[0]) {
					$text_stack[1] .= array_shift($token_stack);
					$text_stack[0] .= array_shift($text_stack);
				}
				break;
			}

			$token_len = strlen($token);

			if ($three_char_em) {
				// Reached closing marker while inside a three-char emphasis.
				if ($token_len == 3) {
					// Three-char closing marker, close em and strong.
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<strong><em>$span</em></strong>";
					$text_stack[0] .= $this->hashPart($span);
					$em = '';
					$strong = '';
				} else {
					// Other closing marker: close one em or strong and
					// change current token state to match the other.
					$token_stack[0] = str_repeat($token[0], 3-$token_len);
					$tag = $token_len == 2 ? "strong" : "em";
					$span = $text_stack[0];
					$span = $this->runSpanGamut($span);
					$span = "<$tag>$span</$tag>";
					$text_stack[0] = $this->hashPart($span);
					if ($tag == "strong") {
						$strong = '';
					} else {
						$em = '';
					}
				}
				$three_char_em = false;
			} elseif ($token_len == 3) {
				if ($em) {
					// Reached closing marker for both em and strong: close
					// the two innermost spans, one per iteration.
					for ($i = 0; $i < 2; ++$i) {
						$shifted_token = array_shift($token_stack);
						$tag = strlen($shifted_token) == 2 ? "strong" : "em";
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<$tag>$span</$tag>";
						$text_stack[0] .= $this->hashPart($span);
						if ($tag == "strong") {
							$strong = '';
						} else {
							$em = '';
						}
					}
				} else {
					// Reached opening three-char emphasis marker. Push on
					// token stack; handled by the special condition above.
					$em = $token[0];
					$strong = "$em$em";
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$three_char_em = true;
				}
			} elseif ($token_len == 2) {
				if ($strong) {
					// Unwind any dangling emphasis marker:
					if (strlen($token_stack[0]) == 1) {
						$text_stack[1] .= array_shift($token_stack);
						$text_stack[0] .= array_shift($text_stack);
						$em = '';
					}
					// Closing strong marker:
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<strong>$span</strong>";
					$text_stack[0] .= $this->hashPart($span);
					$strong = '';
				} else {
					// Opening strong marker.
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$strong = $token;
				}
			} else {
				// Here $token_len == 1
				if ($em) {
					if (strlen($token_stack[0]) == 1) {
						// Closing emphasis marker:
						array_shift($token_stack);
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<em>$span</em>";
						$text_stack[0] .= $this->hashPart($span);
						$em = '';
					} else {
						// The top of the stack is not an em marker: keep
						// the token as literal text.
						$text_stack[0] .= $token;
					}
				} else {
					// Opening emphasis marker.
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$em = $token;
				}
			}
		}

		$this->in_emphasis_processing = false;
		return $text_stack[0];
	}
1435
1436
	/**
1437
	 * Parse Markdown blockquotes to HTML
1438
	 * @param  string $text
1439
	 * @return string
1440
	 */
1441 109
	protected function doBlockQuotes($text) {
		// A blockquote is one or more groups, each starting with a ">" line
		// and running through the consecutive non-blank lines and any blank
		// lines that follow.
		$blockquote_re = '/
			  (								# Wrap whole match in $1
				(?>
				  ^[ ]*>[ ]?			# ">" at the start of a line
					.+\n					# rest of the first line
				  (.+\n)*					# subsequent consecutive lines
				  \n*						# blanks
				)+
			  )
			/xm';

		return preg_replace_callback($blockquote_re,
			array($this, '_doBlockQuotes_callback'), $text);
	}
1456
1457
	/**
1458
	 * Blockquote parsing callback
1459
	 * @param  array $matches
1460
	 * @return string
1461
	 */
1462 11
	protected function _doBlockQuotes_callback($matches) {
		// trim one level of quoting - trim whitespace-only lines
		$unquoted = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

		// recurse: the quoted content is itself block-level Markdown
		$inner = $this->runBlockGamut($unquoted);

		// Indent every line of the result by two spaces.
		$indented = preg_replace('/^/m', "  ", $inner);

		// These leading spaces cause problem with <pre> content,
		// so we need to fix that:
		$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
			array($this, '_doBlockQuotes_callback2'), $indented);

		return "\n" . $this->hashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n";
	}
1476
1477
	/**
1478
	 * Blockquote parsing callback
1479
	 * @param  array $matches
1480
	 * @return string
1481
	 */
1482 2
	protected function _doBlockQuotes_callback2($matches) {
		// Strip two leading spaces from every line of the matched <pre>
		// section.
		return preg_replace('/^  /m', '', $matches[1]);
	}
1487
1488
	/**
1489
	 * Parse paragraphs
1490
	 *
1491
	 * @param  string $text String to process in paragraphs
1492
	 * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
1493
	 * @return string
1494
	 */
1495 48
	protected function formParagraphs($text, $wrap_in_p = true) {
		// Strip leading and trailing lines:
		$text = preg_replace('/\A\n+|\n+\z/', '', $text);

		// Chunks are separated by one or more blank lines.
		$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

		foreach ($grafs as $key => $value) {
			// A chunk that is exactly one block hash key is swapped for the
			// HTML block stored under that key.
			if (preg_match('/^B\x1A[0-9]+B$/', $value)) {
				$grafs[$key] = $this->html_hashes[$value];
				continue;
			}

			// Anything else is a paragraph: process spans, optionally wrap
			// in <p> (dropping leading spaces), then unhash.
			$value = $this->runSpanGamut($value);
			if ($wrap_in_p) {
				$value = preg_replace('/^([ ]*)/', "<p>", $value);
				$value .= "</p>";
			}
			$grafs[$key] = $this->unhash($value);
		}

		return implode("\n\n", $grafs);
	}
1558
1559
	/**
1560
	 * Encode text for a double-quoted HTML attribute. This function
1561
	 * is *not* suitable for attributes enclosed in single quotes.
1562
	 * @param  string $text
1563
	 * @return string
1564
	 */
1565 30
	protected function encodeAttribute($text) {
		// Ampersands and "<" are encoded first, then double quotes; single
		// quotes are left as they are.
		$encoded = $this->encodeAmpsAndAngles($text);
		return str_replace('"', '&quot;', $encoded);
	}
1570
1571
	/**
1572
	 * Encode text for a double-quoted HTML attribute containing a URL,
1573
	 * applying the URL filter if set. Also generates the textual
1574
	 * representation for the URL (removing mailto: or tel:) storing it in $text.
1575
	 * This function is *not* suitable for attributes enclosed in single quotes.
1576
	 *
1577
	 * @param  string $url
1578
	 * @param  string $text Passed by reference
1579
	 * @return string        URL
1580
	 */
1581 27
	protected function encodeURLAttribute($url, &$text = null) {
		// Let the optional user filter rewrite the URL first.
		if (is_callable($this->url_filter_func)) {
			$url = call_user_func($this->url_filter_func, $url);
		}

		// mailto: URLs are entity-obfuscated; $text receives the obfuscated
		// form minus its first 7 characters (the "mailto:" prefix).
		if (preg_match('{^mailto:}i', $url)) {
			return $this->encodeEntityObfuscatedAttribute($url, $text, 7);
		}

		// The scheme is tested on the URL as it stands before encoding.
		$is_tel = preg_match('{^tel:}i', $url);
		$url = $this->encodeAttribute($url);

		// For tel: URLs the text is the encoded URL without its 4-character
		// prefix; for anything else it is the encoded URL itself.
		$text = $is_tel ? substr($url, 4) : $url;

		return $url;
	}
1598
1599
	/**
1600
	 * Smart processing for ampersands and angle brackets that need to
1601
	 * be encoded. Valid character entities are left alone unless the
1602
	 * no-entities mode is set.
1603
	 * @param  string $text
1604
	 * @return string
1605
	 */
1606 108
	protected function encodeAmpsAndAngles($text) {
		// In no-entities mode every ampersand is encoded. Otherwise only
		// ampersands that do not begin something shaped like an entity are
		// encoded. Ampersand-encoding based entirely on Nat Irons's
		// Amputator MT plugin: <http://bumppo.net/projects/amputator/>
		$amp_encoded = $this->no_entities
			? str_replace('&', '&amp;', $text)
			: preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', '&amp;', $text);

		// Encode remaining <'s
		return str_replace('<', '&lt;', $amp_encoded);
	}
1620
1621
	/**
1622
	 * Parse Markdown automatic links to anchor HTML tags
1623
	 * @param  string $text
1624
	 * @return string
1625
	 */
1626 108
	protected function doAutoLinks($text) {
		// URLs in angle brackets with an http, https, ftp, dict or tel
		// scheme.
		$url_re = '{<((https?|ftp|dict|tel):[^\'">\s]+)>}i';
		$with_urls = preg_replace_callback($url_re,
			array($this, '_doAutoLinks_url_callback'), $text);

		// Email addresses in angle brackets, with an optional "mailto:"
		// prefix that is not part of the captured address.
		$email_re = '{
			<
			(?:mailto:)?
			(
				(?:
					[-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
				|
					".*?"
				)
				\@
				(?:
					[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
				|
					\[[\d.a-fA-F:]+\]	# IPv4 & IPv6
				)
			)
			>
			}xi';

		return preg_replace_callback($email_re,
			array($this, '_doAutoLinks_email_callback'), $with_urls);
	}
1653
1654
	/**
1655
	 * Parse URL callback
1656
	 * @param  array $matches
1657
	 * @return string
1658
	 */
1659 4
	protected function _doAutoLinks_url_callback($matches) {
		// encodeURLAttribute() fills $label (by reference) with the link
		// text that goes with the encoded URL.
		$label = null;
		$href = $this->encodeURLAttribute($matches[1], $label);

		return $this->hashPart("<a href=\"$href\">$label</a>");
	}
1664
1665
	/**
1666
	 * Parse email address callback
1667
	 * @param  array $matches
1668
	 * @return string
1669
	 */
1670 4
	protected function _doAutoLinks_email_callback($matches) {
		// The captured address is turned into a mailto: URL;
		// encodeURLAttribute() fills $label (by reference) with the link
		// text.
		$label = null;
		$href = $this->encodeURLAttribute("mailto:" . $matches[1], $label);

		return $this->hashPart("<a href=\"$href\">$label</a>");
	}
1676
1677
	/**
1678
	 * Input: some text to obfuscate, e.g. "mailto:[email protected]"
1679
	 *
1680
	 * Output: the same text but with most characters encoded as either a
1681
	 *         decimal or hex entity, in the hopes of foiling most address
1682
	 *         harvesting spam bots. E.g.:
1683
	 *
1684
	 *        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1685
	 *        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1686
	 *        &#x6d;
1687
	 *
1688
	 * Note: the additional output $tail is assigned the same value as the
1689
	 * output, minus the number of characters specified by $head_length.
1690
	 *
1691
	 * Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
1692
	 * With some optimizations by Milian Wolff. Forced encoding of HTML
1693
	 * attribute special characters by Allan Odgaard.
1694
	 *
1695
	 * @param  string  $text
1696
	 * @param  string  $tail Passed by reference
1697
	 * @param  integer $head_length
1698
	 * @return string
1699
	 */
1700 4
	protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
		if ($text == "") {
			return $tail = "";
		}

		$chars = preg_split('/(?<!^)(?!$)/', $text);
		// Deterministic seed, so the same input always encodes the same way.
		$seed = (int)abs(crc32($text) / strlen($text));

		foreach ($chars as $index => $char) {
			$code = ord($char);

			// Ignore non-ascii chars.
			if ($code >= 128) {
				continue;
			}

			// Pseudo-random function.
			$roll = ($seed * (1 + $index)) % 100;

			// roughly 10% raw, 45% hex, 45% dec
			// '@' *must* be encoded. I insist.
			// '"', '&' and '>' are never left raw either.
			if ($roll > 90 && strpos('@"&>', $char) === false) {
				continue;
			}

			$chars[$index] = $roll < 45
				? '&#x'.dechex($code).';'
				: '&#'.$code.';';
		}

		$text = implode('', $chars);
		// The tail skips the first $head_length encoded characters.
		$tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;

		return $text;
	}
1731
1732
	/**
1733
	 * Take the string $str and parse it into tokens, hashing embeded HTML,
1734
	 * escaped characters and handling code spans.
1735
	 * @param  string $str
1736
	 * @return string
1737
	 */
1738 109
	protected function parseSpan($str) {
		// Alternatives for inline HTML; left out entirely when markup is
		// disallowed.
		$markup_re = $this->no_markup ? '' : '
				|
					<!--    .*?     -->		# comment
				|
					<\?.*?\?> | <%.*?%>		# processing instruction
				|
					<[!$]?[-a-zA-Z0-9:_]+	# regular tags
					(?>
						\s
						(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
					)?
					>
				|
					<[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
				|
					</[-a-zA-Z0-9:_]+\s*> # closing tag
			';

		$span_re = '{
				(
					\\\\'.$this->escape_chars_re.'
				|
					(?<![`\\\\])
					`+						# code span marker
			'.$markup_re.'
				)
				}xs';

		$output = '';

		while (true) {
			// Each loop iteration searches for either the next tag, the
			// next opening code span marker, or the next escaped character.
			$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

			// Text preceding the token is copied through as-is.
			if ($parts[0] != "") {
				$output .= $parts[0];
			}

			// No token found: the end of the string has been reached.
			if (!isset($parts[1])) {
				break;
			}

			// handleSpanToken() receives the remaining text by reference
			// and may shorten it, so $str is refreshed from it afterwards.
			$output .= $this->handleSpanToken($parts[1], $parts[2]);
			$str = $parts[2];
		}

		return $output;
	}
1789
1790
	/**
1791
	 * Handle $token provided by parseSpan by determining its nature and
1792
	 * returning the corresponding value that should replace it.
1793
	 * @param  string $token
1794
	 * @param  string $str Passed by reference
1795
	 * @return string
1796
	 */
1797 34
	protected function handleSpanToken($token, &$str) {
		$first = $token[0];

		// Backslash escape: emit the escaped character as a numeric entity.
		if ($first == "\\") {
			return $this->hashPart("&#". ord($token[1]). ";");
		}

		// Anything that is not a code span marker is hashed untouched.
		if ($first != "`") {
			return $this->hashPart($token);
		}

		// Code span: search for end marker in remaining text.
		if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
			$str, $matches))
		{
			// The span and its closing marker are removed from the caller's
			// remaining text.
			$str = $matches[2];
			$codespan = $this->makeCodeSpan($matches[1]);
			return $this->hashPart($codespan);
		}

		// Return as text since no ending marker found.
		return $token;
	}
1815
1816
	/**
1817
	 * Remove one level of line-leading tabs or spaces
1818
	 * @param  string $text
1819
	 * @return string
1820
	 */
1821 48
	protected function outdent($text) {
		// Each line loses either one leading tab or up to tab_width leading
		// spaces.
		$indent_re = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';
		return preg_replace($indent_re, '', $text);
	}
1824
1825
1826
	/**
	 * String length function for detab, invoked with the text to measure
	 * and the encoding name 'UTF-8'. It starts out as a function name;
	 * `_initDetab` replaces it with a closure that loosely counts UTF-8
	 * characters when the default function is unavailable.
	 * @var string|callable
	 */
1831
	protected $utf8_strlen = 'mb_strlen';
1832
1833
	/**
1834
	 * Replace tabs with the appropriate amount of spaces.
1835
	 *
1836
	 * For each line we separate the line in blocks delimited by tab characters.
1837
	 * Then we reconstruct every line by adding the appropriate number of spaces
1838
	 * between each block.
1839
	 *
1840
	 * @param  string $text
1841
	 * @return string
1842
	 */
1843 110
	protected function detab($text) {
		// Only lines that contain at least one tab match the pattern and
		// are handed to the callback; other lines are left as they are.
		$tabbed_line_re = '/^.*\t.*$/m';
		$expander = array($this, '_detab_callback');

		return preg_replace_callback($tabbed_line_re, $expander, $text);
	}
1849
1850
	/**
1851
	 * Replace tabs callback
1852
	 * @param  array $matches
1853
	 * @return string
1854
	 */
1855 34
	protected function _detab_callback($matches) {
		$measure = $this->utf8_strlen; // strlen function for UTF-8.

		// Cut the matched line at every tab. The first piece opens the
		// rebuilt line; each remaining piece originally followed a tab.
		$pieces = explode("\t", $matches[0]);
		$rebuilt = array_shift($pieces);

		foreach ($pieces as $piece) {
			// Pad with spaces up to the next multiple of tab_width, then
			// append the piece.
			$used = $measure($rebuilt, 'UTF-8') % $this->tab_width;
			$padding = $this->tab_width - $used;
			$rebuilt .= str_repeat(" ", $padding) . $piece;
		}
		return $rebuilt;
	}
1872
1873
	/**
1874
	 * Check for the availability of the function in the `utf8_strlen` property
1875
	 * (initially `mb_strlen`). If the function is not available, create a
1876
	 * function that will loosely count the number of UTF-8 characters with a
1877
	 * regular expression.
1878
	 * @return void
1879
	 */
1880 5
	protected function _initDetab() {
		// Keep whatever is already usable. `is_callable` is used instead of
		// `function_exists` because the property may already hold the
		// fallback closure (for instance when this method runs a second
		// time), and `function_exists` only accepts a function name string.
		if (is_callable($this->utf8_strlen)) {
			return;
		}

		// Fallback: loosely count UTF-8 characters. Each match is either a
		// single byte in 0x00-0xBF or a byte in 0xC0-0xFF followed by any
		// bytes in 0x80-0xBF; `preg_match_all` returns the number of
		// matches. Extra arguments supplied by callers (such as an encoding
		// name) are ignored by the closure.
		// After this assignment the property holds a Closure rather than a
		// function name.
		$this->utf8_strlen = function($text) {
			return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m);
		};
	}
1890
1891
	/**
1892
	 * Swap back in all the tags hashed by _HashHTMLBlocks.
1893
	 * @param  string $text
1894
	 * @return string
1895
	 */
1896 110
	protected function unhash($text) {
		// A hash key is matched as: any one character, the \x1A byte, a run
		// of digits, then the same character again.
		$hash_key_re = '/(.)\x1A[0-9]+\1/';
		$restorer = array($this, '_unhash_callback');

		// Every key found is replaced by what the callback returns.
		return preg_replace_callback($hash_key_re, $restorer, $text);
	}
1900
1901
	/**
1902
	 * Unhashing callback
1903
	 * @param  array $matches
1904
	 * @return string
1905
	 */
1906 80
	protected function _unhash_callback($matches) {
		// The full matched text is the lookup key into `html_hashes`.
		$hash_key = $matches[0];
		return $this->html_hashes[$hash_key];
	}
1909
}
1910