Markdown::_doAnchors_inline_callback()   A
last analyzed

Complexity

Conditions 4
Paths 8

Size

Total Lines 24
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 14
nc 8
nop 1
dl 0
loc 24
rs 9.7998
c 0
b 0
f 0
1
<?php
2
/**
3
 * Markdown  -  A text-to-HTML conversion tool for web writers
4
 *
5
 * @package   php-markdown
6
 * @author    Michel Fortin <[email protected]>
7
 * @copyright 2004-2019 Michel Fortin <https://michelf.com/projects/php-markdown/>
8
 * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
9
 */
10
11
namespace Michelf;
12
13
/**
14
 * Markdown Parser Class
15
 */
16
class Markdown implements MarkdownInterface {
17
	/**
18
	 * Define the package version
19
	 * @var string
20
	 */
21
	const MARKDOWNLIB_VERSION = "1.9.0";
22
23
	/**
	 * Convenience entry point: convert Markdown text using a parser instance
	 * of the class this method was called on.
	 *
	 * One parser per calling class is created on first use and kept in a
	 * function-static list, so derived classes get a parser of their own type.
	 *
	 * @api
	 *
	 * @param  string $text
	 * @return string result of the parser's transform() method
	 */
	public static function defaultTransform($text) {
		// Cache of parser instances, keyed by concrete class name.
		static $parser_list;

		// Late static binding: the class the call was made on, not this one.
		$class = \get_called_class();

		// Lazily create the parser the first time this class is seen.
		if (empty($parser_list[$class])) {
			$parser_list[$class] = new $class;
		}

		return $parser_list[$class]->transform($text);
	}
48
49
	/**
50
	 * Configuration variables
51
	 */
52
53
	/**
54
	 * Change to ">" for HTML output.
55
	 * @var string
56
	 */
57
	public $empty_element_suffix = " />";
58
59
	/**
60
	 * The width of indentation of the output markup
61
	 * @var int
62
	 */
63
	public $tab_width = 4;
64
65
	/**
66
	 * Change to `true` to disallow markup or entities.
67
	 * @var boolean
68
	 */
69
	public $no_markup   = false;
70
	public $no_entities = false;
71
72
73
	/**
74
	 * Change to `true` to enable line breaks on \n without two trailing spaces
75
	 * @var boolean
76
	 */
77
	public $hard_wrap = false;
78
79
	/**
80
	 * Predefined URLs and titles for reference links and images.
81
	 * @var array
82
	 */
83
	public $predef_urls   = array();
84
	public $predef_titles = array();
85
86
	/**
87
	 * Optional filter function for URLs
88
	 * @var callable|null
89
	 */
90
	public $url_filter_func = null;
91
92
	/**
93
	 * Optional header id="" generation callback function.
94
	 * @var callable|null
95
	 */
96
	public $header_id_func = null;
97
98
	/**
99
	 * Optional function for converting code block content to HTML
100
	 * @var callable|null
101
	 */
102
	public $code_block_content_func = null;
103
104
	/**
105
	 * Optional function for converting code span content to HTML.
106
	 * @var callable|null
107
	 */
108
	public $code_span_content_func = null;
109
110
	/**
111
	 * Class attribute to toggle "enhanced ordered list" behaviour
112
	 * setting this to true will allow ordered lists to start from the index
113
	 * number that is defined first.
114
	 *
115
	 * For example:
116
	 * 2. List item two
117
	 * 3. List item three
118
	 *
119
	 * Becomes:
120
	 * <ol start="2">
121
	 * <li>List item two</li>
122
	 * <li>List item three</li>
123
	 * </ol>
124
	 *
125
	 * @var bool
126
	 */
127
	public $enhanced_ordered_list = false;
128
129
	/**
130
	 * Parser implementation
131
	 */
132
133
	/**
134
	 * Regex to match balanced [brackets].
135
	 * Needed to insert a maximum bracket depth while converting to PHP.
136
	 * @var int
137
	 */
138
	protected $nested_brackets_depth = 6;
139
	protected $nested_brackets_re;
140
141
	protected $nested_url_parenthesis_depth = 4;
142
	protected $nested_url_parenthesis_re;
143
144
	/**
145
	 * Table of hash values for escaped characters:
146
	 * @var string
147
	 */
148
	protected $escape_chars = '\`*_{}[]()>#+-.!';
149
	protected $escape_chars_re;
150
151
	/**
	 * Constructor. Runs the parser's initialisation helpers, builds the
	 * regex fragments that depend on the configured nesting depths and
	 * escape characters, and sorts the three gamuts by priority.
	 * @return void
	 */
	public function __construct() {
		$this->_initDetab();
		$this->prepareItalicsAndBold();

		// Balanced [brackets]: one opening fragment and one closing fragment
		// per permitted nesting level.
		$bracket_depth = $this->nested_brackets_depth;
		$this->nested_brackets_re =
			str_repeat('(?>[^\[\]]+|\[', $bracket_depth)
			. str_repeat('\])*', $bracket_depth);

		// Same construction for balanced (parentheses) inside URLs.
		$paren_depth = $this->nested_url_parenthesis_depth;
		$this->nested_url_parenthesis_re =
			str_repeat('(?>[^()\s]+|\(', $paren_depth)
			. str_repeat('(?>\)))*', $paren_depth);

		// Character class matching any of the escapable characters.
		$this->escape_chars_re = '[' . preg_quote($this->escape_chars) . ']';

		// Order every gamut by ascending priority value, keeping the
		// method-name keys attached to their priorities.
		foreach (array('document_gamut', 'block_gamut', 'span_gamut') as $gamut) {
			asort($this->$gamut);
		}
	}
174
175
176
	/**
177
	 * Internal hashes used during transformation.
178
	 * @var array
179
	 */
180
	protected $urls        = array();
181
	protected $titles      = array();
182
	protected $html_hashes = array();
183
184
	/**
185
	 * Status flag to avoid invalid nesting.
186
	 * @var boolean
187
	 */
188
	protected $in_anchor = false;
189
190
	/**
191
	 * Status flag to avoid invalid nesting.
192
	 * @var boolean
193
	 */
194
	protected $in_emphasis_processing = false;
195
196
	/**
	 * Reset the per-document parser state; called at the start of transform().
	 * @return void
	 */
	protected function setup() {
		// Nesting guards start out cleared.
		$this->in_anchor              = false;
		$this->in_emphasis_processing = false;

		// No HTML has been hashed yet for this document.
		$this->html_hashes = array();

		// Reference tables start from the user-supplied predefined entries.
		$this->titles = $this->predef_titles;
		$this->urls   = $this->predef_urls;
	}
208
209
	/**
	 * Called after the transformation process to empty the per-document
	 * tables so they do not keep taking up memory unnecessarily.
	 * @return void
	 */
	protected function teardown() {
		foreach (array('urls', 'titles', 'html_hashes') as $table) {
			$this->$table = array();
		}
	}
219
220
	/**
	 * Main function. Normalises the input text, then runs it through every
	 * method of the document gamut in order.
	 *
	 * @api
	 *
	 * @param  string $text
	 * @return string transformed text followed by a single newline
	 */
	public function transform($text) {
		$this->setup();

		// Drop a leading UTF-8 BOM, and any \x1A byte: that character is
		// reserved as the marker inside hash tokens (see hashPart()).
		$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

		// Standardize line endings: DOS (\r\n) and old Mac (\r) become \n.
		$text = preg_replace('{\r\n?}', "\n", $text);

		// Ensure the text ends with a couple of newlines, then convert tabs.
		$text = $this->detab($text . "\n\n");

		// Turn block-level HTML blocks into hash entries.
		$text = $this->hashHTMLBlocks($text);

		// Empty out lines made only of spaces, so later patterns can match
		// consecutive blank lines with /\n+/ instead of /[ ]*\n+/.
		$text = preg_replace('/^[ ]+$/m', '', $text);

		// Run the document gamut; keys are method names, values priorities.
		foreach (array_keys($this->document_gamut) as $step) {
			$text = $this->$step($text);
		}

		$this->teardown();

		return $text . "\n";
	}
263
264
	/**
265
	 * Define the document gamut
266
	 * @var array
267
	 */
268
	protected $document_gamut = array(
269
		// Strip link definitions, store in hashes.
270
		"stripLinkDefinitions" => 20,
271
		"runBasicBlockGamut"   => 30,
272
	);
273
274
	/**
	 * Remove link definitions of the form
	 *     [id]: url "optional title"
	 * from the text. Each match is passed to
	 * _stripLinkDefinitions_callback(), which records it and returns the
	 * replacement.
	 *
	 * @param  string $text
	 * @return string text with the definitions replaced
	 */
	protected function stripLinkDefinitions($text) {
		// A definition may be indented by less than one tab width.
		$max_indent = $this->tab_width - 1;

		return preg_replace_callback('{
							^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?:	# id = $1
							  [ ]*
							  \n?				# maybe *one* newline
							  [ ]*
							(?:
							  <(.+?)>			# url = $2
							|
							  (\S+?)			# url = $3
							)
							  [ ]*
							  \n?				# maybe one newline
							  [ ]*
							(?:
								(?<=\s)			# lookbehind for whitespace
								["(]
								(.*?)			# title = $4
								[")]
								[ ]*
							)?	# title is optional
							(?:\n+|\Z)
			}xm',
			array($this, '_stripLinkDefinitions_callback'),
			$text
		);
	}
312
313
	/**
	 * Callback for stripLinkDefinitions(): record one link definition.
	 *
	 * @param  array $matches regex captures: [1] id, [2] URL written as <url>,
	 *                        [3] bare URL, [4] title (absent when not given)
	 * @return string always '', so the definition disappears from the text
	 */
	protected function _stripLinkDefinitions_callback($matches) {
		// Ids are stored lower-cased.
		$id = strtolower($matches[1]);

		// Only one of the two URL alternatives captures anything.
		$this->urls[$id] = ($matches[2] == '') ? $matches[3] : $matches[2];

		// Without a title, group 4 is missing from $matches; store null so
		// later isset() checks on the title fail.
		$this->titles[$id] = isset($matches[4]) ? $matches[4] : null;

		return '';
	}
325
326
	/**
	 * Hashify HTML blocks: each block-level HTML construct found in the text
	 * is handed to _hashHTMLBlocks_callback(), which swaps it for a hash
	 * token surrounded by blank lines.
	 *
	 * Returns the text untouched when $no_markup is enabled.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function hashHTMLBlocks($text) {
		if ($this->no_markup) {
			return $text;
		}

		$less_than_tab = $this->tab_width - 1;

		/**
		 * Hashify HTML blocks:
		 *
		 * We only want to do this for block-level HTML tags, such as headers,
		 * lists, and tables. That's because we still want to wrap <p>s around
		 * "paragraphs" that are wrapped in non-block-level tags, such as
		 * anchors, phrase emphasis, and spans. The list of tags we're looking
		 * for is hard-coded:
		 *
		 * *  List "a" is made of tags which can be both inline or block-level.
		 *    These will be treated block-level when the start tag is alone on
		 *    its line, otherwise they're not matched here and will be taken as
		 *    inline later.
		 * *  List "b" is made of tags which are always block-level;
		 */
		$block_tags_a_re = 'ins|del';
		$block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
						   'script|noscript|style|form|fieldset|iframe|math|svg|'.
						   'article|section|nav|aside|hgroup|header|footer|'.
						   'figure';

		// Regular expression for the content of a block tag.
		$nested_tags_level = 4;
		$attr = '
			(?>				# optional tag attributes
			  \s			# starts with whitespace
			  (?>
				[^>"/]+		# text outside quotes
			  |
				/+(?!>)		# slash not followed by ">"
			  |
				"[^"]*"		# text inside double quotes (tolerate ">")
			  |
				\'[^\']*\'	# text inside single quotes (tolerate ">")
			  )*
			)?
			';
		// The content pattern is built for capture group 2 (list "b" tags);
		// $content2 is the same pattern retargeted at group 3 (list "a").
		$content =
			str_repeat('
				(?>
				  [^<]+			# content without tag
				|
				  <\2			# nested opening tag
					'.$attr.'	# attributes
					(?>
					  />
					|
					  >', $nested_tags_level).	// end of opening tag
					  '.*?'.					// last level nested tag content
			str_repeat('
					  </\2\s*>	# closing nested tag
					)
				  |
					<(?!/\2\s*>	# other tags with a different name
				  )
				)*',
				$nested_tags_level);
		$content2 = str_replace('\2', '\3', $content);

		/**
		 * First, look for nested blocks, e.g.:
		 * 	<div>
		 * 		<div>
		 * 		tags for inner block must be indented.
		 * 		</div>
		 * 	</div>
		 *
		 * The outermost tags must start at the left margin for this to match,
		 * and the inner nested divs must be indented.
		 * We need to do this before the next, more liberal match, because the
		 * next match will start at the first `<div>` and stop at the
		 * first `</div>`.
		 *
		 * Capture groups of the combined pattern, in order:
		 *   $1 whole block, $2 list "b" tag name, $3 list "a" tag name,
		 *   $4 "hr", $5 processing-instruction marker (? or %).
		 * The processing-instruction branch must therefore close on \5.
		 * It previously used \2, which is never set in that branch, and a
		 * back reference to an unset group cannot match, so that branch was
		 * dead.
		 */
		$text = preg_replace_callback('{(?>
			(?>
				(?<=\n)			# Starting on its own line
				|				# or
				\A\n?			# the at beginning of the doc
			)
			(						# save in $1

			  # Match from `\n<tag>` to `</tag>\n`, handling nested tags
			  # in between.

						[ ]{0,'.$less_than_tab.'}
						<('.$block_tags_b_re.')# start tag = $2
						'.$attr.'>			# attributes followed by > and \n
						'.$content.'		# content, support nesting
						</\2>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special version for tags of group a.

						[ ]{0,'.$less_than_tab.'}
						<('.$block_tags_a_re.')# start tag = $3
						'.$attr.'>[ ]*\n	# attributes followed by >
						'.$content2.'		# content, support nesting
						</\3>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special case just for <hr />. It was easier to make a special
			  # case than to make the other regex more complicated.

						[ ]{0,'.$less_than_tab.'}
						<(hr)				# start tag = $4
						'.$attr.'			# attributes
						/?>					# the matching end tag
						[ ]*
						(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # Special case for standalone HTML comments:

					[ ]{0,'.$less_than_tab.'}
					(?s:
						<!-- .*? -->
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # PHP and ASP-style processor instructions (<? and <%)

					[ ]{0,'.$less_than_tab.'}
					(?s:
						<([?%])			# $5
						.*?
						\5>
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			)
			)}Sxmi',
			array($this, '_hashHTMLBlocks_callback'),
			$text
		);

		return $text;
	}
478
479
	/**
	 * Callback for hashHTMLBlocks(): replace one matched HTML block with its
	 * block-level hash token, set apart by blank lines.
	 *
	 * @param  array $matches regex captures; [1] is the whole HTML block
	 * @return string
	 */
	protected function _hashHTMLBlocks_callback($matches) {
		return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
	}
489
490
	/**
	 * Store a piece of generated markup and return a unique token for it.
	 * The token is what gets inserted in the text stream; the stored markup
	 * is keyed by that token in $html_hashes.
	 *
	 * The $boundary argument specifies which character surrounds the token.
	 * By convention, "B" is used for block elements that need not be wrapped
	 * into paragraph tags at the end, ":" is used for elements that are word
	 * separators and "X" is used in the general case.
	 *
	 * @param  string $text
	 * @param  string $boundary
	 * @return string the token: boundary, \x1A, a serial number, boundary
	 */
	protected function hashPart($text, $boundary = 'X') {
		// Serial number shared by all calls, so each token is distinct.
		static $serial = 0;

		// Expand tokens already present in $text first, so unhashing does
		// not have to be repeated several times at the end.
		$expanded = $this->unhash($text);

		$serial++;
		$token = $boundary . "\x1A" . $serial . $boundary;
		$this->html_hashes[$token] = $expanded;

		return $token;
	}
515
516
	/**
	 * Shortcut for hashPart() using the block-level boundary character "B".
	 * @param  string $text
	 * @return string
	 */
	protected function hashBlock($text) {
		$block_boundary = 'B';
		return $this->hashPart($text, $block_boundary);
	}
524
525
	/**
526
	 * Define the block gamut - these are all the transformations that form
527
	 * block-level tags like paragraphs, headers, and list items.
528
	 * @var array
529
	 */
530
	protected $block_gamut = array(
531
		"doHeaders"         => 10,
532
		"doHorizontalRules" => 20,
533
		"doLists"           => 40,
534
		"doCodeBlocks"      => 50,
535
		"doBlockQuotes"     => 60,
536
	);
537
538
	/**
	 * Run block gamut transformations.
	 *
	 * Raw HTML is hashed first. This needs to be done for each block, and
	 * not only once at the beginning of the document, since hashed blocks
	 * can be part of list items and could have been indented. Indented
	 * blocks would have been seen as a code block in a previous pass of
	 * hashHTMLBlocks.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function runBlockGamut($text) {
		$hashed = $this->hashHTMLBlocks($text);
		return $this->runBasicBlockGamut($hashed);
	}
554
555
	/**
	 * Run block gamut transformations without hashing HTML blocks first.
	 * Useful when HTML blocks are known to be hashed already, as in the
	 * first whole-document pass.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function runBasicBlockGamut($text) {
		// Keys of the gamut are method names; values are only priorities.
		foreach (array_keys($this->block_gamut) as $step) {
			$text = $this->$step($text);
		}

		// Finally form paragraphs and restore hashed blocks.
		return $this->formParagraphs($text);
	}
574
575
	/**
	 * Convert horizontal rules: a line made of three or more identical
	 * markers (-, * or _), optionally separated by up to two spaces, becomes
	 * a hashed <hr> block.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doHorizontalRules($text) {
		$rule_re = '{
				^[ ]{0,3}	# Leading space
				([-*_])		# $1: First marker
				(?>			# Repeated marker group
					[ ]{0,2}	# Zero, one, or two spaces.
					\1			# Marker character
				){2,}		# Group repeated at least twice
				[ ]*		# Tailing spaces
				$			# End of line.
			}mx';

		// The tag is hashed once; every rule found is replaced by that token.
		$hr_token = $this->hashBlock('<hr' . $this->empty_element_suffix);

		return preg_replace($rule_re, "\n" . $hr_token . "\n", $text);
	}
596
597
	/**
598
	 * These are all the transformations that occur *within* block-level
599
	 * tags like paragraphs, headers, and list items.
600
	 * @var array
601
	 */
602
	protected $span_gamut = array(
603
		// Process character escapes, code spans, and inline HTML
604
		// in one shot.
605
		"parseSpan"           => -30,
606
		// Process anchor and image tags. Images must come first,
607
		// because ![foo][f] looks like an anchor.
608
		"doImages"            =>  10,
609
		"doAnchors"           =>  20,
610
		// Make links out of things like `<https://example.com/>`
611
		// Must come after doAnchors, because you can use < and >
612
		// delimiters in inline links like [this](<url>).
613
		"doAutoLinks"         =>  30,
614
		"encodeAmpsAndAngles" =>  40,
615
		"doItalicsAndBold"    =>  50,
616
		"doHardBreaks"        =>  60,
617
	);
618
619
	/**
	 * Run span gamut transformations: apply each method listed in
	 * $span_gamut to the text, in the gamut's order.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function runSpanGamut($text) {
		// Keys of the gamut are method names; values are only priorities.
		foreach (array_keys($this->span_gamut) as $step) {
			$text = $this->$step($text);
		}
		return $text;
	}
631
632
	/**
	 * Do hard breaks. With $hard_wrap enabled every newline (plus any spaces
	 * before it) becomes a break; otherwise only a newline preceded by two
	 * or more spaces does.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doHardBreaks($text) {
		$break_re = $this->hard_wrap ? '/ *\n/' : '/ {2,}\n/';

		return preg_replace_callback($break_re,
			array($this, '_doHardBreaks_callback'), $text);
	}
646
647
	/**
	 * Callback for doHardBreaks(): produce the hashed <br> tag, followed by
	 * a newline, that replaces the matched line ending.
	 *
	 * @param  array $matches unused
	 * @return string
	 */
	protected function _doHardBreaks_callback($matches) {
		$break_tag = '<br' . $this->empty_element_suffix . "\n";
		return $this->hashPart($break_tag);
	}
655
656
	/**
	 * Turn Markdown link shortcuts into XHTML <a> tags.
	 *
	 * Three syntaxes are handled, in this order: reference links
	 * ([text] [id]), inline links ([text](url "title")) and shortcut
	 * references ([text]). Does nothing while already inside an anchor, so
	 * links are never nested.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doAnchors($text) {
		// The callbacks run the span gamut on the link text, which calls
		// back into this method; the flag makes that inner call a no-op.
		if ($this->in_anchor) {
			return $text;
		}
		$this->in_anchor = true;

		$on_reference = array($this, '_doAnchors_reference_callback');
		$on_inline    = array($this, '_doAnchors_inline_callback');

		// First, handle reference-style links: [link text] [id]
		$text = preg_replace_callback('{
			(					# wrap whole match in $1
			  \[
				('.$this->nested_brackets_re.')	# link text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]
			)
			}xs',
			$on_reference, $text);

		// Next, inline-style links: [link text](url "optional title")
		$text = preg_replace_callback('{
			(				# wrap whole match in $1
			  \[
				('.$this->nested_brackets_re.')	# link text = $2
			  \]
			  \(			# literal paren
				[ \n]*
				(?:
					<(.+?)>	# href = $3
				|
					('.$this->nested_url_parenthesis_re.')	# href = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# Title = $7
				  \6		# matching quote
				  [ \n]*	# ignore any spaces/tabs between closing quote and )
				)?			# title is optional
			  \)
			)
			}xs',
			$on_inline, $text);

		// Last, handle reference-style shortcuts: [link text]
		// These must come last in case you've also got [link text][1]
		// or [link text](/foo)
		$text = preg_replace_callback('{
			(					# wrap whole match in $1
			  \[
				([^\[\]]+)		# link text = $2; can\'t contain [ or ]
			  \]
			)
			}xs',
			$on_reference, $text);

		$this->in_anchor = false;
		return $text;
	}
724
725
	/**
	 * Callback for reference-style and shortcut links.
	 *
	 * @param  array $matches regex captures from doAnchors(): [1] whole
	 *                        match, [2] link text, [3] link id (absent for
	 *                        the [text] shortcut form)
	 * @return string hash token for the generated <a> element, or the
	 *                original text when the id has no known URL
	 */
	protected function _doAnchors_reference_callback($matches) {
		$label = $matches[2];

		// Shortcut forms like [this][] or [this] use the link text as id.
		$id = isset($matches[3]) ? $matches[3] : '';
		if ($id == '') {
			$id = $label;
		}

		// Lower-case and turn embedded newlines into spaces.
		$id = preg_replace('{[ ]?\n}', ' ', strtolower($id));

		// Unknown id: leave the source text exactly as written.
		if (!isset($this->urls[$id])) {
			return $matches[1];
		}

		$href = $this->encodeURLAttribute($this->urls[$id]);
		$tag  = '<a href="' . $href . '"';

		if (isset($this->titles[$id])) {
			$tag .= ' title="' . $this->encodeAttribute($this->titles[$id]) . '"';
		}

		$tag .= '>' . $this->runSpanGamut($label) . '</a>';

		return $this->hashPart($tag);
	}
763
764
	/**
	 * Callback for inline links: [link text](url "optional title").
	 *
	 * @param  array $matches regex captures from doAnchors(): [2] link text,
	 *                        [3] URL written as <url>, [4] bare URL,
	 *                        [7] title (absent when no title was given)
	 * @return string hash token for the generated <a> element
	 */
	protected function _doAnchors_inline_callback($matches) {
		$label = $this->runSpanGamut($matches[2]);
		$href  = ($matches[3] === '') ? $matches[4] : $matches[3];
		$title = isset($matches[7]) ? $matches[7] : null;

		// If the URL was of the form <s p a c e s> it got caught by the HTML
		// tag parser and hashed. Reverse that and strip the angle brackets
		// before using the URL.
		$restored = $this->unhash($href);
		if ($restored !== $href) {
			$href = preg_replace('/^<(.*)>$/', '\1', $restored);
		}

		$href = $this->encodeURLAttribute($href);

		$tag = '<a href="' . $href . '"';
		if ($title !== null) {
			$tag .= ' title="' . $this->encodeAttribute($title) . '"';
		}

		// NOTE(review): the link text goes through the span gamut a second
		// time here, unlike in _doAnchors_reference_callback(). Kept as is;
		// confirm whether the second pass is intentional.
		$label = $this->runSpanGamut($label);

		return $this->hashPart($tag . '>' . $label . '</a>');
	}
794
795
	/**
	 * Turn Markdown image shortcuts into <img> tags. Reference-style images
	 * (![alt][id]) are handled first, then inline images
	 * (![alt](url "title")).
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doImages($text) {
		$on_reference = array($this, '_doImages_reference_callback');
		$on_inline    = array($this, '_doImages_inline_callback');

		// First, handle reference-style labeled images: ![alt text][id]
		$text = preg_replace_callback('{
			(				# wrap whole match in $1
			  !\[
				('.$this->nested_brackets_re.')		# alt text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]

			)
			}xs',
			$on_reference, $text);

		// Next, handle inline images:  ![alt text](url "optional title")
		// Don't forget: encode * and _
		return preg_replace_callback('{
			(				# wrap whole match in $1
			  !\[
				('.$this->nested_brackets_re.')		# alt text = $2
			  \]
			  \s?			# One optional whitespace character
			  \(			# literal paren
				[ \n]*
				(?:
					<(\S*)>	# src url = $3
				|
					('.$this->nested_url_parenthesis_re.')	# src url = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# title = $7
				  \6		# matching quote
				  [ \n]*
				)?			# title is optional
			  \)
			)
			}xs',
			$on_inline, $text);
	}
848
849
	/**
	 * Callback for reference-style images: ![alt text][id].
	 *
	 * @param  array $matches regex captures from doImages(): [1] whole match,
	 *                        [2] alt text, [3] link id (may be empty)
	 * @return string hash token for the generated <img> tag, or the original
	 *                text when the id has no known URL
	 */
	protected function _doImages_reference_callback($matches) {
		// An empty id, as in ![this][], means the alt text is the id.
		// NOTE(review): unlike _doAnchors_reference_callback(), newlines
		// embedded in the id are not turned into spaces here; confirm
		// whether that difference is intended.
		$id = strtolower($matches[3]);
		if ($id == '') {
			$id = strtolower($matches[2]);
		}

		$alt = $this->encodeAttribute($matches[2]);

		// If there's no such link ID, leave the source intact.
		if (!isset($this->urls[$id])) {
			return $matches[1];
		}

		$src = $this->encodeURLAttribute($this->urls[$id]);
		$tag = '<img src="' . $src . '" alt="' . $alt . '"';

		if (isset($this->titles[$id])) {
			$tag .= ' title="' . $this->encodeAttribute($this->titles[$id]) . '"';
		}

		return $this->hashPart($tag . $this->empty_element_suffix);
	}
881
882
	/**
	 * Callback for inline images: ![alt text](url "optional title").
	 *
	 * @param  array $matches regex captures from doImages(): [2] alt text,
	 *                        [3] URL written as <url>, [4] bare URL,
	 *                        [7] title (absent when no title was given)
	 * @return string hash token for the generated <img> tag
	 */
	protected function _doImages_inline_callback($matches) {
		$alt_text = $matches[2];

		// Only one URL alternative captures. For an empty "<>" URL without a
		// title, group 3 is '' and group 4 is missing from $matches
		// altogether, so guard the lookup instead of reading an undefined
		// index.
		if ($matches[3] !== '') {
			$url = $matches[3];
		} else {
			$url = isset($matches[4]) ? $matches[4] : '';
		}

		// Group 7 is only present when a quoted title was matched.
		$title = isset($matches[7]) ? $matches[7] : null;

		$alt_text = $this->encodeAttribute($alt_text);
		$url = $this->encodeURLAttribute($url);
		$result = "<img src=\"$url\" alt=\"$alt_text\"";
		if ($title !== null) {
			$title = $this->encodeAttribute($title);
			$result .=  " title=\"$title\"";
		}
		$result .= $this->empty_element_suffix;

		return $this->hashPart($result);
	}
904
905
	/**
	 * Parse Markdown heading elements to HTML. Setext-style headings are
	 * converted first, then atx-style headings.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doHeaders($text) {
		/**
		 * Setext-style headers:
		 *	  Header 1
		 *	  ========
		 *
		 *	  Header 2
		 *	  --------
		 */
		$setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';
		$text = preg_replace_callback($setext_re,
			array($this, '_doHeaders_callback_setext'), $text);

		/**
		 * atx-style headers:
		 *   # Header 1
		 *   ## Header 2
		 *   ## Header 2 with closing hashes ##
		 *   ...
		 *   ###### Header 6
		 */
		$atx_re = '{
				^(\#{1,6})	# $1 = string of #\'s
				[ ]*
				(.+?)		# $2 = Header text
				[ ]*
				\#*			# optional closing #\'s (not counted)
				\n+
			}xm';

		return preg_replace_callback($atx_re,
			array($this, '_doHeaders_callback_atx'), $text);
	}
942
943
	/**
	 * Callback for setext-style headings (text underlined with = or -).
	 *
	 * @param  array $matches regex captures from doHeaders(): [0] whole
	 *                        match, [1] heading text, [2] underline
	 * @return string hashed <h1>/<h2> block, or the untouched match
	 */
	protected function _doHeaders_callback_setext($matches) {
		$heading   = $matches[1];
		$underline = $matches[2];

		// Terrible hack to check we haven't found an empty list item:
		// a "-" item followed by a line holding a single "-".
		if ($underline == '-' && preg_match('{^-(?: |$)}', $heading)) {
			return $matches[0];
		}

		// "=" underlines give a level 1 heading, "-" underlines level 2.
		$level = ($underline[0] == '=') ? 1 : 2;

		// ID attribute generation.
		$id_attr = $this->_generateIdFromHeaderValue($heading);

		$html = "<h$level$id_attr>" . $this->runSpanGamut($heading) . "</h$level>";

		return "\n" . $this->hashBlock($html) . "\n\n";
	}
962
963
	/**
	 * Callback for atx-style headings (text prefixed with one to six #).
	 *
	 * @param  array $matches regex captures from doHeaders(): [1] the run
	 *                        of # characters, [2] heading text
	 * @return string hashed heading block
	 */
	protected function _doHeaders_callback_atx($matches) {
		$heading = $matches[2];

		// ID attribute generation.
		$id_attr = $this->_generateIdFromHeaderValue($heading);

		// The heading level is the number of leading # characters.
		$level = strlen($matches[1]);

		$html = "<h$level$id_attr>" . $this->runSpanGamut($heading) . "</h$level>";

		return "\n" . $this->hashBlock($html) . "\n\n";
	}
976
977
	/**
	 * Build the id attribute for a heading using the optional
	 * $header_id_func callback.
	 *
	 * Returns a string of the form ` id="foo"` (with a leading space), or
	 * an empty string when no callable is configured or the callback
	 * returns a falsy value.
	 *
	 * @param  string $headerValue
	 * @return string
	 */
	protected function _generateIdFromHeaderValue($headerValue) {
		$generator = $this->header_id_func;

		// The property defaults to null; only call it when it is callable.
		if (!is_callable($generator)) {
			return "";
		}

		$id = call_user_func($generator, $headerValue);

		return $id
			? ' id="' . $this->encodeAttribute($id) . '"'
			: "";
	}
998
999
	/**
	 * Form HTML ordered (numbered) and unordered (bulleted) lists.
	 *
	 * Two passes are made over the text, one per marker family. Each whole
	 * list found is handed to _doLists_callback().
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doLists($text) {
		$max_indent = $this->tab_width - 1;

		// Re-usable patterns to match list item bullets and number markers:
		$bullet_re = '[*+-]';
		$number_re = '\d+[\.]';

		// Marker family to look for => the other family, which ends a list.
		$passes = array(
			$bullet_re => $number_re,
			$number_re => $bullet_re,
			);

		foreach ($passes as $marker_re => $other_marker_re) {
			// Re-usable pattern to match any entire ul or ol list:
			$whole_list_re = '
				(								# $1 = whole list
				  (								# $2
					([ ]{0,'.$max_indent.'})	# $3 = number of spaces
					('.$marker_re.')			# $4 = first list item marker
					[ ]+
				  )
				  (?s:.+?)
				  (								# $5
					  \z
					|
					  \n{2,}
					  (?=\S)
					  (?!						# Negative lookahead for another list item marker
						[ ]*
						'.$marker_re.'[ ]+
					  )
					|
					  (?=						# Lookahead for another kind of list
					    \n
						\3						# Must have the same indentation
						'.$other_marker_re.'[ ]+
					  )
				  )
				)
			'; // mx

			// Nested lists and top-level lists are anchored differently:
			// nested ones only need to start a line, top-level ones must
			// also consume the newline before them.
			if ($this->list_level) {
				$list_re = '{
						^
						'.$whole_list_re.'
					}mx';
			} else {
				$list_re = '{
						(?:(?<=\n)\n|\A\n?) # Must eat the newline
						'.$whole_list_re.'
					}mx';
			}

			$text = preg_replace_callback($list_re,
				array($this, '_doLists_callback'), $text);
		}

		return $text;
	}
1065
1066
	/**
	 * List parsing callback: convert one matched list into a hashed
	 * `<ul>` or `<ol>` block.
	 *
	 * @param  array $matches Match produced by doLists(): index 1 is the whole
	 *                        list text, index 4 the first item's marker.
	 * @return string         Hashed list block surrounded by newlines.
	 */
	protected function _doLists_callback($matches) {
		// Re-usable patterns to match list item bullets and number markers:
		$marker_ul_re  = '[*+-]';
		$marker_ol_re  = '\d+[\.]';
		$marker_ol_start_re = '[0-9]+';

		$list = $matches[1];
		$list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";

		// Items are split only on markers of the list's own family.
		$marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );

		$list .= "\n";
		$result = $this->processListItems($list, $marker_any_re);

		$ol_start = 1;
		if ($this->enhanced_ordered_list && $list_type == 'ol') {
			// Get the start number for ordered list from its first marker.
			$ol_start_array = array();
			if (preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array)) {
				$ol_start = $ol_start_array[0];
			}
		}

		// A start attribute is emitted only for ordered lists starting above 1.
		if ($ol_start > 1 && $list_type == 'ol'){
			$result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>");
		} else {
			$result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
		}
		return "\n". $result ."\n\n";
	}
1105
1106
	/**
	 * Nesting tracker for list levels: incremented by processListItems() on
	 * entry and decremented on exit; zero means "not inside a list".
	 * @var integer
	 */
	protected $list_level = 0;
1111
1112
	/**
	 * Process the contents of a single ordered or unordered list, splitting it
	 * into individual list items.
	 *
	 * @param  string $list_str      Text of the whole list.
	 * @param  string $marker_any_re Regex fragment matching this list's item markers.
	 * @return string                The list text with every item converted by
	 *                               _processListItems_callback().
	 */
	protected function processListItems($list_str, $marker_any_re) {
		/**
		 * $this->list_level records whether we are inside a list: it is
		 * incremented on entry here and decremented on exit, so zero means
		 * we are outside of any list.
		 *
		 * This matters because outside of a list, text such as:
		 *
		 *		I recommend upgrading to version
		 *		8. Oops, now this line is treated
		 *		as a sub-list.
		 *
		 * must stay a single paragraph even though its second line begins
		 * with a digit-period-space sequence.
		 *
		 * Inside a list (or sub-list), however, that same line is taken as
		 * the start of a sub-list. It is a kludge: this part of Markdown's
		 * syntax cannot be parsed perfectly without mind-reading. A possible
		 * fix would be to change the syntax so that sub-lists must begin
		 * with a starting cardinal number, e.g. "1." or "a.".
		 */
		$this->list_level++;

		// Trim trailing blank lines:
		$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

		$item_re = '{
			(\n)?							# leading line = $1
			(^[ ]*)							# leading whitespace = $2
			('.$marker_any_re.'				# list marker and space = $3
				(?:[ ]+|(?=\n))	# space only required if item is not empty
			)
			((?s:.*?))						# list item text   = $4
			(?:(\n+(?=\n))|\n)				# tailing blank line = $5
			(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
			}xm';

		$list_str = preg_replace_callback($item_re,
			array($this, '_processListItems_callback'), $list_str);

		$this->list_level--;
		return $list_str;
	}
1162
1163
	/**
	 * List item parsing callback: wrap one matched item in `<li>`.
	 *
	 * An item that is preceded or followed by a blank line, or that contains
	 * one, is run through the block gamut; any other item only gets nested
	 * lists processed and its text passed to formParagraphs() without
	 * `<p>` wrapping.
	 *
	 * @param  array $matches Groups from processListItems(): 1 leading line,
	 *                        2 leading whitespace, 3 marker and space,
	 *                        4 item text, 5 trailing blank line.
	 * @return string
	 */
	protected function _processListItems_callback($matches) {
		$item         = $matches[4];
		$marker_space = $matches[3];

		// Optional groups may be missing from the match array.
		$leading_line       = isset($matches[1]) ? $matches[1] : null;
		$leading_space      = isset($matches[2]) ? $matches[2] : null;
		$tailing_blank_line = isset($matches[5]) ? $matches[5] : null;

		$is_block_item = $leading_line || $tailing_blank_line ||
			preg_match('/\n{2,}/', $item);

		if ($is_block_item) {
			// Replace marker with the appropriate whitespace indentation
			$padding = str_repeat(' ', strlen($marker_space));
			$item = $leading_space . $padding . $item;
			$item = $this->runBlockGamut($this->outdent($item)."\n");
		} else {
			// Recursion for sub-lists:
			$item = $this->doLists($this->outdent($item));
			$item = $this->formParagraphs($item, false);
		}

		return "<li>" . $item . "</li>\n";
	}
1189
1190
	/**
	 * Process Markdown `<pre><code>` blocks.
	 *
	 * Matches runs of lines indented by a full tab width and hands each run
	 * to _doCodeBlocks_callback().
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doCodeBlocks($text) {
		$code_block_re = '{
				(?:\n\n|\A\n?)
				(	            # $1 = the code block -- one or more lines, starting with a space/tab
				  (?>
					[ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
					.*\n+
				  )+
				)
				((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
			}xm';

		return preg_replace_callback($code_block_re,
			array($this, '_doCodeBlocks_callback'), $text);
	}
1210
1211
	/**
	 * Code block parsing callback: outdent, escape and wrap one code block.
	 *
	 * The content goes through code_block_content_func when that property is
	 * callable, otherwise through htmlspecialchars() with ENT_NOQUOTES.
	 *
	 * @param  array $matches Index 1 holds the indented code block.
	 * @return string         Hashed `<pre><code>` block surrounded by blank lines.
	 */
	protected function _doCodeBlocks_callback($matches) {
		$codeblock = $this->outdent($matches[1]);

		$codeblock = is_callable($this->code_block_content_func)
			? call_user_func($this->code_block_content_func, $codeblock, "")
			: htmlspecialchars($codeblock, ENT_NOQUOTES);

		# trim leading newlines and trailing newlines
		$codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);

		$html = "<pre><code>$codeblock\n</code></pre>";
		return "\n\n" . $this->hashBlock($html) . "\n\n";
	}
1232
1233
	/**
	 * Create a code span markup for $code. Called from handleSpanToken.
	 *
	 * The content goes through code_span_content_func when that property is
	 * callable; otherwise it is trimmed and escaped with htmlspecialchars().
	 *
	 * @param  string $code
	 * @return string Hashed `<code>` element.
	 */
	protected function makeCodeSpan($code) {
		$code = is_callable($this->code_span_content_func)
			? call_user_func($this->code_span_content_func, $code)
			: htmlspecialchars(trim($code), ENT_NOQUOTES);

		return $this->hashPart("<code>$code</code>");
	}
1246
1247
	/**
	 * Emphasis operators with their regex matches. Keys are the marker
	 * characters as looked up by prepareItalicsAndBold() and
	 * doItalicsAndBold(); the '' key is the entry used when no emphasis
	 * is currently open.
	 * @var array
	 */
	protected $em_relist = array(
		''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
		'*' => '(?<![\s*])\*(?!\*)',
		'_' => '(?<![\s_])_(?!_)',
	);

	/**
	 * Strong operators with their regex matches, keyed the same way as
	 * $em_relist but with two-character markers.
	 * @var array
	 */
	protected $strong_relist = array(
		''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
		'**' => '(?<![\s*])\*\*(?!\*)',
		'__' => '(?<![\s_])__(?!_)',
	);

	/**
	 * Combined emphasis + strong operators with their regex matches, keyed
	 * by three-character markers.
	 * @var array
	 */
	protected $em_strong_relist = array(
		''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
		'***' => '(?<![\s*])\*\*\*(?!\*)',
		'___' => '(?<![\s_])___(?!_)',
	);

	/**
	 * Container for the regular expressions built by prepareItalicsAndBold(),
	 * keyed by the concatenated em and strong markers.
	 * @var array
	 */
	protected $em_strong_prepared_relist;
1282
1283
	/**
	 * Prepare regular expressions for searching emphasis tokens in any
	 * context.
	 *
	 * One expression is stored in $em_strong_prepared_relist for every
	 * combination of an $em_relist key and a $strong_relist key.
	 *
	 * @return void
	 */
	protected function prepareItalicsAndBold() {
		foreach ($this->em_relist as $em => $em_re) {
			foreach ($this->strong_relist as $strong => $strong_re) {
				$context = "$em$strong";

				// Allowed token expressions; the combined em+strong
				// expression, when one exists for this context, goes first.
				$token_relist = isset($this->em_strong_relist[$context])
					? array($this->em_strong_relist[$context], $em_re, $strong_re)
					: array($em_re, $strong_re);

				// Master expression: a single capturing alternation.
				$this->em_strong_prepared_relist[$context] =
					'{(' . implode('|', $token_relist) . ')}';
			}
		}
	}
1305
1306
	/**
	 * Convert Markdown italics (emphasis) and bold (strong) to HTML.
	 *
	 * The text is consumed token by token. Two parallel stacks track the
	 * currently open markers ($token_stack) and the text gathered since each
	 * of them was opened ($text_stack); index 0 is always the innermost
	 * level. Markers still open at the end of the text are put back as
	 * literal characters.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doItalicsAndBold($text) {
		if ($this->in_emphasis_processing) {
			return $text; // avoid reentrancy
		}
		$this->in_emphasis_processing = true;

		$token_stack = array('');
		$text_stack = array('');
		$em = '';
		$strong = '';
		$three_char_em = false;

		while (true) {
			// Prepared regular expression for searching emphasis tokens in
			// the current context (which markers are open right now).
			$token_re = $this->em_strong_prepared_relist["$em$strong"];

			// Each iteration looks for the next emphasis token: text before
			// it is collected, text after it is kept for the next round.
			$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
			$text_stack[0] .= $parts[0];
			$token = isset($parts[1]) ? $parts[1] : null;
			$text  = isset($parts[2]) ? $parts[2] : null;

			if (empty($token)) {
				// Reached end of text span: empty stack without emitting
				// any more emphasis.
				while ($token_stack[0]) {
					$text_stack[1] .= array_shift($token_stack);
					$text_stack[0] .= array_shift($text_stack);
				}
				break;
			}

			$token_len = strlen($token);
			if ($three_char_em) {
				// Reached closing marker while inside a three-char emphasis.
				if ($token_len == 3) {
					// Three-char closing marker, close em and strong.
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<strong><em>$span</em></strong>";
					$text_stack[0] .= $this->hashPart($span);
					$em = '';
					$strong = '';
				} else {
					// Other closing marker: close one em or strong and
					// change current token state to match the other
					$token_stack[0] = str_repeat($token[0], 3-$token_len);
					$tag = $token_len == 2 ? "strong" : "em";
					$span = $text_stack[0];
					$span = $this->runSpanGamut($span);
					$span = "<$tag>$span</$tag>";
					$text_stack[0] = $this->hashPart($span);
					// Clear the state of whichever marker was just closed.
					if ($tag == "strong") {
						$strong = '';
					} else {
						$em = '';
					}
				}
				$three_char_em = false;
			} elseif ($token_len == 3) {
				if ($em) {
					// Reached closing marker for both em and strong:
					// close the two innermost levels, one after the other.
					for ($i = 0; $i < 2; ++$i) {
						$shifted_token = array_shift($token_stack);
						$tag = strlen($shifted_token) == 2 ? "strong" : "em";
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<$tag>$span</$tag>";
						$text_stack[0] .= $this->hashPart($span);
						// Clear the state of whichever marker was just closed.
						if ($tag == "strong") {
							$strong = '';
						} else {
							$em = '';
						}
					}
				} else {
					// Reached opening three-char emphasis marker. Push on token
					// stack; will be handled by the special condition above.
					$em = $token[0];
					$strong = "$em$em";
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$three_char_em = true;
				}
			} elseif ($token_len == 2) {
				if ($strong) {
					// Unwind any dangling emphasis marker:
					if (strlen($token_stack[0]) == 1) {
						$text_stack[1] .= array_shift($token_stack);
						$text_stack[0] .= array_shift($text_stack);
						$em = '';
					}
					// Closing strong marker:
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<strong>$span</strong>";
					$text_stack[0] .= $this->hashPart($span);
					$strong = '';
				} else {
					// Opening strong marker.
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$strong = $token;
				}
			} else {
				// Here $token_len == 1
				if ($em) {
					if (strlen($token_stack[0]) == 1) {
						// Closing emphasis marker:
						array_shift($token_stack);
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<em>$span</em>";
						$text_stack[0] .= $this->hashPart($span);
						$em = '';
					} else {
						// Innermost open marker is not an em: keep as text.
						$text_stack[0] .= $token;
					}
				} else {
					// Opening emphasis marker.
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$em = $token;
				}
			}
		}
		$this->in_emphasis_processing = false;
		return $text_stack[0];
	}
1435
1436
	/**
	 * Parse Markdown blockquotes to HTML.
	 *
	 * Each run of quoted lines (with any trailing blank lines) is handed to
	 * _doBlockQuotes_callback().
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doBlockQuotes($text) {
		$blockquote_re = '/
			  (								# Wrap whole match in $1
				(?>
				  ^[ ]*>[ ]?			# ">" at the start of a line
					.+\n					# rest of the first line
				  (.+\n)*					# subsequent consecutive lines
				  \n*						# blanks
				)+
			  )
			/xm';

		return preg_replace_callback($blockquote_re,
			array($this, '_doBlockQuotes_callback'), $text);
	}
1456
1457
	/**
	 * Blockquote parsing callback: strip one level of quoting, process the
	 * content recursively and wrap it in a hashed `<blockquote>`.
	 *
	 * @param  array $matches Index 1 holds the quoted lines.
	 * @return string
	 */
	protected function _doBlockQuotes_callback($matches) {
		// trim one level of quoting - trim whitespace-only lines
		$bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);
		$bq = $this->runBlockGamut($bq); // recurse

		// Indent every line of the processed content by two spaces.
		$bq = preg_replace('/^/m', "  ", $bq);

		// These leading spaces cause problem with <pre> content,
		// so we need to fix that:
		$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
			array($this, '_doBlockQuotes_callback2'), $bq);

		$html = "<blockquote>\n$bq\n</blockquote>";
		return "\n" . $this->hashBlock($html) . "\n\n";
	}
1476
1477
	/**
	 * Blockquote parsing callback for `<pre>` content: remove the two-space
	 * indentation from the start of every line of the matched block.
	 *
	 * @param  array $matches Index 1 holds the `<pre>` block.
	 * @return string
	 */
	protected function _doBlockQuotes_callback2($matches) {
		return preg_replace('/^  /m', '', $matches[1]);
	}
1487
1488
	/**
	 * Parse paragraphs
	 *
	 * The text is split on blank lines. A chunk that consists solely of a
	 * block hash is replaced by the stored HTML; every other chunk goes
	 * through the span gamut and is optionally wrapped in `<p>` tags.
	 *
	 * @param  string $text String to process in paragraphs
	 * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
	 * @return string
	 */
	protected function formParagraphs($text, $wrap_in_p = true) {
		// Strip leading and trailing lines:
		$text = preg_replace('/\A\n+|\n+\z/', '', $text);

		$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

		// Wrap <p> tags and unhashify HTML blocks
		foreach ($grafs as $key => $value) {
			if (preg_match('/^B\x1A[0-9]+B$/', $value)) {
				// Is a block: swap the stored HTML back in, in place.
				$grafs[$key] = $this->html_hashes[$value];
				continue;
			}

			// Is a paragraph.
			$value = $this->runSpanGamut($value);
			if ($wrap_in_p) {
				// The opening tag replaces any leading spaces.
				$value = preg_replace('/^([ ]*)/', "<p>", $value);
				$value .= "</p>";
			}
			$grafs[$key] = $this->unhash($value);
		}

		return implode("\n\n", $grafs);
	}
1558
1559
	/**
	 * Encode text for a double-quoted HTML attribute. This function
	 * is *not* suitable for attributes enclosed in single quotes.
	 *
	 * @param  string $text
	 * @return string Text with ampersands and `<` encoded by
	 *                encodeAmpsAndAngles() and `"` turned into `&quot;`.
	 */
	protected function encodeAttribute($text) {
		return str_replace('"', '&quot;', $this->encodeAmpsAndAngles($text));
	}
1570
1571
	/**
	 * Encode text for a double-quoted HTML attribute containing a URL,
	 * applying the URL filter if set. Also generates the textual
	 * representation for the URL (removing mailto: or tel:) storing it in $text.
	 * This function is *not* suitable for attributes enclosed in single quotes.
	 *
	 * @param  string $url
	 * @param  string $text Passed by reference
	 * @return string        URL
	 */
	protected function encodeURLAttribute($url, &$text = null) {
		if (is_callable($this->url_filter_func)) {
			$url = call_user_func($this->url_filter_func, $url);
		}

		if (preg_match('{^mailto:}i', $url)) {
			// Obfuscate; $text receives the result minus the 7-char scheme.
			return $this->encodeEntityObfuscatedAttribute($url, $text, 7);
		}

		$is_tel = preg_match('{^tel:}i', $url);
		$url = $this->encodeAttribute($url);
		// For tel: links the 4-char scheme is left out of the text.
		$text = $is_tel ? substr($url, 4) : $url;

		return $url;
	}
1598
1599
	/**
	 * Smart processing for ampersands and angle brackets that need to
	 * be encoded. Valid character entities are left alone unless the
	 * no-entities mode is set.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function encodeAmpsAndAngles($text) {
		// Ampersand-encoding based entirely on Nat Irons's Amputator
		// MT plugin: <http://bumppo.net/projects/amputator/>
		$text = $this->no_entities
			? str_replace('&', '&amp;', $text)
			: preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', '&amp;', $text);

		// Encode remaining <'s
		return str_replace('<', '&lt;', $text);
	}
1620
1621
	/**
	 * Parse Markdown automatic links to anchor HTML tags.
	 *
	 * Handles both `<scheme:...>` URLs and `<address>` email addresses.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doAutoLinks($text) {
		// URLs: <http://example.com/> (also https, ftp, dict and tel)
		$url_re = '{<((https?|ftp|dict|tel):[^\'">\s]+)>}i';
		$text = preg_replace_callback($url_re,
			array($this, '_doAutoLinks_url_callback'), $text);

		// Email addresses: <address@example.com>
		$email_re = '{
			<
			(?:mailto:)?
			(
				(?:
					[-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
				|
					".*?"
				)
				\@
				(?:
					[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
				|
					\[[\d.a-fA-F:]+\]	# IPv4 & IPv6
				)
			)
			>
			}xi';

		return preg_replace_callback($email_re,
			array($this, '_doAutoLinks_email_callback'), $text);
	}
1653
1654
	/**
	 * Parse URL callback: build a hashed anchor for an automatic URL link.
	 *
	 * @param  array $matches Index 1 holds the URL.
	 * @return string
	 */
	protected function _doAutoLinks_url_callback($matches) {
		$text = null; // filled in by encodeURLAttribute()
		$url = $this->encodeURLAttribute($matches[1], $text);

		return $this->hashPart("<a href=\"$url\">$text</a>");
	}
1664
1665
	/**
	 * Parse email address callback: build a hashed `mailto:` anchor for an
	 * automatic email link.
	 *
	 * @param  array $matches Index 1 holds the email address.
	 * @return string
	 */
	protected function _doAutoLinks_email_callback($matches) {
		$text = null; // filled in by encodeURLAttribute()
		$url = $this->encodeURLAttribute("mailto:" . $matches[1], $text);

		return $this->hashPart("<a href=\"$url\">$text</a>");
	}
1676
1677
	/**
	 * Input: some text to obfuscate, e.g. "mailto:foo@example.com"
	 *
	 * Output: the same text but with most characters encoded as either a
	 *         decimal or hex entity, in the hopes of foiling most address
	 *         harvesting spam bots. E.g.:
	 *
	 *        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
	 *        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
	 *        &#x6d;
	 *
	 * Note: the additional output $tail is assigned the same value as the
	 * output, minus the number of characters specified by $head_length.
	 *
	 * Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
	 * With some optimizations by Milian Wolff. Forced encoding of HTML
	 * attribute special characters by Allan Odgaard.
	 *
	 * @param  string  $text
	 * @param  string  $tail Passed by reference
	 * @param  integer $head_length
	 * @return string
	 */
	protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
		if ($text == "") {
			return $tail = "";
		}

		// One chunk per byte. str_split() is used instead of a regex split
		// on '(?!$)': `$` also matches just before a trailing newline, which
		// would leave the last character glued to that newline and drop the
		// newline whenever the chunk is replaced by an entity.
		$chars = str_split($text);
		$seed = (int)abs(crc32($text) / strlen($text)); // Deterministic seed.

		foreach ($chars as $key => $char) {
			$ord = ord($char);
			// Ignore non-ascii chars.
			if ($ord < 128) {
				$r = ($seed * (1 + $key)) % 100; // Pseudo-random function.
				// roughly 10% raw, 45% hex, 45% dec
				// '@' *must* be encoded. I insist.
				// '"' and '>' have to be encoded inside the attribute
				if ($r > 90 && strpos('@"&>', $char) === false) {
					/* do nothing */
				} else if ($r < 45) {
					$chars[$key] = '&#x'.dechex($ord).';';
				} else {
					$chars[$key] = '&#'.$ord.';';
				}
			}
		}

		$text = implode('', $chars);
		// The tail skips the first $head_length source characters (not
		// entity characters), e.g. the 7 characters of "mailto:".
		$tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;

		return $text;
	}
1731
1732
	/**
	 * Take the string $str and parse it into tokens, hashing embedded HTML,
	 * escaped characters and handling code spans.
	 *
	 * @param  string $str
	 * @return string
	 */
	protected function parseSpan($str) {
		$output = '';

		// HTML-related alternatives are left out when markup is disallowed.
		$span_re = '{
				(
					\\\\'.$this->escape_chars_re.'
				|
					(?<![`\\\\])
					`+						# code span marker
			'.( $this->no_markup ? '' : '
				|
					<!--    .*?     -->		# comment
				|
					<\?.*?\?> | <%.*?%>		# processing instruction
				|
					<[!$]?[-a-zA-Z0-9:_]+	# regular tags
					(?>
						\s
						(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
					)?
					>
				|
					<[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
				|
					</[-a-zA-Z0-9:_]+\s*> # closing tag
			').'
				)
				}xs';

		while (true) {
			// Each loop iteration searches for either the next tag, the next
			// opening code span marker, or the next escaped character.
			// Each token is then passed to handleSpanToken.
			$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

			// Text preceding the token is copied through unchanged.
			if ($parts[0] != "") {
				$output .= $parts[0];
			}

			// No token found: the end of the string has been reached.
			if (!isset($parts[1])) {
				break;
			}

			// handleSpanToken() receives the remaining text by reference and
			// may consume part of it (e.g. up to a closing code span marker).
			$output .= $this->handleSpanToken($parts[1], $parts[2]);
			$str = $parts[2];
		}

		return $output;
	}
1789
1790
	/**
	 * Handle $token provided by parseSpan by determining its nature and
	 * returning the corresponding value that should replace it.
	 *
	 * @param  string $token
	 * @param  string $str Passed by reference; shortened when a code span
	 *                     consumes text following the token.
	 * @return string
	 */
	protected function handleSpanToken($token, &$str) {
		$first_char = $token[0];

		if ($first_char === "\\") {
			// Escaped character: emit it as a numeric entity.
			return $this->hashPart("&#". ord($token[1]). ";");
		}

		if ($first_char !== "`") {
			// Anything else is hashed as-is.
			return $this->hashPart($token);
		}

		// Code span: search for end marker in remaining text.
		$end_marker_re = '/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm';
		if (preg_match($end_marker_re, $str, $matches)) {
			$str = $matches[2];
			$codespan = $this->makeCodeSpan($matches[1]);
			return $this->hashPart($codespan);
		}

		return $token; // Return as text since no ending marker found.
	}
1815
1816
	/**
	 * Remove one level of line-leading tabs or spaces: a single tab, or up
	 * to tab_width spaces, at the start of every line.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function outdent($text) {
		$indent_re = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';
		return preg_replace($indent_re, '', $text);
	}
1824
1825
1826
	/**
	 * String length function for detab. Holds a function name by default;
	 * `_initDetab` replaces it with a closure handling UTF-8 if the default
	 * function does not exist.
	 * @var string|callable
	 */
	protected $utf8_strlen = 'mb_strlen';
1832
1833
	/**
	 * Replace tabs with the appropriate amount of spaces.
	 *
	 * Only lines that contain a tab are touched. Each such line is split in
	 * blocks delimited by tab characters and rebuilt by _detab_callback(),
	 * which inserts the appropriate number of spaces between the blocks.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function detab($text) {
		return preg_replace_callback('/^.*\t.*$/m',
			array($this, '_detab_callback'), $text);
	}
1849
1850
	/**
	 * Replace tabs callback: expand the tabs of one line so that every
	 * block following a tab starts on a multiple of tab_width.
	 *
	 * @param  array $matches Index 0 holds the line containing tabs.
	 * @return string
	 */
	protected function _detab_callback($matches) {
		$strlen = $this->utf8_strlen; // strlen function for UTF-8.

		// Split in blocks; the first one starts the rebuilt line as-is.
		$blocks = explode("\t", $matches[0]);
		$line = array_shift($blocks);

		foreach ($blocks as $block) {
			// Pad up to the next tab stop, then append the block.
			$used = $strlen($line, 'UTF-8') % $this->tab_width;
			$line .= str_repeat(" ", $this->tab_width - $used) . $block;
		}

		return $line;
	}
1872
1873
	/**
	 * Check for the availability of the function in the `utf8_strlen` property
	 * (initially `mb_strlen`). If the function is not available, create a
	 * function that will loosely count the number of UTF-8 characters with a
	 * regular expression.
	 *
	 * @return void
	 */
	protected function _initDetab() {
		// is_callable() rather than function_exists(): the property may
		// already hold the closure assigned below (or another non-string
		// callable), which function_exists() cannot accept.
		if (is_callable($this->utf8_strlen)) {
			return;
		}

		// Fallback: count bytes that begin a character, each together with
		// any continuation bytes that follow it.
		$this->utf8_strlen = function($text) {
			return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m);
		};
	}
1890
1891
	/**
	 * Swap back in all the tags hashed by _HashHTMLBlocks.
	 *
	 * A hash key is a marker character, the \x1A byte, a run of digits and
	 * the same marker character again; each one is resolved by
	 * _unhash_callback().
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function unhash($text) {
		$hash_key_re = '/(.)\x1A[0-9]+\1/';
		return preg_replace_callback($hash_key_re,
			array($this, '_unhash_callback'), $text);
	}
1900
1901
	/**
	 * Unhashing callback: look up the content stored for a hash key.
	 *
	 * @param  array $matches Index 0 holds the complete hash key.
	 * @return string
	 */
	protected function _unhash_callback($matches) {
		$hash_key = $matches[0];
		return $this->html_hashes[$hash_key];
	}
1909
}
1910