Completed
Push — lib ( 9da5a6...89a5d6 )
by Michel
03:33
created

Markdown::_detab_callback()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 17
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 17
ccs 11
cts 11
cp 1
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 11
nc 2
nop 1
crap 2
1
<?php
2
/**
3
 * Markdown  -  A text-to-HTML conversion tool for web writers
4
 *
5
 * @package   php-markdown
6
 * @author    Michel Fortin <[email protected]>
7
 * @copyright 2004-2018 Michel Fortin <https://michelf.com/projects/php-markdown/>
8
 * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
9
 */
10
11
namespace Michelf;
12
13
/**
14
 * Markdown Parser Class
15
 */
16
class Markdown implements MarkdownInterface {
17
	/**
18
	 * Define the package version
19
	 * @var string
20
	 */
21
	const MARKDOWNLIB_VERSION = "1.8.0";
22
23
	/**
24
	 * Simple function interface - Initialize the parser and return the result
25
	 * of its transform method. This will work fine for derived classes too.
26
	 *
27
	 * @api
28
	 *
29
	 * @param  string $text
30
	 * @return string
31
	 */
32 107
	public static function defaultTransform($text) {
		// Cache of parser instances, keyed by the name of the class this
		// method was called on, so each class is instantiated only once.
		static $parser_list;

		// Take parser class on which this function was called.
		$parser_class = \get_called_class();

		// Create the parser if not already set for this class.
		if (empty($parser_list[$parser_class])) {
			$parser_list[$parser_class] = new $parser_class;
		}

		// Transform text using the cached parser.
		return $parser_list[$parser_class]->transform($text);
	}
48
49
	/**
50
	 * Configuration variables
51
	 */
52
53
	/**
54
	 * Change to ">" for HTML output.
55
	 * @var string
56
	 */
57
	public $empty_element_suffix = " />";
58
59
	/**
60
	 * The width of indentation of the output markup
61
	 * @var int
62
	 */
63
	public $tab_width = 4;
64
65
	/**
66
	 * Change to `true` to disallow markup or entities.
67
	 * @var boolean
68
	 */
69
	public $no_markup   = false;
70
	public $no_entities = false;
71
72
73
	/**
74
	 * Change to `true` to enable line breaks on \n without two trailing spaces
75
	 * @var boolean
76
	 */
77
	public $hard_wrap = false;
78
79
	/**
80
	 * Predefined URLs and titles for reference links and images.
81
	 * @var array
82
	 */
83
	public $predef_urls   = array();
84
	public $predef_titles = array();
85
86
	/**
87
	 * Optional filter function for URLs
88
	 * @var callable
89
	 */
90
	public $url_filter_func = null;
91
92
	/**
93
	 * Optional header id="" generation callback function.
94
	 * @var callable
95
	 */
96
	public $header_id_func = null;
97
98
	/**
99
	 * Optional function for converting code block content to HTML
100
	 * @var callable
101
	 */
102
	public $code_block_content_func = null;
103
104
	/**
105
	 * Optional function for converting code span content to HTML.
106
	 * @var callable
107
	 */
108
	public $code_span_content_func = null;
109
110
	/**
111
	 * Class attribute to toggle "enhanced ordered list" behaviour
112
	 * setting this to true will allow ordered lists to start from the index
113
	 * number that is defined first.
114
	 *
115
	 * For example:
116
	 * 2. List item two
117
	 * 3. List item three
118
	 *
119
	 * Becomes:
120
	 * <ol start="2">
121
	 * <li>List item two</li>
122
	 * <li>List item three</li>
123
	 * </ol>
124
	 *
125
	 * @var bool
126
	 */
127
	public $enhanced_ordered_list = false;
128
129
	/**
130
	 * Parser implementation
131
	 */
132
133
	/**
134
	 * Regex to match balanced [brackets].
135
	 * Needed to insert a maximum bracket depth while converting to PHP.
136
	 * @var int
137
	 */
138
	protected $nested_brackets_depth = 6;
139
	protected $nested_brackets_re;
140
141
	protected $nested_url_parenthesis_depth = 4;
142
	protected $nested_url_parenthesis_re;
143
144
	/**
145
	 * Table of hash values for escaped characters:
146
	 * @var string
147
	 */
148
	protected $escape_chars = '\`*_{}[]()>#+-.!';
149
	protected $escape_chars_re;
150
151
	/**
152
	 * Constructor function. Initialize appropriate member variables.
153
	 * @return void
0 ignored issues
show
Comprehensibility Best Practice introduced by
Adding a @return annotation to constructors is generally not recommended as a constructor does not have a meaningful return value.

Adding a @return annotation to a constructor is not recommended, since a constructor does not have a meaningful return value.

Please refer to the PHP core documentation on constructors.

Loading history...
154
	 */
155 2
	public function __construct() {
		$this->_initDetab();
		$this->prepareItalicsAndBold();

		// Pattern for balanced [brackets]: the opening fragment repeated
		// once per allowed nesting level, followed by the closing fragment
		// repeated the same number of times.
		$bracket_levels = $this->nested_brackets_depth;
		$this->nested_brackets_re = str_repeat('(?>[^\[\]]+|\[', $bracket_levels)
			. str_repeat('\])*', $bracket_levels);

		// Same construction for balanced (parentheses) inside URLs.
		$paren_levels = $this->nested_url_parenthesis_depth;
		$this->nested_url_parenthesis_re = str_repeat('(?>[^()\s]+|\(', $paren_levels)
			. str_repeat('(?>\)))*', $paren_levels);

		// Character class matching any one of the escapable characters.
		$this->escape_chars_re = '[' . preg_quote($this->escape_chars) . ']';

		// Sort document, block, and span gamut in ascending priority order.
		foreach (array('document_gamut', 'block_gamut', 'span_gamut') as $gamut) {
			asort($this->$gamut);
		}
	}
174
175
176
	/**
177
	 * Internal hashes used during transformation.
178
	 * @var array
179
	 */
180
	protected $urls        = array();
181
	protected $titles      = array();
182
	protected $html_hashes = array();
183
184
	/**
185
	 * Status flag to avoid invalid nesting.
186
	 * @var boolean
187
	 */
188
	protected $in_anchor = false;
189
190
	/**
191
	 * Status flag to avoid invalid nesting.
192
	 * @var boolean
193
	 */
194
	protected $in_emphasis_processing = false;
195
196
	/**
197
	 * Called before the transformation process starts to setup parser states.
198
	 * @return void
199
	 */
200 107
	protected function setup() {
		// Start with an empty hash table and the predefined references.
		$this->html_hashes = array();
		$this->urls        = $this->predef_urls;
		$this->titles      = $this->predef_titles;

		// Reset both nesting guards.
		$this->in_anchor = $this->in_emphasis_processing = false;
	}
208
209
	/**
210
	 * Called after the transformation process to clear any variable which may
211
	 * be taking up memory unnecessarily.
212
	 * @return void
213
	 */
214 107
	protected function teardown() {
		// Empty the three per-document lookup tables in one go.
		$this->urls = $this->titles = $this->html_hashes = array();
	}
219
220
	/**
221
	 * Main function. Performs some preprocessing on the input text and pass
222
	 * it through the document gamut.
223
	 *
224
	 * @api
225
	 *
226
	 * @param  string $text
227
	 * @return string
228
	 */
229 107
	public function transform($text) {
		$this->setup();

		// Remove UTF-8 BOM and marker character in input, if present.
		$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

		// Standardize line endings (DOS to Unix and Mac to Unix), then make
		// sure the text ends with a couple of newlines.
		$text = preg_replace('{\r\n?}', "\n", $text) . "\n\n";

		// Convert all tabs to spaces, then turn block-level HTML blocks
		// into hash entries.
		$text = $this->hashHTMLBlocks($this->detab($text));

		// Strip any lines consisting only of spaces. This makes subsequent
		// regexen easier to write, because we can match consecutive blank
		// lines with /\n+/ instead of something contorted like /[ ]*\n+/ .
		$text = preg_replace('/^[ ]+$/m', '', $text);

		// Run document gamut methods, in the order the gamut array holds them.
		foreach (array_keys($this->document_gamut) as $step) {
			$text = $this->$step($text);
		}

		$this->teardown();

		return $text . "\n";
	}
263
264
	/**
265
	 * Define the document gamut
266
	 * @var array
267
	 */
268
	protected $document_gamut = array(
269
		// Strip link definitions, store in hashes.
270
		"stripLinkDefinitions" => 20,
271
		"runBasicBlockGamut"   => 30,
272
	);
273
274
	/**
275
	 * Strips link definitions from text, stores the URLs and titles in
276
	 * hash references
277
	 * @param  string $text
278
	 * @return string
279
	 */
280 48 View Code Duplication
	protected function stripLinkDefinitions($text) {
		// A definition may be indented by at most tab_width - 1 spaces.
		$max_indent = $this->tab_width - 1;

		// Link defs are in the form: ^[id]: url "optional title"
		// Each match is passed to the callback and replaced by its return
		// value.
		return preg_replace_callback('{
							^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?:	# id = $1
							  [ ]*
							  \n?				# maybe *one* newline
							  [ ]*
							(?:
							  <(.+?)>			# url = $2
							|
							  (\S+?)			# url = $3
							)
							  [ ]*
							  \n?				# maybe one newline
							  [ ]*
							(?:
								(?<=\s)			# lookbehind for whitespace
								["(]
								(.*?)			# title = $4
								[")]
								[ ]*
							)?	# title is optional
							(?:\n+|\Z)
			}xm',
			array($this, '_stripLinkDefinitions_callback'),
			$text
		);
	}
312
313
	/**
314
	 * The callback to strip link definitions
315
	 * @param  array $matches
316
	 * @return string
317
	 */
318 9
	protected function _stripLinkDefinitions_callback($matches) {
		// Link ids are stored lower-cased.
		$link_id = strtolower($matches[1]);

		// The URL is captured either in group 2 (<bracketed> form) or in
		// group 3 (bare form).
		$this->urls[$link_id] = ($matches[2] == '') ? $matches[3] : $matches[2];

		// Group 4 is absent when the definition has no title; null is
		// stored in that case.
		$this->titles[$link_id] = isset($matches[4]) ? $matches[4] : null;

		return ''; // String that will replace the block
	}
325
326
	/**
327
	 * Hashify HTML blocks
328
	 * @param  string $text
329
	 * @return string
330
	 */
331 48
	protected function hashHTMLBlocks($text) {
		// Nothing to hash when raw markup is disallowed.
		if ($this->no_markup) {
			return $text;
		}

		$less_than_tab = $this->tab_width - 1;

		/**
		 * Hashify HTML blocks:
		 *
		 * We only want to do this for block-level HTML tags, such as headers,
		 * lists, and tables. That's because we still want to wrap <p>s around
		 * "paragraphs" that are wrapped in non-block-level tags, such as
		 * anchors, phrase emphasis, and spans. The list of tags we're looking
		 * for is hard-coded:
		 *
		 * *  List "a" is made of tags which can be both inline or block-level.
		 *    These will be treated block-level when the start tag is alone on
		 *    its line, otherwise they're not matched here and will be taken as
		 *    inline later.
		 * *  List "b" is made of tags which are always block-level;
		 */
		$block_tags_a_re = 'ins|del';
		$block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
						   'script|noscript|style|form|fieldset|iframe|math|svg|'.
						   'article|section|nav|aside|hgroup|header|footer|'.
						   'figure';

		// Regular expression for the content of a block tag.
		$nested_tags_level = 4;
		$attr = '
			(?>				# optional tag attributes
			  \s			# starts with whitespace
			  (?>
				[^>"/]+		# text outside quotes
			  |
				/+(?!>)		# slash not followed by ">"
			  |
				"[^"]*"		# text inside double quotes (tolerate ">")
			  |
				\'[^\']*\'	# text inside single quotes (tolerate ">")
			  )*
			)?
			';
		// The content pattern refers to the enclosing tag name through \2;
		// $content2 is the same pattern rewritten to refer to \3 instead,
		// for use in the "group a" branch below.
		$content =
			str_repeat('
				(?>
				  [^<]+			# content without tag
				|
				  <\2			# nested opening tag
					'.$attr.'	# attributes
					(?>
					  />
					|
					  >', $nested_tags_level).	// end of opening tag
					  '.*?'.					// last level nested tag content
			str_repeat('
					  </\2\s*>	# closing nested tag
					)
				  |
					<(?!/\2\s*>	# other tags with a different name
				  )
				)*',
				$nested_tags_level);
		$content2 = str_replace('\2', '\3', $content);

		/**
		 * First, look for nested blocks, e.g.:
		 * 	<div>
		 * 		<div>
		 * 		tags for inner block must be indented.
		 * 		</div>
		 * 	</div>
		 *
		 * The outermost tags must start at the left margin for this to match,
		 * and the inner nested divs must be indented.
		 * We need to do this before the next, more liberal match, because the
		 * next match will start at the first `<div>` and stop at the
		 * first `</div>`.
		 *
		 * Capture groups of the combined pattern: $1 = whole block,
		 * $2 = "group b" tag name, $3 = "group a" tag name, $4 = "hr",
		 * $5 = processing-instruction marker ("?" or "%"). The closing
		 * marker of a processing instruction must therefore be matched with
		 * \5: group 2 never participates in that branch, and a backreference
		 * to an unset group cannot match.
		 */
		$text = preg_replace_callback('{(?>
			(?>
				(?<=\n)			# Starting on its own line
				|				# or
				\A\n?			# the at beginning of the doc
			)
			(						# save in $1

			  # Match from `\n<tag>` to `</tag>\n`, handling nested tags
			  # in between.

						[ ]{0,'.$less_than_tab.'}
						<('.$block_tags_b_re.')# start tag = $2
						'.$attr.'>			# attributes followed by > and \n
						'.$content.'		# content, support nesting
						</\2>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special version for tags of group a.

						[ ]{0,'.$less_than_tab.'}
						<('.$block_tags_a_re.')# start tag = $3
						'.$attr.'>[ ]*\n	# attributes followed by >
						'.$content2.'		# content, support nesting
						</\3>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special case just for <hr />. It was easier to make a special
			  # case than to make the other regex more complicated.

						[ ]{0,'.$less_than_tab.'}
						<(hr)				# start tag = $4
						'.$attr.'			# attributes
						/?>					# the matching end tag
						[ ]*
						(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # Special case for standalone HTML comments:

					[ ]{0,'.$less_than_tab.'}
					(?s:
						<!-- .*? -->
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # PHP and ASP-style processor instructions (<? and <%)

					[ ]{0,'.$less_than_tab.'}
					(?s:
						<([?%])			# $5
						.*?
						\5>
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			)
			)}Sxmi',
			array($this, '_hashHTMLBlocks_callback'),
			$text
		);

		return $text;
	}
478
479
	/**
480
	 * The callback for hashing HTML blocks
481
	 * @param  array $matches
482
	 * @return string
483
	 */
484 9
	protected function _hashHTMLBlocks_callback($matches) {
		// $matches[1] holds the captured HTML block; it is swapped for its
		// block hash token, padded with blank lines on both sides.
		return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
	}
489
490
	/**
491
	 * Called whenever a tag must be hashed when a function inserts an atomic
492
	 * element in the text stream. Passing $text through this function gives
493
	 * a unique text-token which will be reverted back when calling unhash.
494
	 *
495
	 * The $boundary argument specifies which character should be used to surround
496
	 * the token. By convention, "B" is used for block elements that need not
497
	 * to be wrapped into paragraph tags at the end, ":" is used for elements
498
	 * that are word separators and "X" is used in the general case.
499
	 *
500
	 * @param  string $text
501
	 * @param  string $boundary
502
	 * @return string
503
	 */
504 107
	protected function hashPart($text, $boundary = 'X') {
		// Counter kept across calls so that every token is distinct.
		static $i = 0;

		// Swap back any tag hash found in $text so we do not have to `unhash`
		// multiple times at the end.
		$text = $this->unhash($text);

		// Then hash the block. Token layout: boundary character, \x1A
		// marker, sequence number, boundary character.
		$i++;
		$key = $boundary . "\x1A" . $i . $boundary;
		$this->html_hashes[$key] = $text;

		return $key; // String that will replace the tag.
	}
515
516
	/**
517
	 * Shortcut function for hashPart with block-level boundaries.
518
	 * @param  string $text
519
	 * @return string
520
	 */
521 107
	protected function hashBlock($text) {
		// 'B' is the boundary character passed for block-level tokens.
		$block_boundary = 'B';

		return $this->hashPart($text, $block_boundary);
	}
524
525
	/**
526
	 * Define the block gamut - these are all the transformations that form
527
	 * block-level tags like paragraphs, headers, and list items.
528
	 * @var array
529
	 */
530
	protected $block_gamut = array(
531
		"doHeaders"         => 10,
532
		"doHorizontalRules" => 20,
533
		"doLists"           => 40,
534
		"doCodeBlocks"      => 50,
535
		"doBlockQuotes"     => 60,
536
	);
537
538
	/**
539
	 * Run block gamut transformations.
540
	 *
541
	 * We need to escape raw HTML in Markdown source before doing anything
542
	 * else. This need to be done for each block, and not only at the
543
	 * beginning in the Markdown function since hashed blocks can be part of
544
	 * list items and could have been indented. Indented blocks would have
545
	 * been seen as a code block in a previous pass of hashHTMLBlocks.
546
	 *
547
	 * @param  string $text
548
	 * @return string
549
	 */
550 20
	protected function runBlockGamut($text) {
		// Hash raw HTML blocks first, then apply the regular block gamut to
		// the result.
		return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
	}
554
555
	/**
556
	 * Run block gamut transformations, without hashing HTML blocks. This is
557
	 * useful when HTML blocks are known to be already hashed, like in the first
558
	 * whole-document pass.
559
	 *
560
	 * @param  string $text
561
	 * @return string
562
	 */
563 107
	protected function runBasicBlockGamut($text) {
		// Apply every block gamut method, in the order the gamut array
		// holds them; the priorities stored as values are not needed here.
		foreach (array_keys($this->block_gamut) as $step) {
			$text = $this->$step($text);
		}

		// Finally form paragraph and restore hashed blocks.
		return $this->formParagraphs($text);
	}
574
575
	/**
576
	 * Convert horizontal rules
577
	 * @param  string $text
578
	 * @return string
579
	 */
580 107
	protected function doHorizontalRules($text) {
		$rule_re =
			'{
				^[ ]{0,3}	# Leading space
				([-*_])		# $1: First marker
				(?>			# Repeated marker group
					[ ]{0,2}	# Zero, one, or two spaces.
					\1			# Marker character
				){2,}		# Group repeated at least twice
				[ ]*		# Tailing spaces
				$			# End of line.
			}mx';

		// One hashed <hr> token, on a line of its own, replaces every rule
		// found in the text.
		$replacement = "\n" . $this->hashBlock('<hr' . $this->empty_element_suffix) . "\n";

		return preg_replace($rule_re, $replacement, $text);
	}
596
597
	/**
598
	 * These are all the transformations that occur *within* block-level
599
	 * tags like paragraphs, headers, and list items.
600
	 * @var array
601
	 */
602
	protected $span_gamut = array(
603
		// Process character escapes, code spans, and inline HTML
604
		// in one shot.
605
		"parseSpan"           => -30,
606
		// Process anchor and image tags. Images must come first,
607
		// because ![foo][f] looks like an anchor.
608
		"doImages"            =>  10,
609
		"doAnchors"           =>  20,
610
		// Make links out of things like `<https://example.com/>`
611
		// Must come after doAnchors, because you can use < and >
612
		// delimiters in inline links like [this](<url>).
613
		"doAutoLinks"         =>  30,
614
		"encodeAmpsAndAngles" =>  40,
615
		"doItalicsAndBold"    =>  50,
616
		"doHardBreaks"        =>  60,
617
	);
618
619
	/**
620
	 * Run span gamut transformations
621
	 * @param  string $text
622
	 * @return string
623
	 */
624 106
	protected function runSpanGamut($text) {
		// Apply every span gamut method, in the order the gamut array holds
		// them; the priorities stored as values are not needed here.
		foreach (array_keys($this->span_gamut) as $step) {
			$text = $this->$step($text);
		}

		return $text;
	}
631
632
	/**
633
	 * Do hard breaks
634
	 * @param  string $text
635
	 * @return string
636
	 */
637 106
	protected function doHardBreaks($text) {
		// With hard_wrap every newline becomes a break; otherwise only
		// newlines preceded by two or more spaces do.
		$break_re = $this->hard_wrap ? '/ *\n/' : '/ {2,}\n/';

		return preg_replace_callback($break_re,
			array($this, '_doHardBreaks_callback'), $text);
	}
646
647
	/**
648
	 * Trigger part hashing for the hard break (callback method)
649
	 * @param  array $matches
650
	 * @return string
651
	 */
652 3
	protected function _doHardBreaks_callback($matches) {
		// $matches is required by the preg_replace_callback signature but
		// not consulted: every match becomes the same hashed <br> tag.
		$break_tag = '<br' . $this->empty_element_suffix . "\n";

		return $this->hashPart($break_tag);
	}
655
656
	/**
657
	 * Turn Markdown link shortcuts into XHTML <a> tags.
658
	 * @param  string $text
659
	 * @return string
660
	 */
661 47 View Code Duplication
	protected function doAnchors($text) {
		// The in_anchor flag guards against invalid nesting: do nothing
		// when this is reached while a link is already being processed.
		if ($this->in_anchor) {
			return $text;
		}
		$this->in_anchor = true;

		$reference_handler = array($this, '_doAnchors_reference_callback');
		$inline_handler    = array($this, '_doAnchors_inline_callback');

		// First, handle reference-style links: [link text] [id]
		$text = preg_replace_callback('{
			(					# wrap whole match in $1
			  \[
				('.$this->nested_brackets_re.')	# link text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]
			)
			}xs',
			$reference_handler, $text);

		// Next, inline-style links: [link text](url "optional title")
		$text = preg_replace_callback('{
			(				# wrap whole match in $1
			  \[
				('.$this->nested_brackets_re.')	# link text = $2
			  \]
			  \(			# literal paren
				[ \n]*
				(?:
					<(.+?)>	# href = $3
				|
					('.$this->nested_url_parenthesis_re.')	# href = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# Title = $7
				  \6		# matching quote
				  [ \n]*	# ignore any spaces/tabs between closing quote and )
				)?			# title is optional
			  \)
			)
			}xs',
			$inline_handler, $text);

		// Last, handle reference-style shortcuts: [link text]
		// These must come last in case you've also got [link text][1]
		// or [link text](/foo)
		$text = preg_replace_callback('{
			(					# wrap whole match in $1
			  \[
				([^\[\]]+)		# link text = $2; can\'t contain [ or ]
			  \]
			)
			}xs',
			$reference_handler, $text);

		$this->in_anchor = false;

		return $text;
	}
724
725
	/**
726
	 * Callback method to parse referenced anchors
727
	 * @param  array $matches
728
	 * @return string
729
	 */
730 9
	protected function _doAnchors_reference_callback($matches) {
		$whole_match = $matches[1];
		$link_text   = $matches[2];

		// Group 3 (the explicit id) is missing for shortcut links like
		// [this] and empty for [this][]; the link text is the id then.
		$link_id = isset($matches[3]) ? $matches[3] : '';
		if ($link_id == "") {
			$link_id = $link_text;
		}

		// lower-case and turn embedded newlines into spaces
		$link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

		// No definition stored for this id: leave the source text intact.
		if (!isset($this->urls[$link_id])) {
			return $whole_match;
		}

		$url  = $this->encodeURLAttribute($this->urls[$link_id]);
		$html = "<a href=\"$url\"";

		if (isset($this->titles[$link_id])) {
			$title = $this->encodeAttribute($this->titles[$link_id]);
			$html .= " title=\"$title\"";
		}

		$html .= '>' . $this->runSpanGamut($link_text) . '</a>';

		return $this->hashPart($html);
	}
763
764
	/**
765
	 * Callback method to parse inline anchors
766
	 * @param  array $matches
767
	 * @return string
768
	 */
769 10
	protected function _doAnchors_inline_callback($matches) {
		// Groups: $2 = link text, $3 = <bracketed> href, $4 = bare href,
		// $7 = optional title (absent from $matches when not given).
		// The link text goes through the span gamut exactly once, as in
		// _doAnchors_reference_callback; running it again on its own
		// output would re-process text that has already been converted.
		$link_text = $this->runSpanGamut($matches[2]);
		$url       = $matches[3] == '' ? $matches[4] : $matches[3];
		$title     = isset($matches[7]) ? $matches[7] : null;

		// If the URL was of the form <s p a c e s> it got caught by the HTML
		// tag parser and hashed. Need to reverse the process before using
		// the URL.
		$unhashed = $this->unhash($url);
		if ($unhashed != $url) {
			$url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
		}

		$url = $this->encodeURLAttribute($url);

		$result = "<a href=\"$url\"";
		if (isset($title)) {
			$title = $this->encodeAttribute($title);
			$result .=  " title=\"$title\"";
		}
		$result .= ">$link_text</a>";

		return $this->hashPart($result);
	}
795
796
	/**
797
	 * Turn Markdown image shortcuts into <img> tags.
798
	 * @param  string $text
799
	 * @return string
800
	 */
801 47 View Code Duplication
	protected function doImages($text) {
		$reference_handler = array($this, '_doImages_reference_callback');
		$inline_handler    = array($this, '_doImages_inline_callback');

		// First, handle reference-style labeled images: ![alt text][id]
		$text = preg_replace_callback('{
			(				# wrap whole match in $1
			  !\[
				('.$this->nested_brackets_re.')		# alt text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]

			)
			}xs',
			$reference_handler, $text);

		// Next, handle inline images:  ![alt text](url "optional title")
		// Don't forget: encode * and _
		return preg_replace_callback('{
			(				# wrap whole match in $1
			  !\[
				('.$this->nested_brackets_re.')		# alt text = $2
			  \]
			  \s?			# One optional whitespace character
			  \(			# literal paren
				[ \n]*
				(?:
					<(\S*)>	# src url = $3
				|
					('.$this->nested_url_parenthesis_re.')	# src url = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# title = $7
				  \6		# matching quote
				  [ \n]*
				)?			# title is optional
			  \)
			)
			}xs',
			$inline_handler, $text);
	}
849
850
	/**
851
	 * Callback to parse references image tags
852
	 * @param  array $matches
853
	 * @return string
854
	 */
855 1
	protected function _doImages_reference_callback($matches) {
		$whole_match = $matches[1];
		$alt_text    = $matches[2];

		// An empty id, as in ![this][], means the alt text is the id.
		$link_id = strtolower($matches[3]);
		if ($link_id == "") {
			$link_id = strtolower($alt_text);
		}

		$alt_text = $this->encodeAttribute($alt_text);

		// If there's no such link ID, leave intact.
		if (!isset($this->urls[$link_id])) {
			return $whole_match;
		}

		$url = $this->encodeURLAttribute($this->urls[$link_id]);
		$tag = "<img src=\"$url\" alt=\"$alt_text\"";

		if (isset($this->titles[$link_id])) {
			$title = $this->encodeAttribute($this->titles[$link_id]);
			$tag .= " title=\"$title\"";
		}

		$tag .= $this->empty_element_suffix;

		return $this->hashPart($tag);
	}
882
883
	/**
884
	 * Callback to parse inline image tags
885
	 * @param  array $matches
886
	 * @return string
887
	 */
888 2
	protected function _doImages_inline_callback($matches) {
		// Groups: $2 = alt text, $3 = <bracketed> src, $4 = bare src,
		// $7 = optional title (absent from $matches when not given).
		$alt_text = $this->encodeAttribute($matches[2]);
		$url      = $matches[3] == '' ? $matches[4] : $matches[3];
		$title    = isset($matches[7]) ? $matches[7] : null;

		$url = $this->encodeURLAttribute($url);

		$result = "<img src=\"$url\" alt=\"$alt_text\"";
		if (isset($title)) {
			$title = $this->encodeAttribute($title);
			$result .=  " title=\"$title\"";
		}
		$result .= $this->empty_element_suffix;

		return $this->hashPart($result);
	}
905
906
	/**
907
	 * Parse Markdown heading elements to HTML
908
	 * @param  string $text
909
	 * @return string
910
	 */
911 48
	protected function doHeaders($text) {
		// Setext-style headers:
		//	  Header 1
		//	  ========
		//
		//	  Header 2
		//	  --------
		$setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';
		$text = preg_replace_callback($setext_re,
			array($this, '_doHeaders_callback_setext'), $text);

		// atx-style headers:
		//   # Header 1
		//   ## Header 2
		//   ## Header 2 with closing hashes ##
		//   ...
		//   ###### Header 6
		$atx_re = '{
				^(\#{1,6})	# $1 = string of #\'s
				[ ]*
				(.+?)		# $2 = Header text
				[ ]*
				\#*			# optional closing #\'s (not counted)
				\n+
			}xm';

		return preg_replace_callback($atx_re,
			array($this, '_doHeaders_callback_atx'), $text);
	}
943
944
	/**
	 * Setext header parsing callback
	 *
	 * Capture 1 is the header text and capture 2 the underline made of "="
	 * or "-" characters.
	 *
	 * @param  array $matches
	 * @return string Hashed header block, or the untouched match when it
	 *                looks like an empty list item
	 */
	protected function _doHeaders_callback_setext($matches) {
		// Terrible hack to check we haven't found an empty list item.
		if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) {
			return $matches[0];
		}

		// An "=" underline yields level 1, a "-" underline level 2.
		// Square brackets are used for the string offset: the curly-brace
		// form is no longer accepted by current PHP versions.
		$level = $matches[2][0] == '=' ? 1 : 2;

		// ID attribute generation
		$idAtt = $this->_generateIdFromHeaderValue($matches[1]);

		$block = "<h$level$idAtt>".$this->runSpanGamut($matches[1])."</h$level>";
		return "\n" . $this->hashBlock($block) . "\n\n";
	}
963
964
	/**
	 * ATX header parsing callback
	 *
	 * The header level is the number of "#" characters in capture 1;
	 * capture 2 is the header text.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doHeaders_callback_atx($matches) {
		$header_text = $matches[2];

		// ID attribute generation
		$id_attr = $this->_generateIdFromHeaderValue($header_text);

		$tag  = 'h' . strlen($matches[1]);
		$html = '<' . $tag . $id_attr . '>'
			. $this->runSpanGamut($header_text)
			. '</' . $tag . '>';

		return "\n" . $this->hashBlock($html) . "\n\n";
	}
977
978
	/**
	 * Generate an id attribute for a header when the header_id_func
	 * property holds a callable.
	 *
	 * The callable receives the header value; when it returns a truthy value
	 * the result is ` id="..."` (with a leading space, attribute-encoded).
	 * In every other case an empty string is returned.
	 *
	 * @param  string $headerValue
	 * @return string
	 */
	protected function _generateIdFromHeaderValue($headerValue) {
		$attribute = "";

		if (is_callable($this->header_id_func)) {
			$generated = call_user_func($this->header_id_func, $headerValue);
			if ($generated) {
				$attribute = ' id="' . $this->encodeAttribute($generated) . '"';
			}
		}

		return $attribute;
	}
999
1000
	/**
	 * Form HTML ordered (numbered) and unordered (bulleted) lists.
	 *
	 * The text is scanned twice, once per marker kind; every whole list found
	 * is handed to _doLists_callback().
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doLists($text) {
		$less_than_tab = $this->tab_width - 1;

		// Re-usable patterns to match list item bullets and number markers.
		// Each marker kind is paired with the other kind, which terminates
		// the list when found at the same indentation.
		$bullet_re = '[*+-]';
		$number_re = '\d+[\.]';
		$marker_pairs = array(
			$bullet_re => $number_re,
			$number_re => $bullet_re,
		);

		foreach ($marker_pairs as $marker_re => $other_marker_re) {
			// Re-usable pattern to match any entire ul or ol list:
			$whole_list_re = '
				(								# $1 = whole list
				  (								# $2
					([ ]{0,'.$less_than_tab.'})	# $3 = number of spaces
					('.$marker_re.')			# $4 = first list item marker
					[ ]+
				  )
				  (?s:.+?)
				  (								# $5
					  \z
					|
					  \n{2,}
					  (?=\S)
					  (?!						# Negative lookahead for another list item marker
						[ ]*
						'.$marker_re.'[ ]+
					  )
					|
					  (?=						# Lookahead for another kind of list
					    \n
						\3						# Must have the same indentation
						'.$other_marker_re.'[ ]+
					  )
				  )
				)
			'; // mx

			// We use a different prefix before nested lists than top-level
			// lists: inside a list a line start is enough, at top level the
			// list must follow a blank line or start the text.
			$prefix_re = $this->list_level
				? '^'
				: '(?:(?<=\n)\n|\A\n?) # Must eat the newline';

			$text = preg_replace_callback('{
						'.$prefix_re.'
						'.$whole_list_re.'
					}mx',
				array($this, '_doLists_callback'), $text);
		}

		return $text;
	}
1066
1067
	/**
	 * List parsing callback
	 *
	 * Capture 1 is the whole list and capture 4 its first item marker, which
	 * decides whether a `<ul>` or an `<ol>` is produced. When the
	 * enhanced_ordered_list property is set, an ordered list whose first
	 * number is greater than 1 gets a `start` attribute.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doLists_callback($matches) {
		// Re-usable patterns to match list item bullets and number markers:
		$marker_ul_re       = '[*+-]';
		$marker_ol_re       = '\d+[\.]';
		$marker_ol_start_re = '[0-9]+';

		$list = $matches[1];
		$list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";

		// Items are split using the marker kind of the first item only.
		$marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );

		$list .= "\n";
		$result = $this->processListItems($list, $marker_any_re);

		$ol_start = 1;
		if ($this->enhanced_ordered_list && $list_type == 'ol') {
			// Get the start number for ordered list.
			$ol_start_array = array();
			if (preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array)) {
				$ol_start = $ol_start_array[0];
			}
		}

		if ($ol_start > 1 && $list_type == 'ol'){
			$result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>");
		} else {
			$result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
		}
		return "\n". $result ."\n\n";
	}
1106
1107
	/**
1108
	 * Nesting tracker for list levels
1109
	 * @var integer
1110
	 */
1111
	protected $list_level = 0;
1112
1113
	/**
	 * Process the contents of a single ordered or unordered list, splitting it
	 * into individual list items.
	 *
	 * @param  string $list_str
	 * @param  string $marker_any_re Pattern matching the markers of this list
	 * @return string
	 */
	protected function processListItems($list_str, $marker_any_re) {
		/**
		 * $this->list_level tracks whether we are inside a list: it is
		 * incremented on entering a list and decremented on leaving it, so
		 * zero means we are not in a list.
		 *
		 * This matters because outside of a list, text such as:
		 *
		 *		I recommend upgrading to version
		 *		8. Oops, now this line is treated
		 *		as a sub-list.
		 *
		 * should remain a single paragraph even though its second line starts
		 * with a digit-period-space sequence, whereas inside a list (or
		 * sub-list) such a line is treated as the start of a sub-list. It is
		 * a kludge: this part of Markdown's syntax is hard to parse perfectly
		 * without mind-reading. A possible solution would be to change the
		 * syntax rules so that sub-lists must start with a starting cardinal
		 * number; e.g. "1." or "a.".
		 */
		++$this->list_level;

		// Trim trailing blank lines:
		$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

		$item_re = '{
			(\n)?							# leading line = $1
			(^[ ]*)							# leading whitespace = $2
			('.$marker_any_re.'				# list marker and space = $3
				(?:[ ]+|(?=\n))	# space only required if item is not empty
			)
			((?s:.*?))						# list item text   = $4
			(?:(\n+(?=\n))|\n)				# tailing blank line = $5
			(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
			}xm';

		$items = preg_replace_callback($item_re,
			array($this, '_processListItems_callback'), $list_str);

		--$this->list_level;
		return $items;
	}
1163
1164
	/**
	 * List item parsing callback
	 *
	 * An item that is preceded or followed by a blank line, or that contains
	 * one, is processed as block-level content; any other item only gets
	 * sub-list and paragraph processing without `<p>` wrapping.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _processListItems_callback($matches) {
		$item = $matches[4];

		// Captures 1 and 5 are optional, so they may be missing or empty.
		$next_to_blank_line = !empty($matches[1]) || !empty($matches[5]);

		if ($next_to_blank_line || preg_match('/\n{2,}/', $item)) {
			// Replace marker with the appropriate whitespace indentation
			$leading_space = isset($matches[2]) ? $matches[2] : '';
			$marker_padding = str_repeat(' ', strlen($matches[3]));
			$item = $this->outdent($leading_space . $marker_padding . $item);
			$item = $this->runBlockGamut($item . "\n");
		} else {
			// Recursion for sub-lists:
			$item = $this->formParagraphs(
				$this->doLists($this->outdent($item)), false);
		}

		return "<li>" . $item . "</li>\n";
	}
1190
1191
	/**
	 * Process Markdown `<pre><code>` blocks.
	 *
	 * Every run of lines indented by tab_width spaces is passed to
	 * _doCodeBlocks_callback().
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doCodeBlocks($text) {
		$code_block_re = '{
				(?:\n\n|\A\n?)
				(	            # $1 = the code block -- one or more lines, starting with a space/tab
				  (?>
					[ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
					.*\n+
				  )+
				)
				((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
			}xm';

		return preg_replace_callback($code_block_re,
			array($this, '_doCodeBlocks_callback'), $text);
	}
1211
1212
	/**
	 * Code block parsing callback
	 *
	 * Outdents the matched block, converts its content either with the
	 * code_block_content_func callable (when set) or with htmlspecialchars(),
	 * and wraps it in `<pre><code>`.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doCodeBlocks_callback($matches) {
		$code = $this->outdent($matches[1]);

		$code = $this->code_block_content_func
			? call_user_func($this->code_block_content_func, $code, "")
			: htmlspecialchars($code, ENT_NOQUOTES);

		// trim leading newlines and trailing newlines
		$code = preg_replace('/\A\n+|\n+\z/', '', $code);

		$html = "<pre><code>" . $code . "\n</code></pre>";
		return "\n\n" . $this->hashBlock($html) . "\n\n";
	}
1233
1234
	/**
	 * Create a code span markup for $code. Called from handleSpanToken.
	 *
	 * When code_span_content_func is set it receives the raw code; otherwise
	 * the code is trimmed and passed through htmlspecialchars().
	 *
	 * @param  string $code
	 * @return string
	 */
	protected function makeCodeSpan($code) {
		$content = $this->code_span_content_func
			? call_user_func($this->code_span_content_func, $code)
			: htmlspecialchars(trim($code), ENT_NOQUOTES);

		return $this->hashPart("<code>" . $content . "</code>");
	}
1247
1248
	/**
1249
	 * Define the emphasis operators with their regex matches
1250
	 * @var array
1251
	 */
1252
	protected $em_relist = array(
1253
		''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
1254
		'*' => '(?<![\s*])\*(?!\*)',
1255
		'_' => '(?<![\s_])_(?!_)',
1256
	);
1257
1258
	/**
1259
	 * Define the strong operators with their regex matches
1260
	 * @var array
1261
	 */
1262
	protected $strong_relist = array(
1263
		''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
1264
		'**' => '(?<![\s*])\*\*(?!\*)',
1265
		'__' => '(?<![\s_])__(?!_)',
1266
	);
1267
1268
	/**
1269
	 * Define the emphasis + strong operators with their regex matches
1270
	 * @var array
1271
	 */
1272
	protected $em_strong_relist = array(
1273
		''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
1274
		'***' => '(?<![\s*])\*\*\*(?!\*)',
1275
		'___' => '(?<![\s_])___(?!_)',
1276
	);
1277
1278
	/**
1279
	 * Container for prepared regular expressions
1280
	 * @var array
1281
	 */
1282
	protected $em_strong_prepared_relist;
1283
1284
	/**
	 * Prepare regular expressions for searching emphasis tokens in any
	 * context.
	 *
	 * For every combination of an em_relist key and a strong_relist key, one
	 * expression is stored in em_strong_prepared_relist under the
	 * concatenated key.
	 *
	 * @return void
	 */
	protected function prepareItalicsAndBold() {
		foreach ($this->em_relist as $em => $em_re) {
			foreach ($this->strong_relist as $strong => $strong_re) {
				$context = "$em$strong";

				// Allowed token expressions; the combined em+strong
				// expression, when one exists, must come first.
				$alternatives = array($em_re, $strong_re);
				if (isset($this->em_strong_relist[$context])) {
					array_unshift($alternatives, $this->em_strong_relist[$context]);
				}

				// Construct master expression from list.
				$this->em_strong_prepared_relist[$context] =
					'{(' . implode('|', $alternatives) . ')}';
			}
		}
	}
1306
1307
	/**
	 * Convert Markdown italics (emphasis) and bold (strong) to HTML
	 *
	 * The text is consumed token by token. Opening markers are pushed on
	 * $token_stack together with a fresh entry on $text_stack; a closing
	 * marker pops both and wraps the collected text in `<em>` and/or
	 * `<strong>`. Markers still open at the end of the text are emitted
	 * literally. The in_emphasis_processing flag makes nested calls return
	 * the text unchanged.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doItalicsAndBold($text) {
		if ($this->in_emphasis_processing) {
			return $text; // avoid reentrancy
		}
		$this->in_emphasis_processing = true;

		$token_stack = array('');
		$text_stack = array('');
		$em = '';
		$strong = '';
		$three_char_em = false;

		while (1) {
			// Get prepared regular expression for searching emphasis tokens
			// in current context.
			$token_re = $this->em_strong_prepared_relist["$em$strong"];

			// Each loop iteration searches for the next emphasis token.
			$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
			$text_stack[0] .= $parts[0];
			// References are used because $parts has a single element when
			// no token is found.
			$token =& $parts[1];
			$text =& $parts[2];

			if (empty($token)) {
				// Reached end of text span: empty stack without emitting
				// any more emphasis.
				while ($token_stack[0]) {
					$text_stack[1] .= array_shift($token_stack);
					$text_stack[0] .= array_shift($text_stack);
				}
				break;
			}

			$token_len = strlen($token);
			if ($three_char_em) {
				// Reached closing marker while inside a three-char emphasis.
				if ($token_len == 3) {
					// Three-char closing marker, close em and strong.
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<strong><em>$span</em></strong>";
					$text_stack[0] .= $this->hashPart($span);
					$em = '';
					$strong = '';
				} else {
					// Other closing marker: close one em or strong and
					// change current token state to match the other
					$token_stack[0] = str_repeat($token[0], 3-$token_len);
					$tag = $token_len == 2 ? "strong" : "em";
					$span = $text_stack[0];
					$span = $this->runSpanGamut($span);
					$span = "<$tag>$span</$tag>";
					$text_stack[0] = $this->hashPart($span);
					$$tag = ''; // $$tag stands for $em or $strong
				}
				$three_char_em = false;
			} else if ($token_len == 3) {
				if ($em) {
					// Reached closing marker for both em and strong.
					// Close the two innermost spans, whichever order they
					// were opened in:
					for ($i = 0; $i < 2; ++$i) {
						$shifted_token = array_shift($token_stack);
						$tag = strlen($shifted_token) == 2 ? "strong" : "em";
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<$tag>$span</$tag>";
						$text_stack[0] .= $this->hashPart($span);
						$$tag = ''; // $$tag stands for $em or $strong
					}
				} else {
					// Reached opening three-char emphasis marker. Push on token
					// stack; will be handled by the special condition above.
					$em = $token[0];
					$strong = "$em$em";
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$three_char_em = true;
				}
			} else if ($token_len == 2) {
				if ($strong) {
					// Unwind any dangling emphasis marker:
					if (strlen($token_stack[0]) == 1) {
						$text_stack[1] .= array_shift($token_stack);
						$text_stack[0] .= array_shift($text_stack);
						$em = '';
					}
					// Closing strong marker:
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<strong>$span</strong>";
					$text_stack[0] .= $this->hashPart($span);
					$strong = '';
				} else {
					// Opening strong marker.
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$strong = $token;
				}
			} else {
				// Here $token_len == 1
				if ($em) {
					if (strlen($token_stack[0]) == 1) {
						// Closing emphasis marker:
						array_shift($token_stack);
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<em>$span</em>";
						$text_stack[0] .= $this->hashPart($span);
						$em = '';
					} else {
						// Innermost open marker is not an emphasis one:
						// keep the token as plain text.
						$text_stack[0] .= $token;
					}
				} else {
					// Opening emphasis marker.
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$em = $token;
				}
			}
		}
		$this->in_emphasis_processing = false;
		return $text_stack[0];
	}
1436
1437
	/**
	 * Parse Markdown blockquotes to HTML
	 *
	 * Every run of quoted lines is passed to _doBlockQuotes_callback().
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doBlockQuotes($text) {
		$blockquote_re = '/
			  (								# Wrap whole match in $1
				(?>
				  ^[ ]*>[ ]?			# ">" at the start of a line
					.+\n					# rest of the first line
				  (.+\n)*					# subsequent consecutive lines
				  \n*						# blanks
				)+
			  )
			/xm';

		return preg_replace_callback($blockquote_re,
			array($this, '_doBlockQuotes_callback'), $text);
	}
1457
1458
	/**
	 * Blockquote parsing callback
	 *
	 * Strips one level of quoting, runs the block gamut on the content and
	 * wraps the two-space indented result in a `<blockquote>` element.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doBlockQuotes_callback($matches) {
		// trim one level of quoting - trim whitespace-only lines
		$quoted = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

		// recurse, then indent every line of the result by two spaces
		$quoted = preg_replace('/^/m', "  ", $this->runBlockGamut($quoted));

		// These leading spaces cause problem with <pre> content,
		// so we need to fix that:
		$quoted = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
			array($this, '_doBlockQuotes_callback2'), $quoted);

		$html = "<blockquote>\n" . $quoted . "\n</blockquote>";
		return "\n" . $this->hashBlock($html) . "\n\n";
	}
1477
1478
	/**
	 * Blockquote `<pre>` callback
	 *
	 * Removes two leading spaces from every line of the matched text.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doBlockQuotes_callback2($matches) {
		return preg_replace('/^  /m', '', $matches[1]);
	}
1488
1489
	/**
	 * Parse paragraphs
	 *
	 * The text is split on blank lines. A chunk consisting solely of a block
	 * hash key is replaced by the matching entry of html_hashes; any other
	 * chunk is run through the span gamut, optionally wrapped in `<p>` tags,
	 * and unhashed.
	 *
	 * @param  string $text String to process in paragraphs
	 * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
	 * @return string
	 */
	protected function formParagraphs($text, $wrap_in_p = true) {
		// Strip leading and trailing lines:
		$text = preg_replace('/\A\n+|\n+\z/', '', $text);

		$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

		// Wrap <p> tags and unhashify HTML blocks
		foreach ($grafs as $key => $value) {
			if (preg_match('/^B\x1A[0-9]+B$/', $value)) {
				// Is a block: substitute the stored HTML as-is.
				$grafs[$key] = $this->html_hashes[$value];
				continue;
			}

			// Is a paragraph.
			$value = $this->runSpanGamut($value);
			if ($wrap_in_p) {
				// Leading spaces are dropped in favour of the opening tag.
				$value = preg_replace('/^([ ]*)/', "<p>", $value);
				$value .= "</p>";
			}
			$grafs[$key] = $this->unhash($value);
		}

		return implode("\n\n", $grafs);
	}
1559
1560
	/**
	 * Encode text for a double-quoted HTML attribute.
	 *
	 * Ampersands and angle brackets are handled by encodeAmpsAndAngles(),
	 * then double quotes become `&quot;`. Single quotes are left untouched,
	 * so this is *not* suitable for attributes enclosed in single quotes.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function encodeAttribute($text) {
		return str_replace('"', '&quot;', $this->encodeAmpsAndAngles($text));
	}
1571
1572
	/**
	 * Encode text for a double-quoted HTML attribute containing a URL,
	 * applying the URL filter if set.
	 *
	 * A textual representation of the URL is stored in $text: the part after
	 * the scheme for `mailto:` and `tel:` URLs, the whole encoded URL
	 * otherwise. This function is *not* suitable for attributes enclosed in
	 * single quotes.
	 *
	 * @param  string $url
	 * @param  string &$text Passed by reference
	 * @return string        URL
	 */
	protected function encodeURLAttribute($url, &$text = null) {
		if ($this->url_filter_func) {
			$url = call_user_func($this->url_filter_func, $url);
		}

		// mailto: URLs are obfuscated; 7 is the length of "mailto:".
		if (preg_match('{^mailto:}i', $url)) {
			return $this->encodeEntityObfuscatedAttribute($url, $text, 7);
		}

		$is_tel = preg_match('{^tel:}i', $url);
		$url = $this->encodeAttribute($url);

		// 4 is the length of "tel:".
		$text = $is_tel ? substr($url, 4) : $url;

		return $url;
	}
1599
1600
	/**
	 * Smart processing for ampersands and angle brackets that need to
	 * be encoded.
	 *
	 * In no-entities mode every ampersand is encoded; otherwise ampersands
	 * that already start something looking like a character entity are left
	 * alone. Every `<` is encoded in both modes.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function encodeAmpsAndAngles($text) {
		// Ampersand-encoding of the second branch is based entirely on
		// Nat Irons's Amputator MT plugin:
		// <http://bumppo.net/projects/amputator/>
		$text = $this->no_entities
			? str_replace('&', '&amp;', $text)
			: preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', '&amp;', $text);

		// Encode remaining <'s
		return str_replace('<', '&lt;', $text);
	}
1621
1622
	/**
	 * Parse Markdown automatic links to anchor HTML tags
	 *
	 * URLs in angle brackets are processed first, then e-mail addresses in
	 * angle brackets.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function doAutoLinks($text) {
		// URLs with one of the accepted schemes: <http://example.com/>
		$url_re = '{<((https?|ftp|dict|tel):[^\'">\s]+)>}i';

		// Email addresses: <address@example.com>
		$email_re = '{
			<
			(?:mailto:)?
			(
				(?:
					[-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
				|
					".*?"
				)
				\@
				(?:
					[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
				|
					\[[\d.a-fA-F:]+\]	# IPv4 & IPv6
				)
			)
			>
			}xi';

		$text = preg_replace_callback($url_re,
			array($this, '_doAutoLinks_url_callback'), $text);

		return preg_replace_callback($email_re,
			array($this, '_doAutoLinks_email_callback'), $text);
	}
1654
1655
	/**
	 * Parse URL callback
	 *
	 * Capture 1 is the URL found between the angle brackets.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doAutoLinks_url_callback($matches) {
		// $label is filled in by encodeURLAttribute() through its
		// by-reference parameter.
		$label = null;
		$href = $this->encodeURLAttribute($matches[1], $label);

		return $this->hashPart('<a href="' . $href . '">' . $label . '</a>');
	}
1665
1666
	/**
	 * Parse email address callback
	 *
	 * Capture 1 is the address; it is turned into a `mailto:` link.
	 *
	 * @param  array $matches
	 * @return string
	 */
	protected function _doAutoLinks_email_callback($matches) {
		// $label is filled in by encodeURLAttribute() through its
		// by-reference parameter.
		$label = null;
		$href = $this->encodeURLAttribute("mailto:" . $matches[1], $label);

		return $this->hashPart('<a href="' . $href . '">' . $label . '</a>');
	}
1677
1678
	/**
	 * Input: some text to obfuscate, e.g. "mailto:foo@example.com"
	 *
	 * Output: the same text but with most characters encoded as either a
	 *         decimal or hex entity, in the hopes of foiling most address
	 *         harvesting spam bots. E.g.:
	 *
	 *        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
	 *        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
	 *        &#x6d;
	 *
	 * Note: the additional output $tail is assigned the same value as the
	 * output, minus the number of characters specified by $head_length.
	 *
	 * Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
	 * With some optimizations by Milian Wolff. Forced encoding of HTML
	 * attribute special characters by Allan Odgaard.
	 *
	 * @param  string  $text
	 * @param  string  &$tail
	 * @param  integer $head_length
	 * @return string
	 */
	protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
		if ($text == "") {
			return $tail = "";
		}

		// Split between every character of the text.
		$chars = preg_split('/(?<!^)(?!$)/', $text);
		$seed = (int)abs(crc32($text) / strlen($text)); // Deterministic seed.

		foreach ($chars as $key => $char) {
			$ord = ord($char);
			if ($ord >= 128) {
				// Ignore non-ascii chars.
				continue;
			}

			$r = ($seed * (1 + $key)) % 100; // Pseudo-random function.

			// roughly 10% raw, 45% hex, 45% dec
			// '@' *must* be encoded. I insist.
			// '"', '&' and '>' have to be encoded inside the attribute
			if ($r > 90 && strpos('@"&>', $char) === false) {
				continue; // left raw
			}

			$chars[$key] = $r < 45
				? '&#x'.dechex($ord).';'
				: '&#'.$ord.';';
		}

		$text = implode('', $chars);
		$tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;

		return $text;
	}
1732
1733
	/**
	 * Take the string $str and parse it into tokens, hashing embedded HTML,
	 * escaped characters and handling code spans.
	 *
	 * HTML constructs are only treated as tokens when the no_markup
	 * property is not set.
	 *
	 * @param  string $str
	 * @return string
	 */
	protected function parseSpan($str) {
		// Token alternatives for HTML markup, left out in no-markup mode.
		$markup_re = $this->no_markup ? '' : '
				|
					<!--    .*?     -->		# comment
				|
					<\?.*?\?> | <%.*?%>		# processing instruction
				|
					<[!$]?[-a-zA-Z0-9:_]+	# regular tags
					(?>
						\s
						(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
					)?
					>
				|
					<[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
				|
					</[-a-zA-Z0-9:_]+\s*> # closing tag
			';

		$span_re = '{
				(
					\\\\'.$this->escape_chars_re.'
				|
					(?<![`\\\\])
					`+						# code span marker
			'.$markup_re.'
				)
				}xs';

		$output = '';
		for (;;) {
			// Each loop iteration searches for either the next tag, the next
			// opening code span marker, or the next escaped character.
			// Each token is then passed to handleSpanToken.
			$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

			// Text preceding the token goes to the output untouched.
			$output .= $parts[0];

			// Check if we reach the end.
			if (!isset($parts[1])) {
				break;
			}

			// handleSpanToken() may consume part of the remaining text
			// through its by-reference second argument.
			$output .= $this->handleSpanToken($parts[1], $parts[2]);
			$str = $parts[2];
		}

		return $output;
	}
1790
1791
	/**
	 * Handle $token provided by parseSpan by determining its nature and
	 * returning the corresponding value that should replace it.
	 *
	 * - A backslash escape becomes the hashed numeric entity of the escaped
	 *   character.
	 * - A run of backticks opens a code span: when a matching closing run is
	 *   found in $str, the span is consumed from $str and returned as hashed
	 *   code markup; otherwise the backticks are returned as plain text.
	 * - Any other token is hashed unchanged.
	 *
	 * @param  string $token
	 * @param  string &$str Text following the token; shortened when a code
	 *                      span is consumed
	 * @return string
	 */
	protected function handleSpanToken($token, &$str) {
		// Square brackets are used for string offsets: the curly-brace form
		// is no longer accepted by current PHP versions.
		switch ($token[0]) {
			case "\\":
				return $this->hashPart("&#". ord($token[1]). ";");
			case "`":
				// Search for end marker in remaining text.
				if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
					$str, $matches))
				{
					$str = $matches[2];
					$codespan = $this->makeCodeSpan($matches[1]);
					return $this->hashPart($codespan);
				}
				return $token; // Return as text since no ending marker found.
			default:
				return $this->hashPart($token);
		}
	}
1816
1817
	/**
	 * Remove one level of line-leading tabs or spaces
	 *
	 * At the start of each line, either one tab or between one and tab_width
	 * spaces are removed.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function outdent($text) {
		$indent_re = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';

		return preg_replace($indent_re, '', $text);
	}
1825
1826
1827
	/**
1828
	 * String length function for detab. `_initDetab` will create a function to
1829
	 * handle UTF-8 if the default function does not exist.
1830
	 * @var string|callable
1831
	 */
1832
	protected $utf8_strlen = 'mb_strlen';
1833
1834
	/**
1835
	 * Replace tabs with the appropriate amount of spaces.
1836
	 *
1837
	 * For each line we separate the line in blocks delimited by tab characters.
1838
	 * Then we reconstruct every line by adding the appropriate number of spaces
1839
	 * between each block.
1840
	 *
1841
	 * @param  string $text
1842
	 * @return string
1843
	 */
1844 107
	protected function detab($text) {
		// Only lines that contain at least one tab match the pattern and are
		// passed to the callback; all other text is returned untouched.
		$expand_tabs = array($this, '_detab_callback');
		return preg_replace_callback('/^.*\t.*$/m', $expand_tabs, $text);
	}
1850
1851
	/**
1852
	 * Replace tabs callback
1853
	 * @param  array $matches
1854
	 * @return string
1855
	 */
1856 34
	protected function _detab_callback($matches) {
		// Length function to use for column counting (see `utf8_strlen`).
		$count_chars = $this->utf8_strlen;

		// Cut the matched line at every tab character.
		$segments = explode("\t", $matches[0]);

		// The first segment starts the rebuilt line as-is; explode() always
		// yields at least one element, so this is never empty-handed.
		$rebuilt = array_shift($segments);

		foreach ($segments as $segment) {
			// Pad with spaces up to the next multiple of `tab_width`, then
			// append the text that followed the tab.
			$padding = $this->tab_width -
				($count_chars($rebuilt, 'UTF-8') % $this->tab_width);
			$rebuilt .= str_repeat(" ", $padding) . $segment;
		}

		return $rebuilt;
	}
1873
1874
	/**
1875
	 * Check for the availability of the function in the `utf8_strlen` property
1876
	 * (initially `mb_strlen`). If the function is not available, create a
1877
	 * function that will loosely count the number of UTF-8 characters with a
1878
	 * regular expression.
1879
	 * @return void
1880
	 */
1881 2
	protected function _initDetab() {
		// Nothing to do when the configured length function can already be
		// invoked. is_callable() is used instead of function_exists() because
		// the property may hold a closure (for instance the fallback assigned
		// below by an earlier call), and function_exists() only accepts a
		// function-name string.
		if (is_callable($this->utf8_strlen)) {
			return;
		}

		// Fallback: loosely count UTF-8 characters with a regular expression.
		// Each byte in 0x00-0xBF counts as one character, and each byte in
		// 0xC0-0xFF together with the 0x80-0xBF bytes that follow it counts
		// as one character. preg_match_all() returns the number of matches.
		$this->utf8_strlen = function($text) {
			return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m);
		};
	}
1891
1892
	/**
1893
	 * Swap back in all the tags hashed by _HashHTMLBlocks.
1894
	 * @param  string $text
1895
	 * @return string
1896
	 */
1897 107
	protected function unhash($text) {
		// A hash token is one boundary character, the \x1A marker, a run of
		// digits, and then the same boundary character again.
		$token_re = '/(.)\x1A[0-9]+\1/';
		$restore = array($this, '_unhash_callback');
		return preg_replace_callback($token_re, $restore, $text);
	}
1901
1902
	/**
1903
	 * Unhashing callback
1904
	 * @param  array $matches
1905
	 * @return string
1906
	 */
1907 85
	protected function _unhash_callback($matches) {
		// The whole match is the key under which the original text was stored.
		$key = $matches[0];

		// The unhash pattern can also match text that merely looks like a
		// hash token. Return such text unchanged rather than reading an
		// undefined array key, which raises a warning on PHP 8 and would
		// replace the text with an empty string.
		return isset($this->html_hashes[$key]) ? $this->html_hashes[$key] : $key;
	}
1910
}
1911