Passed
Pull Request — lib (#351)
by Aurélien
03:02
created

Markdown::_doImages_reference_callback()   B

Complexity

Conditions 9
Paths 38

Size

Total Lines 34

Duplication

Lines 12
Ratio 35.29 %

Code Coverage

Tests 17
CRAP Score 10.4384

Importance

Changes 0
Metric Value
dl 12
loc 34
ccs 17
cts 23
cp 0.7391
rs 8.0555
c 0
b 0
f 0
cc 9
nc 38
nop 1
crap 10.4384
1
<?php
2
/**
3
 * Markdown  -  A text-to-HTML conversion tool for web writers
4
 *
5
 * @package   php-markdown
6
 * @author    Michel Fortin <[email protected]>
7
 * @copyright 2004-2019 Michel Fortin <https://michelf.com/projects/php-markdown/>
8
 * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
9
 */
10
11
namespace Michelf;
12
13
/**
14
 * Markdown Parser Class
15
 */
16
class Markdown implements MarkdownInterface {
17
	/**
18
	 * Define the package version
19
	 * @var string
20
	 */
21
	const MARKDOWNLIB_VERSION = "1.9.0";
22
23
	/**
24
	 * Simple function interface - Initialize the parser and return the result
25
	 * of its transform method. This will work fine for derived classes too.
26
	 *
27
	 * @api
28
	 *
29
	 * @param  string $text
30
	 * @return string
31
	 */
32 107
	public static function defaultTransform($text) {
		// Parsers are memoised per calling class, so a derived class invoking
		// this method gets (and keeps reusing) an instance of itself.
		static $parser_list;

		$called_class = \get_called_class();
		if (empty($parser_list[$called_class])) {
			// First call for this class: build the parser and remember it.
			$parser_list[$called_class] = new $called_class;
		}

		// Delegate the actual conversion to the cached parser.
		return $parser_list[$called_class]->transform($text);
	}
48
49
	/**
50
	 * Configuration variables
51
	 */
52
53
	/**
54
	 * Change to ">" for HTML output.
55
	 * @var string
56
	 */
57
	public $empty_element_suffix = " />";
58
59
	/**
60
	 * The width of indentation of the output markup
61
	 * @var int
62
	 */
63
	public $tab_width = 4;
64
65
	/**
66
	 * Change to `true` to disallow markup or entities.
67
	 * @var boolean
68
	 */
69
	public $no_markup   = false;
70
	public $no_entities = false;
71
72
73
	/**
74
	 * Change to `true` to enable line breaks on \n without two trailing spaces
75
	 * @var boolean
76
	 */
77
	public $hard_wrap = false;
78
79
	/**
80
	 * Predefined URLs and titles for reference links and images.
81
	 * @var array
82
	 */
83
	public $predef_urls   = array();
84
	public $predef_titles = array();
85
86
	/**
87
	 * Optional filter function for URLs
88
	 * @var callable|null
89
	 */
90
	public $url_filter_func = null;
91
92
	/**
93
	 * Optional header id="" generation callback function.
94
	 * @var callable|null
95
	 */
96
	public $header_id_func = null;
97
98
	/**
99
	 * Optional function for converting code block content to HTML
100
	 * @var callable|null
101
	 */
102
	public $code_block_content_func = null;
103
104
	/**
105
	 * Optional function for converting code span content to HTML.
106
	 * @var callable|null
107
	 */
108
	public $code_span_content_func = null;
109
110
	/**
111
	 * Class attribute to toggle "enhanced ordered list" behaviour
112
	 * setting this to true will allow ordered lists to start from the index
113
	 * number that is defined first.
114
	 *
115
	 * For example:
116
	 * 2. List item two
117
	 * 3. List item three
118
	 *
119
	 * Becomes:
120
	 * <ol start="2">
121
	 * <li>List item two</li>
122
	 * <li>List item three</li>
123
	 * </ol>
124
	 *
125
	 * @var bool
126
	 */
127
	public $enhanced_ordered_list = false;
128
129
	/**
130
	 * Parser implementation
131
	 */
132
133
	/**
134
	 * Regex to match balanced [brackets].
135
	 * Needed to insert a maximum bracket depth while converting to PHP.
136
	 * @var int
137
	 */
138
	protected $nested_brackets_depth = 6;
139
	protected $nested_brackets_re;
140
141
	protected $nested_url_parenthesis_depth = 4;
142
	protected $nested_url_parenthesis_re;
143
144
	/**
145
	 * Table of hash values for escaped characters:
146
	 * @var string
147
	 */
148
	protected $escape_chars = '\`*_{}[]()>#+-.!';
149
	protected $escape_chars_re;
150
151
	/**
152
	 * Constructor function. Initialize appropriate member variables.
153
	 * @return void
0 ignored issues
show
Comprehensibility Best Practice introduced by
Adding a @return annotation to constructors is generally not recommended as a constructor does not have a meaningful return value.

Adding a @return annotation to a constructor is not recommended, since a constructor does not have a meaningful return value.

Please refer to the PHP core documentation on constructors.

Loading history...
154
	 */
155 5
	public function __construct() {
		$this->_initDetab();
		$this->prepareItalicsAndBold();

		// Balanced [brackets]: the opening fragment is repeated once per
		// allowed nesting level and then closed the same number of times.
		$bracket_levels = $this->nested_brackets_depth;
		$bracket_open   = str_repeat('(?>[^\[\]]+|\[', $bracket_levels);
		$bracket_close  = str_repeat('\])*', $bracket_levels);
		$this->nested_brackets_re = $bracket_open . $bracket_close;

		// Same construction for parentheses nested inside a URL.
		$paren_levels = $this->nested_url_parenthesis_depth;
		$paren_open   = str_repeat('(?>[^()\s]+|\(', $paren_levels);
		$paren_close  = str_repeat('(?>\)))*', $paren_levels);
		$this->nested_url_parenthesis_re = $paren_open . $paren_close;

		// Character class matching any one of the escapable characters.
		$this->escape_chars_re = '[' . preg_quote($this->escape_chars) . ']';

		// Order every gamut by ascending priority value; asort() keeps the
		// method names (the array keys) attached to their priorities.
		foreach (array('document_gamut', 'block_gamut', 'span_gamut') as $gamut) {
			asort($this->$gamut);
		}
	}
174
175
176
	/**
177
	 * Internal hashes used during transformation.
178
	 * @var array
179
	 */
180
	protected $urls        = array();
181
	protected $titles      = array();
182
	protected $html_hashes = array();
183
184
	/**
185
	 * Status flag to avoid invalid nesting.
186
	 * @var boolean
187
	 */
188
	protected $in_anchor = false;
189
190
	/**
191
	 * Status flag to avoid invalid nesting.
192
	 * @var boolean
193
	 */
194
	protected $in_emphasis_processing = false;
195
196
	/**
197
	 * Called before the transformation process starts to setup parser states.
198
	 * @return void
199
	 */
200 110
	protected function setup() {
		// Nesting guards start out cleared.
		$this->in_anchor              = false;
		$this->in_emphasis_processing = false;

		// No HTML has been hashed yet for this run.
		$this->html_hashes = array();

		// Reference tables are seeded from the predefined URLs and titles.
		$this->urls   = $this->predef_urls;
		$this->titles = $this->predef_titles;
	}
208
209
	/**
210
	 * Called after the transformation process to clear any variable which may
211
	 * be taking up memory unnecessarily.
212
	 * @return void
213
	 */
214 110
	protected function teardown() {
		// Drop the per-run tables; each property receives its own empty array.
		$this->urls = $this->titles = $this->html_hashes = array();
	}
219
220
	/**
221
	 * Main function. Performs some preprocessing on the input text and pass
222
	 * it through the document gamut.
223
	 *
224
	 * @api
225
	 *
226
	 * @param  string $text
227
	 * @return string
228
	 */
229 110
	public function transform($text) {
		$this->setup();

		// Input clean-up, applied in this order:
		//  1. remove a leading UTF-8 BOM and any \x1A marker character;
		//  2. standardize DOS (\r\n) and Mac (\r) line endings to Unix (\n).
		$text = preg_replace(
			array('{^\xEF\xBB\xBF|\x1A}', '{\r\n?}'),
			array('', "\n"),
			$text
		);

		// Make sure the text ends with a couple of newlines, then convert
		// tabs to spaces and turn block-level HTML into hash entries.
		$text = $this->detab($text . "\n\n");
		$text = $this->hashHTMLBlocks($text);

		// Empty out lines made only of spaces, so that later patterns can
		// match consecutive blank lines with a plain /\n+/.
		$text = preg_replace('/^[ ]+$/m', '', $text);

		// Run every document gamut method, in the array's order.
		foreach (array_keys($this->document_gamut) as $step) {
			$text = $this->$step($text);
		}

		$this->teardown();

		return $text . "\n";
	}
263
264
	/**
265
	 * Define the document gamut
266
	 * @var array
267
	 */
268
	protected $document_gamut = array(
269
		// Strip link definitions, store in hashes.
270
		"stripLinkDefinitions" => 20,
271
		"runBasicBlockGamut"   => 30,
272
	);
273
274
	/**
275
	 * Strips link definitions from text, stores the URLs and titles in
276
	 * hash references
277
	 * @param  string $text
278
	 * @return string
279
	 */
280 48 View Code Duplication
	protected function stripLinkDefinitions($text) {
		// A definition may be indented by at most (tab_width - 1) spaces.
		$max_indent = $this->tab_width - 1;

		// Link defs are in the form: ^[id]: url "optional title"
		// Each match is handed to the callback, which records it and returns
		// the text that takes its place.
		return preg_replace_callback('{
							^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?:	# id = $1
							  [ ]*
							  \n?				# maybe *one* newline
							  [ ]*
							(?:
							  <(.+?)>			# url = $2
							|
							  (\S+?)			# url = $3
							)
							  [ ]*
							  \n?				# maybe one newline
							  [ ]*
							(?:
								(?<=\s)			# lookbehind for whitespace
								["(]
								(.*?)			# title = $4
								[")]
								[ ]*
							)?	# title is optional
							(?:\n+|\Z)
			}xm',
			array($this, '_stripLinkDefinitions_callback'),
			$text
		);
	}
312
313
	/**
314
	 * The callback to strip link definitions
315
	 * @param  array $matches
316
	 * @return string
317
	 */
318 9
	protected function _stripLinkDefinitions_callback($matches) {
		// Ids are stored lower-cased.
		$id = strtolower($matches[1]);

		// The URL sits in $2 when written as <url>, otherwise in $3.
		if ($matches[2] == '') {
			$this->urls[$id] = $matches[3];
		} else {
			$this->urls[$id] = $matches[2];
		}

		// Bound by reference so that a definition without a title does not
		// raise an undefined-index notice; the stored title is then null.
		$this->titles[$id] =& $matches[4];

		// The whole definition is replaced by an empty string.
		return '';
	}
325
326
	/**
327
	 * Hashify HTML blocks
328
	 * @param  string $text
329
	 * @return string
330
	 */
331 48
	protected function hashHTMLBlocks($text) {
		// Nothing to hash when raw markup is disabled.
		if ($this->no_markup) {
			return $text;
		}

		// A block's start tag may be indented by at most (tab_width - 1) spaces.
		$max_lead = $this->tab_width - 1;

		// Only block-level tags are hashed here, so that inline HTML inside a
		// paragraph still gets wrapped in <p> later on.
		//  - Group "a": tags that can be inline or block-level. They count as
		//    block-level only when the start tag is alone on its line.
		//  - Group "b": tags that are always block-level.
		$tags_a = 'ins|del';
		$tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
				  'script|noscript|style|form|fieldset|iframe|math|svg|'.
				  'article|section|nav|aside|hgroup|header|footer|'.
				  'figure|details|summary';

		// How many levels of same-name nested tags the content pattern covers.
		$max_nesting = 4;

		// Optional attributes of a tag.
		$attr = '
			(?>				# optional tag attributes
			  \s			# starts with whitespace
			  (?>
				[^>"/]+		# text outside quotes
			  |
				/+(?!>)		# slash not followed by ">"
			  |
				"[^"]*"		# text inside double quotes (tolerate ">")
			  |
				\'[^\']*\'	# text inside single quotes (tolerate ">")
			  )*
			)?
			';

		// Content of a block tag: the opening fragment is repeated once per
		// nesting level, followed by the innermost content, followed by the
		// closing fragment repeated the same number of times.
		$nest_open = str_repeat('
				(?>
				  [^<]+			# content without tag
				|
				  <\2			# nested opening tag
					'.$attr.'	# attributes
					(?>
					  />
					|
					  >', $max_nesting);
		$nest_close = str_repeat('
					  </\2\s*>	# closing nested tag
					)
				  |
					<(?!/\2\s*>	# other tags with a different name
				  )
				)*', $max_nesting);
		$content = $nest_open . '.*?' . $nest_close;

		// Variant for group "a", whose tag name is captured in $3 instead of $2.
		$content_a = str_replace('\2', '\3', $content);

		// Every match is handed to the callback, which returns the text that
		// takes its place. Alternatives, in order: group "b" blocks, group "a"
		// blocks, <hr>, standalone HTML comments, <? ?> / <% %> instructions.
		return preg_replace_callback('{(?>
			(?>
				(?<=\n)			# Starting on its own line
				|				# or
				\A\n?			# the at beginning of the doc
			)
			(						# save in $1

			  # Match from `\n<tag>` to `</tag>\n`, handling nested tags
			  # in between.

						[ ]{0,'.$max_lead.'}
						<('.$tags_b.')# start tag = $2
						'.$attr.'>			# attributes followed by > and \n
						'.$content.'		# content, support nesting
						</\2>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special version for tags of group a.

						[ ]{0,'.$max_lead.'}
						<('.$tags_a.')# start tag = $3
						'.$attr.'>[ ]*\n	# attributes followed by >
						'.$content_a.'		# content, support nesting
						</\3>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special case just for <hr />. It was easier to make a special
			  # case than to make the other regex more complicated.

						[ ]{0,'.$max_lead.'}
						<(hr)				# start tag = $2
						'.$attr.'			# attributes
						/?>					# the matching end tag
						[ ]*
						(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # Special case for standalone HTML comments:

					[ ]{0,'.$max_lead.'}
					(?s:
						<!-- .*? -->
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # PHP and ASP-style processor instructions (<? and <%)

					[ ]{0,'.$max_lead.'}
					(?s:
						<([?%])			# $2
						.*?
						\2>
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			)
			)}Sxmi',
			array($this, '_hashHTMLBlocks_callback'),
			$text
		);
	}
478
479
	/**
480
	 * The callback for hashing HTML blocks
481
	 * @param  array $matches
482
	 * @return string
483
	 */
484 9
	protected function _hashHTMLBlocks_callback($matches) {
		// $1 holds the matched HTML block; its block-level token is returned
		// with a blank line on either side.
		return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
	}
489
490
	/**
491
	 * Called whenever a tag must be hashed when a function inserts an atomic
492
	 * element in the text stream. Passing $text through this function gives
493
	 * a unique text-token which will be reverted back when calling unhash.
494
	 *
495
	 * The $boundary argument specify what character should be used to surround
496
	 * the token. By convention, "B" is used for block elements that need not
497
	 * to be wrapped into paragraph tags at the end, ":" is used for elements
498
	 * that are word separators and "X" is used in the general case.
499
	 *
500
	 * @param  string $text
501
	 * @param  string $boundary
502
	 * @return string
503
	 */
504 110
	protected function hashPart($text, $boundary = 'X') {
		// Counter kept across calls so that each generated token is distinct.
		static $i = 0;

		// Swap back any token already present in $text, so the final unhash
		// does not have to be applied several times.
		$restored = $this->unhash($text);

		// Token layout: boundary, \x1A, sequence number, boundary.
		$i++;
		$token = $boundary . "\x1A" . $i . $boundary;
		$this->html_hashes[$token] = $restored;

		return $token; // String that will replace the tag.
	}
515
516
	/**
517
	 * Shortcut function for hashPart with block-level boundaries.
518
	 * @param  string $text
519
	 * @return string
520
	 */
521 110
	protected function hashBlock($text) {
		// 'B' is the boundary character used for block-level tokens.
		$block_boundary = 'B';
		return $this->hashPart($text, $block_boundary);
	}
524
525
	/**
526
	 * Define the block gamut - these are all the transformations that form
527
	 * block-level tags like paragraphs, headers, and list items.
528
	 * @var array
529
	 */
530
	protected $block_gamut = array(
531
		"doHeaders"         => 10,
532
		"doHorizontalRules" => 20,
533
		"doLists"           => 40,
534
		"doCodeBlocks"      => 50,
535
		"doBlockQuotes"     => 60,
536
	);
537
538
	/**
539
	 * Run block gamut transformations.
540
	 *
541
	 * We need to escape raw HTML in Markdown source before doing anything
542
	 * else. This need to be done for each block, and not only at the
543
	 * beginning in the Markdown function since hashed blocks can be part of
544
	 * list items and could have been indented. Indented blocks would have
545
	 * been seen as a code block in a previous pass of hashHTMLBlocks.
546
	 *
547
	 * @param  string $text
548
	 * @return string
549
	 */
550 20
	protected function runBlockGamut($text) {
		// Hash raw HTML blocks first, then apply the regular block gamut.
		return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
	}
554
555
	/**
556
	 * Run block gamut transformations, without hashing HTML blocks. This is
557
	 * useful when HTML blocks are known to be already hashed, like in the first
558
	 * whole-document pass.
559
	 *
560
	 * @param  string $text
561
	 * @return string
562
	 */
563 110
	protected function runBasicBlockGamut($text) {
		// Apply each block-level method in the gamut's order; the priority
		// values themselves are not needed here.
		foreach (array_keys($this->block_gamut) as $step) {
			$text = $this->$step($text);
		}

		// Finally form paragraphs and restore hashed blocks.
		return $this->formParagraphs($text);
	}
574
575
	/**
576
	 * Convert horizontal rules
577
	 * @param  string $text
578
	 * @return string
579
	 */
580 110
	protected function doHorizontalRules($text) {
		// A single <hr> token is created and used for every rule found; it is
		// placed on a line of its own.
		$hr_token    = $this->hashBlock('<hr' . $this->empty_element_suffix);
		$replacement = "\n" . $hr_token . "\n";

		$rule_re = '{
				^[ ]{0,3}	# Leading space
				([-*_])		# $1: First marker
				(?>			# Repeated marker group
					[ ]{0,2}	# Zero, one, or two spaces.
					\1			# Marker character
				){2,}		# Group repeated at least twice
				[ ]*		# Tailing spaces
				$			# End of line.
			}mx';

		return preg_replace($rule_re, $replacement, $text);
	}
596
597
	/**
598
	 * These are all the transformations that occur *within* block-level
599
	 * tags like paragraphs, headers, and list items.
600
	 * @var array
601
	 */
602
	protected $span_gamut = array(
603
		// Process character escapes, code spans, and inline HTML
604
		// in one shot.
605
		"parseSpan"           => -30,
606
		// Process anchor and image tags. Images must come first,
607
		// because ![foo][f] looks like an anchor.
608
		"doImages"            =>  10,
609
		"doAnchors"           =>  20,
610
		// Make links out of things like `<https://example.com/>`
611
		// Must come after doAnchors, because you can use < and >
612
		// delimiters in inline links like [this](<url>).
613
		"doAutoLinks"         =>  30,
614
		"encodeAmpsAndAngles" =>  40,
615
		"doItalicsAndBold"    =>  50,
616
		"doHardBreaks"        =>  60,
617
	);
618
619
	/**
620
	 * Run span gamut transformations
621
	 * @param  string $text
622
	 * @return string
623
	 */
624 109
	protected function runSpanGamut($text) {
		// Apply each span-level method in the gamut's order; the priority
		// values themselves are not needed here.
		foreach (array_keys($this->span_gamut) as $step) {
			$text = $this->$step($text);
		}

		return $text;
	}
631
632
	/**
633
	 * Do hard breaks
634
	 * @param  string $text
635
	 * @return string
636
	 */
637 109
	protected function doHardBreaks($text) {
		// With hard_wrap enabled every newline becomes a break; otherwise the
		// newline must be preceded by at least two spaces.
		$break_re = $this->hard_wrap ? '/ *\n/' : '/ {2,}\n/';

		return preg_replace_callback(
			$break_re,
			array($this, '_doHardBreaks_callback'),
			$text
		);
	}
646
647
	/**
648
	 * Trigger part hashing for the hard break (callback method)
649
	 * @param  array $matches
650
	 * @return string
651
	 */
652 3
	protected function _doHardBreaks_callback($matches) {
		// $matches is not read: every break is replaced by the same markup,
		// a hashed <br> followed by a newline.
		$break_html = '<br' . $this->empty_element_suffix . "\n";
		return $this->hashPart($break_html);
	}
655
656
	/**
657
	 * Turn Markdown link shortcuts into XHTML <a> tags.
658
	 * @param  string $text
659
	 * @return string
660
	 */
661 47 View Code Duplication
	protected function doAnchors($text) {
		// Links are not processed while another link is being processed.
		if ($this->in_anchor) {
			return $text;
		}
		$this->in_anchor = true;

		$brackets_re     = $this->nested_brackets_re;
		$parenthesis_re  = $this->nested_url_parenthesis_re;
		$reference_cb    = array($this, '_doAnchors_reference_callback');
		$inline_cb       = array($this, '_doAnchors_inline_callback');

		// Reference-style links: [link text] [id]
		$reference_re = '{
			(					# wrap whole match in $1
			  \[
				('.$brackets_re.')	# link text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]
			)
			}xs';

		// Inline-style links: [link text](url "optional title")
		$inline_re = '{
			(				# wrap whole match in $1
			  \[
				('.$brackets_re.')	# link text = $2
			  \]
			  \(			# literal paren
				[ \n]*
				(?:
					<(.+?)>	# href = $3
				|
					('.$parenthesis_re.')	# href = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# Title = $7
				  \6		# matching quote
				  [ \n]*	# ignore any spaces/tabs between closing quote and )
				)?			# title is optional
			  \)
			)
			}xs';

		// Reference-style shortcuts: [link text]
		$shortcut_re = '{
			(					# wrap whole match in $1
			  \[
				([^\[\]]+)		# link text = $2; can\'t contain [ or ]
			  \]
			)
			}xs';

		// Order matters: shortcuts go last in case the text also contains
		// [link text][1] or [link text](/foo).
		$text = preg_replace_callback($reference_re, $reference_cb, $text);
		$text = preg_replace_callback($inline_re, $inline_cb, $text);
		$text = preg_replace_callback($shortcut_re, $reference_cb, $text);

		$this->in_anchor = false;
		return $text;
	}
724
725
	/**
726
	 * Callback method to parse referenced anchors
727
	 * @param  array $matches
728
	 * @return string
729
	 */
730 9
	protected function _doAnchors_reference_callback($matches) {
		// $3 is missing for the shortcut pattern [this] and empty for
		// [this][]; in both cases the link text doubles as the id.
		if (isset($matches[3]) && $matches[3] != "") {
			$id = $matches[3];
		} else {
			$id = $matches[2];
		}

		// lower-case and turn embedded newlines into spaces
		$id = preg_replace('{[ ]?\n}', ' ', strtolower($id));

		// Unknown id: give back the matched text untouched.
		if (!isset($this->urls[$id])) {
			return $matches[1];
		}

		$href = $this->encodeURLAttribute($this->urls[$id]);
		$html = '<a href="' . $href . '"';

		if (isset($this->titles[$id])) {
			$html .= ' title="' . $this->encodeAttribute($this->titles[$id]) . '"';
		}

		// The link text gets span-level processing of its own.
		$html .= '>' . $this->runSpanGamut($matches[2]) . '</a>';

		return $this->hashPart($html);
	}
763
764
	/**
765
	 * Callback method to parse inline anchors
766
	 * @param  array $matches
767
	 * @return string
768
	 */
769 10
	protected function _doAnchors_inline_callback($matches) {
		$label = $this->runSpanGamut($matches[2]);

		// The href sits in $3 when written as <url>, otherwise in $4.
		if ($matches[3] === '') {
			$href = $matches[4];
		} else {
			$href = $matches[3];
		}

		// If the URL was of the form <s p a c e s> it got caught by the HTML
		// tag parser and hashed. Reverse that and strip the angle brackets
		// before using the URL.
		$restored = $this->unhash($href);
		if ($restored !== $href) {
			$href = preg_replace('/^<(.*)>$/', '\1', $restored);
		}

		$html = '<a href="' . $this->encodeURLAttribute($href) . '"';

		// $7 is only present when a quoted title was given.
		if (isset($matches[7])) {
			$html .= ' title="' . $this->encodeAttribute($matches[7]) . '"';
		}

		// NOTE(review): the link text goes through the span gamut a second
		// time here; kept as is to preserve behaviour - confirm whether the
		// second pass is intentional.
		$label = $this->runSpanGamut($label);
		$html .= ">$label</a>";

		return $this->hashPart($html);
	}
794
795
	/**
796
	 * Turn Markdown image shortcuts into <img> tags.
797
	 * @param  string $text
798
	 * @return string
799
	 */
800 47 View Code Duplication
	protected function doImages($text) {
		$brackets_re    = $this->nested_brackets_re;
		$parenthesis_re = $this->nested_url_parenthesis_re;

		// Reference-style labeled images: ![alt text][id]
		$reference_re = '{
			(				# wrap whole match in $1
			  !\[
				('.$brackets_re.')		# alt text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]

			)
			}xs';

		// Inline images: ![alt text](url "optional title")
		$inline_re = '{
			(				# wrap whole match in $1
			  !\[
				('.$brackets_re.')		# alt text = $2
			  \]
			  \s?			# One optional whitespace character
			  \(			# literal paren
				[ \n]*
				(?:
					<(\S*)>	# src url = $3
				|
					('.$parenthesis_re.')	# src url = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# title = $7
				  \6		# matching quote
				  [ \n]*
				)?			# title is optional
			  \)
			)
			}xs';

		// Reference-style images are handled first, inline images second.
		$text = preg_replace_callback(
			$reference_re,
			array($this, '_doImages_reference_callback'),
			$text
		);

		return preg_replace_callback(
			$inline_re,
			array($this, '_doImages_inline_callback'),
			$text
		);
	}
848
849
	/**
850
	 * Callback to parse references image tags
851
	 * @param  array $matches
852
	 * @return string
853
	 */
854 1
	protected function _doImages_reference_callback($matches) {
		// $1 = whole match, $2 = alt text, $3 = reference id (may be empty).
		$whole_match = $matches[1];
		$alt_text    = $matches[2];
		$link_id     = strtolower($matches[3]);

		if ($link_id === '') {
			// Shortcut form ![this][]: the alt text doubles as the id.
			$link_id = strtolower($alt_text);
		}

		$alt_text = $this->encodeAttribute($alt_text);

		if (!isset($this->urls[$link_id])) {
			// If there's no such link ID, leave intact:
			return $whole_match;
		}

		$url    = $this->encodeURLAttribute($this->urls[$link_id]);
		$result = "<img src=\"$url\" alt=\"$alt_text\"";

		// When the image is a readable regular file, advertise its pixel size
		// and let the browser lazy-load it. is_file() (rather than
		// file_exists()) keeps directories away from getimagesize(), and the
		// is_array() check covers files that are not recognised as images,
		// for which getimagesize() returns false.
		// NOTE(review): $url is the value returned by encodeURLAttribute(),
		// i.e. the string written into the src attribute, and it originates
		// from the document being converted. Confirm that probing the local
		// filesystem with it is acceptable for untrusted input.
		if (is_file($url)) {
			$size = getimagesize($url);
			if (is_array($size)) {
				$result .= " width=\"{$size[0]}\"";
				$result .= " height=\"{$size[1]}\"";
				$result .= " loading=\"lazy\"";
			}
		}

		if (isset($this->titles[$link_id])) {
			$title   = $this->encodeAttribute($this->titles[$link_id]);
			$result .= " title=\"$title\"";
		}

		$result .= $this->empty_element_suffix;

		return $this->hashPart($result);
	}
888
889
	/**
890
	 * Callback to parse inline image tags
891
	 * @param  array $matches
892
	 * @return string
893
	 */
894 2
	protected function _doImages_inline_callback($matches) {
		// $2 = alt text, $3 = <url> form, $4 = bare url form, $7 = title.
		$alt_text = $this->encodeAttribute($matches[2]);
		$url      = $matches[3] == '' ? $matches[4] : $matches[3];
		$url      = $this->encodeURLAttribute($url);

		$result = "<img src=\"$url\" alt=\"$alt_text\"";

		// When the image is a readable regular file, advertise its pixel size
		// and let the browser lazy-load it. is_file() (rather than
		// file_exists()) keeps directories away from getimagesize(), and the
		// is_array() check covers files that are not recognised as images,
		// for which getimagesize() returns false.
		// NOTE(review): $url is the value returned by encodeURLAttribute(),
		// i.e. the string written into the src attribute, and it originates
		// from the document being converted. Confirm that probing the local
		// filesystem with it is acceptable for untrusted input.
		if (is_file($url)) {
			$size = getimagesize($url);
			if (is_array($size)) {
				$result .= " width=\"{$size[0]}\"";
				$result .= " height=\"{$size[1]}\"";
				$result .= " loading=\"lazy\"";
			}
		}

		// $7 is only present when a quoted title was given.
		if (isset($matches[7])) {
			$title   = $this->encodeAttribute($matches[7]);
			$result .= " title=\"$title\"";
		}

		$result .= $this->empty_element_suffix;

		return $this->hashPart($result);
	}
918
919
	/**
920
	 * Parse Markdown heading elements to HTML
921
	 * @param  string $text
922
	 * @return string
923
	 */
924 48
	protected function doHeaders($text) {
		// Setext-style headers: a line of text underlined with "=" (level 1)
		// or "-" (level 2):
		//
		//	  Header 1
		//	  ========
		//
		//	  Header 2
		//	  --------
		$setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';
		$text = preg_replace_callback($setext_re,
			array($this, '_doHeaders_callback_setext'), $text);

		// atx-style headers: one to six leading "#", optionally closed by
		// trailing "#" characters which are not counted:
		//
		//   # Header 1
		//   ## Header 2 with closing hashes ##
		//   ###### Header 6
		$atx_re = '{
				^(\#{1,6})	# $1 = string of #\'s
				[ ]*
				(.+?)		# $2 = Header text
				[ ]*
				\#*			# optional closing #\'s (not counted)
				\n+
			}xm';

		return preg_replace_callback($atx_re,
			array($this, '_doHeaders_callback_atx'), $text);
	}
956
957
	/**
958
	 * Setext header parsing callback
959
	 * @param  array $matches
960
	 * @return string
961
	 */
962 5
	protected function _doHeaders_callback_setext($matches) {
		$heading   = $matches[1];
		$underline = $matches[2];

		// A single "-" below a line that itself starts with a dash marker is
		// an empty list item, not a header: hand the text back untouched.
		if ($underline == '-' && preg_match('{^-(?: |$)}', $heading)) {
			return $matches[0];
		}

		// "=" underline gives <h1>, "-" underline gives <h2>.
		$level = ($underline[0] == '=') ? 1 : 2;

		$idAtt = $this->_generateIdFromHeaderValue($heading);
		$html  = "<h$level$idAtt>" . $this->runSpanGamut($heading) . "</h$level>";

		return "\n" . $this->hashBlock($html) . "\n\n";
	}
976
977
	/**
978
	 * ATX header parsing callback
979
	 * @param  array $matches
980
	 * @return string
981
	 */
982 8
	protected function _doHeaders_callback_atx($matches) {
		$heading = $matches[2];

		// Optional id attribute derived from the header text.
		$idAtt = $this->_generateIdFromHeaderValue($heading);

		// The header level is the number of leading "#" characters.
		$level = strlen($matches[1]);

		$html = "<h$level$idAtt>" . $this->runSpanGamut($heading) . "</h$level>";
		return "\n" . $this->hashBlock($html) . "\n\n";
	}
990
991
	/**
992
	 * If a header_id_func property is set, we can use it to automatically
993
	 * generate an id attribute.
994
	 *
995
	 * This method returns a string in the form id="foo", or an empty string
996
	 * otherwise.
997
	 * @param  string $headerValue
998
	 * @return string
999
	 */
1000 9
	protected function _generateIdFromHeaderValue($headerValue) {
		if (is_callable($this->header_id_func)) {
			$idValue = call_user_func($this->header_id_func, $headerValue);

			// A falsy result from the generator means "no id for this header".
			if ($idValue) {
				return ' id="' . $this->encodeAttribute($idValue) . '"';
			}
		}

		return "";
	}
1012
1013
	/**
1014
	 * Form HTML ordered (numbered) and unordered (bulleted) lists.
1015
	 * @param  string $text
1016
	 * @return string
1017
	 */
1018 110
	protected function doLists($text) {
		$less_than_tab = $this->tab_width - 1;

		// Patterns for unordered (bullet) and ordered (number) item markers.
		$marker_ul_re  = '[*+-]';
		$marker_ol_re  = '\d+[\.]';

		// One pass per list kind; the value is the marker of the *other*
		// kind, which terminates the list being matched.
		$markers_relist = array(
			$marker_ul_re => $marker_ol_re,
			$marker_ol_re => $marker_ul_re,
			);

		foreach ($markers_relist as $marker_re => $other_marker_re) {
			// Pattern matching one entire ul or ol list:
			$whole_list_re = '
				(								# $1 = whole list
				  (								# $2
					([ ]{0,'.$less_than_tab.'})	# $3 = number of spaces
					('.$marker_re.')			# $4 = first list item marker
					[ ]+
				  )
				  (?s:.+?)
				  (								# $5
					  \z
					|
					  \n{2,}
					  (?=\S)
					  (?!						# Negative lookahead for another list item marker
						[ ]*
						'.$marker_re.'[ ]+
					  )
					|
					  (?=						# Lookahead for another kind of list
					    \n
						\3						# Must have the same indentation
						'.$other_marker_re.'[ ]+
					  )
				  )
				)
			'; // mx

			// Nested lists are anchored differently from top-level lists;
			// the reasoning is in the comment inside processListItems().
			if ($this->list_level) {
				$list_re = '{
						^
						'.$whole_list_re.'
					}mx';
			} else {
				$list_re = '{
						(?:(?<=\n)\n|\A\n?) # Must eat the newline
						'.$whole_list_re.'
					}mx';
			}

			$text = preg_replace_callback($list_re,
				array($this, '_doLists_callback'), $text);
		}

		return $text;
	}
1079
1080
	/**
1081
	 * List parsing callback
1082
	 * @param  array $matches
1083
	 * @return string
1084
	 */
1085 24
	protected function _doLists_callback($matches) {
		// Patterns for list item bullets and number markers:
		$marker_ul_re  = '[*+-]';
		$marker_ol_re  = '\d+[\.]';
		$marker_ol_start_re = '[0-9]+';

		// $matches[4] is the first item marker; it decides the list kind and
		// which marker pattern the items of this list are split on.
		$list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
		$marker_any_re = ($list_type == "ul") ? $marker_ul_re : $marker_ol_re;

		// $matches[1] is the whole list text.
		$result = $this->processListItems($matches[1] . "\n", $marker_any_re);

		// With enhanced ordered lists enabled, an ordered list starts at the
		// number written on its first marker.
		$ol_start = 1;
		if ($this->enhanced_ordered_list && $list_type == 'ol') {
			$ol_start_array = array();
			if (preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array)) {
				$ol_start = $ol_start_array[0];
			}
		}

		// $ol_start can only exceed 1 for an ordered list, so no extra
		// list-type check is needed before emitting the start attribute.
		$open_tag = ($ol_start > 1)
			? "<$list_type start=\"$ol_start\">"
			: "<$list_type>";

		$result = $this->hashBlock($open_tag . "\n" . $result . "</$list_type>");
		return "\n". $result ."\n\n";
	}
1119
1120
	/**
1121
	 * Nesting tracker for list levels
1122
	 * @var integer
1123
	 */
1124
	protected $list_level = 0;
1125
1126
	/**
1127
	 * Process the contents of a single ordered or unordered list, splitting it
1128
	 * into individual list items.
1129
	 * @param  string $list_str
1130
	 * @param  string $marker_any_re
1131
	 * @return string
1132
	 */
1133 24
	protected function processListItems($list_str, $marker_any_re) {
		// $this->list_level counts how deep we are inside lists: it is
		// incremented on entry here and decremented on exit, so zero means
		// "not inside any list".
		//
		// The counter exists because, outside of a list, text such as
		//
		//		I recommend upgrading to version
		//		8. Oops, now this line is treated
		//		as a sub-list.
		//
		// has to stay a single paragraph even though its second line begins
		// with a digit-period-space sequence, whereas inside a list (or
		// sub-list) the same line starts a sub-list. Markdown's syntax is
		// ambiguous here; requiring sub-lists to begin with a starting
		// cardinal number ("1." or "a.") would be one way to resolve it.
		$this->list_level++;

		// Collapse trailing blank lines into a single newline.
		$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

		$item_re = '{
			(\n)?							# leading line = $1
			(^[ ]*)							# leading whitespace = $2
			('.$marker_any_re.'				# list marker and space = $3
				(?:[ ]+|(?=\n))	# space only required if item is not empty
			)
			((?s:.*?))						# list item text   = $4
			(?:(\n+(?=\n))|\n)				# tailing blank line = $5
			(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
			}xm';
		$list_str = preg_replace_callback($item_re,
			array($this, '_processListItems_callback'), $list_str);

		$this->list_level--;
		return $list_str;
	}
1176
1177
	/**
1178
	 * List item parsing callback
1179
	 * @param  array $matches
1180
	 * @return string
1181
	 */
1182 24
	protected function _processListItems_callback($matches) {
		$item = $matches[4];
		$marker_space = $matches[3];

		// Groups 1, 2 and 5 may be missing from $matches, so read them
		// defensively instead of indexing directly.
		$leading_space = isset($matches[2]) ? $matches[2] : '';
		$is_loose = !empty($matches[1]) || !empty($matches[5]) ||
			preg_match('/\n{2,}/', $item);

		if ($is_loose) {
			// Surrounded by (or containing) blank lines: swap the marker for
			// equivalent indentation and run the full block gamut.
			$item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
			$item = $this->runBlockGamut($this->outdent($item)."\n");
		} else {
			// Tight item: only look for nested lists, without <p> wrapping.
			$item = $this->doLists($this->outdent($item));
			$item = $this->formParagraphs($item, false);
		}

		return "<li>" . $item . "</li>\n";
	}
1203
1204
	/**
1205
	 * Process Markdown `<pre><code>` blocks.
1206
	 * @param  string $text
1207
	 * @return string
1208
	 */
1209 110
	protected function doCodeBlocks($text) {
		// Both occurrences of the indentation width come from $this->tab_width.
		$code_block_re = '{
				(?:\n\n|\A\n?)
				(	            # $1 = the code block -- one or more lines, starting with a space/tab
				  (?>
					[ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
					.*\n+
				  )+
				)
				((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
			}xm';

		return preg_replace_callback($code_block_re,
			array($this, '_doCodeBlocks_callback'), $text);
	}
1224
1225
	/**
1226
	 * Code block parsing callback
1227
	 * @param  array $matches
1228
	 * @return string
1229
	 */
1230 35
	protected function _doCodeBlocks_callback($matches) {
		$code = $this->outdent($matches[1]);

		// A user-supplied content function takes over escaping entirely;
		// otherwise escape HTML special characters, leaving quotes alone.
		$code = is_callable($this->code_block_content_func)
			? call_user_func($this->code_block_content_func, $code, "")
			: htmlspecialchars($code, ENT_NOQUOTES);

		// Strip leading and trailing newlines.
		$code = preg_replace('/\A\n+|\n+\z/', '', $code);

		$html = "<pre><code>$code\n</code></pre>";
		return "\n\n" . $this->hashBlock($html) . "\n\n";
	}
1246
1247
	/**
1248
	 * Create a code span markup for $code. Called from handleSpanToken.
1249
	 * @param  string $code
1250
	 * @return string
1251
	 */
1252 21
	protected function makeCodeSpan($code) {
		// A user-supplied content function replaces the default
		// trim-and-escape handling.
		$code = is_callable($this->code_span_content_func)
			? call_user_func($this->code_span_content_func, $code)
			: htmlspecialchars(trim($code), ENT_NOQUOTES);

		return $this->hashPart("<code>$code</code>");
	}
1260
1261
	/**
1262
	 * Define the emphasis operators with their regex matches
1263
	 * @var array
1264
	 */
1265
	protected $em_relist = array(
1266
		''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
1267
		'*' => '(?<![\s*])\*(?!\*)',
1268
		'_' => '(?<![\s_])_(?!_)',
1269
	);
1270
1271
	/**
1272
	 * Define the strong operators with their regex matches
1273
	 * @var array
1274
	 */
1275
	protected $strong_relist = array(
1276
		''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
1277
		'**' => '(?<![\s*])\*\*(?!\*)',
1278
		'__' => '(?<![\s_])__(?!_)',
1279
	);
1280
1281
	/**
1282
	 * Define the emphasis + strong operators with their regex matches
1283
	 * @var array
1284
	 */
1285
	protected $em_strong_relist = array(
1286
		''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
1287
		'***' => '(?<![\s*])\*\*\*(?!\*)',
1288
		'___' => '(?<![\s_])___(?!_)',
1289
	);
1290
1291
	/**
1292
	 * Container for prepared regular expressions
1293
	 * @var array
1294
	 */
1295
	protected $em_strong_prepared_relist;
1296
1297
	/**
1298
	 * Prepare regular expressions for searching emphasis tokens in any
1299
	 * context.
1300
	 * @return void
1301
	 */
1302 5
	protected function prepareItalicsAndBold() {
		// Build one token-matching expression per (em, strong) state pair,
		// keyed by the concatenation of the two state strings.
		foreach ($this->em_relist as $em => $em_re) {
			foreach ($this->strong_relist as $strong => $strong_re) {
				$state = "$em$strong";

				// Alternation order: combined em+strong token (when defined
				// for this state), then em, then strong.
				$token_relist = array($em_re, $strong_re);
				if (isset($this->em_strong_relist[$state])) {
					array_unshift($token_relist, $this->em_strong_relist[$state]);
				}

				$this->em_strong_prepared_relist[$state] =
					'{(' . implode('|', $token_relist) . ')}';
			}
		}
	}
1319
1320
	/**
1321
	 * Convert Markdown italics (emphasis) and bold (strong) to HTML
1322
	 * @param  string $text
1323
	 * @return string
1324
	 */
1325 109
	protected function doItalicsAndBold($text) {
		// This method is reached again through runSpanGamut() below; the
		// flag makes those nested calls return the text untouched.
		if ($this->in_emphasis_processing) {
			return $text;
		}
		$this->in_emphasis_processing = true;

		// Parallel stacks: open markers, and the text gathered since each
		// marker was opened. Index 0 is always the innermost level.
		$token_stack = array('');
		$text_stack = array('');
		$em = '';
		$strong = '';
		$tree_char_em = false;

		while (true) {
			// The expression used to find the next emphasis token depends
			// on which markers are currently open.
			$token_re = $this->em_strong_prepared_relist["$em$strong"];

			// Split off the text before the next token, the token itself,
			// and the remaining text to scan.
			$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
			$text_stack[0] .= $parts[0];
			$token = isset($parts[1]) ? $parts[1] : null;
			$text = isset($parts[2]) ? $parts[2] : null;

			if (empty($token)) {
				// End of the span: fold every still-open marker back into
				// its parent as literal text, emitting no more emphasis.
				while ($token_stack[0]) {
					$text_stack[1] .= array_shift($token_stack);
					$text_stack[0] .= array_shift($text_stack);
				}
				break;
			}

			$token_len = strlen($token);

			if ($tree_char_em) {
				// A closing marker arrived while a three-char marker is open.
				if ($token_len == 3) {
					// Three-char closing marker: close em and strong at once.
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$text_stack[0] .= $this->hashPart("<strong><em>$span</em></strong>");
					$em = '';
					$strong = '';
				} else {
					// Shorter closing marker: close one of em/strong and
					// leave the remainder of the opening marker on the stack.
					$token_stack[0] = str_repeat($token[0], 3-$token_len);
					$tag = $token_len == 2 ? "strong" : "em";
					$span = $this->runSpanGamut($text_stack[0]);
					$text_stack[0] = $this->hashPart("<$tag>$span</$tag>");
					if ($tag == "strong") {
						$strong = '';
					} else {
						$em = '';
					}
				}
				$tree_char_em = false;
				continue;
			}

			if ($token_len == 3) {
				if ($em) {
					// Closing marker for both em and strong: pop two levels,
					// choosing each tag from the length of the popped marker.
					for ($i = 0; $i < 2; ++$i) {
						$shifted_token = array_shift($token_stack);
						$tag = strlen($shifted_token) == 2 ? "strong" : "em";
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$text_stack[0] .= $this->hashPart("<$tag>$span</$tag>");
						if ($tag == "strong") {
							$strong = '';
						} else {
							$em = '';
						}
					}
				} else {
					// Opening three-char marker: push it; its closing is
					// handled by the $tree_char_em branch above.
					$em = $token[0];
					$strong = "$em$em";
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$tree_char_em = true;
				}
				continue;
			}

			if ($token_len == 2) {
				if ($strong) {
					// Unwind a dangling emphasis marker first, as plain text.
					if (strlen($token_stack[0]) == 1) {
						$text_stack[1] .= array_shift($token_stack);
						$text_stack[0] .= array_shift($text_stack);
						$em = '';
					}
					// Closing strong marker.
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$text_stack[0] .= $this->hashPart("<strong>$span</strong>");
					$strong = '';
				} else {
					// Opening strong marker.
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$strong = $token;
				}
				continue;
			}

			// From here on $token_len == 1.
			if (!$em) {
				// Opening emphasis marker.
				array_unshift($token_stack, $token);
				array_unshift($text_stack, '');
				$em = $token;
			} elseif (strlen($token_stack[0]) == 1) {
				// Closing emphasis marker.
				array_shift($token_stack);
				$span = array_shift($text_stack);
				$span = $this->runSpanGamut($span);
				$text_stack[0] .= $this->hashPart("<em>$span</em>");
				$em = '';
			} else {
				// Emphasis is open but not innermost: keep the marker as text.
				$text_stack[0] .= $token;
			}
		}

		$this->in_emphasis_processing = false;
		return $text_stack[0];
	}
1449
1450
	/**
1451
	 * Parse Markdown blockquotes to HTML
1452
	 * @param  string $text
1453
	 * @return string
1454
	 */
1455 110
	protected function doBlockQuotes($text) {
		$blockquote_re = '/
			  (								# Wrap whole match in $1
				(?>
				  ^[ ]*>[ ]?			# ">" at the start of a line
					.+\n					# rest of the first line
				  (.+\n)*					# subsequent consecutive lines
				  \n*						# blanks
				)+
			  )
			/xm';

		return preg_replace_callback($blockquote_re,
			array($this, '_doBlockQuotes_callback'), $text);
	}
1470
1471
	/**
1472
	 * Blockquote parsing callback
1473
	 * @param  array $matches
1474
	 * @return string
1475
	 */
1476 11
	protected function _doBlockQuotes_callback($matches) {
		// Remove one level of ">" quoting and blank out space-only lines.
		$quoted = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

		// Recurse: the quoted text is itself block-level Markdown.
		$quoted = $this->runBlockGamut($quoted);

		// Indent every line of the result by two spaces...
		$quoted = preg_replace('/^/m', "  ", $quoted);

		// ...then undo that inside <pre> blocks, whose content the extra
		// leading spaces would otherwise alter.
		$quoted = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
			array($this, '_doBlockQuotes_callback2'), $quoted);

		return "\n" . $this->hashBlock("<blockquote>\n$quoted\n</blockquote>") . "\n\n";
	}
1490
1491
	/**
1492
	 * Blockquote parsing callback
1493
	 * @param  array $matches
1494
	 * @return string
1495
	 */
1496 2
	protected function _doBlockQuotes_callback2($matches) {
		// Strip the two-space indent from the start of each line of the
		// matched <pre> block.
		return preg_replace('/^  /m', '', $matches[1]);
	}
1501
1502
	/**
1503
	 * Parse paragraphs
1504
	 *
1505
	 * @param  string $text String to process in paragraphs
1506
	 * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
1507
	 * @return string
1508
	 */
1509 48
	protected function formParagraphs($text, $wrap_in_p = true) {
		// Drop leading and trailing newlines, then split on blank lines.
		$text = preg_replace('/\A\n+|\n+\z/', '', $text);
		$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

		foreach ($grafs as $key => $value) {
			// A chunk consisting solely of a block hash token is an already
			// rendered block: substitute its stored HTML as-is.
			if (preg_match('/^B\x1A[0-9]+B$/', $value)) {
				$grafs[$key] = $this->html_hashes[$value];
				continue;
			}

			// Anything else is a paragraph: run span-level processing and
			// optionally wrap it in <p>, replacing any leading spaces.
			$value = $this->runSpanGamut($value);
			if ($wrap_in_p) {
				$value = preg_replace('/^([ ]*)/', "<p>", $value);
				$value .= "</p>";
			}
			$grafs[$key] = $this->unhash($value);
		}

		return implode("\n\n", $grafs);
	}
1572
1573
	/**
1574
	 * Encode text for a double-quoted HTML attribute. This function
1575
	 * is *not* suitable for attributes enclosed in single quotes.
1576
	 * @param  string $text
1577
	 * @return string
1578
	 */
1579 36
	protected function encodeAttribute($text) {
		// Ampersands and "<" first, then the double quote that would
		// otherwise terminate the attribute value.
		return str_replace('"', '&quot;', $this->encodeAmpsAndAngles($text));
	}
1584
1585
	/**
1586
	 * Encode text for a double-quoted HTML attribute containing a URL,
1587
	 * applying the URL filter if set. Also generates the textual
1588
	 * representation for the URL (removing mailto: or tel:) storing it in $text.
1589
	 * This function is *not* suitable for attributes enclosed in single quotes.
1590
	 *
1591
	 * @param  string $url
1592
	 * @param  string $text Passed by reference
1593
	 * @return string        URL
1594
	 */
1595 33
	protected function encodeURLAttribute($url, &$text = null) {
		// Let the optional user-supplied filter rewrite the URL first.
		if (is_callable($this->url_filter_func)) {
			$url = call_user_func($this->url_filter_func, $url);
		}

		// mailto: URLs are entity-obfuscated; the display text is the
		// obfuscated value minus its first 7 characters ("mailto:").
		if (preg_match('{^mailto:}i', $url)) {
			return $this->encodeEntityObfuscatedAttribute($url, $text, 7);
		}

		// The scheme has to be tested before the URL is encoded.
		$is_tel = preg_match('{^tel:}i', $url);
		$url = $this->encodeAttribute($url);

		// tel: URLs display without the 4-character "tel:" prefix; every
		// other URL displays as its encoded form.
		$text = $is_tel ? substr($url, 4) : $url;

		return $url;
	}
1612
1613
	/**
1614
	 * Smart processing for ampersands and angle brackets that need to
1615
	 * be encoded. Valid character entities are left alone unless the
1616
	 * no-entities mode is set.
1617
	 * @param  string $text
1618
	 * @return string
1619
	 */
1620 109
	protected function encodeAmpsAndAngles($text) {
		if (!$this->no_entities) {
			// Encode only ampersands that do not begin an entity.
			// Based entirely on Nat Irons's Amputator MT plugin:
			// <http://bumppo.net/projects/amputator/>
			$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
								'&amp;', $text);
		} else {
			// No-entities mode: every ampersand is encoded.
			$text = str_replace('&', '&amp;', $text);
		}

		// Finally encode the remaining "<" characters.
		return str_replace('<', '&lt;', $text);
	}
1634
1635
	/**
1636
	 * Parse Markdown automatic links to anchor HTML tags
1637
	 * @param  string $text
1638
	 * @return string
1639
	 */
1640 109
	protected function doAutoLinks($text) {
		// URLs in angle brackets using one of the listed schemes.
		$url_re = '{<((https?|ftp|dict|tel):[^\'">\s]+)>}i';
		$text = preg_replace_callback($url_re,
			array($this, '_doAutoLinks_url_callback'), $text);

		// Email addresses in angle brackets, with an optional "mailto:" prefix.
		$email_re = '{
			<
			(?:mailto:)?
			(
				(?:
					[-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
				|
					".*?"
				)
				\@
				(?:
					[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
				|
					\[[\d.a-fA-F:]+\]	# IPv4 & IPv6
				)
			)
			>
			}xi';

		return preg_replace_callback($email_re,
			array($this, '_doAutoLinks_email_callback'), $text);
	}
1667
1668
	/**
1669
	 * Parse URL callback
1670
	 * @param  array $matches
1671
	 * @return string
1672
	 */
1673 4
	protected function _doAutoLinks_url_callback($matches) {
		// $label is filled in by reference with the link's display text.
		$href = $this->encodeURLAttribute($matches[1], $label);

		return $this->hashPart("<a href=\"$href\">$label</a>");
	}
1678
1679
	/**
1680
	 * Parse email address callback
1681
	 * @param  array $matches
1682
	 * @return string
1683
	 */
1684 4
	protected function _doAutoLinks_email_callback($matches) {
		// Prefix the captured address with "mailto:"; $label is filled in
		// by reference with the link's display text.
		$href = $this->encodeURLAttribute("mailto:" . $matches[1], $label);

		return $this->hashPart("<a href=\"$href\">$label</a>");
	}
1690
1691
	/**
1692
	 * Input: some text to obfuscate, e.g. "mailto:[email protected]"
1693
	 *
1694
	 * Output: the same text but with most characters encoded as either a
1695
	 *         decimal or hex entity, in the hopes of foiling most address
1696
	 *         harvesting spam bots. E.g.:
1697
	 *
1698
	 *        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1699
	 *        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1700
	 *        &#x6d;
1701
	 *
1702
	 * Note: the additional output $tail is assigned the same value as the
1703
	 * output, minus the number of characters specified by $head_length.
1704
	 *
1705
	 * Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
1706
	 * With some optimizations by Milian Wolff. Forced encoding of HTML
1707
	 * attribute special characters by Allan Odgaard.
1708
	 *
1709
	 * @param  string  $text
1710
	 * @param  string  $tail Passed by reference
1711
	 * @param  integer $head_length
1712
	 * @return string
1713
	 */
1714 4
	protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
		if ($text == "") {
			return $tail = "";
		}

		$chars = preg_split('/(?<!^)(?!$)/', $text);

		// The seed is derived from the text itself, so the same input always
		// produces the same obfuscated output.
		$seed = (int)abs(crc32($text) / strlen($text));

		foreach ($chars as $key => $char) {
			$ord = ord($char);

			// Non-ASCII bytes are passed through unchanged.
			if ($ord >= 128) {
				continue;
			}

			// Pseudo-random value in 0..99 for this position.
			$r = ($seed * (1 + $key)) % 100;

			// Roughly 10% of characters stay raw, except '@', '"', '&' and
			// '>', which are always encoded.
			if ($r > 90 && strpos('@"&>', $char) === false) {
				continue;
			}

			// The rest is split between hex and decimal entities.
			$chars[$key] = ($r < 45)
				? '&#x'.dechex($ord).';'
				: '&#'.$ord.';';
		}

		$text = implode('', $chars);

		// $tail is the output without its first $head_length encoded chars.
		$tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;

		return $text;
	}
1745
1746
	/**
1747
	 * Take the string $str and parse it into tokens, hashing embedded HTML,
1748
	 * escaped characters and handling code spans.
1749
	 * @param  string $str
1750
	 * @return string
1751
	 */
1752 109
	protected function parseSpan($str) {
		$output = '';

		// Tokens of interest: backslash escapes, code span markers and,
		// unless markup is disabled, inline HTML constructs.
		$span_re = '{
				(
					\\\\'.$this->escape_chars_re.'
				|
					(?<![`\\\\])
					`+						# code span marker
			'.( $this->no_markup ? '' : '
				|
					<!--    .*?     -->		# comment
				|
					<\?.*?\?> | <%.*?%>		# processing instruction
				|
					<[!$]?[-a-zA-Z0-9:_]+	# regular tags
					(?>
						\s
						(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
					)?
					>
				|
					<[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
				|
					</[-a-zA-Z0-9:_]+\s*> # closing tag
			').'
				)
				}xs';

		do {
			// Split into: text before the next token, the token, the rest.
			$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

			// Text preceding the token is copied through unchanged.
			if ($parts[0] != "") {
				$output .= $parts[0];
			}

			// No token found means the end of the string was reached.
			$found_token = isset($parts[1]);
			if ($found_token) {
				// handleSpanToken() receives the remainder by reference and
				// may consume part of it (e.g. the body of a code span).
				$output .= $this->handleSpanToken($parts[1], $parts[2]);
				$str = $parts[2];
			}
		} while ($found_token);

		return $output;
	}
1803
1804
	/**
1805
	 * Handle $token provided by parseSpan by determining its nature and
1806
	 * returning the corresponding value that should replace it.
1807
	 * @param  string $token
1808
	 * @param  string $str Passed by reference
1809
	 * @return string
1810
	 */
1811 37
	protected function handleSpanToken($token, &$str) {
		$first = $token[0];

		// Backslash escape: emit the escaped character as a hashed
		// numeric entity.
		if ($first === "\\") {
			return $this->hashPart("&#". ord($token[1]). ";");
		}

		// Anything that is not a code span marker is hashed unchanged.
		if ($first !== "`") {
			return $this->hashPart($token);
		}

		// Code span: look in the remaining text for a closing marker made
		// of exactly the same run of backticks.
		$closing_re = '/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm';
		if (!preg_match($closing_re, $str, $found)) {
			return $token; // Return as text since no ending marker found.
		}

		// Consume the span content and the closing marker from $str.
		$str = $found[2];
		return $this->hashPart($this->makeCodeSpan($found[1]));
	}
1829
1830
	/**
1831
	 * Remove one level of line-leading tabs or spaces
1832
	 * @param  string $text
1833
	 * @return string
1834
	 */
1835 47
	protected function outdent($text) {
		// At the start of every line, strip either one tab or up to
		// `tab_width` spaces.
		$indent_re = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';

		return preg_replace($indent_re, '', $text);
	}
1838
1839
1840
	/**
1841
	 * String length function for detab. `_initDetab` will create a function to
1842
	 * handle UTF-8 if the default function does not exist.
1843
	 * @var string|\Closure
1844
	 */
1845
	protected $utf8_strlen = 'mb_strlen'; // Function name by default; _initDetab() may replace it with a Closure.
1846
1847
	/**
1848
	 * Replace tabs with the appropriate amount of spaces.
1849
	 *
1850
	 * For each line we separate the line in blocks delimited by tab characters.
	 * Then we reconstruct every line by adding the appropriate number of spaces
	 * between each block.
1853
	 *
1854
	 * @param  string $text
1855
	 * @return string
1856
	 */
1857 110
	protected function detab($text) {
		// Only lines that actually contain a tab are handed to the callback.
		return preg_replace_callback(
			'/^.*\t.*$/m',
			array($this, '_detab_callback'),
			$text
		);
	}
1863
1864
	/**
1865
	 * Replace tabs callback
1866
	 * @param  array $matches
1867
	 * @return string
1868
	 */
1869 34
	protected function _detab_callback($matches) {
		$measure = $this->utf8_strlen; // strlen function for UTF-8.
		$width = $this->tab_width;

		// Cut the matched line at each tab; the first segment starts the
		// rebuilt line and is removed from the list of segments to append.
		$segments = explode("\t", $matches[0]);
		$rebuilt = array_shift($segments);

		foreach ($segments as $segment) {
			// Pad up to the next multiple of the tab width, then append
			// the segment that followed the tab.
			$padding = $width - $measure($rebuilt, 'UTF-8') % $width;
			$rebuilt .= str_repeat(" ", $padding) . $segment;
		}

		return $rebuilt;
	}
1886
1887
	/**
1888
	 * Check for the availability of the function in the `utf8_strlen` property
1889
	 * (initially `mb_strlen`). If the function is not available, create a
1890
	 * function that will loosely count the number of UTF-8 characters with a
1891
	 * regular expression.
1892
	 * @return void
1893
	 */
1894 5
	protected function _initDetab() {
		// Install the fallback only when the configured function is missing.
		if (!function_exists($this->utf8_strlen)) {
			// Loose UTF-8 character count: each match is either a byte in
			// the 0x00-0xBF range or a lead byte (0xC0-0xFF) together with
			// the 0x80-0xBF bytes that follow it.
			$this->utf8_strlen = function($string) {
				return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $string, $ignored);
			};
		}
	}
1904
1905
	/**
1906
	 * Swap back in all the tags hashed by _HashHTMLBlocks.
1907
	 * @param  string $text
1908
	 * @return string
1909
	 */
1910 110
	protected function unhash($text) {
		// A placeholder is a run of digits preceded by \x1A and enclosed
		// between two identical characters; each one is resolved by
		// _unhash_callback.
		$placeholder_re = '/(.)\x1A[0-9]+\1/';
		$resolver = array($this, '_unhash_callback');

		return preg_replace_callback($placeholder_re, $resolver, $text);
	}
1914
1915
	/**
1916
	 * Unhashing callback
1917
	 * @param  array $matches
1918
	 * @return string
1919
	 */
1920 87
	protected function _unhash_callback($matches) {
		// The full match is the key under which the original text was stored.
		$placeholder = $matches[0];

		return $this->html_hashes[$placeholder];
	}
1923
}
1924