Completed
Push — v2/videopress-sideloading ( 341d44...ce2243 )
by
unknown
09:54
created

WPCom_GHF_Markdown_Parser::transform()   C

Complexity

Conditions 8
Paths 32

Size

Total Lines 52
Code Lines 19

Duplication

Lines 0
Ratio 0 %
Metric Value
dl 0
loc 52
rs 6.8493
cc 8
eloc 19
nc 32
nop 1

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
/**
3
 * GitHub-Flavoured Markdown. Inspired by Evan's plugin, but modified.
4
 *
5
 * @author Evan Solomon
6
 * @author Matt Wiebe <wiebe@automattic.com>
7
 * @link https://github.com/evansolomon/wp-github-flavored-markdown-comments
8
 *
9
 * Add a few extras from GitHub's Markdown implementation. Must be used in a WordPress environment.
10
 */
11
12
class WPCom_GHF_Markdown_Parser extends MarkdownExtra_Parser {

	/**
	 * Hooray somewhat arbitrary numbers that are fearful of 1.0.x.
	 *
	 * NOTE: the constant name is misspelled ("MARDOWN"); it is kept as-is because
	 * it is public and external code may reference it.
	 */
	const WPCOM_GHF_MARDOWN_VERSION = '0.9.0';

	/**
	 * Use a [code] shortcode when encountering a fenced code block
	 * @var boolean
	 */
	public $use_code_shortcode = true;

	/**
	 * Preserve shortcodes, untouched by Markdown.
	 * This requires use within a WordPress installation.
	 * @var boolean
	 */
	public $preserve_shortcodes = true;

	/**
	 * Preserve the legacy $latex your-latex-code-here$ style
	 * LaTeX markup
	 * @var boolean
	 */
	public $preserve_latex = true;

	/**
	 * Preserve single-line <code> blocks.
	 * @var boolean
	 */
	public $preserve_inline_code_blocks = true;

	/**
	 * Strip paragraphs from the output. This is the right default for WordPress,
	 * which generally wants to create its own paragraphs with `wpautop`
	 * @var boolean
	 */
	public $strip_paras = true;

	// Will run through sprintf - you can supply your own syntax if you want
	public $shortcode_start = '[code lang=%s]';
	public $shortcode_end   = '[/code]';

	// Stores preserved text (shortcodes, LaTeX, inline code, ...) keyed by its md5 hash,
	// in the order it was removed, so it can be put back after Markdown has run.
	protected $preserve_text_hash = array();

	/**
	 * Set environment defaults based on presence of key functions/classes.
	 */
	public function __construct() {
		$this->use_code_shortcode  = class_exists( 'SyntaxHighlighter' );
		$this->preserve_shortcodes = function_exists( 'get_shortcode_regex' );
		$this->preserve_latex      = function_exists( 'latex_markup' );
		$this->strip_paras         = function_exists( 'wpautop' );

		parent::__construct();
	}

	/**
	 * Transform Markdown into HTML, protecting inline code, shortcodes, legacy LaTeX and
	 * third-party patterns from Markdown processing along the way.
	 *
	 * Also overloads heading handling so a heading is only produced if the hash has space(s)
	 * after it. This is actually in keeping with the documentation and eases the semantic
	 * overload of the hash character.
	 * #Will Not Produce a Heading 1
	 * # This Will Produce a Heading 1
	 *
	 * @param  string $text Markdown text
	 * @return string       HTML-transformed text
	 */
	public function transform( $text ) {
		// Preserve anything inside a single-line <code> element
		if ( $this->preserve_inline_code_blocks ) {
			$text = $this->single_line_code_preserve( $text );
		}
		// Remove all shortcodes so their interiors are left intact
		if ( $this->preserve_shortcodes ) {
			$text = $this->shortcode_preserve( $text );
		}
		// Remove legacy LaTeX so it's left intact
		if ( $this->preserve_latex ) {
			$text = $this->latex_preserve( $text );
		}

		// escape line-beginning # chars that do not have a space after them.
		$escaped = preg_replace_callback( '|^#{1,6}( )?|um', array( $this, '_doEscapeForHashWithoutSpacing' ), $text );
		// With the "u" modifier PCRE returns null for invalid UTF-8 input; keep the
		// unescaped text in that case rather than discarding the whole document.
		if ( null !== $escaped ) {
			$text = $escaped;
		}

		// Remove anything third-party plugins asked us to leave alone
		$text = $this->preserve_custom_patterns( $text );

		// run through core Markdown
		$text = parent::transform( $text );

		// Occasionally Markdown Extra chokes on a para structure, producing odd paragraphs.
		$text = str_replace( "<p>&lt;</p>\n\n<p>p>", '<p>', $text );

		// put start-of-line # chars back in place
		$text = $this->restore_leading_hash( $text );

		// Strip paras if set
		if ( $this->strip_paras ) {
			$text = $this->unp( $text );
		}

		// Restore preserved things like shortcodes/LaTeX
		$text = $this->do_restore( $text );

		return $text;
	}

	/**
	 * Replaces text matching patterns supplied through the
	 * `jetpack_markdown_preserve_pattern` filter with placeholders.
	 *
	 * @param  string $text Text in which to preserve the custom patterns
	 * @return string       Text with each match replaced by a placeholder that will be restored later
	 */
	protected function preserve_custom_patterns( $text ) {
		/**
		 * Allow third-party plugins to define custom patterns that won't be processed by Markdown.
		 *
		 * @module markdown
		 *
		 * @since 3.9.2
		 *
		 * @param array $custom_patterns Array of custom patterns to be ignored by Markdown.
		 */
		$custom_patterns = apply_filters( 'jetpack_markdown_preserve_pattern', array() );
		if ( ! is_array( $custom_patterns ) || empty( $custom_patterns ) ) {
			return $text;
		}

		foreach ( $custom_patterns as $pattern ) {
			$replaced = preg_replace_callback( $pattern, array( $this, '_doRemoveText' ), $text );
			// A pattern that fails to run yields null; skip it instead of
			// letting it erase the text.
			if ( null !== $replaced ) {
				$text = $replaced;
			}
		}

		return $text;
	}

	/**
	 * Prevents blocks like <code>__this__</code> from turning into <code><strong>this</strong></code>
	 * @param  string $text Text that may need preserving
	 * @return string       Text that was preserved if needed
	 */
	public function single_line_code_preserve( $text ) {
		return preg_replace_callback( '|<code\b[^>]*>(.*?)</code>|', array( $this, 'do_single_line_code_preserve' ), $text );
	}

	/**
	 * Regex callback for inline code preservation
	 * @param  array $matches Regex matches
	 * @return string         Hashed content for later restoration
	 */
	public function do_single_line_code_preserve( $matches ) {
		// Only the element's interior is hashed; the tag is re-emitted as a bare <code>.
		return '<code>' . $this->hash_block( $matches[1] ) . '</code>';
	}

	/**
	 * Preserve code block contents by HTML encoding them. Useful before getting to KSES stripping.
	 * @param  string $text Markdown/HTML content
	 * @return string       Markdown/HTML content with escaped code blocks
	 */
	public function codeblock_preserve( $text ) {
		return preg_replace_callback( "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m", array( $this, 'do_codeblock_preserve' ), $text );
	}

	/**
	 * Regex callback for code block preservation.
	 * @param  array $matches Regex matches
	 * @return string         Codeblock with escaped interior
	 */
	public function do_codeblock_preserve( $matches ) {
		$block = stripslashes( $matches[3] );
		$block = esc_html( $block );
		// Double every backslash that is left after escaping.
		$block = str_replace( '\\', '\\\\', $block );
		$open = $matches[1] . $matches[2] . "\n";
		return $open . $block . $matches[4];
	}

	/**
	 * Restore previously preserved (i.e. escaped) code block contents.
	 * @param  string $text Markdown/HTML content with escaped code blocks
	 * @return string       Markdown/HTML content
	 */
	public function codeblock_restore( $text ) {
		return preg_replace_callback( "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m", array( $this, 'do_codeblock_restore' ), $text );
	}

	/**
	 * Regex callback for code block restoration (unescaping).
	 * @param  array $matches Regex matches
	 * @return string         Codeblock with unescaped interior
	 */
	public function do_codeblock_restore( $matches ) {
		$block = html_entity_decode( $matches[3], ENT_QUOTES );
		$open = $matches[1] . $matches[2] . "\n";
		return $open . $block . $matches[4];
	}

	/**
	 * Called to preserve legacy LaTeX like $latex some-latex-text $
	 * @param  string $text Text in which to preserve LaTeX
	 * @return string       Text with LaTeX replaced by a hash that will be restored later
	 */
	protected function latex_preserve( $text ) {
		// regex from latex_remove()
		$regex = '%
			\$latex(?:=\s*|\s+)
			((?:
				[^$]+ # Not a dollar
			|
				(?<=(?<!\\\\)\\\\)\$ # Dollar preceded by exactly one slash
			)+)
			(?<!\\\\)\$ # Dollar preceded by zero slashes
		%ix';
		$text = preg_replace_callback( $regex, array( $this, '_doRemoveText'), $text );
		return $text;
	}

	/**
	 * Called to preserve WP shortcodes from being formatted by Markdown in any way.
	 * @param  string $text Text in which to preserve shortcodes
	 * @return string       Text with shortcodes replaced by a hash that will be restored later
	 */
	protected function shortcode_preserve( $text ) {
		$text = preg_replace_callback( $this->get_shortcode_regex(), array( $this, '_doRemoveText' ), $text );
		return $text;
	}

	/**
	 * Restores any text preserved by $this->hash_block()
	 * @param  string $text Text that may have hashed preservation placeholders
	 * @return string       Text with hashed preservation placeholders replaced by original text
	 */
	protected function do_restore( $text ) {
		// Restore in reverse order of preservation. A block preserved later (e.g. a
		// shortcode) can contain the placeholder of a block preserved earlier (e.g.
		// inline <code>); the outer block must be put back first so the inner
		// placeholder is present in the text when its turn comes.
		$hashes = array_reverse( $this->preserve_text_hash, true );
		foreach ( $hashes as $hash => $value ) {
			$placeholder = $this->hash_maker( $hash );
			$text = str_replace( $placeholder, $value, $text );
		}
		// reset the hash
		$this->preserve_text_hash = array();
		return $text;
	}

	/**
	 * Regex callback for text preservation
	 * @param  array $m  Regex $matches array
	 * @return string    A placeholder that will later be replaced by the original text
	 */
	protected function _doRemoveText( $m ) {
		return $this->hash_block( $m[0] );
	}

	/**
	 * Call this to store a text block for later restoration.
	 * @param  string $text Text to preserve for later
	 * @return string       Placeholder that will be swapped out later for the original text
	 */
	protected function hash_block( $text ) {
		$hash = md5( $text );
		$this->preserve_text_hash[ $hash ] = $text;
		$placeholder = $this->hash_maker( $hash );
		return $placeholder;
	}

	/**
	 * Less glamorous than the Keymaker
	 * @param  string $hash An md5 hash
	 * @return string       A placeholder hash
	 */
	protected function hash_maker( $hash ) {
		return 'MARKDOWN_HASH' . $hash . 'MARKDOWN_HASH';
	}

	/**
	 * Remove bare <p> elements. <p>s with attributes will be preserved.
	 * @param  string $text HTML content
	 * @return string       <p>-less content
	 */
	public function unp( $text ) {
		$stripped = preg_replace( "#<p>(.*?)</p>(\n|$)#ums", '$1$2', $text );
		// preg_replace() returns null on error (e.g. invalid UTF-8 with the "u"
		// modifier); hand back the content untouched rather than nothing.
		return ( null === $stripped ) ? $text : $stripped;
	}

	/**
	 * A regex of all shortcodes currently registered by the current
	 * WordPress installation
	 * @uses   get_shortcode_regex()
	 * @return string A regex for grabbing shortcodes.
	 */
	protected function get_shortcode_regex() {
		$pattern = get_shortcode_regex();

		// don't match markdown link anchors that could be mistaken for shortcodes.
		$pattern .= '(?!\()';

		return "/$pattern/s";
	}

	/**
	 * Since we escape unspaced #Headings, put things back later.
	 * @param  string $text text with a leading escaped hash
	 * @return string       text with leading hashes unescaped
	 */
	protected function restore_leading_hash( $text ) {
		$restored = preg_replace( "/^(<p>)?(&#35;|\\\\#)/um", "$1#", $text );
		// preg_replace() returns null on error (e.g. invalid UTF-8 with the "u"
		// modifier); keep the text as it was in that case.
		return ( null === $restored ) ? $text : $restored;
	}

	/**
	 * Overload to support ```-fenced code blocks for pre-Markdown Extra 1.2.8
	 * https://help.github.com/articles/github-flavored-markdown#fenced-code-blocks
	 *
	 * @param  string $text Markdown text
	 * @return string       Text with fenced code blocks processed
	 */
	public function doFencedCodeBlocks( $text ) {
		// If we're at least at 1.2.8, native fenced code blocks are in.
		// Below is just copied from it in case we somehow got loaded on
		// top of someone else's Markdown Extra
		if ( version_compare( MARKDOWNEXTRA_VERSION, '1.2.8', '>=' ) ) {
			return parent::doFencedCodeBlocks( $text );
		}

		#
		# Adding the fenced code block syntax to regular Markdown:
		#
		# ~~~
		# Code block
		# ~~~
		#
		$text = preg_replace_callback('{
				(?:\n|\A)
				# 1: Opening marker
				(
					(?:~{3,}|`{3,}) # 3 or more tildes/backticks.
				)
				[ ]*
				(?:
					\.?([-_:a-zA-Z0-9]+) # 2: standalone class name
				|
					'.$this->id_class_attr_catch_re.' # 3: Extra attributes
				)?
				[ ]* \n # Whitespace and newline following marker.

				# 4: Content
				(
					(?>
						(?!\1 [ ]* \n)	# Not a closing marker.
						.*\n+
					)+
				)

				# Closing marker.
				\1 [ ]* (?= \n )
			}xm',
			array($this, '_doFencedCodeBlocks_callback'), $text);

		return $text;
	}

	/**
	 * Callback for pre-processing start of line hashes to slyly escape headings that don't
	 * have a leading space
	 * @param  array $m  preg_match matches
	 * @return string    possibly escaped start of line hash
	 */
	public function _doEscapeForHashWithoutSpacing( $m ) {
		// Group 1 is the optional space after the hashes; it is absent from the
		// matches when no space followed, which is the case that gets escaped.
		if ( ! isset( $m[1] ) ) {
			return '\\' . $m[0];
		}
		return $m[0];
	}

	/**
	 * Overload to support Viper's [code] shortcode. Because awesome.
	 *
	 * @param  array $matches Regex matches from doFencedCodeBlocks()
	 * @return string         Replacement for the fenced code block
	 */
	public function _doFencedCodeBlocks_callback( $matches ) {
		// in case we have some escaped leading hashes right at the start of the block
		$matches[4] = $this->restore_leading_hash( $matches[4] );
		// just MarkdownExtra_Parser if we're not going ultra-deluxe
		if ( ! $this->use_code_shortcode ) {
			return parent::_doFencedCodeBlocks_callback( $matches );
		}

		// default to a "text" class if one wasn't passed. Helps with encoding issues later.
		$classname = empty( $matches[2] ) ? 'text' : $matches[2];
		$codeblock = preg_replace_callback('/^\n+/', array( $this, '_doFencedCodeBlocks_newlines' ), $matches[4] );

		// Square-bracket offset: the curly-brace form ($classname{0}) is a parse
		// error on PHP 8.
		if ( '.' === $classname[0] ) {
			$classname = substr( $classname, 1 );
		}

		$codeblock = esc_html( $codeblock );
		$codeblock = sprintf( $this->shortcode_start, $classname ) . "\n{$codeblock}" . $this->shortcode_end;
		return "\n\n" . $this->hashBlock( $codeblock ). "\n\n";
	}

}
390