|
1
|
|
|
<?php |
|
2
|
|
|
/**
 * GitHub-Flavoured Markdown. Inspired by Evan's plugin, but modified.
 *
 * @author Evan Solomon
 * @author Matt Wiebe <[email protected]>
 * @link https://github.com/evansolomon/wp-github-flavored-markdown-comments
 *
 * Add a few extras from GitHub's Markdown implementation. Must be used in a WordPress environment.
 */
|
11
|
|
|
|
|
12
|
|
|
class WPCom_GHF_Markdown_Parser extends MarkdownExtra_Parser {

    /**
     * Hooray somewhat arbitrary numbers that are fearful of 1.0.x.
     *
     * The constant name is misspelled ("MARDOWN"); it is kept as-is because
     * renaming it would break any code that already references it.
     */
    const WPCOM_GHF_MARDOWN_VERSION = '0.9.0';

    /**
     * Use a [code] shortcode when encountering a fenced code block.
     * Overwritten in the constructor based on the presence of SyntaxHighlighter.
     * @var boolean
     */
    public $use_code_shortcode = true;

    /**
     * Preserve shortcodes, untouched by Markdown.
     * This requires use within a WordPress installation.
     * Overwritten in the constructor based on the presence of get_shortcode_regex().
     * @var boolean
     */
    public $preserve_shortcodes = true;

    /**
     * Preserve the legacy $latex your-latex-code-here$ style LaTeX markup.
     * Overwritten in the constructor based on the presence of latex_markup().
     * @var boolean
     */
    public $preserve_latex = true;

    /**
     * Preserve single-line <code> blocks.
     * @var boolean
     */
    public $preserve_inline_code_blocks = true;

    /**
     * Strip paragraphs from the output. This is the right default for WordPress,
     * which generally wants to create its own paragraphs with `wpautop`.
     * Overwritten in the constructor based on the presence of wpautop().
     * @var boolean
     */
    public $strip_paras = true;

    /**
     * Opening shortcode for fenced code blocks.
     * Will run through sprintf - you can supply your own syntax if you want.
     * @var string
     */
    public $shortcode_start = '[code lang=%s]';

    /**
     * Closing shortcode for fenced code blocks.
     * @var string
     */
    public $shortcode_end = '[/code]';

    /**
     * Stores the text (shortcodes, LaTeX, inline code, custom patterns) that we
     * remove before Markdown runs and put back afterwards. Keyed by md5 hash.
     * @var array
     */
    protected $preserve_text_hash = array();

    /**
     * Set environment defaults based on presence of key functions/classes.
     */
    public function __construct() {
        $this->use_code_shortcode  = class_exists( 'SyntaxHighlighter' );
        $this->preserve_shortcodes = function_exists( 'get_shortcode_regex' );
        $this->preserve_latex      = function_exists( 'latex_markup' );
        $this->strip_paras         = function_exists( 'wpautop' );

        parent::__construct();
    }

    /**
     * Overload to specify heading styles only if the hash has space(s) after it. This is actually in keeping with
     * the documentation and eases the semantic overload of the hash character.
     * #Will Not Produce a Heading 1
     * # This Will Produce a Heading 1
     *
     * @param  string $text Markdown text
     * @return string       HTML-transformed text
     */
    public function transform( $text ) {
        // Preserve anything inside a single-line <code> element
        if ( $this->preserve_inline_code_blocks ) {
            $text = $this->single_line_code_preserve( $text );
        }
        // Remove all shortcodes so their interiors are left intact
        if ( $this->preserve_shortcodes ) {
            $text = $this->shortcode_preserve( $text );
        }
        // Remove legacy LaTeX so it's left intact
        if ( $this->preserve_latex ) {
            $text = $this->latex_preserve( $text );
        }

        // escape line-beginning # chars that do not have a space after them.
        $escaped = preg_replace_callback( '|^#{1,6}( )?|um', array( $this, '_doEscapeForHashWithoutSpacing' ), $text );
        $text    = $this->keep_text_on_pcre_failure( $escaped, $text );

        /**
         * Allow third-party plugins to define custom patterns that won't be processed by Markdown.
         *
         * @module markdown
         *
         * @since 3.9.2
         *
         * @param array $custom_patterns Array of custom patterns to be ignored by Markdown.
         */
        $custom_patterns = apply_filters( 'jetpack_markdown_preserve_pattern', array() );
        if ( is_array( $custom_patterns ) && ! empty( $custom_patterns ) ) {
            foreach ( $custom_patterns as $pattern ) {
                // A broken third-party pattern must not wipe out the whole document.
                $preserved = preg_replace_callback( $pattern, array( $this, '_doRemoveText' ), $text );
                $text      = $this->keep_text_on_pcre_failure( $preserved, $text );
            }
        }

        // run through core Markdown
        $text = parent::transform( $text );

        // Occasionally Markdown Extra chokes on a para structure, producing odd paragraphs.
        // NOTE(review): Markdown would normally emit a lone "<" as an HTML entity; confirm that
        // the search string below was not entity-decoded somewhere along the way.
        $text = str_replace( "<p><</p>\n\n<p>p>", '<p>', $text );

        // put start-of-line # chars back in place
        $text = $this->restore_leading_hash( $text );

        // Strip paras if set
        if ( $this->strip_paras ) {
            $text = $this->unp( $text );
        }

        // Restore preserved things like shortcodes/LaTeX
        $text = $this->do_restore( $text );

        return $text;
    }

    /**
     * Prevents blocks like <code>__this__</code> from turning into <code><strong>this</strong></code>
     * @param  string $text Text that may need preserving
     * @return string       Text that was preserved if needed
     */
    public function single_line_code_preserve( $text ) {
        $preserved = preg_replace_callback( '|<code\b[^>]*>(.*?)</code>|', array( $this, 'do_single_line_code_preserve' ), $text );
        return $this->keep_text_on_pcre_failure( $preserved, $text );
    }

    /**
     * Regex callback for inline code preservation
     *
     * NOTE(review): the pattern in single_line_code_preserve() accepts attributes on the
     * opening <code> tag, but the tag is rebuilt here without them - confirm that dropping
     * the attributes is intended.
     *
     * @param  array $matches Regex matches
     * @return string         Hashed content for later restoration
     */
    public function do_single_line_code_preserve( $matches ) {
        return '<code>' . $this->hash_block( $matches[1] ) . '</code>';
    }

    /**
     * Preserve code block contents by HTML encoding them. Useful before getting to KSES stripping.
     * @param  string $text Markdown/HTML content
     * @return string       Markdown/HTML content with escaped code blocks
     */
    public function codeblock_preserve( $text ) {
        $preserved = preg_replace_callback( "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m", array( $this, 'do_codeblock_preserve' ), $text );
        return $this->keep_text_on_pcre_failure( $preserved, $text );
    }

    /**
     * Regex callback for code block preservation.
     * @param  array $matches Regex matches: 1 = opening fence, 2 = optional info string,
     *                        3 = block content, 4 = closing fence
     * @return string         Codeblock with escaped interior
     */
    public function do_codeblock_preserve( $matches ) {
        $block = stripslashes( $matches[3] );
        $block = esc_html( $block );
        // Double up backslashes that survived, after the stripslashes() above removed one level.
        $block = str_replace( '\\', '\\\\', $block );
        $open  = $matches[1] . $matches[2] . "\n";
        return $open . $block . $matches[4];
    }

    /**
     * Restore previously preserved (i.e. escaped) code block contents.
     * @param  string $text Markdown/HTML content with escaped code blocks
     * @return string       Markdown/HTML content
     */
    public function codeblock_restore( $text ) {
        $restored = preg_replace_callback( "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m", array( $this, 'do_codeblock_restore' ), $text );
        return $this->keep_text_on_pcre_failure( $restored, $text );
    }

    /**
     * Regex callback for code block restoration (unescaping).
     * @param  array $matches Regex matches: 1 = opening fence, 2 = optional info string,
     *                        3 = block content, 4 = closing fence
     * @return string         Codeblock with unescaped interior
     */
    public function do_codeblock_restore( $matches ) {
        $block = html_entity_decode( $matches[3], ENT_QUOTES );
        $open  = $matches[1] . $matches[2] . "\n";
        return $open . $block . $matches[4];
    }

    /**
     * Called to preserve legacy LaTeX like $latex some-latex-text $
     * @param  string $text Text in which to preserve LaTeX
     * @return string       Text with LaTeX replaced by a hash that will be restored later
     */
    protected function latex_preserve( $text ) {
        // regex from latex_remove()
        $regex = '%
            \$latex(?:=\s*|\s+)
            ((?:
                [^$]+ # Not a dollar
            |
                (?<=(?<!\\\\)\\\\)\$ # Dollar preceded by exactly one slash
            )+)
            (?<!\\\\)\$ # Dollar preceded by zero slashes
        %ix';
        $preserved = preg_replace_callback( $regex, array( $this, '_doRemoveText' ), $text );
        return $this->keep_text_on_pcre_failure( $preserved, $text );
    }

    /**
     * Called to preserve WP shortcodes from being formatted by Markdown in any way.
     * @param  string $text Text in which to preserve shortcodes
     * @return string       Text with shortcodes replaced by a hash that will be restored later
     */
    protected function shortcode_preserve( $text ) {
        $preserved = preg_replace_callback( $this->get_shortcode_regex(), array( $this, '_doRemoveText' ), $text );
        return $this->keep_text_on_pcre_failure( $preserved, $text );
    }

    /**
     * Restores any text preserved by $this->hash_block()
     * @param  string $text Text that may have hashed preservation placeholders
     * @return string       Text with hashed preservation placeholders replaced by original text
     */
    protected function do_restore( $text ) {
        foreach ( $this->preserve_text_hash as $hash => $value ) {
            $placeholder = $this->hash_maker( $hash );
            $text        = str_replace( $placeholder, $value, $text );
        }
        // reset the hash
        $this->preserve_text_hash = array();
        return $text;
    }

    /**
     * Regex callback for text preservation
     * @param  array $m Regex $matches array
     * @return string   A placeholder that will later be replaced by the original text
     */
    protected function _doRemoveText( $m ) {
        return $this->hash_block( $m[0] );
    }

    /**
     * Call this to store a text block for later restoration.
     * @param  string $text Text to preserve for later
     * @return string       Placeholder that will be swapped out later for the original text
     */
    protected function hash_block( $text ) {
        $hash                              = md5( $text );
        $this->preserve_text_hash[ $hash ] = $text;
        return $this->hash_maker( $hash );
    }

    /**
     * Less glamorous than the Keymaker
     * @param  string $hash An md5 hash
     * @return string       A placeholder hash
     */
    protected function hash_maker( $hash ) {
        return 'MARKDOWN_HASH' . $hash . 'MARKDOWN_HASH';
    }

    /**
     * Remove bare <p> elements. <p>s with attributes will be preserved.
     * @param  string $text HTML content
     * @return string       <p>-less content
     */
    public function unp( $text ) {
        $stripped = preg_replace( "#<p>(.*?)</p>(\n|$)#ums", '$1$2', $text );
        return $this->keep_text_on_pcre_failure( $stripped, $text );
    }

    /**
     * A regex of all shortcodes currently registered by the current
     * WordPress installation
     * @uses   get_shortcode_regex()
     * @return string A regex for grabbing shortcodes.
     */
    protected function get_shortcode_regex() {
        $pattern = get_shortcode_regex();

        // don't match markdown link anchors that could be mistaken for shortcodes.
        $pattern .= '(?!\()';

        return "/$pattern/s";
    }

    /**
     * Since we escape unspaced #Headings, put things back later.
     * @param  string $text text with a leading escaped hash
     * @return string       text with leading hashes unescaped
     */
    protected function restore_leading_hash( $text ) {
        $restored = preg_replace( "/^(<p>)?(#|\\\\#)/um", "$1#", $text );
        return $this->keep_text_on_pcre_failure( $restored, $text );
    }

    /**
     * Overload to support ```-fenced code blocks for pre-Markdown Extra 1.2.8
     * https://help.github.com/articles/github-flavored-markdown#fenced-code-blocks
     *
     * @param  string $text Markdown text
     * @return string       Text with fenced code blocks handed to the fenced-code callback
     */
    public function doFencedCodeBlocks( $text ) {
        // If we're at least at 1.2.8, native fenced code blocks are in.
        // Below is just copied from it in case we somehow got loaded on
        // top of someone else's Markdown Extra
        if ( version_compare( MARKDOWNEXTRA_VERSION, '1.2.8', '>=' ) ) {
            return parent::doFencedCodeBlocks( $text );
        }

        //
        // Adding the fenced code block syntax to regular Markdown:
        //
        // ~~~
        // Code block
        // ~~~
        //
        $replaced = preg_replace_callback('{
                (?:\n|\A)
                # 1: Opening marker
                (
                    (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
                )
                [ ]*
                (?:
                    \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
                |
                    '.$this->id_class_attr_catch_re.' # 3: Extra attributes
                )?
                [ ]* \n # Whitespace and newline following marker.

                # 4: Content
                (
                    (?>
                        (?!\1 [ ]* \n) # Not a closing marker.
                        .*\n+
                    )+
                )

                # Closing marker.
                \1 [ ]* (?= \n )
            }xm',
            array( $this, '_doFencedCodeBlocks_callback' ), $text );

        return $this->keep_text_on_pcre_failure( $replaced, $text );
    }

    /**
     * Callback for pre-processing start of line hashes to slyly escape headings that don't
     * have a leading space
     * @param  array $m preg_match matches
     * @return string   possibly escaped start of line hash
     */
    public function _doEscapeForHashWithoutSpacing( $m ) {
        // Group 1 is the optional space after the hashes; it is absent when no space followed.
        if ( empty( $m[1] ) ) {
            return '\\' . $m[0];
        }
        return $m[0];
    }

    /**
     * Overload to support Viper's [code] shortcode. Because awesome.
     *
     * @param  array $matches Regex matches from doFencedCodeBlocks(): 2 = class name, 4 = content
     * @return string         Hashed block surrounded by blank lines, or the parent's output
     *                        when the [code] shortcode is not in use
     */
    public function _doFencedCodeBlocks_callback( $matches ) {
        // in case we have some escaped leading hashes right at the start of the block
        $matches[4] = $this->restore_leading_hash( $matches[4] );
        // just MarkdownExtra_Parser if we're not going ultra-deluxe
        if ( ! $this->use_code_shortcode ) {
            return parent::_doFencedCodeBlocks_callback( $matches );
        }

        // default to a "text" class if one wasn't passed. Helps with encoding issues later.
        if ( empty( $matches[2] ) ) {
            $matches[2] = 'text';
        }

        $classname = $matches[2];
        $codeblock = preg_replace_callback( '/^\n+/', array( $this, '_doFencedCodeBlocks_newlines' ), $matches[4] );

        // Square-bracket offset: the curly-brace form ($classname{0}) is a parse error on PHP 8.
        if ( '.' == $classname[0] ) {
            $classname = substr( $classname, 1 );
        }

        $codeblock = esc_html( $codeblock );
        $codeblock = sprintf( $this->shortcode_start, $classname ) . "\n{$codeblock}" . $this->shortcode_end;
        return "\n\n" . $this->hashBlock( $codeblock ) . "\n\n";
    }

    /**
     * Guard against PCRE failures. preg_replace() and preg_replace_callback() return null
     * when the engine errors out (for example invalid UTF-8 with the /u modifier, or an
     * invalid pattern); passing that null along would silently discard the whole text.
     *
     * @param  mixed $result   Return value of a preg_replace*() call
     * @param  mixed $original The subject that was passed to that call
     * @return mixed           $result, or $original when the call failed
     */
    private function keep_text_on_pcre_failure( $result, $original ) {
        return ( null === $result ) ? $original : $result;
    }

}
|
390
|
|
|
|
This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.
Both the `$myVar` assignment in line 1 and the `$higher` assignment in line 2 are dead. The first because `$myVar` is never used, and the second because `$higher` is always overwritten on every possible execution path.