Automattic /
jetpack
These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
| 1 | <?php |
||
| 2 | /** |
||
| 3 | * GitHub-Flavoured Markdown. Inspired by Evan's plugin, but modified. |
||
| 4 | * |
||
| 5 | * @author Evan Solomon |
||
| 6 | * @author Matt Wiebe <[email protected]> |
||
| 7 | * @link https://github.com/evansolomon/wp-github-flavored-markdown-comments |
||
| 8 | * |
||
| 9 | * Add a few extras from GitHub's Markdown implementation. Must be used in a WordPress environment. |
||
| 10 | */ |
||
| 11 | |||
class WPCom_GHF_Markdown_Parser extends MarkdownExtra_Parser {

	/**
	 * Hooray somewhat arbitrary numbers that are fearful of 1.0.x.
	 *
	 * The constant name is misspelled ("MARDOWN"); it is kept as-is because
	 * it is part of the class's public interface.
	 */
	const WPCOM_GHF_MARDOWN_VERSION = '0.9.0';

	/**
	 * Use a [code] shortcode when encountering a fenced code block.
	 * Overwritten in the constructor based on whether the
	 * `SyntaxHighlighter` class exists.
	 *
	 * @var boolean
	 */
	public $use_code_shortcode = true;

	/**
	 * Preserve shortcodes, untouched by Markdown.
	 * This requires use within a WordPress installation.
	 * Overwritten in the constructor based on whether
	 * `get_shortcode_regex()` exists.
	 *
	 * @var boolean
	 */
	public $preserve_shortcodes = true;

	/**
	 * Preserve the legacy $latex your-latex-code-here$ style LaTeX markup.
	 * Overwritten in the constructor based on whether `latex_markup()` exists.
	 *
	 * @var boolean
	 */
	public $preserve_latex = true;

	/**
	 * Preserve single-line <code> blocks.
	 *
	 * @var boolean
	 */
	public $preserve_inline_code_blocks = true;

	/**
	 * Strip paragraphs from the output. This is the right default for WordPress,
	 * which generally wants to create its own paragraphs with `wpautop`.
	 * Overwritten in the constructor based on whether `wpautop()` exists.
	 *
	 * @var boolean
	 */
	public $strip_paras = true;

	/**
	 * Opening shortcode used for fenced code blocks. Runs through sprintf()
	 * with the language/class name as the only argument, so you can supply
	 * your own syntax if you want.
	 *
	 * @var string
	 */
	public $shortcode_start = '[code lang=%s]';

	/**
	 * Closing shortcode used for fenced code blocks.
	 *
	 * @var string
	 */
	public $shortcode_end = '[/code]';

	/**
	 * Text removed before Markdown runs and put back afterwards,
	 * keyed by the md5 hash of the removed text.
	 *
	 * @var array
	 */
	protected $preserve_text_hash = array();

	/**
	 * Set environment defaults based on presence of key functions/classes.
	 */
	public function __construct() {
		$this->use_code_shortcode  = class_exists( 'SyntaxHighlighter' );
		$this->preserve_shortcodes = function_exists( 'get_shortcode_regex' );
		$this->preserve_latex      = function_exists( 'latex_markup' );
		$this->strip_paras         = function_exists( 'wpautop' );

		parent::__construct();
	}

	/**
	 * Transform Markdown text to HTML, protecting content Markdown must not touch.
	 *
	 * Depending on the instance flags, single-line <code> elements, shortcodes
	 * and legacy LaTeX are swapped for placeholders before the parent parser
	 * runs and restored afterwards. Third-party patterns supplied through the
	 * `jetpack_markdown_preserve_pattern` filter are protected the same way.
	 *
	 * Heading handling is overloaded so that a hash only produces a heading
	 * when it is followed by a space. This is in keeping with the
	 * documentation and eases the semantic overload of the hash character.
	 *   #Will Not Produce a Heading 1
	 *   # This Will Produce a Heading 1
	 *
	 * @param  string $text Markdown text
	 * @return string       HTML-transformed text
	 */
	public function transform( $text ) {
		// Preserve anything inside a single-line <code> element
		if ( $this->preserve_inline_code_blocks ) {
			$text = $this->single_line_code_preserve( $text );
		}
		// Remove all shortcodes so their interiors are left intact
		if ( $this->preserve_shortcodes ) {
			$text = $this->shortcode_preserve( $text );
		}
		// Remove legacy LaTeX so it's left intact
		if ( $this->preserve_latex ) {
			$text = $this->latex_preserve( $text );
		}

		// escape line-beginning # chars that do not have a space after them.
		$text = preg_replace_callback( '|^#{1,6}( )?|um', array( $this, '_doEscapeForHashWithoutSpacing' ), $text );

		/**
		 * Allow third-party plugins to define custom patterns that won't be processed by Markdown.
		 *
		 * @module markdown
		 *
		 * @since 3.9.2
		 *
		 * @param array $custom_patterns Array of custom patterns to be ignored by Markdown.
		 */
		$custom_patterns = apply_filters( 'jetpack_markdown_preserve_pattern', array() );
		if ( is_array( $custom_patterns ) && ! empty( $custom_patterns ) ) {
			foreach ( $custom_patterns as $pattern ) {
				// Patterns come from third-party code; ignore anything that is not a string.
				if ( ! is_string( $pattern ) ) {
					continue;
				}
				$preserved = preg_replace_callback( $pattern, array( $this, '_doRemoveText' ), $text );
				// preg_replace_callback() returns null on error (e.g. a malformed
				// pattern); keep the text instead of replacing it with null.
				if ( null !== $preserved ) {
					$text = $preserved;
				}
			}
		}

		// run through core Markdown
		$text = parent::transform( $text );

		// Occasionally Markdown Extra chokes on a para structure, producing odd paragraphs.
		$text = str_replace( "<p><</p>\n\n<p>p>", '<p>', $text );

		// put start-of-line # chars back in place
		$text = $this->restore_leading_hash( $text );

		// Strip paras if set
		if ( $this->strip_paras ) {
			$text = $this->unp( $text );
		}

		// Restore preserved things like shortcodes/LaTeX
		$text = $this->do_restore( $text );

		return $text;
	}

	/**
	 * Prevents blocks like <code>__this__</code> from turning into <code><strong>this</strong></code>
	 *
	 * The pattern has no `s` modifier, so only <code> elements that open and
	 * close on the same line are matched.
	 *
	 * @param  string $text Text that may need preserving
	 * @return string       Text that was preserved if needed
	 */
	public function single_line_code_preserve( $text ) {
		return preg_replace_callback( '|<code\b[^>]*>(.*?)</code>|', array( $this, 'do_single_line_code_preserve' ), $text );
	}

	/**
	 * Regex callback for inline code preservation.
	 *
	 * Only the element's interior is hashed. The element is re-emitted as a
	 * bare <code> tag, so attributes on the original opening tag are not
	 * carried over.
	 *
	 * @param  array $matches Regex matches
	 * @return string         Hashed content for later restoration
	 */
	public function do_single_line_code_preserve( $matches ) {
		return '<code>' . $this->hash_block( $matches[1] ) . '</code>';
	}

	/**
	 * Preserve code block contents by HTML encoding them. Useful before getting to KSES stripping.
	 *
	 * @param  string $text Markdown/HTML content
	 * @return string       Markdown/HTML content with escaped code blocks
	 */
	public function codeblock_preserve( $text ) {
		return preg_replace_callback( "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m", array( $this, 'do_codeblock_preserve' ), $text );
	}

	/**
	 * Regex callback for code block preservation.
	 *
	 * Match groups: 1 = opening fence, 2 = optional text after the fence,
	 * 3 = block interior, 4 = closing fence.
	 *
	 * @param  array $matches Regex matches
	 * @return string         Codeblock with escaped interior
	 */
	public function do_codeblock_preserve( $matches ) {
		$block = stripslashes( $matches[3] );
		$block = esc_html( $block );
		// Double up the backslashes that remain after escaping.
		$block = str_replace( '\\', '\\\\', $block );
		$open  = $matches[1] . $matches[2] . "\n";
		return $open . $block . $matches[4];
	}

	/**
	 * Restore previously preserved (i.e. escaped) code block contents.
	 *
	 * @param  string $text Markdown/HTML content with escaped code blocks
	 * @return string       Markdown/HTML content
	 */
	public function codeblock_restore( $text ) {
		return preg_replace_callback( "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m", array( $this, 'do_codeblock_restore' ), $text );
	}

	/**
	 * Regex callback for code block restoration (unescaping).
	 *
	 * Match groups: 1 = opening fence, 2 = optional text after the fence,
	 * 3 = block interior, 4 = closing fence.
	 *
	 * @param  array $matches Regex matches
	 * @return string         Codeblock with unescaped interior
	 */
	public function do_codeblock_restore( $matches ) {
		$block = html_entity_decode( $matches[3], ENT_QUOTES );
		$open  = $matches[1] . $matches[2] . "\n";
		return $open . $block . $matches[4];
	}

	/**
	 * Called to preserve legacy LaTeX like $latex some-latex-text $
	 *
	 * @param  string $text Text in which to preserve LaTeX
	 * @return string       Text with LaTeX replaced by a hash that will be restored later
	 */
	protected function latex_preserve( $text ) {
		// regex from latex_remove()
		$regex = '%
			\$latex(?:=\s*|\s+)
			((?:
				[^$]+ # Not a dollar
			|
				(?<=(?<!\\\\)\\\\)\$ # Dollar preceded by exactly one slash
			)+)
			(?<!\\\\)\$ # Dollar preceded by zero slashes
		%ix';
		$text = preg_replace_callback( $regex, array( $this, '_doRemoveText' ), $text );
		return $text;
	}

	/**
	 * Called to preserve WP shortcodes from being formatted by Markdown in any way.
	 *
	 * @param  string $text Text in which to preserve shortcodes
	 * @return string       Text with shortcodes replaced by a hash that will be restored later
	 */
	protected function shortcode_preserve( $text ) {
		$text = preg_replace_callback( $this->get_shortcode_regex(), array( $this, '_doRemoveText' ), $text );
		return $text;
	}

	/**
	 * Restores any text preserved by $this->hash_block() and clears the store.
	 *
	 * Placeholders are restored in reverse order of preservation. A block
	 * preserved later (e.g. a shortcode) may itself contain a placeholder
	 * created earlier (e.g. inline <code>). Restoring the outer block first
	 * re-exposes the inner placeholder so it is restored too, instead of
	 * leaking into the output.
	 *
	 * @param  string $text Text that may have hashed preservation placeholders
	 * @return string       Text with hashed preservation placeholders replaced by original text
	 */
	protected function do_restore( $text ) {
		// Reverse hashes to ensure nested blocks are restored.
		$hashes = array_reverse( $this->preserve_text_hash, true );
		foreach ( $hashes as $hash => $value ) {
			$placeholder = $this->hash_maker( $hash );
			$text        = str_replace( $placeholder, $value, $text );
		}
		// reset the hash
		$this->preserve_text_hash = array();
		return $text;
	}

	/**
	 * Regex callback for text preservation. Preserves the entire match.
	 *
	 * @param  array $m Regex $matches array
	 * @return string   A placeholder that will later be replaced by the original text
	 */
	protected function _doRemoveText( $m ) {
		return $this->hash_block( $m[0] );
	}

	/**
	 * Call this to store a text block for later restoration.
	 *
	 * @param  string $text Text to preserve for later
	 * @return string       Placeholder that will be swapped out later for the original text
	 */
	protected function hash_block( $text ) {
		$hash                              = md5( $text );
		$this->preserve_text_hash[ $hash ] = $text;
		$placeholder                       = $this->hash_maker( $hash );
		return $placeholder;
	}

	/**
	 * Less glamorous than the Keymaker
	 *
	 * @param  string $hash An md5 hash
	 * @return string       A placeholder hash
	 */
	protected function hash_maker( $hash ) {
		return 'MARKDOWN_HASH' . $hash . 'MARKDOWN_HASH';
	}

	/**
	 * Remove bare <p> elements. <p>s with attributes will be preserved.
	 *
	 * @param  string $text HTML content
	 * @return string       <p>-less content
	 */
	public function unp( $text ) {
		return preg_replace( "#<p>(.*?)</p>(\n|$)#ums", '$1$2', $text );
	}

	/**
	 * A regex of all shortcodes currently registered by the current
	 * WordPress installation
	 *
	 * @uses get_shortcode_regex()
	 * @return string A regex for grabbing shortcodes.
	 */
	protected function get_shortcode_regex() {
		$pattern = get_shortcode_regex();

		// don't match markdown link anchors that could be mistaken for shortcodes.
		$pattern .= '(?!\()';

		return "/$pattern/s";
	}

	/**
	 * Since we escape unspaced #Headings, put things back later.
	 *
	 * @param  string $text text with a leading escaped hash
	 * @return string       text with leading hashes unescaped
	 */
	protected function restore_leading_hash( $text ) {
		return preg_replace( "/^(<p>)?(#|\\\\#)/um", "$1#", $text );
	}

	/**
	 * Overload to support ```-fenced code blocks for pre-Markdown Extra 1.2.8
	 * https://help.github.com/articles/github-flavored-markdown#fenced-code-blocks
	 *
	 * @param  string $text Markdown text
	 * @return string       Text with fenced code blocks processed
	 */
	public function doFencedCodeBlocks( $text ) {
		// If we're at least at 1.2.8, native fenced code blocks are in.
		// Below is just copied from it in case we somehow got loaded on
		// top of someone else's Markdown Extra
		if ( version_compare( MARKDOWNEXTRA_VERSION, '1.2.8', '>=' ) ) {
			return parent::doFencedCodeBlocks( $text );
		}

		#
		# Adding the fenced code block syntax to regular Markdown:
		#
		# ~~~
		# Code block
		# ~~~
		#
		$text = preg_replace_callback('{
				(?:\n|\A)
				# 1: Opening marker
				(
					(?:~{3,}|`{3,}) # 3 or more tildes/backticks.
				)
				[ ]*
				(?:
					\.?([-_:a-zA-Z0-9]+) # 2: standalone class name
				|
					'.$this->id_class_attr_catch_re.' # 3: Extra attributes
				)?
				[ ]* \n # Whitespace and newline following marker.

				# 4: Content
				(
					(?>
						(?!\1 [ ]* \n) # Not a closing marker.
						.*\n+
					)+
				)

				# Closing marker.
				\1 [ ]* (?= \n )
			}xm',
			array( $this, '_doFencedCodeBlocks_callback' ), $text );

		return $text;
	}

	/**
	 * Callback for pre-processing start of line hashes to slyly escape headings that don't
	 * have a leading space
	 *
	 * @param  array $m preg_match matches
	 * @return string   possibly escaped start of line hash
	 */
	public function _doEscapeForHashWithoutSpacing( $m ) {
		// Capture group 1 is the optional space after the hashes; when it is
		// absent, prefix a backslash to escape the hashes.
		if ( ! isset( $m[1] ) ) {
			$m[0] = '\\' . $m[0];
		}
		return $m[0];
	}

	/**
	 * Overload to support Viper's [code] shortcode. Because awesome.
	 *
	 * Match groups: 2 = standalone class name, 4 = block content.
	 *
	 * @param  array $matches Regex matches from doFencedCodeBlocks()
	 * @return string         Replacement for the fenced code block
	 */
	public function _doFencedCodeBlocks_callback( $matches ) {
		// in case we have some escaped leading hashes right at the start of the block
		$matches[4] = $this->restore_leading_hash( $matches[4] );
		// just MarkdownExtra_Parser if we're not going ultra-deluxe
		if ( ! $this->use_code_shortcode ) {
			return parent::_doFencedCodeBlocks_callback( $matches );
		}

		// default to a "text" class if one wasn't passed. Helps with encoding issues later.
		if ( empty( $matches[2] ) ) {
			$matches[2] = 'text';
		}

		$classname = $matches[2];
		$codeblock = preg_replace_callback( '/^\n+/', array( $this, '_doFencedCodeBlocks_newlines' ), $matches[4] );

		// Square-bracket offset: the curly-brace form ($classname{0}) was
		// removed in PHP 8.0. $classname is never empty here because of the
		// 'text' default above.
		if ( '.' === $classname[0] ) {
			$classname = substr( $classname, 1 );
		}

		$codeblock = esc_html( $codeblock );
		$codeblock = sprintf( $this->shortcode_start, $classname ) . "\n{$codeblock}" . $this->shortcode_end;
		return "\n\n" . $this->hashBlock( $codeblock ) . "\n\n";
	}

}
||
| 390 |