Total Complexity | 163 |
Total Lines | 1876 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like MarkdownExtra often do many different things. To break such a class down, first identify a cohesive component within it. A common way to find one is to look for fields and methods that share the same prefix or suffix.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use MarkdownExtra, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
16 | class MarkdownExtra extends \Michelf\Markdown { |
||
/**
 * Configuration variables
 */

/**
 * Prefix for footnote ids.
 * @var string
 */
public $fn_id_prefix = '';

/**
 * Optional title attribute for footnote links.
 * @var string
 */
public $fn_link_title = '';

/**
 * Optional class attribute for footnote links.
 * @var string
 */
public $fn_link_class = 'footnote-ref';

/**
 * Optional class attribute for footnote backlinks.
 * @var string
 */
public $fn_backlink_class = 'footnote-backref';

/**
 * Content to be displayed within footnote backlinks. The default is '↩';
 * the U+FE0E on the end is a Unicode variant selector used to prevent iOS
 * from displaying the arrow character as an emoji.
 * Optionally use '^^' and '%%' to refer to the footnote number and
 * reference number respectively. {@see parseFootnotePlaceholders()}
 * @var string
 */
public $fn_backlink_html = '↩︎';

/**
 * Optional title attribute for footnote backlinks, for added
 * accessibility (to ensure backlink uniqueness).
 * Use '^^' and '%%' to refer to the footnote number and reference number
 * respectively. {@see parseFootnotePlaceholders()}
 * @var string
 */
public $fn_backlink_title = '';

/**
 * Optional aria-label attribute for footnote backlinks; accepts the same
 * '^^' and '%%' placeholders as the backlink title.
 * @var string
 */
public $fn_backlink_label = '';

/**
 * Class name for table cell alignment (%% replaced left/center/right)
 * For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
 * If empty, the align attribute is used instead of a class name.
 * @var string
 */
public $table_align_class_tmpl = '';

/**
 * Optional class prefix for fenced code block.
 * @var string
 */
public $code_class_prefix = '';

/**
 * Class attribute for code blocks goes on the `code` tag;
 * setting this to true will put attributes on the `pre` tag instead.
 * @var boolean
 */
public $code_attr_on_pre = false;

/**
 * Predefined abbreviations, as a map of abbreviation word => description.
 * @var array
 */
public $predef_abbr = [];

/**
 * Only convert atx-style headers if there's a space between the header and #
 * @var boolean
 */
public $hashtag_protection = false;

/**
 * Set to true to omit the footnotes from the end of the document
 * (NOTE(review): inferred from the property name; the code that appends
 * footnotes is not in this part of the file). When true, the footnote
 * HTML can be retrieved from $this->footnotes_assembled instead.
 * @var boolean
 */
public $omit_footnotes = false;


/**
 * After parsing, the HTML for the list of footnotes appears here.
 * This is available only if $omit_footnotes == true.
 *
 * Note: when placing the content of `footnotes_assembled` on the page,
 * consider adding the attribute `role="doc-endnotes"` to the `div` or
 * `section` that will enclose the list of footnotes so they are
 * reachable to accessibility tools the same way they would be with the
 * default HTML output.
 * @var null|string
 */
public $footnotes_assembled = null;
||
113 | |||
114 | /** |
||
115 | * Parser implementation |
||
116 | */ |
||
117 | |||
/**
 * Constructor. Registers the Extra-specific escapable characters and
 * transformations, then hands over to the parent constructor.
 * @return void
 */
public function __construct() {
    // The extra escapable characters have to be in place before the
    // parent constructor initializes the escape table.
    $this->escape_chars .= ':|';

    // Extra document, block, and span transformations, keyed by method
    // name. The parent constructor takes care of sorting them.
    $extra_document_gamut = [
        "doFencedCodeBlocks" => 5,
        "stripFootnotes"     => 15,
        "stripAbbreviations" => 25,
        "appendFootnotes"    => 50,
    ];
    $extra_block_gamut = [
        "doFencedCodeBlocks" => 5,
        "doTables"           => 15,
        "doDefLists"         => 45,
    ];
    $extra_span_gamut = [
        "doFootnotes"     => 5,
        "doAbbreviations" => 70,
    ];

    // Array union: entries already present under the same key are kept.
    $this->document_gamut += $extra_document_gamut;
    $this->block_gamut    += $extra_block_gamut;
    $this->span_gamut     += $extra_span_gamut;

    $this->enhanced_ordered_list = true;
    parent::__construct();
}
||
148 | |||
149 | |||
/**
 * Extra variables used during extra transformations. All of them are
 * reset by setup() before each parse.
 * @var array
 */
protected $footnotes = [];

/** @var array */
protected $footnotes_ordered = [];

/** @var array */
protected $footnotes_ref_count = [];

/** @var array */
protected $footnotes_numbers = [];

/**
 * Abbreviation descriptions, keyed by abbreviation word.
 * @var array
 */
protected $abbr_desciptions = [];

/**
 * Alternation of the quoted abbreviation words ("word1|word2|...").
 * @var string
 */
protected $abbr_word_re = '';

/**
 * Give the current footnote number.
 * @var integer
 */
protected $footnote_counter = 1;

/**
 * Ref attribute for links, keyed by link id.
 * @var array
 */
protected $ref_attr = [];
||
173 | |||
/**
 * Setting up Extra-specific variables.
 *
 * Resets the footnote and abbreviation state, then seeds the abbreviation
 * tables from the predefined abbreviations in $this->predef_abbr.
 */
protected function setup() {
    parent::setup();

    $this->footnotes = array();
    $this->footnotes_ordered = array();
    $this->footnotes_ref_count = array();
    $this->footnotes_numbers = array();
    $this->abbr_desciptions = array();
    $this->abbr_word_re = '';
    $this->footnote_counter = 1;
    $this->footnotes_assembled = null;

    foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
        // Compare against '' explicitly: a plain truthiness test treats an
        // accumulated pattern of "0" as empty and would drop the separator.
        if ($this->abbr_word_re !== '') {
            $this->abbr_word_re .= '|';
        }
        // Array keys that look like integers arrive as int; quote them as
        // strings.
        $this->abbr_word_re .= preg_quote((string) $abbr_word);
        $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
    }
}
||
196 | |||
/**
 * Clearing Extra-specific variables.
 *
 * The assembled footnotes are kept when $omit_footnotes is set, so they
 * remain readable after the parse has finished.
 */
protected function teardown() {
    $array_state = [
        'footnotes',
        'footnotes_ordered',
        'footnotes_ref_count',
        'footnotes_numbers',
        'abbr_desciptions',
    ];
    foreach ($array_state as $property) {
        $this->$property = [];
    }
    $this->abbr_word_re = '';

    if (!$this->omit_footnotes) {
        $this->footnotes_assembled = null;
    }

    parent::teardown();
}
||
213 | |||
214 | |||
/**
 * Extra attribute parser
 */

/**
 * Expression matching a braced attribute list such as `{#id .class key=val}`;
 * the content between the braces is captured.
 * @var string
 */
protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';

/**
 * Same expression without the capture, for contexts where no capture is
 * desired.
 * @var string
 */
protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
||
230 | |||
/**
 * Parse attributes caught by the $this->id_class_attr_catch_re expression
 * and return the HTML-formatted list of attributes.
 *
 * Supported attributes are .class, #id (only the first id is used) and
 * key=value pairs. The key=value pairs are left out of the output when
 * $this->no_markup is set.
 *
 * In addition, this method also supports supplying a default Id value,
 * which will be used to populate the id attribute in case it was not
 * overridden.
 * @param string $tag_name       Not used by this implementation.
 * @param string|null $attr      Raw attribute text; may be null or empty.
 * @param mixed $defaultIdValue  Id to use when $attr carries none.
 * @param array $classes         Class names to emit ahead of those in $attr.
 * @return string Attribute string with a leading space, or "" if there is
 *                nothing to output.
 */
protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) {
    if (empty($attr) && !$defaultIdValue && empty($classes)) {
        return "";
    }

    // Split on components. $attr can be null here (callers pass unmatched
    // capture groups while still supplying a default id or classes);
    // passing null as a regex subject is deprecated since PHP 8.1.
    preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', (string) $attr, $matches);
    $elements = $matches[0];

    // Handle classes and IDs (only first ID taken into account)
    $attributes = array();
    $id = false;
    foreach ($elements as $element) {
        if ($element[0] === '.') {
            $classes[] = substr($element, 1);
        } else if ($element[0] === '#') {
            if ($id === false) $id = substr($element, 1);
        } else if (strpos($element, '=') > 0) {
            // key=value: split on the first "=" only.
            $parts = explode('=', $element, 2);
            $attributes[] = $parts[0] . '="' . $parts[1] . '"';
        }
    }

    if ($id === false || $id === '') {
        $id = $defaultIdValue;
    }

    // Compose attributes as string
    $attr_str = "";
    if (!empty($id)) {
        $attr_str .= ' id="'.$this->encodeAttribute($id) .'"';
    }
    if (!empty($classes)) {
        $attr_str .= ' class="'. implode(" ", $classes) . '"';
    }
    if (!$this->no_markup && !empty($attributes)) {
        $attr_str .= ' '.implode(" ", $attributes);
    }
    return $attr_str;
}
||
286 | |||
/**
 * Removes link definitions from the text. Each definition found is handed
 * to _stripLinkDefinitions_callback(), which records it and supplies the
 * replacement text.
 * @param string $text
 * @return string
 */
protected function stripLinkDefinitions($text) {
    $max_indent = $this->tab_width - 1;

    // Link defs are in the form: ^[id]: url "optional title"
    $link_def_re = '{
                    ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?:	# id = $1
                      [ ]*
                      \n?				# maybe *one* newline
                      [ ]*
                    (?:
                      <(.+?)>			# url = $2
                    |
                      (\S+?)			# url = $3
                    )
                      [ ]*
                      \n?				# maybe one newline
                      [ ]*
                    (?:
                        (?<=\s)			# lookbehind for whitespace
                        ["(]
                        (.*?)			# title = $4
                        [")]
                        [ ]*
                    )?	# title is optional
            (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
                    (?:\n+|\Z)
        }xm';

    return preg_replace_callback(
        $link_def_re,
        array($this, '_stripLinkDefinitions_callback'),
        $text
    );
}
||
324 | |||
/**
 * Strip link definition callback. Records the URL, title and extra
 * attributes of one link definition under its lower-cased id.
 * @param array $matches
 * @return string Always '', so the definition disappears from the text.
 */
protected function _stripLinkDefinitions_callback($matches) {
    $link_id = strtolower($matches[1]);

    // The URL is in $2 when written as <url>, in $3 otherwise.
    if ($matches[2] == '') {
        $url = $matches[3];
    } else {
        $url = $matches[2];
    }

    // Trailing optional groups are absent from $matches when unmatched.
    $title = isset($matches[4]) ? $matches[4] : null;
    $extra_attr = isset($matches[5]) ? $matches[5] : null;

    $this->urls[$link_id] = $url;
    $this->titles[$link_id] = $title;
    $this->ref_attr[$link_id] = $this->doExtraAttributes("", $extra_attr);

    return ''; // String that will replace the block
}
||
338 | |||
339 | |||
/**
 * HTML block parser
 */

/**
 * Tags that are always treated as block tags (regex alternation).
 * @var string
 */
protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';

/**
 * Tags treated as block tags only if the opening tag is alone on its line
 * (regex alternation).
 * @var string
 */
protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';

/**
 * Tags where markdown="1" defaults to span mode (regex alternation).
 * @var string
 */
protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';

/**
 * Tags which must not have their contents modified, no matter where
 * they appear (regex alternation).
 * @var string
 */
protected $clean_tags_re = 'script|style|math|svg';

/**
 * Tags that do not need to be closed (regex alternation).
 * @var string
 */
protected $auto_close_tags_re = 'hr|img|param|source|track';
||
374 | |||
/**
 * Hashify HTML Blocks and "clean tags".
 *
 * We only want to do this for block-level HTML tags, such as headers,
 * lists, and tables. That's because we still want to wrap <p>s around
 * "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 * phrase emphasis, and spans. The list of tags we're looking for is
 * hard-coded.
 *
 * The work is done by _hashHTMLBlocks_inMarkdown(), which calls
 * _hashHTMLBlocks_inHTML() when it encounters block tags. When the
 * markdown="1" attribute is found within a tag, _hashHTMLBlocks_inHTML()
 * calls back _hashHTMLBlocks_inMarkdown() to handle the Markdown syntax
 * within the tag. The two functions are mutually recursive.
 * @param string $text
 * @return string The text unchanged when $this->no_markup is set,
 *                otherwise the processed text.
 */
protected function hashHTMLBlocks($text) {
    if (!$this->no_markup) {
        // Call the HTML-in-Markdown hasher; only the processed text (first
        // element of the returned pair) is kept.
        $result = $this->_hashHTMLBlocks_inMarkdown($text);
        $text = $result[0];
    }

    return $text;
}
||
402 | |||
/**
 * Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
 *
 * *   $indent is the number of spaces to be ignored when checking for code
 *     blocks. This is important because if we don't take the indent into
 *     account, something like this (which looks right) won't work as expected:
 *
 *     <div>
 *         <div markdown="1">
 *         Hello World.  <-- Is this a Markdown code block or text?
 *         </div>  <-- Is this a Markdown code block or a real tag?
 *     <div>
 *
 *     If you don't like this, just don't indent the tag on which
 *     you apply the markdown="1" attribute.
 *
 * *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
 *     tag with that name. Nested tags supported.
 *
 * *   If $span is true, text inside must be treated as span. Every newline
 *     is then followed by an empty span-level hash so that it cannot
 *     trigger a block element.
 *
 * Returns an array of that form: ( processed text , remaining text )
 *
 * @param string  $text
 * @param integer $indent
 * @param string  $enclosing_tag_re
 * @param boolean $span
 * @return array
 */
protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                    $enclosing_tag_re = '', $span = false)
{
    if ($text === '') {
        return array('', '');
    }

    // Indentation thresholds, relative to the indent being ignored.
    $code_block_indent = $indent + 4;
    $fence_max_indent  = $indent + 3;

    // Regex to check for the presence of newlines around a block tag.
    $newline_before_re = '/(?:^\n?|\n\n)*$/';
    $newline_after_re =
        '{
            ^						# Start of text following the tag.
            (?>[ ]*<!--.*?-->)?		# Optional comment.
            [ ]*\n					# Must be followed by newline.
        }xs';

    // Regex to match any tag.
    $block_tag_re =
        '{
            (					# $2: Capture whole tag.
                </?					# Any opening or closing tag.
                    (?>				# Tag name.
                        ' . $this->block_tags_re . '			|
                        ' . $this->context_block_tags_re . '	|
                        ' . $this->clean_tags_re . '        	|
                        (?!\s)'.$enclosing_tag_re . '
                    )
                    (?:
                        (?=[\s"\'/a-zA-Z0-9])	# Allowed characters after tag name.
                        (?>
                            ".*?"		|	# Double quotes (can contain `>`)
                            \'.*?\'   	|	# Single quotes (can contain `>`)
                            .+?				# Anything but quotes and `>`.
                        )*?
                    )?
                >					# End of tag.
            |
                <!--    .*?     -->	# HTML Comment
            |
                <\?.*?\?> | <%.*?%>	# Processing instruction
            |
                <!\[CDATA\[.*?\]\]>	# CData Block
            ' . ( !$span ? ' # If not in span.
            |
                # Indented code block
                (?: ^[ ]*\n | ^ | \n[ ]*\n )
                [ ]{' . $code_block_indent . '}[^\n]* \n
                (?>
                    (?: [ ]{' . $code_block_indent . '}[^\n]* | [ ]* ) \n
                )*
            |
                # Fenced code block marker
                (?<= ^ | \n )
                [ ]{0,' . $fence_max_indent . '}(?:~{3,}|`{3,})
                [ ]*
                (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name
                [ ]*
                (?: ' . $this->id_class_attr_nocatch_re . ' )? # extra attributes
                [ ]*
                (?= \n )
            ' : '' ) . ' # End (if not is span).
            |
                # Code span marker
                # Note, this regex needs to go after backtick fenced
                # code blocks but it should also be kept outside of the
                # "if not in span" condition adding backticks to the parser
                `+
            )
        }xs';

    // Patterns used to classify a captured tag. None of them depends on
    // loop state, so they are built once.
    $fence_start_re   = '{^\n?([ ]{0,' . $fence_max_indent . '})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:' . $this->id_class_attr_nocatch_re . ')?[ ]*\n?$}';
    $block_open_re    = '{^<(?:' . $this->block_tags_re . ')\b}';
    $context_open_re  = '{^<(?:' . $this->context_block_tags_re . ')\b}';
    $clean_open_re    = '{^<(?:' . $this->clean_tags_re . ')\b}';
    $enclosing_any_re = '{^</?(?:' . $enclosing_tag_re . ')\b}';

    $depth = 0;		// Current depth inside the tag tree.
    $output = "";	// Parsed text that will be returned.

    // Loop through every tag until we find the closing tag of the parent
    // or loop until reaching the end of text if no parent tag specified.
    do {
        // Split the text on the first match of $block_tag_re. Text before
        // the match comes first, the captured tag second, and the text
        // after it last.
        $pieces = preg_split($block_tag_re, $text, 2,
                             PREG_SPLIT_DELIM_CAPTURE);

        // If in Markdown span mode, add an empty-string span-level hash
        // after each newline to prevent triggering any block element.
        if ($span) {
            $void = $this->hashPart("", ':');
            $pieces[0] = $void . str_replace("\n", "\n$void", $pieces[0]) . $void;
        }

        $output .= $pieces[0]; // Text before current tag.

        // If end of $text has been reached. Stop loop.
        if (count($pieces) < 3) {
            $text = "";
            break;
        }

        $tag  = $pieces[1]; // Tag to handle.
        $text = $pieces[2]; // Remaining text after current tag.

        // Check for: Fenced code block marker.
        // Note: need to recheck the whole tag to disambiguate backtick
        // fences from code spans
        if (preg_match($fence_start_re, $tag, $capture)) {
            // Fenced code block marker: find matching end marker.
            $fence_indent = strlen($capture[1]); // use captured indent in re
            $fence_re = $capture[2]; // use captured fence in re
            $output .= $tag;
            if (preg_match('{^(?>.*\n)*?[ ]{' . ($fence_indent) . '}' . $fence_re . '[ ]*(?:\n|$)}', $text,
                $matches))
            {
                // End marker found: pass text unchanged until marker.
                $output .= $matches[0];
                $text = substr($text, strlen($matches[0]));
            }
            // No end marker: the opening marker alone has been passed on.
            continue;
        }

        // Check for: Indented code block.
        if ($tag[0] === "\n" || $tag[0] === " ") {
            // Indented code block: pass it unchanged, will be handled
            // later.
            $output .= $tag;
            continue;
        }

        // Check for: Code span marker
        // Note: need to check this after backtick fenced code blocks
        if ($tag[0] === "`") {
            // Find corresponding end marker.
            $tag_re = preg_quote($tag);
            $output .= $tag;
            if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)' . $tag_re . '(?!`)}',
                $text, $matches))
            {
                // End marker found: pass text unchanged until marker.
                $output .= $matches[0];
                $text = substr($text, strlen($matches[0]));
            }
            // Unmatched marker: the marker alone has been passed on.
            continue;
        }

        // Check for: Opening Block level tag or
        //            Opening Context Block tag (like ins and del)
        //               used as a block tag (tag is alone on its line).
        if (preg_match($block_open_re, $tag) ||
            (	preg_match($context_open_re, $tag) &&
                preg_match($newline_before_re, $output) &&
                preg_match($newline_after_re, $text)	)
            )
        {
            // Need to parse tag and following text using the HTML parser.
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

            // Make sure it stays outside of any paragraph by adding newlines.
            $output .= "\n\n$block_text\n\n";
            continue;
        }

        // Check for: Clean tag (like script, math)
        //            HTML Comments, processing instructions.
        if (preg_match($clean_open_re, $tag) ||
            $tag[1] === '!' || $tag[1] === '?')
        {
            // Need to parse tag and following text using the HTML parser.
            // (don't check for markdown attribute)
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

            $output .= $block_text;
            continue;
        }

        // Check for: Tag with same name as enclosing tag.
        if ($enclosing_tag_re !== '' && preg_match($enclosing_any_re, $tag)) {
            // Increase/decrease nested tag count. A self-closing tag
            // ("/" right before ">") leaves the depth untouched.
            if ($tag[1] === '/') {
                $depth--;
            } else if ($tag[strlen($tag)-2] !== '/') {
                $depth++;
            }

            if ($depth < 0) {
                // Going out of parent element. Clean up and break so we
                // return to the calling function.
                $text = $tag . $text;
                break;
            }

            $output .= $tag;
            continue;
        }

        // Any other tag is passed through as is.
        $output .= $tag;
    } while ($depth >= 0);

    return array($output, $text);
}
||
634 | |||
/**
 * Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
 *
 * *   Calls $hash_method to convert any blocks.
 * *   Stops when the first opening tag closes.
 * *   $md_attr indicates if the use of the `markdown="1"` attribute is allowed.
 *     (it is not inside clean tags)
 * *   The `markdown` attribute is honoured only when its value is exactly
 *     `1`, `block` or `span`.
 *
 * Returns an array of that form: ( processed text , remaining text )
 * @param string $text
 * @param string $hash_method  Name of the method of $this used to hash blocks.
 * @param bool $md_attr Handle `markdown="1"` attribute
 * @return array
 */
protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
    if ($text === '') return array('', '');

    // Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_re = '
        {
            \s*			# Eat whitespace before the `markdown` attribute
            markdown
            \s*=\s*
            (?>
                (["\'])		# $1: quote delimiter
                (.*?)		# $2: attribute value
                \1			# matching delimiter
            |
                ([^\s>]*)	# $3: unquoted attribute value
            )
            ()				# $4: make $3 always defined (avoid warnings)
        }xs';

    // Regex to match any tag.
    $tag_re = '{
            (					# $2: Capture whole tag.
                </?					# Any opening or closing tag.
                    [\w:$]+			# Tag name.
                    (?:
                        (?=[\s"\'/a-zA-Z0-9])	# Allowed characters after tag name.
                        (?>
                            ".*?"		|	# Double quotes (can contain `>`)
                            \'.*?\'   	|	# Single quotes (can contain `>`)
                            .+?				# Anything but quotes and `>`.
                        )*?
                    )?
                >					# End of tag.
            |
                <!--    .*?     -->	# HTML Comment
            |
                <\?.*?\?> | <%.*?%>	# Processing instruction
            |
                <!\[CDATA\[.*?\]\]>	# CData Block
            )
        }xs';

    // Accepted values of the `markdown` attribute.
    $markdown_modes = array('1', 'block', 'span');

    $original_text = $text;		// Save original text in case of failure.

    $depth		= 0;	// Current depth inside the tag tree.
    $block_text	= "";	// Temporary text holder for current text.
    $parsed		= "";	// Parsed text that will be returned.
    $base_tag_name_re = '';

    // Get the name of the starting tag.
    // (This pattern makes $base_tag_name_re safe without quoting.)
    if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
        $base_tag_name_re = $matches[1];

    // Loop through every tag until we find the corresponding closing tag.
    do {
        // Split the text on the first match of $tag_re. Text before the
        // match comes first, the captured tag second, and the text after
        // it last.
        $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if (count($parts) < 3) {
            // End of $text reached with unbalanced tag(s).
            // In that case, we return original text unchanged and pass the
            // first character as filtered to prevent an infinite loop in the
            // parent function.
            return array($original_text[0], substr($original_text, 1));
        }

        $block_text .= $parts[0]; // Text before current tag.
        $tag         = $parts[1]; // Tag to handle.
        $text        = $parts[2]; // Remaining text after current tag.

        // Check for: Auto-close tag (like <hr/>)
        //			 Comments and Processing Instructions.
        if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\b}', $tag) ||
            $tag[1] === '!' || $tag[1] === '?')
        {
            // Just add the tag to the block as if it was text.
            $block_text .= $tag;
        }
        else {
            // Increase/decrease nested tag count. Only do so if
            // the tag's name matches the base tag's. A self-closing tag
            // ("/" right before ">") leaves the depth untouched.
            if (preg_match('{^</?' . $base_tag_name_re . '\b}', $tag)) {
                if ($tag[1] === '/') {
                    $depth--;
                } else if ($tag[strlen($tag)-2] !== '/') {
                    $depth++;
                }
            }

            // Check for `markdown="1"` attribute and handle it.
            // The value is compared exactly: the former pattern
            // '/^1|block|span$/' bound the anchors to single alternatives
            // only, so values such as "10" or "unblocked" were accepted too.
            if ($md_attr &&
                preg_match($markdown_attr_re, $tag, $attr_m) &&
                in_array($attr_m[2] . $attr_m[3], $markdown_modes, true))
            {
                // Remove `markdown` attribute from opening tag.
                $tag = preg_replace($markdown_attr_re, '', $tag);

                // Check if text inside this tag must be parsed in span mode.
                $mode = $attr_m[2] . $attr_m[3];
                $span_mode = $mode === 'span' || ($mode !== 'block' &&
                    preg_match('{^<(?:' . $this->contain_span_tags_re . ')\b}', $tag));

                // Calculate indent before tag.
                if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                    $strlen = $this->utf8_strlen;
                    $indent = $strlen($matches[1], 'UTF-8');
                } else {
                    $indent = 0;
                }

                // End preceding block with this tag.
                $block_text .= $tag;
                $parsed .= $this->$hash_method($block_text);

                // Get enclosing tag name for the ParseMarkdown function.
                // (This pattern makes $tag_name_re safe without quoting.)
                preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                $tag_name_re = $matches[1];

                // Parse the content using the HTML-in-Markdown parser.
                list ($block_text, $text)
                    = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                        $tag_name_re, $span_mode);

                // Outdent markdown text.
                if ($indent > 0) {
                    $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                $block_text);
                }

                // Append tag content to parsed text.
                if (!$span_mode) {
                    $parsed .= "\n\n$block_text\n\n";
                } else {
                    $parsed .= (string) $block_text;
                }

                // Start over with a new block.
                $block_text = "";
            }
            else $block_text .= $tag;
        }

    } while ($depth > 0);

    // Hash last block text that wasn't processed inside the loop.
    $parsed .= $this->$hash_method($block_text);

    return array($parsed, $text);
}
||
803 | |||
/**
 * Hashes a "clean" tag. Whenever a function inserts a clean tag in $text,
 * it passes through this function and is automatically escaped, blocking
 * invalid nested overlap.
 * @param string $text
 * @return string
 */
protected function hashClean($text) {
    // 'C' is the boundary argument handed to hashPart() for clean tags.
    $hashed = $this->hashPart($text, 'C');
    return $hashed;
}
||
814 | |||
/**
 * Turn Markdown link shortcuts into XHTML <a> tags.
 *
 * Three passes are made, in this order: reference-style links, inline
 * links, then reference-style shortcuts. The text is returned untouched
 * if this method is re-entered while $this->in_anchor is set.
 * @param string $text
 * @return string
 */
protected function doAnchors($text) {
    if ($this->in_anchor) {
        return $text;
    }
    $this->in_anchor = true;

    $reference_callback = array($this, '_doAnchors_reference_callback');
    $inline_callback    = array($this, '_doAnchors_inline_callback');

    // Reference-style links: [link text] [id]
    $reference_re = '{
        (					# wrap whole match in $1
          \[
            (' . $this->nested_brackets_re . ')	# link text = $2
          \]

          [ ]?				# one optional space
          (?:\n[ ]*)?		# one optional newline followed by spaces

          \[
            (.*?)		# id = $3
          \]
        )
        }xs';

    // Inline-style links: [link text](url "optional title")
    $inline_re = '{
        (				# wrap whole match in $1
          \[
            (' . $this->nested_brackets_re . ')	# link text = $2
          \]
          \(			# literal paren
            [ \n]*
            (?:
                <(.+?)>	# href = $3
            |
                (' . $this->nested_url_parenthesis_re . ')	# href = $4
            )
            [ \n]*
            (			# $5
              ([\'"])	# quote char = $6
              (.*?)		# Title = $7
              \6		# matching quote
              [ \n]*	# ignore any spaces/tabs between closing quote and )
            )?			# title is optional
          \)
          (?:[ ]? ' . $this->id_class_attr_catch_re . ' )?	 # $8 = id/class attributes
        )
        }xs';

    // Reference-style shortcuts: [link text]
    $shortcut_re = '{
        (					# wrap whole match in $1
          \[
            ([^\[\]]+)		# link text = $2; can\'t contain [ or ]
          \]
        )
        }xs';

    // First, handle reference-style links.
    $text = preg_replace_callback($reference_re, $reference_callback, $text);

    // Next, inline-style links.
    $text = preg_replace_callback($inline_re, $inline_callback, $text);

    // Last, handle reference-style shortcuts. These must come last in
    // case you've also got [link text][1] or [link text](/foo)
    $text = preg_replace_callback($shortcut_re, $reference_callback, $text);

    $this->in_anchor = false;
    return $text;
}
||
884 | |||
885 | /** |
||
886 | * Callback for reference anchors |
||
887 | * @param array $matches |
||
888 | * @return string |
||
889 | */ |
||
protected function _doAnchors_reference_callback($matches) {
	$original = $matches[1];
	$label    = $matches[2];

	// Group 3 is missing for bare [shortcut] links and empty for [text][];
	// in both cases the link text doubles as the reference id.
	$ref_key = isset($matches[3]) ? $matches[3] : '';
	if ($ref_key === '') {
		$ref_key = $label;
	}

	// Normalise the id: lower-case it, then fold a line break (and the
	// optional space before it) into a single space.
	$ref_key = preg_replace('{[ ]?\n}', ' ', strtolower($ref_key));

	// Unknown reference: hand the source text back unchanged.
	if (!isset($this->urls[$ref_key])) {
		return $original;
	}

	$href = $this->encodeURLAttribute($this->urls[$ref_key]);
	$tag  = "<a href=\"$href\"";

	if (isset($this->titles[$ref_key])) {
		$encoded_title = $this->encodeAttribute($this->titles[$ref_key]);
		$tag .= " title=\"$encoded_title\"";
	}
	if (isset($this->ref_attr[$ref_key])) {
		$tag .= $this->ref_attr[$ref_key];
	}

	$rendered_label = $this->runSpanGamut($label);
	$tag .= ">$rendered_label</a>";

	return $this->hashPart($tag);
}
||
926 | |||
927 | /** |
||
928 | * Callback for inline anchors |
||
929 | * @param array $matches |
||
930 | * @return string |
||
931 | */ |
||
protected function _doAnchors_inline_callback($matches) {
	// Capture groups supplied by doAnchors(): $2 link text, $3 URL written
	// as <url>, $4 bare URL, $6 title quote character, $7 title text,
	// $8 id/class attribute block (per the pattern's own comment).
	$link_text = $this->runSpanGamut($matches[2]);
	$url       = $matches[3] === '' ? $matches[4] : $matches[3];

	// A title is present only when its opening quote ($6) really matched.
	// When the title is omitted but an attribute block follows, e.g.
	// [text](url){.cls}, the regex engine still reports $5-$7 as empty
	// strings, so checking isset() on $7 alone produced a spurious
	// title="" attribute. An explicitly empty title ("") is still honoured
	// because its quote character is captured.
	$has_title = isset($matches[6]) && $matches[6] !== '';

	$attr = $this->doExtraAttributes("a", $dummy =& $matches[8]);

	// if the URL was of the form <s p a c e s> it got caught by the HTML
	// tag parser and hashed. Need to reverse the process before using the URL.
	$unhashed = $this->unhash($url);
	if ($unhashed !== $url) {
		$url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
	}

	$url = $this->encodeURLAttribute($url);

	$result = "<a href=\"$url\"";
	if ($has_title) {
		$title = $this->encodeAttribute($matches[7]);
		$result .= " title=\"$title\"";
	}
	$result .= $attr;

	// NOTE(review): the link text already went through runSpanGamut() above;
	// this second pass is kept as-is to preserve existing output -- confirm
	// whether it is still needed.
	$link_text = $this->runSpanGamut($link_text);
	$result .= ">$link_text</a>";

	return $this->hashPart($result);
}
||
958 | |||
959 | /** |
||
960 | * Turn Markdown image shortcuts into <img> tags. |
||
961 | * @param string $text |
||
962 | * @return string |
||
963 | */ |
||
protected function doImages($text) {
	// Pass 1 -- reference-style labeled images: ![alt text][id]
	$reference_re = '{
		(				# wrap whole match in $1
		  !\[
			(' . $this->nested_brackets_re . ')		# alt text = $2
		  \]

		  [ ]?				# one optional space
		  (?:\n[ ]*)?		# one optional newline followed by spaces

		  \[
			(.*?)		# id = $3
		  \]

		)
		}xs';
	$text = preg_replace_callback(
		$reference_re,
		array($this, '_doImages_reference_callback'),
		$text
	);

	// Pass 2 -- inline images: ![alt text](url "optional title")
	// Don't forget: encode * and _
	$inline_re = '{
		(				# wrap whole match in $1
		  !\[
			(' . $this->nested_brackets_re . ')		# alt text = $2
		  \]
		  \s?			# One optional whitespace character
		  \(			# literal paren
			[ \n]*
			(?:
				<(\S*)>	# src url = $3
			|
				(' . $this->nested_url_parenthesis_re . ')	# src url = $4
			)
			[ \n]*
			(			# $5
			  ([\'"])	# quote char = $6
			  (.*?)		# title = $7
			  \6		# matching quote
			  [ \n]*
			)?			# title is optional
		  \)
		  (?:[ ]? ' . $this->id_class_attr_catch_re . ' )?	 # $8 = id/class attributes
		)
		}xs';
	$text = preg_replace_callback(
		$inline_re,
		array($this, '_doImages_inline_callback'),
		$text
	);

	return $text;
}
||
1013 | |||
1014 | /** |
||
1015 | * Callback for referenced images |
||
1016 | * @param array $matches |
||
1017 | * @return string |
||
1018 | */ |
||
protected function _doImages_reference_callback($matches) {
	$original = $matches[1];
	$alt      = $matches[2];

	// An empty id (![this][]) means the alt text is the reference id.
	$ref_key = strtolower($matches[3]);
	if ($ref_key === "") {
		$ref_key = strtolower($alt);
	}

	$alt = $this->encodeAttribute($alt);

	// No definition for this id: leave the source text intact.
	if (!isset($this->urls[$ref_key])) {
		return $original;
	}

	$src = $this->encodeURLAttribute($this->urls[$ref_key]);
	$tag = "<img src=\"$src\" alt=\"$alt\"";

	if (isset($this->titles[$ref_key])) {
		$encoded_title = $this->encodeAttribute($this->titles[$ref_key]);
		$tag .= " title=\"$encoded_title\"";
	}
	if (isset($this->ref_attr[$ref_key])) {
		$tag .= $this->ref_attr[$ref_key];
	}
	$tag .= $this->empty_element_suffix;

	return $this->hashPart($tag);
}
||
1050 | |||
1051 | /** |
||
1052 | * Callback for inline images |
||
1053 | * @param array $matches |
||
1054 | * @return string |
||
1055 | */ |
||
protected function _doImages_inline_callback($matches) {
	// Capture groups supplied by doImages(): $2 alt text, $3 URL written as
	// <url>, $4 bare URL, $6 title quote character, $7 title text,
	// $8 id/class attribute block (per the pattern's own comment).
	$alt_text = $matches[2];
	$url      = $matches[3] === '' ? $matches[4] : $matches[3];

	// A title is present only when its opening quote ($6) really matched.
	// With ![alt](url){.cls} the title group is skipped but still reported
	// as an empty string because a later group matched; relying on isset()
	// of $7 alone therefore emitted a spurious title="" attribute.
	$has_title = isset($matches[6]) && $matches[6] !== '';

	$attr = $this->doExtraAttributes("img", $dummy =& $matches[8]);

	$alt_text = $this->encodeAttribute($alt_text);
	$url      = $this->encodeURLAttribute($url);

	$result = "<img src=\"$url\" alt=\"$alt_text\"";
	if ($has_title) {
		$title = $this->encodeAttribute($matches[7]);
		$result .= " title=\"$title\"";
	}
	$result .= $attr;
	$result .= $this->empty_element_suffix;

	return $this->hashPart($result);
}
||
1074 | |||
1075 | /** |
||
1076 | * Process markdown headers. Redefined to add ID and class attribute support. |
||
1077 | * @param string $text |
||
1078 | * @return string |
||
1079 | */ |
||
protected function doHeaders($text) {
	// Setext-style headers:
	//	  Header 1  {#header1}
	//	  ========
	//
	//	  Header 2  {#header2 .class1 .class2}
	//	  --------
	//
	// The callback reads the header text from $1, the optional attribute
	// block from $2 and the underline from $3.
	$text = preg_replace_callback(
		'{
			(^.+?)								# $1: Header text
			(?:[ ]+ ' . $this->id_class_attr_catch_re . ' )?	 # $2 = id/class attributes
			[ ]*\n(=+|-+)[ ]*\n+				# $3: Header footer
		}mx',
		array($this, '_doHeaders_callback_setext'), $text);

	// atx-style headers:
	//	# Header 1        {#header1}
	//	## Header 2       {#header2}
	//	## Header 2 with closing hashes ##  {#header3.class1.class2}
	//	...
	//	###### Header 6   {.class2}
	//
	// With hashtag_protection enabled at least one space is required after
	// the leading #s; otherwise the space is optional.
	$text = preg_replace_callback('{
			^(\#{1,6})	# $1 = string of #\'s
			[ ]'.($this->hashtag_protection ? '+' : '*').'
			(.+?)		# $2 = Header text
			[ ]*
			\#*			# optional closing #\'s (not counted)
			(?:[ ]+ ' . $this->id_class_attr_catch_re . ' )?	 # $3 = id/class attributes
			[ ]*
			\n+
		}xm',
		array($this, '_doHeaders_callback_atx'), $text);

	return $text;
}
||
1117 | |||
1118 | /** |
||
1119 | * Callback for setext headers |
||
1120 | * @param array $matches |
||
1121 | * @return string |
||
1122 | */ |
||
protected function _doHeaders_callback_setext($matches) {
	$heading   = $matches[1];
	$underline = $matches[3];

	// A single dash under a line that itself starts with "- " is not
	// treated as a header; return the matched text untouched.
	if ($underline === '-' && preg_match('{^- }', $heading)) {
		return $matches[0];
	}

	// "=" underline gives <h1>, anything else ("-") gives <h2>.
	$level = ($underline[0] === '=') ? 1 : 2;

	// Ask the optional user hook for a default id.
	$defaultId = null;
	if (is_callable($this->header_id_func)) {
		$defaultId = call_user_func($this->header_id_func, $heading);
	}

	$attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId);
	$inner = $this->runSpanGamut($heading);
	$block = "<h$level$attr>" . $inner . "</h$level>";

	return "\n" . $this->hashBlock($block) . "\n\n";
}
||
1136 | |||
1137 | /** |
||
1138 | * Callback for atx headers |
||
1139 | * @param array $matches |
||
1140 | * @return string |
||
1141 | */ |
||
protected function _doHeaders_callback_atx($matches) {
	$heading = $matches[2];

	// The number of leading # characters is the header level.
	$level = strlen($matches[1]);

	// Ask the optional user hook for a default id.
	$defaultId = null;
	if (is_callable($this->header_id_func)) {
		$defaultId = call_user_func($this->header_id_func, $heading);
	}

	$attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId);
	$inner = $this->runSpanGamut($heading);
	$block = "<h$level$attr>" . $inner . "</h$level>";

	return "\n" . $this->hashBlock($block) . "\n\n";
}
||
1150 | |||
1151 | /** |
||
1152 | * Form HTML tables. |
||
1153 | * @param string $text |
||
1154 | * @return string |
||
1155 | */ |
||
protected function doTables($text) {
	// Table lines may be indented by fewer spaces than one tab stop.
	$max_indent = $this->tab_width - 1;

	// Tables written with a leading pipe on every line:
	//
	//	| Header 1 | Header 2
	//	| -------- | --------
	//	| Cell 1   | Cell 2
	//	| Cell 3   | Cell 4
	$leading_pipe_re = '
		{
			^							# Start of a line
			[ ]{0,' . $max_indent . '}	# Allowed whitespace.
			[|]							# Optional leading pipe (present)
			(.+) \n						# $1: Header row (at least one pipe)

			[ ]{0,' . $max_indent . '}	# Allowed whitespace.
			[|] ([ ]*[-:]+[-| :]*) \n	# $2: Header underline

			(							# $3: Cells
				(?>
					[ ]*				# Allowed whitespace.
					[|] .* \n			# Row content.
				)*
			)
			(?=\n|\Z)					# Stop at final double newline.
		}xm';
	$text = preg_replace_callback(
		$leading_pipe_re,
		array($this, '_doTable_leadingPipe_callback'),
		$text
	);

	// Tables written without the leading pipe:
	//
	//	Header 1 | Header 2
	//	-------- | --------
	//	Cell 1   | Cell 2
	//	Cell 3   | Cell 4
	$plain_re = '
		{
			^							# Start of a line
			[ ]{0,' . $max_indent . '}	# Allowed whitespace.
			(\S.*[|].*) \n				# $1: Header row (at least one pipe)

			[ ]{0,' . $max_indent . '}	# Allowed whitespace.
			([-:]+[ ]*[|][-| :]*) \n	# $2: Header underline

			(							# $3: Cells
				(?>
					.* [|] .* \n		# Row content
				)*
			)
			(?=\n|\Z)					# Stop at final double newline.
		}xm';
	$text = preg_replace_callback(
		$plain_re,
		array($this, '_DoTable_callback'),
		$text
	);

	return $text;
}
||
1210 | |||
1211 | /** |
||
1212 | * Callback for removing the leading pipe for each row |
||
1213 | * @param array $matches |
||
1214 | * @return string |
||
1215 | */ |
||
protected function _doTable_leadingPipe_callback($matches) {
	// The header row and underline were captured without their leading
	// pipe; only the body rows still carry one (plus optional indentation).
	$body_rows = preg_replace('/^ *[|]/m', '', $matches[3]);

	// Re-package the groups in the shape the generic table callback expects.
	$normalised = array($matches[0], $matches[1], $matches[2], $body_rows);
	return $this->_doTable_callback($normalised);
}
||
1225 | |||
1226 | /** |
||
1227 | * Make the align attribute in a table |
||
1228 | * @param string $alignname |
||
1229 | * @return string |
||
1230 | */ |
||
protected function _doTable_makeAlignAttr($alignname) {
	$template = $this->table_align_class_tmpl;

	// With a class template configured, alignment is expressed as a class
	// name in which "%%" stands for the alignment keyword.
	if (!empty($template)) {
		$classname = str_replace('%%', $alignname, $template);
		return " class=\"$classname\"";
	}

	// Otherwise fall back to the plain align attribute.
	return " align=\"$alignname\"";
}
||
1239 | |||
1240 | /** |
||
1241 | * Callback for processing tables |
||
1242 | * @param array $matches |
||
1243 | * @return string |
||
1244 | */ |
||
protected function _doTable_callback($matches) {
	// Remove any trailing pipe from every line of the three table parts.
	$trailing_pipe_re = '/[|] *$/m';
	$head      = preg_replace($trailing_pipe_re, '', $matches[1]);
	$underline = preg_replace($trailing_pipe_re, '', $matches[2]);
	$content   = preg_replace($trailing_pipe_re, '', $matches[3]);

	$cell_split_re = '/ *[|] */';

	// Read the column alignment from the header underline:
	// "---:" is right, ":---:" is center, ":---" is left.
	$attr = array();
	foreach (preg_split($cell_split_re, $underline) as $n => $marker) {
		if (preg_match('/^ *-+: *$/', $marker)) {
			$align = 'right';
		} elseif (preg_match('/^ *:-+: *$/', $marker)) {
			$align = 'center';
		} elseif (preg_match('/^ *:-+ *$/', $marker)) {
			$align = 'left';
		} else {
			$align = null;
		}
		$attr[$n] = ($align === null) ? '' : $this->_doTable_makeAlignAttr($align);
	}

	// Parse span elements (code spans, character escapes, inline HTML tags)
	// first so that pipes inside them are not taken for cell separators.
	$headers   = preg_split($cell_split_re, $this->parseSpan($head));
	$col_count = count($headers);
	$attr      = array_pad($attr, $col_count, '');

	// Column headers.
	$html  = "<table>\n";
	$html .= "<thead>\n";
	$html .= "<tr>\n";
	foreach ($headers as $n => $header) {
		$html .= " <th$attr[$n]>" . $this->runSpanGamut(trim($header)) . "</th>\n";
	}
	$html .= "</tr>\n";
	$html .= "</thead>\n";

	// Body: one <tr> per content line.
	$html .= "<tbody>\n";
	foreach (explode("\n", trim($content, "\n")) as $row) {
		// Same span pre-parse as for the header row.
		$row = $this->parseSpan($row);

		// Never produce more cells than there are columns, and pad short
		// rows with empty cells.
		$cells = preg_split($cell_split_re, $row, $col_count);
		$cells = array_pad($cells, $col_count, '');

		$html .= "<tr>\n";
		foreach ($cells as $n => $cell) {
			$html .= " <td$attr[$n]>" . $this->runSpanGamut(trim($cell)) . "</td>\n";
		}
		$html .= "</tr>\n";
	}
	$html .= "</tbody>\n";
	$html .= "</table>";

	return $this->hashBlock($html) . "\n";
}
||
1309 | |||
1310 | /** |
||
1311 | * Form HTML definition lists. |
||
1312 | * @param string $text |
||
1313 | * @return string |
||
1314 | */ |
||
protected function doDefLists($text) {
	// Terms and definition markers may be indented by fewer spaces than
	// one tab stop.
	$max_indent = $this->tab_width - 1;

	// Pattern matching one complete definition list.
	$whole_list_re = '(?>
		(								# $1 = whole list
		  (								# $2
			[ ]{0,' . $max_indent . '}
			((?>.*\S.*\n)+)				# $3 = defined term
			\n?
			[ ]{0,' . $max_indent . '}:[ ]+ # colon starting definition
		  )
		  (?s:.+?)
		  (								# $4
			  \z
			|
			  \n{2,}
			  (?=\S)
			  (?!						# Negative lookahead for another term
				[ ]{0,' . $max_indent . '}
				(?: \S.*\n )+?			# defined term
				\n?
				[ ]{0,' . $max_indent . '}:[ ]+ # colon starting definition
			  )
			  (?!						# Negative lookahead for another definition
				[ ]{0,' . $max_indent . '}:[ ]+ # colon starting definition
			  )
		  )
		)
	)'; // mx

	// A list must start at the beginning of the text or after a blank line.
	$anchored_re = '{
			(?>\A\n?|(?<=\n\n))
			' . $whole_list_re . '
		}mx';

	return preg_replace_callback(
		$anchored_re,
		array($this, '_doDefLists_callback'),
		$text
	);
}
||
1354 | |||
1355 | /** |
||
1356 | * Callback for processing definition lists |
||
1357 | * @param array $matches |
||
1358 | * @return string |
||
1359 | */ |
||
protected function _doDefLists_callback($matches) {
	// $1 holds the source text of the whole definition list.
	$items_html = trim($this->processDefListItems($matches[1]));

	// Wrap the rendered <dt>/<dd> items and protect the block from
	// further processing.
	$list_html = "<dl>\n" . $items_html . "\n</dl>";
	return $this->hashBlock($list_html) . "\n\n";
}
||
1370 | |||
1371 | /** |
||
1372 | * Process the contents of a single definition list, splitting it |
||
1373 | * into individual term and definition list items. |
||
1374 | * @param string $list_str |
||
1375 | * @return string |
||
1376 | */ |
||
protected function processDefListItems($list_str) {
	$max_indent = $this->tab_width - 1;

	// Collapse trailing blank lines into a single newline.
	$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

	// Step 1: turn the term lines into <dt> elements.
	$term_re = '{
		(?>\A\n?|\n\n+)						# leading line
		(									# definition terms = $1
			[ ]{0,' . $max_indent . '}	# leading whitespace
			(?!\:[ ]|[ ])					# negative lookahead for a definition
											#   mark (colon) or more whitespace.
			(?> \S.* \n)+?					# actual term (not whitespace).
		)
		(?=\n?[ ]{0,3}:[ ])					# lookahead for following line feed
											#   with a definition mark.
		}xm';
	$list_str = preg_replace_callback(
		$term_re,
		array($this, '_processDefListItems_callback_dt'),
		$list_str
	);

	// Step 2: turn each definition into a <dd> element. This relies on
	// step 1 having run, since "<dt>" is one of the stop markers.
	$definition_re = '{
		\n(\n+)?							# leading line = $1
		(									# marker space = $2
			[ ]{0,' . $max_indent . '}	# whitespace before colon
			\:[ ]+							# definition mark (colon)
		)
		((?s:.+?))							# definition text = $3
		(?= \n+ 							# stop at next definition mark,
			(?:								# next term or end of text
				[ ]{0,' . $max_indent . '} \:[ ]	|
				<dt> | \z
			)
		)
		}xm';
	$list_str = preg_replace_callback(
		$definition_re,
		array($this, '_processDefListItems_callback_dd'),
		$list_str
	);

	return $list_str;
}
||
1417 | |||
1418 | /** |
||
1419 | * Callback for <dt> elements in definition lists |
||
1420 | * @param array $matches |
||
1421 | * @return string |
||
1422 | */ |
||
protected function _processDefListItems_callback_dt($matches) {
	// Several consecutive term lines may share one definition; each line
	// becomes its own <dt>.
	$html = '';
	foreach (explode("\n", trim($matches[1])) as $line) {
		$rendered = $this->runSpanGamut(trim($line));
		$html .= "\n<dt>" . $rendered . "</dt>";
	}
	return $html . "\n";
}
||
1432 | |||
1433 | /** |
||
1434 | * Callback for <dd> elements in definition lists |
||
1435 | * @param array $matches |
||
1436 | * @return string |
||
1437 | */ |
||
1438 | protected function _processDefListItems_callback_dd($matches) { |
||
1455 | } |
||
1456 | |||
1457 | /** |
||
1458 | * Adding the fenced code block syntax to regular Markdown: |
||
1459 | * |
||
1460 | * ~~~ |
||
1461 | * Code block |
||
1462 | * ~~~ |
||
1463 | * |
||
1464 | * @param string $text |
||
1465 | * @return string |
||
1466 | */ |
||
protected function doFencedCodeBlocks($text) {
	// A fence is three or more tildes or backticks; the closing fence must
	// repeat the opening marker exactly (backreference \1).
	$fence_re = '{
			(?:\n|\A)
			# 1: Opening marker
			(
				(?:~{3,}|`{3,}) # 3 or more tildes/backticks.
			)
			[ ]*
			(?:
				\.?([-_:a-zA-Z0-9]+) # 2: standalone class name
			)?
			[ ]*
			(?:
				' . $this->id_class_attr_catch_re . ' # 3: Extra attributes
			)?
			[ ]* \n # Whitespace and newline following marker.

			# 4: Content
			(
				(?>
					(?!\1 [ ]* \n)	# Not a closing marker.
					.*\n+
				)+
			)

			# Closing marker.
			\1 [ ]* (?= \n )
		}xm';

	return preg_replace_callback(
		$fence_re,
		array($this, '_doFencedCodeBlocks_callback'),
		$text
	);
}
||
1500 | |||
1501 | /** |
||
1502 | * Callback to process fenced code blocks |
||
1503 | * @param array $matches |
||
1504 | * @return string |
||
1505 | */ |
||
protected function _doFencedCodeBlocks_callback($matches) {
	// $2 = standalone class name, $3 = extra attribute block, $4 = content.
	$classname = $matches[2];
	$attrs     = $matches[3];
	$code      = $matches[4];

	// Let the user hook render the content if one is configured; otherwise
	// escape it for HTML (quotes are left as they are).
	if ($this->code_block_content_func) {
		$code = call_user_func($this->code_block_content_func, $code, $classname);
	} else {
		$code = htmlspecialchars($code, ENT_NOQUOTES);
	}

	// Leading blank lines are turned into explicit line breaks.
	$code = preg_replace_callback(
		'/^\n+/',
		array($this, '_doFencedCodeBlocks_newlines'),
		$code
	);

	// The standalone class name, minus any leading dot, gets the
	// configured prefix.
	$classes = array();
	if ($classname !== "") {
		if ($classname[0] === '.') {
			$classname = substr($classname, 1);
		}
		$classes[] = $this->code_class_prefix . $classname;
	}

	// The attributes go on either <pre> or <code>, never on both.
	$attr_target = $this->code_attr_on_pre ? "pre" : "code";
	$attr_str    = $this->doExtraAttributes($attr_target, $attrs, null, $classes);
	if ($this->code_attr_on_pre) {
		$pre_attr_str  = $attr_str;
		$code_attr_str = '';
	} else {
		$pre_attr_str  = '';
		$code_attr_str = $attr_str;
	}

	$html = "<pre$pre_attr_str><code$code_attr_str>$code</code></pre>";

	return "\n\n".$this->hashBlock($html)."\n\n";
}
||
1534 | |||
1535 | /** |
||
1536 | * Replace new lines in fenced code blocks |
||
1537 | * @param array $matches |
||
1538 | * @return string |
||
1539 | */ |
||
protected function _doFencedCodeBlocks_newlines($matches) {
	// One <br> element per matched newline character.
	$line_break    = "<br$this->empty_element_suffix";
	$newline_count = strlen($matches[0]);
	return str_repeat($line_break, $newline_count);
}
||
1544 | |||
1545 | /** |
||
1546 | * Redefining emphasis markers so that emphasis by underscore does not |
||
1547 | * work in the middle of a word. |
||
1548 | * @var array |
||
1549 | */ |
||
1550 | protected $em_relist = array( |
||
1551 | '' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)', |
||
1552 | '*' => '(?<![\s*])\*(?!\*)', |
||
1553 | '_' => '(?<![\s_])_(?![a-zA-Z0-9_])', |
||
1554 | ); |
||
1555 | protected $strong_relist = array( |
||
1556 | '' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)', |
||
1557 | '**' => '(?<![\s*])\*\*(?!\*)', |
||
1558 | '__' => '(?<![\s_])__(?![a-zA-Z0-9_])', |
||
1559 | ); |
||
1560 | protected $em_strong_relist = array( |
||
1561 | '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)', |
||
1562 | '***' => '(?<![\s*])\*\*\*(?!\*)', |
||
1563 | '___' => '(?<![\s_])___(?![a-zA-Z0-9_])', |
||
1564 | ); |
||
1565 | |||
1566 | /** |
||
1567 | * Parse text into paragraphs |
||
1568 | * @param string $text String to process in paragraphs |
||
1569 | * @param boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags |
||
1570 | * @return string HTML output |
||
1571 | */ |
||
protected function formParagraphs($text, $wrap_in_p = true) {
	// Drop leading and trailing newlines, then split on blank lines.
	$text   = preg_replace('/\A\n+|\n+\z/', '', $text);
	$chunks = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

	$rendered = array();
	foreach ($chunks as $idx => $chunk) {
		$chunk = trim($this->runSpanGamut($chunk));

		// Block-tag hashes and clean-tag hashes stand on their own; every
		// other chunk becomes a paragraph when wrapping is requested.
		if ($wrap_in_p && !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $chunk)) {
			$chunk = "<p>$chunk</p>";
		}
		$rendered[$idx] = $chunk;
	}

	// Re-join the chunks and expand whatever hash tokens remain.
	return $this->unhash(implode("\n\n", $rendered));
}
||
1600 | |||
1601 | |||
1602 | /** |
||
1603 | * Footnotes - Strips footnote definitions from text and stores each |
||
1604 | * footnote's text in $this->footnotes, keyed by its (prefixed) note id. |
||
1605 | * @param string $text |
||
1606 | * @return string |
||
1607 | */ |
||
protected function stripFootnotes($text) {
	$max_indent = $this->tab_width - 1;

	// Footnote definitions look like: [^id]: text, where the text may
	// continue on following lines.
	$definition_re = '{
		^[ ]{0,' . $max_indent . '}\[\^(.+?)\][ ]?:	# note_id = $1
		  [ ]*
		  \n?					# maybe *one* newline
		(						# text = $2 (no blank lines allowed)
			(?:
				.+				# actual text
			|
				\n				# newlines but
				(?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
				(?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
								# by non-indented content
			)*
		)
		}xm';

	return preg_replace_callback(
		$definition_re,
		array($this, '_stripFootnotes_callback'),
		$text
	);
}
||
1631 | |||
1632 | /** |
||
1633 | * Callback for stripping footnotes |
||
1634 | * @param array $matches |
||
1635 | * @return string |
||
1636 | */ |
||
protected function _stripFootnotes_callback($matches) {
	// Remember the outdented footnote text under its prefixed id...
	$prefixed_id = $this->fn_id_prefix . $matches[1];
	$this->footnotes[$prefixed_id] = $this->outdent($matches[2]);

	// ...and remove the definition from the document.
	return '';
}
||
1642 | |||
1643 | /** |
||
1644 | * Replace footnote references in $text [^id] with a special text-token |
||
1645 | * which will be replaced by the actual footnote marker in appendFootnotes. |
||
1646 | * @param string $text |
||
1647 | * @return string |
||
1648 | */ |
||
protected function doFootnotes($text) {
	// Footnote references are not converted inside link text.
	if ($this->in_anchor) {
		return $text;
	}

	// Swap each [^id] for a placeholder token; appendFootnotes() turns the
	// tokens into the real markers later.
	return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
||
1655 | |||
1656 | /** |
||
1657 | * Append footnote list to text |
||
1658 | * @param string $text |
||
1659 | * @return string |
||
1660 | */ |
||
protected function appendFootnotes($text) {
	// Resolve the placeholder tokens left behind by doFootnotes().
	$text = preg_replace_callback(
		'{F\x1Afn:(.*?)\x1A:}',
		array($this, '_appendFootnotes_callback'),
		$text
	);

	// No footnote was referenced: nothing to assemble or append.
	if (empty($this->footnotes_ordered)) {
		return $text;
	}

	// The footnote HTML is always assembled, even when it is not appended.
	$this->_doFootnotes();
	if ($this->omit_footnotes) {
		return $text;
	}

	$text .= "\n\n";
	$text .= "<div class=\"footnotes\" role=\"doc-endnotes\">\n";
	$text .= "<hr" . $this->empty_element_suffix . "\n";
	$text .= $this->footnotes_assembled;
	$text .= "</div>";

	return $text;
}
||
1677 | |||
1678 | |||
1679 | /** |
||
1680 | * Generates the HTML for footnotes. Called by appendFootnotes, even if |
||
1681 | * footnotes are not being appended. |
||
1682 | * @return void |
||
1683 | */ |
||
protected function _doFootnotes() {
	// Attributes shared by every backlink; title and aria-label are added
	// inside the loop when configured.
	$attr = array();
	if ($this->fn_backlink_class !== "") {
		$class = $this->encodeAttribute($this->fn_backlink_class);
		$attr['class'] = " class=\"$class\"";
	}
	$attr['role'] = " role=\"doc-backlink\"";

	$num  = 0;
	$html = "<ol>\n\n";

	// Consume the ordered queue from the front. Rendering a footnote can
	// queue further footnotes (nested references), so the queue is
	// re-checked on every pass instead of being iterated once.
	while (!empty($this->footnotes_ordered)) {
		$footnote = reset($this->footnotes_ordered);
		$note_id  = key($this->footnotes_ordered);
		unset($this->footnotes_ordered[$note_id]);

		$ref_count = $this->footnotes_ref_count[$note_id];
		unset($this->footnotes_ref_count[$note_id]);
		unset($this->footnotes[$note_id]);

		// Two newlines are appended before block parsing, then any
		// placeholder tokens inside the footnote are resolved.
		$footnote = $this->runBlockGamut($footnote . "\n" . "\n");
		$footnote = preg_replace_callback(
			'{F\x1Afn:(.*?)\x1A:}',
			array($this, '_appendFootnotes_callback'),
			$footnote
		);

		$num++;
		$note_id = $this->encodeAttribute($note_id);

		// One backlink per reference to this footnote; none at all when
		// the backlink HTML is blank.
		$backlink = "";
		if (!empty($this->fn_backlink_html)) {
			$links = array();
			for ($ref_num = 1; $ref_num <= $ref_count; ++$ref_num) {
				if (!empty($this->fn_backlink_title)) {
					$attr['title'] = ' title="' . $this->encodeAttribute($this->fn_backlink_title) . '"';
				}
				if (!empty($this->fn_backlink_label)) {
					$attr['label'] = ' aria-label="' . $this->encodeAttribute($this->fn_backlink_label) . '"';
				}

				$parsed_attr   = $this->parseFootnotePlaceholders(implode('', $attr), $num, $ref_num);
				$backlink_text = $this->parseFootnotePlaceholders($this->fn_backlink_html, $num, $ref_num);

				// The first reference uses "fnref:", later ones "fnref2:" etc.
				$ref_count_mark = $ref_num > 1 ? $ref_num : '';
				$links[] = "<a href=\"#fnref$ref_count_mark:$note_id\"$parsed_attr>$backlink_text</a>";
			}
			$backlink = implode(' ', $links);
		}

		// Put the backlink inside the closing paragraph when there is one,
		// otherwise give it a paragraph of its own.
		if (!empty($backlink)) {
			if (preg_match('{</p>$}', $footnote)) {
				$footnote = substr($footnote, 0, -4) . " $backlink</p>";
			} else {
				$footnote .= "\n\n<p>$backlink</p>";
			}
		}

		$html .= "<li id=\"fn:$note_id\" role=\"doc-endnote\">\n";
		$html .= $footnote . "\n";
		$html .= "</li>\n\n";
	}
	$html .= "</ol>\n";

	$this->footnotes_assembled = $html;
}
||
1755 | |||
/**
 * Callback for appending footnotes.
 *
 * Converts one footnote marker match into its superscript reference link.
 * The first reference to a footnote moves the footnote content into the
 * ordered list and assigns it the next footnote number; each further
 * reference only increments that footnote's reference count.
 * @param array $matches match array; index 1 holds the footnote label
 * @return string reference markup, or the literal "[^label]" text when no
 *                footnote is stored under that label
 */
protected function _appendFootnotes_callback($matches) {
	$label = $matches[1];
	$node_id = $this->fn_id_prefix . $label;

	// Without a stored footnote for this id the marker stays plain text.
	if (!isset($this->footnotes[$node_id])) {
		return "[^" . $label . "]";
	}

	// Bound by reference so that numbering a new footnote below also
	// records the number in $this->footnotes_numbers.
	$num =& $this->footnotes_numbers[$node_id];
	if (isset($num)) {
		// Already numbered: this is a repeated reference, so the bumped
		// count becomes part of the reference anchor id.
		$ref_count_mark = ++$this->footnotes_ref_count[$node_id];
	} else {
		// First reference: queue the footnote content for output, start
		// its reference count and hand out the next number.
		$this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
		$this->footnotes_ref_count[$node_id] = 1;
		$num = $this->footnote_counter++;
		$ref_count_mark = '';
	}

	// Collect the optional class/title attributes followed by the fixed role.
	$attr_parts = array();
	if ($this->fn_link_class !== "") {
		$attr_parts[] = " class=\"" . $this->encodeAttribute($this->fn_link_class) . "\"";
	}
	if ($this->fn_link_title !== "") {
		$attr_parts[] = " title=\"" . $this->encodeAttribute($this->fn_link_title) . "\"";
	}
	$attr_parts[] = " role=\"doc-noteref\"";

	// "%%" inside the attributes is replaced by the footnote number.
	$attr = str_replace("%%", $num, implode('', $attr_parts));
	$encoded_id = $this->encodeAttribute($node_id);

	$anchor = "<a href=\"#fn:$encoded_id\"$attr>$num</a>";
	return "<sup id=\"fnref$ref_count_mark:$encoded_id\">" . $anchor . "</sup>";
}
||
1803 | |||
/**
 * Build footnote label by evaluating any placeholders.
 * - ^^ is replaced by the footnote number
 * - %% is replaced by the footnote reference number (Nth reference to
 *   that footnote number)
 * @param string $label            text that may contain the placeholders
 * @param int    $footnote_number  value substituted for every "^^"
 * @param int    $reference_number value substituted for every "%%"
 * @return string the label with both placeholders substituted
 */
protected function parseFootnotePlaceholders($label, $footnote_number, $reference_number) {
	// Same order as the array form of str_replace(): "^^" first, then "%%"
	// on the intermediate result.
	$with_footnote_number = str_replace('^^', $footnote_number, $label);
	return str_replace('%%', $reference_number, $with_footnote_number);
}
||
1820 | |||
1821 | |||
1822 | /** |
||
1823 | * Abbreviations - strips abbreviations from text, stores titles in hash |
||
1824 | * references. |
||
1825 | * @param string $text |
||
1826 | * @return string |
||
1827 | */ |
||
1828 | protected function stripAbbreviations($text) { |
||
1839 | } |
||
1840 | |||
/**
 * Callback for stripping abbreviations.
 *
 * Registers one abbreviation definition: the regex-quoted word is appended
 * to the alternation kept in $this->abbr_word_re and the trimmed
 * description is stored in $this->abbr_desciptions under the raw word.
 * @param array $matches match array; index 1 is read as the abbreviated
 *                       word, index 2 as its description
 * @return string always an empty string, which replaces the matched block
 */
protected function _stripAbbreviations_callback($matches) {
	$abbr_word = $matches[1];
	$abbr_desc = $matches[2];
	// Test for "non-empty" explicitly rather than by truthiness: the string
	// "0" is falsy in PHP, so an alternation consisting only of the
	// abbreviation "0" would otherwise get the next word appended without
	// the "|" separator (producing "0HTML" instead of "0|HTML").
	if ((string) $this->abbr_word_re !== '') {
		$this->abbr_word_re .= '|';
	}
	$this->abbr_word_re .= preg_quote($abbr_word);
	$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
	return ''; // String that will replace the block
}
||
1856 | |||
1857 | /** |
||
1858 | * Find defined abbreviations in text and wrap them in <abbr> elements. |
||
1859 | * @param string $text |
||
1860 | * @return string |
||
1861 | */ |
||
1862 | protected function doAbbreviations($text) { |
||
1874 | } |
||
1875 | |||
1876 | /** |
||
1877 | * Callback for processing abbreviations |
||
1878 | * @param array $matches |
||
1879 | * @return string |
||
1880 | */ |
||
1881 | protected function _doAbbreviations_callback($matches) { |
||
1882 | $abbr = $matches[0]; |
||
1892 | } |
||
1893 | } |
||
1894 |