1 | <?php |
||
2 | /** |
||
3 | * @copyright Copyright (c) 2014 Carsten Brandt |
||
4 | * @license https://github.com/cebe/markdown/blob/master/LICENSE |
||
5 | * @link https://github.com/cebe/markdown#readme |
||
6 | */ |
||
7 | |||
8 | namespace cebe\markdown\latex; |
||
9 | |||
10 | use cebe\markdown\block\CodeTrait; |
||
11 | use cebe\markdown\block\HeadlineTrait; |
||
12 | use cebe\markdown\block\ListTrait; |
||
13 | use cebe\markdown\block\QuoteTrait; |
||
14 | use cebe\markdown\block\RuleTrait; |
||
15 | |||
16 | use cebe\markdown\inline\CodeTrait as InlineCodeTrait; |
||
17 | use cebe\markdown\inline\EmphStrongTrait; |
||
18 | use cebe\markdown\inline\LinkTrait; |
||
19 | |||
20 | use MikeVanRiel\TextToLatex; |
||
21 | |||
/**
 * Markdown parser for the [initial markdown spec](http://daringfireball.net/projects/markdown/syntax)
 * which renders the parsed document as LaTeX.
 *
 * Block and inline elements are parsed by the traits included below; the `render*()` methods
 * of this class turn the parsed elements into LaTeX markup.
 *
 * @author Carsten Brandt <[email protected]>
 */
class Markdown extends \cebe\markdown\Parser
{
    // include block element parsing using traits
    use CodeTrait;
    use HeadlineTrait;
    use ListTrait {
        // Check Ul List before headline
        identifyUl as protected identifyBUl;
        consumeUl as protected consumeBUl;
    }
    use QuoteTrait;
    use RuleTrait {
        // Check Hr before checking lists
        identifyHr as protected identifyAHr;
        consumeHr as protected consumeAHr;
    }

    // include inline element parsing using traits
    use InlineCodeTrait;
    use EmphStrongTrait;
    use LinkTrait;

    /**
     * @var string this string will be prefixed to all auto generated labels.
     * This can be used to disambiguate labels when combining multiple markdown files into one document.
     */
    public $labelPrefix = '';

    const LINK_STYLE_FOOTNOTE = 'footnote';
    const LINK_STYLE_HREF = 'href';

    /**
     * @var string link style defines how links are rendered in LaTeX, there are two different options:
     *
     * - `footnote` (default) - render all links with a footnote, which contains the full URL of the link. This is good for printing the PDF.
     * - `href` - render all links with a hyperref, similar to HTML, the link target is not visible in this case.
     */
    public $linkStyle = self::LINK_STYLE_FOOTNOTE;

    /**
     * @var array these are "escapeable" characters. When using one of these prefixed with a
     * backslash, the character will be outputted without the backslash and is not interpreted
     * as markdown.
     */
    protected $escapeCharacters = [
        '\\', // backslash
        '`', // backtick
        '*', // asterisk
        '_', // underscore
        '{', '}', // curly braces
        '[', ']', // square brackets
        '(', ')', // parentheses
        '#', // hash mark
        '+', // plus sign
        '-', // minus sign (hyphen)
        '.', // dot
        '!', // exclamation mark
        '<', '>',
    ];

    /**
     * @var TextToLatex|null escaper used by escapeLatex(); created on first use.
     */
    private $_escaper;


    /**
     * @inheritDoc
     *
     * Resets `$this->references` to an empty list.
     */
    protected function prepare()
    {
        // reset references
        $this->references = [];
    }

    /**
     * Consume lines for a paragraph
     *
     * Allow headlines and code to break paragraphs
     *
     * @param string[] $lines all lines of the text being parsed.
     * @param int $current index of the first line of the paragraph.
     * @return array the paragraph block and the index of the last line that was consumed.
     */
    protected function consumeParagraph($lines, $current)
    {
        // consume until a blank line, an indented code line or a headline
        $content = [];
        for ($i = $current, $count = count($lines); $i < $count; $i++) {
            $line = $lines[$i];

            // Blank lines are detected with ltrim() only; empty() must not be used here
            // because it also treats a line consisting of the single character "0" as empty.
            if (ltrim($line) === '') {
                break;
            }
            // a tab or four leading spaces start an indented code block
            if ($line[0] === "\t" || strncmp($line, '    ', 4) === 0) {
                break;
            }
            if ($this->identifyHeadline($line, $lines, $i)) {
                break;
            }

            $content[] = $line;
        }

        $block = [
            'paragraph',
            'content' => $this->parseInline(implode("\n", $content)),
        ];
        // $i points at the first line that is not part of the paragraph
        return [$block, --$i];
    }


    // rendering adjusted for LaTeX output


    /**
     * @inheritdoc
     *
     * A paragraph is its rendered content followed by an empty line.
     */
    protected function renderParagraph($block)
    {
        return $this->renderAbsy($block['content']) . "\n\n";
    }

    /**
     * @inheritdoc
     *
     * Wraps the rendered content in a `quote` environment.
     */
    protected function renderQuote($block)
    {
        return '\begin{quote}' . $this->renderAbsy($block['content']) . "\\end{quote}\n";
    }

    /**
     * @inheritdoc
     *
     * Renders the code in an `lstlisting` environment, preceded by an `\lstset` that selects
     * the language of the block, or an empty language if the block does not specify one.
     */
    protected function renderCode($block)
    {
        $language = isset($block['language']) ? "\\lstset{language={$block['language']}}" : '\lstset{language={}}';

        $content = $block['content'];
        // replace No-Break Space characters in code block, which do not render in LaTeX
        $content = preg_replace("/[\x{00a0}\x{202f}]/u", ' ', $content);

        return "$language\\begin{lstlisting}\n{$content}\n\\end{lstlisting}\n";
    }

    /**
     * @inheritdoc
     *
     * Ordered lists (`ol`) become `enumerate`, all other lists become `itemize`.
     */
    protected function renderList($block)
    {
        $type = ($block['list'] === 'ol') ? 'enumerate' : 'itemize';
        $output = "\\begin{{$type}}\n";

        foreach ($block['items'] as $item => $itemLines) {
            $output .= '\item ' . $this->renderAbsy($itemLines) . "\n";
        }

        return "$output\\end{{$type}}\n";
    }

    /**
     * @inheritdoc
     *
     * Levels 1 to 3 map to `\section`, `\subsection` and `\subsubsection`;
     * every other level is rendered as `\paragraph`.
     */
    protected function renderHeadline($block)
    {
        $content = $this->renderAbsy($block['content']);
        switch ($block['level']) {
            case 1: return "\\section{{$content}}\n";
            case 2: return "\\subsection{{$content}}\n";
            case 3: return "\\subsubsection{{$content}}\n";
            default: return "\\paragraph{{$content}}\n";
        }
    }

    /**
     * @inheritdoc
     *
     * Renders a horizontal rule spanning the text width.
     */
    protected function renderHr($block)
    {
        return "\n\\noindent\\rule{\\textwidth}{0.4pt}\n";
    }

    /**
     * Merges the definition of a reference-style link or image into its block.
     *
     * @param array $block link or image element.
     * @return array|null the unchanged block if it has no `refkey`, the block merged with the
     * definition returned by lookupReference() if it has one, or null if that lookup returned false.
     */
    private function applyReferenceDefinition($block)
    {
        if (!isset($block['refkey'])) {
            return $block;
        }
        $ref = $this->lookupReference($block['refkey']);

        return $ref === false ? null : array_merge($block, $ref);
    }

    /**
     * @inheritdoc
     *
     * URLs that do not contain `://` are rendered as `\hyperref` to a label inside the document.
     * All other URLs are rendered according to [[linkStyle]].
     * A reference-style link whose reference is not defined is returned as its original markdown.
     */
    protected function renderLink($block)
    {
        $resolved = $this->applyReferenceDefinition($block);
        if ($resolved === null) {
            return $block['orig'];
        }
        $block = $resolved;

        $url = $block['url'];
        $text = $this->renderAbsy($block['text']);
        if (strpos($url, '://') === false) {
            // consider all non absolute links as relative in the document
            // $title is ignored in this case.
            if (isset($url[0]) && $url[0] === '#') {
                $url = $this->labelPrefix . $url;
            }
            return '\hyperref['.str_replace('#', '::', $url).']{' . $text . '}';
        }

        if ($this->linkStyle === self::LINK_STYLE_HREF) {
            return '\href{' . $this->escapeUrl($url) . '}{' . $text . '}';
        }

        // footnote style: link text followed by a footnote with the optional title and the URL
        return $text . '\\footnote{' . (empty($block['title']) ? '' : $this->escapeLatex($block['title']) . ': ') . '\url{' . $this->escapeUrl($url) . '}}';
    }

    /**
     * @inheritdoc
     *
     * Renders the image with `\includegraphics` scaled to the text width.
     * A reference-style image whose reference is not defined is returned as its original markdown.
     */
    protected function renderImage($block)
    {
        $resolved = $this->applyReferenceDefinition($block);
        if ($resolved === null) {
            return $block['orig'];
        }
        $block = $resolved;

        // TODO create figure with caption with title
        $replaces = [
            '%' => '\\%',
            '{' => '\\%7B',
            '}' => '\\%7D',
            '\\' => '\\\\',
            '#' => '\\#',
            '$' => '\\%24',
        ];
        // strtr() replaces every character in a single pass. str_replace() with arrays applies
        // the pairs one after another, so the backslash rule would double the backslashes
        // that were just inserted for `%`, `{` and `}`.
        $url = strtr($block['url'], $replaces);

        return "\\noindent\\includegraphics[width=\\textwidth]{{$url}}";
    }

    /**
     * Parses <a name="..."></a> tags as reference labels
     *
     * Also matches `<span>` tags and the `id` attribute. Text starting with any other tag
     * yields the `<` character as plain text.
     *
     * NOTE(review): this method has no `@marker` annotation; it is presumably called from the
     * `<` handling of LinkTrait - confirm.
     *
     * @param string $text the text to parse, starting at a `<` character.
     * @return array the parsed element and the number of bytes consumed.
     */
    private function parseInlineHtml($text)
    {
        if (strpos($text, '>') !== false) {
            // convert a name markers to \labels
            if (preg_match('~^<((a|span)) (name|id)="(.*?)">.*?</\1>~i', $text, $matches)) {
                return [
                    ['label', 'name' => str_replace('#', '::', $this->labelPrefix . $matches[4])],
                    strlen($matches[0])
                ];
            }
        }
        return [['text', '<'], 1];
    }

    /**
     * renders a reference label
     */
    protected function renderLabel($block)
    {
        return "\\label{{$block['name']}}";
    }

    /**
     * @inheritdoc
     *
     * Renders the address as a `mailto:` `\href` that shows the address as link text.
     */
    protected function renderEmail($block)
    {
        $email = $this->escapeUrl($block[1]);
        return "\\href{mailto:{$email}}{{$email}}";
    }

    /**
     * @inheritdoc
     */
    protected function renderUrl($block)
    {
        return '\url{' . $this->escapeUrl($block[1]) . '}';
    }

    /**
     * @inheritdoc
     *
     * Renders the code with `\lstinline`, using `|` as delimiter unless the code contains it.
     */
    protected function renderInlineCode($block)
    {
        // replace No-Break Space characters in code block, which do not render in LaTeX
        $content = preg_replace("/[\x{00a0}\x{202f}]/u", ' ', $block[1]);
        $content = str_replace("\n", ' ', $content);

        // \lstinline reads its argument between two identical characters, so use the first
        // candidate that does not occur in the code. The backtick stays the fallback if all
        // candidates occur in the code.
        $delimiter = '`';
        foreach (['|', '`', '!', '+', '/', '='] as $candidate) {
            if (strpos($content, $candidate) === false) {
                $delimiter = $candidate;
                break;
            }
        }

        return '\\lstinline' . $delimiter . $content . $delimiter;
    }

    /**
     * @inheritdoc
     */
    protected function renderStrong($block)
    {
        return '\textbf{' . $this->renderAbsy($block[1]) . '}';
    }

    /**
     * @inheritdoc
     */
    protected function renderEmph($block)
    {
        return '\textit{' . $this->renderAbsy($block[1]) . '}';
    }

    /**
     * Parses escaped special characters.
     * This allow a backslash to be interpreted as LaTeX
     * @marker \
     *
     * An escaped backslash becomes a `backslash` element, any other escaped character from
     * [[escapeCharacters]] becomes plain text without the backslash. A backslash in front of
     * any other character is kept as plain text.
     */
    protected function parseEscape($text)
    {
        if (isset($text[1]) && in_array($text[1], $this->escapeCharacters, true)) {
            if ($text[1] === '\\') {
                return [['backslash'], 2];
            }
            return [['text', $text[1]], 2];
        }
        return [['text', $text[0]], 1];
    }

    /**
     * Renders a `backslash` element as a single, unescaped backslash.
     */
    protected function renderBackslash()
    {
        return '\\';
    }

    /**
     * Escape special characters in URLs
     *
     * Applies escapeLatex() and then prefixes every `%` with a backslash.
     */
    protected function escapeUrl($string)
    {
        return str_replace('%', '\\%', $this->escapeLatex($string));
    }

    /**
     * Escape special LaTeX characters
     *
     * The TextToLatex converter is created on the first call and reused afterwards.
     */
    protected function escapeLatex($string)
    {
        if ($this->_escaper === null) {
            $this->_escaper = new TextToLatex();
        }
        return $this->_escaper->convert($string);
    }

    /**
     * @inheritdoc
     *
     * Parses a newline indicated by two spaces on the end of a markdown line.
     */
    protected function renderText($text)
    {
        // two spaces before the line end are a hard line break
        $output = str_replace("  \n", "\\\\\n", $this->escapeLatex($text[1]));
        // support No-Break Space in LaTeX
        $output = preg_replace("/\x{00a0}/u", '~', $output);
        // support Narrow No-Break Space spaces in LaTeX
        // http://unicode-table.com/en/202F/
        // http://tex.stackexchange.com/questions/76132/how-to-typeset-a-small-non-breaking-space
        $output = preg_replace("/\x{202f}/u", '\nobreak\hspace{.16667em plus .08333em}', $output);
        return $output;
    }
}
||
383 |