Code

< 40 %
40-60 %
> 60 %
1
<?php
2
/**
3
 * @copyright Copyright (c) 2014 Carsten Brandt
4
 * @license https://github.com/cebe/markdown/blob/master/LICENSE
5
 * @link https://github.com/cebe/markdown#readme
6
 */
7
8
namespace cebe\markdown\latex;
9
10
use cebe\markdown\block\CodeTrait;
11
use cebe\markdown\block\HeadlineTrait;
12
use cebe\markdown\block\ListTrait;
13
use cebe\markdown\block\QuoteTrait;
14
use cebe\markdown\block\RuleTrait;
15
16
use cebe\markdown\inline\CodeTrait as InlineCodeTrait;
17
use cebe\markdown\inline\EmphStrongTrait;
18
use cebe\markdown\inline\LinkTrait;
19
20
use MikeVanRiel\TextToLatex;
21
22
/**
23
 * Markdown parser for the [initial markdown spec](http://daringfireball.net/projects/markdown/syntax) that renders its output as LaTeX.
24
 *
25
 * @author Carsten Brandt <[email protected]>
26
 */
27
class Markdown extends \cebe\markdown\Parser
28
{
29
	// include block element parsing using traits
30
	use CodeTrait;
31
	use HeadlineTrait;
32
	use ListTrait {
33
		// Check Ul List before headline
34
		identifyUl as protected identifyBUl;
35
		consumeUl as protected consumeBUl;
36
	}
37
	use QuoteTrait;
38
	use RuleTrait {
39
		// Check Hr before checking lists
40
		identifyHr as protected identifyAHr;
41
		consumeHr as protected consumeAHr;
42
	}
43
44
	// include inline element parsing using traits
45
	use InlineCodeTrait;
46
	use EmphStrongTrait;
47
	use LinkTrait;
48
49
	/**
50
	 * @var string this string will be prefixed to all auto generated labels.
51
	 * This can be used to disambiguate labels when combining multiple markdown files into one document.
52
	 */
53
	public $labelPrefix = '';
54
55
	const LINK_STYLE_FOOTNOTE = 'footnote';
56
	const LINK_STYLE_HREF = 'href';
57
58
	/**
59
	 * @var string link style defines how links are rendered in LaTeX, there are two different options:
60
	 *
61
	 * - `footnote` (default) - render all links with a footnote, which contains the full URL of the link. This is good for printing the PDF.
62
	 * - `href` - render all links with a hyperref, similar to HTML, the link target is not visible in this case.
63
	 */
64
	public $linkStyle = self::LINK_STYLE_FOOTNOTE;
65
66
	/**
67
	 * @var array these are "escapeable" characters. When using one of these prefixed with a
68
	 * backslash, the character will be outputted without the backslash and is not interpreted
69
	 * as markdown.
70
	 */
71
	protected $escapeCharacters = [
72
		'\\', // backslash
73
		'`', // backtick
74
		'*', // asterisk
75
		'_', // underscore
76
		'{', '}', // curly braces
77
		'[', ']', // square brackets
78
		'(', ')', // parentheses
79
		'#', // hash mark
80
		'+', // plus sign
81
		'-', // minus sign (hyphen)
82
		'.', // dot
83
		'!', // exclamation mark
84
		'<', '>',
85
	];
86
87
88
	/**
	 * @inheritDoc
	 *
	 * Resets the collected link references to an empty array.
	 */
	protected function prepare()
	{
		// drop every reference gathered so far
		$this->references = [];
	}
96
97
	/**
	 * Consume lines for a paragraph
	 *
	 * Allow headlines and code to break paragraphs: collecting starts at `$current` and stops at the
	 * first line that is empty or whitespace-only, that starts with a tab or four spaces (possible
	 * indented code block), or that is identified as a headline.
	 *
	 * @param array $lines all lines that are being parsed.
	 * @param int $current index in `$lines` at which the paragraph starts.
	 * @return array two elements: the block `['paragraph', 'content' => <parsed inline elements>]`
	 * and the index of the last line that belongs to the paragraph.
	 */
	protected function consumeParagraph($lines, $current)
	{
		// consume until newline
		$content = [];
		for ($i = $current, $count = count($lines); $i < $count; $i++) {
			$line = $lines[$i];

			// Compare with '' explicitly: empty('0') is true, so using empty() here would
			// wrongly end the paragraph at a line that consists of the single character "0".
			if ($line === '' || ltrim($line) === '') {
				break;
			}
			// a leading tab or four leading spaces may start an indented code block
			if ($line[0] === "\t" || ($line[0] === ' ' && strncmp($line, '    ', 4) === 0)) {
				break;
			}
			if ($this->identifyHeadline($line, $lines, $i)) {
				break;
			}

			$content[] = $line;
		}

		$block = [
			'paragraph',
			'content' => $this->parseInline(implode("\n", $content)),
		];

		// $i points at the first line that is NOT part of the paragraph, step back by one
		return [$block, --$i];
	}
123
124
125
	// rendering adjusted for LaTeX output
126
127
128
	/**
	 * @inheritdoc
	 *
	 * Renders the inline content of the paragraph followed by two newlines.
	 *
	 * @param array $block the paragraph block, its inline elements are read from `$block['content']`.
	 * @return string
	 */
	protected function renderParagraph($block)
	{
		$rendered = $this->renderAbsy($block['content']);

		return $rendered . "\n\n";
	}
135
136
	/**
	 * @inheritdoc
	 *
	 * Wraps the rendered content of the quote in a LaTeX `quote` environment.
	 *
	 * @param array $block the quote block, its elements are read from `$block['content']`.
	 * @return string
	 */
	protected function renderQuote($block)
	{
		$body = $this->renderAbsy($block['content']);

		return '\begin{quote}' . $body . '\end{quote}' . "\n";
	}
143
144
	/**
	 * @inheritdoc
	 *
	 * Renders a code block as `lstlisting` environment. The environment is preceded by a `\lstset`
	 * command which selects `$block['language']` when it is set and an empty language otherwise.
	 *
	 * @param array $block the code block, the code is read from `$block['content']`.
	 * @return string
	 */
	protected function renderCode($block)
	{
		if (isset($block['language'])) {
			$lstset = '\lstset{language=' . $block['language'] . '}';
		} else {
			$lstset = '\lstset{language={}}';
		}

		// replace No-Break Space characters in code block, which do not render in LaTeX
		$code = preg_replace("/[\x{00a0}\x{202f}]/u", ' ', $block['content']);

		return $lstset . "\\begin{lstlisting}\n" . $code . "\n\\end{lstlisting}\n";
	}
157
158
	/**
	 * @inheritdoc
	 *
	 * Renders a list as `enumerate` environment when `$block['list']` is `ol` and as `itemize`
	 * environment otherwise. Each entry of `$block['items']` becomes one `\item`.
	 *
	 * @param array $block the list block.
	 * @return string
	 */
	protected function renderList($block)
	{
		$environment = 'itemize';
		if ($block['list'] === 'ol') {
			$environment = 'enumerate';
		}

		$items = '';
		foreach ($block['items'] as $itemElements) {
			$items .= '\item ' . $this->renderAbsy($itemElements) . "\n";
		}

		return '\begin{' . $environment . "}\n" . $items . '\end{' . $environment . "}\n";
	}
172
173
	/**
	 * @inheritdoc
	 *
	 * Maps the headline level to a LaTeX sectioning command: level 1 is a `\section`, level 2 a
	 * `\subsection`, level 3 a `\subsubsection` and every other level a `\paragraph`.
	 *
	 * @param array $block the headline block, uses `$block['content']` and `$block['level']`.
	 * @return string
	 */
	protected function renderHeadline($block)
	{
		$title = $this->renderAbsy($block['content']);

		switch ($block['level']) {
			case 1:
				$command = 'section';
				break;
			case 2:
				$command = 'subsection';
				break;
			case 3:
				$command = 'subsubsection';
				break;
			default:
				$command = 'paragraph';
		}

		return '\\' . $command . '{' . $title . "}\n";
	}
186
187
	/**
	 * @inheritdoc
	 *
	 * Renders a horizontal rule as a non-indented line of `0.4pt` thickness spanning the text width.
	 *
	 * @param array $block the rule block, it is not used.
	 * @return string
	 */
	protected function renderHr($block)
	{
		$rule = '\noindent\rule{\textwidth}{0.4pt}';

		return "\n" . $rule . "\n";
	}
194
195
	/**
	 * @inheritdoc
	 *
	 * Renders a link. A link with a `refkey` is resolved through `lookupReference()` first; when
	 * the reference is unknown the original markdown (`$block['orig']`) is returned unchanged.
	 *
	 * URLs that do not contain `://` are rendered as `\hyperref` to a label inside the document,
	 * `#` is replaced by `::` and URLs starting with `#` are prefixed with [[labelPrefix]].
	 * All other URLs are rendered according to [[linkStyle]]: as `\href` or as the link text
	 * followed by a `\footnote` that contains the optional title and the URL.
	 *
	 * @param array $block the link block.
	 * @return string
	 */
	protected function renderLink($block)
	{
		if (isset($block['refkey'])) {
			$reference = $this->lookupReference($block['refkey']);
			if ($reference === false) {
				return $block['orig'];
			}
			$block = array_merge($block, $reference);
		}

		$target = $block['url'];
		$label = $this->renderAbsy($block['text']);

		if (strpos($target, '://') !== false) {
			if ($this->linkStyle === self::LINK_STYLE_HREF) {
				return '\href{' . $this->escapeUrl($target) . '}{' . $label . '}';
			}
			$note = empty($block['title']) ? '' : $this->escapeLatex($block['title']) . ': ';

			return $label . '\\footnote{' . $note . '\url{' . $this->escapeUrl($target) . '}}';
		}

		// consider all non absolute links as relative in the document
		// $title is ignored in this case.
		if (isset($target[0]) && $target[0] === '#') {
			$target = $this->labelPrefix . $target;
		}

		return '\hyperref[' . str_replace('#', '::', $target) . ']{' . $label . '}';
	}
224
225
	/**
	 * @inheritdoc
	 *
	 * Renders an image as `\includegraphics` scaled to the text width. An image with a `refkey`
	 * is resolved through `lookupReference()` first; when the reference is unknown the original
	 * markdown (`$block['orig']`) is returned unchanged.
	 *
	 * @param array $block the image block, the image location is read from `$block['url']`.
	 * @return string
	 */
	protected function renderImage($block)
	{
		if (isset($block['refkey'])) {
			if (($ref = $this->lookupReference($block['refkey'])) !== false) {
				$block = array_merge($block, $ref);
			} else {
				return $block['orig'];
			}
		}

		// TODO create figure with caption with title
		$replaces = [
			'%' => '\\%',
			'{' => '\\%7B',
			'}' => '\\%7D',
			'\\' => '\\\\',
			'#' => '\\#',
			'$' => '\\%24',
		];
		// strtr() replaces every character in a single pass. str_replace() with arrays works
		// sequentially, so the backslash rule would double the backslashes that the preceding
		// rules have just inserted (e.g. `%` ended up as `\\%` instead of `\%`).
		$url = strtr($block['url'], $replaces);

		return "\\noindent\\includegraphics[width=\\textwidth]{{$url}}";
	}
250
251
	/**
	 * Parses <a name="..."></a> tags as reference labels
	 *
	 * Recognized are `a` and `span` tags with a `name` or `id` attribute at the beginning of the
	 * text. Their attribute value, prefixed with [[labelPrefix]] and with `#` replaced by `::`,
	 * becomes the name of a `label` element. Everything else results in a text element for the
	 * single character `<`.
	 *
	 * @param string $text the text to parse.
	 * @return array two elements: the parsed element and the number of consumed characters.
	 */
	private function parseInlineHtml($text)
	{
		$plainLt = [['text', '<'], 1];

		if (strpos($text, '>') === false) {
			return $plainLt;
		}

		// convert a name markers to \labels
		if (!preg_match('~^<((a|span)) (name|id)="(.*?)">.*?</\1>~i', $text, $anchor)) {
			return $plainLt;
		}

		$name = str_replace('#', '::', $this->labelPrefix . $anchor[4]);

		return [
			['label', 'name' => $name],
			strlen($anchor[0]),
		];
	}
267
268
	/**
	 * renders a reference label
	 *
	 * @param array $block the label element, the label name is read from `$block['name']`.
	 * @return string the `\label` command for that name.
	 */
	protected function renderLabel($block)
	{
		return '\label{' . $block['name'] . '}';
	}
275
276
	/**
	 * @inheritdoc
	 *
	 * Renders an email address as `\href` with a `mailto:` target. The escaped address is used
	 * both as link target and as link text.
	 *
	 * @param array $block the email element, the address is read from `$block[1]`.
	 * @return string
	 */
	protected function renderEmail($block)
	{
		$address = $this->escapeUrl($block[1]);

		return '\href{mailto:' . $address . '}{' . $address . '}';
	}
284
285
	/**
	 * @inheritdoc
	 *
	 * Renders a URL with the `\url` command.
	 *
	 * @param array $block the url element, the URL is read from `$block[1]`.
	 * @return string
	 */
	protected function renderUrl($block)
	{
		$target = $this->escapeUrl($block[1]);

		return '\url{' . $target . '}';
	}
292
293
	/**
	 * @inheritdoc
	 *
	 * Renders inline code with `\lstinline`. The code is enclosed by a delimiter character that
	 * must not occur in the code itself, so the first character of a list of candidates that is
	 * not part of the code is used. `|` is preferred and the backtick is the second choice.
	 *
	 * @param array $block the inline code element, the code is read from `$block[1]`.
	 * @return string
	 */
	protected function renderInlineCode($block)
	{
		// replace No-Break Space characters in code block, which do not render in LaTeX
		$content = preg_replace("/[\x{00a0}\x{202f}]/u", ' ', $block[1]);
		// inline code is rendered on one line
		$content = str_replace("\n", ' ', $content);

		// The backtick is kept as last resort when the code contains every candidate.
		$delimiter = '`';
		foreach (['|', '`', '!', '+', '/', '@', '='] as $candidate) {
			if (strpos($content, $candidate) === false) {
				$delimiter = $candidate;
				break;
			}
		}

		return '\\lstinline' . $delimiter . $content . $delimiter;
	}
307
308
	/**
	 * @inheritdoc
	 *
	 * Renders strong text with `\textbf`.
	 *
	 * @param array $block the strong element, its inline elements are read from `$block[1]`.
	 * @return string
	 */
	protected function renderStrong($block)
	{
		$inner = $this->renderAbsy($block[1]);

		return '\textbf{' . $inner . '}';
	}
315
316
	/**
	 * @inheritdoc
	 *
	 * Renders emphasized text with `\textit`.
	 *
	 * @param array $block the emph element, its inline elements are read from `$block[1]`.
	 * @return string
	 */
	protected function renderEmph($block)
	{
		$inner = $this->renderAbsy($block[1]);

		return '\textit{' . $inner . '}';
	}
323
324
	/**
	 * Parses escaped special characters.
	 * This allows a backslash to be interpreted as LaTeX: an escaped backslash results in a
	 * `backslash` element, every other escaped character results in a text element without
	 * the leading backslash. A backslash that is not followed by one of the
	 * [[escapeCharacters]] is kept as text.
	 * @marker \
	 */
	protected function parseEscape($text)
	{
		// strict comparison, the escape characters and $text[1] are both strings
		if (isset($text[1]) && in_array($text[1], $this->escapeCharacters, true)) {
			if ($text[1] === '\\') {
				return [['backslash'], 2];
			}
			return [['text', $text[1]], 2];
		}
		return [['text', $text[0]], 1];
	}
339
340
	/**
	 * Renders a `backslash` element.
	 *
	 * @return string a single backslash character.
	 */
	protected function renderBackslash()
	{
		return "\\";
	}
344
345
	private $_escaper;
346
347
	/**
	 * Escape special characters in URLs
	 *
	 * The string is passed through `escapeLatex()` first, afterwards each `%` in the result is
	 * replaced by `\%`.
	 *
	 * @param string $string the URL to escape.
	 * @return string
	 */
	protected function escapeUrl($string)
	{
		$escaped = $this->escapeLatex($string);

		return str_replace('%', '\\%', $escaped);
	}
354
355
	/**
	 * Escape special LaTeX characters
	 *
	 * The `TextToLatex` converter that does the work is created on first use and kept for
	 * subsequent calls.
	 *
	 * @param string $string the text to escape.
	 * @return string the result of the converter.
	 */
	protected function escapeLatex($string)
	{
		if (null === $this->_escaper) {
			$this->_escaper = new TextToLatex();
		}
		$escaper = $this->_escaper;

		return $escaper->convert($string);
	}
365
366
	/**
	 * @inheritdoc
	 *
	 * Parses a newline indicated by two spaces on the end of a markdown line.
	 * The text is escaped for LaTeX first, No-Break Space characters are converted
	 * to their LaTeX counterparts afterwards.
	 *
	 * @param array $text the text element, the text is read from `$text[1]`.
	 * @return string
	 */
	protected function renderText($text)
	{
		$escaped = $this->escapeLatex($text[1]);
		$withBreaks = str_replace("  \n", "\\\\\n", $escaped);

		// support No-Break Space in LaTeX
		$withNbsp = preg_replace("/\x{00a0}/u", '~', $withBreaks);

		// support Narrow No-Break Space spaces in LaTeX
		// http://unicode-table.com/en/202F/
		// http://tex.stackexchange.com/questions/76132/how-to-typeset-a-small-non-breaking-space
		return preg_replace("/\x{202f}/u", '\nobreak\hspace{.16667em plus .08333em}', $withNbsp);
	}
382
}
383