Markdown::parseEscape()   A
last analyzed

Complexity

Conditions 4
Paths 3

Size

Total Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 4.5923

Importance

Changes 0
Metric Value
dl 0
loc 10
ccs 4
cts 6
cp 0.6667
rs 9.9332
c 0
b 0
f 0
cc 4
nc 3
nop 1
crap 4.5923
1
<?php
2
/**
3
 * @copyright Copyright (c) 2014 Carsten Brandt
4
 * @license https://github.com/cebe/markdown/blob/master/LICENSE
5
 * @link https://github.com/cebe/markdown#readme
6
 */
7
8
namespace cebe\markdown\latex;
9
10
use cebe\markdown\block\CodeTrait;
11
use cebe\markdown\block\HeadlineTrait;
12
use cebe\markdown\block\ListTrait;
13
use cebe\markdown\block\QuoteTrait;
14
use cebe\markdown\block\RuleTrait;
15
16
use cebe\markdown\inline\CodeTrait as InlineCodeTrait;
17
use cebe\markdown\inline\EmphStrongTrait;
18
use cebe\markdown\inline\LinkTrait;
19
20
use MikeVanRiel\TextToLatex;
21
22
/**
23
 * Markdown parser for the [initial markdown spec](http://daringfireball.net/projects/markdown/syntax).
24
 *
25
 * @author Carsten Brandt <[email protected]>
26
 */
27
class Markdown extends \cebe\markdown\Parser
28
{
29
	// include block element parsing using traits
30
	use CodeTrait;
31
	use HeadlineTrait;
32
	use ListTrait {
33
		// Check Ul List before headline
34
		identifyUl as protected identifyBUl;
35
		consumeUl as protected consumeBUl;
36
	}
37
	use QuoteTrait;
38
	use RuleTrait {
39
		// Check Hr before checking lists
40
		identifyHr as protected identifyAHr;
41
		consumeHr as protected consumeAHr;
42
	}
43
44
	// include inline element parsing using traits
45
	use InlineCodeTrait;
46
	use EmphStrongTrait;
47
	use LinkTrait;
48
49
	/**
50
	 * @var string this string will be prefixed to all auto generated labels.
51
	 * This can be used to disambiguate labels when combining multiple markdown files into one document.
52
	 */
53
	public $labelPrefix = '';
54
55
	const LINK_STYLE_FOOTNOTE = 'footnote';
56
	const LINK_STYLE_HREF = 'href';
57
58
	/**
59
	 * @var string link style defines how links are rendered in LaTeX, there are two different options:
60
	 *
61
	 * - `footnote` (default) - render all links with a footnote, which contains the full URL of the link. This is good for printing the PDF.
62
	 * - `href` - render all links with a hyperref, similar to HTML, the link target is not visible in this case.
63
	 */
64
	public $linkStyle = self::LINK_STYLE_FOOTNOTE;
65
66
	/**
67
	 * @var array these are "escapable" characters. When one of these is prefixed with a
68
	 * backslash, the character will be output without the backslash and is not interpreted
69
	 * as markdown.
70
	 */
71
	protected $escapeCharacters = [
72
		'\\', // backslash
73
		'`', // backtick
74
		'*', // asterisk
75
		'_', // underscore
76
		'{', '}', // curly braces
77
		'[', ']', // square brackets
78
		'(', ')', // parentheses
79
		'#', // hash mark
80
		'+', // plus sign
81
		'-', // minus sign (hyphen)
82
		'.', // dot
83
		'!', // exclamation mark
84
		'<', '>',
85
	];
86
87
88
	/**
	 * Resets the collected link references.
	 *
	 * @inheritDoc
	 */
	protected function prepare()
	{
		// start with an empty reference table
		$this->references = [];
	}
96
97
	/**
	 * Consume lines for a paragraph.
	 *
	 * Collects consecutive lines starting at `$current` until a blank line, an indented
	 * line (leading tab or four leading spaces) or a headline is reached. This allows
	 * headlines and code to break paragraphs.
	 *
	 * @param string[] $lines all lines currently being parsed.
	 * @param int $current index of the first line of the paragraph.
	 * @return array two elements: the `paragraph` block and the index of the last consumed line.
	 */
	protected function consumeParagraph($lines, $current)
	{
		$content = [];
		for ($i = $current, $count = count($lines); $i < $count; $i++) {
			$line = $lines[$i];

			// Compare against '' explicitly: empty() would also treat a line that
			// consists only of "0" as blank and end the paragraph there.
			if ($line === '' || ltrim($line) === '') {
				break;
			}
			// a leading tab or four leading spaces ends the paragraph
			if ($line[0] === "\t" || ($line[0] === ' ' && strncmp($line, '    ', 4) === 0)) {
				break;
			}
			if ($this->identifyHeadline($line, $lines, $i)) {
				break;
			}

			$content[] = $line;
		}

		$block = [
			'paragraph',
			'content' => $this->parseInline(implode("\n", $content)),
		];

		// $i points at the first line that was NOT consumed, so step back by one
		return [$block, --$i];
	}
123
124
125
	// rendering adjusted for LaTeX output
126
127
128
	/**
	 * Renders a paragraph block followed by an empty line.
	 *
	 * @inheritdoc
	 */
	protected function renderParagraph($block)
	{
		$body = $this->renderAbsy($block['content']);

		// the blank line terminates the paragraph in the LaTeX source
		return $body . "\n\n";
	}
135
136
	/**
	 * Renders a quote block wrapped in a LaTeX `quote` environment.
	 *
	 * @inheritdoc
	 */
	protected function renderQuote($block)
	{
		$inner = $this->renderAbsy($block['content']);

		return '\\begin{quote}' . $inner . '\\end{quote}' . "\n";
	}
143
144
	/**
	 * Renders a code block as a `lstlisting` environment.
	 *
	 * The listing language is set via `\lstset` from `$block['language']` when present
	 * and reset to an empty language otherwise.
	 *
	 * @inheritdoc
	 */
	protected function renderCode($block)
	{
		$language = isset($block['language']) ? "\\lstset{language={$block['language']}}" : '\lstset{language={}}';

		// replace No-Break Space characters in code block, which do not render in LaTeX
		$content = preg_replace("/[\x{00a0}\x{202f}]/u", ' ', $block['content']);
		if ($content === null) {
			// preg_replace() returns null on failure (e.g. invalid UTF-8 under the /u
			// modifier); keep the untouched code instead of emitting an empty listing.
			$content = $block['content'];
		}

		return "$language\\begin{lstlisting}\n{$content}\n\\end{lstlisting}\n";
	}
157
158
	/**
	 * Renders a list block as an `enumerate` (ordered) or `itemize` (any other) environment.
	 *
	 * @inheritdoc
	 */
	protected function renderList($block)
	{
		$environment = 'itemize';
		if ($block['list'] === 'ol') {
			$environment = 'enumerate';
		}

		// one \item line per list entry
		$items = '';
		foreach ($block['items'] as $itemLines) {
			$items .= '\item ' . $this->renderAbsy($itemLines) . "\n";
		}

		return '\begin{' . $environment . "}\n" . $items . '\end{' . $environment . "}\n";
	}
172
173
	/**
	 * Renders a headline as a LaTeX sectioning command.
	 *
	 * Levels 1 to 3 map to `\section`, `\subsection` and `\subsubsection`;
	 * every other level is rendered as `\paragraph`.
	 *
	 * @inheritdoc
	 */
	protected function renderHeadline($block)
	{
		$title = $this->renderAbsy($block['content']);
		$level = $block['level'];

		// loose comparison on purpose, so numeric strings such as '2' select a level too
		if ($level == 1) {
			$command = 'section';
		} elseif ($level == 2) {
			$command = 'subsection';
		} elseif ($level == 3) {
			$command = 'subsubsection';
		} else {
			$command = 'paragraph';
		}

		return '\\' . $command . '{' . $title . "}\n";
	}
186
187
	/**
	 * Renders a horizontal rule spanning the text width.
	 *
	 * `$block` is not read by this method; the parameter is kept so the signature
	 * stays unchanged for callers.
	 *
	 * @inheritdoc
	 */
	protected function renderHr($block)
	{
		$rule = '\noindent\rule{\textwidth}{0.4pt}';

		return "\n" . $rule . "\n";
	}
194
195
	/**
	 * Renders a link.
	 *
	 * URLs without `://` are treated as document-internal targets and rendered with
	 * `\hyperref`; a leading `#` gets `$labelPrefix` prepended and every `#` becomes `::`.
	 * Other URLs are rendered according to `$linkStyle`: as `\href`, or as the link text
	 * followed by a footnote holding the optional title and the URL.
	 *
	 * NOTE(review): the reference lookup at the top is duplicated in renderImage().
	 *
	 * @inheritdoc
	 */
	protected function renderLink($block)
	{
		if (isset($block['refkey'])) {
			$ref = $this->lookupReference($block['refkey']);
			if ($ref === false) {
				// unknown reference: fall back to the block's `orig` value
				return $block['orig'];
			}
			$block = array_merge($block, $ref);
		}

		$url = $block['url'];
		$text = $this->renderAbsy($block['text']);

		if (strpos($url, '://') !== false) {
			if ($this->linkStyle === self::LINK_STYLE_HREF) {
				return '\href{' . $this->escapeUrl($url) . '}{' . $text . '}';
			}

			$note = '';
			if (!empty($block['title'])) {
				$note = $this->escapeLatex($block['title']) . ': ';
			}
			return $text . '\\footnote{' . $note . '\url{' . $this->escapeUrl($url) . '}}';
		}

		// consider all non absolute links as relative in the document
		// $title is ignored in this case.
		if (isset($url[0]) && $url[0] === '#') {
			$url = $this->labelPrefix . $url;
		}
		$target = str_replace('#', '::', $url);

		return '\hyperref[' . $target . ']{' . $text . '}';
	}
224
225
	/**
	 * Renders an image as `\includegraphics` scaled to the text width.
	 *
	 * Characters in the URL that are special to LaTeX are escaped or replaced by
	 * their percent-encoded form before the URL is placed into the command.
	 *
	 * NOTE(review): the reference lookup at the top is duplicated in renderLink().
	 *
	 * @inheritdoc
	 */
	protected function renderImage($block)
	{
		if (isset($block['refkey'])) {
			if (($ref = $this->lookupReference($block['refkey'])) !== false) {
				$block = array_merge($block, $ref);
			} else {
				// unknown reference: fall back to the block's `orig` value
				return $block['orig'];
			}
		}

		// TODO create figure with caption with title
		$replaces = [
			'%' => '\\%',
			'{' => '\\%7B',
			'}' => '\\%7D',
			'\\' => '\\\\',
			'#' => '\\#',
			'$' => '\\%24',
		];
		// strtr() applies all substitutions in one pass over the input. Array-form
		// str_replace() works pair by pair on the previous result, so the backslash
		// inserted for '%', '{' and '}' was doubled again by the '\\' rule.
		$url = strtr($block['url'], $replaces);

		return "\\noindent\\includegraphics[width=\\textwidth]{{$url}}";
	}
250
251
	/**
	 * Parses `<a name="..."></a>` style tags as reference labels.
	 *
	 * Recognizes `a` and `span` tags carrying a `name` or `id` attribute at the start
	 * of `$text` and turns them into a `label` element; `$labelPrefix` is prepended to
	 * the name and every `#` is replaced by `::`. Anything else yields a single `<`
	 * text element.
	 *
	 * @param string $text the text to parse.
	 * @return array the parsed element and the number of characters consumed.
	 */
	private function parseInlineHtml($text)
	{
		$fallback = [['text', '<'], 1];

		if (strpos($text, '>') === false) {
			return $fallback;
		}

		// convert a name markers to \labels
		if (!preg_match('~^<((a|span)) (name|id)="(.*?)">.*?</\1>~i', $text, $matches)) {
			return $fallback;
		}

		$name = str_replace('#', '::', $this->labelPrefix . $matches[4]);

		return [
			['label', 'name' => $name],
			strlen($matches[0])
		];
	}
267
268
	/**
	 * Renders a reference label as a `\label` command.
	 *
	 * @param array $block the label element; its `name` entry is used as label name.
	 * @return string
	 */
	protected function renderLabel($block)
	{
		return '\label{' . $block['name'] . '}';
	}
275
276
	/**
	 * Renders an e-mail address as a `mailto:` hyperlink showing the address itself.
	 *
	 * @inheritdoc
	 */
	protected function renderEmail($block)
	{
		$address = $this->escapeUrl($block[1]);

		return '\href{mailto:' . $address . '}{' . $address . '}';
	}
284
285
	/**
	 * Renders a URL element with the `\url` command.
	 *
	 * @inheritdoc
	 */
	protected function renderUrl($block)
	{
		$target = $this->escapeUrl($block[1]);

		return '\url{' . $target . '}';
	}
292
293
	/**
	 * Renders inline code with `\lstinline`.
	 *
	 * No-break spaces are replaced by regular spaces and newlines by a space.
	 * The delimiter is the first candidate character that does not occur in the code:
	 * `|` is preferred, then a backtick, then a few further characters. If every
	 * candidate occurs in the code, the backtick is used anyway.
	 *
	 * @inheritdoc
	 */
	protected function renderInlineCode($block)
	{
		// replace No-Break Space characters in code block, which do not render in LaTeX
		$content = preg_replace("/[\x{00a0}\x{202f}]/u", ' ', $block[1]);
		if ($content === null) {
			// preg_replace() returns null on failure (e.g. invalid UTF-8 under the /u
			// modifier); keep the untouched code instead of emitting nothing.
			$content = $block[1];
		}
		$content = str_replace("\n", ' ', $content);

		// the delimiter must not appear inside the code, otherwise \lstinline ends early
		$delimiter = '`';
		foreach (['|', '`', '!', '+', '/', '@', '='] as $candidate) {
			if (strpos($content, $candidate) === false) {
				$delimiter = $candidate;
				break;
			}
		}

		return '\\lstinline' . $delimiter . $content . $delimiter;
	}
307
308
	/**
	 * Renders strong text in bold face via `\textbf`.
	 *
	 * @inheritdoc
	 */
	protected function renderStrong($block)
	{
		$inner = $this->renderAbsy($block[1]);

		return '\textbf{' . $inner . '}';
	}
315
316
	/**
	 * Renders emphasized text in italics via `\textit`.
	 *
	 * @inheritdoc
	 */
	protected function renderEmph($block)
	{
		$inner = $this->renderAbsy($block[1]);

		return '\textit{' . $inner . '}';
	}
323
324
	/**
	 * Parses escaped special characters.
	 * This allows a backslash to be interpreted as LaTeX.
	 * @marker \
	 *
	 * A backslash followed by one of `$escapeCharacters` consumes both characters:
	 * an escaped backslash yields a `backslash` element (presumably rendered through
	 * renderBackslash()), any other escaped character yields that character as text.
	 * Otherwise only the first character of `$text` is emitted as text.
	 *
	 * @param string $text the text to parse, starting at the backslash.
	 * @return array the parsed element and the number of characters consumed.
	 */
	protected function parseEscape($text)
	{
		// strict membership test: only an exact character match counts as escapable
		if (!isset($text[1]) || !in_array($text[1], $this->escapeCharacters, true)) {
			return [['text', $text[0]], 1];
		}

		if ($text[1] === '\\') {
			return [['backslash'], 2];
		}

		return [['text', $text[1]], 2];
	}
339
340
	/**
	 * Renders a `backslash` element as a single backslash character.
	 *
	 * @return string
	 */
	protected function renderBackslash()
	{
		$backslash = '\\';

		return $backslash;
	}
344
345
	private $_escaper;
346
347
	/**
	 * Escape special characters in URLs.
	 *
	 * Applies escapeLatex() first and then prefixes every `%` with a backslash.
	 *
	 * @param string $string the URL to escape.
	 * @return string the escaped URL.
	 */
	protected function escapeUrl($string)
	{
		$escaped = $this->escapeLatex($string);

		return str_replace('%', '\\%', $escaped);
	}
354
355
	/**
	 * Escape special LaTeX characters.
	 *
	 * The TextToLatex converter is created on first use and kept in `$_escaper`
	 * for subsequent calls.
	 *
	 * @param string $string the text to escape.
	 * @return string the result of the converter's convert() call.
	 */
	protected function escapeLatex($string)
	{
		$escaper = $this->_escaper;
		if ($escaper === null) {
			$escaper = new TextToLatex();
			$this->_escaper = $escaper;
		}

		return $escaper->convert($string);
	}
365
366
	/**
	 * Renders a text element with LaTeX special characters escaped.
	 *
	 * Two spaces at the end of a markdown line are rendered as a LaTeX line break,
	 * and no-break spaces are converted to their LaTeX equivalents.
	 *
	 * @inheritdoc
	 */
	protected function renderText($text)
	{
		$output = str_replace("  \n", "\\\\\n", $this->escapeLatex($text[1]));

		$replacements = [
			// support No-Break Space in LaTeX
			"/\x{00a0}/u" => '~',
			// support Narrow No-Break Space spaces in LaTeX
			// http://unicode-table.com/en/202F/
			// http://tex.stackexchange.com/questions/76132/how-to-typeset-a-small-non-breaking-space
			"/\x{202f}/u" => '\nobreak\hspace{.16667em plus .08333em}',
		];
		foreach ($replacements as $pattern => $replacement) {
			$replaced = preg_replace($pattern, $replacement, $output);
			// preg_replace() returns null on failure (e.g. invalid UTF-8 under the /u
			// modifier); keep the text as it is rather than dropping it.
			if ($replaced !== null) {
				$output = $replaced;
			}
		}

		return $output;
	}
382
}
383