1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace League\HTMLToMarkdown\Converter; |
||
6 | |||
7 | use League\HTMLToMarkdown\ElementInterface; |
||
8 | |||
/**
 * Converter for '#text' nodes: normalises whitespace, escapes a handful of
 * Markdown-significant characters and HTML-encodes the result.
 */
class TextConverter implements ConverterInterface
{
    /**
     * Turns the value of a text element into its Markdown representation.
     *
     * @param ElementInterface $element Element whose value is converted.
     *
     * @return string The processed text, passed through htmlspecialchars().
     */
    public function convert(ElementInterface $element): string
    {
        // Start from the raw value with any leading newlines stripped.
        $text = \ltrim($element->getValue(), "\n");

        // Collapse every run of whitespace into a single space.
        $text = \preg_replace('~\s+~u', ' ', $text);
        \assert(\is_string($text));

        // Backslash-escape '*', '_', '[', ']' and '\' -- but only when the
        // element has a parent and that parent is not a <div>.
        $parentNode = $element->getParent();
        if ($parentNode && $parentNode->getTagName() !== 'div') {
            $text = \preg_replace('~([*_\\[\\]\\\\])~u', '\\\\$1', $text);
            \assert(\is_string($text));
        }

        // Backslash-escape a '#' sitting at the very start of the text.
        $text = \preg_replace('~^#~u', '\\\\#', $text);
        \assert(\is_string($text));

        // A value that collapsed to one lone space survives only when the
        // next sibling exists and is not a block element; otherwise drop it.
        if ($text === ' ') {
            $sibling = $element->getNext();
            $text    = $sibling && ! $sibling->isBlock() ? ' ' : '';
        }

        // ENT_NOQUOTES: quote characters are left as they are.
        return \htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
    }

    /**
     * Lists the node names handled by this converter.
     *
     * @return string[]
     */
    public function getSupportedTags(): array
    {
        $supported = ['#text'];

        return $supported;
    }
}
||
49 |