1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace League\HTMLToMarkdown\Converter; |
||
6 | |||
7 | use League\HTMLToMarkdown\ElementInterface; |
||
8 | |||
/**
 * Converts the value of a `p` element into a Markdown paragraph.
 *
 * Every line of the element's value is escaped so that text which merely looks like
 * Markdown syntax (block quotes, list markers, rules, fences, HTML comment openers,
 * ordered-list numbers) is emitted literally.
 */
class ParagraphConverter implements ConverterInterface
{
    /**
     * Builds the Markdown for a paragraph element.
     *
     * @param ElementInterface $element Element whose text is read through getValue()
     *
     * @return string The escaped lines joined with "\n", right-trimmed and followed by "\n\n",
     *                or an empty string when the result contains only whitespace
     */
    public function convert(ElementInterface $element): string
    {
        $value = $element->getValue();

        $lines = \preg_split('/\r\n|\r|\n/', $value);
        if ($lines === false) {
            // assert() is compiled out when assertions are disabled, so a PCRE failure is
            // handled explicitly: treat the whole value as a single line rather than
            // handing `false` to foreach.
            $lines = [$value];
        }

        $markdown = '';
        foreach ($lines as $line) {
            // Some special characters need to be escaped depending on where they appear
            // in the line; escapeSpecialCharacters() deals with those cases.
            $markdown .= $this->escapeSpecialCharacters($line) . "\n";
        }

        return \trim($markdown) !== '' ? \rtrim($markdown) . "\n\n" : '';
    }

    /**
     * @return string[] The tag names handled by this converter
     */
    public function getSupportedTags(): array
    {
        return ['p'];
    }

    /**
     * Runs all escaping passes over a single line, in a fixed order:
     * line-start prefixes, then literal sequences, then regex-detected list numbers.
     */
    private function escapeSpecialCharacters(string $line): string
    {
        $line = $this->escapeFirstCharacters($line);
        $line = $this->escapeOtherCharacters($line);

        return $this->escapeOtherCharactersRegex($line);
    }

    /**
     * Escapes a line that starts (ignoring leading whitespace) with one of the listed prefixes.
     *
     * When a prefix matches, the leading whitespace is dropped and a single backslash is
     * prepended; otherwise the line is returned untouched, including its leading whitespace.
     */
    private function escapeFirstCharacters(string $line): string
    {
        $escapable = [
            '>',
            '- ',
            '+ ',
            '--',
            '~~~',
            '---',
            '- - -',
        ];

        // The trimmed line does not change between iterations, so compute it once.
        $trimmed = \ltrim($line);

        foreach ($escapable as $prefix) {
            if (\strpos($trimmed, $prefix) === 0) {
                // Found a prefix that must be escaped: add a backslash in front of it
                return '\\' . $trimmed;
            }
        }

        return $line;
    }

    /**
     * Escapes every occurrence of the listed literal sequences, wherever they appear in the line.
     *
     * All occurrences are prefixed with a backslash (not only the first one), so that no
     * later "<!--" on the same line is left unescaped.
     */
    private function escapeOtherCharacters(string $line): string
    {
        $escapable = [
            '<!--',
        ];

        foreach ($escapable as $sequence) {
            $line = \str_replace($sequence, '\\' . $sequence, $line);
        }

        return $line;
    }

    /**
     * Escapes constructs that can only be detected with a regular expression.
     *
     * For each pattern that matches, a backslash is inserted immediately after the matched
     * text, i.e. in front of the character that follows the match.
     */
    private function escapeOtherCharactersRegex(string $line): string
    {
        $regExs = [
            // Match numbers ending on ')' or '.' that are at the beginning of the line.
            // They will be escaped if immediately followed by a space or the end of the line.
            '/^[0-9]+(?=(\)|\.)( |$))/',
        ];

        foreach ($regExs as $pattern) {
            if (! \preg_match($pattern, $line, $match)) {
                continue;
            }

            // The match covers only the digits (the delimiter is in a lookahead), so inserting
            // at strlen($match[0]) places the backslash right before the ')' or '.'.
            $line = \substr_replace($line, '\\', \strlen($match[0]), 0);
        }

        return $line;
    }
}
||
109 |