thephpleague /
html-to-markdown
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | namespace League\HTMLToMarkdown\Converter; |
||
| 6 | |||
| 7 | use League\HTMLToMarkdown\ElementInterface; |
||
| 8 | |||
/**
 * Converts a `p` element into a Markdown paragraph.
 *
 * The element's value is processed line by line, and character sequences that
 * Markdown would otherwise interpret as syntax (block quotes, list markers,
 * rules, fences, HTML comments, ordered-list numbers) are backslash-escaped.
 */
class ParagraphConverter implements ConverterInterface
{
    /**
     * Builds the Markdown for a paragraph element.
     *
     * The value is split on "\r\n", "\r" or "\n"; every line is escaped and the
     * lines are re-joined with "\n".
     *
     * @return string The escaped text with trailing whitespace removed and a blank
     *                line ("\n\n") appended, or an empty string when the value
     *                contains nothing but whitespace.
     */
    public function convert(ElementInterface $element): string
    {
        $lines = \preg_split('/\r\n|\r|\n/', $element->getValue());
        \assert($lines !== false);

        $markdown = '';
        foreach ($lines as $line) {
            // Some characters only need escaping depending on where they appear in the line.
            $markdown .= $this->escapeSpecialCharacters($line) . "\n";
        }

        return \trim($markdown) !== '' ? \rtrim($markdown) . "\n\n" : '';
    }

    /**
     * @return string[]
     */
    public function getSupportedTags(): array
    {
        return ['p'];
    }

    /**
     * Applies every escaping pass to a single line, in a fixed order:
     * line-start sequences, then embedded sequences, then regex-based rules.
     */
    private function escapeSpecialCharacters(string $line): string
    {
        return $this->escapeOtherCharactersRegex(
            $this->escapeOtherCharacters(
                $this->escapeFirstCharacters($line)
            )
        );
    }

    /**
     * Escapes a line that starts (ignoring leading whitespace) with one of the
     * listed sequences.
     *
     * @return string A backslash followed by the left-trimmed line when a listed
     *                sequence is found at its start; the untouched line otherwise.
     */
    private function escapeFirstCharacters(string $line): string
    {
        $escapable = [
            '>',
            '- ',
            '+ ',
            '--',
            '~~~',
            '---',
            '- - -',
        ];

        // Leading whitespace is ignored for the comparison and dropped from the escaped result.
        $trimmed = \ltrim($line);

        foreach ($escapable as $prefix) {
            if (\strpos($trimmed, $prefix) === 0) {
                // Found a sequence that must be escaped, adding a backslash before it
                return '\\' . $trimmed;
            }
        }

        return $line;
    }

    /**
     * Escapes sequences that need a backslash wherever they appear in the line.
     *
     * Every occurrence is escaped, not just the first one, so that a second
     * "<!--" later in the same line is not left unescaped.
     */
    private function escapeOtherCharacters(string $line): string
    {
        $escapable = [
            '<!--',
        ];

        $replacements = [];
        foreach ($escapable as $sequence) {
            $replacements[$sequence] = '\\' . $sequence;
        }

        // strtr() replaces all occurrences in a single pass, never re-scans its
        // own output, and always returns a string.
        return \strtr($line, $replacements);
    }

    /**
     * Escapes line-start patterns that are described by regular expressions.
     *
     * For example "1. item" becomes "1\. item" and "2) item" becomes "2\) item".
     */
    private function escapeOtherCharactersRegex(string $line): string
    {
        $regExs = [
            // Match numbers ending on ')' or '.' that are at the beginning of the line.
            // They will be escaped if immediately followed by a space or the end of the line.
            '/^[0-9]+(?=(\)|\.)( |$))/',
        ];

        foreach ($regExs as $regEx) {
            if (\preg_match($regEx, $line, $match) !== 1) {
                continue;
            }

            // The pattern is anchored at the start of the line, so the match is a prefix of it:
            // insert the backslash right after the prefix, in front of the offending character.
            $prefix = $match[0];
            $line   = $prefix . '\\' . \substr($line, \strlen($prefix));
        }

        return $line;
    }
}
||
| 109 |