ttskch /
esaba
| 1 | <?php |
||||
| 2 | |||||
| 3 | declare(strict_types=1); |
||||
| 4 | |||||
| 5 | namespace App\Esa; |
||||
| 6 | |||||
| 7 | use App\Esa\Exception\UndefinedEmojiException; |
||||
| 8 | use Symfony\Component\DomCrawler\Crawler; |
||||
| 9 | use Symfony\Component\Routing\Generator\UrlGeneratorInterface; |
||||
| 10 | |||||
/**
 * Manipulates an HTML fragment (post body) held in a DomCrawler instance:
 * regexp replacements on markup or text nodes, link rewriting, emoji code
 * substitution and table-of-contents extraction.
 *
 * Call initialize() first; the other public operations throw a
 * \LogicException while the crawler holds no nodes.
 */
class HtmlHandler
{
    public function __construct(
        private Crawler $crawler,
        private UrlGeneratorInterface $urlGenerator,
        private EmojiManager $emojiManager,
        private string $teamName,
    ) {
    }

    /**
     * Load the given HTML into the crawler, discarding whatever it held before.
     *
     * @param string $html HTML to load; an empty string is replaced by an empty <div>
     */
    public function initialize(string $html): self
    {
        // Compare with '' explicitly: a loose falsy check would also throw away the valid content "0".
        if ('' === $html) {
            $html = '<div></div>';
        }

        $this->crawler->clear();
        $this->crawler->addHtmlContent($html);

        return $this;
    }

    /**
     * Return the HTML of the crawler's current content.
     *
     * @throws \LogicException when the handler has not been initialized
     */
    public function dumpHtml(): string
    {
        $this->ensureInitialized();

        return $this->crawler->html();
    }

    /**
     * Apply regexp replacements to the serialized HTML and reload the result.
     *
     * @param array<string, string> $replacements map of [regexp pattern => replacement]
     *
     * @throws \LogicException   when the handler has not been initialized
     * @throws \RuntimeException when a pattern cannot be applied
     */
    public function replaceHtml(array $replacements): self
    {
        $this->ensureInitialized();

        return $this->initialize($this->applyReplacements($replacements, $this->crawler->html()));
    }

    /**
     * Apply regexp replacements to text nodes only, leaving tags and attributes untouched.
     *
     * @param array<string, string> $replacements map of [regexp pattern => replacement]
     *
     * @throws \LogicException   when the handler has not been initialized
     * @throws \RuntimeException when a pattern cannot be applied
     */
    public function replaceText(array $replacements): self
    {
        $this->ensureInitialized();

        $domNode = $this->crawler->getNode(0);

        // getNode() is nullable; fail with the usual message instead of passing null on.
        if (null === $domNode) {
            throw new \LogicException('Initialize before using.');
        }

        $this->walkDomNodesAndReplaceOnlyTextNodes($domNode, $replacements);

        $this->crawler->clear();
        $this->crawler->addNode($domNode);

        return $this;
    }

    /**
     * Recursively visit $node and its descendants, applying the replacements to every text node.
     *
     * @param array<string, string> $replacements map of [regexp pattern => replacement]
     *
     * @throws \RuntimeException when a pattern cannot be applied
     */
    public function walkDomNodesAndReplaceOnlyTextNodes(\DOMNode $node, array $replacements): self
    {
        if (XML_TEXT_NODE === $node->nodeType) {
            $original = (string) $node->textContent;
            $replaced = $this->applyReplacements($replacements, $original);

            // Only write back when something actually changed.
            if ($replaced !== $original) {
                $node->textContent = $replaced;
            }

            return $this;
        }

        if (!$node->hasChildNodes()) {
            return $this;
        }

        foreach ($node->childNodes as $childNode) {
            $this->walkDomNodesAndReplaceOnlyTextNodes($childNode, $replacements);
        }

        return $this;
    }

    /**
     * Replace links to other post with links to see the post on esaba.
     *
     * @param string $routeName         route used to generate the new link
     * @param string $routeVariableName name of the route parameter that receives the post id
     */
    public function replacePostUrls(string $routeName, string $routeVariableName): self
    {
        $backReferenceNumberForPostId = null;
        $backReferenceNumberForAnchorHash = null;
        $pattern = $this->getPostUrlPattern($backReferenceNumberForPostId, $backReferenceNumberForAnchorHash);
        $walker = $this->getATagWalkerForPostUrls(
            $pattern,
            $backReferenceNumberForPostId,
            $backReferenceNumberForAnchorHash,
            $routeName,
            $routeVariableName,
        );

        return $this->replaceATagWithWalker($pattern, $walker);
    }

    /**
     * Disable @mention links.
     */
    public function disableMentionLinks(): self
    {
        $pattern = $this->getMentionLinkPattern();

        return $this->replaceATagWithWalker($pattern, $this->getATagWalkerForMentionLinks($pattern));
    }

    /**
     * Replace <a> tag href values for specified regexp pattern with closure returns map of ['pattern' => regexp pattern, 'replacement' => replacement].
     *
     * @param string   $pattern regexp selecting the <a> tags (by href) to process
     * @param \Closure $walker  called with each selected <a> tag (as a Crawler)
     *
     * @throws \LogicException when the handler has not been initialized
     */
    public function replaceATagWithWalker(string $pattern, \Closure $walker): self
    {
        $this->ensureInitialized();

        $targetATags = $this->crawler->filter('a')->reduce($this->getATagReducer($pattern));

        // Index the replacements by their pattern; identical patterns collapse into one entry.
        $replacements = array_column($targetATags->each($walker), 'replacement', 'pattern');

        return $this->replaceHtml($replacements);
    }

    /**
     * Return the regexp matching URLs of posts of the configured team.
     *
     * @param ?int $backReferenceNumberForPostId     for returning position of post id in regexp pattern
     * @param ?int $backReferenceNumberForAnchorHash for returning position of anchor hash regexp pattern
     */
    public function getPostUrlPattern(?int &$backReferenceNumberForPostId, ?int &$backReferenceNumberForAnchorHash): string
    {
        $backReferenceNumberForPostId = 3;
        $backReferenceNumberForAnchorHash = 5;

        // Quote the team name so that it is always matched literally.
        return sprintf(
            '#^((https?:)?//%s\.esa\.io)?/posts/(\d+)(/|/edit/?)?(\#.+)?$#',
            preg_quote($this->teamName, '#'),
        );
    }

    /**
     * Return the regexp matching the "/members/<name>" part of a mention link.
     */
    public function getMentionLinkPattern(): string
    {
        return '#/members/([^\'"]+)#';
    }

    /**
     * Return closure reduces ATags Crawler with regexp pattern for href value.
     */
    public function getATagReducer(string $pattern): \Closure
    {
        // An <a> tag without href yields null; treat it as an empty (non-matching) href.
        return fn (Crawler $node): bool => 1 === preg_match($pattern, $node->attr('href') ?? '');
    }

    /**
     * Return closure returns map of ['pattern' => regexp pattern, 'replacement' => replacement] for href value of post urls.
     *
     * The returned closure throws \InvalidArgumentException when it is given
     * an <a> tag whose href does not match $pattern.
     */
    public function getATagWalkerForPostUrls(
        string $pattern,
        ?int $backReferenceNumberForPostId,
        ?int $backReferenceNumberForAnchorHash,
        string $routeName,
        string $routeVariableName,
    ): \Closure {
        return function (Crawler $node) use (
            $pattern,
            $backReferenceNumberForPostId,
            $backReferenceNumberForAnchorHash,
            $routeName,
            $routeVariableName,
        ): array {
            $href = $node->attr('href') ?? '';

            if (1 !== preg_match($pattern, $href, $matches)) {
                throw new \InvalidArgumentException(sprintf('The href "%s" does not match the post url pattern.', $href));
            }

            $postId = $matches[$backReferenceNumberForPostId];
            $anchorHash = $matches[$backReferenceNumberForAnchorHash] ?? '';
            $url = $this->urlGenerator->generate($routeName, [$routeVariableName => (int) $postId]);

            return [
                'pattern' => $this->buildHrefAttributePattern($matches[0]),
                'replacement' => $this->quoteReplacement(sprintf('href="%s%s"', $url, $anchorHash)),
            ];
        };
    }

    /**
     * Return closure returns map of ['pattern' => regexp pattern, 'replacement' => replacement] for href value of mention links.
     *
     * The returned closure throws \InvalidArgumentException when it is given
     * an <a> tag whose href does not match $pattern.
     */
    public function getATagWalkerForMentionLinks(string $pattern): \Closure
    {
        return function (Crawler $node) use ($pattern): array {
            $href = $node->attr('href') ?? '';

            if (1 !== preg_match($pattern, $href, $matches)) {
                throw new \InvalidArgumentException(sprintf('The href "%s" does not match the mention link pattern.', $href));
            }

            return [
                'pattern' => $this->buildHrefAttributePattern($matches[0]),
                // Removing the href attribute is what disables the link.
                'replacement' => '',
            ];
        };
    }

    /**
     * Replace emoji codes only in text content of each nodes with img tags.
     *
     * Works in two passes: codes inside text nodes are first swapped for
     * unique markers, then the markers are swapped for <img> tags in the
     * serialized HTML. Codes unknown to the emoji manager are restored as-is.
     *
     * @throws \LogicException when the handler has not been initialized
     */
    public function replaceEmojiCodes(): self
    {
        $this->ensureInitialized();

        // find emoji codes.
        preg_match_all('/:([^\s:<>\'\/"]+):/', $this->crawler->text(), $matches);
        $names = array_unique($matches[1]);

        $tempReplacements = [];
        foreach ($names as $name) {
            $pattern = sprintf('/:%s:/', preg_quote($name, '/'));

            $tempReplacements[$pattern] = $this->quoteReplacement($this->buildEmojiMarker($name));
        }

        // set temporarily replaced html content.
        $this->replaceText($tempReplacements);

        $replacements = [];
        foreach ($names as $name) {
            $pattern = sprintf('/%s/', preg_quote($this->buildEmojiMarker($name), '/'));

            try {
                $replacement = sprintf(
                    '<img src="%s" class="emoji" title=":%s:" alt=":%s:">',
                    $this->emojiManager->getImageUrl($name),
                    $name,
                    $name,
                );
            } catch (UndefinedEmojiException) {
                // Not an emoji after all: put the original code back.
                $replacement = sprintf(':%s:', $name);
            }

            $replacements[$pattern] = $this->quoteReplacement($replacement);
        }

        return $this->replaceHtml($replacements);
    }

    /**
     * Return map of ['id' => id, 'text' => text] of headings as TOC.
     *
     * @return array<int, array{id: ?string, text: string}> one entry per h1/h2/h3
     *
     * @throws \LogicException when the handler has not been initialized
     */
    public function getToc(): array
    {
        $this->ensureInitialized();

        return $this->crawler->filter('h1, h2, h3')->each($this->getWalkerForToc());
    }

    /**
     * Return closure returns map of ['id' => id, 'text' => text] of h tags.
     *
     * The text is the heading text with the text of its <a> tag removed.
     */
    public function getWalkerForToc(): \Closure
    {
        return function (Crawler $node): array {
            $anchor = $node->filter('a');

            // A heading may have no <a> tag at all; do not ask an empty node list for its text.
            $anchorText = $anchor->count() > 0 ? $anchor->text() : '';
            $text = $node->text();

            return [
                'id' => $node->attr('id'),
                'text' => trim('' === $anchorText ? $text : str_replace($anchorText, '', $text)),
            ];
        };
    }

    /**
     * Guard used by the public operations that need loaded content.
     *
     * @throws \LogicException when the crawler holds no nodes
     */
    private function ensureInitialized(): void
    {
        if (!$this->crawler->count()) {
            throw new \LogicException('Initialize before using.');
        }
    }

    /**
     * Apply every [regexp pattern => replacement] pair to $subject, in order.
     *
     * @param array<string, string> $replacements
     *
     * @throws \RuntimeException when preg_replace() reports a failure
     */
    private function applyReplacements(array $replacements, string $subject): string
    {
        foreach ($replacements as $pattern => $replacement) {
            $result = preg_replace((string) $pattern, $replacement, $subject);

            // preg_replace() signals failure with null; never let that replace the content.
            if (null === $result) {
                throw new \RuntimeException(sprintf('Failed to apply regexp pattern "%s": %s', $pattern, preg_last_error_msg()));
            }

            $subject = $result;
        }

        return $subject;
    }

    /**
     * Build the regexp matching an href attribute whose (quoted) value is exactly $href.
     */
    private function buildHrefAttributePattern(string $href): string
    {
        return sprintf('/href=(\'|")%s\1/', preg_quote($href, '/'));
    }

    /**
     * Build the temporary marker that stands in for the emoji code of $name.
     */
    private function buildEmojiMarker(string $name): string
    {
        return sprintf('__ESABA_IMG_TAG__%s__ESABA_IMG_TAG__', $name);
    }

    /**
     * Escape "\" and "$" so that preg_replace() inserts $replacement literally
     * instead of reading parts of it as back-references.
     */
    private function quoteReplacement(string $replacement): string
    {
        return strtr($replacement, ['\\' => '\\\\', '$' => '\\$']);
    }
}
||||
| 288 |