1 | <?php |
||||
2 | |||||
3 | declare(strict_types=1); |
||||
4 | |||||
5 | namespace App\Esa; |
||||
6 | |||||
7 | use App\Esa\Exception\UndefinedEmojiException; |
||||
8 | use Symfony\Component\DomCrawler\Crawler; |
||||
9 | use Symfony\Component\Routing\Generator\UrlGeneratorInterface; |
||||
10 | |||||
/**
 * Post-processes an HTML fragment held in a DomCrawler instance.
 *
 * Offers regexp replacement on the serialized markup or on text nodes only,
 * rewriting of links to posts, disabling of mention links, rendering of
 * emoji codes as <img> tags and extraction of a table of contents.
 *
 * initialize() must be called before any other operation; otherwise a
 * \LogicException is thrown.
 */
class HtmlHandler
{
    /**
     * Marker wrapped around an emoji name while it travels from the text-node
     * pass to the markup pass of replaceEmojiCodes().
     */
    private const EMOJI_PLACEHOLDER_FORMAT = '__ESABA_IMG_TAG__%s__ESABA_IMG_TAG__';

    public function __construct(
        private Crawler $crawler,
        private UrlGeneratorInterface $urlGenerator,
        private EmojiManager $emojiManager,
        private string $teamName,
    ) {
    }

    /**
     * Load the given HTML into the crawler, discarding whatever it held before.
     *
     * An empty string is replaced with an empty <div> so that the crawler
     * always receives some markup.
     */
    public function initialize(string $html): self
    {
        // Strict comparison on purpose: a loose check would also discard the valid content "0".
        if ('' === $html) {
            $html = '<div></div>';
        }

        $this->crawler->clear();
        $this->crawler->addHtmlContent($html);

        return $this;
    }

    /**
     * Return the HTML currently held by the crawler.
     *
     * @throws \LogicException when initialize() has not been called
     */
    public function dumpHtml(): string
    {
        $this->ensureInitialized();

        return $this->crawler->html();
    }

    /**
     * Apply regexp replacements to the serialized HTML and reload the result.
     *
     * @param array<string, string> $replacements map of [regexp pattern => replacement]
     *
     * @throws \LogicException   when initialize() has not been called
     * @throws \RuntimeException when a pattern cannot be applied
     */
    public function replaceHtml(array $replacements): self
    {
        $this->ensureInitialized();

        return $this->initialize($this->applyReplacements($replacements, $this->crawler->html()));
    }

    /**
     * Apply regexp replacements to text nodes only, leaving tags and attributes untouched.
     *
     * @param array<string, string> $replacements map of [regexp pattern => replacement]
     *
     * @throws \LogicException   when initialize() has not been called
     * @throws \RuntimeException when a pattern cannot be applied
     */
    public function replaceText(array $replacements): self
    {
        $this->ensureInitialized();

        $domNode = $this->crawler->getNode(0);
        if (null === $domNode) {
            // Not expected after ensureInitialized(); keeps the DOMNode contract below explicit.
            throw new \LogicException('Initialize before using.');
        }

        $this->walkDomNodesAndReplaceOnlyTextNodes($domNode, $replacements);

        $this->crawler->clear();
        $this->crawler->addNode($domNode);

        return $this;
    }

    /**
     * Recursively visit $node and its descendants, applying the replacements to every text node.
     *
     * @param array<string, string> $replacements map of [regexp pattern => replacement]
     *
     * @throws \RuntimeException when a pattern cannot be applied
     */
    public function walkDomNodesAndReplaceOnlyTextNodes(\DOMNode $node, array $replacements): self
    {
        if (XML_TEXT_NODE === $node->nodeType) {
            $original = (string) $node->textContent;
            $replaced = $this->applyReplacements($replacements, $original);

            // Only write back when something changed, so untouched nodes are left exactly as parsed.
            if ($replaced !== $original) {
                $node->textContent = $replaced;
            }

            return $this;
        }

        foreach ($node->childNodes as $childNode) {
            $this->walkDomNodesAndReplaceOnlyTextNodes($childNode, $replacements);
        }

        return $this;
    }

    /**
     * Replace links to other posts with links to see the post on esaba.
     *
     * @param string $routeName         name of the route used to generate the new URL
     * @param string $routeVariableName name of the route variable that receives the post id
     *
     * @throws \LogicException when initialize() has not been called
     */
    public function replacePostUrls(string $routeName, string $routeVariableName): self
    {
        $backReferenceNumberForPostId = null;
        $backReferenceNumberForAnchorHash = null;
        $pattern = $this->getPostUrlPattern($backReferenceNumberForPostId, $backReferenceNumberForAnchorHash);
        $walker = $this->getATagWalkerForPostUrls(
            $pattern,
            $backReferenceNumberForPostId,
            $backReferenceNumberForAnchorHash,
            $routeName,
            $routeVariableName,
        );

        return $this->replaceATagWithWalker($pattern, $walker);
    }

    /**
     * Disable @mention links.
     *
     * @throws \LogicException when initialize() has not been called
     */
    public function disableMentionLinks(): self
    {
        $pattern = $this->getMentionLinkPattern();

        return $this->replaceATagWithWalker($pattern, $this->getATagWalkerForMentionLinks($pattern));
    }

    /**
     * Rewrite the markup of every <a> tag whose href matches $pattern.
     *
     * @param string   $pattern regexp pattern tested against each href value
     * @param \Closure $walker  receives a Crawler for each matching <a> tag and returns
     *                          ['pattern' => regexp pattern, 'replacement' => replacement]
     *
     * @throws \LogicException when initialize() has not been called
     */
    public function replaceATagWithWalker(string $pattern, \Closure $walker): self
    {
        $this->ensureInitialized();

        $targetATags = $this->crawler->filter('a')->reduce($this->getATagReducer($pattern));
        $pairs = $targetATags->each($walker);

        // Identical patterns collapse into a single entry here, which is what we want.
        $replacements = array_combine(array_column($pairs, 'pattern'), array_column($pairs, 'replacement'));

        return $this->replaceHtml($replacements);
    }

    /**
     * Return the regexp pattern matching href values that point to a post of the configured team.
     *
     * @param ?int $backReferenceNumberForPostId     for returning position of post id in regexp pattern
     * @param ?int $backReferenceNumberForAnchorHash for returning position of anchor hash regexp pattern
     */
    public function getPostUrlPattern(?int &$backReferenceNumberForPostId, ?int &$backReferenceNumberForAnchorHash): string
    {
        $backReferenceNumberForPostId = 3;
        $backReferenceNumberForAnchorHash = 5;

        // Quote the team name so that it is always matched literally inside the pattern.
        return sprintf(
            '#^((https?:)?//%s\.esa\.io)?/posts/(\d+)(/|/edit/?)?(\#.+)?$#',
            preg_quote($this->teamName, '#'),
        );
    }

    /**
     * Return the regexp pattern matching href values of mention links.
     */
    public function getMentionLinkPattern(): string
    {
        return '#/members/([^\'"]+)#';
    }

    /**
     * Return closure reduces ATags Crawler with regexp pattern for href value.
     *
     * The closure returns false for <a> tags that have no href attribute.
     */
    public function getATagReducer(string $pattern): \Closure
    {
        return function (Crawler $node) use ($pattern): bool {
            $href = $node->attr('href');

            return null !== $href && 1 === preg_match($pattern, $href);
        };
    }

    /**
     * Return closure returns map of ['pattern' => regexp pattern, 'replacement' => replacement] for href value of post urls.
     *
     * The returned closure throws \InvalidArgumentException when the given <a> tag
     * has no href matching $pattern.
     */
    public function getATagWalkerForPostUrls(
        string $pattern,
        ?int $backReferenceNumberForPostId,
        ?int $backReferenceNumberForAnchorHash,
        string $routeName,
        string $routeVariableName,
    ): \Closure {
        return function (Crawler $node) use (
            $pattern,
            $backReferenceNumberForPostId,
            $backReferenceNumberForAnchorHash,
            $routeName,
            $routeVariableName,
        ): array {
            $href = $node->attr('href');
            if (null === $href || 1 !== preg_match($pattern, $href, $matches)) {
                throw new \InvalidArgumentException('The given <a> tag has no href matching the post url pattern.');
            }

            $postId = $matches[$backReferenceNumberForPostId] ?? null;
            if (null === $postId) {
                throw new \InvalidArgumentException('The post id could not be extracted from the href.');
            }

            // The anchor hash group is optional and absent from $matches when it did not take part in the match.
            $anchorHash = $matches[$backReferenceNumberForAnchorHash] ?? '';
            $url = $this->urlGenerator->generate($routeName, [$routeVariableName => (int) $postId]);

            return [
                'pattern' => $this->buildHrefAttributePattern($matches[0]),
                'replacement' => $this->escapeReplacement(sprintf('href="%s%s"', $url, $anchorHash)),
            ];
        };
    }

    /**
     * Return closure returns map of ['pattern' => regexp pattern, 'replacement' => replacement] for href value of mention links.
     *
     * The replacement is an empty string, i.e. the href attribute is removed.
     * The returned closure throws \InvalidArgumentException when the given <a> tag
     * has no href matching $pattern.
     */
    public function getATagWalkerForMentionLinks(string $pattern): \Closure
    {
        return function (Crawler $node) use ($pattern): array {
            $href = $node->attr('href');
            if (null === $href || 1 !== preg_match($pattern, $href, $matches)) {
                throw new \InvalidArgumentException('The given <a> tag has no href matching the mention link pattern.');
            }

            return [
                'pattern' => $this->buildHrefAttributePattern($matches[0]),
                'replacement' => '',
            ];
        };
    }

    /**
     * Replace emoji codes only in text content of each nodes with img tags.
     *
     * Works in two passes: codes found in text nodes are first swapped for a
     * placeholder, then the placeholders are swapped for <img> tags in the
     * serialized markup. Codes unknown to the emoji manager are restored as text.
     *
     * @throws \LogicException   when initialize() has not been called
     * @throws \RuntimeException when a pattern cannot be applied
     */
    public function replaceEmojiCodes(): self
    {
        $this->ensureInitialized();

        // find emoji codes.
        preg_match_all('/:([^\s:<>\'\/"]+):/', $this->crawler->text(), $matches);
        $names = array_unique($matches[1]);

        // first pass: put placeholders into the text nodes.
        $textReplacements = [];
        foreach ($names as $name) {
            $pattern = sprintf('/:%s:/', preg_quote($name, '/'));
            $textReplacements[$pattern] = $this->escapeReplacement(sprintf(self::EMOJI_PLACEHOLDER_FORMAT, $name));
        }

        $this->replaceText($textReplacements);

        // second pass: turn the placeholders into img tags in the serialized markup.
        $htmlReplacements = [];
        foreach ($names as $name) {
            // Text nodes are serialized with "&" encoded, so the placeholder must be searched in its encoded form.
            $encodedName = htmlspecialchars($name, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            $pattern = sprintf('/%s/', preg_quote(sprintf(self::EMOJI_PLACEHOLDER_FORMAT, $encodedName), '/'));

            try {
                $replacement = sprintf(
                    '<img src="%s" class="emoji" title=":%s:" alt=":%s:">',
                    $this->emojiManager->getImageUrl($name),
                    $encodedName,
                    $encodedName,
                );
            } catch (UndefinedEmojiException $e) {
                // Unknown emoji: restore the original code.
                $replacement = sprintf(':%s:', $encodedName);
            }

            $htmlReplacements[$pattern] = $this->escapeReplacement($replacement);
        }

        return $this->replaceHtml($htmlReplacements);
    }

    /**
     * Return list of ['id' => id, 'text' => text] of h1, h2 and h3 headings as TOC.
     *
     * @throws \LogicException when initialize() has not been called
     */
    public function getToc(): array
    {
        $this->ensureInitialized();

        return $this->crawler->filter('h1, h2, h3')->each($this->getWalkerForToc());
    }

    /**
     * Return closure returns map of ['id' => id, 'text' => text] of h tags.
     *
     * The text of the first <a> tag inside the heading, if any, is stripped from the heading text.
     */
    public function getWalkerForToc(): \Closure
    {
        return function (Crawler $node): array {
            $anchors = $node->filter('a');
            // Headings without any <a> tag are valid; asking an empty node list for its text would throw.
            $anchorText = $anchors->count() > 0 ? $anchors->text() : '';
            $text = $node->text();

            return [
                'id' => $node->attr('id'),
                'text' => trim('' === $anchorText ? $text : str_replace($anchorText, '', $text)),
            ];
        };
    }

    /**
     * Apply each [regexp pattern => replacement] pair to $subject in order and return the result.
     *
     * @param array<string, string> $replacements map of [regexp pattern => replacement]
     *
     * @throws \RuntimeException when preg_replace() reports a failure
     */
    private function applyReplacements(array $replacements, string $subject): string
    {
        foreach ($replacements as $pattern => $replacement) {
            $result = preg_replace((string) $pattern, $replacement, $subject);

            if (null === $result) {
                throw new \RuntimeException(sprintf('Failed to apply regexp pattern "%s": %s', $pattern, preg_last_error_msg()));
            }

            $subject = $result;
        }

        return $subject;
    }

    /**
     * Return the regexp pattern matching an href attribute whose quoted value is exactly $href.
     */
    private function buildHrefAttributePattern(string $href): string
    {
        return sprintf('/href=(\'|")%s\1/', preg_quote($href, '/'));
    }

    /**
     * Escape "\" and "$" so that preg_replace() inserts $replacement literally
     * instead of interpreting parts of it as back-references.
     */
    private function escapeReplacement(string $replacement): string
    {
        return strtr($replacement, ['\\' => '\\\\', '$' => '\\$']);
    }

    /**
     * @throws \LogicException when the crawler holds no node
     */
    private function ensureInitialized(): void
    {
        if (!$this->crawler->count()) {
            throw new \LogicException('Initialize before using.');
        }
    }
}
||||
288 |