Issues (13)

src/Esa/HtmlHandler.php (5 issues)

Labels
Severity
1
<?php
2
3
declare(strict_types=1);
4
5
namespace App\Esa;
6
7
use App\Esa\Exception\UndefinedEmojiException;
8
use Symfony\Component\DomCrawler\Crawler;
9
use Symfony\Component\Routing\Generator\UrlGeneratorInterface;
10
11
class HtmlHandler
12
{
13 58
    /**
     * @param Crawler               $crawler      DOM crawler holding the HTML being processed
     * @param UrlGeneratorInterface $urlGenerator generates the URLs that replace post links
     * @param EmojiManager          $emojiManager resolves emoji names to image URLs
     * @param string                $teamName     team name interpolated into the post URL pattern
     */
    public function __construct(
        private Crawler $crawler,
        private UrlGeneratorInterface $urlGenerator,
        private EmojiManager $emojiManager,
        private string $teamName,
    ) {}
20
21 19
    /**
     * Load an HTML fragment into the crawler, discarding whatever it held before.
     *
     * An empty string is replaced by an empty <div> placeholder before loading.
     *
     * @param string $html HTML fragment to process
     *
     * @return self this handler, for method chaining
     */
    public function initialize(string $html): self
    {
        // Compare against '' explicitly: a loose falsiness check would also
        // throw away the valid one-character document "0".
        if ('' === $html) {
            $html = '<div></div>';
        }

        $this->crawler->clear();
        $this->crawler->addHtmlContent($html);

        return $this;
    }
32
33 3
    /**
     * Serialize the crawler's current content to an HTML string.
     *
     * @throws \LogicException when the crawler holds no nodes
     */
    public function dumpHtml(): string
    {
        $this->ensureInitialized();
        $html = $this->crawler->html();

        return $html;
    }
39
40
    /**
     * Apply regexp replacements to the serialized HTML, then reload the result
     * into the crawler.
     *
     * @param array $replacements map of [regexp pattern => replacement]
     *
     * @return self this handler, for method chaining
     *
     * @throws \LogicException   when the crawler holds no nodes
     * @throws \RuntimeException when a pattern cannot be applied
     */
    public function replaceHtml(array $replacements): self
    {
        $this->ensureInitialized();

        $html = $this->crawler->html();

        foreach ($replacements as $pattern => $replacement) {
            $replaced = preg_replace($pattern, $replacement, $html);

            // preg_replace() returns null on failure (invalid pattern, backtrack
            // limit, ...). Fail with a clear message instead of handing null to
            // initialize(), which only accepts a string.
            if (null === $replaced) {
                throw new \RuntimeException(sprintf('Failed to apply pattern "%s": %s', $pattern, preg_last_error_msg()));
            }

            $html = $replaced;
        }

        return $this->initialize($html);
    }
55
56
    /**
     * Apply regexp replacements to text nodes only, then reset the crawler so
     * that it holds the modified first node.
     *
     * @param array $replacements map of [regexp pattern => replacement]
     *
     * @return self this handler, for method chaining
     *
     * @throws \LogicException when the crawler holds no nodes
     */
    public function replaceText(array $replacements): self
    {
        $this->ensureInitialized();

        $domNode = $this->crawler->getNode(0);

        // getNode() is declared nullable; guard explicitly so that neither the
        // walker nor addNode() can ever receive null.
        if (null === $domNode) {
            throw new \LogicException('Initialize before using.');
        }

        $this->walkDomNodesAndReplaceOnlyTextNodes($domNode, $replacements);

        $this->crawler->clear();
        $this->crawler->addNode($domNode);

        return $this;
    }
72
73
    /**
     * Recursively walk a DOM subtree and apply the replacements to the content
     * of every text node; other node types are only traversed.
     *
     * @param \DOMNode $node         root of the subtree to process
     * @param array    $replacements map of [regexp pattern => replacement]
     *
     * @return self this handler, for method chaining
     *
     * @throws \RuntimeException when a pattern cannot be applied
     */
    public function walkDomNodesAndReplaceOnlyTextNodes(\DOMNode $node, array $replacements): self
    {
        if (XML_TEXT_NODE === $node->nodeType) {
            foreach ($replacements as $pattern => $replacement) {
                $replaced = preg_replace($pattern, $replacement, $node->textContent);

                // A null result means the regexp failed; assigning it would
                // silently erase the text of this node.
                if (null === $replaced) {
                    throw new \RuntimeException(sprintf('Failed to apply pattern "%s": %s', $pattern, preg_last_error_msg()));
                }

                $node->textContent = $replaced;
            }

            return $this;
        }

        if (!$node->hasChildNodes()) {
            return $this;
        }

        foreach ($node->childNodes as $childNode) {
            $this->walkDomNodesAndReplaceOnlyTextNodes($childNode, $replacements);
        }

        return $this;
    }
96
97
    /**
     * Rewrite links that point to other posts so that they point to the
     * generated route for that post instead.
     *
     * @param string $routeName         name of the route used to generate the new URL
     * @param string $routeVariableName route parameter that receives the post id
     *
     * @return self this handler, for method chaining
     */
    public function replacePostUrls(string $routeName, string $routeVariableName): self
    {
        // Both group numbers are filled in by reference by getPostUrlPattern().
        $postIdGroup = null;
        $anchorHashGroup = null;
        $postUrlPattern = $this->getPostUrlPattern($postIdGroup, $anchorHashGroup);

        return $this->replaceATagWithWalker(
            $postUrlPattern,
            $this->getATagWalkerForPostUrls($postUrlPattern, $postIdGroup, $anchorHashGroup, $routeName, $routeVariableName),
        );
    }
109
110
    /**
     * Disable @mention links by removing the href attribute of the matching
     * <a> tags.
     *
     * @return self this handler, for method chaining
     */
    public function disableMentionLinks(): self
    {
        $mentionPattern = $this->getMentionLinkPattern();

        return $this->replaceATagWithWalker(
            $mentionPattern,
            $this->getATagWalkerForMentionLinks($mentionPattern),
        );
    }
120
121
    /**
     * Rewrite the href attribute of every <a> tag whose href matches $pattern.
     *
     * The walker is called once per matching <a> tag and must return a map of
     * ['pattern' => regexp pattern, 'replacement' => replacement]; the collected
     * pairs are then applied to the serialized HTML.
     *
     * @param string   $pattern regexp selecting the <a> tags by their href value
     * @param \Closure $walker  closure producing one pattern/replacement pair per tag
     *
     * @return self this handler, for method chaining
     */
    public function replaceATagWithWalker(string $pattern, \Closure $walker): self
    {
        $this->ensureInitialized();

        $anchors = $this->crawler->filter('a');
        $matchingAnchors = $anchors->reduce($this->getATagReducer($pattern));
        $pairs = $matchingAnchors->each($walker);

        $patterns = array_column($pairs, 'pattern');
        $substitutes = array_column($pairs, 'replacement');

        return $this->replaceHtml(array_combine($patterns, $substitutes));
    }
134
135
    /**
     * Build the regexp that matches the URL of a post of this team, either
     * absolute (with or without scheme) or relative ("/posts/<id>").
     *
     * @param ?int $backReferenceNumberForPostId     for returning position of post id in regexp pattern
     * @param ?int $backReferenceNumberForAnchorHash for returning position of anchor hash regexp pattern
     *
     * @return string regexp pattern delimited by "#"
     */
    public function getPostUrlPattern(?int &$backReferenceNumberForPostId, ?int &$backReferenceNumberForAnchorHash): string
    {
        // Capture group 3 is "(\d+)", capture group 5 is "(\#.+)".
        $backReferenceNumberForPostId = 3;
        $backReferenceNumberForAnchorHash = 5;

        // Quote the team name so that it is matched literally and can neither
        // break the pattern nor widen what it matches.
        return sprintf('#^((https?:)?//%s\.esa\.io)?/posts/(\d+)(/|/edit/?)?(\#.+)?$#', preg_quote($this->teamName, '#'));
    }
146
147 3
    /**
     * Return the regexp matching a "/members/<name>" path, capturing the name.
     */
    public function getMentionLinkPattern(): string
    {
        $pattern = '#/members/([^\'"]+)#';

        return $pattern;
    }
151
152
    /**
     * Return closure reduces ATags Crawler with regexp pattern for href value.
     *
     * The closure returns true only for nodes that have an href attribute
     * matching $pattern.
     *
     * @param string $pattern regexp tested against the href value
     */
    public function getATagReducer(string $pattern): \Closure
    {
        return static function (Crawler $node) use ($pattern): bool {
            $href = $node->attr('href');

            // An <a> tag without href yields null, which preg_match() rejects
            // under strict types; such a tag can never be a match.
            return null !== $href && 1 === preg_match($pattern, $href);
        };
    }
165
166
    /**
     * Return closure returns map of ['pattern' => regexp pattern, 'replacement' => replacement] for href value of post urls.
     *
     * The returned pattern matches the href attribute of the visited <a> tag
     * literally; the replacement is an href pointing to the generated route,
     * followed by the original anchor hash when there is one.
     *
     * @param string $pattern                          regexp matching post URLs
     * @param ?int   $backReferenceNumberForPostId     capture group holding the post id
     * @param ?int   $backReferenceNumberForAnchorHash capture group holding the anchor hash
     * @param string $routeName                        name of the route used to generate the new URL
     * @param string $routeVariableName                route parameter that receives the post id
     */
    public function getATagWalkerForPostUrls(
        string $pattern,
        ?int $backReferenceNumberForPostId,
        ?int $backReferenceNumberForAnchorHash,
        string $routeName,
        string $routeVariableName,
    ): \Closure {
        return function (Crawler $node) use ($pattern, $backReferenceNumberForPostId, $backReferenceNumberForAnchorHash, $routeName, $routeVariableName): array {
            $href = $node->attr('href');

            // Fail explicitly for a tag without href or with a non-post href
            // instead of reading undefined match offsets.
            if (null === $href || 1 !== preg_match($pattern, $href, $matches)) {
                throw new \InvalidArgumentException('The given <a> tag does not link to a post.');
            }

            $postId = $matches[$backReferenceNumberForPostId];
            $anchorHash = $matches[$backReferenceNumberForAnchorHash] ?? '';

            // preg_quote() escapes every regexp metacharacter of the href
            // (".", "?", "(", "#", ...) as well as the "/" delimiter, so the
            // generated pattern matches exactly this href.
            $hrefPattern = sprintf('/href=(\'|")%s\1/', preg_quote($matches[0], '/'));
            $replacement = sprintf('href="%s%s"', $this->urlGenerator->generate($routeName, [$routeVariableName => (int) $postId]), $anchorHash);

            return [
                'pattern' => $hrefPattern,
                'replacement' => $replacement,
            ];
        };
    }
195
196
    /**
     * Return closure returns map of ['pattern' => regexp pattern, 'replacement' => replacement] for href value of mention links.
     *
     * The replacement is an empty string, so applying the pair removes the
     * matched href attribute.
     *
     * @param string $pattern regexp matching mention links
     */
    public function getATagWalkerForMentionLinks(string $pattern): \Closure
    {
        return static function (Crawler $node) use ($pattern): array {
            $href = $node->attr('href');

            // Fail explicitly for a tag without href or with a non-mention href
            // instead of reading an undefined match offset.
            if (null === $href || 1 !== preg_match($pattern, $href, $matches)) {
                throw new \InvalidArgumentException('The given <a> tag is not a mention link.');
            }

            // preg_quote() escapes every regexp metacharacter of the matched
            // href as well as the "/" delimiter.
            return [
                'pattern' => sprintf('/href=(\'|")%s\1/', preg_quote($matches[0], '/')),
                'replacement' => '',
            ];
        };
    }
216
217
    /**
     * Replace emoji codes only in text content of each nodes with img tags.
     *
     * Works in two passes: emoji codes found in text nodes are first swapped
     * for placeholders, then the placeholders are replaced in the serialized
     * HTML by <img> tags. Codes unknown to the emoji manager are restored as
     * ":name:".
     *
     * @return self this handler, for method chaining
     *
     * @throws \LogicException when the crawler holds no nodes
     */
    public function replaceEmojiCodes(): self
    {
        // Same precondition as the other operations of this class.
        $this->ensureInitialized();

        // find emoji codes.
        preg_match_all('/:([^\s:<>\'\/"]+):/', $this->crawler->text(), $matches);
        $names = array_unique($matches[1]);

        $tempReplacements = [];
        foreach ($names as $name) {
            $pattern = sprintf('/:%s:/', preg_quote($name, '/'));

            $tempReplacements[$pattern] = sprintf('__ESABA_IMG_TAG__%s__ESABA_IMG_TAG__', $name);
        }

        // set temporarily replaced html content.
        $this->replaceText($tempReplacements);

        $replacements = [];
        foreach ($names as $name) {
            $placeholder = sprintf('__ESABA_IMG_TAG__%s__ESABA_IMG_TAG__', $name);
            $pattern = sprintf('/%s/', preg_quote($placeholder, '/'));

            try {
                $replacement = sprintf('<img src="%s" class="emoji" title=":%s:" alt=":%s:">', $this->emojiManager->getImageUrl($name), $name, $name);
            } catch (UndefinedEmojiException) {
                // Unknown emoji: put the original code back.
                $replacement = sprintf(':%s:', $name);
            }

            $replacements[$pattern] = $replacement;
        }

        return $this->replaceHtml($replacements);
    }
253
254
    /**
     * Return map of ['id' => id, 'text' => text] of headings as TOC.
     *
     * Only h1, h2 and h3 elements are collected.
     *
     * @throws \LogicException when the crawler holds no nodes
     */
    public function getToc(): array
    {
        $this->ensureInitialized();

        return $this->crawler
            ->filter('h1, h2, h3')
            ->each($this->getWalkerForToc());
    }
265
266
    /**
     * Return closure returns map of ['id' => id, 'text' => text] of h tags.
     *
     * The text is the heading text with the text of its first <a> descendant
     * removed and surrounding whitespace trimmed.
     */
    public function getWalkerForToc(): \Closure
    {
        return static function (Crawler $node): array {
            // Pass a default so that a heading without any <a> yields '' instead
            // of making text() throw on the empty node list.
            $anchorText = $node->filter('a')->text('');

            return [
                'id' => $node->attr('id'),
                'text' => trim(str_replace($anchorText, '', $node->text())),
            ];
        };
    }
280
281 26
    /**
     * Guard used by the public operations: fail when the crawler holds no nodes.
     *
     * @throws \LogicException when the crawler is empty
     */
    private function ensureInitialized(): void
    {
        if (0 === $this->crawler->count()) {
            throw new \LogicException('Initialize before using.');
        }
    }
287
}
288