Passed
Pull Request — master (#15)
by Takashi
07:09 queued 51s
created

HtmlHandler::replaceText()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 12
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 6
c 0
b 0
f 0
nc 1
nop 1
dl 0
loc 12
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace App\Esa;
6
7
use App\Esa\Exception\UndefinedEmojiException;
8
use Symfony\Component\DomCrawler\Crawler;
9
use Symfony\Component\Routing\Generator\UrlGeneratorInterface;
10
11
class HtmlHandler
12
{
13
    /**
     * @param Crawler               $crawler      crawler that holds the HTML document being processed
     * @param UrlGeneratorInterface $urlGenerator used to generate the replacement urls for post links
     * @param EmojiManager          $emojiManager used to resolve emoji names into image urls
     * @param string                $teamName     team name used as the "<team>.esa.io" host part of post urls
     */
    public function __construct(
        private Crawler $crawler,
        private UrlGeneratorInterface $urlGenerator,
        private EmojiManager $emojiManager,
        private string $teamName,
    ) {}
20
21
    /**
     * Load the given HTML into the crawler, discarding whatever it held before.
     *
     * @return $this
     */
    public function initialize(string $html): self
    {
        $crawler = $this->crawler;

        // Drop any previously loaded nodes so the crawler only contains the new document.
        $crawler->clear();
        $crawler->addHtmlContent($html);

        return $this;
    }
28
29
    /**
     * Return the HTML currently held by the crawler.
     *
     * @throws \LogicException when the handler has not been initialized
     */
    public function dumpHtml(): string
    {
        $this->ensureInitialized();

        $markup = $this->crawler->html();

        return $markup;
    }
35
36
    /**
     * Apply regexp replacements to the crawler's HTML and reload the crawler with the result.
     *
     * Replacements are applied one after another in array order, so a later pattern
     * operates on the output of the earlier ones.
     *
     * @param array $replacements map of [regexp pattern => replacement]
     *
     * @throws \LogicException   when the handler has not been initialized
     * @throws \RuntimeException when preg_replace() fails for one of the patterns
     */
    public function replaceHtml(array $replacements): self
    {
        $this->ensureInitialized();

        $html = $this->crawler->html();

        foreach ($replacements as $pattern => $replacement) {
            $replaced = preg_replace($pattern, $replacement, $html);

            // preg_replace() returns null on failure. Handing that to initialize() would only
            // surface as an unrelated TypeError, so fail here with the PCRE error message.
            if (null === $replaced) {
                throw new \RuntimeException(sprintf('Failed to apply pattern "%s": %s', $pattern, preg_last_error_msg()));
            }

            $html = $replaced;
        }

        return $this->initialize($html);
    }
51
52
    /**
     * Apply regexp replacements to text nodes only, leaving tags and attributes untouched.
     *
     * The first node of the crawler is walked recursively and then put back as the
     * crawler's only node.
     *
     * @param array $replacements map of [regexp pattern => replacement]
     *
     * @throws \LogicException when the handler has not been initialized
     */
    public function replaceText(array $replacements): self
    {
        $this->ensureInitialized();

        $domNode = $this->crawler->getNode(0);

        // getNode() is nullable; make the "crawler is not empty" invariant explicit
        // instead of passing a possible null to methods that require a DOMNode.
        if (null === $domNode) {
            throw new \LogicException('Initialize before using.');
        }

        $this->walkDomNodesAndReplaceOnlyTextNodes($domNode, $replacements);

        $this->crawler->clear();
        $this->crawler->addNode($domNode);

        return $this;
    }
68
69
    /**
     * Recursively walk the given node and apply the replacements to every text node found.
     *
     * Only nodes of type XML_TEXT_NODE are modified; any other node is merely traversed.
     *
     * @param array $replacements map of [regexp pattern => replacement]
     *
     * @throws \RuntimeException when preg_replace() fails for one of the patterns
     */
    public function walkDomNodesAndReplaceOnlyTextNodes(\DOMNode $node, array $replacements): self
    {
        if (XML_TEXT_NODE === $node->nodeType) {
            $text = $node->textContent;

            foreach ($replacements as $pattern => $replacement) {
                $text = preg_replace($pattern, $replacement, $text);

                // A null result means the replacement failed; assigning it to textContent
                // would silently erase the text, so report the failure instead.
                if (null === $text) {
                    throw new \RuntimeException(sprintf('Failed to apply pattern "%s": %s', $pattern, preg_last_error_msg()));
                }
            }

            // Write back only when something actually changed.
            if ($text !== $node->textContent) {
                $node->textContent = $text;
            }

            return $this;
        }

        if (!$node->hasChildNodes()) {
            return $this;
        }

        foreach ($node->childNodes as $childNode) {
            $this->walkDomNodesAndReplaceOnlyTextNodes($childNode, $replacements);
        }

        return $this;
    }
92
93
    /**
     * Rewrite links pointing at other posts so that they point at the given route instead.
     *
     * @param string $routeName         name of the route the rewritten links are generated for
     * @param string $routeVariableName name of the route variable that receives the post id
     */
    public function replacePostUrls(string $routeName, string $routeVariableName): self
    {
        // Both indexes are filled in by reference by getPostUrlPattern().
        $postIdIndex = null;
        $anchorHashIndex = null;
        $hrefPattern = $this->getPostUrlPattern($postIdIndex, $anchorHashIndex);

        return $this->replaceATagWithWalker(
            $hrefPattern,
            $this->getATagWalkerForPostUrls($hrefPattern, $postIdIndex, $anchorHashIndex, $routeName, $routeVariableName),
        );
    }
105
106
    /**
     * Strip the href attribute from @mention links so that they are no longer clickable.
     */
    public function disableMentionLinks(): self
    {
        $hrefPattern = $this->getMentionLinkPattern();

        return $this->replaceATagWithWalker($hrefPattern, $this->getATagWalkerForMentionLinks($hrefPattern));
    }
116
117
    /**
     * Rewrite the href attribute of every <a> tag whose href matches the given pattern.
     *
     * The walker is called once per matching <a> tag and must return a map of
     * ['pattern' => regexp pattern, 'replacement' => replacement]; the collected
     * pairs are then applied to the whole HTML.
     *
     * @param string   $pattern regexp pattern selecting the <a> tags by their href value
     * @param \Closure $walker  closure producing the replacement pair for a single <a> tag
     */
    public function replaceATagWithWalker(string $pattern, \Closure $walker): self
    {
        $this->ensureInitialized();

        $anchors = $this->crawler->filter('a');
        $matchingAnchors = $anchors->reduce($this->getATagReducer($pattern));

        $pairs = $matchingAnchors->each($walker);

        // Turn the list of pairs into a [pattern => replacement] map.
        $patterns = array_column($pairs, 'pattern');
        $substitutes = array_column($pairs, 'replacement');

        return $this->replaceHtml(array_combine($patterns, $substitutes));
    }
130
131
    /**
     * Return the regexp pattern matching href values that point at a post.
     *
     * Matches both absolute ("//<team>.esa.io/posts/<id>", with optional scheme) and
     * relative ("/posts/<id>") urls, optionally followed by "/" or "/edit" and an anchor hash.
     *
     * @param ?int $backReferenceNumberForPostId     for returning position of post id in regexp pattern
     * @param ?int $backReferenceNumberForAnchorHash for returning position of anchor hash regexp pattern
     */
    public function getPostUrlPattern(?int &$backReferenceNumberForPostId, ?int &$backReferenceNumberForAnchorHash): string
    {
        $backReferenceNumberForPostId = 3;
        $backReferenceNumberForAnchorHash = 5;

        // Quote the team name so that characters with a special meaning in a regexp
        // (or the "#" delimiter) are matched literally.
        $teamName = preg_quote($this->teamName, '#');

        return sprintf('#^((https?:)?//%s\.esa\.io)?/posts/(\d+)(/|/edit/?)?(\#.+)?$#', $teamName);
    }
142
143
    /**
     * Return the regexp pattern matching href values of @mention links ("/members/<name>").
     */
    public function getMentionLinkPattern(): string
    {
        $pattern = '#/members/([^\'"]+)#';

        return $pattern;
    }
147
148
    /**
     * Return a closure for Crawler::reduce() that keeps only the <a> tags whose href matches the pattern.
     *
     * @param string $pattern regexp pattern tested against the href value
     */
    public function getATagReducer(string $pattern): \Closure
    {
        return function (Crawler $node) use ($pattern): bool {
            // attr() returns null for an <a> without href; under strict_types passing
            // that to preg_match() is a TypeError, so treat it as an empty href.
            $href = $node->attr('href') ?? '';

            return 1 === preg_match($pattern, $href);
        };
    }
161
162
    /**
     * Return a closure that maps an <a> tag linking to a post onto
     * ['pattern' => regexp pattern, 'replacement' => replacement] for its href attribute.
     *
     * The returned pattern matches the original href attribute literally; the replacement
     * is an href pointing at the url generated for the given route, keeping the anchor hash.
     *
     * @param string $pattern                          regexp pattern for post urls
     * @param ?int   $backReferenceNumberForPostId     index of the post id group in $pattern
     * @param ?int   $backReferenceNumberForAnchorHash index of the anchor hash group in $pattern
     * @param string $routeName                        name of the route to generate urls for
     * @param string $routeVariableName                name of the route variable receiving the post id
     */
    public function getATagWalkerForPostUrls(
        string $pattern,
        ?int $backReferenceNumberForPostId,
        ?int $backReferenceNumberForAnchorHash,
        string $routeName,
        string $routeVariableName,
    ): \Closure {
        return function (Crawler $node) use ($pattern, $backReferenceNumberForPostId, $backReferenceNumberForAnchorHash, $routeName, $routeVariableName): array {
            // attr() returns null when the <a> has no href; normalize so preg_match() gets a string.
            $href = $node->attr('href') ?? '';

            if (1 !== preg_match($pattern, $href, $matches)) {
                throw new \InvalidArgumentException(sprintf('The href "%s" does not match the post url pattern.', $href));
            }

            $postId = $matches[$backReferenceNumberForPostId];
            $anchorHash = $matches[$backReferenceNumberForAnchorHash] ?? '';

            $url = $this->urlGenerator->generate($routeName, [$routeVariableName => $postId]);

            return [
                // preg_quote() makes the matched href literal: without it ".", "?", "(" etc.
                // (e.g. inside an anchor hash) would be interpreted as regexp syntax.
                'pattern' => sprintf('/href=(\'|")%s\1/', preg_quote($matches[0], '/')),
                'replacement' => sprintf('href="%s%s"', $url, $anchorHash),
            ];
        };
    }
191
192
    /**
     * Return a closure that maps an <a> tag of a mention link onto
     * ['pattern' => regexp pattern, 'replacement' => replacement] for its href attribute.
     *
     * The replacement is an empty string, i.e. the matched href attribute is removed.
     *
     * @param string $pattern regexp pattern for mention links
     */
    public function getATagWalkerForMentionLinks(string $pattern): \Closure
    {
        return function (Crawler $node) use ($pattern): array {
            // attr() returns null when the <a> has no href; normalize so preg_match() gets a string.
            $href = $node->attr('href') ?? '';

            if (1 !== preg_match($pattern, $href, $matches)) {
                throw new \InvalidArgumentException(sprintf('The href "%s" does not match the mention link pattern.', $href));
            }

            return [
                // preg_quote() makes the matched href literal so that characters such as "."
                // or "+" in a member name are not interpreted as regexp syntax.
                'pattern' => sprintf('/href=(\'|")%s\1/', preg_quote($matches[0], '/')),
                'replacement' => '',
            ];
        };
    }
212
213
    /**
     * Replace emoji codes (":name:") with img tags, but only where they appear in text content.
     *
     * Works in two passes: first every code found in text nodes is swapped for a placeholder
     * (so codes inside tags or attributes stay untouched), then the placeholders are replaced
     * in the HTML by img tags. Codes unknown to the emoji manager are restored as ":name:".
     *
     * @throws \LogicException when the handler has not been initialized
     */
    public function replaceEmojiCodes(): self
    {
        $this->ensureInitialized();

        // Find the distinct emoji names used in the text.
        preg_match_all('/:([^\s:<>\'\/"]+):/', $this->crawler->text(), $matches);
        $names = array_unique($matches[1]);

        $toPlaceholder = static fn (string $name): string => sprintf('__ESABA_IMG_TAG__%s__ESABA_IMG_TAG__', $name);

        // First pass: emoji code => placeholder, in text nodes only.
        $tempReplacements = [];
        foreach ($names as $name) {
            $tempReplacements[sprintf('/:%s:/', preg_quote($name, '/'))] = $toPlaceholder($name);
        }

        $this->replaceText($tempReplacements);

        // Second pass: placeholder => img tag, or the original code for unknown emojis.
        $replacements = [];
        foreach ($names as $name) {
            try {
                $replacement = sprintf('<img src="%s" class="emoji" title=":%s:" alt=":%s:">', $this->emojiManager->getImageUrl($name), $name, $name);
            } catch (UndefinedEmojiException $e) {
                $replacement = sprintf(':%s:', $name);
            }

            $replacements[sprintf('/%s/', preg_quote($toPlaceholder($name), '/'))] = $replacement;
        }

        return $this->replaceHtml($replacements);
    }
249
250
    /**
     * Build the table of contents from the h1, h2 and h3 headings.
     *
     * @return array list of ['id' => id, 'text' => text], one entry per heading
     *
     * @throws \LogicException when the handler has not been initialized
     */
    public function getToc(): array
    {
        $this->ensureInitialized();

        $headings = $this->crawler->filter('h1, h2, h3');

        return $headings->each($this->getWalkerForToc());
    }
261
262
    /**
     * Return a closure that maps a heading node onto ['id' => id, 'text' => text].
     *
     * The text of the <a> tag inside the heading, if there is one, is removed from
     * the heading text before it is trimmed.
     */
    public function getWalkerForToc(): \Closure
    {
        return function (Crawler $node): array {
            $text = $node->text();

            // text() throws on an empty node list, so only strip the anchor text
            // when the heading actually contains an <a> tag.
            $anchors = $node->filter('a');
            if ($anchors->count() > 0) {
                $text = str_replace($anchors->text(), '', $text);
            }

            return [
                'id' => $node->attr('id'),
                'text' => trim($text),
            ];
        };
    }
276
277
    /**
     * Make sure the crawler holds at least one node.
     *
     * @throws \LogicException when the crawler is empty
     */
    private function ensureInitialized(): void
    {
        if ($this->crawler->count()) {
            return;
        }

        throw new \LogicException('Initialize before using.');
    }
283
}
284