Passed
Pull Request — master (#15)
by Takashi
02:54
created

HtmlHandler   A

Complexity

Total Complexity 28

Size/Duplication

Total Lines 270
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 1
Bugs 1 Features 0
Metric Value
eloc 96
c 1
b 1
f 0
dl 0
loc 270
ccs 110
cts 110
cp 1
rs 10
wmc 28

18 Methods

Rating   Name   Duplication   Size   Complexity  
A getATagWalkerForMentionLinks() 0 16 1
A replaceHtml() 0 11 2
A getMentionLinkPattern() 0 3 1
A replacePostUrls() 0 8 1
A dumpHtml() 0 5 1
A getATagWalkerForPostUrls() 0 25 2
A getWalkerForToc() 0 10 1
A getToc() 0 7 1
A getPostUrlPattern() 0 6 1
A __construct() 0 6 1
A replaceATagWithWalker() 0 9 1
A disableMentionLinks() 0 6 1
A replaceText() 0 12 1
A ensureInitialized() 0 4 2
A replaceEmojiCodes() 0 32 4
A initialize() 0 6 1
A walkDomNodesAndReplaceOnlyTextNodes() 0 19 5
A getATagReducer() 0 9 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace App\Esa;
6
7
use App\Esa\Exception\UndefinedEmojiException;
8
use Symfony\Component\DomCrawler\Crawler;
9
use Symfony\Component\Routing\Generator\UrlGeneratorInterface;
10
11
class HtmlHandler
12
{
13 58
    /**
     * @param Crawler               $crawler      crawler holding the HTML being processed
     * @param UrlGeneratorInterface $urlGenerator generator used to build the URLs that replace post links
     * @param EmojiManager          $emojiManager source of the image URL for each emoji name
     * @param ?string               $teamName     team name used as the esa.io subdomain in the post URL pattern
     */
    public function __construct(
        private Crawler $crawler,
        private UrlGeneratorInterface $urlGenerator,
        private EmojiManager $emojiManager,
        private ?string $teamName,
    ) {
    }
20
21 19
    /**
     * Load the given HTML into the crawler, replacing whatever it held before.
     *
     * @param string $html HTML content to process
     *
     * @return self this handler, for chaining
     */
    public function initialize(string $html): self
    {
        $crawler = $this->crawler;

        // Clear first so that the crawler ends up holding only the new content.
        $crawler->clear();
        $crawler->addHtmlContent($html);

        return $this;
    }
28
29 3
    /**
     * Return the HTML currently held by the crawler.
     *
     * @throws \LogicException when the crawler holds no nodes
     */
    public function dumpHtml(): string
    {
        $this->ensureInitialized();

        $html = $this->crawler->html();

        return $html;
    }
35
36
    /**
     * Apply regexp replacements to the crawler's HTML and reload the result into the crawler.
     *
     * Replacements are applied in array order, each one on the output of the previous one.
     *
     * @param array $replacements map of [regexp pattern => replacement]
     *
     * @throws \LogicException   when the crawler holds no nodes
     * @throws \RuntimeException when a pattern cannot be applied
     */
    public function replaceHtml(array $replacements): self
    {
        $this->ensureInitialized();

        $html = $this->crawler->html();

        foreach ($replacements as $pattern => $replacement) {
            $replaced = preg_replace((string) $pattern, $replacement, $html);

            // preg_replace() returns null on failure; passing that on to initialize() would only
            // surface as an unrelated TypeError, so report the failing pattern instead.
            if (null === $replaced) {
                throw new \RuntimeException(sprintf('Failed to apply pattern "%s": %s', $pattern, preg_last_error_msg()));
            }

            $html = $replaced;
        }

        return $this->initialize($html);
    }
51
52
    /**
     * Apply regexp replacements to the text nodes of the first node held by the crawler.
     *
     * Afterwards the crawler is reloaded so that it holds only that (modified) node.
     *
     * @param array $replacements map of [regexp pattern => replacement]
     *
     * @throws \LogicException when the crawler holds no nodes
     */
    public function replaceText(array $replacements): self
    {
        $this->ensureInitialized();

        $domNode = $this->crawler->getNode(0);

        // getNode() may return null; ensureInitialized() should already rule that out, but check
        // explicitly so that a null can never reach the DOMNode-typed calls below.
        if (!$domNode instanceof \DOMNode) {
            throw new \LogicException('Initialize before using.');
        }

        $this->walkDomNodesAndReplaceOnlyTextNodes($domNode, $replacements);

        $this->crawler->clear();
        $this->crawler->addNode($domNode);

        return $this;
    }
68
69
    /**
     * Recursively apply regexp replacements to every text node at or below the given node.
     *
     * Only nodes of type XML_TEXT_NODE are modified; all other nodes are just traversed.
     *
     * @param \DOMNode $node         node to start from
     * @param array    $replacements map of [regexp pattern => replacement]
     *
     * @throws \RuntimeException when a pattern cannot be applied
     */
    public function walkDomNodesAndReplaceOnlyTextNodes(\DOMNode $node, array $replacements): self
    {
        if (XML_TEXT_NODE === $node->nodeType) {
            $originalText = $node->textContent;
            $text = $originalText;

            foreach ($replacements as $pattern => $replacement) {
                $replaced = preg_replace((string) $pattern, $replacement, $text);

                // preg_replace() returns null on failure; writing that back would silently wipe the text.
                if (null === $replaced) {
                    throw new \RuntimeException(sprintf('Failed to apply pattern "%s": %s', $pattern, preg_last_error_msg()));
                }

                $text = $replaced;
            }

            // Touch the DOM only when something actually changed.
            if ($text !== $originalText) {
                $node->textContent = $text;
            }

            return $this;
        }

        if (!$node->hasChildNodes()) {
            return $this;
        }

        foreach ($node->childNodes as $childNode) {
            $this->walkDomNodesAndReplaceOnlyTextNodes($childNode, $replacements);
        }

        return $this;
    }
92
93
    /**
     * Replace links to other posts with links to see the post on esaba.
     *
     * @param string $routeName         name of the route the rewritten links point to
     * @param string $routeVariableName name of the route variable that receives the post id
     */
    public function replacePostUrls(string $routeName, string $routeVariableName): self
    {
        // Both positions are filled in by getPostUrlPattern() through its by-reference parameters.
        $postIdPosition = null;
        $anchorHashPosition = null;
        $postUrlPattern = $this->getPostUrlPattern($postIdPosition, $anchorHashPosition);

        return $this->replaceATagWithWalker(
            $postUrlPattern,
            $this->getATagWalkerForPostUrls($postUrlPattern, $postIdPosition, $anchorHashPosition, $routeName, $routeVariableName),
        );
    }
105
106
    /**
     * Disable @mention links by replacing their href attributes in the HTML.
     */
    public function disableMentionLinks(): self
    {
        $mentionPattern = $this->getMentionLinkPattern();

        return $this->replaceATagWithWalker(
            $mentionPattern,
            $this->getATagWalkerForMentionLinks($mentionPattern),
        );
    }
116
117
    /**
     * Rewrite the HTML for every <a> tag whose href matches the given regexp pattern.
     *
     * The walker is called for each matching <a> tag and must return a map of
     * ['pattern' => regexp pattern, 'replacement' => replacement]; the collected maps are then applied to the HTML.
     *
     * @throws \LogicException when the crawler holds no nodes
     */
    public function replaceATagWithWalker(string $pattern, \Closure $walker): self
    {
        $this->ensureInitialized();

        $results = $this->crawler
            ->filter('a')
            ->reduce($this->getATagReducer($pattern))
            ->each($walker);

        $patterns = array_column($results, 'pattern');
        $substitutes = array_column($results, 'replacement');

        return $this->replaceHtml(array_combine($patterns, $substitutes));
    }
130
131
    /**
     * Return the regexp pattern matching post URLs, either relative or on the team's esa.io host.
     *
     * @param ?int $backReferenceNumberForPostId     for returning position of post id in regexp pattern
     * @param ?int $backReferenceNumberForAnchorHash for returning position of anchor hash in regexp pattern
     */
    public function getPostUrlPattern(?int &$backReferenceNumberForPostId, ?int &$backReferenceNumberForAnchorHash): string
    {
        $backReferenceNumberForPostId = 3;
        $backReferenceNumberForAnchorHash = 5;

        // Quote the team name so that it is matched literally instead of being read as regexp syntax.
        $teamName = preg_quote((string) $this->teamName, '#');

        return sprintf('#^((https?:)?//%s\.esa\.io)?/posts/(\d+)(/|/edit/?)?(\#.+)?$#', $teamName);
    }
142
143 3
    /**
     * Return the regexp pattern matching the "/members/..." part of a mention link.
     */
    public function getMentionLinkPattern(): string
    {
        $mentionLinkPattern = '#/members/([^\'"]+)#';

        return $mentionLinkPattern;
    }
147
148
    /**
     * Return a closure that keeps only the <a> tags whose href value matches the given regexp pattern.
     *
     * The closure takes the Crawler of a single <a> tag and returns true when the tag should be kept.
     */
    public function getATagReducer(string $pattern): \Closure
    {
        return static function (Crawler $node) use ($pattern): bool {
            $href = $node->attr('href');

            // An <a> tag without href can never match; skip it rather than handing null to
            // preg_match(), which is a TypeError under strict_types.
            if (null === $href) {
                return false;
            }

            return 1 === preg_match($pattern, $href);
        };
    }
161
162
    /**
     * Return a closure that maps an <a> tag linking to a post to
     * ['pattern' => regexp pattern, 'replacement' => replacement] for its href attribute.
     *
     * The returned pattern matches the href attribute as written in the HTML (single or double quoted);
     * the replacement is an href pointing to the generated route URL, followed by the anchor hash if any.
     *
     * @param string $pattern                          regexp pattern matching post URLs
     * @param ?int   $backReferenceNumberForPostId     position of post id in $pattern
     * @param ?int   $backReferenceNumberForAnchorHash position of anchor hash in $pattern
     * @param string $routeName                        name of the route the rewritten links point to
     * @param string $routeVariableName                name of the route variable that receives the post id
     */
    public function getATagWalkerForPostUrls(
        string $pattern,
        ?int $backReferenceNumberForPostId,
        ?int $backReferenceNumberForAnchorHash,
        string $routeName,
        string $routeVariableName,
    ): \Closure {
        return function (Crawler $node) use ($pattern, $backReferenceNumberForPostId, $backReferenceNumberForAnchorHash, $routeName, $routeVariableName): array {
            // Fail clearly when the walker is used on a tag that has no href or does not match.
            if (1 !== preg_match($pattern, (string) $node->attr('href'), $matches)) {
                throw new \LogicException('The href of the given <a> tag does not match the post URL pattern.');
            }

            $href = $matches[0];
            $postId = $matches[$backReferenceNumberForPostId];
            $anchorHash = $matches[$backReferenceNumberForAnchorHash] ?? '';

            $url = $this->urlGenerator->generate($routeName, [$routeVariableName => $postId]);

            return [
                // Quote the href so that characters such as ".", "(" or "+" in it are matched literally;
                // \1 requires the closing quote to be the same as the opening one.
                'pattern' => sprintf('/href=(\'|")%s\1/', preg_quote($href, '/')),
                'replacement' => sprintf('href="%s%s"', $url, $anchorHash),
            ];
        };
    }
191
192
    /**
     * Return a closure that maps an <a> tag of a mention link to
     * ['pattern' => regexp pattern, 'replacement' => replacement] for its href attribute.
     *
     * The returned pattern matches an href attribute (single or double quoted) whose value is the part of
     * the href matched by $pattern; the replacement is an empty string.
     *
     * @param string $pattern regexp pattern matching mention links
     */
    public function getATagWalkerForMentionLinks(string $pattern): \Closure
    {
        return static function (Crawler $node) use ($pattern): array {
            // Fail clearly when the walker is used on a tag that has no href or does not match.
            if (1 !== preg_match($pattern, (string) $node->attr('href'), $matches)) {
                throw new \LogicException('The href of the given <a> tag does not match the mention link pattern.');
            }

            return [
                // Quote the matched text so that regexp metacharacters in it are matched literally;
                // \1 requires the closing quote to be the same as the opening one.
                'pattern' => sprintf('/href=(\'|")%s\1/', preg_quote($matches[0], '/')),
                'replacement' => '',
            ];
        };
    }
212
213
    /**
     * Replace emoji codes (":name:") found in text content with img tags.
     *
     * This works in two passes: first every code in the text nodes is swapped for a placeholder, so that
     * codes inside tags and attributes are left alone; then the placeholders in the HTML are swapped for
     * img tags. A code that the emoji manager does not know is restored as ":name:".
     */
    public function replaceEmojiCodes(): self
    {
        // find emoji codes.
        preg_match_all('/:([^\s:<>\'\/"]+):/', $this->crawler->text(), $matches);

        $names = array_unique($matches[1]);

        $tempReplacements = [];
        $placeholders = [];
        foreach ($names as $name) {
            $placeholder = sprintf('__ESABA_IMG_TAG__%s__ESABA_IMG_TAG__', $name);
            $placeholders[] = [$name, $placeholder];

            // A name may contain "$" or "\", which preg_replace() would read as a backreference
            // in the replacement string; escape them so that the placeholder is inserted verbatim.
            $tempReplacements[sprintf('/:%s:/', preg_quote($name, '/'))] = addcslashes($placeholder, '\\$');
        }

        // set temporarily replaced html content.
        $this->replaceText($tempReplacements);

        $replacements = [];
        foreach ($placeholders as [$name, $placeholder]) {
            try {
                $replacement = sprintf('<img src="%s" class="emoji" title=":%s:" alt=":%s:">', $this->emojiManager->getImageUrl($name), $name, $name);
            } catch (UndefinedEmojiException) {
                // Unknown emoji: put the original code back.
                $replacement = sprintf(':%s:', $name);
            }

            $replacements[sprintf('/%s/', preg_quote($placeholder, '/'))] = addcslashes($replacement, '\\$');
        }

        return $this->replaceHtml($replacements);
    }
249
250
    /**
     * Return the TOC: one map of ['id' => id, 'text' => text] per h1, h2 and h3 heading.
     *
     * @throws \LogicException when the crawler holds no nodes
     */
    public function getToc(): array
    {
        $this->ensureInitialized();

        return $this->crawler
            ->filter('h1, h2, h3')
            ->each($this->getWalkerForToc());
    }
261
262
    /**
     * Return a closure that maps a heading tag to ['id' => id, 'text' => text].
     *
     * 'text' is the heading's text with the text of its <a> tag removed and surrounding whitespace trimmed.
     */
    public function getWalkerForToc(): \Closure
    {
        return static function (Crawler $node): array {
            $anchors = $node->filter('a');

            // text() throws on an empty node list, so a heading without an <a> tag is treated
            // as having no anchor text to remove.
            $anchorText = $anchors->count() > 0 ? $anchors->text() : '';

            return [
                'id' => $node->attr('id'),
                'text' => trim(str_replace($anchorText, '', $node->text())),
            ];
        };
    }
276
277 26
    /**
     * Fail unless the crawler holds at least one node.
     *
     * @throws \LogicException when the crawler holds no nodes
     */
    private function ensureInitialized(): void
    {
        if ($this->crawler->count() > 0) {
            return;
        }

        throw new \LogicException('Initialize before using.');
    }
283
}
284