DomTreeBuilder   F
last analyzed

Complexity

Total Complexity 70

Size/Duplication

Total Lines 328
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 130
dl 0
loc 328
ccs 135
cts 135
cp 1
rs 2.8
c 0
b 0
f 0
wmc 70

14 Methods

Rating   Name   Duplication   Size   Complexity  
A isDocumentStarted() 0 3 1
A addSeparatorNode() 0 12 3
B characters() 0 30 8
A startDocument() 0 7 2
A __construct() 0 4 1
A getBodyNode() 0 3 1
A isDocumentEnded() 0 3 1
A endDocument() 0 10 3
A getTextNodes() 0 3 1
B endElement() 0 40 10
A isSeparatingTag() 0 3 1
B startElement() 0 35 11
A endWord() 0 10 2
D isDelimiter() 0 38 25

How to fix   Complexity   

Complex Class

Complex classes like DomTreeBuilder often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use DomTreeBuilder, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * (c) Steve Nebes <[email protected]>
4
 *
5
 * For the full copyright and license information, please view the LICENSE
6
 * file that was distributed with this source code.
7
 */
8
9
declare(strict_types=1);
10
11
namespace SN\DaisyDiff\Html\Dom;
12
13
/**
14
 * Creates a DOM tree from SAX-like events.
15
 */
16
class DomTreeBuilder
{
    /**
     * Single characters, other than white space, that end the word being collected and are emitted as a text node
     * of their own.
     *
     * @var string[]
     */
    private const DELIMITERS = [
        // Basic delimiters.
        '/', '.', '!', ',', ';', '?', '=', "'", '"',
        // Extra delimiters.
        '[', ']', '{', '}', '(', ')', '&', '|', '\\', '-', '_', '+', '*', ':',
    ];

    /**
     * Leaf nodes (words, delimiter characters, images and separators) in the order in which they were created.
     *
     * @var TextNode[]
     */
    private $textNodes = [];

    /**
     * Root of the tree that is being built.
     *
     * @var BodyNode
     */
    private $bodyNode;

    /**
     * Tag that is currently open; new nodes are created with this tag as their parent.
     *
     * @var TagNode
     */
    private $currentParent;

    /**
     * Characters collected for the word that is currently being read.
     *
     * @var string
     */
    private $newWord = '';

    /**
     * True between startDocument() and endDocument().
     *
     * @var bool
     */
    private $documentStarted = false;

    /**
     * True once endDocument() has completed.
     *
     * @var bool
     */
    private $documentEnded = false;

    /**
     * True once an opening body tag has been seen.
     *
     * @var bool
     */
    private $bodyStarted = false;

    /**
     * True once a closing body tag has been seen.
     *
     * @var bool
     */
    private $bodyEnded = false;

    /**
     * True when white space has been read since the last node was created.
     *
     * @var bool
     */
    private $whiteSpaceBeforeThis = false;

    /**
     * Number of tags reporting isPre() that are currently open; white space is kept as text while this is non-zero.
     *
     * @var int
     */
    private $numberOfActivePreTags = 0;

    /**
     * Most recently completed node that should be flagged when white space follows it, or null when there is none.
     *
     * @var Node|null
     */
    private $lastSibling;

    /**
     * Creates the body node and makes it the parent for the first nodes that are added.
     */
    public function __construct()
    {
        $this->bodyNode = new BodyNode();
        $this->currentParent = $this->bodyNode;
    }

    /**
     * @return BodyNode Root of the tree built so far.
     */
    public function getBodyNode(): BodyNode
    {
        return $this->bodyNode;
    }

    /**
     * @return TextNode[] Leaf nodes created so far, in creation order.
     */
    public function getTextNodes(): array
    {
        return $this->textNodes;
    }

    /**
     * @return bool True after startDocument() and before endDocument().
     */
    public function isDocumentStarted(): bool
    {
        return $this->documentStarted;
    }

    /**
     * @return bool True once endDocument() has completed.
     */
    public function isDocumentEnded(): bool
    {
        return $this->documentEnded;
    }

    /**
     * Starts the document, if one has not already been started.
     *
     * @throws \RuntimeException If a document is in progress or has already been processed.
     */
    public function startDocument(): void
    {
        // endDocument() resets $documentStarted, so $documentEnded has to be checked as well. Otherwise a finished
        // builder could be "restarted" into a state in which every later event is rejected.
        if ($this->documentStarted || $this->documentEnded) {
            throw new \RuntimeException('This Handler only accepts one document.');
        }

        $this->documentStarted = true;
    }

    /**
     * Ends the document, if a document is started.
     *
     * @throws \RuntimeException If no document is in progress.
     */
    public function endDocument(): void
    {
        $this->assertDocumentInProgress();

        // Flush a word that was still being collected when the input ended.
        $this->endWord();

        $this->documentEnded = true;
        $this->documentStarted = false;
    }

    /**
     * Handles an opening tag.
     *
     * Tags are ignored until the body tag has been opened and again after it has been closed. Inside the body a new
     * tag node is created below the current parent and becomes the current parent itself.
     *
     * @param mixed  $xmlParser  Required by the handler signature, but not used.
     * @param string $qName      Tag name; compared in lower case.
     * @param array  $attributes Attributes passed on to the new tag node.
     *
     * @throws \RuntimeException If no document is in progress.
     */
    public function startElement($xmlParser, string $qName, array $attributes = []): void
    {
        // Required parameter, but not used.
        \assert($xmlParser);

        $qName = \mb_strtolower($qName);

        $this->assertDocumentInProgress();

        if (!$this->bodyStarted) {
            // Nothing in front of the body is recorded; only the body tag itself matters here.
            if ('body' === $qName) {
                $this->bodyStarted = true;
            }

            return;
        }

        if ($this->bodyEnded) {
            // Ignoring element after body tag closed.
            return;
        }

        // A tag always terminates the word that was being collected.
        $this->endWord();

        $newTagNode = new TagNode($this->currentParent, $qName, $attributes);
        $this->currentParent = $newTagNode;
        $this->lastSibling = null;

        if ($this->whiteSpaceBeforeThis && $newTagNode->isInline()) {
            $newTagNode->setWhiteBefore(true);
        }

        $this->whiteSpaceBeforeThis = false;

        if ($newTagNode->isPre()) {
            $this->numberOfActivePreTags++;
        }

        if ($this->isSeparatingTag($newTagNode)) {
            $this->addSeparatorNode();
        }
    }

    /**
     * Handles a closing tag.
     *
     * A closing body tag marks the body as ended. Any other closing tag inside the body finishes the current word
     * and makes the parent of the current tag the current parent again.
     *
     * NOTE(review): an end tag without a matching start tag would move the current parent above the body node, and
     * the pre counter is decremented by tag name while it is incremented via isPre(). Presumably the parser only
     * delivers balanced tags and isPre() is true exactly for "pre" tags - confirm.
     *
     * @param mixed  $xmlParser Required by the handler signature, but not used.
     * @param string $qName     Tag name; compared in lower case.
     *
     * @throws \RuntimeException If no document is in progress.
     */
    public function endElement($xmlParser, string $qName): void
    {
        // Required parameter, but not used.
        \assert($xmlParser);

        $qName = \mb_strtolower($qName);

        $this->assertDocumentInProgress();

        if ('body' === $qName) {
            $this->bodyEnded = true;

            return;
        }

        if (!$this->bodyStarted || $this->bodyEnded) {
            // Outside of the body there is nothing to close.
            return;
        }

        if ('img' === $qName) {
            // Insert a dummy leaf for the image.
            $img = new ImageNode($this->currentParent, $this->currentParent->getAttributes());
            $img->setWhiteBefore($this->whiteSpaceBeforeThis);
            $this->lastSibling = $img;
            $this->textNodes[] = $img;
        }

        $this->endWord();

        // Only an inline tag can receive the "white space after" flag from text that follows it.
        $this->lastSibling = $this->currentParent->isInline() ? $this->currentParent : null;

        if ('pre' === $qName) {
            $this->numberOfActivePreTags--;
        }

        if ($this->isSeparatingTag($this->currentParent)) {
            $this->addSeparatorNode();
        }

        $this->currentParent = $this->currentParent->getParent();
        $this->whiteSpaceBeforeThis = false;
    }

    /**
     * Handles character data by splitting it into words and delimiters.
     *
     * Characters that are not delimiters are appended to the current word. A delimiter ends the current word; white
     * space outside of pre tags is only recorded as a flag on the neighbouring nodes, every other delimiter becomes
     * a text node of its own.
     *
     * @param mixed  $xmlParser Required by the handler signature, but not used.
     * @param string $chars     Character data to process.
     *
     * @throws \RuntimeException If no document is in progress.
     */
    public function characters($xmlParser, string $chars): void
    {
        // Required parameter, but not used.
        \assert($xmlParser);

        $this->assertDocumentInProgress();

        for ($i = 0, $iMax = \mb_strlen($chars); $i < $iMax; $i++) {
            $c = \mb_substr($chars, $i, 1);

            if (!static::isDelimiter($c)) {
                $this->newWord .= $c;

                continue;
            }

            $this->endWord();

            if (WhiteSpaceNode::isWhiteSpace($c) && 0 === $this->numberOfActivePreTags) {
                if (null !== $this->lastSibling) {
                    $this->lastSibling->setWhiteAfter(true);
                }

                $this->whiteSpaceBeforeThis = true;

                continue;
            }

            // A visible delimiter, or white space inside a pre tag, is kept as its own text node.
            $textNode = new TextNode($this->currentParent, $c);
            $textNode->setWhiteBefore($this->whiteSpaceBeforeThis);

            $this->whiteSpaceBeforeThis = false;
            $this->lastSibling = $textNode;
            $this->textNodes[] = $textNode;
        }
    }

    /**
     * Rejects an event that arrives while no document is being processed.
     *
     * @throws \RuntimeException If startDocument() has not been called or endDocument() has already been called.
     */
    private function assertDocumentInProgress(): void
    {
        if (!$this->documentStarted || $this->documentEnded) {
            throw new \RuntimeException(
                'No document is in progress; events are only accepted between startDocument() and endDocument().'
            );
        }
    }

    /**
     * Turns the characters collected so far into a text node, if there are any.
     *
     * @return void
     */
    private function endWord(): void
    {
        if ('' === $this->newWord) {
            return;
        }

        $node = new TextNode($this->currentParent, $this->newWord);
        $node->setWhiteBefore($this->whiteSpaceBeforeThis);

        $this->whiteSpaceBeforeThis = false;
        $this->lastSibling = $node;
        $this->textNodes[] = $node;
        $this->newWord = '';
    }

    /**
     * Returns true if the given tag separates text nodes from being successive. I.e. every block starts a new distinct
     * text flow.
     *
     * @param TagNode $tagNode
     * @return bool
     */
    private function isSeparatingTag(TagNode $tagNode): bool
    {
        return $tagNode->isBlockLevel();
    }

    /**
     * Ensures that a separator is added after the last text node.
     *
     * Nothing is added while there are no text nodes yet or when the last one already is a separator.
     */
    private function addSeparatorNode(): void
    {
        // end() yields false for an empty list.
        $lastNode = \end($this->textNodes);

        // Don't add multiple separators.
        if (false === $lastNode || $lastNode instanceof SeparatingNode) {
            return;
        }

        $this->textNodes[] = new SeparatingNode($this->currentParent);
    }

    /**
     * Tells whether a character ends a word.
     *
     * That is the case for everything WhiteSpaceNode::isWhiteSpace() accepts and for the characters listed in
     * self::DELIMITERS. Only an exact match counts, so a string of several characters is never a delimiter unless
     * WhiteSpaceNode::isWhiteSpace() accepts it.
     *
     * @param string $c Character to test.
     * @return bool
     */
    public static function isDelimiter(string $c): bool
    {
        return WhiteSpaceNode::isWhiteSpace($c) || \in_array($c, self::DELIMITERS, true);
    }
}
346