Passed
Push — master ( bb928b...f4cc35 )
by Andrew
01:56
created

Document::substituteWith()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 2
c 0
b 0
f 0
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
cc 1
nc 1
nop 1
crap 1
1
<?php declare(strict_types=1);
2
3
namespace DOMWrap;
4
5
use DOMWrap\Traits\{
6
    CommonTrait,
7
    TraversalTrait,
8
    ManipulationTrait
9
};
10
11
/**
12
 * Document Node
13
 *
14
 * @package DOMWrap
15
 * @license http://opensource.org/licenses/BSD-3-Clause BSD 3 Clause
16
 */
17
class Document extends \DOMDocument
18
{
19
    use CommonTrait;
20
    use TraversalTrait;
21
    use ManipulationTrait;
22
23 140
    /** @var int */
24 140
    protected $libxmlOptions = 0;
25
26 140
    /** @var string|null */
27 140
    protected $documentEncoding = null;
28 140
29 140
    /**
     * Create the document and register the DOMWrap node classes with it.
     *
     * @param string $version  XML version handed to the parent constructor.
     * @param string $encoding Document encoding handed to the parent constructor.
     */
    public function __construct(string $version = '1.0', string $encoding = 'UTF-8') {
        parent::__construct($version, $encoding);

        // Base DOM class => DOMWrap class registered in its place.
        $nodeClasses = [
            'DOMText' => 'DOMWrap\\Text',
            'DOMElement' => 'DOMWrap\\Element',
            'DOMComment' => 'DOMWrap\\Comment',
            'DOMDocument' => 'DOMWrap\\Document',
            'DOMDocumentType' => 'DOMWrap\\DocumentType',
            'DOMProcessingInstruction' => 'DOMWrap\\ProcessingInstruction',
        ];

        foreach ($nodeClasses as $baseClass => $wrapperClass) {
            $this->registerNodeClass($baseClass, $wrapperClass);
        }
    }
39
40
    /**
     * Set the libxml options that setHtml() passes on to loadHTML().
     *
     * Combine multiple options with bitwise OR,
     * eg: LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
     *
     * @link http://php.net/manual/en/libxml.constants.php
     *
     * @param int $libxmlOptions Bitmask of LIBXML_* constants.
     */
    public function setLibxmlOptions(int $libxmlOptions): void {
        $this->libxmlOptions = $libxmlOptions;
    }
53
54
    /**
     * {@inheritdoc}
     *
     * For a document node this is the document itself.
     */
    public function document(): ?\DOMDocument {
        return $this;
    }
60
61 1
    /**
     * {@inheritdoc}
     *
     * Builds the node list from an array holding only this document.
     */
    public function collection(): NodeList {
        return $this->newNodeList([$this]);
    }
67
68 1
    /**
     * {@inheritdoc}
     *
     * Yields the first node of the list, or null when the list is empty.
     */
    public function result(NodeList $nodeList) {
        return $nodeList->count() ? $nodeList->first() : null;
    }
78
79
    /**
     * {@inheritdoc}
     *
     * Always null for a document.
     */
    public function parent() {
        return null;
    }
85 1
86
    /**
     * {@inheritdoc}
     *
     * Returns a node list created without any nodes.
     */
    public function parents() {
        return $this->newNodeList();
    }
92 2
93
    /**
     * {@inheritdoc}
     *
     * NOTE(review): this asks the document to replace *itself* as one of its own
     * children; a document is not normally its own child, so confirm this is the
     * intended behaviour before relying on it.
     *
     * @return self This document, for chaining.
     */
    public function substituteWith($newNode): self {
        $this->replaceChild($newNode, $this);

        return $this;
    }
101
102
    /**
     * {@inheritdoc}
     *
     * Always null for a document.
     */
    public function _clone() {
        return null;
    }
108
109 140
    /**
     * {@inheritdoc}
     *
     * Delegates to getOuterHtml().
     */
    public function getHtml(): string {
        return $this->getOuterHtml();
    }
115
116 140
    /**
     * {@inheritdoc}
     *
     * Parses the given markup into this document using the configured libxml options.
     * Non-string or blank input leaves the document untouched. libxml's global
     * error-handling / entity-loader state is restored afterwards, even when parsing throws.
     *
     * @return self This document, for chaining.
     */
    public function setHtml($html): self {
        if (!is_string($html) || trim($html) === '') {
            return $this;
        }

        $internalErrors = libxml_use_internal_errors(true);

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so only
        // toggle it on older runtimes to avoid deprecation notices.
        $legacyEntityLoader = \PHP_VERSION_ID < 80000;
        $disableEntities = $legacyEntityLoader ? libxml_disable_entity_loader(true) : false;

        try {
            $this->detectEncoding($html);

            $html = $this->convertToUtf8($html);

            $this->loadHTML($html, $this->libxmlOptions);

            // Drop the XML declaration that loadHTML() prepends to carry the encoding.
            $this->contents()->each(function($node) {
                if ($node instanceof ProcessingInstruction && $node->nodeName == 'xml') {
                    $node->destroy();
                }
            });
        } finally {
            // Always hand libxml back in the state we found it.
            libxml_use_internal_errors($internalErrors);

            if ($legacyEntityLoader) {
                libxml_disable_entity_loader($disableEntities);
            }
        }

        return $this;
    }
145
146
    /**
     * Load HTML from a string, working around libxml's single-root behaviour.
     *
     * An XML encoding declaration (the configured encoding, or UTF-8 when none is set)
     * is always prepended to the markup. When LIBXML_HTML_NOIMPLIED is present in
     * $options, a temporary <domwrap> element is prepended as well and unwrapped again
     * after parsing.
     *
     * @param string $html
     * @param int $options Bitmask of LIBXML_* constants.
     *
     * @return bool Whether the parent loadHTML() call succeeded.
     */
    public function loadHTML($html, $options = 0): bool {
        // Fix LibXML's crazy-ness RE root nodes
        // While importing HTML using the LIBXML_HTML_NOIMPLIED option LibXML insists
        //  on having one root node. All subsequent nodes are appended to this first node.
        // To counter this we will create a fake element, allow LibXML to 'do its thing'
        //  then undo it by taking the contents of the fake element, placing it back into
        //  the root and then remove our fake element.
        //
        // The decision is taken once, from the $options actually used for parsing, so the
        //  fake element is unwrapped exactly when it was added.
        $useFakeRoot = (bool) ($options & LIBXML_HTML_NOIMPLIED);

        if ($useFakeRoot) {
            $html = '<domwrap></domwrap>' . $html;
        }

        $html = '<?xml encoding="' . ($this->getEncoding() ?? 'UTF-8') . '">' . $html;

        $result = parent::loadHTML($html, $options);

        // Do our re-shuffling of nodes.
        if ($useFakeRoot) {
            $fakeRoot = $this->children()->first();

            // Nothing to unwrap if parsing produced no element at all.
            if ($fakeRoot instanceof \DOMNode) {
                $fakeRoot->contents()->each(function($node) {
                    $this->appendWith($node);
                });

                $this->removeChild($fakeRoot);
            }
        }

        // Coerce to a strict boolean to honour the declared return type.
        return $result !== false;
    }
178
179
    /**
     * Set the encoding recorded for this document.
     *
     * The value is read back via getEncoding() when HTML is loaded.
     *
     * @param string|null $encoding Encoding name, or null to clear it.
     */
    public function setEncoding(?string $encoding = null) {
        $this->documentEncoding = $encoding;
    }
185
186
    /**
     * Get the encoding recorded for this document.
     *
     * @return string|null The stored encoding, or null when none has been set.
     */
    public function getEncoding(): ?string {
        return $this->documentEncoding;
    }
192
193
    /**
     * Extract the charset declared by a <meta> tag in the markup.
     *
     * @param string $html
     *
     * @return string|null The upper-cased charset name, or null when no declaration matches.
     */
    private function getCharset(string $html): ?string {
        $found = [];
        $hasMetaCharset = preg_match('@<meta.*?charset=["\']?([^"\'\s>]+)@im', $html, $found);

        return $hasMetaCharset ? mb_strtoupper($found[1]) : null;
    }
207
208
    /**
     * Work out the encoding of the markup and store it via setEncoding().
     *
     * Precedence: an encoding already set on the document, then a <meta> charset
     * declaration, then UTF-8 if mbstring detects it. Otherwise the encoding stays null.
     *
     * @param string $html
     */
    private function detectEncoding(string $html): void {
        $charset = $this->getEncoding() ?? $this->getCharset($html);

        // Only scan the markup with mbstring when nothing else told us the encoding.
        if ($charset === null && mb_detect_encoding($html, mb_detect_order(), true) === 'UTF-8') {
            $charset = 'UTF-8';
        }

        $this->setEncoding($charset);
    }
226
227
    /**
     * Convert the markup to UTF-8 based on the document's recorded encoding.
     *
     * With a known encoding, charset declarations in the markup are rewritten to UTF-8
     * and the bytes are converted with mbstring, or with iconv when mbstring does not
     * list the encoding. With no usable encoding, the markup is converted to HTML
     * entities instead.
     *
     * @param string $html
     *
     * @return string
     */
    private function convertToUtf8(string $html): string {
        $charset = $this->getEncoding();

        if ($charset !== null) {
            // preg_replace() returns null on a PCRE error; keep the markup as-is in that case.
            $html = preg_replace('@(charset=["]?)([^"\s]+)([^"]*["]?)@im', '$1UTF-8$3', $html) ?? $html;

            // Compare case-insensitively so an encoding set as e.g. 'utf-8' is still recognised.
            $mbHasCharset = in_array(
                mb_strtoupper($charset),
                array_map('mb_strtoupper', mb_list_encodings()),
                true
            );

            if ($mbHasCharset) {
                $html = mb_convert_encoding($html, 'UTF-8', $charset);

            // Fallback to iconv if available.
            } elseif (extension_loaded('iconv')) {
                $htmlIconv = iconv($charset, 'UTF-8', $html);

                if ($htmlIconv !== false) {
                    $html = $htmlIconv;
                } else {
                    // Conversion failed: fall through to the entity encoding below.
                    $charset = null;
                }
            }
        }

        if ($charset === null) {
            // NOTE(review): the 'HTML-ENTITIES' mbstring encoding is deprecated as of PHP 8.2;
            // confirm a replacement (e.g. mb_encode_numericentity) before changing behaviour.
            $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
        }

        return $html;
    }
260
}
261