Passed
Push — master ( 693e6f...23c90b )
by Andrew
02:02
created

Document::convertToUtf8()   B

Complexity

Conditions 7
Paths 11

Size

Total Lines 31
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 56

Importance

Changes 0
Metric Value
eloc 17
dl 0
loc 31
ccs 0
cts 0
cp 0
rs 8.8333
c 0
b 0
f 0
cc 7
nc 11
nop 1
crap 56
1
<?php declare(strict_types=1);
2
3
namespace DOMWrap;
4
5
use DOMWrap\Traits\{
6
    CommonTrait,
7
    TraversalTrait,
8
    ManipulationTrait
9
};
10
11
/**
 * Document Node
 *
 * A \DOMDocument subclass that registers the DOMWrap node classes on
 * construction and loads HTML after normalising its character encoding.
 *
 * @package DOMWrap
 * @license http://opensource.org/licenses/BSD-3-Clause BSD 3 Clause
 */
class Document extends \DOMDocument
{
    use CommonTrait;
    use TraversalTrait;
    use ManipulationTrait;

    /**
     * Creates the document and registers the DOMWrap replacements for the
     * built-in DOM node classes, so nodes created by this document are
     * instantiated as DOMWrap types.
     *
     * @param string $version  Version passed through to \DOMDocument.
     * @param string $encoding Encoding passed through to \DOMDocument.
     */
    public function __construct(string $version = '1.0', string $encoding = 'UTF-8') {
        parent::__construct($version, $encoding);

        $this->registerNodeClass('DOMText', 'DOMWrap\\Text');
        $this->registerNodeClass('DOMElement', 'DOMWrap\\Element');
        $this->registerNodeClass('DOMComment', 'DOMWrap\\Comment');
        $this->registerNodeClass('DOMDocumentType', 'DOMWrap\\DocumentType');
        $this->registerNodeClass('DOMProcessingInstruction', 'DOMWrap\\ProcessingInstruction');
    }

    /**
     * {@inheritdoc}
     *
     * The document is its own owning document, so this returns $this.
     */
    public function document(): ?\DOMDocument {
        return $this;
    }

    /**
     * {@inheritdoc}
     *
     * Returns a node list whose only member is this document.
     */
    public function collection(): NodeList {
        return $this->newNodeList([$this]);
    }

    /**
     * {@inheritdoc}
     *
     * Returns the first node of the list, or null when the list is empty.
     */
    public function result(NodeList $nodeList) {
        if ($nodeList->count()) {
            return $nodeList->first();
        }

        return null;
    }

    /**
     * {@inheritdoc}
     *
     * Always returns null for a document.
     */
    public function parent() {
        return null;
    }

    /**
     * {@inheritdoc}
     *
     * Returns the result of newNodeList() called without arguments
     * (presumably an empty list -- confirm against the trait).
     */
    public function parents() {
        return $this->newNodeList();
    }

    /**
     * {@inheritdoc}
     */
    public function replaceWith($newNode): self {
        // NOTE(review): the document passes itself as the child to replace.
        // A document is not normally its own child, so this looks like it
        // would raise a DOMException -- confirm the intended behaviour.
        $this->replaceChild($newNode, $this);

        return $this;
    }

    /**
     * {@inheritdoc}
     *
     * Always returns null for a document.
     */
    public function _clone() {
        return null;
    }

    /**
     * {@inheritdoc}
     *
     * For a document the HTML is the same as its outer HTML.
     */
    public function getHtml(): string {
        return $this->getOuterHtml();
    }

    /**
     * {@inheritdoc}
     *
     * Loads the given markup into this document. Non-string or blank input
     * is ignored and the document is left untouched. The markup is converted
     * to UTF-8 (or entity-encoded) before being handed to loadHTML().
     *
     * @param mixed $html Markup to load.
     *
     * @return self
     */
    public function setHtml($html): self {
        if (!is_string($html) || trim($html) === '') {
            return $this;
        }

        $internalErrors = libxml_use_internal_errors(true);

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, where
        // external entity loading is already off by default; only toggle it
        // on older runtimes to avoid a deprecation notice on every call.
        $toggleEntityLoader = \PHP_VERSION_ID < 80000;
        $disableEntities = $toggleEntityLoader ? libxml_disable_entity_loader(true) : null;

        try {
            $this->loadHTML($this->convertToUtf8($html));
        } finally {
            // Restore the global libxml state even when parsing throws.
            libxml_use_internal_errors($internalErrors);

            if ($toggleEntityLoader) {
                libxml_disable_entity_loader($disableEntities);
            }
        }

        return $this;
    }

    /**
     * Extracts the charset declared by the first <meta> tag that carries one.
     *
     * Handles double-quoted, single-quoted and unquoted values, and only
     * looks inside the <meta> tag itself.
     *
     * @param string $html Markup to inspect.
     *
     * @return string|null Upper-cased charset name, or null when none is declared.
     */
    private function getCharset(string $html): ?string {
        if (preg_match('@<meta\b[^>]*?charset=["\']?([^"\'\s/>;]+)@i', $html, $matches) === 1) {
            return strtoupper($matches[1]);
        }

        return null;
    }

    /**
     * Produces markup that is safe to load as UTF-8.
     *
     * Input already detected as UTF-8 is returned unchanged. Otherwise, when
     * a charset is declared, the declaration is rewritten to UTF-8 and the
     * bytes are converted with mbstring, falling back to iconv. When no
     * charset is declared or no converter succeeds, non-ASCII characters are
     * replaced by numeric character references instead.
     *
     * @param string $html Markup in an arbitrary encoding.
     *
     * @return string
     */
    private function convertToUtf8(string $html): string {
        if (mb_detect_encoding($html, mb_detect_order(), true) === 'UTF-8') {
            return $html;
        }

        $charset = $this->getCharset($html);

        if ($charset !== null) {
            // Rewrite only the charset value inside <meta> tags; stop at the
            // closing quote, whitespace or tag end so that markup following
            // an unquoted value is never swallowed.
            $rewritten = preg_replace(
                '@(<meta\b[^>]*?charset=["\']?)[^"\'\s/>;]+@i',
                '${1}UTF-8',
                $html
            );

            // preg_replace() yields null on a PCRE error; keep the input then.
            if ($rewritten !== null) {
                $html = $rewritten;
            }

            $mbHasCharset = in_array($charset, array_map('strtoupper', mb_list_encodings()), true);

            if ($mbHasCharset) {
                $html = mb_convert_encoding($html, 'UTF-8', $charset);

            // Fallback to iconv if available.
            } elseif (extension_loaded('iconv')) {
                $htmlIconv = iconv($charset, 'UTF-8', $html);

                if ($htmlIconv !== false) {
                    $html = $htmlIconv;
                } else {
                    $charset = null;
                }
            } else {
                // No converter knows this charset: use the entity fallback
                // rather than returning unconverted bytes labelled as UTF-8.
                $charset = null;
            }
        }

        if ($charset === null) {
            // Replacement for the 'HTML-ENTITIES' pseudo-encoding, which is
            // deprecated as of PHP 8.2: encode every code point >= 0x80 as a
            // numeric character reference and leave ASCII untouched.
            $html = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
        }

        return $html;
    }
}
158