Completed
Push — master ( 662a80...c104cf )
by Andrew
03:39
created

Document::setLibxmlOptions()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 2
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 1
dl 0
loc 2
ccs 0
cts 0
cp 0
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
crap 2
1
<?php declare(strict_types=1);
2
3
namespace DOMWrap;
4
5
use DOMWrap\Traits\{
6
    CommonTrait,
7
    TraversalTrait,
8
    ManipulationTrait
9
};
10
11
/**
12
 * Document Node
13
 *
14
 * @package DOMWrap
15
 * @license http://opensource.org/licenses/BSD-3-Clause BSD 3 Clause
16
 */
17
class Document extends \DOMDocument
{
    use CommonTrait;
    use TraversalTrait;
    use ManipulationTrait;

    /** @var int Bitmask of LIBXML_* options handed to loadHTML() by setHtml(). */
    protected $libxmlOptions = 0;

    /**
     * Create the document and register the DOMWrap node classes, so nodes
     * created by this document are instances of the wrapper classes.
     *
     * @param string $version  Version passed to \DOMDocument::__construct().
     * @param string $encoding Encoding passed to \DOMDocument::__construct().
     */
    public function __construct(string $version = '1.0', string $encoding = 'UTF-8') {
        parent::__construct($version, $encoding);

        $this->registerNodeClass('DOMText', 'DOMWrap\\Text');
        $this->registerNodeClass('DOMElement', 'DOMWrap\\Element');
        $this->registerNodeClass('DOMComment', 'DOMWrap\\Comment');
        $this->registerNodeClass('DOMDocumentType', 'DOMWrap\\DocumentType');
        $this->registerNodeClass('DOMProcessingInstruction', 'DOMWrap\\ProcessingInstruction');
    }

    /**
     * Set libxml options.
     *
     * Multiple values must use bitwise OR.
     * eg: LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
     *
     * @link http://php.net/manual/en/libxml.constants.php
     *
     * @param int $libxmlOptions
     */
    public function setLibxmlOptions(int $libxmlOptions): void {
        $this->libxmlOptions = $libxmlOptions;
    }

    /**
     * {@inheritdoc}
     *
     * The document is its own owner document.
     */
    public function document(): ?\DOMDocument {
        return $this;
    }

    /**
     * {@inheritdoc}
     *
     * Returns a node list containing only this document.
     */
    public function collection(): NodeList {
        return $this->newNodeList([$this]);
    }

    /**
     * {@inheritdoc}
     *
     * Returns the first node of the list, or null when the list is empty.
     */
    public function result(NodeList $nodeList) {
        if ($nodeList->count()) {
            return $nodeList->first();
        }

        return null;
    }

    /**
     * {@inheritdoc}
     *
     * Always null for a document.
     */
    public function parent() {
        return null;
    }

    /**
     * {@inheritdoc}
     *
     * Always an empty node list for a document.
     */
    public function parents() {
        return $this->newNodeList();
    }

    /**
     * {@inheritdoc}
     */
    public function replaceWith($newNode): self {
        // NOTE(review): the document itself is passed as the child to replace;
        // confirm this is intended, since replaceChild() expects a child node.
        $this->replaceChild($newNode, $this);

        return $this;
    }

    /**
     * {@inheritdoc}
     *
     * NOTE(review): always returns null here -- confirm callers do not expect a copy.
     */
    public function _clone() {
        return null;
    }

    /**
     * {@inheritdoc}
     */
    public function getHtml(): string {
        return $this->getOuterHtml();
    }

    /**
     * {@inheritdoc}
     *
     * Non-string or blank input is ignored and leaves the document untouched.
     * The markup is normalised by convertToUtf8() and then parsed with
     * loadHTML() using the configured libxml options.
     */
    public function setHtml($html): self {
        if (!is_string($html) || trim($html) === '') {
            return $this;
        }

        $internalErrors = libxml_use_internal_errors(true);

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so it is
        // only toggled on older runtimes to avoid an E_DEPRECATED per call.
        $disableEntities = \PHP_VERSION_ID < 80000 ? libxml_disable_entity_loader(true) : null;

        try {
            $this->loadHTML($this->convertToUtf8($html), $this->libxmlOptions);
        } finally {
            // Restore the process-wide libxml state even if parsing throws.
            libxml_use_internal_errors($internalErrors);

            if ($disableEntities !== null) {
                libxml_disable_entity_loader($disableEntities);
            }
        }

        return $this;
    }

    /**
     * Extract the charset declared by the first <meta> tag that carries one.
     *
     * Handles double-quoted, single-quoted and unquoted values, e.g.
     * <meta charset="utf-8">, <meta charset='utf-8'>, <meta charset=utf-8>
     * and <meta http-equiv="Content-Type" content="text/html; charset=utf-8">.
     *
     * @param string $html
     *
     * @return string|null Upper-cased charset name, or null when none is declared.
     */
    private function getCharset(string $html): ?string {
        // [^>]*? keeps the search inside the <meta ...> tag; the value stops at
        // a quote, whitespace, ';', '/' or '>' so tag punctuation is never captured.
        if (preg_match('@<meta[^>]*?charset=["\']?([^"\'\s;/>]+)@i', $html, $matches)) {
            return strtoupper($matches[1]);
        }

        return null;
    }

    /**
     * Replace every non-ASCII character with a numeric character reference.
     *
     * The result is plain ASCII, so the HTML parser decodes it correctly
     * regardless of which encoding it assumes for the input.
     *
     * @param string $html Markup treated as UTF-8.
     *
     * @return string
     */
    private function encodeNonAscii(string $html): string {
        // Replacement for mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'),
        // which is deprecated since PHP 8.2.
        return mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
    }

    /**
     * Prepare markup so that loadHTML() reads its characters correctly.
     *
     * - Detected UTF-8 with a declared charset is returned unchanged.
     * - With a declared charset otherwise, the <meta> declaration is rewritten
     *   to UTF-8 and the bytes are converted with mbstring, or iconv as a fallback.
     * - With no declared charset (or a failed iconv conversion), non-ASCII
     *   characters are turned into numeric entities.
     *
     * @param string $html
     *
     * @return string
     */
    private function convertToUtf8(string $html): string {
        $charset = $this->getCharset($html);

        if (mb_detect_encoding($html, mb_detect_order(), true) === 'UTF-8') {
            // Without a declared charset the parser would not read the bytes
            // as UTF-8, so hand it entities instead of raw multi-byte text.
            return $charset === null ? $this->encodeNonAscii($html) : $html;
        }

        if ($charset !== null) {
            // Rewrite only the value inside <meta> declarations; quotes and the
            // rest of the tag are left exactly as they were.
            $rewritten = preg_replace('@(<meta[^>]*?charset=["\']?)[^"\'\s;/>]+@i', '${1}UTF-8', $html);

            // preg_replace() returns null on a PCRE error; keep the input then.
            if ($rewritten !== null) {
                $html = $rewritten;
            }

            $mbHasCharset = in_array($charset, array_map('strtoupper', mb_list_encodings()), true);

            if ($mbHasCharset) {
                $html = mb_convert_encoding($html, 'UTF-8', $charset);

            // Fallback to iconv if available.
            } elseif (extension_loaded('iconv')) {
                $htmlIconv = iconv($charset, 'UTF-8', $html);

                if ($htmlIconv !== false) {
                    $html = $htmlIconv;
                } else {
                    // Conversion failed: fall through to entity encoding below.
                    $charset = null;
                }
            }
        }

        if ($charset === null) {
            $html = $this->encodeNonAscii($html);
        }

        return $html;
    }
}
175