1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* Class Document |
4
|
|
|
* |
5
|
|
|
* @filesource Document.php |
6
|
|
|
* @created 05.05.2017 |
7
|
|
|
* @package chillerlan\PrototypeDOM |
8
|
|
|
* @author Smiley <[email protected]> |
9
|
|
|
* @copyright 2017 Smiley |
10
|
|
|
* @license MIT |
11
|
|
|
*/ |
12
|
|
|
|
13
|
|
|
namespace chillerlan\PrototypeDOM; |
14
|
|
|
|
15
|
|
|
use DOMDocument, DOMNode, DOMNodeList, DOMXPath; |
16
|
|
|
use Symfony\Component\CssSelector\CssSelectorConverter; |
17
|
|
|
|
18
|
|
|
/**
 * DOMDocument extension that registers the PrototypeDOM node classes and adds
 * CSS selector based querying as well as a few traversal helpers.
 */
class Document extends DOMDocument{

	/**
	 * Document constructor.
	 *
	 * Registers the library's own node classes for the base DOM node types.
	 *
	 * @param string|null $version
	 * @param string|null $encoding
	 */
	public function __construct($version = null, $encoding = null){
		parent::__construct($version, $encoding);

		$this->registerNodeClass('DOMElement', Element::class);
		$this->registerNodeClass('DOMText', Text::class);
		$this->registerNodeClass('DOMCharacterData', CharacterData::class);
		$this->registerNodeClass('DOMDocumentFragment', DocumentFragment::class);
		$this->registerNodeClass('DOMDocumentType', DocumentType::class);
		$this->registerNodeClass('DOMComment', Comment::class);
		$this->registerNodeClass('DOMAttr', Attr::class);
	}

	/**
	 * Converts a CSS selector into an XPath expression using the Symfony CssSelectorConverter.
	 *
	 * @param string $selector
	 * @param string $axis      prefix handed to the converter
	 *
	 * @return string
	 */
	public function selector2xpath(string $selector, string $axis = '//'):string{
		return (new CssSelectorConverter)->toXPath($selector, $axis);
	}

	/**
	 * Runs an XPath query against this document and wraps the result in a NodeList.
	 *
	 * @param string        $xpath
	 * @param \DOMNode|null $contextNode
	 *
	 * @return \chillerlan\PrototypeDOM\NodeList
	 */
	public function query(string $xpath, DOMNode $contextNode = null):NodeList{
		return new NodeList((new DOMXPath($this))->query($xpath, $contextNode));
	}

	/**
	 * Converts the given CSS selector to XPath and queries the document with it.
	 *
	 * @param string        $selector
	 * @param \DOMNode|null $contextNode
	 * @param string        $axis
	 *
	 * @return \chillerlan\PrototypeDOM\NodeList
	 */
	public function querySelectorAll(string $selector, DOMNode $contextNode = null, string $axis = 'descendant-or-self::'):NodeList{
		return $this->query($this->selector2xpath($selector, $axis), $contextNode);
	}

	/**
	 * Collects the nodes of the given node type that match one or more CSS selectors.
	 *
	 * When no usable selector is given (null, an empty array, a non-string/non-array value
	 * or an empty/whitespace-only string), the universal selector "*" is used.
	 *
	 * @param string|array  $selectors
	 * @param \DOMNode|null $contextNode
	 * @param string        $axis
	 * @param int           $nodeType
	 *
	 * @return \chillerlan\PrototypeDOM\NodeList
	 */
	public function select($selectors = null, DOMNode $contextNode = null, string $axis = 'descendant-or-self::', int $nodeType = XML_ELEMENT_NODE):NodeList{

		if(is_string($selectors)){
			$selectors = trim($selectors);
			// an empty string carries no selector - leave the array empty so the fallback below kicks in
			$selectors = $selectors !== '' ? [$selectors] : [];
		}

		if(!is_array($selectors) || empty($selectors)){
			$selectors = ['*'];
		}

		$elements = new NodeList;

		foreach($selectors as $selector){

			foreach($this->querySelectorAll($selector, $contextNode, $axis) as $element){

				if($element->nodeType === $nodeType){
					$elements[] = $element;
				}

			}

		}

		return $elements;
	}

	/**
	 * Parses an HTML fragment in a separate Document instance and returns the parsed nodes.
	 *
	 * The fragment is wrapped in a body element with a known id, which is then used
	 * to fetch the child nodes created from the fragment.
	 *
	 * @param string $content
	 *
	 * @return \chillerlan\PrototypeDOM\NodeList
	 */
	public function _loadHTMLFragment(string $content):NodeList{
		$document = new Document;
		$document->loadHTML('<html><body id="-import-content">'.$content.'</body></html>');

		return new NodeList($document->getElementById('-import-content')->childNodes);
	}

	/**
	 * Serializes the document (or only the given context node) as HTML or XML.
	 *
	 * @param \DOMNode|null $context
	 * @param bool          $xml      true: saveXML(), false: saveHTML()
	 *
	 * @return string
	 */
	public function inspect(DOMNode $context = null, $xml = false):string{
		return $xml
			? $this->saveXML($context)
			: $this->saveHTML($context);
	}

	/**
	 * Calls remove() on every node returned by select() for the given selectors.
	 *
	 * @param string|array  $selectors
	 * @param \DOMNode|null $contextNode
	 * @param string        $axis
	 *
	 * @return \chillerlan\PrototypeDOM\Document
	 */
	public function removeElementsBySelector($selectors, DOMNode $contextNode = null, string $axis = 'descendant-or-self::'):Document{

		/** @var \chillerlan\PrototypeDOM\Element $node */
		foreach($this->select($selectors, $contextNode, $axis) as $node){
			$node->remove();
		}

		return $this;
	}

	/**
	 * Normalizes the given content to a NodeList.
	 *
	 * Lists and arrays are wrapped, a single node becomes a one-element list
	 * and a string is parsed as an HTML fragment.
	 *
	 * @param string|\DOMNode|\DOMNodeList|\chillerlan\PrototypeDOM\NodeList $content
	 *
	 * @return \chillerlan\PrototypeDOM\NodeList
	 * @throws \Exception when the content is of none of the supported types
	 */
	public function _toNodeList($content):NodeList{

		if($content instanceof NodeList || $content instanceof DOMNodeList || is_array($content)){
			return new NodeList($content);
		}

		if($content instanceof DOMNode){
			return new NodeList([$content]);
		}

		if(is_string($content)){
			return $this->_loadHTMLFragment($content);
		}

		throw new \Exception('invalid content'); // @codeCoverageIgnore
	}

	/**
	 * Follows the given traversal property starting from $element and collects
	 * the visited nodes of the given node type.
	 *
	 * Only 'parentNode', 'previousSibling' and 'nextSibling' are accepted as $property,
	 * any other value yields an empty list.
	 *
	 * @param \DOMNode $element
	 * @param string   $property
	 * @param int      $maxLength  stops as soon as this many nodes were collected, -1 never matches
	 * @param int      $nodeType
	 *
	 * @return \chillerlan\PrototypeDOM\NodeList
	 */
	public function recursivelyCollect(DOMNode $element, string $property, int $maxLength = -1, int $nodeType = XML_ELEMENT_NODE):NodeList{
		$nodes = new NodeList;

		if(in_array($property, ['parentNode', 'previousSibling', 'nextSibling'], true)){

			while($element = $element->{$property}){

				if($element->nodeType === $nodeType){
					$nodes[] = $element;
				}

				if(count($nodes) === $maxLength){
					break;
				}

			}

		}

		return $nodes;
	}

	/**
	 * Follows the given traversal property starting from $element and returns the
	 * node at position $index among the nodes that have the given node type and,
	 * if a selector is given, match that selector.
	 *
	 * Only 'parentNode', 'previousSibling' and 'nextSibling' are accepted as $property.
	 *
	 * @param \DOMNode    $element
	 * @param string      $property
	 * @param string|null $selector
	 * @param int         $index     number of qualifying nodes to skip before returning one
	 * @param int         $nodeType
	 *
	 * @return \DOMNode|null null when no such node exists or $property is not accepted
	 */
	public function _recursivelyFind(DOMNode $element, string $property, string $selector = null, int $index = 0, int $nodeType = XML_ELEMENT_NODE){

		if(in_array($property, ['parentNode', 'previousSibling', 'nextSibling'], true)){

			/** @var \chillerlan\PrototypeDOM\Element $element */
			while($element = $element->{$property}){

				if($element->nodeType !== $nodeType){
					continue;
				}

				if($selector !== null && !$element->match($selector)){
					continue;
				}

				// the index is only counted down for nodes that passed both checks above
				if(--$index >= 0){
					continue;
				}

				return $element;
			}

		}

		return null;
	}

	/**
	 * Checks whether the given node is among the nodes selected by the CSS selector.
	 *
	 * @param \DOMNode $element
	 * @param string   $selector
	 *
	 * @return bool
	 */
	public function match(DOMNode $element, string $selector):bool{

		foreach($this->select($selector) as $match){

			if($element->isSameNode($match)){
				return true;
			}

		}

		return false;
	}

	/**
	 * Creates a new element and, if given, sets its attributes.
	 *
	 * @param string     $tag
	 * @param array|null $attributes  passed to the element's setAttributes() when not empty
	 *
	 * @return \chillerlan\PrototypeDOM\Element
	 */
	public function newElement(string $tag, array $attributes = null):Element{
		/** @var \chillerlan\PrototypeDOM\Element $element */
		$element = $this->createElement($tag);

		if($attributes){
			$element->setAttributes($attributes);
		}

		return $element;
	}

}
268
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.