1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace voku\helper; |
6
|
|
|
|
7
|
|
|
/** |
8
|
|
|
* @noinspection PhpHierarchyChecksInspection |
9
|
|
|
* |
10
|
|
|
* {@inheritdoc} |
11
|
|
|
*/ |
12
|
|
|
class SimpleHtmlDom extends AbstractSimpleHtmlDom implements \IteratorAggregate, SimpleHtmlDomInterface |
13
|
|
|
{ |
14
|
|
|
/**
 * Wrap a native DOM node.
 *
 * @param \DOMElement|\DOMNode $node <p>The DOM node all methods of this object operate on.</p>
 */
public function __construct(\DOMNode $node)
{
    // The wrapped node is the only state this class sets up itself.
    $this->node = $node;
}
21
|
|
|
|
22
|
|
|
/**
 * Dispatch calls to undefined methods through the alias table.
 *
 * The requested method name is lower-cased and looked up in
 * self::$functionAliases; a hit is forwarded to the aliased method
 * on this object with the original arguments.
 *
 * @param string $name      <p>The method name that was called.</p>
 * @param array  $arguments <p>The arguments of the call.</p>
 *
 * @throws \BadMethodCallException if no alias is registered for the name
 *
 * @return SimpleHtmlDomInterface|string|null
 */
public function __call($name, $arguments)
{
    $alias = \strtolower($name);

    if (!isset(self::$functionAliases[$alias])) {
        throw new \BadMethodCallException('Method does not exist');
    }

    $target = [$this, self::$functionAliases[$alias]];

    return \call_user_func_array($target, $arguments);
}
40
|
|
|
|
41
|
|
|
/**
 * Find list of nodes with a CSS selector.
 *
 * The lookup is delegated to a parser created from this node
 * via getHtmlDomParser().
 *
 * @param string   $selector
 * @param int|null $idx
 *
 * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function find(string $selector, $idx = null)
{
    $parser = $this->getHtmlDomParser();

    return $parser->find($selector, $idx);
}
53
|
|
|
|
54
|
|
|
/**
 * Returns an array of attributes.
 *
 * @return array|null
 *                    <p>Attribute values keyed by attribute name, or null if the node has no attributes.</p>
 */
public function getAllAttributes()
{
    if (!$this->node->hasAttributes()) {
        return null;
    }

    $result = [];
    foreach ($this->node->attributes as $attribute) {
        // Each value is passed through the parser's entity-restoring helper before it is returned.
        $result[$attribute->name] = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($attribute->value);
    }

    return $result;
}
72
|
|
|
|
73
|
|
|
/**
 * Determine if the wrapped node has any attributes.
 *
 * @return bool
 */
public function hasAttributes(): bool
{
    $node = $this->node;

    return $node->hasAttributes();
}
80
|
|
|
|
81
|
|
|
/**
 * Return attribute value.
 *
 * @param string $name <p>The name of the html-attribute.</p>
 *
 * @return string
 *                <p>The attribute value, or an empty string if the wrapped node is not a \DOMElement.</p>
 */
public function getAttribute(string $name): string
{
    // Only elements carry attributes; every other node type yields ''.
    if (!$this->node instanceof \DOMElement) {
        return '';
    }

    $rawValue = $this->node->getAttribute($name);

    return HtmlDomParser::putReplacedBackToPreserveHtmlEntities($rawValue);
}
98
|
|
|
|
99
|
|
|
/**
 * Determine if an attribute exists on the element.
 *
 * @param string $name <p>The name of the html-attribute.</p>
 *
 * @return bool
 *              <p>Always false if the wrapped node is not a \DOMElement.</p>
 */
public function hasAttribute(string $name): bool
{
    return $this->node instanceof \DOMElement
           && $this->node->hasAttribute($name);
}
114
|
|
|
|
115
|
|
|
/**
 * Get dom node's outer html.
 *
 * Delegates to a parser created from this node via getHtmlDomParser().
 *
 * @param bool $multiDecodeNewHtmlEntity <p>Passed through unchanged to the parser.</p>
 *
 * @return string
 */
public function html(bool $multiDecodeNewHtmlEntity = false): string
{
    $parser = $this->getHtmlDomParser();

    return $parser->html($multiDecodeNewHtmlEntity);
}
126
|
|
|
|
127
|
|
|
/**
 * Get dom node's inner html.
 *
 * Delegates to a parser created from this node via getHtmlDomParser().
 *
 * @param bool $multiDecodeNewHtmlEntity <p>Passed through unchanged to the parser.</p>
 *
 * @return string
 */
public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string
{
    $parser = $this->getHtmlDomParser();

    return $parser->innerHtml($multiDecodeNewHtmlEntity);
}
138
|
|
|
|
139
|
|
|
/**
 * Remove attribute.
 *
 * Nothing happens if the wrapped node offers no "removeAttribute" method.
 *
 * @param string $name <p>The name of the html-attribute.</p>
 *
 * @return SimpleHtmlDomInterface
 *                                <p>This object, for method chaining.</p>
 */
public function removeAttribute(string $name): SimpleHtmlDomInterface
{
    $supportsRemoval = \method_exists($this->node, 'removeAttribute');

    if ($supportsRemoval) {
        $this->node->removeAttribute($name);
    }

    return $this;
}
154
|
|
|
|
155
|
|
|
/**
 * Replace child node.
 *
 * All current children of the wrapped node are removed. If $string is not
 * the empty string, it is parsed and the document element of the parsed
 * result is imported and appended as the new child.
 *
 * @param string $string <p>The new content; '' only removes the current children.</p>
 *
 * @throws \RuntimeException if the parsed result does not match the given string
 *
 * @return SimpleHtmlDomInterface
 *                                <p>This object, for method chaining.</p>
 */
protected function replaceChildWithString(string $string): SimpleHtmlDomInterface
{
    $newDocument = null;

    // Compare with '' instead of empty(): empty('0') is true and would
    // silently discard the perfectly valid content "0".
    if ($string !== '') {
        $newDocument = new HtmlDomParser($string);

        // Reject input that the parser had to change while reading it.
        $tmpDomString = $this->normalizeStringForComparision($newDocument);
        $tmpStr = $this->normalizeStringForComparision($string);
        if ($tmpDomString !== $tmpStr) {
            throw new \RuntimeException(
                'Not valid HTML fragment!' . "\n" .
                $tmpDomString . "\n" .
                $tmpStr
            );
        }
    }

    // INFO: We need to fetch the nodes first, before we can delete them, because of missing references in the dom,
    // if we delete the elements on the fly.
    /** @var \DOMNode[] $remove_nodes */
    $remove_nodes = [];
    if ($this->node->hasChildNodes()) {
        foreach ($this->node->childNodes as $node) {
            $remove_nodes[] = $node;
        }
    }
    foreach ($remove_nodes as $remove_node) {
        $this->node->removeChild($remove_node);
    }

    if ($newDocument !== null) {
        $newDocument = $this->cleanHtmlWrapper($newDocument);

        $ownerDocument = $this->node->ownerDocument;
        $newRoot = $newDocument->getDocument()->documentElement;
        if ($ownerDocument !== null && $newRoot !== null) {
            // The parsed node belongs to another document, so it has to be imported first.
            $this->node->appendChild($ownerDocument->importNode($newRoot, true));
        }
    }

    return $this;
}
207
|
|
|
|
208
|
|
|
/**
 * Replace this node.
 *
 * An empty string removes the wrapped node from its parent. Any other
 * string is parsed, and the document element of the parsed result is
 * imported and put in place of the wrapped node, which then becomes
 * the node wrapped by this object.
 *
 * @param string $string <p>The replacement markup; '' removes the node.</p>
 *
 * @throws \RuntimeException if the parsed result does not match the given string
 *
 * @return SimpleHtmlDomInterface
 *                                <p>This object, for method chaining.</p>
 */
protected function replaceNodeWithString(string $string): SimpleHtmlDomInterface
{
    // Compare with '' instead of empty(): empty('0') is true and would
    // remove the node although "0" is valid replacement content.
    if ($string === '') {
        // A detached node has no parent to be removed from.
        if ($this->node->parentNode !== null) {
            $this->node->parentNode->removeChild($this->node);
        }

        return $this;
    }

    $newDocument = new HtmlDomParser($string);

    // Reject input that the parser had to change while reading it.
    $tmpDomOuterTextString = $this->normalizeStringForComparision($newDocument);
    $tmpStr = $this->normalizeStringForComparision($string);
    if ($tmpDomOuterTextString !== $tmpStr) {
        throw new \RuntimeException(
            'Not valid HTML fragment!' . "\n"
            . $tmpDomOuterTextString . "\n" .
            $tmpStr
        );
    }

    $newDocument = $this->cleanHtmlWrapper($newDocument, true);

    // Read the parent only now, after cleanHtmlWrapper() has finished touching the tree.
    $ownerDocument = $this->node->ownerDocument;
    $parentNode = $this->node->parentNode;
    $newRoot = $newDocument->getDocument()->documentElement;
    if ($ownerDocument === null || $parentNode === null || $newRoot === null) {
        return $this;
    }

    $newNode = $ownerDocument->importNode($newRoot, true);

    $parentNode->replaceChild($newNode, $this->node);
    $this->node = $newNode;

    // Remove head element, preserving child nodes. (again)
    if (
        $parentNode instanceof \DOMElement
        &&
        $newDocument->getIsDOMDocumentCreatedWithoutHeadWrapper()
    ) {
        $head = $parentNode->getElementsByTagName('head')->item(0);
        $parentDocument = $parentNode->ownerDocument;
        if ($head !== null && $parentDocument !== null && $head->parentNode !== null) {
            // Move the children into a fragment and put the fragment where <head> was.
            $fragment = $parentDocument->createDocumentFragment();
            while (($child = $head->firstChild) !== null) {
                $fragment->appendChild($child);
            }
            $head->parentNode->replaceChild($fragment, $head);
        }
    }

    return $this;
}
276
|
|
|
|
277
|
|
|
/**
 * Replace this node with text
 *
 * An empty string removes the wrapped node from its parent. Any other
 * string replaces the wrapped node with a new text node, which then
 * becomes the node wrapped by this object.
 *
 * @param string $string <p>The new text; '' removes the node.</p>
 *
 * @return SimpleHtmlDomInterface
 *                                <p>This object, for method chaining.</p>
 */
protected function replaceTextWithString($string): SimpleHtmlDomInterface
{
    $string = (string) $string;
    $parentNode = $this->node->parentNode;

    // Compare with '' instead of empty(): empty('0') is true and would
    // remove the node instead of writing the text "0".
    if ($string === '') {
        // A detached node has no parent to be removed from.
        if ($parentNode !== null) {
            $parentNode->removeChild($this->node);
        }

        return $this;
    }

    $ownerDocument = $this->node->ownerDocument;
    if ($ownerDocument !== null && $parentNode !== null) {
        // createTextNode() already yields a node owned by this document,
        // so no additional importNode() copy is required.
        $newNode = $ownerDocument->createTextNode($string);
        $parentNode->replaceChild($newNode, $this->node);
        $this->node = $newNode;
    }

    return $this;
}
302
|
|
|
|
303
|
|
|
/**
 * Set attribute value.
 *
 * @param string      $name   <p>The name of the html-attribute.</p>
 * @param string|null $value  <p>Set to NULL or empty string, to remove the attribute.</p>
 * @param bool        $strict <p>
 *                            $value must be NULL, to remove the attribute,
 *                            so that you can set an empty string as attribute-value e.g. autofocus=""
 *                            </p>
 *
 * @return SimpleHtmlDomInterface
 *                                <p>This object, for method chaining.</p>
 */
public function setAttribute(string $name, $value = null, bool $strict = false): SimpleHtmlDomInterface
{
    // NULL removes the attribute in both modes. In non-strict mode an empty
    // string (or false) removes it as well. empty() is deliberately not used:
    // it also matches "0", which is a legitimate value (e.g. tabindex="0").
    $removeAttribute = $value === null
                       || (!$strict && ($value === '' || $value === false));

    if ($removeAttribute) {
        $this->removeAttribute($name);
    } elseif (\method_exists($this->node, 'setAttribute')) {
        // The cast keeps scalar non-string values working under strict_types.
        $this->node->setAttribute($name, (string) $value);
    }

    return $this;
}
331
|
|
|
|
332
|
|
|
/**
 * Get dom node's plain text.
 *
 * The node's textContent is run through the parser's fixHtmlOutput().
 *
 * @return string
 */
public function text(): string
{
    $rawText = $this->node->textContent;

    return $this->getHtmlDomParser()->fixHtmlOutput($rawText);
}
341
|
|
|
|
342
|
|
|
/**
 * Change the name of a tag in a "DOMNode".
 *
 * A new element with the given name is created in the node's document,
 * receives deep copies of the node's children and copies of its attributes,
 * and is then put in place of the node inside the node's parent.
 *
 * @param \DOMNode $node <p>The node to rename.</p>
 * @param string   $name <p>The new tag name.</p>
 *
 * @return \DOMElement|false
 *                          <p>DOMElement a new instance of class DOMElement or false
 *                          if an error occurred.</p>
 */
protected function changeElementName(\DOMNode $node, string $name)
{
    $ownerDocument = $node->ownerDocument;
    if (!$ownerDocument) {
        return false;
    }

    $newNode = $ownerDocument->createElement($name);
    if (!$newNode instanceof \DOMElement) {
        return false;
    }

    if ($node->hasChildNodes()) {
        foreach ($node->childNodes as $child) {
            // importNode() returns a copy, so the list being iterated is not modified.
            $newNode->appendChild($ownerDocument->importNode($child, true));
        }
    }

    // Only elements have an attribute map; "attributes" is null for other node types.
    foreach ($node->attributes ?? [] as $attrName => $attrNode) {
        // setAttribute() expects the string value, not the attribute node itself.
        $newNode->setAttribute($attrName, (string) $attrNode->nodeValue);
    }

    // Replace inside the actual parent: replaceChild() on the document only
    // works for nodes that are direct children of the document.
    if ($node->parentNode !== null) {
        $node->parentNode->replaceChild($newNode, $node);
    }

    return $newNode;
}
377
|
|
|
|
378
|
|
|
/**
 * Returns children of node.
 *
 * @param int $idx <p>-1 (default) returns all children, any other value the child at that position.</p>
 *
 * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface|null
 *                                                                                        <p>null if there is no child at the requested position.</p>
 */
public function childNodes(int $idx = -1)
{
    $children = $this->getIterator();

    return $idx === -1
        ? $children
        : ($children[$idx] ?? null);
}
395
|
|
|
|
396
|
|
|
/**
 * Find nodes with a CSS selector.
 *
 * Delegates to a parser created from this node via getHtmlDomParser().
 *
 * @param string $selector
 *
 * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function findMulti(string $selector): SimpleHtmlDomNodeInterface
{
    $parser = $this->getHtmlDomParser();

    return $parser->findMulti($selector);
}
407
|
|
|
|
408
|
|
|
/**
 * Find nodes with a CSS selector or false, if no element is found.
 *
 * Delegates to a parser created from this node via getHtmlDomParser().
 *
 * @param string $selector
 *
 * @return false|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function findMultiOrFalse(string $selector)
{
    $parser = $this->getHtmlDomParser();

    return $parser->findMultiOrFalse($selector);
}
419
|
|
|
|
420
|
|
|
/**
 * Find one node with a CSS selector.
 *
 * Delegates to a parser created from this node via getHtmlDomParser().
 *
 * @param string $selector
 *
 * @return SimpleHtmlDomInterface
 */
public function findOne(string $selector): SimpleHtmlDomInterface
{
    $parser = $this->getHtmlDomParser();

    return $parser->findOne($selector);
}
431
|
|
|
|
432
|
|
|
/**
 * Find one node with a CSS selector or false, if no element is found.
 *
 * Delegates to a parser created from this node via getHtmlDomParser().
 *
 * @param string $selector
 *
 * @return false|SimpleHtmlDomInterface
 */
public function findOneOrFalse(string $selector)
{
    $parser = $this->getHtmlDomParser();

    return $parser->findOneOrFalse($selector);
}
443
|
|
|
|
444
|
|
|
/**
 * Returns the first child of node.
 *
 * @return SimpleHtmlDomInterface|null
 *                                     <p>null if the node has no children.</p>
 */
public function firstChild()
{
    /** @var \DOMNode|null $child */
    $child = $this->node->firstChild;

    return $child === null ? null : new static($child);
}
460
|
|
|
|
461
|
|
|
/**
 * Return elements by ".class".
 *
 * Builds the CSS selector ".<class>" and forwards it to findMulti().
 *
 * @param string $class <p>The class name, without the leading dot.</p>
 *
 * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function getElementByClass(string $class): SimpleHtmlDomNodeInterface
{
    // Plain concatenation: the "${var}" interpolation form is deprecated as of PHP 8.2.
    return $this->findMulti('.' . $class);
}
472
|
|
|
|
473
|
|
|
/**
 * Return element by #id.
 *
 * Builds the CSS selector "#<id>" and forwards it to findOne().
 *
 * @param string $id <p>The id, without the leading hash.</p>
 *
 * @return SimpleHtmlDomInterface
 */
public function getElementById(string $id): SimpleHtmlDomInterface
{
    // Plain concatenation: the "${var}" interpolation form is deprecated as of PHP 8.2.
    return $this->findOne('#' . $id);
}
484
|
|
|
|
485
|
|
|
/**
 * Return element by tag name.
 *
 * @param string $name <p>The tag name to look for below the wrapped node.</p>
 *
 * @return SimpleHtmlDomInterface
 *                                <p>The first match, or a SimpleHtmlDomBlank if there is none
 *                                or the wrapped node is not a \DOMElement.</p>
 */
public function getElementByTagName(string $name): SimpleHtmlDomInterface
{
    $firstMatch = $this->node instanceof \DOMElement
        ? $this->node->getElementsByTagName($name)->item(0)
        : null;

    if ($firstMatch === null) {
        return new SimpleHtmlDomBlank();
    }

    return new static($firstMatch);
}
506
|
|
|
|
507
|
|
|
/**
 * Returns elements by "#id".
 *
 * Builds the CSS selector "#<id>" and forwards it, together with $idx, to find().
 *
 * @param string   $id  <p>The id, without the leading hash.</p>
 * @param int|null $idx
 *
 * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function getElementsById(string $id, $idx = null)
{
    // Plain concatenation: the "${var}" interpolation form is deprecated as of PHP 8.2.
    return $this->find('#' . $id, $idx);
}
519
|
|
|
|
520
|
|
|
/**
 * Returns elements by tag name.
 *
 * @param string   $name <p>The tag name to look for below the wrapped node.</p>
 * @param int|null $idx  <p>
 *                       null returns all matches; an integer returns the match at that
 *                       position, negative values counting from the end.
 *                       </p>
 *
 * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 *                                                                                   <p>
 *                                                                                   Blank objects (SimpleHtmlDomNodeBlank / SimpleHtmlDomBlank)
 *                                                                                   stand in for "nothing found".
 *                                                                                   </p>
 */
public function getElementsByTagName(string $name, $idx = null)
{
    $matches = new SimpleHtmlDomNode();

    // Only elements can be searched; any other node type produces no matches.
    if ($this->node instanceof \DOMElement) {
        foreach ($this->node->getElementsByTagName($name) as $domNode) {
            $matches[] = new static($domNode);
        }
    }

    // return all elements
    if ($idx === null) {
        return \count($matches) === 0 ? new SimpleHtmlDomNodeBlank() : $matches;
    }

    // handle negative values
    $position = $idx < 0 ? \count($matches) + $idx : $idx;

    // return one element
    return $matches[$position] ?? new SimpleHtmlDomBlank();
}
559
|
|
|
|
560
|
|
|
/**
 * Create a new "HtmlDomParser"-object from the current context.
 *
 * Every call builds a fresh parser from this object.
 *
 * @return HtmlDomParser
 */
public function getHtmlDomParser(): HtmlDomParser
{
    $parser = new HtmlDomParser($this);

    return $parser;
}
569
|
|
|
|
570
|
|
|
/**
 * Get the native DOM node wrapped by this object.
 *
 * @return \DOMNode
 */
public function getNode(): \DOMNode
{
    $wrappedNode = $this->node;

    return $wrappedNode;
}
577
|
|
|
|
578
|
|
|
/**
 * Nodes can get partially destroyed in which they're still an
 * actual DOM node (such as \DOMElement) but almost their entire
 * body is gone, including the `nodeType` attribute.
 *
 * The check therefore only tests whether `nodeType` is still set.
 *
 * @return bool true if node has been destroyed
 */
public function isRemoved(): bool
{
    $stillHasNodeType = isset($this->node->nodeType);

    return !$stillHasNodeType;
}
589
|
|
|
|
590
|
|
|
/**
 * Returns the last child of node.
 *
 * @return SimpleHtmlDomInterface|null
 *                                     <p>null if the node has no children.</p>
 */
public function lastChild()
{
    /** @var \DOMNode|null $child */
    $child = $this->node->lastChild;

    return $child === null ? null : new static($child);
}
606
|
|
|
|
607
|
|
|
/**
 * Returns the next sibling of node.
 *
 * @return SimpleHtmlDomInterface|null
 *                                     <p>null if the node is the last one of its parent.</p>
 */
public function nextSibling()
{
    /** @var \DOMNode|null $sibling */
    $sibling = $this->node->nextSibling;

    return $sibling === null ? null : new static($sibling);
}
623
|
|
|
|
624
|
|
|
/**
 * Returns the next sibling of node whose text content is not blank.
 *
 * Following siblings are skipped as long as their trimmed text content
 * is the empty string.
 *
 * NOTE(review): elements without any text content (e.g. "<br>") are skipped
 * too, exactly as before; confirm whether that is intended.
 *
 * @return SimpleHtmlDomInterface|null
 *                                     <p>null if no such sibling exists.</p>
 */
public function nextNonWhitespaceSibling()
{
    /** @var \DOMNode|null $node */
    $node = $this->node->nextSibling;

    // Compare with '' explicitly: a plain truthiness test would also skip
    // a sibling whose text is "0", which is not whitespace.
    while ($node !== null && \trim((string) $node->textContent) === '') {
        /** @var \DOMNode|null $node */
        $node = $node->nextSibling;
    }

    if ($node === null) {
        return null;
    }

    return new static($node);
}
645
|
|
|
|
646
|
|
|
/**
 * Returns the parent of node.
 *
 * @return SimpleHtmlDomInterface
 *                                <p>The wrapped parent, or a SimpleHtmlDomBlank if the node has no parent.</p>
 */
public function parentNode(): SimpleHtmlDomInterface
{
    /** @var \DOMNode|null $parent */
    $parent = $this->node->parentNode;

    // A detached node or a document has no parent; the constructor does not accept null,
    // so hand out the same blank placeholder getElementByTagName() uses for "not found".
    if ($parent === null) {
        return new SimpleHtmlDomBlank();
    }

    return new static($parent);
}
655
|
|
|
|
656
|
|
|
/**
 * Returns the previous sibling of node.
 *
 * @return SimpleHtmlDomInterface|null
 *                                     <p>null if the node is the first one of its parent.</p>
 */
public function previousSibling()
{
    /** @var \DOMNode|null $sibling */
    $sibling = $this->node->previousSibling;

    return $sibling === null ? null : new static($sibling);
}
672
|
|
|
|
673
|
|
|
/**
 * Get or set the value of a form element.
 *
 * Getter (no argument / null):
 * - text inputs (type "text" or no type): the "value" attribute
 * - checked checkboxes / radios: the "value" attribute
 * - select: the "value" attributes of all options marked as "selected"
 * - textarea: the node value
 *
 * Setter (any other argument):
 * - checkboxes / radios: "checked" is set if the element's value matches, removed otherwise
 * - select: "selected" is set on matching options and removed from all others
 * - input / textarea: the value is written if it is a string
 *
 * @param string|string[]|null $value <p>
 *                                    null === get the current input value
 *                                    text === set a new input value
 *                                    </p>
 *
 * @return string|string[]|null
 *                              <p>The current value when used as getter (null if there is none); always null when used as setter.</p>
 */
public function val($value = null)
{
    if ($value === null) {
        // NOTE(review): "tag" is not declared in this class; presumably resolved by the parent class.
        if (
            $this->tag === 'input'
            &&
            (
                $this->getAttribute('type') === 'text'
                ||
                !$this->hasAttribute('type')
            )
        ) {
            return $this->getAttribute('value');
        }

        if (
            $this->hasAttribute('checked')
            &&
            \in_array($this->getAttribute('type'), ['checkbox', 'radio'], true)
        ) {
            return $this->getAttribute('value');
        }

        if ($this->node->nodeName === 'select') {
            $valuesFromDom = [];
            $options = $this->getElementsByTagName('option');
            if ($options instanceof SimpleHtmlDomNode) {
                foreach ($options as $option) {
                    // The state lives on each option as "selected" (the attribute the
                    // setter below writes), not as "checked" on the select itself.
                    if ($option->hasAttribute('selected')) {
                        $valuesFromDom[] = (string) $option->getAttribute('value');
                    }
                }
            }

            if (\count($valuesFromDom) === 0) {
                return null;
            }

            return $valuesFromDom;
        }

        if ($this->node->nodeName === 'textarea') {
            return $this->node->nodeValue;
        }

        return null;
    }

    // A single string is handled as a one-element list, so that the documented
    // string[] input (e.g. for multi-selects) works as well.
    $wantedValues = (array) $value;

    if (\in_array($this->getAttribute('type'), ['checkbox', 'radio'], true)) {
        if (\in_array($this->getAttribute('value'), $wantedValues, true)) {
            $this->setAttribute('checked', 'checked');
        } else {
            $this->removeAttribute('checked');
        }
    } elseif ($this->node instanceof \DOMElement && $this->node->nodeName === 'select') {
        foreach ($this->node->getElementsByTagName('option') as $option) {
            /** @var \DOMElement $option */
            if (\in_array($option->getAttribute('value'), $wantedValues, true)) {
                $option->setAttribute('selected', 'selected');
            } else {
                $option->removeAttribute('selected');
            }
        }
    } elseif ($this->node->nodeName === 'input' && \is_string($value)) {
        // Set value for input elements
        $this->setAttribute('value', $value);
    } elseif ($this->node->nodeName === 'textarea' && \is_string($value)) {
        $this->node->nodeValue = $value;
    }

    return null;
}
757
|
|
|
|
758
|
|
|
/**
 * Strip the wrapper nodes the DOM parser added around a parsed fragment.
 *
 * If the parser reports that the input came without html / html-wrapper,
 * the doctype is removed, "html" and "body" are replaced by their children
 * and the first "p" element is renamed to "simpleHtmlDomP". Optionally the
 * first "head" element below the parent of the wrapped node is replaced by
 * its children as well.
 *
 * @param HtmlDomParser $newDocument        <p>The freshly parsed fragment.</p>
 * @param bool          $removeExtraHeadTag <p>Also unwrap "head" below the parent of the wrapped node.</p>
 *
 * @return HtmlDomParser
 *                       <p>The same parser instance that was passed in.</p>
 */
protected function cleanHtmlWrapper(
    HtmlDomParser $newDocument,
    $removeExtraHeadTag = false
): HtmlDomParser {
    if (
        $newDocument->getIsDOMDocumentCreatedWithoutHtml()
        ||
        $newDocument->getIsDOMDocumentCreatedWithoutHtmlWrapper()
    ) {
        $document = $newDocument->getDocument();

        // Remove doc-type node.
        $doctype = $document->doctype;
        if ($doctype !== null && $doctype->parentNode !== null) {
            $doctype->parentNode->removeChild($doctype);
        }

        // Remove html element, preserving child nodes.
        $html = $document->getElementsByTagName('html')->item(0);
        if ($html !== null) {
            $this->unwrapElementKeepingChildren($html);
        }

        // Remove body element, preserving child nodes.
        $body = $document->getElementsByTagName('body')->item(0);
        if ($body instanceof \DOMElement) {
            $this->unwrapElementKeepingChildren($body);

            // At this point DOMDocument still added a "<p>"-wrapper around our string,
            // so we replace it with "<simpleHtmlDomP>" and delete this at the ending ...
            $item = $document->getElementsByTagName('p')->item(0);
            if ($item !== null) {
                $this->changeElementName($item, 'simpleHtmlDomP');
            }
        }
    }

    // Remove head element, preserving child nodes.
    if (
        $removeExtraHeadTag
        &&
        $this->node->parentNode instanceof \DOMElement
        &&
        $newDocument->getIsDOMDocumentCreatedWithoutHeadWrapper()
    ) {
        $head = $this->node->parentNode->getElementsByTagName('head')->item(0);
        if ($head !== null) {
            $this->unwrapElementKeepingChildren($head);
        }
    }

    return $newDocument;
}

/**
 * Replace an element by its own child nodes, keeping their order.
 *
 * Does nothing if the element has no owner document or no parent.
 *
 * @param \DOMNode $element <p>The wrapper element to dissolve.</p>
 */
private function unwrapElementKeepingChildren(\DOMNode $element)
{
    $document = $element->ownerDocument;
    $parent = $element->parentNode;
    if ($document === null || $parent === null) {
        return;
    }

    // Appending a child to the fragment detaches it from the element,
    // so the loop ends once the element is empty.
    $fragment = $document->createDocumentFragment();
    while (($child = $element->firstChild) !== null) {
        $fragment->appendChild($child);
    }

    $parent->replaceChild($fragment, $element);
}
847
|
|
|
|
848
|
|
|
/**
 * Retrieve an external iterator.
 *
 * Wraps every direct child of the wrapped node in a new instance of
 * this class and collects them in a SimpleHtmlDomNode.
 *
 * @see http://php.net/manual/en/iteratoraggregate.getiterator.php
 *
 * @return SimpleHtmlDomNode
 *                           <p>
 *                           An instance of an object implementing <b>Iterator</b> or
 *                           <b>Traversable</b>; empty if the node has no children.
 *                           </p>
 */
public function getIterator(): SimpleHtmlDomNodeInterface
{
    $children = new SimpleHtmlDomNode();

    if (!$this->node->hasChildNodes()) {
        return $children;
    }

    foreach ($this->node->childNodes as $childNode) {
        $children[] = new static($childNode);
    }

    return $children;
}
870
|
|
|
|
871
|
|
|
/**
 * Get dom node's inner xml.
 *
 * Delegates to a parser created from this node via getHtmlDomParser().
 *
 * @param bool $multiDecodeNewHtmlEntity <p>Passed through unchanged to the parser.</p>
 *
 * @return string
 */
public function innerXml(bool $multiDecodeNewHtmlEntity = false): string
{
    $parser = $this->getHtmlDomParser();

    return $parser->innerXml($multiDecodeNewHtmlEntity);
}
882
|
|
|
|
883
|
|
|
/**
 * Normalize the given input for comparison.
 *
 * A parser is reduced to its outer text (without "<head>" / "</head>" if the
 * parser reports that the head wrapper was added by itself); anything else is
 * cast to string. The text is then lower-cased, stripped of spaces and line
 * breaks, "/>" is turned into ">", and the trimmed result is url-decoded and
 * url-encoded again.
 *
 * @param HtmlDomParser|string $input
 *
 * @return string
 */
private function normalizeStringForComparision($input): string
{
    if ($input instanceof HtmlDomParser) {
        $string = $input->outerText();

        if ($input->getIsDOMDocumentCreatedWithoutHeadWrapper()) {
            /** @noinspection HtmlRequiredTitleElement */
            $string = \str_replace(['<head>', '</head>'], '', $string);
        }
    } else {
        $string = (string) $input;
    }

    $lowered = \strtolower($string);

    // Whitespace and the self-closing slash are formatting differences only.
    $search = [' ', "\n", "\r", '/>'];
    $replace = ['', '', '', '>'];
    $compact = \trim(\str_replace($search, $replace, $lowered));

    // Decoding first and encoding afterwards gives both sides the same escaping.
    $decoded = \urldecode($compact);

    return \urlencode($decoded);
}
926
|
|
|
} |
927
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.