1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace voku\helper; |
6
|
|
|
|
7
|
|
|
use BadMethodCallException; |
8
|
|
|
use DOMElement; |
9
|
|
|
use DOMNode; |
10
|
|
|
use RuntimeException; |
11
|
|
|
|
12
|
|
|
/** |
13
|
|
|
* @property string $outertext |
14
|
|
|
* <p>Get dom node's outer html (alias for "outerHtml").</p> |
15
|
|
|
* @property string $outerhtml |
16
|
|
|
* <p>Get dom node's outer html.</p> |
17
|
|
|
* @property string $innertext |
18
|
|
|
* <p>Get dom node's inner html (alias for "innerHtml").</p> |
19
|
|
|
* @property string $innerhtml |
20
|
|
|
* <p>Get dom node's inner html.</p> |
21
|
|
|
* @property string $plaintext |
22
|
|
|
* <p>Get dom node's plain text.</p> |
23
|
|
|
* |
24
|
|
|
* @property-read string $tag |
25
|
|
|
* <p>Get dom node name.</p> |
26
|
|
|
* @property-read array|null $attr |
27
|
|
|
* <p>Get dom node attributes.</p> |
28
|
|
|
* @property-read string $text |
29
|
|
|
* <p>Get dom node's plain text.</p> |
30
|
|
|
* @property-read string $html |
31
|
|
|
* <p>Get dom node's outer html.</p> |
32
|
|
|
* |
33
|
|
|
* @method SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNode|null children() children($idx = -1) |
34
|
|
|
* <p>Returns children of |
35
|
|
|
* node.</p> |
36
|
|
|
* @method SimpleHtmlDom|null first_child() |
37
|
|
|
* <p>Returns the first child of node.</p> |
38
|
|
|
* @method SimpleHtmlDom|null last_child() |
39
|
|
|
* <p>Returns the last child of node.</p> |
40
|
|
|
* @method SimpleHtmlDom|null next_sibling() |
41
|
|
|
* <p>Returns the next sibling of node.</p> |
42
|
|
|
* @method SimpleHtmlDom|null prev_sibling() |
43
|
|
|
* <p>Returns the previous sibling of node.</p> |
44
|
|
|
* @method SimpleHtmlDom|null parent() |
45
|
|
|
* <p>Returns the parent of node.</p> |
46
|
|
|
* @method string outerText() |
47
|
|
|
* <p>Get dom node's outer html (alias for "outerHtml()").</p> |
48
|
|
|
* @method string outerHtml() |
49
|
|
|
* <p>Get dom node's outer html.</p> |
50
|
|
|
* @method string innerText() |
51
|
|
|
* <p>Get dom node's inner html (alias for "innerHtml()").</p> |
52
|
|
|
*/ |
53
|
|
|
class SimpleHtmlDom implements \IteratorAggregate |
54
|
|
|
{ |
55
|
|
|
/**
 * Lower-cased alias names accepted by "__call()", mapped to the real
 * method names of this class.
 *
 * @var array<string, string>
 */
protected static $functionAliases = [
    'children'     => 'childNodes',
    'first_child'  => 'firstChild',
    'last_child'   => 'lastChild',
    'next_sibling' => 'nextSibling',
    'prev_sibling' => 'previousSibling',
    'parent'       => 'parentNode',
    'outertext'    => 'html',
    'outerhtml'    => 'html',
    'innertext'    => 'innerHtml',
    'innerhtml'    => 'innerHtml',
];
70
|
|
|
|
71
|
|
|
/**
 * The DOM node wrapped by this object.
 *
 * @var DOMElement|DOMNode
 */
protected $node;
75
|
|
|
|
76
|
|
|
/**
 * Wrap the given DOM node.
 *
 * @param DOMElement|DOMNode $node <p>The node every other method of this object operates on.</p>
 */
public function __construct(DOMNode $node)
{
    $this->node = $node;
}
83
|
|
|
|
84
|
|
|
/**
 * Dispatch calls to alias method names (see "$functionAliases").
 *
 * The called name is lower-cased before it is looked up.
 *
 * @param string $name
 * @param array  $arguments
 *
 * @throws \BadMethodCallException if the name is not a known alias
 *
 * @return SimpleHtmlDom|string|null
 */
public function __call($name, $arguments)
{
    $alias = \strtolower($name);

    if (!isset(self::$functionAliases[$alias])) {
        throw new BadMethodCallException('Method does not exist');
    }

    $method = self::$functionAliases[$alias];

    return $this->{$method}(...$arguments);
}
102
|
|
|
|
103
|
|
|
/**
 * Magic property reader.
 *
 * Known virtual properties are matched case-insensitively. Anything else is
 * read from the wrapped node when it declares a property with the exact
 * (original-case) name, otherwise from the html-attribute with the
 * lower-cased name.
 *
 * @param string $name
 *
 * @return array|string|null
 */
public function __get($name)
{
    $key = \strtolower($name);

    if ($key === 'outerhtml' || $key === 'outertext' || $key === 'html') {
        return $this->html();
    }

    if ($key === 'innerhtml' || $key === 'innertext') {
        return $this->innerHtml();
    }

    if ($key === 'text' || $key === 'plaintext') {
        return $this->text();
    }

    if ($key === 'tag') {
        return $this->node->nodeName;
    }

    if ($key === 'attr') {
        return $this->getAllAttributes();
    }

    // Native node property: looked up with the original spelling.
    if (\property_exists($this->node, $name)) {
        return $this->node->{$name};
    }

    return $this->getAttribute($key);
}
136
|
|
|
|
137
|
|
|
/**
 * Calling the object like a function is a shortcut for "find()".
 *
 * @param string   $selector
 * @param int|null $idx
 *
 * @return SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeInterface
 */
public function __invoke($selector, $idx = null)
{
    $result = $this->find($selector, $idx);

    return $result;
}
147
|
|
|
|
148
|
|
|
/**
 * Magic isset() support, kept consistent with the names "__get()" serves.
 *
 * Virtual properties that "__get()" always answers with a string report
 * true. "attr" reports whether "getAllAttributes()" has something to
 * return, because "__get('attr')" yields null otherwise. Any other name is
 * checked on the wrapped node (original spelling) and then as an
 * html-attribute (lower-cased name).
 *
 * @param string $name
 *
 * @return bool
 */
public function __isset($name)
{
    $nameOrig = $name;
    $name = \strtolower($name);

    switch ($name) {
        case 'outertext':
        case 'outerhtml':
        case 'html':
        case 'innertext':
        case 'innerhtml':
        case 'plaintext':
        case 'text':
        case 'tag':
            return true;
        case 'attr':
            return $this->getAllAttributes() !== null;
        default:
            if (\property_exists($this->node, $nameOrig)) {
                return isset($this->node->{$nameOrig});
            }

            return $this->hasAttribute($name);
    }
}
175
|
|
|
|
176
|
|
|
/**
 * Magic property writer.
 *
 * Virtual properties (matched case-insensitively) replace the node, its
 * children or its text. Any other name is written to the wrapped node when
 * it declares a property with the original spelling, otherwise it is set
 * as html-attribute using the lower-cased name.
 *
 * @param string $name
 * @param mixed  $value
 *
 * @return SimpleHtmlDom|null
 */
public function __set($name, $value)
{
    $key = \strtolower($name);

    if ($key === 'outerhtml' || $key === 'outertext') {
        return $this->replaceNodeWithString($value);
    }

    if ($key === 'innertext' || $key === 'innerhtml') {
        return $this->replaceChildWithString($value);
    }

    if ($key === 'plaintext') {
        return $this->replaceTextWithString($value);
    }

    // Native node property: written with the original spelling.
    if (\property_exists($this->node, $name)) {
        return $this->node->{$name} = $value;
    }

    return $this->setAttribute($key, $value);
}
204
|
|
|
|
205
|
|
|
/**
 * String conversion yields the result of "html()".
 *
 * @return string
 */
public function __toString()
{
    $outerHtml = $this->html();

    return $outerHtml;
}
212
|
|
|
|
213
|
|
|
/**
 * Magic unset(): removes the html-attribute with the given name.
 *
 * The name is lower-cased first, exactly like "__get()", "__isset()" and
 * "__set()" do before touching attributes, so that an attribute written
 * via "$element->Foo = ..." can be removed via "unset($element->Foo)".
 *
 * @param string $name
 *
 * @return void
 */
public function __unset($name)
{
    $this->removeAttribute(\strtolower($name));
}
222
|
|
|
|
223
|
|
|
/**
 * Returns the children of the node.
 *
 * @param int $idx <p>-1 (default) returns all children, any other value
 *                 returns the child stored at that offset or null.</p>
 *
 * @return SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeInterface|null
 */
public function childNodes(int $idx = -1)
{
    $children = $this->getIterator();

    return $idx === -1 ? $children : ($children[$idx] ?? null);
}
240
|
|
|
|
241
|
|
|
/**
 * Find nodes with a CSS selector.
 *
 * The lookup is delegated to a "HtmlDomParser" created from this node.
 *
 * @param string   $selector
 * @param int|null $idx
 *
 * @return SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeInterface
 */
public function find(string $selector, $idx = null)
{
    $parser = $this->getHtmlDomParser();

    return $parser->find($selector, $idx);
}
253
|
|
|
|
254
|
|
|
/**
 * Find one node with a CSS selector (shortcut for "find($selector, 0)").
 *
 * @param string $selector
 *
 * @return SimpleHtmlDom
 */
public function findOne(string $selector): self
{
    $firstMatch = $this->find($selector, 0);

    return $firstMatch;
}
265
|
|
|
|
266
|
|
|
/**
 * Find all nodes matching a CSS selector (shortcut for "find($selector, null)").
 *
 * @param string $selector
 *
 * @return SimpleHtmlDom[]|SimpleHtmlDomNodeInterface
 */
public function findMulti(string $selector)
{
    $matches = $this->find($selector, null);

    return $matches;
}
277
|
|
|
|
278
|
|
|
/**
 * Returns the first child of the node, wrapped in a new object.
 *
 * @return SimpleHtmlDom|null <p>null if the node has no first child.</p>
 */
public function firstChild()
{
    /** @var DOMNode|null $child */
    $child = $this->node->firstChild;

    return $child === null ? null : new self($child);
}
294
|
|
|
|
295
|
|
|
/**
 * Returns all attributes of the node as "name => value" map.
 *
 * Every value is passed through
 * "HtmlDomParser::putReplacedBackToPreserveHtmlEntities()".
 *
 * @return array|null <p>null if the node has no attributes.</p>
 */
public function getAllAttributes()
{
    if (!$this->node->hasAttributes()) {
        return null;
    }

    $result = [];
    foreach ($this->node->attributes as $attribute) {
        $result[$attribute->name] = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($attribute->value);
    }

    return $result;
}
313
|
|
|
|
314
|
|
|
/**
 * Return the value of an html-attribute.
 *
 * @param string $name
 *
 * @return string <p>Empty string if the wrapped node is not a DOMElement.</p>
 */
public function getAttribute(string $name): string
{
    if (!$this->node instanceof DOMElement) {
        return '';
    }

    $rawValue = $this->node->getAttribute($name);

    return HtmlDomParser::putReplacedBackToPreserveHtmlEntities($rawValue);
}
331
|
|
|
|
332
|
|
|
/**
 * Return element by #id.
 *
 * The id is turned into the CSS selector "#<id>" and handed to "findOne()".
 *
 * @param string $id
 *
 * @return SimpleHtmlDom
 */
public function getElementById(string $id): self
{
    // "{$id}" instead of "${id}": the latter form is deprecated since PHP 8.2.
    return $this->findOne("#{$id}");
}
343
|
|
|
|
344
|
|
|
/**
 * Returns elements by #id.
 *
 * The id is turned into the CSS selector "#<id>" and handed to "find()".
 *
 * @param string   $id
 * @param int|null $idx
 *
 * @return SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeInterface
 */
public function getElementsById(string $id, $idx = null)
{
    // "{$id}" instead of "${id}": the latter form is deprecated since PHP 8.2.
    return $this->find("#{$id}", $idx);
}
356
|
|
|
|
357
|
|
|
/**
 * Return elements by .class.
 *
 * The class is turned into the CSS selector ".<class>" and handed to
 * "findMulti()".
 *
 * @param string $class
 *
 * @return SimpleHtmlDom[]|SimpleHtmlDomNodeInterface
 */
public function getElementByClass(string $class)
{
    // "{$class}" instead of "${class}": the latter form is deprecated since PHP 8.2.
    return $this->findMulti(".{$class}");
}
368
|
|
|
|
369
|
|
|
/**
 * Return the first descendant element with the given tag name.
 *
 * @param string $name
 *
 * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank <p>A blank node object if
 *                                              nothing matches or the wrapped
 *                                              node is not a DOMElement.</p>
 */
public function getElementByTagName(string $name)
{
    $firstMatch = $this->node instanceof DOMElement
        ? $this->node->getElementsByTagName($name)->item(0)
        : null;

    return $firstMatch === null
        ? new SimpleHtmlDomNodeBlank()
        : new self($firstMatch);
}
390
|
|
|
|
391
|
|
|
/**
 * Returns descendant elements by tag name.
 *
 * @param string   $name
 * @param int|null $idx <p>null returns all matches; a negative value counts
 *                      from the end of the match list.</p>
 *
 * @return SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeInterface
 */
public function getElementsByTagName(string $name, $idx = null)
{
    // Only DOMElement can be searched; everything else yields no matches.
    $matches = $this->node instanceof DOMElement
        ? $this->node->getElementsByTagName($name)
        : [];

    $elements = new SimpleHtmlDomNode();
    foreach ($matches as $match) {
        $elements[] = new self($match);
    }

    if ($idx === null) {
        return $elements;
    }

    // Translate a negative index into an offset from the end.
    $position = $idx < 0 ? \count($elements) + $idx : $idx;

    // Missing offset: fall back to a wrapper around a fresh DOMNode.
    return $elements[$position] ?? new self(new DOMNode());
}
426
|
|
|
|
427
|
|
|
/**
 * Create a new "HtmlDomParser"-object with this object as its input.
 *
 * @return HtmlDomParser
 */
public function getHtmlDomParser(): HtmlDomParser
{
    $parser = new HtmlDomParser($this);

    return $parser;
}
436
|
|
|
|
437
|
|
|
/**
 * Retrieve an external iterator over the direct children of the node.
 *
 * @see http://php.net/manual/en/iteratoraggregate.getiterator.php
 *
 * @return SimpleHtmlDomNode <p>Contains one wrapper object per child node;
 *                           empty if the node has no children.</p>
 */
public function getIterator(): SimpleHtmlDomNode
{
    $children = new SimpleHtmlDomNode();

    if (!$this->node->hasChildNodes()) {
        return $children;
    }

    foreach ($this->node->childNodes as $childNode) {
        $children[] = new self($childNode);
    }

    return $children;
}
459
|
|
|
|
460
|
|
|
/**
 * Expose the wrapped DOM node.
 *
 * @return DOMNode
 */
public function getNode(): DOMNode
{
    return $this->node;
}
467
|
|
|
|
468
|
|
|
/**
 * Determine if an attribute exists on the element.
 *
 * @param string $name
 *
 * @return bool <p>Always false if the wrapped node is not a DOMElement.</p>
 */
public function hasAttribute(string $name): bool
{
    return $this->node instanceof DOMElement
           && $this->node->hasAttribute($name);
}
483
|
|
|
|
484
|
|
|
/**
 * Get dom node's outer html.
 *
 * Delegates to "html()" of a "HtmlDomParser" created from this node.
 *
 * @param bool $multiDecodeNewHtmlEntity <p>Passed through unchanged.</p>
 *
 * @return string
 */
public function html(bool $multiDecodeNewHtmlEntity = false): string
{
    $parser = $this->getHtmlDomParser();

    return $parser->html($multiDecodeNewHtmlEntity);
}
495
|
|
|
|
496
|
|
|
/**
 * Get dom node's inner html.
 *
 * Delegates to "innerHtml()" of a "HtmlDomParser" created from this node.
 *
 * @param bool $multiDecodeNewHtmlEntity <p>Passed through unchanged.</p>
 *
 * @return string
 */
public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string
{
    $parser = $this->getHtmlDomParser();

    return $parser->innerHtml($multiDecodeNewHtmlEntity);
}
507
|
|
|
|
508
|
|
|
/**
 * Returns the last child of the node, wrapped in a new object.
 *
 * @return SimpleHtmlDom|null <p>null if the node has no last child.</p>
 */
public function lastChild()
{
    /** @var DOMNode|null $child */
    $child = $this->node->lastChild;

    return $child === null ? null : new self($child);
}
524
|
|
|
|
525
|
|
|
/**
 * Returns the next sibling of the node, wrapped in a new object.
 *
 * @return SimpleHtmlDom|null <p>null if there is no next sibling.</p>
 */
public function nextSibling()
{
    /** @var DOMNode|null $sibling */
    $sibling = $this->node->nextSibling;

    return $sibling === null ? null : new self($sibling);
}
541
|
|
|
|
542
|
|
|
/**
 * Returns the parent of the node, wrapped in a new object.
 *
 * NOTE(review): the constructor only accepts a DOMNode, so this presumably
 * expects the wrapped node to have a parent - confirm against callers.
 *
 * @return SimpleHtmlDom
 */
public function parentNode(): self
{
    $parent = $this->node->parentNode;

    return new self($parent);
}
551
|
|
|
|
552
|
|
|
/**
 * Nodes can get partially destroyed: they are still an actual DOM node
 * object (such as \DOMElement), but almost their entire body is gone,
 * including the `nodeType` attribute. That attribute is what gets probed.
 *
 * @return bool true if node has been destroyed
 */
public function isRemoved(): bool
{
    $hasNodeType = isset($this->node->nodeType);

    return !$hasNodeType;
}
563
|
|
|
|
564
|
|
|
/**
 * Returns the previous sibling of the node, wrapped in a new object.
 *
 * @return SimpleHtmlDom|null <p>null if there is no previous sibling.</p>
 */
public function previousSibling()
{
    /** @var DOMNode|null $sibling */
    $sibling = $this->node->previousSibling;

    return $sibling === null ? null : new self($sibling);
}
580
|
|
|
|
581
|
|
|
/**
 * Replace all child nodes with the nodes parsed from the given html string.
 *
 * A non-empty string is parsed with "HtmlDomParser" and rejected when the
 * normalized parser output differs from the normalized input. An empty
 * string only removes the current children.
 *
 * @param string $string
 *
 * @throws RuntimeException if the string is not a valid html fragment
 *
 * @return SimpleHtmlDom
 */
protected function replaceChildWithString(string $string): self
{
    $newDocument = null;

    if (!empty($string)) {
        $newDocument = new HtmlDomParser($string);

        $tmpDomString = $this->normalizeStringForComparision($newDocument);
        $tmpStr = $this->normalizeStringForComparision($string);
        if ($tmpDomString !== $tmpStr) {
            throw new RuntimeException(
                'Not valid HTML fragment!' . "\n" .
                $tmpDomString . "\n" .
                $tmpStr
            );
        }
    }

    // "childNodes" is a live list: removing nodes while iterating over it
    // skips siblings, so the nodes are collected first and removed afterwards.
    $oldChildren = [];
    foreach ($this->node->childNodes as $child) {
        $oldChildren[] = $child;
    }
    foreach ($oldChildren as $oldChild) {
        /** @noinspection UnusedFunctionResultInspection */
        $this->node->removeChild($oldChild);
    }

    if ($newDocument !== null) {
        $newDocument = $this->cleanHtmlWrapper($newDocument);
        $ownerDocument = $this->node->ownerDocument;
        if ($ownerDocument !== null) {
            $newNode = $ownerDocument->importNode($newDocument->getDocument()->documentElement, true);
            /** @noinspection UnusedFunctionResultInspection */
            $this->node->appendChild($newNode);
        }
    }

    return $this;
}
624
|
|
|
|
625
|
|
|
/**
 * Replace this node with a text node.
 *
 * An empty value removes the node from its parent instead. Without an
 * owner document nothing is replaced.
 *
 * @param string $string
 *
 * @return SimpleHtmlDom
 */
protected function replaceTextWithString($string): self
{
    if (empty($string)) {
        $this->node->parentNode->removeChild($this->node);

        return $this;
    }

    $document = $this->node->ownerDocument;
    if ($document === null) {
        return $this;
    }

    $textNode = $document->importNode($document->createTextNode($string), true);
    $this->node->parentNode->replaceChild($textNode, $this->node);

    // From now on this object wraps the new text node.
    $this->node = $textNode;

    return $this;
}
650
|
|
|
|
651
|
|
|
/**
 * Replace this node with the node parsed from the given html string.
 *
 * An empty string removes the node from its parent. Otherwise the string
 * is parsed with "HtmlDomParser", validated by comparing the normalized
 * parser output with the normalized input, imported into the owner
 * document and swapped in. Without an owner document nothing is replaced.
 *
 * @param string $string
 *
 * @throws RuntimeException if the string is not a valid html fragment
 *
 * @return SimpleHtmlDom
 */
protected function replaceNodeWithString(string $string): self
{
    if (empty($string)) {
        $this->node->parentNode->removeChild($this->node);

        return $this;
    }

    $parsed = new HtmlDomParser($string);

    $normalizedDom = $this->normalizeStringForComparision($parsed);
    $normalizedInput = $this->normalizeStringForComparision($string);
    if ($normalizedDom !== $normalizedInput) {
        throw new RuntimeException(
            'Not valid HTML fragment!' . "\n"
            . $normalizedDom . "\n" .
            $normalizedInput
        );
    }

    $parsed = $this->cleanHtmlWrapper($parsed, true);

    $document = $this->node->ownerDocument;
    if ($document === null) {
        return $this;
    }

    $replacement = $document->importNode($parsed->getDocument()->documentElement, true);

    $this->node->parentNode->replaceChild($replacement, $this->node);

    // From now on this object wraps the imported node.
    $this->node = $replacement;

    // Remove head element, preserving child nodes. (again)
    if ($parsed->getIsDOMDocumentCreatedWithoutHeadWrapper()) {
        $parent = $this->node->parentNode;
        $head = $parent->getElementsByTagName('head')[0];

        if ($parent->ownerDocument !== null) {
            $headChildren = $parent->ownerDocument->createDocumentFragment();

            if ($head !== null) {
                /** @var DOMNode $head */
                while ($head->childNodes->length > 0) {
                    /** @noinspection UnusedFunctionResultInspection */
                    $headChildren->appendChild($head->childNodes->item(0));
                }

                /** @noinspection UnusedFunctionResultInspection */
                $head->parentNode->replaceChild($headChildren, $head);
            }
        }
    }

    return $this;
}
709
|
|
|
|
710
|
|
|
/**
 * Normalize the given input for comparision.
 *
 * Steps: take the outer text of a parser (without "<head>" / "</head>" if
 * the parser reports it was created without head wrapper) or cast the
 * input to string; lower-case it; drop spaces and line breaks; turn "/>"
 * into ">"; trim; url-decode and url-encode again.
 *
 * @param HtmlDomParser|string $input
 *
 * @return string
 */
private function normalizeStringForComparision($input): string
{
    if ($input instanceof HtmlDomParser) {
        $raw = $input->outerText();

        if ($input->getIsDOMDocumentCreatedWithoutHeadWrapper()) {
            /** @noinspection HtmlRequiredTitleElement */
            $raw = \str_replace(['<head>', '</head>'], '', $raw);
        }
    } else {
        $raw = (string) $input;
    }

    $lowered = \strtolower($raw);

    $compacted = \str_replace(
        [' ', "\n", "\r", '/>'],
        ['', '', '', '>'],
        $lowered
    );

    $decoded = \urldecode(\trim($compacted));

    return \urlencode($decoded);
}
753
|
|
|
|
754
|
|
|
/**
 * Strip the wrapper nodes from a freshly parsed fragment.
 *
 * If the parser reports that it was created without html or without html
 * wrapper, the doc-type node is removed and the "html" and "body" elements
 * are replaced by their own children. After the body was unwrapped, the
 * first "p" element is renamed to "simpleHtmlDomP".
 *
 * If requested and the parser reports that it was created without head
 * wrapper, the first "head" element below the parent of the wrapped node is
 * replaced by its own children as well.
 *
 * @param HtmlDomParser $newDocument
 * @param bool          $removeExtraHeadTag
 *
 * @return HtmlDomParser <p>The same parser object that was passed in.</p>
 */
protected function cleanHtmlWrapper(HtmlDomParser $newDocument, $removeExtraHeadTag = false): HtmlDomParser
{
    $createdWithoutWrapper = $newDocument->getIsDOMDocumentCreatedWithoutHtml()
                             || $newDocument->getIsDOMDocumentCreatedWithoutHtmlWrapper();

    if ($createdWithoutWrapper) {
        // Remove doc-type node.
        if ($newDocument->getDocument()->doctype !== null) {
            /** @noinspection UnusedFunctionResultInspection */
            $newDocument->getDocument()->doctype->parentNode->removeChild($newDocument->getDocument()->doctype);
        }

        // Remove html element, preserving child nodes.
        $htmlElement = $newDocument->getDocument()->getElementsByTagName('html')->item(0);
        $htmlChildren = $newDocument->getDocument()->createDocumentFragment();
        if ($htmlElement !== null) {
            while ($htmlElement->firstChild !== null) {
                /** @noinspection UnusedFunctionResultInspection */
                $htmlChildren->appendChild($htmlElement->firstChild);
            }

            /** @noinspection UnusedFunctionResultInspection */
            $htmlElement->parentNode->replaceChild($htmlChildren, $htmlElement);
        }

        // Remove body element, preserving child nodes.
        $bodyElement = $newDocument->getDocument()->getElementsByTagName('body')->item(0);
        $bodyChildren = $newDocument->getDocument()->createDocumentFragment();
        if ($bodyElement instanceof \DOMElement) {
            while ($bodyElement->firstChild !== null) {
                /** @noinspection UnusedFunctionResultInspection */
                $bodyChildren->appendChild($bodyElement->firstChild);
            }

            /** @noinspection UnusedFunctionResultInspection */
            $bodyElement->parentNode->replaceChild($bodyChildren, $bodyElement);

            // DOMDocument may still have put a "<p>"-wrapper around the string,
            // so the first "<p>" is renamed to "<simpleHtmlDomP>"; the renamed
            // tag is presumably dropped later on (not visible here).
            $paragraph = $newDocument->getDocument()->getElementsByTagName('p')->item(0);
            if ($paragraph !== null) {
                /** @noinspection UnusedFunctionResultInspection */
                $this->changeElementName($paragraph, 'simpleHtmlDomP');
            }
        }
    }

    // Remove head element, preserving child nodes.
    if ($removeExtraHeadTag && $newDocument->getIsDOMDocumentCreatedWithoutHeadWrapper()) {
        $parent = $this->node->parentNode;
        $headElement = $parent->getElementsByTagName('head')[0];

        if ($parent->ownerDocument !== null) {
            $headChildren = $parent->ownerDocument->createDocumentFragment();

            if ($headElement !== null) {
                /** @var DOMNode $headElement */
                while ($headElement->firstChild !== null) {
                    /** @noinspection UnusedFunctionResultInspection */
                    $headChildren->appendChild($headElement->firstChild);
                }

                /** @noinspection UnusedFunctionResultInspection */
                $headElement->parentNode->replaceChild($headChildren, $headElement);
            }
        }
    }

    return $newDocument;
}
830
|
|
|
|
831
|
|
|
/**
 * Change the name of a tag in a "DOMNode".
 *
 * A new element with the requested name is created in the owner document,
 * receives imported copies of the children and the attributes of the old
 * node, and then replaces the old node as child of the owner document.
 *
 * @param DOMNode $node
 * @param string  $name
 *
 * @return DOMElement|false
 *                          <p>The new DOMElement, or false if the node has
 *                          no owner document.</p>
 */
protected function changeElementName(\DOMNode $node, string $name)
{
    $ownerDocument = $node->ownerDocument;
    if (!$ownerDocument) {
        return false;
    }

    $newNode = $ownerDocument->createElement($name);

    foreach ($node->childNodes as $child) {
        $child = $ownerDocument->importNode($child, true);
        /** @noinspection UnusedFunctionResultInspection */
        $newNode->appendChild($child);
    }

    // "attributes" is null for nodes that are not elements.
    if ($node->attributes !== null) {
        foreach ($node->attributes as $attrName => $attrNode) {
            // The map yields DOMAttr objects, "setAttribute()" needs the string value.
            /** @noinspection UnusedFunctionResultInspection */
            $newNode->setAttribute($attrName, $attrNode->value);
        }
    }

    /** @noinspection UnusedFunctionResultInspection */
    $newNode->ownerDocument->replaceChild($newNode, $node);

    return $newNode;
}
866
|
|
|
|
867
|
|
|
/**
 * Set attribute value.
 *
 * Nothing happens if the wrapped node is not a DOMElement (same guard as
 * in "getAttribute()" and "hasAttribute()").
 *
 * @param string      $name   <p>The name of the html-attribute.</p>
 * @param string|null $value  <p>Set to NULL or empty string, to remove the attribute.</p>
 * @param bool        $strict <p>
 *                            $value must be NULL, to remove the attribute,
 *                            so that you can set an empty string as attribute-value e.g. autofocus=""
 *                            </p>
 *
 * @return SimpleHtmlDom
 */
public function setAttribute(string $name, $value = null, bool $strict = false): self
{
    if (!$this->node instanceof DOMElement) {
        return $this;
    }

    if ($strict) {
        $remove = $value === null;
    } else {
        // "empty()" alone would also drop "0" / 0, which are valid attribute values.
        $remove = empty($value) && !\is_numeric($value);
    }

    if ($remove) {
        $this->node->removeAttribute($name);
    } else {
        // Cast: under strict types "DOMElement::setAttribute()" only accepts strings.
        /** @noinspection UnusedFunctionResultInspection */
        $this->node->setAttribute($name, (string) $value);
    }

    return $this;
}
894
|
|
|
|
895
|
|
|
/**
 * Get or set the value of a form element.
 *
 * Getter (no argument / null):
 *  - "input" of type "text" or without type: the "value" attribute
 *  - checked "checkbox" / "radio": the "value" attribute
 *  - "select": the "value" attributes of all options carrying "selected"
 *    (null if there is none)
 *  - "textarea": the node value
 *  - anything else: null
 *
 * Setter (any other argument), always returns null:
 *  - "checkbox" / "radio": sets "checked" if the value equals the "value"
 *    attribute, removes it otherwise
 *  - "select": sets "selected" on options whose "value" attribute equals
 *    the value, removes it from all other options
 *  - "input": writes the "value" attribute
 *  - "textarea": writes the node value
 *
 * @param string|string[]|null $value <p>
 *                                    null === get the current input value
 *                                    text === set a new input value
 *                                    </p>
 *
 * @return string|string[]|null
 */
public function val($value = null)
{
    if ($value === null) {
        if (
            $this->tag === 'input'
            &&
            (
                $this->getAttribute('type') === 'text'
                ||
                !$this->hasAttribute('type')
            )
        ) {
            return $this->getAttribute('value');
        }

        if (
            $this->hasAttribute('checked')
            &&
            \in_array($this->getAttribute('type'), ['checkbox', 'radio'], true)
        ) {
            return $this->getAttribute('value');
        }

        if ($this->node->nodeName === 'select') {
            $valuesFromDom = [];
            foreach ($this->getElementsByTagName('option') as $option) {
                // The setter below marks options with "selected", so that is
                // the attribute to look for - on the option, not on the select.
                if ($option->hasAttribute('selected')) {
                    /** @noinspection UnnecessaryCastingInspection */
                    $valuesFromDom[] = (string) $option->getAttribute('value');
                }
            }

            if (\count($valuesFromDom) === 0) {
                return null;
            }

            return $valuesFromDom;
        }

        if ($this->node->nodeName === 'textarea') {
            return $this->node->nodeValue;
        }

        return null;
    }

    if (\in_array($this->getAttribute('type'), ['checkbox', 'radio'], true)) {
        if ($value === $this->getAttribute('value')) {
            /** @noinspection UnusedFunctionResultInspection */
            $this->setAttribute('checked', 'checked');
        } else {
            $this->removeAttribute('checked');
        }
    } elseif ($this->node->nodeName === 'select') {
        foreach ($this->node->getElementsByTagName('option') as $option) {
            /** @var DOMElement $option */
            if ($value === $option->getAttribute('value')) {
                /** @noinspection UnusedFunctionResultInspection */
                $option->setAttribute('selected', 'selected');
            } else {
                $option->removeAttribute('selected');
            }
        }
    } elseif ($this->node->nodeName === 'input') {
        // Set value for input elements
        /** @noinspection UnusedFunctionResultInspection */
        $this->setAttribute('value', (string) $value);
    } elseif ($this->node->nodeName === 'textarea') {
        $this->node->nodeValue = (string) $value;
    }

    return null;
}
976
|
|
|
|
977
|
|
|
/**
 * Remove attribute.
 *
 * Nothing happens if the wrapped node is not a DOMElement (same guard as
 * in "getAttribute()" and "hasAttribute()"); plain DOMNode objects have no
 * "removeAttribute()" method.
 *
 * @param string $name <p>The name of the html-attribute.</p>
 *
 * @return SimpleHtmlDom <p>This object, for chaining.</p>
 */
public function removeAttribute(string $name)
{
    if ($this->node instanceof DOMElement) {
        $this->node->removeAttribute($name);
    }

    return $this;
}
990
|
|
|
|
991
|
|
|
/**
 * Get dom node's plain text.
 *
 * The text content of the node is passed through "fixHtmlOutput()" of a
 * "HtmlDomParser" created from this node.
 *
 * @return string
 */
public function text(): string
{
    $parser = $this->getHtmlDomParser();

    return $parser->fixHtmlOutput($this->node->textContent);
}
1000
|
|
|
} |
1001
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.