|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace simplehtmldom; |
|
4
|
|
|
|
|
5
|
|
|
/** |
|
6
|
|
|
* Website: http://sourceforge.net/projects/simplehtmldom/ |
|
7
|
|
|
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/). |
|
8
|
|
|
* |
|
9
|
|
|
* Licensed under The MIT License |
|
10
|
|
|
* See the LICENSE file in the project root for more information. |
|
11
|
|
|
* |
|
12
|
|
|
* Authors: |
|
13
|
|
|
* S.C. Chen |
|
14
|
|
|
* John Schlick |
|
15
|
|
|
* Rus Carroll |
|
16
|
|
|
* logmanoriginal |
|
17
|
|
|
* |
|
18
|
|
|
* Contributors: |
|
19
|
|
|
* Yousuke Kumakura |
|
20
|
|
|
* Vadim Voituk |
|
21
|
|
|
* Antcs |
|
22
|
|
|
* |
|
23
|
|
|
* Version Rev. 2.0-RC2 (415) |
|
24
|
|
|
*/ |
|
25
|
|
|
include_once __DIR__ . '/constants.php'; |
|
26
|
|
|
include_once __DIR__ . '/Debug.php'; |
|
27
|
|
|
|
|
28
|
|
|
/** |
|
29
|
|
|
* HTMLNode class |
|
30
|
|
|
* @property string $innertext |
|
31
|
|
|
* @property string|null $title |
|
32
|
|
|
* @property string|null $alt |
|
33
|
|
|
* @property string|null $src |
|
34
|
|
|
* @property string|null $href |
|
35
|
|
|
* @property string|null $async |
|
36
|
|
|
* @property string|null $defer |
|
37
|
|
|
*/ |
|
38
|
|
|
class HtmlNode |
|
39
|
|
|
{ |
|
40
|
|
|
const HDOM_TYPE_ELEMENT = 1; |
|
41
|
|
|
const HDOM_TYPE_COMMENT = 2; |
|
42
|
|
|
const HDOM_TYPE_TEXT = 3; |
|
43
|
|
|
const HDOM_TYPE_ROOT = 5; |
|
44
|
|
|
const HDOM_TYPE_UNKNOWN = 6; |
|
45
|
|
|
const HDOM_TYPE_CDATA = 7; |
|
46
|
|
|
|
|
47
|
|
|
const HDOM_QUOTE_DOUBLE = 0; |
|
48
|
|
|
const HDOM_QUOTE_SINGLE = 1; |
|
49
|
|
|
const HDOM_QUOTE_NO = 3; |
|
50
|
|
|
|
|
51
|
|
|
const HDOM_INFO_BEGIN = 0; |
|
52
|
|
|
const HDOM_INFO_END = 1; |
|
53
|
|
|
const HDOM_INFO_QUOTE = 2; |
|
54
|
|
|
const HDOM_INFO_SPACE = 3; |
|
55
|
|
|
const HDOM_INFO_TEXT = 4; |
|
56
|
|
|
const HDOM_INFO_INNER = 5; |
|
57
|
|
|
const HDOM_INFO_OUTER = 6; |
|
58
|
|
|
const HDOM_INFO_ENDSPACE = 7; |
|
59
|
|
|
|
|
60
|
|
|
public $nodetype = self::HDOM_TYPE_TEXT; |
|
61
|
|
|
public $tag = 'text'; |
|
62
|
|
|
public $attr = []; |
|
63
|
|
|
public $children = []; |
|
64
|
|
|
public $nodes = []; |
|
65
|
|
|
public $parent = null; |
|
66
|
|
|
public $_ = []; |
|
67
|
|
|
private $dom = null; |
|
68
|
|
|
|
|
69
|
|
|
/**
 * Forwards deprecated lower_case method names to their camelCase successors.
 *
 * Each forwarded call is reported through Debug::log() as deprecated. A name
 * that is not a known alias raises an E_USER_ERROR that mirrors PHP's own
 * "Call to undefined method" message.
 *
 * @param string $func Name of the inaccessible method that was called.
 * @param array $args Arguments of the original call, forwarded unchanged.
 * @return mixed Result of the replacement method, or null when the name is
 * unknown and an error handler allowed execution to continue.
 */
public function __call($func, $args)
{
    // Deprecated lower_case aliases mapped to the methods that replace them.
    $aliases = [
        'children' => 'childNodes',
        'first_child' => 'firstChild',
        'has_child' => 'hasChildNodes',
        'last_child' => 'lastChild',
        'next_sibling' => 'nextSibling',
        'prev_sibling' => 'previousSibling',
    ];

    if (!isset($aliases[$func])) {
        trigger_error(
            'Call to undefined method ' . __CLASS__ . '::' . $func . '()',
            E_USER_ERROR
        );

        // Only reached when a custom error handler swallows the error. Stop
        // here instead of logging and dispatching with no target method.
        return null;
    }

    $actual_function = $aliases[$func];

    // phpcs:ignore Generic.Files.LineLength
    Debug::log(__CLASS__ . '->' . $func . '() has been deprecated and will be removed in the next major version of simplehtmldom. Use ' . __CLASS__ . '->' . $actual_function . '() instead.');

    return call_user_func_array([$this, $actual_function], $args);
}
|
103
|
|
|
|
|
104
|
|
|
/**
 * Creates a node and, when a document is supplied, registers the node with it.
 *
 * @param object|null $dom Owning document; the new node is appended to its
 * $nodes list. With null the node is left unattached.
 */
public function __construct($dom)
{
    if (null !== $dom) {
        $this->dom = $dom;
        $dom->nodes[] = $this;
    }
}
|
113
|
|
|
|
|
114
|
|
|
/**
 * Supplies the reduced property set shown by var_dump() for this node.
 *
 * @return array Keys 'nodetype' (constant name plus numeric value), 'tag',
 * 'attributes' and 'nodes'; empty attribute/node lists are reported as the
 * string 'none'.
 */
public function __debugInfo()
{
    // Human-readable names per node type. Checked in this order using loose
    // comparison; anything unlisted is reported as unknown.
    $typeNames = [
        self::HDOM_TYPE_ELEMENT => 'HDOM_TYPE_ELEMENT',
        self::HDOM_TYPE_COMMENT => 'HDOM_TYPE_COMMENT',
        self::HDOM_TYPE_TEXT => 'HDOM_TYPE_TEXT',
        self::HDOM_TYPE_ROOT => 'HDOM_TYPE_ROOT',
        self::HDOM_TYPE_CDATA => 'HDOM_TYPE_CDATA',
    ];

    $typeName = 'HDOM_TYPE_UNKNOWN';

    foreach ($typeNames as $typeValue => $candidate) {
        if ($typeValue == $this->nodetype) {
            $typeName = $candidate;
            break;
        }
    }

    $info = [];
    $info['nodetype'] = $typeName . ' (' . $this->nodetype . ')';
    $info['tag'] = $this->tag;

    if (empty($this->attr)) {
        $info['attributes'] = 'none';
    } else {
        $info['attributes'] = $this->attr;
    }

    if (empty($this->nodes)) {
        $info['nodes'] = 'none';
    } else {
        $info['nodes'] = $this->nodes;
    }

    return $info;
}
|
145
|
|
|
|
|
146
|
|
|
/**
 * Converts the node to a string by rendering its outer text.
 *
 * @return string Whatever outertext() produces for this node.
 */
public function __toString()
{
    $markup = $this->outertext();

    return $markup;
}
|
150
|
|
|
|
|
151
|
|
|
/**
 * Releases this node's references to its owning document and its parent.
 *
 * NOTE(review): the properties are unset rather than set to null, so later
 * reads of them behave like reads of undefined properties - confirm that
 * callers do not use the node after clearing it.
 */
public function clear()
{
    // Break link to origin
    unset($this->dom);

    // Break link to branch
    unset($this->parent);
}
|
156
|
|
|
|
|
157
|
|
|
/**
 * Prints this node and its subtree as a tab-indented outline, one node per line.
 *
 * @codeCoverageIgnore
 *
 * @param bool $show_attr When true, the node's attributes are listed after its tag.
 * @param int $depth Number of tab characters written before the tag name.
 */
public function dump($show_attr = true, $depth = 0)
{
    echo str_repeat("\t", $depth), $this->tag;

    if ($show_attr && count($this->attr) > 0) {
        echo '(';

        foreach ($this->attr as $name => $value) {
            echo '[' . $name . ']=>"' . $value . '", ';
        }

        echo ')';
    }

    echo "\n";

    // Nothing below this node: the outline for this branch is complete.
    if (!$this->nodes) {
        return;
    }

    $childDepth = $depth + 1;

    foreach ($this->nodes as $child) {
        $child->dump($show_attr, $childDepth);
    }
}
|
178
|
|
|
|
|
179
|
|
|
/**
 * Builds a one-line debug summary of this node: tag, attributes, the internal
 * $_ info array, inner info and the child/node counts.
 *
 * @codeCoverageIgnore
 *
 * @param bool $echo When true the summary is printed and nothing is returned;
 * when false the summary is returned instead.
 * @return string|null The summary line (terminated by "\n") when $echo is
 * false, otherwise null.
 */
public function dump_node($echo = true)
{
    $string = $this->tag;

    if (count($this->attr) > 0) {
        $string .= '(';
        foreach ($this->attr as $k => $v) {
            $string .= "[$k]=>\"$v\", ";
        }
        $string .= ')';
    }

    if (count($this->_) > 0) {
        $string .= ' $_ (';
        foreach ($this->_ as $k => $v) {
            if (is_array($v)) {
                // Nested info (e.g. per-attribute data) is listed one level deep.
                $string .= "[$k]=>(";
                foreach ($v as $k2 => $v2) {
                    $string .= "[$k2]=>\"$v2\", ";
                }
                $string .= ')';
            } else {
                $string .= "[$k]=>\"$v\", ";
            }
        }
        $string .= ')';
    }

    if (isset($this->text)) {
        $string .= " text: ({$this->text})";
    }

    $string .= ' HDOM_INNER_INFO: ';

    // This section used to be guarded by isset($node), a variable that is
    // never defined in this method, so the inner info was never reported.
    // The node being dumped is $this.
    if (isset($this->_[self::HDOM_INFO_INNER])) {
        $string .= "'" . $this->_[self::HDOM_INFO_INNER] . "'";
    } else {
        $string .= ' NULL ';
    }

    $string .= ' children: ' . count($this->children);
    $string .= ' nodes: ' . count($this->nodes);
    $string .= "\n";

    if ($echo) {
        echo $string;

        return null;
    }

    return $string;
}
|
236
|
|
|
|
|
237
|
|
|
/**
 * Returns the parent node, or moves this node under a new parent.
 *
 * When a new parent is given, the node is first removed from the $nodes and
 * $children lists of its current parent (both lists are re-indexed) and then
 * appended to the same two lists of the new parent. Previously the node stayed
 * listed under the old parent as well.
 *
 * @param object|null $parent New parent node, or null to only read the
 * current parent.
 * @return object|null The parent of this node after the call.
 */
public function parent($parent = null)
{
    if (null === $parent) {
        return $this->parent;
    }

    $previous = $this->parent;

    if (null !== $previous) {
        $self = $this;
        $isOtherNode = function ($candidate) use ($self) {
            return $candidate !== $self;
        };

        // Detach from the old parent; array_values() keeps both lists
        // contiguous because sibling lookups index into them by position.
        $previous->nodes = array_values(array_filter($previous->nodes, $isOtherNode));
        $previous->children = array_values(array_filter($previous->children, $isOtherNode));
    }

    $this->parent = $parent;
    $this->parent->nodes[] = $this;
    $this->parent->children[] = $this;

    return $this->parent;
}
|
250
|
|
|
|
|
251
|
|
|
/**
 * Walks up the parent chain and returns the closest ancestor with the given tag.
 *
 * @param string $tag Tag name to look for; compared with === against each
 * ancestor's $tag.
 * @return object|null The nearest matching ancestor, or null when there is no
 * parent or no ancestor carries that tag.
 */
public function find_ancestor_tag($tag)
{
    for ($candidate = $this->parent; null !== $candidate; $candidate = $candidate->parent) {
        if ($candidate->tag === $tag) {
            return $candidate;
        }
    }

    return null;
}
|
269
|
|
|
|
|
270
|
|
|
/**
 * Returns the markup contained in this node.
 *
 * Starts from the stored inner info (or, failing that, the stored text info),
 * appends the outer text of every entry in $nodes and passes the result
 * through convert_text().
 *
 * @return string
 */
public function innertext()
{
    $html = '';

    if (isset($this->_[self::HDOM_INFO_INNER])) {
        $html = $this->_[self::HDOM_INFO_INNER];
    } elseif (isset($this->_[self::HDOM_INFO_TEXT])) {
        $html = $this->_[self::HDOM_INFO_TEXT];
    }

    foreach ($this->nodes as $child) {
        $html .= $child->outertext();
    }

    return $this->convert_text($html);
}
|
286
|
|
|
|
|
287
|
|
|
/**
 * Returns the markup of this node including its own tags.
 *
 * Resolution order: the root node yields its inner text; a stored outer or
 * text info is returned as-is (after convert_text()); otherwise the markup is
 * assembled from the start tag, stored inner info, child nodes and end tag.
 *
 * @return string
 */
public function outertext()
{
    if ('root' === $this->tag) {
        return $this->innertext();
    }

    // todo: What is the use of this callback? Remove?
    if ($this->dom && null !== $this->dom->callback) {
        call_user_func_array($this->dom->callback, [$this]);
    }

    // A stored outer representation wins over stored text; either one
    // short-circuits the assembly below.
    foreach ([self::HDOM_INFO_OUTER, self::HDOM_INFO_TEXT] as $slot) {
        if (isset($this->_[$slot])) {
            return $this->convert_text($this->_[$slot]);
        }
    }

    $html = isset($this->_[self::HDOM_INFO_BEGIN]) ? $this->makeup() : '';

    // todo: <br> should either never have self::HDOM_INFO_INNER or always
    if (isset($this->_[self::HDOM_INFO_INNER]) && 'br' !== $this->tag) {
        $html .= $this->_[self::HDOM_INFO_INNER];
    }

    if ($this->nodes) {
        foreach ($this->nodes as $child) {
            $html .= $child->outertext();
        }
    }

    $hasEndTag = isset($this->_[self::HDOM_INFO_END]) && 0 != $this->_[self::HDOM_INFO_END];

    if ($hasEndTag) {
        $html .= '</' . $this->tag . '>';
    }

    return $this->convert_text($html);
}
|
331
|
|
|
|
|
332
|
|
|
/**
 * Tells whether the given node is a block level element.
 *
 * The node's tag is lower-cased and looked up in a fixed list of block level
 * tag names.
 *
 * @see https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
 *
 * @param object $node Node whose $tag is inspected.
 * @return bool
 */
protected function is_block_element($node)
{
    // todo: When we have the utility class this should be moved there
    $blockTags = [
        'p',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'ol', 'ul',
        'pre',
        'address',
        'blockquote',
        'dl',
        'div',
        'fieldset',
        'form',
        'hr',
        'noscript',
        'table',
    ];

    $tagName = strtolower($node->tag);

    return in_array($tagName, $blockTags, true);
}
|
356
|
|
|
|
|
357
|
|
|
/**
 * Tells whether the given node is an inline level element.
 *
 * The node's tag is lower-cased and looked up in a fixed list of inline level
 * tag names.
 *
 * @see https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
 *
 * @param object $node Node whose $tag is inspected.
 * @return bool
 */
protected function is_inline_element($node)
{
    // todo: When we have the utility class this should be moved there
    $inlineTags = [
        'b', 'big', 'i', 'small', 'tt',
        'abbr', 'acronym', 'cite', 'code', 'dfn', 'em', 'kbd', 'strong', 'samp', 'var',
        'a', 'bdo', 'br', 'img', 'map', 'object', 'q', 'script', 'span', 'sub', 'sup',
        'button', 'input', 'label', 'select', 'textarea',
    ];

    $tagName = strtolower($node->tag);

    return in_array($tagName, $inlineTags, true);
}
|
372
|
|
|
|
|
373
|
|
|
/**
 * Returns the plain text of this node and its descendants.
 *
 * Script, style, comment and unknown nodes contribute no text of their own.
 * Block level children are separated from preceding text by a blank line,
 * <br> children are replaced by the document's default_br_text (falling back
 * to DEFAULT_BR_TEXT), and every other child is appended in place.
 *
 * @param bool $trim True strips leading and trailing whitespace; false
 * collapses each of them to a single space.
 * @return string
 */
public function text($trim = true)
{
    $ret = '';

    if ('script' === strtolower($this->tag)) {
        $ret = '';
    } elseif ('style' === strtolower($this->tag)) {
        $ret = '';
    } elseif (self::HDOM_TYPE_COMMENT === $this->nodetype) {
        $ret = '';
    } elseif (self::HDOM_TYPE_CDATA === $this->nodetype) {
        $ret = $this->_[self::HDOM_INFO_INNER];
    } elseif (self::HDOM_TYPE_UNKNOWN === $this->nodetype) {
        $ret = '';
    } elseif (isset($this->_[self::HDOM_INFO_INNER])) {
        $ret = $this->_[self::HDOM_INFO_INNER];
    } elseif (self::HDOM_TYPE_TEXT === $this->nodetype) {
        $ret = $this->_[self::HDOM_INFO_TEXT];
    }

    if (is_null($this->nodes)) {
        return '';
    }

    foreach ($this->nodes as $n) {
        if ($this->is_block_element($n)) {
            $block = ltrim($this->convert_text($n->text(false)));

            // Compare against '' explicitly: empty() would also discard a
            // block whose whole text is "0".
            if ('' === $block) {
                continue;
            }

            $ret = rtrim($ret) . "\n\n" . $block;
        } elseif ($this->is_inline_element($n)) {
            // todo: <br> introduces code smell because no space but \n
            if ('br' === strtolower($n->tag)) {
                $ret .= $this->dom->default_br_text ?: DEFAULT_BR_TEXT;
            } else {
                // Render the child once and reuse it; rendering it a second
                // time doubles the work for every level of inline nesting.
                $inline = $this->convert_text($n->text(false));

                if ('' === ltrim($inline)) {
                    continue;
                }

                $ret .= $inline;
            }
        } else {
            $ret .= $this->convert_text($n->text(false));
        }
    }

    // Reduce whitespace at start/end to a single (or none) space.
    // With the /u modifier "\xC2\xA0" means the two code points U+00C2 and
    // U+00A0, which strips a literal "Â"; only the no-break space is intended.
    $replacement = $trim ? '' : ' ';
    $edges = [
        '/[ \t\n\r\0\x0B\x{A0}]+$/u',
        '/^[ \t\n\r\0\x0B\x{A0}]+/u',
    ];

    foreach ($edges as $edge) {
        $trimmed = preg_replace($edge, $replacement, $ret);

        // preg_replace() yields null on failure (e.g. invalid UTF-8 input);
        // keep the text collected so far rather than returning null.
        if (null !== $trimmed) {
            $ret = $trimmed;
        }
    }

    return $ret;
}
|
430
|
|
|
|
|
431
|
|
|
/**
 * Returns the inner text with CDATA section markers removed.
 *
 * The opening marker is removed case-insensitively, the closing marker
 * case-sensitively.
 *
 * @return string
 */
public function xmltext()
{
    $withoutOpeners = str_ireplace('<![CDATA[', '', $this->innertext());

    return str_replace(']]>', '', $withoutOpeners);
}
|
439
|
|
|
|
|
440
|
|
|
/**
 * Builds the start tag of this node, e.g. <a href="...">.
 *
 * Nodes with stored text info return that text unchanged. For all other nodes
 * the tag is rebuilt from $tag and $attr, reusing the recorded spacing and
 * quote style of each attribute where available.
 *
 * @return string
 */
public function makeup()
{
    // text, comment, unknown
    if (isset($this->_[self::HDOM_INFO_TEXT])) {
        return $this->_[self::HDOM_INFO_TEXT];
    }

    $markup = '<' . $this->tag;

    foreach ($this->attr as $name => $value) {
        // skip removed attribute
        if (null === $value || false === $value) {
            continue;
        }

        // Recorded whitespace: [0] before the name, [1] before '=', [2] after '='.
        $hasSpacing = isset($this->_[self::HDOM_INFO_SPACE][$name]);

        $markup .= $hasSpacing ? $this->_[self::HDOM_INFO_SPACE][$name][0] : ' ';

        // no value attr: nowrap, checked selected...
        if (true === $value) {
            $markup .= $name;
            continue;
        }

        $quoteType = isset($this->_[self::HDOM_INFO_QUOTE][$name])
            ? $this->_[self::HDOM_INFO_QUOTE][$name]
            : self::HDOM_QUOTE_DOUBLE;

        if (self::HDOM_QUOTE_SINGLE == $quoteType) {
            $quote = '\'';
            $value = htmlentities($value, ENT_QUOTES, $this->dom->target_charset);
        } elseif (self::HDOM_QUOTE_NO == $quoteType) {
            // Unquoted values are written exactly as stored.
            $quote = '';
        } else {
            $quote = '"';
            $value = htmlentities($value, ENT_COMPAT, $this->dom->target_charset);
        }

        $beforeEquals = $hasSpacing ? $this->_[self::HDOM_INFO_SPACE][$name][1] : '';
        $afterEquals = $hasSpacing ? $this->_[self::HDOM_INFO_SPACE][$name][2] : '';

        $markup .= $name . $beforeEquals . '=' . $afterEquals . $quote . $value . $quote;
    }

    if (isset($this->_[self::HDOM_INFO_ENDSPACE])) {
        $markup .= $this->_[self::HDOM_INFO_ENDSPACE];
    }

    return $markup . '>';
}
|
501
|
|
|
|
|
502
|
|
|
/**
 * Element selector
 *
 * Evaluates every selector group of $selector starting from this node and
 * returns the matches ordered by their key in the document's node list.
 *
 * @param string $selector Selector string, handed to parse_selector().
 * @param int|null $idx Null returns all matches; otherwise the zero-based
 * index of the match to return, where negative values count from the end.
 * @param bool $lowercase Passed through to seek() for every selector.
 * @return HtmlNode[]|HtmlNode|null Array of matches when $idx is null (empty
 * when nothing matches or the selector is unusable), otherwise the match at
 * $idx or null if there is none.
 */
public function find($selector, $idx = null, $lowercase = false)
{
    $groups = $this->parse_selector($selector);
    $groupCount = count($groups);

    if (0 === $groupCount) {
        return [];
    }

    $matchedKeys = [];

    // find each selector
    for ($g = 0; $g < $groupCount; ++$g) {
        // The change on the below line was documented on the sourceforge
        // code tracker id 2788009
        // used to be: if (($levle=count($selectors[0]))===0) return array();
        $depth = count($groups[$g]);

        if (0 === $depth) {
            Debug::log_once('Empty selector (' . $selector . ') matches nothing.');

            return [];
        }

        if (!isset($this->_[self::HDOM_INFO_BEGIN])) {
            Debug::log_once('Invalid operation. The current node has no start tag.');

            return [];
        }

        // Keys of the nodes the next selector level is evaluated from.
        $candidates = [$this->_[self::HDOM_INFO_BEGIN] => 1];
        $combinator = ' ';

        // handle descendant selectors, no recursive!
        for ($level = 0; $level < $depth; ++$level) {
            $next = [];

            foreach ($candidates as $key => $unused) {
                $origin = (-1 === $key) ? $this->dom->root : $this->dom->nodes[$key];
                //PaperG - Pass this optional parameter on to the seek function.
                $origin->seek($groups[$g][$level], $next, $combinator, $lowercase);
            }

            $candidates = $next;
            $combinator = $groups[$g][$level][6]; // Next Combinator
        }

        // Merge this group's matches; keys already present stay untouched.
        foreach ($candidates as $key => $unused) {
            $matchedKeys[$key] = 1;
        }
    }

    // sort keys
    ksort($matchedKeys);

    $found = [];

    foreach (array_keys($matchedKeys) as $key) {
        $found[] = $this->dom->nodes[$key];
    }

    // return nth-element or array
    if (null === $idx) {
        return $found;
    }

    if ($idx < 0) {
        $idx = count($found) + $idx;
    }

    return isset($found[$idx]) ? $found[$idx] : null;
}
|
576
|
|
|
|
|
577
|
|
|
/**
 * Same as find(), except that a falsy result (such as an empty list of
 * matches) is reported as null.
 *
 * @param string $selector Selector string, passed to find().
 * @param int|null $idx Passed to find().
 * @param bool $lowercase Passed to find().
 * @return mixed The result of find() when it is truthy, otherwise null.
 */
public function expect($selector, $idx = null, $lowercase = false)
{
    $result = $this->find($selector, $idx, $lowercase);

    if ($result) {
        return $result;
    }

    return null;
}
|
581
|
|
|
|
|
582
|
|
|
/**
 * Collects the nodes that are related to this node through one combinator and
 * satisfy a single parsed selector.
 *
 * Matches are recorded as keys of $ret with the value 1. For element, cdata
 * and comment matches the key is the node's HDOM_INFO_BEGIN value.
 *
 * @param array $selector One parsed selector: pseudo selector, tag, pseudo
 * element, id, class list, attribute list and next combinator (in that order).
 * @param array $ret Result set, extended in place.
 * @param string $parent_cmd Combinator relating this node to the candidates:
 * ' ' (descendant), '>' (child), '+' (next sibling) or '~' (subsequent sibling).
 * @param bool $lowercase When true, class names and attribute values are
 * lower-cased before they are compared.
 * @return void
 */
protected function seek($selector, &$ret, $parent_cmd, $lowercase = false)
{
    list($ps_selector, $tag, $ps_element, $id, $class, $attributes, $cmb) = $selector;
    $nodes = [];

    if (' ' === $parent_cmd) { // Descendant Combinator
        // Find parent closing tag if the current element doesn't have a closing
        // tag (i.e. void element)
        $end = (!empty($this->_[self::HDOM_INFO_END])) ? $this->_[self::HDOM_INFO_END] : 0;
        if (0 == $end && $this->parent) {
            $parent = $this->parent;
            while (null !== $parent && !isset($parent->_[self::HDOM_INFO_END])) {
                --$end;
                $parent = $parent->parent;
            }

            // The loop can run off the top of the tree when no ancestor
            // records an end position. Treat that like "no end known" so the
            // search extends to the end of the document instead of
            // dereferencing null.
            $end = (null !== $parent) ? $end + $parent->_[self::HDOM_INFO_END] : 0;
        }

        if (0 === $end) {
            $end = count($this->dom->nodes);
        }

        // Get list of target nodes
        $nodes_start = $this->_[self::HDOM_INFO_BEGIN] + 1;

        // remove() makes $this->dom->nodes non-contiguous; use what is left.
        $nodes = array_intersect_key(
            $this->dom->nodes,
            array_flip(range($nodes_start, $end))
        );
    } elseif ('>' === $parent_cmd) { // Child Combinator
        $nodes = $this->children;
    } elseif (
        '+' === $parent_cmd
        && $this->parent
        // Identity check: a loose comparison walks the (cyclic) node graph
        // and can disagree with the strict array_search() below.
        && in_array($this, $this->parent->children, true)
    ) { // Next-Sibling Combinator
        $index = array_search($this, $this->parent->children, true) + 1;
        if ($index < count($this->parent->children)) {
            $nodes[] = $this->parent->children[$index];
        }
    } elseif (
        '~' === $parent_cmd
        && $this->parent
        && in_array($this, $this->parent->children, true)
    ) { // Subsequent Sibling Combinator
        $index = array_search($this, $this->parent->children, true);
        // Start after this node: an element is not its own subsequent sibling.
        $nodes = array_slice($this->parent->children, $index + 1);
    }

    // Go through each element starting at this element until the end tag
    // Note: If this element is a void tag, any previous void element is
    // skipped.
    foreach ($nodes as $node) {
        $pass = true;

        // Skip root nodes
        if (!$node->parent) {
            continue;
        }

        // Handle 'text' selector
        if ('text' === $tag) {
            if ('text' === $node->tag) {
                $ret[array_search($node, $this->dom->nodes, true)] = 1;
            }

            if (isset($node->_[self::HDOM_INFO_INNER])) {
                $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            }

            continue;
        }

        // Handle 'cdata' selector
        if ('cdata' === $tag) {
            if ('cdata' === $node->tag) {
                $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            }

            continue;
        }

        // Handle 'comment'
        if ('comment' === $tag && 'comment' === $node->tag) {
            $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            continue;
        }

        // Skip if node isn't a child node (i.e. text nodes)
        if (!in_array($node, $node->parent->children, true)) {
            continue;
        }

        // Skip if tag doesn't match
        if ('' !== $tag && $tag !== $node->tag && '*' !== $tag) {
            $pass = false;
        }

        if ($pass && '' !== $id) {
            if (!isset($node->attr['id'])) {
                // Skip if ID doesn't exist
                $pass = false;
            } else {
                // Check if ID matches
                // Note: Only consider the first ID (as browsers do)
                $node_id = explode(' ', trim($node->attr['id']))[0];

                if ($id !== $node_id) {
                    $pass = false;
                }
            }
        }

        // Check if all class(es) exist
        if ($pass && '' !== $class && is_array($class) && !empty($class)) {
            if (isset($node->attr['class'])) {
                // Apply the same rules for the pattern and attribute value
                // Attribute values must not contain control characters other than space
                // https://www.w3.org/TR/html/dom.html#text-content
                // https://www.w3.org/TR/html/syntax.html#attribute-values
                // https://www.w3.org/TR/xml/#AVNormalize
                $node_classes = preg_replace("/[\r\n\t\s]+/u", ' ', $node->attr['class']);
                $node_classes = trim($node_classes);
                $node_classes = explode(' ', $node_classes);

                if ($lowercase) {
                    $node_classes = array_map('strtolower', $node_classes);
                }

                foreach ($class as $c) {
                    if (!in_array($c, $node_classes)) {
                        $pass = false;
                        break;
                    }
                }
            } else {
                $pass = false;
            }
        }

        // Check attributes
        if (
            $pass
            && '' !== $attributes
            && is_array($attributes)
            && !empty($attributes)
        ) {
            foreach ($attributes as $a) {
                list(
                    $att_name,
                    $att_expr,
                    $att_val,
                    $att_inv,
                    $att_case_sensitivity
                ) = $a;

                // Handle indexing attributes (i.e. "[2]")
                /*
                 * Note: This is not supported by the CSS Standard but adds
                 * the ability to select items compatible to XPath (i.e.
                 * the 3rd element within its parent).
                 *
                 * Note: This doesn't conflict with the CSS Standard which
                 * doesn't work on numeric attributes anyway.
                 */
                if (
                    is_numeric($att_name)
                    && '' === $att_expr
                    && '' === $att_val
                ) {
                    $count = 0;

                    // Find index of current element in parent
                    foreach ($node->parent->children as $c) {
                        if ($c->tag === $node->tag) {
                            ++$count;
                        }
                        if ($c === $node) {
                            break;
                        }
                    }

                    // If this is the correct node, continue with next
                    // attribute
                    if ($count === (int) $att_name) {
                        continue;
                    }
                }

                // Check attribute availability
                if ($att_inv) { // Attribute should NOT be set
                    if (isset($node->attr[$att_name])) {
                        $pass = false;
                        break;
                    }
                } else { // Attribute should be set
                    // todo: "plaintext" is not a valid CSS selector!
                    if (
                        'plaintext' !== $att_name
                        && !isset($node->attr[$att_name])
                    ) {
                        $pass = false;
                        break;
                    }
                }

                // Continue with next attribute if expression isn't defined
                if ('' === $att_expr) {
                    continue;
                }

                // If they have told us that this is a "plaintext"
                // search then we want the plaintext of the node - right?
                // todo "plaintext" is not a valid CSS selector!
                if ('plaintext' === $att_name) {
                    $nodeKeyValue = $node->text();
                } else {
                    $nodeKeyValue = $node->attr[$att_name];
                }

                // If lowercase is set, do a case insensitive test of
                // the value of the selector.
                if ($lowercase) {
                    $check = $this->match(
                        $att_expr,
                        strtolower($att_val),
                        strtolower($nodeKeyValue),
                        $att_case_sensitivity
                    );
                } else {
                    $check = $this->match(
                        $att_expr,
                        $att_val,
                        $nodeKeyValue,
                        $att_case_sensitivity
                    );
                }

                $check = 'not' === $ps_element ? !$check : $check;

                if (!$check) {
                    $pass = false;
                    break;
                }
            }
        }

        // Found a match. Add to list
        $pass = 'not' === $ps_selector ? !$pass : $pass;
        if ($pass) {
            $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
        }
    }
}
|
843
|
|
|
|
|
844
|
|
|
/**
 * Evaluates one attribute selector expression against an attribute value.
 *
 * Before comparing, runs of whitespace in both pattern and value are
 * collapsed to a single space and both are trimmed.
 *
 * @param string $exp Attribute operator: '=', '!=', '^=', '$=', '*=', '|=' or '~='.
 * @param string $pattern Value given in the selector.
 * @param string $value Value taken from the node.
 * @param string $case_sensitivity 'i' lower-cases pattern and value first.
 * @return bool|int Truthy when the value satisfies the expression. The
 * '^=', '$=' and '*=' operators return the preg_match() result; an unknown
 * operator is logged and yields false.
 */
protected function match($exp, $pattern, $value, $case_sensitivity)
{
    if ('i' === $case_sensitivity) {
        $pattern = strtolower($pattern);
        $value = strtolower($value);
    }

    // Apply the same rules for the pattern and attribute value
    // Attribute values must not contain control characters other than space
    // https://www.w3.org/TR/html/dom.html#text-content
    // https://www.w3.org/TR/html/syntax.html#attribute-values
    // https://www.w3.org/TR/xml/#AVNormalize
    $pattern = preg_replace("/[\r\n\t\s]+/u", ' ', $pattern);
    $pattern = trim($pattern);

    $value = preg_replace("/[\r\n\t\s]+/u", ' ', $value);
    $value = trim($value);

    switch ($exp) {
        case '=':
            return $value === $pattern;
        case '!=':
            return $value !== $pattern;
        case '^=':
            return preg_match('/^' . preg_quote($pattern, '/') . '/', $value);
        case '$=':
            return preg_match('/' . preg_quote($pattern, '/') . '$/', $value);
        case '*=':
            return preg_match('/' . preg_quote($pattern, '/') . '/', $value);
        case '|=':
            /*
             * [att|=val]
             *
             * Represents an element with the att attribute, its value
             * either being exactly "val" or beginning with "val"
             * immediately followed by "-" (U+002D).
             *
             * A bare prefix test would also accept e.g. "english" for
             * [lang|=en], so the hyphen is part of the comparison.
             */
            return $value === $pattern || 0 === strpos($value, $pattern . '-');
        case '~=':
            /*
             * [att~=val]
             *
             * Represents an element with the att attribute whose value is a
             * whitespace-separated list of words, one of which is exactly
             * "val". If "val" contains whitespace, it will never represent
             * anything (since the words are separated by spaces). Also if
             * "val" is the empty string, it will never represent anything.
             *
             * The explicit empty check is needed because explode() on an
             * empty value yields a list containing one empty string.
             */
            return '' !== $pattern && in_array($pattern, explode(' ', trim($value)), true);
    }

    Debug::log('Unhandled attribute selector: ' . $exp . '!');

    return false;
}
|
899
|
|
|
|
|
900
|
|
|
/**
 * Parses a CSS selector string into a nested array of selector steps.
 *
 * The selector pattern is modified from mootools (https://mootools.net/).
 * Attribute names may contain a colon so that <tag attr:ibute="something">
 * can be selected; such attributes must be read with getAttribute(), since
 * $node->x:y is not valid PHP. An attribute name in a selector may also be
 * prefixed with "@", which is accepted but not captured.
 *
 * Capture groups of the selector pattern (index 0 is the full match):
 *
 *  [1] - pseudo selector (optional)
 *        (?:\:(\w+)\()?
 *  [2] - tag name: zero or more word characters, colons, asterisks, hyphens
 *        ([\w:\*-]*)
 *  [3] - pseudo selector (optional)
 *        (?:\:(\w+)\()?
 *  [4] - id name (optional): "#" followed by word characters and hyphens
 *        (?:\#([\w-]+))?
 *  [5] - class names (optional): "." followed by a dot-separated list,
 *        i.e. ".foo.bar.baz" is captured as "foo.bar.baz"
 *        (?:|\.([\w\.-]+))?
 *  [6] - attribute list (optional): one or more "[...]" groups
 *  [7] - separator: one or more of "/", ",", " ", ">", "+", "~"
 *        ([\/, >+~]+)
 *
 * The full match is dropped before a step is stored, so each step is
 * indexed 0..6: pseudo, tag, pseudo, id, classes, attributes, separator.
 * Classes become an array of names when present. Attributes become a list
 * of [name, expression, value, inverted flag, case-sensitivity flag]; a "!"
 * prefix on the attribute name sets the inverted flag and is stripped from
 * the name.
 *
 * @param string $selector_string CSS selector, possibly a comma-separated list
 * @return array List of selectors (one per comma-separated part), each a
 * list of steps as described above
 */
protected function parse_selector($selector_string)
{
    // phpcs:ignore Generic.Files.LineLength
    $pattern = "/(?:\:(\w+)\()?([\w:\*-]*)(?:\:(\w+)\()?(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?(?:\))?(?:\))?([\/, >+~]+)/is";

    preg_match_all(
        $pattern,
        trim($selector_string) . ' ', // Add final ' ' as pseudo separator
        $matches,
        PREG_SET_ORDER
    );

    // Other methods of this class treat $this->dom as optional, so guard the
    // dereference instead of reading a property of a missing document.
    $lowercase = $this->dom && $this->dom->lowercase;

    $selectors = [];
    $result = [];

    foreach ($matches as $m) {
        $full_match = trim($m[0]);

        // Skip NoOps
        if ('' === $full_match || '/' === $full_match || '//' === $full_match) {
            continue;
        }

        // Drop the full match; the capture groups are now indexed 0..6
        array_shift($m);

        // Convert tag name to lowercase
        if ($lowercase) {
            $m[1] = strtolower($m[1]);
        }

        // Extract classes
        if ('' !== $m[4]) {
            $m[4] = explode('.', $m[4]);
        }

        /* Extract attributes (pattern based on the pattern above!)
         *
         * [0] - full match
         * [1] - attribute name
         * [2] - attribute expression
         * [3] - attribute value
         * [4] - case sensitivity
         *
         * Note: Attributes can be negated with a "!" prefix to their name
         */
        if ('' !== $m[5]) {
            preg_match_all(
                "/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is",
                trim($m[5]),
                $attributes,
                PREG_SET_ORDER
            );

            // Replace element by array
            $m[5] = [];

            foreach ($attributes as $att) {
                // Skip empty matches
                if ('' === trim($att[0])) {
                    continue;
                }

                $inverted = (isset($att[1][0]) && '!' === $att[1][0]);

                // Trailing optional groups are absent from $att when they
                // did not take part in the match, hence the isset() checks.
                $m[5][] = [
                    $inverted ? substr($att[1], 1) : $att[1], // Name
                    (isset($att[2])) ? $att[2] : '', // Expression
                    (isset($att[3])) ? $att[3] : '', // Value
                    $inverted, // Inverted Flag
                    (isset($att[4])) ? strtolower($att[4]) : '', // Case-Sensitivity
                ];
            }
        }

        // Sanitize Separator
        if ('' !== $m[6] && '' === trim($m[6])) { // Descendant Separator
            $m[6] = ' ';
        } else { // Other Separator
            $m[6] = trim($m[6]);
        }

        // Clear Separator if it's a Selector List
        $is_list = (',' === $m[6]);

        if ($is_list) {
            $m[6] = '';
        }

        $result[] = $m;

        if ($is_list) { // Selector List: close the current selector
            $selectors[] = $result;
            $result = [];
        }
    }

    if (count($result) > 0) {
        $selectors[] = $result;
    }

    return $selectors;
}
|
1050
|
|
|
|
|
1051
|
|
|
/**
 * Magic getter for attributes and the virtual text properties.
 *
 * A stored attribute called $name takes precedence and is returned after
 * charset conversion. Otherwise the names "outertext", "innertext",
 * "plaintext" and "xmltext" are answered by the corresponding method.
 *
 * @param string $name Attribute or virtual property name
 * @return mixed Converted attribute value, the text of the virtual
 * property, or false when $name is neither
 */
public function __get($name)
{
    if (isset($this->attr[$name])) {
        return $this->convert_text($this->attr[$name]);
    }

    // Virtual property name => method that produces its value
    $virtual = [
        'outertext' => 'outertext',
        'innertext' => 'innertext',
        'plaintext' => 'text',
        'xmltext' => 'xmltext',
    ];

    if (isset($virtual[$name])) {
        $method = $virtual[$name];

        return $this->$method();
    }

    return false;
}
|
1070
|
|
|
|
|
1071
|
|
|
/**
 * Magic setter for attributes and the virtual "outertext"/"innertext"
 * properties.
 *
 * "outertext" and "innertext" are written to the node's info array; any
 * other name is stored as an attribute.
 *
 * @param string $name Attribute or virtual property name
 * @param mixed $value New value
 * @return void
 */
public function __set($name, $value)
{
    if ('outertext' === $name) {
        $this->_[self::HDOM_INFO_OUTER] = $value;

        return;
    }

    if ('innertext' === $name) {
        // Blank an existing text entry before storing the new inner text
        if (isset($this->_[self::HDOM_INFO_TEXT])) {
            $this->_[self::HDOM_INFO_TEXT] = '';
        }

        $this->_[self::HDOM_INFO_INNER] = $value;

        return;
    }

    $this->attr[$name] = $value;
}
|
1087
|
|
|
|
|
1088
|
|
|
/**
 * Magic isset handler for attributes and the virtual text properties.
 *
 * Every virtual property that __get() answers is reported as set. This
 * includes "xmltext": leaving it out would make empty($node->xmltext)
 * always true even though reading the property works.
 *
 * @param string $name Attribute or virtual property name
 * @return bool True for a virtual text property or a stored attribute
 * whose value is not null
 */
public function __isset($name)
{
    switch ($name) {
        case 'outertext':
        case 'innertext':
        case 'plaintext':
        case 'xmltext':
            return true;
    }

    return isset($this->attr[$name]);
}
|
1101
|
|
|
|
|
1102
|
|
|
/**
 * Magic unset handler: removes the stored attribute $name.
 *
 * Nothing happens when isset() reports the attribute as absent, which
 * includes an attribute whose stored value is null.
 *
 * @param string $name Attribute name
 * @return void
 */
public function __unset($name)
{
    if (!isset($this->attr[$name])) {
        return;
    }

    unset($this->attr[$name]);
}
|
1108
|
|
|
|
|
1109
|
|
|
/**
 * Converts text from the document's source charset to its target charset.
 *
 * The charsets are read from the owning document; without a document the
 * text is returned untouched. Conversion is skipped when both charsets are
 * equal, or when the target is UTF-8 and the text already validates as
 * UTF-8 (logged once as a charset mis-detection). For a UTF-8 target a
 * byte order mark at the start and at the end of the text is stripped.
 *
 * @param string $text Text to convert
 * @return mixed The converted text; whatever iconv() returns is passed on
 */
public function convert_text($text)
{
    $source = '';
    $target = '';

    if ($this->dom) {
        $source = strtoupper($this->dom->_charset);
        $target = strtoupper($this->dom->_target_charset);
    }

    $result = $text;

    if (!empty($source) && !empty($target) && $source !== $target) {
        if ('UTF-8' === $target && self::is_utf8($text)) {
            Debug::log_once('The source charset was incorrectly detected as ' . $source . ' but should have been UTF-8');
        } else {
            $result = iconv($source, $target, $text);
        }
    }

    // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output.
    if ('UTF-8' === $target) {
        $bom = "\xef\xbb\xbf";

        if ($bom === substr($result, 0, 3)) {
            $result = substr($result, 3);
        }

        if ($bom === substr($result, -3)) {
            $result = substr($result, 0, -3);
        }
    }

    return $result;
}
|
1145
|
|
|
|
|
1146
|
|
|
/**
 * Checks whether a byte string is structurally valid UTF-8.
 *
 * Every byte from 0x80 upwards must be a lead byte followed by the number
 * of continuation bytes (0x80-0xBF) that the lead byte announces. A
 * continuation byte without a lead byte, including 0x80 itself, is invalid.
 *
 * The check is lenient: it still accepts the 5- and 6-byte forms and does
 * not reject overlong encodings.
 *
 * @param string $str Byte string to inspect
 * @return bool True when every multi-byte sequence is well formed
 */
public static function is_utf8($str)
{
    $len = strlen($str);

    for ($i = 0; $i < $len; ++$i) {
        $c = ord($str[$i]);

        // Plain ASCII needs no further checks
        if ($c < 128) {
            continue;
        }

        // Sequence length (lead byte included) announced by the lead byte
        if ($c >= 254) {
            return false;
        } elseif ($c >= 252) {
            $bits = 6;
        } elseif ($c >= 248) {
            $bits = 5;
        } elseif ($c >= 240) {
            $bits = 4;
        } elseif ($c >= 224) {
            $bits = 3;
        } elseif ($c >= 192) {
            $bits = 2;
        } else {
            // 0x80-0xBF: continuation byte without a preceding lead byte
            return false;
        }

        // Sequence would run past the end of the string
        if (($i + $bits) > $len) {
            return false;
        }

        while ($bits > 1) {
            ++$i;
            $b = ord($str[$i]);

            if ($b < 128 || $b > 191) {
                return false;
            }

            --$bits;
        }
    }

    return true;
}
|
1186
|
|
|
|
|
1187
|
|
|
/**
 * Determines the display size of an image node.
 *
 * The "width" and "height" attributes are used when present. A dimension
 * that is still unknown is then taken from the inline style, provided the
 * style value is an integer followed by "px" (for example "width: 120px").
 * Unknown dimensions are reported as -1.
 *
 * Future enhancement: look at classes or ids on the tag, on its parents,
 * and in external style sheets that define a width or height.
 *
 * @return array|false ['height' => ..., 'width' => ...], or false when the
 * node is not an img element
 */
public function get_display_size()
{
    if ('img' !== $this->tag) {
        return false;
    }

    $size = [
        'height' => -1,
        'width' => -1,
    ];

    // See if there is a height or width attribute in the tag itself.
    foreach (array_keys($size) as $dimension) {
        if (isset($this->attr[$dimension])) {
            $size[$dimension] = $this->attr[$dimension];
        }
    }

    // Now look for an inline style.
    if (isset($this->attr['style'])) {
        // Thanks to user gnarf from stackoverflow for this regular expression.
        preg_match_all(
            '/([\w-]+)\s*:\s*([^;]+)\s*;?/',
            $this->attr['style'],
            $matches,
            PREG_SET_ORDER
        );

        $declarations = [];

        foreach ($matches as $match) {
            // The value group also swallows whitespace before the ";"
            $declarations[$match[1]] = trim($match[2]);
        }

        foreach (array_keys($size) as $dimension) {
            // Only fill in dimensions the attributes did not provide
            if (!isset($declarations[$dimension]) || -1 != $size[$dimension]) {
                continue;
            }

            $value = $declarations[$dimension];

            // check that the last two characters are px (pixels)
            if ('px' !== strtolower(substr($value, -2))) {
                continue;
            }

            $pixels = substr($value, 0, -2);

            // filter_var() returns the integer on success, so compare with
            // false: a plain truthiness test would reject "0".
            if (false !== filter_var($pixels, FILTER_VALIDATE_INT)) {
                $size[$dimension] = $pixels;
            }
        }
    }

    return $size;
}
|
1268
|
|
|
|
|
1269
|
|
|
/**
 * Returns the outer text of this node and optionally writes it to a file.
 *
 * @param string $filepath Target file; nothing is written when empty. The
 * file is written with an exclusive lock.
 * @return string The outer text of this node
 */
public function save($filepath = '')
{
    $markup = $this->outertext();

    if ('' === $filepath) {
        return $markup;
    }

    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
}
|
1279
|
|
|
|
|
1280
|
|
|
/**
 * Adds one or more class names to the "class" attribute of this node.
 *
 * A string is split on any run of whitespace, so repeated, leading or
 * trailing whitespace never produces empty class names. Names the node
 * already has are skipped.
 *
 * @param string|array $class Whitespace-separated class names, or a list
 * of class names; other types are ignored
 * @return void
 */
public function addClass($class)
{
    if (is_string($class)) {
        $class = preg_split('/\s+/', $class, -1, PREG_SPLIT_NO_EMPTY);
    }

    if (!is_array($class)) {
        return;
    }

    foreach ($class as $c) {
        // An empty name would only add stray whitespace to the attribute
        if ('' === $c) {
            continue;
        }

        if (!isset($this->class)) {
            $this->class = $c;
        } elseif (!$this->hasClass($c)) {
            $this->class .= ' ' . $c;
        }
    }
}
|
1300
|
|
|
|
|
1301
|
|
|
/**
 * Checks whether the "class" attribute of this node contains a class name.
 *
 * The attribute value is split on any run of whitespace, so class lists
 * separated by tabs, newlines or several spaces are handled, and the empty
 * string never counts as a class.
 *
 * @param string $class Class name to look for
 * @return bool True when the node has the class; false when it does not,
 * when the node has no class attribute, or when $class is not a string
 */
public function hasClass($class)
{
    if (!is_string($class) || !isset($this->class)) {
        return false;
    }

    $names = preg_split('/\s+/', $this->class, -1, PREG_SPLIT_NO_EMPTY);

    return in_array($class, $names, true);
}
|
1311
|
|
|
|
|
1312
|
|
|
/**
 * Removes class names from the "class" attribute of this node.
 *
 * Without an argument the whole attribute is removed. Otherwise the given
 * names are removed and the remaining names are written back separated by
 * single spaces; the attribute is removed when no name remains. Both the
 * argument and the attribute value are split on any run of whitespace, so
 * tabs, newlines and repeated spaces do not hide a class name and no empty
 * class attribute is left behind.
 *
 * @param string|array|null $class Whitespace-separated class names, a list
 * of class names, or null to remove the attribute; other types are ignored
 * @return void
 */
public function removeClass($class = null)
{
    if (!isset($this->class)) {
        return;
    }

    if (is_null($class)) {
        $this->removeAttribute('class');

        return;
    }

    if (is_string($class)) {
        $class = preg_split('/\s+/', $class, -1, PREG_SPLIT_NO_EMPTY);
    }

    if (!is_array($class)) {
        return;
    }

    $current = preg_split('/\s+/', $this->class, -1, PREG_SPLIT_NO_EMPTY);
    $remaining = array_diff($current, $class);

    if (empty($remaining)) {
        $this->removeAttribute('class');
    } else {
        $this->class = implode(' ', $remaining);
    }
}
|
1337
|
|
|
|
|
1338
|
|
|
/**
 * Returns the attribute array of this node as stored, without charset
 * conversion.
 *
 * @return mixed The node's $attr property
 */
public function getAllAttributes()
{
    $attributes = $this->attr;

    return $attributes;
}
|
1342
|
|
|
|
|
1343
|
|
|
/**
 * Reads $name through dynamic property access on this node.
 *
 * Names that are not accessible properties are resolved by __get(). A name
 * that matches a declared property of the node (such as "nodetype") reads
 * that property instead of an HTML attribute.
 *
 * @param string $name Attribute name
 * @return mixed The value produced by the property access
 */
public function getAttribute($name)
{
    $value = $this->{$name};

    return $value;
}
|
1347
|
|
|
|
|
1348
|
|
|
/**
 * Writes $value to $name through dynamic property access on this node.
 *
 * Names that are not accessible properties are handled by __set(). A name
 * that matches a declared property of the node (such as "nodetype") writes
 * that property instead of an HTML attribute.
 *
 * @param string $name Attribute name
 * @param mixed $value New value
 * @return void
 */
public function setAttribute($name, $value)
{
    $this->{$name} = $value;
}
|
1352
|
|
|
|
|
1353
|
|
|
/**
 * Tests $name with isset() through dynamic property access on this node.
 *
 * Names that are not accessible properties are answered by __isset().
 *
 * @param string $name Attribute name
 * @return bool Result of the isset() check
 */
public function hasAttribute($name)
{
    $present = isset($this->{$name});

    return $present;
}
|
1357
|
|
|
|
|
1358
|
|
|
/**
 * Unsets $name through dynamic property access on this node.
 *
 * Names that are not accessible properties are handled by __unset().
 *
 * @param string $name Attribute name
 * @return void
 */
public function removeAttribute($name)
{
    unset($this->{$name});
}
|
1362
|
|
|
|
|
1363
|
|
|
/**
 * Removes this node from its parent by calling the parent's removeChild().
 * Does nothing when the node has no parent.
 *
 * @return void
 */
public function remove()
{
    $owner = $this->parent;

    if (!$owner) {
        return;
    }

    $owner->removeChild($this);
}
|
1369
|
|
|
|
|
1370
|
|
|
/**
 * Removes $node and everything below it from this node and the document.
 *
 * The children of $node are removed recursively first, then the remaining
 * entries of $node->nodes are dropped from $node and from the document's
 * node list. Finally $node is removed from this node's nodes and children
 * (both re-indexed) and from the document's node list, and $node->clear()
 * is called.
 *
 * The document's node list is only touched when a document is attached,
 * matching the guards other methods of this class place on the dom
 * property.
 *
 * @param HtmlNode $node Node to remove
 * @return void
 */
public function removeChild($node)
{
    foreach ($node->children as $child) {
        $node->removeChild($child);
    }

    // No need to re-index node->children because it is about to be removed!

    foreach ($node->nodes as $entity) {
        $enidx = array_search($entity, $node->nodes, true);

        if (false !== $enidx) {
            unset($node->nodes[$enidx]);
        }

        if ($node->dom) {
            $edidx = array_search($entity, $node->dom->nodes, true);

            if (false !== $edidx) {
                unset($node->dom->nodes[$edidx]);
            }
        }
    }

    // No need to re-index node->nodes because it is about to be removed!

    $nidx = array_search($node, $this->nodes, true);

    if (false !== $nidx) {
        unset($this->nodes[$nidx]);
    }

    $this->nodes = array_values($this->nodes);

    $cidx = array_search($node, $this->children, true);

    if (false !== $cidx) {
        unset($this->children[$cidx]);
    }

    $this->children = array_values($this->children);

    if ($this->dom) {
        $didx = array_search($node, $this->dom->nodes, true);

        if (false !== $didx) {
            unset($this->dom->nodes[$didx]);
        }
    }

    // Do not re-index dom->nodes because nodes point to other nodes in the
    // array explicitly!

    $node->clear();
}
|
1418
|
|
|
|
|
1419
|
|
|
/**
 * Finds the first match for the id selector "#<id>" below this node.
 *
 * @param string $id Element id, without the leading "#"
 * @return mixed Result of find() for index 0
 */
public function getElementById($id)
{
    $selector = '#' . $id;

    return $this->find($selector, 0);
}
|
1423
|
|
|
|
|
1424
|
|
|
/**
 * Runs find() with the id selector "#<id>".
 *
 * @param string $id Element id, without the leading "#"
 * @param int|null $idx Index passed through to find()
 * @return mixed Result of find()
 */
public function getElementsById($id, $idx = null)
{
    $selector = '#' . $id;

    return $this->find($selector, $idx);
}
|
1428
|
|
|
|
|
1429
|
|
|
/**
 * Finds the first match for the tag name below this node.
 *
 * @param string $name Tag name, used as the selector
 * @return mixed Result of find() for index 0
 */
public function getElementByTagName($name)
{
    $first = $this->find($name, 0);

    return $first;
}
|
1433
|
|
|
|
|
1434
|
|
|
/**
 * Runs find() with the tag name as selector.
 *
 * @param string $name Tag name, used as the selector
 * @param int|null $idx Index passed through to find()
 * @return mixed Result of find()
 */
public function getElementsByTagName($name, $idx = null)
{
    $found = $this->find($name, $idx);

    return $found;
}
|
1438
|
|
|
|
|
1439
|
|
|
/**
 * DOM-style alias for parent().
 *
 * @return mixed Result of parent()
 */
public function parentNode()
{
    $parent = $this->parent();

    return $parent;
}
|
1443
|
|
|
|
|
1444
|
|
|
/**
 * Returns all children of this node, or the child at a given index.
 *
 * @param int $idx Child index; the integer -1 selects the whole list
 * @return mixed The children array for -1, otherwise the child at $idx or
 * null when there is no such child
 */
public function childNodes($idx = -1)
{
    if (-1 === $idx) {
        return $this->children;
    }

    return isset($this->children[$idx]) ? $this->children[$idx] : null;
}
|
1456
|
|
|
|
|
1457
|
|
|
/**
 * Returns the first child of this node.
 *
 * @return mixed The child at index 0, or null when there are no children
 */
public function firstChild()
{
    return empty($this->children) ? null : $this->children[0];
}
|
1465
|
|
|
|
|
1466
|
|
|
/**
 * Returns the last child of this node.
 *
 * @return mixed The last entry of the children array, or null when there
 * are no children
 */
public function lastChild()
{
    if (empty($this->children)) {
        return null;
    }

    // end() also moves the array's internal pointer to the last element
    return end($this->children);
}
|
1474
|
|
|
|
|
1475
|
|
|
/**
 * Returns the sibling that follows this node in its parent's children.
 *
 * @return mixed The next sibling, or null when the node has no parent, is
 * not found among the parent's children, or is the last child
 */
public function nextSibling()
{
    if (null === $this->parent) {
        return null;
    }

    $siblings = $this->parent->children;
    $position = array_search($this, $siblings, true);

    if (false === $position) {
        return null;
    }

    $next = $position + 1;

    return isset($siblings[$next]) ? $siblings[$next] : null;
}
|
1489
|
|
|
|
|
1490
|
|
|
/**
 * Returns the sibling that precedes this node in its parent's children.
 *
 * @return mixed The previous sibling, or null when the node has no parent,
 * is not found among the parent's children, or is the first child
 */
public function previousSibling()
{
    if (null === $this->parent) {
        return null;
    }

    $siblings = $this->parent->children;
    $position = array_search($this, $siblings, true);

    if (false === $position || $position <= 0) {
        return null;
    }

    return $siblings[$position - 1];
}
|
1504
|
|
|
|
|
1505
|
|
|
/**
 * Tells whether this node has any children.
 *
 * @return bool True when the children array is not empty
 */
public function hasChildNodes()
{
    return empty($this->children) ? false : true;
}
|
1509
|
|
|
|
|
1510
|
|
|
/**
 * Returns the tag name of this node.
 *
 * @return mixed The node's $tag property
 */
public function nodeName()
{
    $name = $this->tag;

    return $name;
}
|
1514
|
|
|
|
|
1515
|
|
|
/**
 * Appends $node as the last child of this node.
 *
 * The node's parent is set to this node and the node is added to both the
 * nodes and the children list. When this node belongs to a document, the
 * node and all of its descendants are registered in the document's node
 * list, each with identical begin and end positions, and the end position
 * of the document root is moved to the last registered node.
 *
 * @param HtmlNode $node Node to append
 * @return $this
 */
public function appendChild($node)
{
    $node->parent = $this;
    $this->nodes[] = $node;
    $this->children[] = $node;

    if (!$this->dom) {
        return $this;
    }

    // Attach current node to DOM (recursively)
    $document = $this->dom;
    $pending = [$node];

    while ($pending) {
        $current = array_pop($pending);
        $pending = array_merge($pending, $current->children);

        $document->nodes[] = $current;
        $current->dom = $document;

        $position = count($document->nodes) - 1;
        $current->_[self::HDOM_INFO_BEGIN] = $position;
        $current->_[self::HDOM_INFO_END] = $position;
    }

    $document->root->_[self::HDOM_INFO_END] = count($document->nodes) - 1;

    return $this;
}
|
1539
|
|
|
} |
|
1540
|
|
|
|