1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace voku\helper; |
4
|
|
|
|
5
|
|
|
/** |
6
|
|
|
* simple html dom node |
7
|
|
|
* |
8
|
|
|
* PaperG - added $tag_start to track the start position of the tag in the total byte index |
9
|
|
|
* |
10
|
|
|
* @property string alt |
11
|
|
|
* @property string href |
12
|
|
|
* @property string class |
13
|
|
|
* @property string name |
14
|
|
|
* @property string src |
15
|
|
|
* @property string checked |
16
|
|
|
* @property string outertext |
17
|
|
|
* @property string innertext |
18
|
|
|
* |
19
|
|
|
* @package voku\helper |
20
|
|
|
*/ |
21
|
|
|
class SimpleHtmlDomNode |
22
|
|
|
{ |
23
|
|
|
/**
 * Node type; defaults to HDOM_TYPE_TEXT and is compared against the
 * HDOM_TYPE_... constants in text().
 *
 * @var mixed
 */
public $nodetype = HDOM_TYPE_TEXT;

/**
 * Tag name of the node ('text' by default, 'root' is treated specially by outertext()).
 *
 * @var string
 */
public $tag = 'text';

/**
 * Attributes of the node, keyed by attribute name.
 *
 * @var array
 */
public $attr = array();

/**
 * Child nodes as returned by children() / first_child().
 *
 * @var SimpleHtmlDomNode[]
 */
public $children = array();

/**
 * Nodes rendered by outertext() / innertext() / text().
 *
 * @var SimpleHtmlDomNode[]
 */
public $nodes = array();

/**
 * Parent node, or null when there is none.
 *
 * @var SimpleHtmlDomNode
 */
public $parent = null;

/**
 * The "info" array - see HDOM_INFO_... for what each element contains.
 *
 * @var array
 */
public $_ = array();

/**
 * Start position of the tag in the total byte index (see class docblock).
 *
 * @var int
 */
public $tag_start = 0;

/**
 * Owning DOM object; assigned in the constructor.
 *
 * @var SimpleHtmlDom|null
 */
private $dom = null;
66
|
|
|
|
67
|
|
|
/**
 * Attach the node to its owning DOM and append it to that DOM's node list.
 *
 * @param SimpleHtmlDom $dom owning document (its public "nodes" list is appended to)
 */
public function __construct($dom)
{
    // register first, then remember the owner; the two steps are independent
    $dom->nodes[] = $this;
    $this->dom = $dom;
}
75
|
|
|
|
76
|
|
|
/**
 * Check whether the given value is valid UTF-8.
 *
 * Thin wrapper that forwards to UTF8::is_utf8().
 *
 * @param mixed $str value to be tested
 *
 * @return boolean result of UTF8::is_utf8()
 */
public static function is_utf8($str)
{
    $isValid = UTF8::is_utf8($str);

    return $isValid;
}
87
|
|
|
|
88
|
1 |
|
/**
 * Destructor: releases the references held by this node via clear().
 */
public function __destruct()
{
    $this->clear();
}
92
|
|
|
|
93
|
|
|
/**
 * Clean up memory due to the PHP 5 circular references memory leak.
 *
 * The references are reset instead of unset(): once a declared property has
 * been unset, the next read is routed through __get(), which answers with a
 * boolean (array_key_exists() on the attribute list) rather than an
 * array/null, so a later foreach or count() on "nodes"/"children" would fail.
 *
 * @return void
 */
public function clear()
{
    $this->dom = null;
    $this->parent = null;
    $this->nodes = array();
    $this->children = array();
}
104
|
|
|
|
105
|
|
|
/**
 * Magic string conversion: a node casts to its outer text.
 *
 * @return string same value as outertext()
 */
public function __toString()
{
    $html = $this->outertext();

    return $html;
}
114
|
|
|
|
115
|
|
|
/**
 * Get the node's outer text (markup including its own tag).
 *
 * Resolution order:
 *  1. the root node renders as its inner text,
 *  2. the DOM callback (if any) is invoked with this node,
 *  3. an explicitly assigned outer text (HDOM_INFO_OUTER) wins,
 *  4. text-like nodes (HDOM_INFO_TEXT) are returned with noise restored,
 *  5. otherwise begin tag + inner content + end tag are rendered.
 *
 * @return string
 */
public function outertext()
{
    if ($this->tag === 'root') {
        return $this->innertext();
    }

    // trigger callback: the callback receives the node itself, not an array wrapping it
    if ($this->dom && $this->dom->callback !== null) {
        call_user_func($this->dom->callback, $this);
    }

    if (isset($this->_[HDOM_INFO_OUTER])) {
        return $this->_[HDOM_INFO_OUTER];
    }

    if (isset($this->_[HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    // render begin tag (only when the begin index is known and points at a node)
    $ret = '';
    if (
        $this->dom
        &&
        isset($this->_[HDOM_INFO_BEGIN], $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]])
    ) {
        /** @noinspection PhpUndefinedMethodInspection */
        $ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();
    }

    // render inner text
    if (isset($this->_[HDOM_INFO_INNER])) {
        // If it's a br tag... don't return the HDOM_INNER_INFO that we may or may not have added.
        if ($this->tag != 'br') {
            $ret .= $this->_[HDOM_INFO_INNER];
        }
    } elseif ($this->nodes) {
        foreach ($this->nodes as $n) {
            $ret .= $n->outertext();
        }
    }

    // render end tag
    if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END] != 0) {
        $ret .= '</' . $this->tag . '>';
    }

    return $ret;
}
168
|
|
|
|
169
|
|
|
/**
 * Get the node's inner HTML.
 *
 * An explicitly assigned inner text (HDOM_INFO_INNER) is returned as-is, a
 * text-like node (HDOM_INFO_TEXT) is returned with noise restored, and
 * anything else is the concatenated outer text of the entries in $nodes.
 *
 * @return string
 */
public function innertext()
{
    if (isset($this->_[HDOM_INFO_INNER])) {
        return $this->_[HDOM_INFO_INNER];
    } elseif (isset($this->_[HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    $html = '';
    foreach ($this->nodes as $childNode) {
        $html .= $childNode->outertext();
    }

    return $html;
}
191
|
|
|
|
192
|
|
|
/**
 * Echo the node's tree, one node per line, indented by one space per level.
 *
 * @param bool $show_attr whether to list the attributes after the tag name
 * @param int  $deep      current nesting depth (number of leading spaces)
 */
public function dump($show_attr = true, $deep = 0)
{
    // assemble the whole line for this node, then emit it in one go
    $line = str_repeat(' ', $deep) . $this->tag;

    if ($show_attr && count($this->attr) > 0) {
        $line .= '(';
        foreach ($this->attr as $attrName => $unused) {
            /** @noinspection PhpVariableVariableInspection */
            $line .= "[$attrName]=>\"" . $this->$attrName . '", ';
        }
        $line .= ')';
    }

    echo $line . "\n";

    if (!$this->nodes) {
        return;
    }

    $childDepth = $deep + 1;
    foreach ($this->nodes as $childNode) {
        $childNode->dump($show_attr, $childDepth);
    }
}
219
|
|
|
|
220
|
|
|
/**
 * Debugging function to dump a single dom node with a bunch of information about it.
 *
 * Tag, attributes, info array, "text" attribute, child/node counts and
 * tag_start are always taken from this node; only the HDOM_INFO_INNER value
 * is read from $node.
 *
 * $node now has a default value: an optional parameter in front of a required
 * one is deprecated as of PHP 8.0. When it is omitted (or null) this node is
 * inspected.
 *
 * @param bool                   $echo true: echo the dump and return '', false: return the dump
 * @param SimpleHtmlDomNode|null $node node whose inner info is reported; defaults to this node
 *
 * @return string
 */
public function dump_node($echo = true, $node = null)
{
    $source = ($node !== null) ? $node : $this;

    $string = $this->tag;

    if (count($this->attr) > 0) {
        $string .= '(';
        foreach ($this->attr as $k => $v) {
            /** @noinspection PhpVariableVariableInspection */
            $string .= "[$k]=>\"" . $this->$k . '", ';
        }
        $string .= ')';
    }

    if (count($this->_) > 0) {
        $string .= ' $_ (';
        foreach ($this->_ as $k => $v) {
            if (is_array($v)) {
                // nested info entries are listed one level deep
                $string .= "[$k]=>(";
                foreach ($v as $k2 => $v2) {
                    $string .= "[$k2]=>\"" . $v2 . '", ';
                }
                $string .= ')';
            } else {
                $string .= "[$k]=>\"" . $v . '", ';
            }
        }
        $string .= ')';
    }

    // resolved through __isset()/__get(), i.e. an attribute named "text"
    if (isset($this->text)) {
        $string .= ' text: (' . $this->text . ')';
    }

    $string .= " HDOM_INNER_INFO: '";
    if (isset($source->_[HDOM_INFO_INNER])) {
        $string .= $source->_[HDOM_INFO_INNER] . "'";
    } else {
        $string .= ' NULL ';
    }

    $string .= ' children: ' . count($this->children);
    $string .= ' nodes: ' . count($this->nodes);
    $string .= ' tag_start: ' . $this->tag_start;
    $string .= "\n";

    if ($echo) {
        echo $string;

        return '';
    }

    return $string;
}
281
|
|
|
|
282
|
|
|
/**
 * Locate the nearest node with the given tag on the path to the root.
 *
 * The search starts with this node itself and then follows the "parent"
 * links; tags are compared loosely (==).
 *
 * @param string $tag tag name to look for
 *
 * @return \voku\helper\SimpleHtmlDomNode|null the matching node, or null when the chain ends without a match
 */
public function find_ancestor_tag($tag)
{
    $candidate = $this;

    // walk upwards until the chain ends or the tag matches
    while (null !== $candidate && $candidate->tag != $tag) {
        $candidate = $candidate->parent;
    }

    return $candidate;
}
304
|
|
|
|
305
|
|
|
/**
 * Build the node's opening tag (or raw text for text-like nodes).
 *
 * Attributes are paired with their HDOM_INFO_SPACE / HDOM_INFO_QUOTE entries
 * by position in $attr; attributes whose value is null or false are skipped
 * but still consume a position.
 *
 * @return string
 */
public function makeup()
{
    // text, comment, unknown
    if (isset($this->_[HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    $markup = '<' . $this->tag;
    $position = 0;

    foreach ($this->attr as $name => $value) {
        $index = $position++;

        // skip removed attribute
        if ($value === null || $value === false) {
            continue;
        }

        $spacing = $this->_[HDOM_INFO_SPACE][$index];
        $markup .= $spacing[0];

        // no value attr: nowrap, checked selected...
        if ($value === true) {
            $markup .= $name;
            continue;
        }

        // loose comparison on purpose: mirrors switch/case semantics
        $quoteType = $this->_[HDOM_INFO_QUOTE][$index];
        if ($quoteType == HDOM_QUOTE_DOUBLE) {
            $quote = '"';
        } elseif ($quoteType == HDOM_QUOTE_SINGLE) {
            $quote = '\'';
        } else {
            $quote = '';
        }

        $markup .= $name . $spacing[1] . '=' . $spacing[2] . $quote . $value . $quote;
    }

    $markup = $this->dom->restore_noise($markup);

    return $markup . $this->_[HDOM_INFO_ENDSPACE] . '>';
}
350
|
|
|
|
351
|
|
|
/**
 * Magic unset: remove an attribute from the node.
 *
 * makeup() pairs every attribute with its HDOM_INFO_SPACE / HDOM_INFO_QUOTE
 * entry by position, so the entries at the removed attribute's position are
 * dropped as well; otherwise all following attributes would be rendered with
 * the spacing and quoting of their predecessor.
 *
 * @param string $name attribute name
 */
public function __unset($name)
{
    if (!isset($this->attr[$name])) {
        return;
    }

    // position of the attribute inside $attr (keys normalised to strings, as numeric names are stored as ints)
    $position = array_search((string) $name, array_map('strval', array_keys($this->attr)), true);

    unset($this->attr[$name]);

    if ($position === false) {
        return;
    }

    foreach (array(HDOM_INFO_SPACE, HDOM_INFO_QUOTE) as $infoKey) {
        if (
            isset($this->_[$infoKey])
            &&
            is_array($this->_[$infoKey])
            &&
            array_key_exists($position, $this->_[$infoKey])
        ) {
            // array_splice() re-indexes, keeping the list aligned with $attr
            array_splice($this->_[$infoKey], $position, 1);
        }
    }
}
362
|
|
|
|
363
|
|
|
/**
 * Function to try a few tricks to determine the displayed size of an img on the page.
 * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types.
 *
 * The "width"/"height" attributes win; when one is missing, an inline style
 * declaration in whole pixels (e.g. "width: 120px") is used instead.
 *
 * Future enhancement: look for a class or id on the tag that specifies a height or width.
 * Far future enhancement: look at the parent tags for a class or id with an img selector.
 * Ridiculously far future: resolve classes/ids declared in a separate css file.
 *
 * @author  John Schlick
 * @version April 19 2012
 *
 * @return array|false array with the keys 'height' and 'width' (each -1 if it can't be figured out),
 *                     or false when the node is not an img tag
 */
public function get_display_size()
{
    if ($this->tag !== 'img') {
        return false;
    }

    // See if there is a height or width attribute in the tag itself.
    $width = isset($this->attr['width']) ? $this->attr['width'] : -1;
    $height = isset($this->attr['height']) ? $this->attr['height'] : -1;

    // Now look for an inline style.
    if (isset($this->attr['style'])) {
        // Thanks to user gnarf from stackoverflow for this regular expression.
        $attributes = array();
        preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $this->attr['style'], $matches, PREG_SET_ORDER);
        foreach ($matches as $match) {
            // property names are case-insensitive; the value group may carry trailing whitespace
            $attributes[strtolower($match[1])] = trim($match[2]);
        }

        // If there is a width in the style attributes:
        if ($width == -1 && isset($attributes['width'])) {
            $width = $this->pixel_value_or_default($attributes['width'], $width);
        }

        // If there is a height in the style attributes:
        if ($height == -1 && isset($attributes['height'])) {
            $height = $this->pixel_value_or_default($attributes['height'], $height);
        }
    }

    return array(
        'height' => $height,
        'width'  => $width,
    );
}

/**
 * Extract the number from a "<integer>px" style value.
 *
 * @param string $declaration style value, e.g. "120px"
 * @param mixed  $default     value returned when $declaration is not a whole-pixel size
 *
 * @return mixed the numeric part as a string, or $default
 */
private function pixel_value_or_default($declaration, $default)
{
    // check that the last two characters are px (pixels)
    if (strtolower((string) substr($declaration, -2)) !== 'px') {
        return $default;
    }

    $proposed = trim((string) substr($declaration, 0, -2));

    // compare against false explicitly so that a legitimate "0px" is accepted
    if (filter_var($proposed, FILTER_VALIDATE_INT) === false) {
        return $default;
    }

    return $proposed;
}
447
|
|
|
|
448
|
|
|
/**
 * Get every attribute of the node.
 *
 * @return array the attribute list, keyed by attribute name
 */
public function getAllAttributes()
{
    $attributes = $this->attr;

    return $attributes;
}
457
|
|
|
|
458
|
|
|
/**
 * Get an attribute.
 *
 * The lookup goes straight to __get() so that an attribute whose name equals
 * a declared property ("tag", "parent", "nodes", "dom", ...) yields the
 * attribute and not the node's internal state.
 *
 * @param string $name attribute name (or one of the virtual names handled by __get())
 *
 * @return bool|mixed|string the attribute value; see __get() for names that are not set
 */
public function getAttribute($name)
{
    return $this->__get((string) $name);
}
470
|
|
|
|
471
|
|
|
/**
 * Magic get.
 *
 * A set (non-null) attribute is returned directly. Otherwise the virtual
 * names "outertext", "innertext", "plaintext" and "xmltext" are served by the
 * corresponding methods, and any other name yields whether the attribute key
 * exists at all (true for a key with a null value).
 *
 * @param string $name
 *
 * @return bool|mixed|string
 */
public function __get($name)
{
    if (isset($this->attr[$name])) {
        return $this->attr[$name];
    }

    // loose comparisons on purpose: they mirror switch/case semantics
    if ($name == 'outertext') {
        return $this->outertext();
    }

    if ($name == 'innertext') {
        return $this->innertext();
    }

    if ($name == 'plaintext') {
        return $this->text();
    }

    if ($name == 'xmltext') {
        return $this->xmltext();
    }

    return array_key_exists($name, $this->attr);
}
497
|
|
|
|
498
|
|
|
/**
 * Magic set.
 *
 * "outertext" and "innertext" are stored in the info array (innertext goes to
 * HDOM_INFO_TEXT when the node already carries text, else to HDOM_INFO_INNER).
 * Every other name is stored as an attribute; an attribute that is not yet set
 * also gets default spacing and double quotes registered.
 *
 * @param string $name
 * @param mixed  $value
 *
 * @return mixed the assigned value for outertext/innertext, '' for attributes
 */
public function __set($name, $value)
{
    // loose comparisons on purpose: they mirror switch/case semantics
    if ($name == 'outertext') {
        return $this->_[HDOM_INFO_OUTER] = $value;
    }

    if ($name == 'innertext') {
        $slot = isset($this->_[HDOM_INFO_TEXT]) ? HDOM_INFO_TEXT : HDOM_INFO_INNER;

        return $this->_[$slot] = $value;
    }

    if (!isset($this->attr[$name])) {
        $this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
        $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
    }

    $this->attr[$name] = $value;

    return '';
}
532
|
|
|
|
533
|
|
|
/**
 * Get the node's plain text.
 *
 * An assigned inner text is returned as-is; text nodes return their text with
 * noise restored; comment/unknown nodes and script/style tags yield ''.
 * Everything else is the concatenated text() of the entries in $nodes.
 *
 * @return string
 */
public function text()
{
    if (isset($this->_[HDOM_INFO_INNER])) {
        return $this->_[HDOM_INFO_INNER];
    }

    // loose comparisons on purpose: they mirror switch/case semantics
    $type = $this->nodetype;
    if ($type == HDOM_TYPE_TEXT) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    if ($type == HDOM_TYPE_COMMENT || $type == HDOM_TYPE_UNKNOWN) {
        return '';
    }

    if (strcasecmp($this->tag, 'script') === 0 || strcasecmp($this->tag, 'style') === 0) {
        return '';
    }

    // In rare cases (observed for some span and p tags) $this->nodes is NULL;
    // the reason is not known here, so such a node simply contributes nothing.
    if (null === $this->nodes) {
        return '';
    }

    $plain = '';
    foreach ($this->nodes as $childNode) {
        $plain .= $childNode->text();
    }

    // Append the DOM's span separator so that adjacent spans don't run into each other.
    if ($this->tag == 'span') {
        $plain .= $this->dom->default_span_text;
    }

    return $plain;
}
577
|
|
|
|
578
|
|
|
/**
 * Get the inner text with the CDATA markers removed.
 *
 * "<![CDATA[" is removed case-insensitively, "]]>" case-sensitively.
 *
 * @return mixed|string
 */
public function xmltext()
{
    $withoutOpening = str_ireplace('<![CDATA[', '', $this->innertext());

    return str_replace(']]>', '', $withoutOpening);
}
591
|
|
|
|
592
|
|
|
/**
 * Set an attribute.
 *
 * The value is handed to __set() directly so that an attribute whose name
 * equals a declared property ("tag", "parent", "nodes", ...) is stored as an
 * attribute instead of overwriting the node's internal state.
 *
 * @param string $name  attribute name (or "outertext" / "innertext", see __set())
 * @param mixed  $value
 */
public function setAttribute($name, $value)
{
    $this->__set((string) $name, $value);
}
603
|
|
|
|
604
|
|
|
/**
 * Check whether the node has an attribute.
 *
 * The check goes straight to __isset(), so declared properties of the node
 * ("tag", "parent", "nodes", ...) are no longer reported as attributes.
 *
 * @param string $name
 *
 * @return bool
 */
public function hasAttribute($name)
{
    return $this->__isset((string) $name);
}
616
|
|
|
|
617
|
|
|
/**
 * Magic isset.
 *
 * The virtual names "outertext", "innertext" and "plaintext" always count as
 * set; any other name is set when the attribute key exists, even with a null
 * value (no value attr: nowrap, checked, selected...).
 *
 * @param string $name
 *
 * @return bool
 */
public function __isset($name)
{
    // non-strict in_array() on purpose: it mirrors switch/case semantics
    if (in_array($name, array('outertext', 'innertext', 'plaintext'))) {
        return true;
    }

    return array_key_exists($name, $this->attr);
}
638
|
|
|
|
639
|
|
|
/**
 * Remove an attribute.
 *
 * The attribute is set to null through __set() (makeup() skips null-valued
 * attributes). Calling __set() directly keeps a name such as "parent" or
 * "nodes" from nulling the node's declared property of the same name.
 *
 * @param string $name
 */
public function removeAttribute($name)
{
    $this->__set((string) $name, null);
}
649
|
|
|
|
650
|
|
|
/**
 * Get the first element with the given id.
 *
 * @param string $id
 *
 * @return array|null first match of the "#id" selector (see find())
 */
public function getElementById($id)
{
    $selector = '#' . $id;

    return $this->find($selector, 0);
}
661
|
|
|
|
662
|
|
|
/**
 * Find elements by css selector.
 *
 * The selector string is split into comma-separated groups by
 * parse_selector(); each group is a chain of descendant steps that is
 * resolved iteratively (no recursion) through seek(). Matches of all groups
 * are merged and ordered by their index in the DOM's node list.
 *
 * @param string   $selector
 * @param null|int $idx null: return all matches; otherwise the n-th match (negative counts from the end)
 *
 * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|array|null
 */
public function find($selector, $idx = null)
{
    $selectorGroups = $this->parse_selector($selector);
    if (count($selectorGroups) === 0) {
        return array();
    }

    $matchedKeys = array();

    foreach ($selectorGroups as $steps) {
        // The change below was documented on the sourceforge code tracker id 2788009:
        // every group is checked, not only the first one.
        if (count($steps) === 0) {
            return array();
        }

        if (!isset($this->_[HDOM_INFO_BEGIN])) {
            return array();
        }

        $frontier = array($this->_[HDOM_INFO_BEGIN] => 1);

        // handle descendant selectors, no recursive!
        foreach ($steps as $step) {
            $reached = array();
            foreach ($frontier as $nodeKey => $unused) {
                $start = ($nodeKey === -1) ? $this->dom->root : $this->dom->nodes[$nodeKey];
                $start->seek($step, $reached);
            }
            $frontier = $reached;
        }

        foreach ($frontier as $nodeKey => $unused) {
            $matchedKeys[$nodeKey] = 1;
        }
    }

    // sort keys
    ksort($matchedKeys);

    $found = array();
    foreach (array_keys($matchedKeys) as $nodeKey) {
        $found[] = $this->dom->nodes[$nodeKey];
    }

    // return nth-element or array
    if (null === $idx) {
        return $found;
    }

    if ($idx < 0) {
        $idx += count($found);
    }

    return isset($found[$idx]) ? $found[$idx] : null;
}
737
|
|
|
|
738
|
|
|
/**
 * Parse a css selector string.
 *
 * @param string $selector_string
 *
 * @return array list of selector groups (split on ","); each group is a list of steps,
 *               each step being array($tag, $key, $val, $exp, $no_key)
 */
protected function parse_selector($selector_string)
{
    // Pattern of CSS selectors, modified from mootools.
    // Paperg: the colon is allowed in the attribute name so that <tag attr:ibute="something"> is found.
    // Note: to read such an attribute you MUST use getAttribute(), since $dom->x:y fails the php syntax check.
    // The "\[@?" part lets an attribute specifier start with an @ sign that is NOT captured;
    // further study is required to determine whether this should be documented or removed.
    //
    // The hyphens sit at the END of the character classes: "[\w-:]" is rejected by PCRE2
    // (PHP >= 7.3) with "invalid range in character class".
    $pattern = "/([\w:\*-]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w:-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
    preg_match_all($pattern, trim($selector_string) . ' ', $matches, PREG_SET_ORDER);

    $selectors = array();
    $result = array();

    foreach ($matches as $m) {
        $m[0] = trim($m[0]);
        if ($m[0] === '' || $m[0] === '/' || $m[0] === '//') {
            continue;
        }

        // for browser generated xpath
        if ($m[1] === 'tbody') {
            continue;
        }

        $tag = $m[1];
        $key = null;
        $val = null;
        $exp = '=';
        $no_key = false;

        if (!empty($m[2])) {
            $key = 'id';
            $val = $m[2];
        }

        if (!empty($m[3])) {
            $key = 'class';
            $val = $m[3];
        }

        if (!empty($m[4])) {
            $key = $m[4];
        }

        if (!empty($m[5])) {
            $exp = $m[5];
        }

        if (!empty($m[6])) {
            $val = $m[6];
        }

        // convert to lowercase (the cast keeps a missing key at '' without passing null to strtolower())
        $tag = strtolower($tag);
        $key = strtolower((string) $key);

        // elements that do NOT have the specified attribute
        if (isset($key[0]) && $key[0] === '!') {
            $key = substr($key, 1);
            $no_key = true;
        }

        $result[] = array(
            $tag,
            $key,
            $val,
            $exp,
            $no_key,
        );

        // a comma closes the current group
        if (trim($m[7]) === ',') {
            $selectors[] = $result;
            $result = array();
        }
    }

    if (count($result) > 0) {
        $selectors[] = $result;
    }

    return $selectors;
}
825
|
|
|
|
826
|
|
|
/**
 * Seek for nodes matching the given conditions.
 *
 * Matching node indexes (positions in the DOM's node list) are added to $ret
 * as keys with the value 1.
 *
 * PaperG - the value of a selector is tested case-insensitively.
 *
 * @param array $selector one step as built by parse_selector(): array($tag, $key, $val, $exp, $no_key)
 * @param array $ret      result set, filled by reference
 */
protected function seek($selector, &$ret)
{
    list($tag, $key, $val, $exp, $no_key) = $selector;

    // xpath index: "tag[n]" selects the n-th matching direct child
    if ($tag && $key && is_numeric($key)) {
        $count = 0;
        foreach ($this->children as $c) {
            if (
                ($tag === '*' || $tag === $c->tag)
                &&
                ++$count == $key
            ) {
                $ret[$c->_[HDOM_INFO_BEGIN]] = 1;

                return;
            }
        }

        return;
    }

    $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
    if ($end == 0) {
        // no end index of our own: borrow the one of the nearest ancestor that has it
        $parent = $this->parent;
        while ($parent !== null && !isset($parent->_[HDOM_INFO_END])) {
            --$end;
            $parent = $parent->parent;
        }

        // the chain may end without any ancestor carrying an end index
        if ($parent !== null) {
            $end += $parent->_[HDOM_INFO_END];
        }
    }

    for ($i = $this->_[HDOM_INFO_BEGIN] + 1; $i < $end; ++$i) {
        /* @var SimpleHtmlDomNode $node */
        $node = $this->dom->nodes[$i];

        $pass = true;

        // bare "*": only direct children qualify
        if ($tag === '*' && !$key) {
            if (in_array($node, $this->children, true)) {
                $ret[$i] = 1;
            }
            continue;
        }

        // compare tag
        if ($tag && $tag != $node->tag && $tag !== '*') {
            $pass = false;
        }

        // compare key
        if ($pass && $key) {
            if ($no_key) {
                if (isset($node->attr[$key])) {
                    $pass = false;
                }
            } else {
                if (($key != 'plaintext') && !isset($node->attr[$key])) {
                    $pass = false;
                }
            }
        }

        // compare value
        if ($pass && $key && $val && $val !== '*') {
            if ($key == 'plaintext') {
                // a "plaintext" search compares against the plain text of the node
                $nodeKeyValue = $node->text();
            } else {
                // this is a normal search, we want the value of that attribute of the tag.
                $nodeKeyValue = $node->attr[$key];
            }

            // PaperG - do a case insensitive test of the value of the selector.
            $check = $this->match($exp, strtolower($val), strtolower($nodeKeyValue));

            // handle multiple class
            if (!$check && strcasecmp($key, 'class') === 0) {
                foreach (explode(' ', $node->attr[$key]) as $k) {
                    // skip the blanks produced by leading, trailing or double spaces
                    if (!empty($k)) {
                        $check = $this->match($exp, strtolower($val), strtolower($k));

                        if ($check) {
                            break;
                        }
                    }
                }
            }

            if (!$check) {
                $pass = false;
            }
        }

        if ($pass) {
            $ret[$i] = 1;
        }

        unset($node);
    }
}
935
|
|
|
|
936
|
|
|
/**
 * Compare a value against a selector pattern using the given operator.
 *
 * Supported operators: "=" (identical), "!=" (not identical), "^=" (starts
 * with), "$=" (ends with) and "*=" (regular expression: a pattern starting
 * with "/" is used verbatim, anything else is wrapped as /pattern/i).
 *
 * @param string $exp     operator
 * @param string $pattern value from the selector
 * @param string $value   value from the node
 *
 * @return bool|int bool for "=" / "!=" and unknown operators, preg_match() result otherwise
 */
protected function match($exp, $pattern, $value)
{
    // loose comparisons on purpose: they mirror switch/case semantics
    if ($exp == '=') {
        return ($value === $pattern);
    }

    if ($exp == '!=') {
        return ($value !== $pattern);
    }

    if ($exp == '^=') {
        $regex = '/^' . preg_quote($pattern, '/') . '/';

        return preg_match($regex, $value);
    }

    if ($exp == '$=') {
        $regex = '/' . preg_quote($pattern, '/') . '$/';

        return preg_match($regex, $value);
    }

    if ($exp == '*=') {
        $regex = ($pattern[0] == '/') ? $pattern : '/' . $pattern . '/i';

        return preg_match($regex, $value);
    }

    return false;
}
966
|
|
|
|
967
|
|
|
/**
 * Get the elements with the given id.
 *
 * @param string   $id
 * @param null|int $idx null: all matches; otherwise the n-th match (see find())
 *
 * @return array|null
 */
public function getElementsById($id, $idx = null)
{
    $selector = '#' . $id;

    return $this->find($selector, $idx);
}
979
|
|
|
|
980
|
|
|
/**
 * Get the first element with the given tag name.
 *
 * @param string $name tag name, passed to find() as the selector
 *
 * @return array|null first match (see find())
 */
public function getElementByTagName($name)
{
    $firstMatch = $this->find($name, 0);

    return $firstMatch;
}
991
|
|
|
|
992
|
|
|
/**
 * Get the elements with the given tag name.
 *
 * @param string   $name tag name, passed to find() as the selector
 * @param null|int $idx  null: all matches; otherwise the n-th match (see find())
 *
 * @return array|null
 */
public function getElementsByTagName($name, $idx = null)
{
    $matches = $this->find($name, $idx);

    return $matches;
}
1004
|
|
|
|
1005
|
|
|
/**
 * Get the parent node (camelCase alias of parent() without an argument).
 *
 * @return SimpleHtmlDomNode|null
 */
public function parentNode()
{
    $parentNode = $this->parent();

    return $parentNode;
}
1014
|
|
|
|
1015
|
|
|
/**
 * Return the parent of the node.
 *
 * If a node is passed in, it becomes the parent of the current node and the
 * current node is appended to its "nodes" and "children" lists.
 *
 * Known limitation: the current node is NOT removed from the lists of its
 * previous parent first.
 *
 * @param SimpleHtmlDomNode|null $parent new parent, or null to only read the current one
 *
 * @return SimpleHtmlDomNode|null
 */
public function parent($parent = null)
{
    if ($parent === null) {
        return $this->parent;
    }

    $this->parent = $parent;
    $parent->nodes[] = $this;
    $parent->children[] = $this;

    return $this->parent;
}
1036
|
|
|
|
1037
|
|
|
/**
 * camelCase alias of children().
 *
 * @param int $idx index forwarded to children(); defaults to -1
 *
 * @return SimpleHtmlDomNode[]|SimpleHtmlDomNode|null the value returned by children($idx)
 */
public function childNodes($idx = -1)
{
    $result = $this->children($idx);

    return $result;
}
1048
|
|
|
|
1049
|
|
|
/**
 * Returns all children of this node, or a single child by index.
 *
 * @param int $idx -1 (the default, compared strictly) returns the whole
 *                 children list; any other value is used as a key into it
 *
 * @return SimpleHtmlDomNode[]|SimpleHtmlDomNode|null the full list, the child
 *                                                    stored under $idx, or null
 *                                                    when no such child is set
 */
public function children($idx = -1)
{
    if ($idx !== -1) {
        // A specific child was requested: hand it back only if it exists.
        return isset($this->children[$idx]) ? $this->children[$idx] : null;
    }

    return $this->children;
}
1068
|
|
|
|
1069
|
|
|
/**
 * camelCase alias of first_child().
 *
 * @return SimpleHtmlDomNode|null the value returned by first_child()
 */
public function firstChild()
{
    $first = $this->first_child();

    return $first;
}
1078
|
|
|
|
1079
|
|
|
/**
 * Returns the first entry of this node's children list.
 *
 * @return SimpleHtmlDomNode|null the child at index 0, or null when the
 *                                children list is empty
 */
public function first_child()
{
    return count($this->children) > 0 ? $this->children[0] : null;
}
1092
|
|
|
|
1093
|
|
|
/**
 * camelCase alias of last_child().
 *
 * @return SimpleHtmlDomNode|null the value returned by last_child()
 */
public function lastChild()
{
    $last = $this->last_child();

    return $last;
}
1102
|
|
|
|
1103
|
|
|
/**
 * Returns the last entry of this node's children list.
 *
 * @return SimpleHtmlDomNode|null the child at index count - 1, or null when
 *                                the children list is empty
 */
public function last_child()
{
    $total = count($this->children);

    if ($total <= 0) {
        return null;
    }

    return $this->children[$total - 1];
}
1117
|
|
|
|
1118
|
|
|
/**
 * camelCase alias of next_sibling().
 *
 * @return SimpleHtmlDomNode|null the value returned by next_sibling()
 */
public function nextSibling()
{
    $following = $this->next_sibling();

    return $following;
}
1127
|
|
|
|
1128
|
|
|
/**
 * Returns the node that follows this one in its parent's children list.
 *
 * @return SimpleHtmlDomNode|null the next child of the parent; null when this
 *                                node has no parent, is the last child, or is
 *                                not found among the parent's children
 */
public function next_sibling()
{
    $owner = $this->parent;
    if ($owner === null) {
        return null;
    }

    $total = count($owner->children);

    // Scan for this node's own slot; $position ends at $total if it is absent.
    for ($position = 0; $position < $total; ++$position) {
        if ($owner->children[$position] === $this) {
            break;
        }
    }

    $following = $position + 1;

    return $following < $total ? $owner->children[$following] : null;
}
1151
|
|
|
|
1152
|
|
|
/**
 * camelCase alias of prev_sibling().
 *
 * @return SimpleHtmlDomNode|null the value returned by prev_sibling()
 */
public function previousSibling()
{
    $preceding = $this->prev_sibling();

    return $preceding;
}
1161
|
|
|
|
1162
|
|
|
/**
 * Returns the node that precedes this one in its parent's children list.
 *
 * @return SimpleHtmlDomNode|null the previous child of the parent; null when
 *                                this node has no parent, is the first child,
 *                                or is not found among the parent's children
 */
public function prev_sibling()
{
    if ($this->parent === null) {
        return null;
    }

    $siblings = $this->parent->children;

    // Strict search compares objects by identity, i.e. it finds this very node.
    $position = array_search($this, $siblings, true);

    // A node that is not listed in the parent's children has no previous
    // sibling. Without this guard the lookup would fall through to the
    // parent's last child, mirroring neither next_sibling() nor the DOM.
    if ($position === false) {
        return null;
    }

    return isset($siblings[$position - 1]) ? $siblings[$position - 1] : null;
}
1185
|
|
|
|
1186
|
|
|
/**
 * camelCase alias of has_child().
 *
 * @return bool the value returned by has_child()
 */
public function hasChildNodes()
{
    $hasAny = $this->has_child();

    return $hasAny;
}
1195
|
|
|
|
1196
|
|
|
/**
 * Tells whether this node's children list is non-empty.
 *
 * @return bool false when the children property is empty(), true otherwise
 */
public function has_child()
{
    if (empty($this->children)) {
        return false;
    }

    return true;
}
1205
|
|
|
|
1206
|
|
|
/**
 * Returns the name of this node, which is the value of its "tag" property.
 *
 * @return string the tag property
 */
public function nodeName()
{
    $name = $this->tag;

    return $name;
}
1215
|
|
|
|
1216
|
|
|
/**
 * Attaches the given node to this node.
 *
 * The work is done by calling parent() on the given node with this node
 * as the new parent.
 *
 * @param SimpleHtmlDomNode $node node to attach
 *
 * @return SimpleHtmlDomNode the same $node that was passed in
 */
public function appendChild($node)
{
    $newParent = $this;
    $node->parent($newParent);

    return $node;
}
1229
|
|
|
|
1230
|
|
|
} |
1231
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.