1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace PHPHtmlParser\Selector; |
6
|
|
|
|
7
|
|
|
use PHPHtmlParser\Dom\AbstractNode; |
8
|
|
|
use PHPHtmlParser\Dom\Collection; |
9
|
|
|
use PHPHtmlParser\Dom\InnerNode; |
10
|
|
|
use PHPHtmlParser\Dom\LeafNode; |
11
|
|
|
use PHPHtmlParser\Exceptions\ChildNotFoundException; |
12
|
|
|
|
13
|
|
|
/**
 * Class Selector
 *
 * Walks a DOM tree and collects the nodes that satisfy a parsed selector.
 * The selector string is parsed once, in the constructor, by the supplied
 * parser; find() then evaluates the resulting rules against a start node.
 *
 * Each rule is an array. The keys read by this class are 'tag', 'key',
 * 'value', 'operator', 'noKey' and 'alterNext'.
 *
 * @package PHPHtmlParser\Selector
 */
class Selector
{
    /**
     * Parsed selectors: a list of selector groups, each group being a list
     * of rule arrays, as returned by the parser.
     *
     * @var array
     */
    protected $selectors = [];

    /**
     * When true, seek() descends into a non-matching child immediately
     * instead of queueing it until all siblings have been checked.
     *
     * @var bool
     */
    private $depthFirst = false;

    /**
     * Constructs with the selector string.
     *
     * @param string          $selector
     * @param ParserInterface $parser Parser used to turn the string into rules.
     */
    public function __construct(string $selector, ParserInterface $parser)
    {
        $this->selectors = $parser->parseSelectorString($selector);
    }

    /**
     * Returns the selectors that were found in __construct.
     *
     * @return array
     */
    public function getSelectors()
    {
        return $this->selectors;
    }

    /**
     * Switches between depth-first and sibling-first traversal in seek().
     *
     * @param bool $status
     * @return void
     */
    public function setDepthFirstFind(bool $status): void
    {
        $this->depthFirst = $status;
    }

    /**
     * Attempts to find the selectors starting from the given node object.
     *
     * Every selector group is evaluated independently from the start node
     * and the matches of all groups are appended to one collection.
     *
     * @param AbstractNode $node
     * @return Collection
     * @throws ChildNotFoundException
     */
    public function find(AbstractNode $node): Collection
    {
        $results = new Collection;
        foreach ($this->selectors as $selector) {
            if (count($selector) == 0) {
                continue;
            }

            $nodes = [$node];
            $options = [];
            foreach ($selector as $rule) {
                if ($rule['alterNext']) {
                    // This rule only modifies how the following rule is applied.
                    $options[] = $this->alterNext($rule);
                    continue;
                }
                $nodes = $this->seek($nodes, $rule, $options);
                // Options apply to a single rule only.
                $options = [];
            }

            // this is the final set of nodes
            foreach ($nodes as $result) {
                $results[] = $result;
            }
        }

        return $results;
    }

    /**
     * Attempts to find all children that match the rule given.
     *
     * @param array $nodes   Nodes whose descendants are searched.
     * @param array $rule    Rule array to match against.
     * @param array $options Either a list of option arrays (as collected by
     *                       find()) or an already flattened option map.
     * @return array
     * @throws ChildNotFoundException
     */
    protected function seek(array $nodes, array $rule, array $options): array
    {
        // XPath index: a numeric key selects the n-th node (1-based) among
        // the given nodes whose tag matches.
        if (array_key_exists('tag', $rule) && array_key_exists('key', $rule)
            && is_numeric($rule['key'])
        ) {
            $count = 0;
            /** @var AbstractNode $node */
            foreach ($nodes as $node) {
                if ($rule['tag'] == '*'
                    || $rule['tag'] == $node->getTag()->name()
                ) {
                    ++$count;
                    if ($count == $rule['key']) {
                        // found the node we wanted
                        return [$node];
                    }
                }
            }

            return [];
        }

        $options = $this->flattenOptions($options);
        // Descend below direct children unless explicitly disabled.
        $checkGrandChildren = !isset($options['checkGrandChildren'])
            || $options['checkGrandChildren'];

        $return = [];
        /** @var InnerNode $node */
        foreach ($nodes as $node) {
            // check if we are a leaf
            if ($node instanceof LeafNode || !$node->hasChildren()) {
                continue;
            }

            // Non-matching children that still have to be searched.
            $children = [];
            $child = $node->firstChild();
            while (!is_null($child)) {
                // wild card, grab all
                if ($rule['tag'] == '*' && is_null($rule['key'])) {
                    $return[] = $child;
                    $child = $this->getNextChild($node, $child);
                    continue;
                }

                $pass = $this->checkTag($rule, $child);
                if ($pass && !is_null($rule['key'])) {
                    $pass = $this->checkKey($rule, $child);
                }
                if ($pass && !is_null($rule['key']) && !is_null($rule['value'])
                    && $rule['value'] != '*'
                ) {
                    $pass = $this->checkComparison($rule, $child);
                }

                if ($pass) {
                    // it passed all checks
                    $return[] = $child;
                } elseif ($child instanceof InnerNode && $child->hasChildren()) {
                    // this child failed to be matched but is not a leaf
                    if ($this->depthFirst) {
                        if ($checkGrandChildren) {
                            // search below it right away
                            foreach ($this->seek([$child], $rule, $options) as $match) {
                                $return[] = $match;
                            }
                        }
                    } else {
                        // we still want to check its children, after its siblings
                        $children[] = $child;
                    }
                }

                $child = $this->getNextChild($node, $child);
            }

            if ($checkGrandChildren && count($children) > 0) {
                // we have children that failed but are not leaves.
                foreach ($this->seek($children, $rule, $options) as $match) {
                    $return[] = $match;
                }
            }
        }

        return $return;
    }

    /**
     * Attempts to match the given arguments with the given operator.
     *
     * All comparisons ignore letter case. For '*=' a pattern that starts
     * with '/' is used as a complete regular expression; otherwise it is
     * wrapped in '/' delimiters. In both cases the pattern is handed to PCRE
     * unchanged (with the 'i' modifier added) so that case-sensitive escapes
     * such as \D or \S keep their meaning.
     *
     * @param string $operator One of '=', '!=', '^=', '$=', '*='.
     * @param string $pattern  Value taken from the selector rule.
     * @param string $value    Value taken from the node.
     * @return bool False for any unknown operator.
     */
    protected function match(
        string $operator,
        string $pattern,
        string $value
    ): bool {
        $lowerValue = strtolower($value);
        $lowerPattern = strtolower($pattern);
        switch ($operator) {
            case '=':
                return $lowerValue === $lowerPattern;
            case '!=':
                return $lowerValue !== $lowerPattern;
            case '^=':
                return preg_match('/^' . preg_quote($lowerPattern, '/') . '/',
                        $lowerValue) == 1;
            case '$=':
                return preg_match('/' . preg_quote($lowerPattern, '/') . '$/',
                        $lowerValue) == 1;
            case '*=':
                // Guard the offset read: an empty pattern has no first character.
                if ($pattern !== '' && $pattern[0] == '/') {
                    // Caller supplied a delimited regex; appending 'i' keeps
                    // the comparison case-insensitive without rewriting it.
                    return preg_match($pattern . 'i', $value) == 1;
                }

                return preg_match("/" . $pattern . "/i", $value) == 1;
        }

        return false;
    }

    /**
     * Attempts to figure out what the alteration will be for
     * the next element.
     *
     * A '>' tag disables the search below direct children.
     *
     * @param array $rule
     * @return array
     */
    protected function alterNext(array $rule): array
    {
        $options = [];
        if ($rule['tag'] == '>') {
            $options['checkGrandChildren'] = false;
        }

        return $options;
    }

    /**
     * Flattens the option array.
     *
     * Accepts a list of option arrays and merges them into one map; later
     * entries overwrite earlier ones. Entries that are not arrays are kept
     * under their own key, so a map that is already flat (as passed by the
     * recursive calls in seek()) is returned unchanged.
     *
     * @param array $optionsArray
     * @return array
     */
    protected function flattenOptions(array $optionsArray)
    {
        $options = [];
        foreach ($optionsArray as $name => $optionArray) {
            if (!is_array($optionArray)) {
                // Already a flat name => value pair.
                $options[$name] = $optionArray;
                continue;
            }
            foreach ($optionArray as $key => $option) {
                $options[$key] = $option;
            }
        }

        return $options;
    }

    /**
     * Returns the next child or null if no more children.
     *
     * @param AbstractNode $node
     * @param AbstractNode $currentChild
     * @return AbstractNode|null
     */
    protected function getNextChild(
        AbstractNode $node,
        AbstractNode $currentChild
    ) {
        if (!$node instanceof InnerNode) {
            return null;
        }

        try {
            // get next child
            return $node->nextChild($currentChild->id());
        } catch (ChildNotFoundException $e) {
            // no more children
            return null;
        }
    }

    /**
     * Checks tag condition from rules against node.
     *
     * An empty tag or '*' matches every node.
     *
     * @param array        $rule
     * @param AbstractNode $node
     * @return bool
     */
    protected function checkTag(array $rule, AbstractNode $node): bool
    {
        if (!empty($rule['tag']) && $rule['tag'] != $node->getTag()->name()
            && $rule['tag'] != '*'
        ) {
            return false;
        }

        return true;
    }

    /**
     * Checks key condition from rules against node.
     *
     * With 'noKey' set, the node must NOT carry the attribute(s); otherwise
     * it must carry every attribute named in 'key'. The key 'plaintext' is
     * exempt from the presence check.
     *
     * @param array        $rule
     * @param AbstractNode $node
     * @return bool
     */
    protected function checkKey(array $rule, AbstractNode $node): bool
    {
        $keys = is_array($rule['key']) ? $rule['key'] : [$rule['key']];

        if ($rule['noKey']) {
            foreach ($keys as $key) {
                if (!is_null($node->getAttribute($key))) {
                    return false;
                }
            }

            return true;
        }

        foreach ($keys as $key) {
            if ($key != 'plaintext' && !$node->hasAttribute($key)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Checks comparison condition from rules against node.
     *
     * For the key 'plaintext' the node text is compared; otherwise the
     * attribute value(s) named by 'key'. With several keys every one of
     * them has to pass.
     *
     * @param array        $rule
     * @param AbstractNode $node
     * @return bool
     */
    public function checkComparison(array $rule, AbstractNode $node): bool
    {
        if ($rule['key'] == 'plaintext') {
            // plaintext search
            return $this->checkNodeValue($node->text(), $rule, $node);
        }

        // normal search
        if (!is_array($rule['key'])) {
            $nodeValue = $node->getAttribute($rule['key']);

            return $this->checkNodeValue($nodeValue, $rule, $node);
        }

        $result = true;
        foreach ($rule['key'] as $index => $key) {
            $nodeValue = $node->getAttribute($key);
            $result = $result &&
                $this->checkNodeValue($nodeValue, $rule, $node, $index);
        }

        return $result;
    }

    /**
     * Compares one node value against the rule's value(s).
     *
     * @param string|null  $nodeValue Text or attribute value read from the node.
     * @param array        $rule
     * @param AbstractNode $node
     * @param int|null     $index     Position inside 'key'/'value' when the
     *                                rule carries several keys.
     * @return bool
     */
    private function checkNodeValue(
        ?string $nodeValue,
        array $rule,
        AbstractNode $node,
        ?int $index = null
    ) : bool {
        $check = false;
        $hasOperator = array_key_exists('operator', $rule) && is_string($rule['operator']);
        $hasValue = array_key_exists('value', $rule);

        // Single string value: compare directly.
        if (
            $hasValue && is_string($rule['value']) &&
            !is_null($nodeValue) &&
            $hasOperator
        ) {
            $check = $this->match($rule['operator'], $rule['value'], $nodeValue);
        }

        // handle multiple classes
        $key = $rule['key'];
        if (
            !$check &&
            $key == 'class' &&
            $hasValue && is_array($rule['value'])
        ) {
            $nodeClasses = explode(' ', $node->getAttribute('class') ?? '');
            // Every requested value must match at least one class token.
            foreach ($rule['value'] as $value) {
                foreach ($nodeClasses as $class) {
                    // Compare against '' rather than empty() so that a class
                    // token of "0" is not skipped.
                    if ($class !== '' && $hasOperator) {
                        $check = $this->match($rule['operator'], $value, $class);
                    }
                    if ($check) {
                        break;
                    }
                }
                if (!$check) {
                    break;
                }
            }
        } elseif (
            !$check &&
            is_array($key) &&
            !is_null($nodeValue) &&
            $hasOperator &&
            // Only index into a real array with an existing offset; a string
            // value must not be read character by character.
            $hasValue && !is_null($index) && is_array($rule['value']) &&
            isset($rule['value'][$index]) && is_string($rule['value'][$index])
        ) {
            $check = $this->match($rule['operator'], $rule['value'][$index], $nodeValue);
        }

        return $check;
    }
}
445
|
|
|
|
This check looks for function or method calls that always return null and whose return value is used.
The method getObject() can return nothing but null, so it makes no sense to use the return value. The reason is most likely that a function or method is incomplete or has been reduced for debugging purposes.