Passed
Push — master ( 12b94f...668c77 )
by Gilles
03:31
created

src/PHPHtmlParser/Selector/Selector.php (1 issue)

Labels
Severity
1
<?php
2
3
declare(strict_types=1);
4
5
namespace PHPHtmlParser\Selector;
6
7
use PHPHtmlParser\Dom\AbstractNode;
8
use PHPHtmlParser\Dom\Collection;
9
use PHPHtmlParser\Dom\InnerNode;
10
use PHPHtmlParser\Dom\LeafNode;
11
use PHPHtmlParser\Exceptions\ChildNotFoundException;
12
13
/**
14
 * Class Selector
15
 *
16
 * @package PHPHtmlParser
17
 */
18
class Selector
19
{
20
21
    /**
22
     * @var array
23
     */
24
    protected $selectors = [];
25
26
    /**
27
     * @var bool
28
     */
29
    private $depthFirst = false;
30
31
    /**
     * Builds the selector by handing the raw selector string to the given
     * parser and keeping whatever rule structure it returns.
     *
     * @param string          $selector Selector string to parse.
     * @param ParserInterface $parser   Parser whose parseSelectorString() is used.
     */
    public function __construct(string $selector, ParserInterface $parser)
    {
        $parsedSelectors = $parser->parseSelectorString($selector);
        $this->selectors = $parsedSelectors;
    }
40
41
    /**
     * Returns the selectors that were parsed in __construct.
     *
     * @return array
     */
    public function getSelectors()
    {
        $parsedSelectors = $this->selectors;

        return $parsedSelectors;
    }
49
50
    /**
     * Stores the depth-first flag that seek() consults when a child fails
     * to match but has children of its own.
     *
     * @param bool $status New value of the depth-first flag.
     * @return void
     */
    public function setDepthFirstFind(bool $status): void
    {
        $depthFirst = $status;
        $this->depthFirst = $depthFirst;
    }
58
59
    /**
     * Runs every parsed selector against the given node and gathers all
     * matches into a single collection.
     *
     * Each selector is a chain of rules. A rule flagged with 'alterNext' is
     * not searched for; instead the result of alterNext() is queued as an
     * option for the next seek() call, after which the queue is emptied.
     *
     * @param AbstractNode $node Node the search starts from.
     * @return Collection
     * @throws ChildNotFoundException
     */
    public function find(AbstractNode $node): Collection
    {
        $collected = new Collection();
        foreach ($this->selectors as $ruleChain) {
            // nothing to look for in an empty chain
            if (count($ruleChain) === 0) {
                continue;
            }

            $current = [$node];
            $pendingOptions = [];
            foreach ($ruleChain as $rule) {
                if (!$rule['alterNext']) {
                    $current = $this->seek($current, $rule, $pendingOptions);
                    // options only apply to the rule that directly follows them
                    $pendingOptions = [];
                    continue;
                }

                $pendingOptions[] = $this->alterNext($rule);
            }

            // whatever survived the whole chain is a match for this selector
            foreach ($current as $matched) {
                $collected[] = $matched;
            }
        }

        return $collected;
    }
94
95
96
    /**
     * Collects the descendants of the given nodes that satisfy the rule.
     *
     * When the rule carries both a 'tag' and a numeric 'key' it is treated
     * as an index lookup: the given nodes themselves are scanned and the
     * key-th node whose tag matches is returned on its own.
     *
     * Otherwise every child of every node is tested with checkTag(),
     * checkKey() and checkComparison(). Children that fail but have children
     * of their own are searched recursively, unless the flattened options
     * contain a falsy 'checkGrandChildren' entry.
     *
     * @param array $nodes   Nodes whose children are inspected.
     * @param array $rule    Rule to match against.
     * @param array $options Option arrays, merged through flattenOptions().
     * @return array
     * @throws ChildNotFoundException
     */
    protected function seek(array $nodes, array $rule, array $options): array
    {
        // XPath index
        $isIndexLookup = array_key_exists('tag', $rule)
            && array_key_exists('key', $rule)
            && is_numeric($rule['key']);
        if ($isIndexLookup) {
            $seen = 0;
            /** @var AbstractNode $candidate */
            foreach ($nodes as $candidate) {
                if ($rule['tag'] != '*'
                    && $rule['tag'] != $candidate->getTag()->name()
                ) {
                    continue;
                }

                ++$seen;
                if ($seen == $rule['key']) {
                    // found the node we wanted
                    return [$candidate];
                }
            }

            return [];
        }

        $options = $this->flattenOptions($options);
        // the options never change below, so decide once whether we may descend
        $mayDescend = !isset($options['checkGrandChildren'])
            || $options['checkGrandChildren'];

        $found = [];
        /** @var InnerNode $parent */
        foreach ($nodes as $parent) {
            // leaves and childless nodes have nothing to offer
            if ($parent instanceof LeafNode || !$parent->hasChildren()) {
                continue;
            }

            $unmatchedParents = [];
            for (
                $child = $parent->firstChild();
                !is_null($child);
                $child = $this->getNextChild($parent, $child)
            ) {
                // wild card, grab all
                if ($rule['tag'] == '*' && is_null($rule['key'])) {
                    $found[] = $child;
                    continue;
                }

                $pass = $this->checkTag($rule, $child);
                if ($pass && !is_null($rule['key'])) {
                    $pass = $this->checkKey($rule, $child);
                }
                if ($pass
                    && !is_null($rule['key'])
                    && !is_null($rule['value'])
                    && $rule['value'] != '*'
                ) {
                    $pass = $this->checkComparison($rule, $child);
                }

                if ($pass) {
                    // it passed all checks
                    $found[] = $child;
                    continue;
                }

                // this child failed to be matched; only inner nodes with
                // children are worth a deeper look
                if (!($child instanceof InnerNode) || !$child->hasChildren()) {
                    continue;
                }

                if (!$this->depthFirst) {
                    // searched after all siblings have been handled
                    $unmatchedParents[] = $child;
                    continue;
                }

                if ($mayDescend) {
                    // depth first: search this subtree right away
                    foreach ($this->seek([$child], $rule, $options) as $match) {
                        $found[] = $match;
                    }
                }
            }

            if ($mayDescend && count($unmatchedParents) > 0) {
                // we have children that failed but are not leaves.
                foreach ($this->seek($unmatchedParents, $rule, $options) as $match) {
                    $found[] = $match;
                }
            }
        }

        return $found;
    }
202
203
    /**
     * Attempts to match the given arguments with the given operator.
     *
     * Both pattern and value are lower-cased first, so every comparison is
     * case-insensitive. Supported operators:
     *  - '='  value equals pattern
     *  - '!=' value differs from pattern
     *  - '^=' value starts with pattern
     *  - '$=' value ends with pattern
     *  - '*=' pattern is used as a regular expression: verbatim when it
     *         starts with '/', otherwise wrapped as '/pattern/i'
     * Any other operator yields false.
     *
     * @param string $operator
     * @param string $pattern
     * @param string $value
     * @return bool
     */
    protected function match(
        string $operator,
        string $pattern,
        string $value
    ): bool {
        $value = strtolower($value);
        $pattern = strtolower($pattern);
        switch ($operator) {
            case '=':
                return $value === $pattern;
            case '!=':
                return $value !== $pattern;
            case '^=':
                return preg_match('/^' . preg_quote($pattern, '/') . '/', $value) === 1;
            case '$=':
                return preg_match('/' . preg_quote($pattern, '/') . '$/', $value) === 1;
            case '*=':
                // An empty pattern has no offset 0; reading it would raise an
                // "Uninitialized string offset" warning, so test for it first.
                if ($pattern !== '' && $pattern[0] === '/') {
                    // caller supplied a complete, delimited expression
                    return preg_match($pattern, $value) === 1;
                }

                // NOTE(review): the pattern is inserted unescaped, so a
                // literal '/' or an invalid expression makes preg_match()
                // fail and this returns false.
                return preg_match('/' . $pattern . '/i', $value) === 1;
        }

        return false;
    }
238
239
    /**
     * Translates an altering rule into the options for the rule after it.
     *
     * Only the '>' tag is recognised: it turns off the grand children
     * check. Every other tag produces no options.
     *
     * @param array $rule
     * @return array
     */
    protected function alterNext(array $rule): array
    {
        return $rule['tag'] == '>'
            ? ['checkGrandChildren' => false]
            : [];
    }
254
255
    /**
     * Merges a list of option arrays into one key => value array.
     * When the same key occurs more than once, the last value wins.
     *
     * @param array $optionsArray List of option arrays.
     * @return array
     */
    protected function flattenOptions(array $optionsArray)
    {
        $flattened = [];
        foreach ($optionsArray as $group) {
            foreach ($group as $name => $setting) {
                $flattened[$name] = $setting;
            }
        }

        return $flattened;
    }
271
272
    /**
     * Returns the child that follows the current one, or null when the
     * parent is not an InnerNode or has no further child.
     *
     * @param AbstractNode $node         Parent whose children are walked.
     * @param AbstractNode $currentChild Child whose successor is wanted.
     * @return AbstractNode|null
     */
    protected function getNextChild(
      AbstractNode $node,
      AbstractNode $currentChild
    ) {
        if (!($node instanceof InnerNode)) {
            return null;
        }

        try {
            // get next child
            return $node->nextChild($currentChild->id());
        } catch (ChildNotFoundException $e) {
            // no more children
            return null;
        }
    }
295
296
    /**
     * Checks tag condition from rules against node.
     *
     * The node passes when the rule has no tag, when the rule's tag equals
     * the node's tag name, or when the rule's tag is the '*' wild card.
     *
     * @param array        $rule
     * @param AbstractNode $node
     * @return bool
     */
    protected function checkTag(array $rule, AbstractNode $node): bool
    {
        return empty($rule['tag'])
            || $rule['tag'] == $node->getTag()->name()
            || $rule['tag'] == '*';
    }
312
313
    /**
     * Checks key condition from rules against node.
     *
     * The rule's key may be a single attribute name or an array of names;
     * every name has to satisfy the condition. With 'noKey' set the
     * attribute must be null on the node. Otherwise the node must have the
     * attribute, except for the 'plaintext' key, which is not looked up as
     * an attribute.
     *
     * @param array        $rule
     * @param AbstractNode $node
     * @return bool
     */
    protected function checkKey(array $rule, AbstractNode $node): bool
    {
        // treat a single key like a one element list
        $keys = is_array($rule['key']) ? $rule['key'] : [$rule['key']];
        $mustBeAbsent = $rule['noKey'];

        foreach ($keys as $key) {
            if ($mustBeAbsent) {
                if (!is_null($node->getAttribute($key))) {
                    return false;
                }
            } elseif ($key != 'plaintext' && !$node->hasAttribute($key)) {
                return false;
            }
        }

        return true;
    }
353
354
    /**
     * Checks comparison condition from rules against node.
     *
     * A 'plaintext' key compares against the node's text. A single key
     * compares against that attribute's value. An array of keys compares
     * each attribute in turn, and all of them have to pass.
     *
     * @param array        $rule
     * @param AbstractNode $node
     * @return bool
     */
    public function checkComparison(array $rule, AbstractNode $node): bool
    {
        $key = $rule['key'];

        if ($key == 'plaintext') {
            // plaintext search
            return $this->checkNodeValue($node->text(), $rule, $node);
        }

        if (!is_array($key)) {
            // normal search on one attribute
            return $this->checkNodeValue($node->getAttribute($key), $rule, $node);
        }

        $allPassed = true;
        foreach ($key as $index => $attributeName) {
            $attributeValue = $node->getAttribute($attributeName);
            // after the first failure the remaining values are not checked
            if ($allPassed
                && !$this->checkNodeValue($attributeValue, $rule, $node, $index)
            ) {
                $allPassed = false;
            }
        }

        return $allPassed;
    }
383
384
    /**
     * Compares one node value against the rule's value using the rule's
     * operator.
     *
     * Three cases are handled:
     *  - the rule value is a string: it is matched against $nodeValue;
     *  - the rule key is 'class' and the rule value is an array: every
     *    listed class has to match one of the node's classes;
     *  - the rule key is an array: the rule value at $index is matched
     *    against $nodeValue.
     * A null $nodeValue (attribute without a value) never matches, since
     * match() only accepts strings.
     *
     * @param string|null  $nodeValue Value read from the node; may be null.
     * @param array        $rule      Rule with 'key', 'value' and 'operator'.
     * @param AbstractNode $node      Node being checked; used to read 'class'.
     * @param int|null     $index     Position in the rule's value array when
     *                                the rule key is an array.
     * @return bool
     */
    private function checkNodeValue(
        ?string $nodeValue,
        array $rule,
        AbstractNode $node,
        ?int $index = null
    ) : bool {
        $check = false;
        // match() is declared with string parameters and the file runs under
        // strict_types, so a null node value must never reach it
        if (!is_null($nodeValue) && is_string($rule['value'])) {
            $check = $this->match($rule['operator'], $rule['value'], $nodeValue);
        }

        // handle multiple classes
        $key = $rule['key'];
        if (!$check && $key == 'class' && is_array($rule['value'])) {
            // class names are separated by any whitespace; empty tokens are dropped
            $nodeClasses = preg_split(
                '/\s+/',
                $node->getAttribute('class') ?? '',
                -1,
                PREG_SPLIT_NO_EMPTY
            ) ?: [];
            foreach ($rule['value'] as $value) {
                // every required class starts unmatched, so a previous
                // success cannot leak into this one
                $check = false;
                foreach ($nodeClasses as $class) {
                    if ($this->match($rule['operator'], $value, $class)) {
                        $check = true;
                        break;
                    }
                }
                if (!$check) {
                    // one required class is missing, no need to look further
                    break;
                }
            }
        } elseif (
            !$check
            && is_array($key)
            && !is_null($nodeValue)
            && !is_null($index)
            && isset($rule['value'][$index])
            && is_string($rule['value'][$index])
        ) {
            $check = $this->match($rule['operator'], $rule['value'][$index], $nodeValue);
        }

        return $check;
    }
425
}
426