Passed
Pull Request — master (#197)
by
unknown
12:39
created

Selector::match()   B

Complexity

Conditions 7
Paths 7

Size

Total Lines 27
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 20.2773

Importance

Changes 0
Metric Value
cc 7
eloc 18
nc 7
nop 3
dl 0
loc 27
ccs 6
cts 17
cp 0.3529
crap 20.2773
rs 8.8333
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace PHPHtmlParser\Selector;
6
7
use PHPHtmlParser\Dom\AbstractNode;
8
use PHPHtmlParser\Dom\Collection;
9
use PHPHtmlParser\Dom\InnerNode;
10
use PHPHtmlParser\Dom\LeafNode;
11
use PHPHtmlParser\Exceptions\ChildNotFoundException;
12
13
/**
14
 * Class Selector
15
 *
16
 * @package PHPHtmlParser
17
 */
18
class Selector
19
{
20
21
    /**
22
     * @var array
23
     */
24
    protected $selectors = [];
25
26
    /**
27
     * @var bool
28
     */
29
    private $depthFirst = false;
30
31
    /**
     * Builds the selector by handing the raw selector string to the parser
     * and keeping whatever rule sets the parser returns.
     *
     * @param string          $selector Selector expression to parse.
     * @param ParserInterface $parser   Parser whose parseSelectorString()
     *                                  result is stored on this instance.
     */
    public function __construct(string $selector, ParserInterface $parser)
    {
        $parsed = $parser->parseSelectorString($selector);

        $this->selectors = $parsed;
    }
40
41
    /**
     * Exposes the rule sets that were produced by the parser in __construct.
     *
     * @return array
     */
    public function getSelectors()
    {
        $parsed = $this->selectors;

        return $parsed;
    }
49
50
    /**
     * Switches the traversal order used by seek().
     *
     * When enabled, a non-matching child that has children of its own is
     * searched immediately; when disabled, such children are collected and
     * searched after all siblings have been examined.
     *
     * @param bool $status True to search depth first.
     * @return void
     */
    public function setDepthFirstFind(bool $status): void
    {
        $this->depthFirst = $status;
    }
58
59
    /**
     * Runs every parsed selector against the given node and gathers the
     * nodes that survive all of its rules.
     *
     * A selector is a list of rules. A rule flagged 'alterNext' is not
     * matched itself: alterNext() converts it into an option set that is
     * handed to the next matching rule only. Every other rule narrows the
     * working node list through seek().
     *
     * @param AbstractNode $node Node the search starts from.
     * @return Collection Matches of each selector, appended in selector order.
     * @throws ChildNotFoundException
     */
    public function find(AbstractNode $node): Collection
    {
        $found = new Collection();

        foreach ($this->selectors as $ruleSet) {
            // an empty rule set contributes nothing
            if (count($ruleSet) == 0) {
                continue;
            }

            $current = [$node];
            $pending = [];
            foreach ($ruleSet as $rule) {
                if (!$rule['alterNext']) {
                    $current = $this->seek($current, $rule, $pending);
                    // options only ever apply to the rule that follows them
                    $pending = [];
                } else {
                    $pending[] = $this->alterNext($rule);
                }
            }

            // whatever is left after the last rule is the result of this selector
            foreach ($current as $hit) {
                $found[] = $hit;
            }
        }

        return $found;
    }
94
95
96
    /**
     * Collects the descendants of the given nodes that satisfy one rule.
     *
     * Two modes exist:
     *  - When the rule has both a 'tag' and a numeric 'key', the key is used
     *    as a 1-based position: the n-th entry of $nodes whose tag matches
     *    (or any entry for tag '*') is returned on its own.
     *  - Otherwise the children of every node are tested with checkTag(),
     *    checkKey() and checkComparison(). A child that fails but has
     *    children itself is searched recursively, unless the flattened
     *    options carry a falsy 'checkGrandChildren'.
     *
     * @param array $nodes   Nodes whose children are examined.
     * @param array $rule    Parsed rule (reads 'tag', 'key', 'value').
     * @param array $options Option sets gathered from preceding alterNext rules.
     * @return array Matching nodes.
     * @throws ChildNotFoundException
     */
    protected function seek(array $nodes, array $rule, array $options): array
    {
        // XPath index
        $isIndexLookup = array_key_exists('tag', $rule)
            && array_key_exists('key', $rule)
            && is_numeric($rule['key']);
        if ($isIndexLookup) {
            $seen = 0;
            /** @var AbstractNode $candidate */
            foreach ($nodes as $candidate) {
                $tagMatches = $rule['tag'] == '*'
                    || $rule['tag'] == $candidate->getTag()->name();
                if (!$tagMatches) {
                    continue;
                }
                if (++$seen == $rule['key']) {
                    // found the node we wanted
                    return [$candidate];
                }
            }

            return [];
        }

        $options = $this->flattenOptions($options);
        // the option map does not change below, so decide once whether
        // non-matching children may be searched further down
        $descend = !isset($options['checkGrandChildren'])
            || $options['checkGrandChildren'];

        $found = [];
        /** @var InnerNode $parent */
        foreach ($nodes as $parent) {
            // leaves and childless nodes have nothing to offer
            if ($parent instanceof LeafNode || !$parent->hasChildren()) {
                continue;
            }

            // failed children that still need their own children checked
            $deferred = [];
            for (
                $child = $parent->firstChild();
                !is_null($child);
                $child = $this->getNextChild($parent, $child)
            ) {
                // wild card without a key, grab every child
                if ($rule['tag'] == '*' && is_null($rule['key'])) {
                    $found[] = $child;
                    continue;
                }

                $matched = $this->checkTag($rule, $child);
                if ($matched && !is_null($rule['key'])) {
                    $matched = $this->checkKey($rule, $child);
                    if ($matched
                        && !is_null($rule['value'])
                        && $rule['value'] != '*'
                    ) {
                        $matched = $this->checkComparison($rule, $child);
                    }
                }

                if ($matched) {
                    // it passed all checks
                    $found[] = $child;
                    continue;
                }

                // this child failed; only inner nodes with children matter now
                if (!($child instanceof InnerNode) || !$child->hasChildren()) {
                    continue;
                }

                if (!$this->depthFirst) {
                    // breadth first: look at it once all siblings are done
                    $deferred[] = $child;
                    continue;
                }

                if ($descend) {
                    // depth first: search below this child right away
                    foreach ($this->seek([$child], $rule, $options) as $nested) {
                        $found[] = $nested;
                    }
                }
            }

            if ($descend && count($deferred) > 0) {
                // we have children that failed but are not leaves
                foreach ($this->seek($deferred, $rule, $options) as $nested) {
                    $found[] = $nested;
                }
            }
        }

        return $found;
    }
202
203
    /**
     * Compares a node value against a rule pattern with the given operator.
     *
     * Both sides are lower-cased first, so every comparison is case
     * insensitive. Supported operators:
     *  - '='  exact equality
     *  - '!=' inequality
     *  - '^=' value starts with pattern
     *  - '$=' value ends with pattern
     *  - '*=' pattern is used as a regular expression. A pattern that starts
     *         with '/' is treated as a complete expression including its
     *         delimiters; anything else is wrapped as /pattern/i.
     * Any other operator yields false.
     *
     * @param string $operator
     * @param string $pattern
     * @param string $value
     * @return bool
     */
    protected function match(
        string $operator,
        string $pattern,
        string $value
    ): bool {
        $value = strtolower($value);
        $pattern = strtolower($pattern);

        switch ($operator) {
            case '=':
                return $value === $pattern;
            case '!=':
                return $value !== $pattern;
            case '^=':
                return preg_match(
                    '/^' . preg_quote($pattern, '/') . '/',
                    $value
                ) === 1;
            case '$=':
                return preg_match(
                    '/' . preg_quote($pattern, '/') . '$/',
                    $value
                ) === 1;
            case '*=':
                // guard the offset read: an empty pattern has no first character
                if ($pattern !== '' && $pattern[0] === '/') {
                    return preg_match($pattern, $value) === 1;
                }

                // The pattern is wrapped in '/' delimiters, so a bare '/'
                // inside it (e.g. "example.com/docs") would end the
                // expression early and make preg_match() fail. Escape every
                // slash that is not already preceded by a backslash; patterns
                // that compiled before are left untouched.
                $escaped = preg_replace('~(?<!\\\\)/~', '\\\\/', $pattern) ?? $pattern;

                return preg_match('/' . $escaped . '/i', $value) === 1;
        }

        return false;
    }
238
239
    /**
     * Translates an alteration rule into the options for the next rule.
     *
     * Only the '>' tag is recognised; it disables searching below
     * non-matching children. Every other tag yields no options.
     *
     * @param array $rule
     * @return array
     */
    protected function alterNext(array $rule): array
    {
        return $rule['tag'] == '>'
            ? ['checkGrandChildren' => false]
            : [];
    }
254
255
    /**
     * Merges a list of option sets into a single key => value map.
     *
     * Sets are applied in order, so a key that occurs more than once ends
     * up with the value from the last set that contains it.
     *
     * @param array $optionsArray List of option sets.
     * @return array
     */
    protected function flattenOptions(array $optionsArray)
    {
        return array_reduce(
            $optionsArray,
            static function (array $merged, $group): array {
                foreach ($group as $name => $setting) {
                    $merged[$name] = $setting;
                }

                return $merged;
            },
            []
        );
    }
271
272
    /**
     * Fetches the sibling that follows $currentChild inside $node.
     *
     * Null is returned when $node is not an InnerNode, or when nextChild()
     * throws ChildNotFoundException.
     *
     * @param AbstractNode $node         Parent being iterated.
     * @param AbstractNode $currentChild Child whose successor is wanted.
     * @return AbstractNode|null
     */
    protected function getNextChild(
        AbstractNode $node,
        AbstractNode $currentChild
    ) {
        if (!($node instanceof InnerNode)) {
            return null;
        }

        try {
            // get next child
            return $node->nextChild($currentChild->id());
        } catch (ChildNotFoundException $e) {
            // no more children
            return null;
        }
    }
295
296
    /**
     * Tells whether the node's tag satisfies the rule's tag condition.
     *
     * An empty rule tag accepts every node; otherwise the rule tag must
     * equal the node's tag name or be the wildcard '*'.
     *
     * @param array        $rule
     * @param AbstractNode $node
     * @return bool
     */
    protected function checkTag(array $rule, AbstractNode $node): bool
    {
        if (empty($rule['tag'])) {
            return true;
        }

        $wanted = $rule['tag'];
        $actual = $node->getTag()->name();

        return $wanted == $actual || $wanted == '*';
    }
312
313
    /**
     * Tells whether the node satisfies the rule's attribute-presence condition.
     *
     * The rule key may be one attribute name or a list of names. With
     * 'noKey' set, every named attribute must come back as null from
     * getAttribute(). Without it, every named attribute must exist on the
     * node; the pseudo key 'plaintext' is exempt from that check.
     *
     * @param array        $rule
     * @param AbstractNode $node
     * @return bool
     */
    protected function checkKey(array $rule, AbstractNode $node): bool
    {
        // treat a single key like a one-element list so both shapes share one loop
        $keys = is_array($rule['key']) ? $rule['key'] : [$rule['key']];
        $mustBeAbsent = $rule['noKey'];

        foreach ($keys as $key) {
            if ($mustBeAbsent) {
                if (!is_null($node->getAttribute($key))) {
                    return false;
                }
            } elseif ($key != 'plaintext' && !$node->hasAttribute($key)) {
                return false;
            }
        }

        return true;
    }
353
354
    /**
     * Tells whether the node's value(s) satisfy the rule's comparison.
     *
     * For the key 'plaintext' the node's text() is compared. For a single
     * attribute key that attribute's value is compared. For a list of keys
     * each attribute is compared at its own index and all of them must pass.
     *
     * @param array        $rule
     * @param AbstractNode $node
     * @return bool
     */
    public function checkComparison(array $rule, AbstractNode $node): bool
    {
        $key = $rule['key'];

        if ($key == 'plaintext') {
            // plaintext search
            return $this->checkNodeValue($node->text(), $rule, $node);
        }

        if (!is_array($key)) {
            // normal search on one attribute
            return $this->checkNodeValue($node->getAttribute($key), $rule, $node);
        }

        // several attributes: every one of them has to match
        $allMatch = true;
        foreach ($key as $position => $attribute) {
            $current = $node->getAttribute($attribute);
            if ($allMatch) {
                $allMatch = $this->checkNodeValue($current, $rule, $node, $position);
            }
        }

        return $allMatch;
    }
383
384
    /**
     * Compares one node value against the rule's value(s).
     *
     * Steps, in order:
     *  1. If the rule value is a single string and the node value is not
     *     null, compare them directly with match().
     *  2. If that did not succeed and the rule key is 'class' with a list of
     *     values, every listed value must match at least one of the node's
     *     space-separated class tokens.
     *  3. If the rule key is a list, compare the node value with the rule
     *     value stored at $index.
     *
     * A null node value never matches: match() only accepts strings and the
     * file runs with strict_types, so passing null would raise a TypeError.
     *
     * @param string|null  $nodeValue Value read from the node, null when absent.
     * @param array        $rule      Parsed rule (reads 'key', 'value', 'operator').
     * @param AbstractNode $node      Node being tested (used for its class attribute).
     * @param int|null     $index     Position inside a multi-key rule, if any.
     * @return bool
     */
    private function checkNodeValue(
        ?string $nodeValue,
        array $rule,
        AbstractNode $node,
        ?int $index = null
    ) : bool {
        if (is_string($rule['value'])
            && !is_null($nodeValue)
            && $this->match($rule['operator'], $rule['value'], $nodeValue)
        ) {
            return true;
        }

        $key = $rule['key'];

        // handle multiple classes
        if ($key == 'class') {
            // only a list of class names can be checked token by token
            if (!is_array($rule['value'])) {
                return false;
            }

            $nodeClasses = explode(' ', $node->getAttribute('class') ?? '');
            $check = false;
            foreach ($rule['value'] as $value) {
                // start every value from scratch so that a hit for the
                // previous value cannot leak into this one
                $check = false;
                foreach ($nodeClasses as $class) {
                    // skip the empty tokens produced by repeated spaces,
                    // but keep tokens such as "0"
                    if ($class !== ''
                        && $this->match($rule['operator'], $value, $class)
                    ) {
                        $check = true;
                        break;
                    }
                }
                if (!$check) {
                    // one required class is missing, no need to go on
                    break;
                }
            }

            return $check;
        }

        // multi-key rule: compare against the value that belongs to this key
        if (is_array($key)
            && !is_null($nodeValue)
            && !is_null($index)
            && isset($rule['value'][$index])
            && is_string($rule['value'][$index])
        ) {
            return $this->match(
                $rule['operator'],
                $rule['value'][$index],
                $nodeValue
            );
        }

        return false;
    }
425
}
426