Completed
Push — master ( 77e4a4...d10009 )
by Gilles
03:09
created

Selector::seek()   D

Complexity

Conditions 31
Paths 72

Size

Total Lines 96
Code Lines 51

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 49
CRAP Score 31.0076

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 31
eloc 51
c 1
b 0
f 0
nc 72
nop 3
dl 0
loc 96
ccs 49
cts 50
cp 0.98
crap 31.0076
rs 4.1666

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive fragment of the long method into its own, well-named method.

1
<?php declare(strict_types=1);
2
namespace PHPHtmlParser\Selector;
3
4
use PHPHtmlParser\Dom\AbstractNode;
5
use PHPHtmlParser\Dom\Collection;
6
use PHPHtmlParser\Dom\InnerNode;
7
use PHPHtmlParser\Dom\LeafNode;
8
use PHPHtmlParser\Exceptions\ChildNotFoundException;
9
10
/**
 * Class Selector
 *
 * Holds a parsed selector and evaluates it against a node tree,
 * collecting every node that satisfies the selector's rules.
 *
 * Each rule is an array. The keys read by this class are 'tag', 'key',
 * 'value', 'operator', 'noKey' and 'alterNext'.
 *
 * @package PHPHtmlParser
 */
class Selector
{

    /**
     * The parsed selectors as returned by the parser: a list of
     * selectors, each of which is a list of rule arrays.
     *
     * @var array
     */
    protected $selectors = [];

    /**
     * When true, a non-matching child with children is searched
     * immediately; when false it is queued and searched after all of
     * its siblings have been checked.
     *
     * @var bool
     */
    private $depthFirst = false;

    /**
     * Constructs with the selector string
     *
     * @param string          $selector
     * @param ParserInterface $parser Turns the selector string into rules.
     */
    public function __construct(string $selector, ParserInterface $parser)
    {
        $this->selectors = $parser->parseSelectorString($selector);
    }

    /**
     * Returns the selectors that were found in __construct
     *
     * @return array
     */
    public function getSelectors()
    {
        return $this->selectors;
    }

    /**
     * Switches between depth-first and sibling-first traversal.
     *
     * @param bool $status
     * @return void
     */
    public function setDepthFirstFind(bool $status): void
    {
        $this->depthFirst = $status;
    }

    /**
     * Attempts to find the selectors starting from the given
     * node object.
     *
     * Every selector is evaluated independently from $node; the rules of
     * one selector are applied in sequence, each one narrowing the node
     * set produced by the previous rule.
     *
     * @param AbstractNode $node
     * @return Collection
     * @throws ChildNotFoundException
     */
    public function find(AbstractNode $node): Collection
    {
        $results = new Collection();
        foreach ($this->selectors as $selector) {
            if (count($selector) == 0) {
                continue;
            }

            $nodes   = [$node];
            $options = [];
            foreach ($selector as $rule) {
                if ($rule['alterNext']) {
                    // not a real rule: it only modifies how the next rule is applied
                    $options[] = $this->alterNext($rule);
                    continue;
                }
                $nodes = $this->seek($nodes, $rule, $options);
                // options only apply to the rule that directly follows them
                $options = [];
            }

            // this is the final set of nodes
            foreach ($nodes as $result) {
                $results[] = $result;
            }
        }

        return $results;
    }

    /**
     * Attempts to find all children that match the rule
     * given.
     *
     * A rule with a numeric 'key' is treated as an index lookup on $nodes
     * themselves. Otherwise the children of every node are checked and,
     * unless the 'checkGrandChildren' option is false, non-matching
     * children are searched recursively.
     *
     * @param array $nodes
     * @param array $rule
     * @param array $options List of option arrays (see alterNext()).
     *
     * @return array
     * @throws ChildNotFoundException
     */
    protected function seek(array $nodes, array $rule, array $options): array
    {
        // XPath index
        if (array_key_exists('tag', $rule) &&
            array_key_exists('key', $rule) &&
            is_numeric($rule['key'])
        ) {
            return $this->seekByIndex($nodes, $rule);
        }

        $options = $this->flattenOptions($options);
        $descend = $this->shouldCheckGrandChildren($options);
        // wild card without an attribute condition: every child is a match
        $grabAll = $rule['tag'] === '*' && is_null($rule['key']);

        $return = [];
        /** @var InnerNode $node */
        foreach ($nodes as $node) {
            // leaves have nothing to search
            if ($node instanceof LeafNode ||
                ! $node->hasChildren()
            ) {
                continue;
            }

            $pending = [];
            for (
                $child = $node->firstChild();
                ! is_null($child);
                $child = $this->getNextChild($node, $child)
            ) {
                if ($grabAll || $this->matchesRule($rule, $child)) {
                    $return[] = $child;
                    continue;
                }

                // this child failed to be matched; only inner nodes with
                // children can still contain a match
                if ( ! ($child instanceof InnerNode) ||
                    ! $child->hasChildren()
                ) {
                    continue;
                }

                if ( ! $this->depthFirst) {
                    // we still want to check its children, after its siblings
                    $pending[] = $child;
                    continue;
                }

                if ($descend) {
                    // $options is already flat, so it is wrapped again
                    // because seek() expects a list of option arrays
                    foreach ($this->seek([$child], $rule, [$options]) as $match) {
                        $return[] = $match;
                    }
                }
            }

            if ($descend && count($pending) > 0) {
                // we have children that failed but are not leaves.
                foreach ($this->seek($pending, $rule, [$options]) as $match) {
                    $return[] = $match;
                }
            }
        }

        return $return;
    }

    /**
     * Returns the n-th node of $nodes whose tag matches the rule, where n
     * is the (numeric) rule key; counting starts at 1.
     *
     * @param array $nodes
     * @param array $rule
     * @return array Single-element array, or empty when there is no such node.
     */
    private function seekByIndex(array $nodes, array $rule): array
    {
        $count = 0;
        /** @var AbstractNode $node */
        foreach ($nodes as $node) {
            if ($rule['tag'] === '*' ||
                $rule['tag'] === $node->getTag()->name()
            ) {
                ++$count;
                // loose comparison on purpose: the key is a numeric string
                if ($count == $rule['key']) {
                    // found the node we wanted
                    return [$node];
                }
            }
        }

        return [];
    }

    /**
     * Runs the tag, key and comparison checks of a rule against a node,
     * stopping at the first check that fails.
     *
     * @param array        $rule
     * @param AbstractNode $child
     * @return bool
     */
    private function matchesRule(array $rule, AbstractNode $child): bool
    {
        if ( ! $this->checkTag($rule, $child)) {
            return false;
        }
        if (is_null($rule['key'])) {
            // no attribute condition on this rule
            return true;
        }
        if ( ! $this->checkKey($rule, $child)) {
            return false;
        }
        if (is_null($rule['value']) || $rule['value'] === '*') {
            // the key only has to be present, whatever its value
            return true;
        }

        return $this->checkComparison($rule, $child);
    }

    /**
     * Tells whether non-matching children may be searched further. This
     * is the case unless the 'checkGrandChildren' option is set to a
     * falsy value.
     *
     * @param array $options Flattened options.
     * @return bool
     */
    private function shouldCheckGrandChildren(array $options): bool
    {
        return ! isset($options['checkGrandChildren']) ||
            $options['checkGrandChildren'];
    }

    /**
     * Attempts to match the given arguments with the given operator.
     *
     * Both pattern and value are lower-cased before comparing, so every
     * operator is case-insensitive. Note that this also lower-cases a
     * regular expression passed to '*='.
     *
     * Supported operators: '=', '!=', '^=' (starts with), '$=' (ends
     * with) and '*=' (regular expression; a pattern starting with '/' is
     * used as given, anything else is wrapped in '/.../i').
     *
     * @param string $operator
     * @param string $pattern
     * @param string $value
     * @return bool False for an unknown operator.
     */
    protected function match(string $operator, string $pattern, string $value): bool
    {
        $value   = strtolower($value);
        $pattern = strtolower($pattern);
        switch ($operator) {
            case '=':
                return $value === $pattern;
            case '!=':
                return $value !== $pattern;
            case '^=':
                return preg_match('/^'.preg_quote($pattern, '/').'/', $value) === 1;
            case '$=':
                return preg_match('/'.preg_quote($pattern, '/').'$/', $value) === 1;
            case '*=':
                // strpos() instead of $pattern[0]: the pattern may be empty
                if (strpos($pattern, '/') === 0) {
                    return preg_match($pattern, $value) === 1;
                }

                return preg_match('/'.$pattern.'/i', $value) === 1;
        }

        return false;
    }

    /**
     * Attempts to figure out what the alteration will be for
     * the next element.
     *
     * Only the '>' tag is recognised; it disables the search in
     * grandchildren for the next rule.
     *
     * @param array $rule
     * @return array
     */
    protected function alterNext(array $rule): array
    {
        if ($rule['tag'] == '>') {
            return ['checkGrandChildren' => false];
        }

        return [];
    }

    /**
     * Flattens the option array.
     *
     * Merges a list of option arrays into a single array; when a key
     * occurs more than once the last occurrence wins.
     *
     * @param array $optionsArray
     * @return array
     */
    protected function flattenOptions(array $optionsArray)
    {
        $options = [];
        foreach ($optionsArray as $optionArray) {
            foreach ($optionArray as $key => $option) {
                $options[$key] = $option;
            }
        }

        return $options;
    }

    /**
     * Returns the next child or null if no more children.
     *
     * @param AbstractNode $node
     * @param AbstractNode $currentChild
     * @return AbstractNode|null
     */
    protected function getNextChild(AbstractNode $node, AbstractNode $currentChild)
    {
        if ( ! ($node instanceof InnerNode)) {
            return null;
        }

        try {
            // get next child
            return $node->nextChild($currentChild->id());
        } catch (ChildNotFoundException $e) {
            // no more children
            return null;
        }
    }

    /**
     * Checks tag condition from rules against node.
     *
     * An empty tag and the wild card '*' accept every node.
     *
     * @param array $rule
     * @param AbstractNode $node
     * @return bool
     */
    protected function checkTag(array $rule, AbstractNode $node): bool
    {
        if (empty($rule['tag']) || $rule['tag'] === '*') {
            return true;
        }

        return $rule['tag'] === $node->getTag()->name();
    }

    /**
     * Checks key condition from rules against node.
     *
     * With 'noKey' set the node passes when getAttribute() returns null
     * for the key; otherwise the attribute has to exist, except for the
     * pseudo key 'plaintext' which always passes.
     *
     * @param array $rule
     * @param AbstractNode $node
     * @return bool
     */
    protected function checkKey(array $rule, AbstractNode $node): bool
    {
        if ($rule['noKey']) {
            return is_null($node->getAttribute($rule['key']));
        }

        return $rule['key'] == 'plaintext' || $node->hasAttribute($rule['key']);
    }

    /**
     * Checks comparison condition from rules against node.
     *
     * The value compared is the node text for the key 'plaintext' and the
     * attribute value otherwise. For the key 'class' the attribute is
     * additionally split on spaces, and every requested value has to
     * match at least one of the resulting class names.
     *
     * @param array $rule
     * @param AbstractNode $node
     * @return bool False when the node has no string value for the key.
     */
    public function checkComparison(array $rule, AbstractNode $node): bool
    {
        if ($rule['key'] === 'plaintext') {
            // plaintext search
            $nodeValue = $node->text();
        } else {
            // normal search
            $nodeValue = $node->getAttribute($rule['key']);
        }

        // nothing to compare against; match() and explode() would
        // reject a non-string under strict_types
        if ( ! is_string($nodeValue)) {
            return false;
        }

        if ( ! is_array($rule['value']) &&
            $this->match($rule['operator'], $rule['value'], $nodeValue)
        ) {
            return true;
        }

        if ($rule['key'] !== 'class') {
            return false;
        }

        // handle multiple classes; a single string value is treated as a
        // one-element list
        $nodeClasses = explode(' ', $nodeValue);
        $check       = false;
        foreach ((array) $rule['value'] as $value) {
            // start over for every value, a previous hit must not carry over
            $check = false;
            foreach ($nodeClasses as $class) {
                // consecutive spaces produce empty tokens, skip those
                if ($class !== '' &&
                    $this->match($rule['operator'], $value, $class)
                ) {
                    $check = true;
                    break;
                }
            }
            if ( ! $check) {
                break;
            }
        }

        return $check;
    }
}
376