Completed
Push — master ( 77e4a4...d10009 )
by Gilles
03:09
created

Selector::alterNext()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 4
nc 2
nop 1
dl 0
loc 8
ccs 5
cts 5
cp 1
crap 2
rs 10
c 0
b 0
f 0
1
<?php declare(strict_types=1);
2
namespace PHPHtmlParser\Selector;
3
4
use PHPHtmlParser\Dom\AbstractNode;
5
use PHPHtmlParser\Dom\Collection;
6
use PHPHtmlParser\Dom\InnerNode;
7
use PHPHtmlParser\Dom\LeafNode;
8
use PHPHtmlParser\Exceptions\ChildNotFoundException;
9
10
/**
 * Class Selector
 *
 * Holds the rules produced by parsing a selector string and walks a node
 * tree to collect every node that satisfies them.
 *
 * @package PHPHtmlParser
 */
class Selector
{

    /**
     * Parsed selectors: a list of selectors, each one a list of rule arrays.
     *
     * @var array
     */
    protected $selectors = [];

    /**
     * When true, the subtree of a non-matching child is searched right away;
     * otherwise such children are queued and searched after their siblings.
     *
     * @var bool
     */
    private $depthFirst = false;

    /**
     * Constructs with the selector string
     *
     * @param string          $selector Raw selector string.
     * @param ParserInterface $parser   Parser that turns the string into rules.
     */
    public function __construct(string $selector, ParserInterface $parser)
    {
        $this->selectors = $parser->parseSelectorString($selector);
    }

    /**
     * Returns the selectors that were found in __construct
     *
     * @return array
     */
    public function getSelectors()
    {
        return $this->selectors;
    }

    /**
     * Switches between depth first and breadth first traversal in seek().
     *
     * @param bool $status
     * @return void
     */
    public function setDepthFirstFind(bool $status): void
    {
        $this->depthFirst = $status;
    }

    /**
     * Attempts to find the selectors starting from the given
     * node object.
     *
     * Every selector is evaluated independently from $node; the nodes left
     * after its last rule are appended to one shared result collection.
     *
     * @param AbstractNode $node
     * @return Collection
     * @throws ChildNotFoundException
     */
    public function find(AbstractNode $node): Collection
    {
        $results = new Collection();
        foreach ($this->selectors as $selector) {
            if (count($selector) == 0) {
                continue;
            }

            $nodes   = [$node];
            $options = [];
            foreach ($selector as $rule) {
                if ( ! empty($rule['alterNext'])) {
                    // this rule only modifies how the next rule is applied
                    $options[] = $this->alterNext($rule);
                    continue;
                }
                $nodes = $this->seek($nodes, $rule, $options);
                // options only ever apply to the rule directly following them
                $options = [];
            }

            // this is the final set of nodes
            foreach ($nodes as $result) {
                $results[] = $result;
            }
        }

        return $results;
    }

    /**
     * Attempts to find all children that match the rule
     * given.
     *
     * A rule with a numeric key is treated as an index lookup among the
     * given nodes themselves. Any other rule is checked against the children
     * of the given nodes and, unless the 'checkGrandChildren' option is
     * false, against the descendants of children that did not match.
     *
     * @param array $nodes
     * @param array $rule
     * @param array $options Either a list of option arrays or an already
     *                       flattened option map (used when recursing).
     *
     * @return array
     * @throws ChildNotFoundException
     */
    protected function seek(array $nodes, array $rule, array $options): array
    {
        // XPath index
        if (array_key_exists('tag', $rule) &&
            array_key_exists('key', $rule) &&
            is_numeric($rule['key'])
        ) {
            $count = 0;
            /** @var AbstractNode $node */
            foreach ($nodes as $node) {
                if ($rule['tag'] == '*' ||
                    $rule['tag'] == $node->getTag()->name()
                ) {
                    ++$count;
                    if ($count == $rule['key']) {
                        // found the node we wanted
                        return [$node];
                    }
                }
            }

            return [];
        }

        $options = $this->flattenOptions($options);

        // The rule does not change while we walk, so evaluate it once.
        $tag      = $rule['tag'] ?? null;
        $key      = $rule['key'] ?? null;
        $value    = $rule['value'] ?? null;
        $hasKey   = ! is_null($key);
        $hasValue = ! is_null($value) && $value != '*';
        $grabAll  = $tag == '*' && ! $hasKey;
        $descend  = ! isset($options['checkGrandChildren']) ||
            $options['checkGrandChildren'];

        $return = [];
        /** @var InnerNode $node */
        foreach ($nodes as $node) {
            // check if we are a leaf
            if ($node instanceof LeafNode ||
                ! $node->hasChildren()
            ) {
                continue;
            }

            $children = [];
            $child    = $node->firstChild();
            while ( ! is_null($child)) {
                // wild card, grab all
                if ($grabAll) {
                    $return[] = $child;
                    $child    = $this->getNextChild($node, $child);
                    continue;
                }

                $pass = $this->checkTag($rule, $child);
                if ($pass && $hasKey) {
                    $pass = $this->checkKey($rule, $child);
                }
                if ($pass && $hasKey && $hasValue) {
                    $pass = $this->checkComparison($rule, $child);
                }

                if ($pass) {
                    // it passed all checks
                    $return[] = $child;
                } elseif ($child instanceof InnerNode && $child->hasChildren()) {
                    // this child failed to be matched but is not a leaf
                    if ( ! $this->depthFirst) {
                        // we still want to check its children later on
                        $children[] = $child;
                    } elseif ($descend) {
                        foreach ($this->seek([$child], $rule, $options) as $match) {
                            $return[] = $match;
                        }
                    }
                }

                $child = $this->getNextChild($node, $child);
            }

            if ($descend && count($children) > 0) {
                // we have children that failed but are not leaves.
                foreach ($this->seek($children, $rule, $options) as $match) {
                    $return[] = $match;
                }
            }
        }

        return $return;
    }

    /**
     * Attempts to match the given arguments with the given operator.
     *
     * Both strings are lower-cased first, so every comparison is case
     * insensitive. Supported operators are '=', '!=', '^=' (starts with),
     * '$=' (ends with) and '*=' (regular expression); any other operator
     * never matches.
     *
     * NOTE: for '*=' the lower-casing is applied to the expression itself as
     * well, so upper-case escape sequences in it are altered before use.
     *
     * @param string $operator
     * @param string $pattern
     * @param string $value
     * @return bool
     */
    protected function match(string $operator, string $pattern, string $value): bool
    {
        $value   = strtolower($value);
        $pattern = strtolower($pattern);
        switch ($operator) {
            case '=':
                return $value === $pattern;
            case '!=':
                return $value !== $pattern;
            case '^=':
                return preg_match('/^'.preg_quote($pattern, '/').'/', $value) == 1;
            case '$=':
                return preg_match('/'.preg_quote($pattern, '/').'$/', $value) == 1;
            case '*=':
                // A pattern that already starts with a delimiter is used as
                // is. The emptiness check avoids reading offset 0 of ''.
                if ($pattern !== '' && $pattern[0] == '/') {
                    return preg_match($pattern, $value) == 1;
                }

                return preg_match("/".$pattern."/i", $value) == 1;
        }

        return false;
    }

    /**
     * Attempts to figure out what the alteration will be for
     * the next element.
     *
     * Only '>' is recognised: it disables the search in grand children.
     *
     * @param array $rule
     * @return array
     */
    protected function alterNext(array $rule): array
    {
        $options = [];
        if ($rule['tag'] == '>') {
            $options['checkGrandChildren'] = false;
        }

        return $options;
    }

    /**
     * Flattens the option array.
     *
     * Merges a list of option arrays into a single map; on duplicate names
     * the later array wins. Entries that are not arrays are kept under their
     * own key, which makes flattening an already flat map a no-op. seek()
     * relies on that because it hands its flattened options back in when it
     * recurses.
     *
     * @param array $optionsArray
     * @return array
     */
    protected function flattenOptions(array $optionsArray)
    {
        $options = [];
        foreach ($optionsArray as $key => $optionArray) {
            if ( ! is_array($optionArray)) {
                $options[$key] = $optionArray;
                continue;
            }
            foreach ($optionArray as $name => $option) {
                $options[$name] = $option;
            }
        }

        return $options;
    }

    /**
     * Returns the next child or null if no more children.
     *
     * @param AbstractNode $node
     * @param AbstractNode $currentChild
     * @return AbstractNode|null
     */
    protected function getNextChild(AbstractNode $node, AbstractNode $currentChild)
    {
        if ( ! $node instanceof InnerNode) {
            return null;
        }

        try {
            // get next child
            return $node->nextChild($currentChild->id());
        } catch (ChildNotFoundException $e) {
            // no more children
            return null;
        }
    }

    /**
     * Checks tag condition from rules against node.
     *
     * An empty tag or '*' accepts every node.
     *
     * @param array $rule
     * @param AbstractNode $node
     * @return bool
     */
    protected function checkTag(array $rule, AbstractNode $node): bool
    {
        if (empty($rule['tag']) || $rule['tag'] == '*') {
            return true;
        }

        return $rule['tag'] == $node->getTag()->name();
    }

    /**
     * Checks key condition from rules against node.
     *
     * With 'noKey' set the node passes only when the attribute is absent
     * (null). Otherwise the node must carry the attribute, except for the
     * pseudo key 'plaintext' which always passes.
     *
     * @param array $rule
     * @param AbstractNode $node
     * @return bool
     */
    protected function checkKey(array $rule, AbstractNode $node): bool
    {
        if ( ! empty($rule['noKey'])) {
            return is_null($node->getAttribute($rule['key']));
        }

        return $rule['key'] == 'plaintext' || $node->hasAttribute($rule['key']);
    }

    /**
     * Checks comparison condition from rules against node.
     *
     * A scalar rule value is compared with the node text (key 'plaintext')
     * or with the attribute named by the key. An array rule value is only
     * meaningful for the key 'class': every listed value must then match at
     * least one of the node's space separated class names.
     *
     * @param array $rule
     * @param AbstractNode $node
     * @return bool
     */
    public function checkComparison(array $rule, AbstractNode $node): bool
    {
        if ( ! is_array($rule['value'])) {
            if ($rule['key'] == 'plaintext') {
                // plaintext search
                $nodeValue = $node->text();
            } else {
                // normal search
                $nodeValue = $node->getAttribute($rule['key']);
            }

            // getAttribute() may yield null, which match() does not accept.
            return $this->match($rule['operator'], $rule['value'], $nodeValue ?? '');
        }

        if ($rule['key'] != 'class') {
            return false;
        }

        // handle multiple classes
        $nodeClasses = explode(' ', $node->getAttribute('class') ?? '');
        $check       = false;
        foreach ($rule['value'] as $value) {
            // Start from scratch for every required value so that a hit for
            // the previous one cannot leak into this one.
            $check = false;
            foreach ($nodeClasses as $class) {
                // consecutive spaces leave empty tokens behind, skip them
                if ($class !== '' && $this->match($rule['operator'], $value, $class)) {
                    $check = true;
                    break;
                }
            }
            if ( ! $check) {
                break;
            }
        }

        return $check;
    }
}
376