Passed
Push — master ( 268bdc...77e4a4 )
by Gilles
02:56
created

Selector::match()   B

Complexity

Conditions 7
Paths 7

Size

Total Lines 22
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 20.2773

Importance

Changes 0
Metric Value
cc 7
eloc 16
nc 7
nop 3
dl 0
loc 22
ccs 6
cts 17
cp 0.3529
crap 20.2773
rs 8.8333
c 0
b 0
f 0
1
<?php
2
namespace PHPHtmlParser\Selector;
3
4
use PHPHtmlParser\Dom\AbstractNode;
5
use PHPHtmlParser\Dom\Collection;
6
use PHPHtmlParser\Dom\InnerNode;
7
use PHPHtmlParser\Dom\LeafNode;
8
use PHPHtmlParser\Exceptions\ChildNotFoundException;
9
10
/**
 * Class Selector
 *
 * Applies a parsed selector string to a node tree. The selector string is
 * parsed once, in the constructor, by the supplied parser; find() then walks
 * the resulting rule sets starting from a given node.
 *
 * @package PHPHtmlParser
 */
class Selector
{

    /**
     * Parsed selectors. Each entry is a list of rule arrays that are applied
     * in order by find().
     *
     * @var array
     */
    protected $selectors = [];

    /**
     * Constructs with the selector string
     *
     * @param string          $selector raw selector string
     * @param ParserInterface $parser   parser used to turn the string into rules
     */
    public function __construct(string $selector, ParserInterface $parser)
    {
        $this->selectors = $parser->parseSelectorString($selector);
    }

    /**
     * Returns the selectors that were found in __construct
     *
     * @return array
     */
    public function getSelectors()
    {
        return $this->selectors;
    }

    /**
     * Attempts to find the selectors starting from the given
     * node object.
     *
     * Every selector is evaluated independently from $node; the nodes that
     * survive the last rule of each selector are appended to one collection.
     *
     * @param AbstractNode $node
     * @return Collection
     */
    public function find(AbstractNode $node): Collection
    {
        $results = new Collection;
        foreach ($this->selectors as $selector) {
            if (count($selector) == 0) {
                continue;
            }

            $nodes   = [$node];
            $options = [];
            foreach ($selector as $rule) {
                if ($rule['alterNext']) {
                    // this rule only modifies how the next rule is applied
                    $options[] = $this->alterNext($rule);
                    continue;
                }
                $nodes = $this->seek($nodes, $rule, $options);
                // clear the options
                $options = [];
            }

            // this is the final set of nodes
            foreach ($nodes as $result) {
                $results[] = $result;
            }
        }

        return $results;
    }

    /**
     * Attempts to find all children that match the rule
     * given.
     *
     * @param array $nodes   nodes whose children are inspected
     * @param array $rule    a single parsed rule
     * @param array $options list of option arrays (see alterNext())
     * @return array
     * @recursive
     */
    protected function seek(array $nodes, array $rule, array $options): array
    {
        // XPath index
        if (array_key_exists('tag', $rule) &&
            array_key_exists('key', $rule) &&
            is_numeric($rule['key'])
        ) {
            $count = 0;
            /** @var AbstractNode $node */
            foreach ($nodes as $node) {
                if ($rule['tag'] == '*' ||
                    $rule['tag'] == $node->getTag()->name()
                ) {
                    ++$count;
                    if ($count == $rule['key']) {
                        // found the node we wanted
                        return [$node];
                    }
                }
            }

            return [];
        }

        $options = $this->flattenOptions($options);

        $return = [];
        /** @var InnerNode $node */
        foreach ($nodes as $node) {
            // check if we are a leaf
            if ($node instanceof LeafNode ||
                ! $node->hasChildren()
            ) {
                continue;
            }

            $children = [];
            $child    = $node->firstChild();
            while ( ! is_null($child)) {
                // wild card, grab all
                if ($rule['tag'] == '*' && is_null($rule['key'])) {
                    $return[] = $child;
                    $child    = $this->getNextChild($node, $child);
                    continue;
                }

                $pass = $this->checkTag($rule, $child);
                if ($pass && ! is_null($rule['key'])) {
                    $pass = $this->checkKey($rule, $child);
                }
                if ($pass && ! is_null($rule['key']) &&
                    ! is_null($rule['value']) && $rule['value'] != '*'
                ) {
                    $pass = $this->checkComparison($rule, $child);
                }

                if ($pass) {
                    // it passed all checks
                    $return[] = $child;
                } elseif ($child instanceof InnerNode &&
                    $child->hasChildren()
                ) {
                    // this child failed to be matched, but we still
                    // want to check its children
                    $children[] = $child;
                }

                $child = $this->getNextChild($node, $child);
            }

            if (( ! isset($options['checkGrandChildren']) ||
                    $options['checkGrandChildren'])
                && count($children) > 0
            ) {
                // we have children that failed but are not leaves.
                // $options is already flattened here, so wrap it back into
                // the list-of-option-arrays shape that seek() expects.
                $matches = $this->seek($children, $rule, [$options]);
                foreach ($matches as $match) {
                    $return[] = $match;
                }
            }
        }

        return $return;
    }

    /**
     * Attempts to match the given arguments with the given operator.
     *
     * Both sides are lower-cased before comparing. Supported operators are
     * "=", "!=", "^=" (starts with), "$=" (ends with) and "*=" (regular
     * expression; a pattern starting with "/" is used as a complete regex,
     * anything else is wrapped in "/.../i"). Unknown operators never match.
     *
     * @param string $operator
     * @param string $pattern
     * @param string $value
     * @return bool
     */
    protected function match(string $operator, string $pattern, string $value): bool
    {
        $value   = strtolower($value);
        $pattern = strtolower($pattern);
        switch ($operator) {
            case '=':
                return $value === $pattern;
            case '!=':
                return $value !== $pattern;
            case '^=':
                return preg_match('/^'.preg_quote($pattern, '/').'/', $value) == 1;
            case '$=':
                // D: make "$" match only at the very end, not before a
                // trailing newline
                return preg_match('/'.preg_quote($pattern, '/').'$/D', $value) == 1;
            case '*=':
                // an empty pattern has no first character to inspect
                if ($pattern !== '' && $pattern[0] == '/') {
                    return preg_match($pattern, $value) == 1;
                }

                return preg_match("/".$pattern."/i", $value) == 1;
        }

        return false;
    }

    /**
     * Attempts to figure out what the alteration will be for
     * the next element.
     *
     * Currently only ">" is recognised; it disables descending into
     * grandchildren for the next rule.
     *
     * @param array $rule
     * @return array
     */
    protected function alterNext(array $rule): array
    {
        $options = [];
        if ($rule['tag'] == '>') {
            $options['checkGrandChildren'] = false;
        }

        return $options;
    }

    /**
     * Flattens the option array.
     *
     * Merges a list of option arrays into a single key => value map; when a
     * key appears more than once the last occurrence wins.
     *
     * @param array $optionsArray
     * @return array
     */
    protected function flattenOptions(array $optionsArray)
    {
        $options = [];
        foreach ($optionsArray as $optionArray) {
            foreach ($optionArray as $key => $option) {
                $options[$key] = $option;
            }
        }

        return $options;
    }

    /**
     * Returns the next child or null if no more children.
     *
     * @param AbstractNode $node
     * @param AbstractNode $currentChild
     * @return AbstractNode|null
     */
    protected function getNextChild(AbstractNode $node, AbstractNode $currentChild)
    {
        // nextChild() is not part of AbstractNode (static analysis flags the
        // call); only inner nodes can be walked.
        if ( ! $node instanceof InnerNode) {
            return null;
        }

        try {
            // get next child
            return $node->nextChild($currentChild->id());
        } catch (ChildNotFoundException $e) {
            // no more children
            return null;
        }
    }

    /**
     * Checks tag condition from rules against node.
     *
     * An empty tag or "*" in the rule matches every node.
     *
     * @param array $rule
     * @param AbstractNode $node
     * @return bool
     */
    protected function checkTag(array $rule, AbstractNode $node): bool
    {
        if ( ! empty($rule['tag']) && $rule['tag'] != $node->getTag()->name() &&
            $rule['tag'] != '*'
        ) {
            return false;
        }

        return true;
    }

    /**
     * Checks key condition from rules against node.
     *
     * With "noKey" set the node passes only when the attribute is absent;
     * otherwise the attribute must exist, except for the pseudo key
     * "plaintext", which is always accepted.
     *
     * @param array $rule
     * @param AbstractNode $node
     * @return bool
     */
    protected function checkKey(array $rule, AbstractNode $node): bool
    {
        if ($rule['noKey']) {
            return is_null($node->getAttribute($rule['key']));
        }

        return $rule['key'] == 'plaintext' || $node->hasAttribute($rule['key']);
    }

    /**
     * Checks comparison condition from rules against node.
     *
     * The compared value is the node text for the "plaintext" key and the
     * attribute value otherwise. For the "class" key each space-separated
     * class is also tried individually when the full value does not match.
     *
     * @param array $rule
     * @param AbstractNode $node
     * @return bool
     */
    public function checkComparison(array $rule, AbstractNode $node): bool
    {
        if ($rule['key'] == 'plaintext') {
            // plaintext search
            $nodeValue = $node->text();
        } else {
            // normal search
            $nodeValue = $node->getAttribute($rule['key']);
        }

        // getAttribute() can return null (checkKey() tests for it), but
        // match() only accepts strings; treat a missing value as empty.
        $nodeValue = (string) $nodeValue;

        $check = $this->match($rule['operator'], $rule['value'], $nodeValue);

        // handle multiple classes
        if ( ! $check && $rule['key'] == 'class') {
            foreach (explode(' ', $nodeValue) as $class) {
                // skip the empty tokens produced by repeated spaces; a class
                // named "0" is still a real class
                if ($class !== '') {
                    $check = $this->match($rule['operator'], $rule['value'], $class);
                }
                if ($check) {
                    break;
                }
            }
        }

        return $check;
    }
}
339