Issues (146)

src/CSS/DOMTraverser/PseudoClass.php (2 issues)

Severity
1
<?php
2
/**
3
 * @file
4
 *
5
 * PseudoClass class.
6
 *
7
 * This is the first pass in an experiment to break PseudoClass handling
8
 * out of the normal traversal. Eventually, this should become a
9
 * top-level pluggable registry that will allow custom pseudoclasses.
10
 * For now, though, we just handle the core pseudoclasses.
11
 */
12
13
namespace QueryPath\CSS\DOMTraverser;
14
15
use \QueryPath\CSS\NotImplementedException;
16
use \QueryPath\CSS\EventHandler;
17
use QueryPath\CSS\ParseException;
18
19
/**
 * The PseudoClass handler.
 *
 * Decides whether a single DOM node satisfies a CSS pseudo-class. The only
 * public entry point is elementMatches(); the protected methods implement
 * the individual pseudo-classes.
 */
class PseudoClass
{

    /**
     * Tests whether the given element matches the given pseudoclass.
     *
     * The pseudo-class name is lower-cased before it is dispatched, so
     * matching of the name itself is case-insensitive.
     *
     * @param string $pseudoclass
     *   The string name of the pseudoclass.
     * @param \DOMNode $node
     *   The DOMNode to be tested.
     * @param \DOMNode $scope
     *   The DOMElement that is the active root for this node.
     * @param mixed $value
     *   The optional value string provided with this class. This is
     *   used, for example, in an+b pseudoclasses.
     * @return bool
     *   TRUE if the node matches, FALSE otherwise.
     * @throws NotImplementedException
     *   If :lang() is used without a value.
     * @throws \QueryPath\CSS\ParseException
     *   If :not() is used without a value, or the pseudo-class is unknown.
     */
    public function elementMatches($pseudoclass, $node, $scope, $value = null)
    {
        $name = strtolower($pseudoclass);

        switch ($name) {
            // These require a user agent, which we don't have.
            case 'current':
            case 'past':
            case 'future':
            case 'visited':
            case 'hover':
            case 'active':
            case 'focus':
            case 'animated': // Last 3 are from jQuery.
            case 'visible':
            case 'hidden':
            // Since we don't know how to validate elements, we can't
            // supply these.
            case 'valid':
            case 'invalid':
            case 'required':
            case 'optional':
            case 'read-only':
            case 'read-write':
            // FIXME: I don't know how to get directionality info.
            case 'dir':
            // We don't know what a column is in most documents.
            // FIXME: Can we do this for HTML?
            case 'nth-column':
            case 'nth-last-column':
            // This requires a location URL, which we don't have.
            case 'target':
                return false;

            case 'indeterminate':
                // Deliberately random: the result is a coin flip, so repeated
                // calls on the same node may disagree.
                return (bool) mt_rand(0, 1);

            case 'lang':
                // No value = exception.
                if (!isset($value)) {
                    throw new NotImplementedException(':lang() requires a value.');
                }

                return $this->lang($node, $value);

            case 'any-link':
                return Util::matchesAttribute($node, 'href')
                    || Util::matchesAttribute($node, 'src')
                    || Util::matchesAttribute($node, 'link');

            case 'link':
                return Util::matchesAttribute($node, 'href');

            case 'local-link':
                return $this->isLocalLink($node);

            case 'root':
                return $node->isSameNode($node->ownerDocument->documentElement);

            // CSS 4 declares the :scope pseudo-class, which describes what was
            // the :x-root QueryPath extension.
            case 'x-root':
            case 'x-reset':
            case 'scope':
                return $node->isSameNode($scope);

            // NON-STANDARD extensions for simple support of even and odd. These
            // are supported by jQuery, FF, and other user agents.
            case 'even':
                return $this->isNthChild($node, 'even');

            case 'odd':
                return $this->isNthChild($node, 'odd');

            case 'nth-child':
                return $this->isNthChild($node, $value);

            case 'nth-last-child':
                return $this->isNthChild($node, $value, true);

            case 'nth-of-type':
                return $this->isNthChild($node, $value, false, true);

            case 'nth-last-of-type':
                return $this->isNthChild($node, $value, true, true);

            case 'first-of-type':
                return $this->isFirstOfType($node);

            case 'last-of-type':
                return $this->isLastOfType($node);

            case 'only-of-type':
                return $this->isFirstOfType($node) && $this->isLastOfType($node);

            // Additional pseudo-classes defined in jQuery:
            case 'lt':
                // I'm treating this as "less than or equal to".
                return $this->isNthChild($node, sprintf('-n + %d', (int) $value));

            case 'gt':
                // I'm treating this as "greater than".
                return $this->nodePositionFromStart($node) > (int) $value;

            case 'nth':
            case 'eq':
                return $this->isNthChild($node, (int) $value);

            case 'first':
                return $this->isNthChild($node, 1);

            case 'first-child':
                return $this->isFirst($node);

            case 'last':
            case 'last-child':
                return $this->isLast($node);

            case 'only-child':
                return $this->isFirst($node) && $this->isLast($node);

            case 'empty':
                return $this->isEmpty($node);

            case 'parent':
                return !$this->isEmpty($node);

            // The pseudo-class name doubles as the attribute name.
            case 'enabled':
            case 'disabled':
            case 'checked':
                return Util::matchesAttribute($node, $name);

            // The pseudo-class name doubles as the expected "type" value.
            case 'text':
            case 'radio':
            case 'checkbox':
            case 'file':
            case 'password':
            case 'submit':
            case 'image':
            case 'reset':
            case 'button':
                return Util::matchesAttribute($node, 'type', $name);

            case 'header':
                return $this->header($node);

            case 'has':
            case 'matches':
                return $this->has($node, $value);

            case 'not':
                if (empty($value)) {
                    throw new ParseException(':not() requires a value.');
                }

                return $this->isNot($node, $value);

            // Contains == text matches.
            // In QP 2.1, this was changed.
            case 'contains':
                return $this->contains($node, $value);

            // Since QP 2.1
            case 'contains-exactly':
                return $this->containsExactly($node, $value);

            default:
                throw new ParseException('Unknown Pseudo-Class: ' . $name);
        }
    }

    /**
     * Pseudo-class handler for :lang
     *
     * Note that this does not implement the spec in its entirety because we do
     * not presume to "know the language" of the document. If anyone is interested
     * in making this more intelligent, please do so.
     *
     * Only a "lang" attribute set directly on $node is considered; the first
     * attribute whose local name is "lang" decides the result.
     *
     * @param \DOMNode $node
     *   The node whose attributes are inspected.
     * @param string $value
     *   The language value to match.
     * @return bool
     *   The result of the Util attribute match, or FALSE if the node has no
     *   attribute with the local name "lang".
     */
    protected function lang($node, $value)
    {
        // TODO: This checks for cases where an explicit language is
        // set. The spec seems to indicate that an element should inherit
        // language from the parent... but this is unclear.
        // A value that already contains a hyphen is matched exactly; otherwise
        // the hyphen-aware operator is used.
        $operator = (strpos($value, '-') !== false) ? EventHandler::IS_EXACTLY : EventHandler::CONTAINS_WITH_HYPHEN;

        foreach ($node->attributes as $attrNode) {
            if ($attrNode->localName !== 'lang') {
                continue;
            }

            // nodeName equal to localName means the attribute carries no prefix.
            if ($attrNode->nodeName === $attrNode->localName) {
                return Util::matchesAttribute($node, 'lang', $value, $operator);
            }

            return Util::matchesAttributeNS($node, 'lang', $attrNode->namespaceURI, $value, $operator);
        }

        return false;
    }

    /**
     * Provides jQuery pseudoclass ':header'.
     *
     * Matches tag names "h1" through "h9", case-insensitively.
     *
     * @param \DOMNode $node
     *   The node whose tagName is tested.
     * @return bool
     */
    protected function header($node): bool
    {
        return preg_match('/^h[1-9]$/i', $node->tagName) === 1;
    }

    /**
     * Provides pseudoclass :empty.
     *
     * @param \DOMNode $node
     * @return bool
     *   TRUE when the node has no element or text children.
     */
    protected function isEmpty($node): bool
    {
        foreach ($node->childNodes as $kid) {
            // We don't want to count PIs and comments. From the spec, it
            // appears that CDATA is also not counted.
            if ($kid->nodeType === XML_ELEMENT_NODE || $kid->nodeType === XML_TEXT_NODE) {
                return false;
            }
        }

        return true;
    }

    /**
     * Provides jQuery pseudoclass :first.
     *
     * @todo
     *   This can be replaced by isNthChild().
     *
     * @param \DOMNode $node
     * @return bool
     *   TRUE when no element node precedes $node among its siblings.
     */
    protected function isFirst($node): bool
    {
        while (isset($node->previousSibling)) {
            $node = $node->previousSibling;
            if ($node->nodeType === XML_ELEMENT_NODE) {
                return false;
            }
        }

        return true;
    }

    /**
     * Fast version of first-of-type.
     *
     * @param \DOMNode $node
     * @return bool
     *   TRUE when no preceding sibling element shares $node's tag name.
     */
    protected function isFirstOfType($node)
    {
        $type = $node->tagName;
        while (isset($node->previousSibling)) {
            $node = $node->previousSibling;
            if ($node->nodeType === XML_ELEMENT_NODE && $node->tagName === $type) {
                return false;
            }
        }

        return true;
    }

    /**
     * Fast version of jQuery :last.
     *
     * @param \DOMNode $node
     * @return bool
     *   TRUE when no element node follows $node among its siblings.
     */
    protected function isLast($node)
    {
        while (isset($node->nextSibling)) {
            $node = $node->nextSibling;
            if ($node->nodeType === XML_ELEMENT_NODE) {
                return false;
            }
        }

        return true;
    }

    /**
     * Provides last-of-type.
     *
     * @param \DOMNode $node
     * @return bool
     *   TRUE when no following sibling element shares $node's tag name.
     */
    protected function isLastOfType($node)
    {
        $type = $node->tagName;
        while (isset($node->nextSibling)) {
            $node = $node->nextSibling;
            if ($node->nodeType === XML_ELEMENT_NODE && $node->tagName === $type) {
                return false;
            }
        }

        return true;
    }

    /**
     * Provides :contains() as the original spec called for.
     *
     * This is an INEXACT match: a case-insensitive substring search of the
     * node's text content.
     *
     * @param \DOMNode $node
     * @param string $value
     *   The text to look for; passed through Util::removeQuotes() first.
     * @return bool
     */
    protected function contains($node, $value): bool
    {
        $text = $node->textContent;
        $value = Util::removeQuotes($value);

        return isset($text) && (stripos($text, $value) !== false);
    }

    /**
     * Provides :contains-exactly QueryPath pseudoclass.
     *
     * This is an EXACT match.
     *
     * @param \DOMNode $node
     * @param string $value
     *   The expected text; passed through Util::removeQuotes() first.
     * @return bool
     */
    protected function containsExactly($node, $value): bool
    {
        $text = $node->textContent;
        $value = Util::removeQuotes($value);

        // Compare as strings with ===. A loose == treats two numeric-looking
        // strings as numbers, so "1e3" would "exactly" match "1000".
        return isset($text) && $text === (string) $value;
    }

    /**
     * Provides :has pseudoclass.
     *
     * Runs $selector through a new DOMTraverser seeded with $node alone.
     *
     * @param \DOMNode $node
     * @param string $selector
     * @return bool
     *   TRUE when the traverser reports at least one match.
     * @throws ParseException
     */
    protected function has($node, $selector): bool
    {
        $splos = new \SplObjectStorage();
        $splos->attach($node);
        $traverser = new \QueryPath\CSS\DOMTraverser($splos, true);
        $results = $traverser->find($selector)->matches();

        return count($results) > 0;
    }

    /**
     * Provides :not pseudoclass.
     *
     * This is the logical negation of has() for the same arguments.
     *
     * @param \DOMNode $node
     * @param string $selector
     * @return bool
     * @throws ParseException
     */
    protected function isNot($node, $selector): bool
    {
        return !$this->has($node, $selector);
    }

    /**
     * Get the relative position of a node in its sibling set.
     *
     * Counts from the start; the first element has position 1.
     *
     * @param \DOMNode $node
     * @param bool $byType
     *   When TRUE, only siblings with the same tag name are counted.
     * @return int
     */
    protected function nodePositionFromStart($node, $byType = false): int
    {
        $i = 1;
        $tag = $node->tagName;
        while (isset($node->previousSibling)) {
            $node = $node->previousSibling;
            if ($node->nodeType === XML_ELEMENT_NODE && (!$byType || $node->tagName === $tag)) {
                ++$i;
            }
        }

        return $i;
    }

    /**
     * Get the relative position of a node in its sibling set.
     *
     * Counts from the end; the last element has position 1.
     *
     * @param \DOMNode $node
     * @param bool $byType
     *   When TRUE, only siblings with the same tag name are counted.
     * @return int
     */
    protected function nodePositionFromEnd($node, $byType = false): int
    {
        $i = 1;
        $tag = $node->tagName;
        while (isset($node->nextSibling)) {
            $node = $node->nextSibling;
            if ($node->nodeType === XML_ELEMENT_NODE && (!$byType || $node->tagName === $tag)) {
                ++$i;
            }
        }

        return $i;
    }

    /**
     * Provides functionality for all "An+B" rules.
     * Provides nth-child and also the functionality required for:
     *
     *- nth-last-child
     *- even
     *- odd
     *- first
     *- last
     *- eq
     *- nth
     *- nth-of-type
     *- first-of-type
     *- last-of-type
     *- nth-last-of-type
     *
     * See also QueryPath::CSS::DOMTraverser::Util::parseAnB().
     *
     * @param \DOMNode $node
     * @param mixed $value
     *   The An+B rule; handed to Util::parseAnB() unchanged.
     * @param bool $reverse
     *   When TRUE, positions are counted from the end of the sibling set.
     * @param bool $byType
     *   When TRUE, only siblings with the same tag name are counted.
     * @return bool
     */
    protected function isNthChild($node, $value, $reverse = false, $byType = false): bool
    {
        list($groupSize, $elementInGroup) = Util::parseAnB($value);
        $parent = $node->parentNode;

        // NOTE(review): the last guard rejects every rule where B > A > 0
        // (e.g. "2n+3"), which the Selectors spec treats as valid. Confirm
        // against Util::parseAnB() and the test suite before changing it.
        if (empty($parent)
            || ($groupSize === 0 && $elementInGroup === 0)
            || ($groupSize > 0 && $elementInGroup > $groupSize)
        ) {
            return false;
        }

        // First we need to find the position of $node in other elements.
        if ($reverse) {
            $pos = $this->nodePositionFromEnd($node, $byType);
        } else {
            $pos = $this->nodePositionFromStart($node, $byType);
        }

        // If group size is 0, we just check to see if this
        // is the nth element:
        if ($groupSize === 0) {
            return $pos === $elementInGroup;
        }

        // Next, we normalize $elementInGroup
        if ($elementInGroup < 0) {
            $elementInGroup = $groupSize + $elementInGroup;
        }

        // Dividing two ints yields an int only when the division is exact, so
        // is_int() doubles as the divisibility test.
        $prod = ($pos - $elementInGroup) / $groupSize;

        return is_int($prod) && $prod >= 0;
    }

    /**
     * Provides :local-link.
     *
     * @param \DOMElement $node
     * @return bool
     *   TRUE when the node has an href whose URL scheme is absent or "file".
     */
    protected function isLocalLink($node): bool
    {
        if (!$node->hasAttribute('href')) {
            return false;
        }
        $url = $node->getAttribute('href');
        $scheme = parse_url($url, PHP_URL_SCHEME);

        return empty($scheme) || $scheme === 'file';
    }

}
477