Scanner::__construct()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
/** @file
3
 * The scanner.
4
 */
5
6
namespace QueryPath\CSS;
7
8
/**
9
 * Scanner for CSS selector parsing.
10
 *
11
 * This provides a simple scanner for traversing an input stream.
12
 *
13
 * @ingroup querypath_css
14
 */
15
final class Scanner
16
{
17
18
    /** @var InputStream The stream being scanned; assigned by the constructor. */
    public $is;

    /** @var string|null Text of the current token; written by nextToken(). */
    public $value;

    /** @var int|false|null Id of the current token; FALSE once nextToken() has hit the end of the stream. */
    public $token;

    /** @var bool Becomes TRUE the first time nextToken() reports end of stream; a later call on the empty stream throws. */
    public $recurse = false;

    /** @var int Number of times nextToken() has been called. */
    public $it = 0;
24
25
    /**
     * Create a scanner over the given input stream.
     *
     * The constructor only stores the stream; it does not read from it.
     *
     * @see InputStream
     * @param InputStream $in
     *  The stream of selector characters to be tokenized.
     */
    public function __construct(InputStream $in)
    {
        $this->is = $in;
    }
36
37
    /**
     * Report where the underlying input stream currently is.
     *
     * @return int
     *  The value of the input stream's `position` property.
     */
    public function position(): int
    {
        $stream = $this->is;

        return $stream->position;
    }
44
45
    /**
     * Look at the upcoming character by delegating to the stream's peek().
     *
     * This does not touch the scanner's current token or value.
     *
     * @return string
     *  Whatever the input stream's peek() reports as the next character.
     */
    public function peek(): string
    {
        $upcoming = $this->is->peek();

        return $upcoming;
    }
55
56
    /**
     * Advance to the next token in the input stream.
     *
     * Consumes one character from the stream, classifies it, and records the
     * result in $this->token (the token id) and $this->value (the text).
     * Any whitespace character is reported as Token::WHITE with its value
     * collapsed to a single space.
     *
     * The return type is intentionally not declared as `int`: with that
     * declaration PHP's coercive typing converts the end-of-stream FALSE
     * into 0, which callers cannot distinguish from a token id.
     *
     * @return int|false
     *  One of the Token constants, or FALSE if the end of the string is
     *  reached. (Remember to use strong equality checking on FALSE, since a
     *  token id may itself be falsy.)
     * @throws ParseException
     *  If the character is not a known token and lies outside the accepted
     *  byte ranges (32-126 and 128-255).
     * @throws \QueryPath\Exception
     *  If called on an empty stream after the end of the stream has already
     *  been reported once.
     */
    public function nextToken()
    {
        ++$this->it;

        if ($this->is->isEmpty()) {
            // The end of the stream may be reported exactly once; a second
            // request means the caller is looping without checking for FALSE.
            if ($this->recurse) {
                throw new \QueryPath\Exception('Recursion error detected at iteration ' . $this->it . '.');
            }
            $this->recurse = true;
            $this->token = false;

            return false;
        }

        $ch = $this->is->consume();

        if (ctype_space($ch)) {
            // Collapse all whitespace to a single space.
            $this->value = ' ';
            $this->token = Token::WHITE;

            return Token::WHITE;
        }

        if ($ch === '-' || $ch === '_' || ctype_alnum($ch)) {
            // Name character; case is preserved.
            $this->value = $ch;
            $this->token = Token::CHAR;

            return Token::CHAR;
        }

        $this->value = $ch;

        switch ($ch) {
            case '*':
                $tok = Token::STAR;
                break;
            case '>':
                $tok = Token::RANGLE;
                break;
            case '.':
                $tok = Token::DOT;
                break;
            case '#':
                $tok = Token::OCTO;
                break;
            case '[':
                $tok = Token::LSQUARE;
                break;
            case ']':
                $tok = Token::RSQUARE;
                break;
            case ':':
                $tok = Token::COLON;
                break;
            case '(':
                $tok = Token::LPAREN;
                break;
            case ')':
                $tok = Token::RPAREN;
                break;
            case '+':
                $tok = Token::PLUS;
                break;
            case '~':
                $tok = Token::TILDE;
                break;
            case '=':
                $tok = Token::EQ;
                break;
            case '|':
                $tok = Token::PIPE;
                break;
            case ',':
                $tok = Token::COMMA;
                break;
            case '"':
                $tok = Token::QUOTE;
                break;
            case "'":
                $tok = Token::SQUOTE;
                break;
            case '\\':
                $tok = Token::BSLASH;
                break;
            case '^':
                $tok = Token::CARAT;
                break;
            case '$':
                $tok = Token::DOLLAR;
                break;
            case '@':
                $tok = Token::AT;
                break;
            default:
                // Catch all characters that are legal within strings.
                // TODO: This should be UTF-8 compatible; the check below is
                // byte-based. Printable ASCII and extended ASCII are
                // accepted, though it is unclear whether the latter is
                // really legal.
                $ord = ord($ch);
                if (($ord >= 32 && $ord <= 126) || ($ord >= 128 && $ord <= 255)) {
                    $tok = Token::STRING_LEGAL;
                } else {
                    throw new ParseException('Illegal character found in stream: ' . $ord);
                }
        }

        $this->token = $tok;

        return $tok;
    }
188
189
    /**
     * Read a name from the input stream.
     *
     * Starting with the current token, the value of every consecutive
     * Token::CHAR token (-_a-zA-Z0-9) is collected. On return the scanner
     * is left on the first token that is not Token::CHAR.
     *
     * @return string
     *  The collected name; an empty string if the current token is not
     *  Token::CHAR.
     */
    public function getNameString()
    {
        $name = '';

        for (; $this->token === Token::CHAR; $this->nextToken()) {
            $name .= $this->value;
        }

        return $name;
    }
204
205
    /**
     * This gets a string with any legal 'string' characters.
     * See CSS Selectors specification, section 11, for the
     * definition of string.
     *
     * This will check for string1, string2, and the case where a
     * string is unquoted (oddly absent from the "official" grammar,
     * though such strings are present as examples in the spec).
     *
     * Note:
     * Though the grammar supplied by CSS 3 Selectors section 11 does not
     * address the contents of a pseudo-class value, the spec itself indicates
     * that a pseudo-class value is a "value between parenthesis" [6.6]. The
     * examples given use URLs among other things, making them closer to the
     * definition of 'string' than to 'name'. So we handle them here as strings.
     *
     * The current token must be the opening delimiter: a double quote, a
     * single quote, or a left parenthesis (which is closed by a right
     * parenthesis). On return the scanner sits on the token after the
     * closing delimiter, or at end of stream if no closing delimiter was
     * found.
     *
     * @return string|null
     *  The text between the delimiters with escaping backslashes removed, or
     *  NULL if the current token is not an opening delimiter.
     */
    public function getQuotedString()
    {
        $opening = $this->token;
        if ($opening !== Token::QUOTE && $opening !== Token::SQUOTE && $opening !== Token::LPAREN) {
            return null;
        }

        $end = ($opening === Token::LPAREN) ? Token::RPAREN : $opening;
        $buf = '';
        $escape = false;

        $this->nextToken(); // Skip the opening quote/paren

        // The second conjunct is probably not necessary.
        while ($this->token !== false && $this->token > -1) {
            if ($this->token === Token::BSLASH && !$escape) {
                // The backslash itself is dropped; it only marks the next
                // token as literal.
                $escape = true;
            } elseif ($escape) {
                // Escaped token: take it verbatim, even if it is the closing
                // delimiter.
                $buf .= $this->value;
                $escape = false;
            } elseif ($this->token === $end) {
                // At end of string; skip token and break.
                $this->nextToken();
                break;
            } else {
                // Append char.
                $buf .= $this->value;
            }
            $this->nextToken();
        }

        return $buf;
    }
256
257
    /**
     * Get the contents inside of a pseudoClass().
     *
     * The current token must be the opening delimiter: a left parenthesis
     * (closed by a right parenthesis), a double quote, or a single quote.
     * Nested parenthesised groups are read recursively and kept in the
     * result together with their parentheses. A backslash is dropped and
     * makes the following token literal.
     *
     * On return the scanner sits on the token after the closing delimiter,
     * or at end of stream if no closing delimiter was found.
     *
     * @return string|null
     *  The text between the delimiters, or NULL if the current token is not
     *  an opening delimiter.
     */
    public function getPseudoClassString()
    {
        $opening = $this->token;
        if ($opening !== Token::QUOTE && $opening !== Token::SQUOTE && $opening !== Token::LPAREN) {
            return null;
        }

        $end = ($opening === Token::LPAREN) ? Token::RPAREN : $opening;
        $buf = '';
        $escape = false;

        $this->nextToken(); // Skip the opening quote/paren

        // The second conjunct is probably not necessary.
        while ($this->token !== false && $this->token > -1) {
            if ($this->token === Token::BSLASH && !$escape) {
                // The backslash itself is dropped; it only marks the next
                // token as literal.
                $escape = true;
            } elseif ($escape) {
                // Escaped token: take it verbatim.
                $buf .= $this->value;
                $escape = false;
            } elseif ($this->token === Token::LPAREN) {
                // Allow nested pseudoclasses.
                $buf .= '(';
                $buf .= $this->getPseudoClassString();
                $buf .= ')';

                // The recursive call has already moved past the nested ')',
                // so the current token has not been examined yet. Advancing
                // again here would silently drop it (and, when it is our own
                // closing delimiter, read on into the rest of the selector).
                continue;
            } elseif ($this->token === $end) {
                // At end of string; skip token and break.
                $this->nextToken();
                break;
            } else {
                // Append char.
                $buf .= $this->value;
            }
            $this->nextToken();
        }

        return $buf;
    }
298
299
    /**
300
     * Get a string from the input stream.
301
     * This is a convenience function for getting a string of
302
     * characters that are either alphanumeric or whitespace. See
303
     * the Token::white and Token::char definitions.
304
     *
305
     * @deprecated This is not used anywhere in QueryPath.
306
     *//*
307
  public function getStringPlusWhitespace() {
308
    $buf = '';
309
    if($this->token === FALSE) {return '';}
310
    while ($this->token === Token::char || $this->token == Token::white) {
311
      $buf .= $this->value;
312
      $this->nextToken();
313
    }
314
    return $buf;
315
  }*/
316
317
}
318