Twig_Lexer::__construct()   B
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 25
Code Lines 20

Duplication

Lines 0
Ratio 0 %
Metric Value
dl 0
loc 25
rs 8.8571
cc 1
eloc 20
nc 1
nop 2
1
<?php
2
3
/*
4
 * This file is part of Twig.
5
 *
6
 * (c) 2009 Fabien Potencier
7
 * (c) 2009 Armin Ronacher
8
 *
9
 * For the full copyright and license information, please view the LICENSE
10
 * file that was distributed with this source code.
11
 */
12
13
/**
14
 * Lexes a template string.
15
 *
16
 * @author Fabien Potencier <[email protected]>
17
 */
18
class Twig_Lexer implements Twig_LexerInterface
0 ignored issues
show
Deprecated Code introduced by
The interface Twig_LexerInterface has been deprecated with message: since 1.12 (to be removed in 3.0)

This class, trait or interface has been deprecated. The supplier of the file has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the type will be removed and what other type to use instead.

Loading history...
19
{
20
    // Twig_Token instances produced so far; reset at the start of tokenize().
    protected $tokens;
21
    // Template source being lexed, with "\r\n" and "\r" normalized to "\n".
    protected $code;
22
    // Current offset into $code (advanced by strlen() of the consumed text).
    protected $cursor;
23
    // Current line number; starts at 1 and can be overridden by a "line N" block.
    protected $lineno;
24
    // strlen() of $code; lexing stops when $cursor reaches it.
    protected $end;
25
    // Current lexer state (one of the STATE_* constants).
    protected $state;
26
    // Stack of previous states, managed by pushState()/popState().
    protected $states;
27
    // Stack of array(opening delimiter, line number) for unclosed brackets/strings.
    protected $brackets;
28
    // Twig_Environment passed to the constructor; source of the operator lists.
    protected $env;
29
    // Filename passed to tokenize(); forwarded to errors and the token stream.
    protected $filename;
30
    // Delimiter options (defaults merged with the constructor argument).
    protected $options;
31
    // Regular expressions built from $options in the constructor.
    protected $regexes;
32
    // Index of the last consumed entry of $positions[0]; -1 before the first one.
    protected $position;
33
    // preg_match_all() result (with offsets) for every tag opener found in $code.
    protected $positions;
34
    // Line on which the current block/variable tag was opened (used in "Unclosed" errors).
    protected $currentVarBlockLine;
35
36
    // Lexer states dispatched on in tokenize().
    const STATE_DATA            = 0;
37
    const STATE_BLOCK           = 1;
38
    const STATE_VAR             = 2;
39
    const STATE_STRING          = 3;
40
    const STATE_INTERPOLATION   = 4;
41
42
    // Anchored patterns used by lexExpression() and lexString().
    const REGEX_NAME            = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
43
    const REGEX_NUMBER          = '/[0-9]+(?:\.[0-9]+)?/A';
44
    const REGEX_STRING          = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
45
    const REGEX_DQ_STRING_DELIM = '/"/A';
46
    const REGEX_DQ_STRING_PART  = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
47
    // Single characters that lexExpression() emits as punctuation tokens.
    const PUNCTUATION           = '()[]{}?:.,|';
48
49
    /**
     * Stores the environment, merges the delimiter options with their
     * defaults and pre-builds every regular expression the lexer uses.
     *
     * @param Twig_Environment $env     Environment providing the unary/binary operators
     * @param array            $options Overrides for the default delimiters
     */
    public function __construct(Twig_Environment $env, array $options = array())
    {
        $this->env = $env;

        $defaults = array(
            'tag_comment'     => array('{#', '#}'),
            'tag_block'       => array('{%', '%}'),
            'tag_variable'    => array('{{', '}}'),
            'whitespace_trim' => '-',
            'interpolation'   => array('#{', '}'),
        );
        $this->options = array_merge($defaults, $options);

        // Quoted delimiter fragments shared by several expressions below.
        $trim          = preg_quote($this->options['whitespace_trim'], '/');
        $varOpen       = preg_quote($this->options['tag_variable'][0], '/');
        $varClose      = preg_quote($this->options['tag_variable'][1], '/');
        $trimVarClose  = preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '/');
        $blockOpen     = preg_quote($this->options['tag_block'][0], '/');
        $blockOpenTrim = preg_quote($this->options['tag_block'][0].$this->options['whitespace_trim'], '/');
        $blockClose    = preg_quote($this->options['tag_block'][1], '/');
        $commentOpen   = preg_quote($this->options['tag_comment'][0], '/');
        $commentClose  = preg_quote($this->options['tag_comment'][1], '/');
        $interpOpen    = preg_quote($this->options['interpolation'][0], '/');
        $interpClose   = preg_quote($this->options['interpolation'][1], '/');

        // Block closer, with or without the whitespace-trim marker.
        $blockEnd = '(?:'.preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '/').'\s*|\s*'.$blockClose.')';

        $this->regexes = array(
            'lex_var'             => '/\s*'.$trimVarClose.'\s*|\s*'.$varClose.'/A',
            'lex_block'           => '/\s*'.$blockEnd.'\n?/A',
            'lex_raw_data'        => '/('.$blockOpenTrim.'|'.$blockOpen.')\s*(?:end%s)\s*'.$blockEnd.'/s',
            'operator'            => $this->getOperatorRegex(),
            'lex_comment'         => '/(?:'.$trim.$commentClose.'\s*|'.$commentClose.')\n?/s',
            'lex_block_raw'       => '/\s*(raw|verbatim)\s*'.$blockEnd.'/As',
            'lex_block_line'      => '/\s*line\s+(\d+)\s*'.$blockClose.'/As',
            'lex_tokens_start'    => '/('.$varOpen.'|'.$blockOpen.'|'.$commentOpen.')('.$trim.')?/s',
            'interpolation_start' => '/'.$interpOpen.'\s*/A',
            'interpolation_end'   => '/\s*'.$interpClose.'/A',
        );
    }
74
75
    /**
     * {@inheritdoc}
     *
     * Normalizes line endings, resets the lexer state and runs the state
     * machine until the whole source is consumed.
     *
     * When mbstring overloads the string functions, the internal encoding
     * is switched to ASCII while lexing and restored afterwards, including
     * when lexing fails with an exception.
     *
     * @param string      $code     The template source
     * @param string|null $filename Name reported in errors and the token stream
     *
     * @return Twig_TokenStream
     *
     * @throws Twig_Error_Syntax When a bracket or string is left unclosed
     */
    public function tokenize($code, $filename = null)
    {
        if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
            $mbEncoding = mb_internal_encoding();
            mb_internal_encoding('ASCII');
        } else {
            $mbEncoding = null;
        }

        try {
            $this->code = str_replace(array("\r\n", "\r"), "\n", $code);
            $this->filename = $filename;
            $this->cursor = 0;
            $this->lineno = 1;
            $this->end = strlen($this->code);
            $this->tokens = array();
            $this->state = self::STATE_DATA;
            $this->states = array();
            $this->brackets = array();
            $this->position = -1;

            // find all token starts in one go
            preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, PREG_OFFSET_CAPTURE);
            $this->positions = $matches;

            while ($this->cursor < $this->end) {
                // dispatch to the lexing functions depending
                // on the current state
                switch ($this->state) {
                    case self::STATE_DATA:
                        $this->lexData();
                        break;

                    case self::STATE_BLOCK:
                        $this->lexBlock();
                        break;

                    case self::STATE_VAR:
                        $this->lexVar();
                        break;

                    case self::STATE_STRING:
                        $this->lexString();
                        break;

                    case self::STATE_INTERPOLATION:
                        $this->lexInterpolation();
                        break;
                }
            }

            $this->pushToken(Twig_Token::EOF_TYPE);

            if (!empty($this->brackets)) {
                list($expect, $lineno) = array_pop($this->brackets);
                throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
            }
        } catch (Exception $e) {
            // do not leave the process-wide encoding set to ASCII when lexing fails
            if ($mbEncoding) {
                mb_internal_encoding($mbEncoding);
            }

            throw $e;
        }

        if ($mbEncoding) {
            mb_internal_encoding($mbEncoding);
        }

        return new Twig_TokenStream($this->tokens, $this->filename);
    }
141
142
    /**
     * Lexes plain template text up to the next tag opener, then dispatches
     * on the kind of opener found (comment, block or variable).
     *
     * When no opener is left, the remainder of the source becomes a single
     * text token and the cursor jumps to the end.
     *
     * preg_match() is called with 0 rather than null for its flags argument:
     * passing null to that integer parameter is deprecated as of PHP 8.1.
     */
    protected function lexData()
    {
        $lastIndex = count($this->positions[0]) - 1;

        // if no matches are left we return the rest of the template as simple text token
        if ($this->position == $lastIndex) {
            $this->pushToken(Twig_Token::TEXT_TYPE, substr($this->code, $this->cursor));
            $this->cursor = $this->end;

            return;
        }

        // Find the first token after the current cursor
        $position = $this->positions[0][++$this->position];
        while ($position[1] < $this->cursor) {
            if ($this->position == $lastIndex) {
                // nothing left ahead of the cursor; the next call emits the trailing text
                return;
            }
            $position = $this->positions[0][++$this->position];
        }

        // push the template text first
        $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);
        if (isset($this->positions[2][$this->position][0])) {
            $text = rtrim($text);
        }
        $this->pushToken(Twig_Token::TEXT_TYPE, $text);
        // advance over the untrimmed text so offsets and line numbers stay accurate
        $this->moveCursor($textContent.$position[0]);

        switch ($this->positions[1][$this->position][0]) {
            case $this->options['tag_comment'][0]:
                $this->lexComment();
                break;

            case $this->options['tag_block'][0]:
                // raw data?
                if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lexRawData($match[1]);
                // {% line \d+ %}
                } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lineno = (int) $match[1];
                } else {
                    $this->pushToken(Twig_Token::BLOCK_START_TYPE);
                    $this->pushState(self::STATE_BLOCK);
                    $this->currentVarBlockLine = $this->lineno;
                }
                break;

            case $this->options['tag_variable'][0]:
                $this->pushToken(Twig_Token::VAR_START_TYPE);
                $this->pushState(self::STATE_VAR);
                $this->currentVarBlockLine = $this->lineno;
                break;
        }
    }
197
198
    /**
     * Lexes inside a block tag: emits BLOCK_END and returns to the previous
     * state when the closer is found and no bracket is open, otherwise
     * lexes one expression token.
     *
     * The preg_match() flags argument is 0 rather than null, because passing
     * null to that integer parameter is deprecated as of PHP 8.1.
     */
    protected function lexBlock()
    {
        if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::BLOCK_END_TYPE);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }
208
209
    /**
     * Lexes inside a variable tag: emits VAR_END and returns to the previous
     * state when the closer is found and no bracket is open, otherwise
     * lexes one expression token.
     *
     * The preg_match() flags argument is 0 rather than null, because passing
     * null to that integer parameter is deprecated as of PHP 8.1.
     */
    protected function lexVar()
    {
        if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::VAR_END_TYPE);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }
219
220
    /**
     * Lexes a single expression token at the cursor, after skipping any
     * leading whitespace.
     *
     * Candidates are tried in this order: operator, name, number,
     * punctuation, complete string, opening of a double-quoted string.
     * Opening brackets are pushed on the bracket stack and closing brackets
     * are checked against it.
     *
     * Every preg_match() call passes 0 rather than null for the flags
     * argument, because passing null to that integer parameter is
     * deprecated as of PHP 8.1.
     *
     * @throws Twig_Error_Syntax On end of input after whitespace, on an
     *                           unexpected or mismatched closing bracket,
     *                           or on a character that cannot be lexed
     */
    protected function lexExpression()
    {
        // whitespace
        if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
            $this->moveCursor($match[0]);

            if ($this->cursor >= $this->end) {
                throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $this->state === self::STATE_BLOCK ? 'block' : 'variable'), $this->currentVarBlockLine, $this->filename);
            }
        }

        // operators
        if (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
            // collapse inner whitespace so e.g. "not   in" is reported as "not in"
            $this->pushToken(Twig_Token::OPERATOR_TYPE, preg_replace('/\s+/', ' ', $match[0]));
            $this->moveCursor($match[0]);
        }
        // names
        elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::NAME_TYPE, $match[0]);
            $this->moveCursor($match[0]);
        }
        // numbers
        elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
            $number = (float) $match[0];  // floats
            if (ctype_digit($match[0]) && $number <= PHP_INT_MAX) {
                $number = (int) $match[0]; // integers lower than the maximum
            }
            $this->pushToken(Twig_Token::NUMBER_TYPE, $number);
            $this->moveCursor($match[0]);
        }
        // punctuation
        elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
            // opening bracket
            if (false !== strpos('([{', $this->code[$this->cursor])) {
                $this->brackets[] = array($this->code[$this->cursor], $this->lineno);
            }
            // closing bracket
            elseif (false !== strpos(')]}', $this->code[$this->cursor])) {
                if (empty($this->brackets)) {
                    throw new Twig_Error_Syntax(sprintf('Unexpected "%s"', $this->code[$this->cursor]), $this->lineno, $this->filename);
                }

                list($expect, $lineno) = array_pop($this->brackets);
                if ($this->code[$this->cursor] != strtr($expect, '([{', ')]}')) {
                    throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
                }
            }

            $this->pushToken(Twig_Token::PUNCTUATION_TYPE, $this->code[$this->cursor]);
            ++$this->cursor;
        }
        // strings
        elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)));
            $this->moveCursor($match[0]);
        }
        // opening double quoted string
        elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
            $this->brackets[] = array('"', $this->lineno);
            $this->pushState(self::STATE_STRING);
            $this->moveCursor($match[0]);
        }
        // unlexable
        else {
            throw new Twig_Error_Syntax(sprintf('Unexpected character "%s"', $this->code[$this->cursor]), $this->lineno, $this->filename);
        }
    }
287
288
    /**
     * Consumes everything up to the matching "end<tag>" block and emits it
     * as one text token, right-trimmed when the end block's opener carries
     * the whitespace-trim marker.
     *
     * @param string $tag The raw tag name captured by the caller
     *
     * @throws Twig_Error_Syntax When no matching end block exists
     */
    protected function lexRawData($tag)
    {
        $pattern = str_replace('%s', $tag, $this->regexes['lex_raw_data']);
        $found = preg_match($pattern, $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor);
        if (!$found) {
            throw new Twig_Error_Syntax(sprintf('Unexpected end of file: Unclosed "%s" block', $tag), $this->lineno, $this->filename);
        }

        // raw content sits between the cursor and the start of the end block
        $endBlockOffset = $match[0][1];
        $text = substr($this->code, $this->cursor, $endBlockOffset - $this->cursor);

        // skip past both the raw content and the end block itself
        $this->moveCursor($text.$match[0][0]);

        $trimRequested = false !== strpos($match[1][0], $this->options['whitespace_trim']);
        $this->pushToken(Twig_Token::TEXT_TYPE, $trimRequested ? rtrim($text) : $text);
    }
303
304
    /**
     * Skips a comment: moves the cursor past the comment closer without
     * emitting any token.
     *
     * @throws Twig_Error_Syntax When the comment is never closed
     */
    protected function lexComment()
    {
        $found = preg_match($this->regexes['lex_comment'], $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor);
        if (!$found) {
            throw new Twig_Error_Syntax('Unclosed comment', $this->lineno, $this->filename);
        }

        list($closer, $closerOffset) = $match[0];
        $commentBody = substr($this->code, $this->cursor, $closerOffset - $this->cursor);

        $this->moveCursor($commentBody.$closer);
    }
312
313
    /**
     * Lexes inside a double-quoted string: an interpolation opener, a chunk
     * of literal text, or the closing quote.
     *
     * If none of these match at the cursor (for example a lone backslash at
     * the very end of the source), a syntax error is raised. Without it the
     * cursor would never advance and the caller's loop would not terminate.
     *
     * The preg_match() flags argument is 0 rather than null, because passing
     * null to that integer parameter is deprecated as of PHP 8.1.
     *
     * @throws Twig_Error_Syntax On an unclosed delimiter or an unlexable character
     */
    protected function lexString()
    {
        if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
            $this->brackets[] = array($this->options['interpolation'][0], $this->lineno);
            $this->pushToken(Twig_Token::INTERPOLATION_START_TYPE);
            $this->moveCursor($match[0]);
            $this->pushState(self::STATE_INTERPOLATION);
        } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && strlen($match[0]) > 0) {
            $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes($match[0]));
            $this->moveCursor($match[0]);
        } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
            list($expect, $lineno) = array_pop($this->brackets);
            if ($this->code[$this->cursor] != '"') {
                throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
            }

            $this->popState();
            ++$this->cursor;
        } else {
            // unlexable: fail instead of looping forever on the same offset
            throw new Twig_Error_Syntax(sprintf('Unexpected character "%s"', $this->code[$this->cursor]), $this->lineno, $this->filename);
        }
    }
333
334
    /**
     * Lexes inside a string interpolation: closes it when the innermost
     * open bracket is the interpolation opener and the closer is at the
     * cursor, otherwise lexes one expression token.
     *
     * The preg_match() flags argument is 0 rather than null, because passing
     * null to that integer parameter is deprecated as of PHP 8.1.
     */
    protected function lexInterpolation()
    {
        $bracket = end($this->brackets);
        if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor)) {
            array_pop($this->brackets);
            $this->pushToken(Twig_Token::INTERPOLATION_END_TYPE);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }
346
347
    /**
     * Appends a token for the current line, except for text tokens whose
     * value is the empty string, which are dropped.
     *
     * @param int    $type  One of the Twig_Token type constants
     * @param string $value The token value
     */
    protected function pushToken($type, $value = '')
    {
        $isEmptyText = Twig_Token::TEXT_TYPE === $type && '' === $value;

        if (!$isEmptyText) {
            $this->tokens[] = new Twig_Token($type, $value, $this->lineno);
        }
    }
356
357
    /**
     * Advances the cursor past the given text and bumps the line counter
     * by the number of newlines it contains.
     *
     * @param string $text The text that has just been consumed
     */
    protected function moveCursor($text)
    {
        $consumedLength = strlen($text);
        $consumedNewlines = substr_count($text, "\n");

        $this->cursor += $consumedLength;
        $this->lineno += $consumedNewlines;
    }
362
363
    /**
     * Builds the anchored alternation that matches any operator: "=" plus
     * the environment's unary and binary operators, sorted by descending
     * length.
     *
     * @return string The operator regular expression
     */
    protected function getOperatorRegex()
    {
        $operators = array_merge(
            array('='),
            array_keys($this->env->getUnaryOperators()),
            array_keys($this->env->getBinaryOperators())
        );

        // map each operator to its length and order by descending length
        $operators = array_combine($operators, array_map('strlen', $operators));
        arsort($operators);

        $alternatives = array();
        foreach ($operators as $operator => $length) {
            $pattern = preg_quote($operator, '/');

            // an operator that ends with a character must be followed by
            // a whitespace or a parenthesis
            if (ctype_alpha($operator[$length - 1])) {
                $pattern .= '(?=[\s()])';
            }

            // an operator with a space can be any amount of whitespaces
            $alternatives[] = preg_replace('/\s+/', '\s+', $pattern);
        }

        return '/'.implode('|', $alternatives).'/A';
    }
392
393
    /**
     * Saves the current state on the state stack and switches to a new one.
     *
     * @param int $state One of the STATE_* constants
     */
    protected function pushState($state)
    {
        $previous = $this->state;

        $this->states[] = $previous;
        $this->state = $state;
    }
398
399
    /**
     * Restores the most recently saved state from the state stack.
     *
     * @throws Exception When the state stack is empty
     */
    protected function popState()
    {
        if (count($this->states) === 0) {
            throw new Exception('Cannot pop state without a previous state');
        }

        $previous = array_pop($this->states);
        $this->state = $previous;
    }
407
}
408