Tokenizer::__construct()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 3
ccs 2
cts 2
cp 1
rs 10
cc 1
eloc 1
nc 1
nop 0
crap 1
1
<?php
2
/**
3
 * For licensing information, please see the LICENSE file accompanied with this file.
4
 *
5
 * @author Gerard van Helden <[email protected]>
6
 * @copyright 2012 Gerard van Helden <http://melp.nl>
7
 */
8
9
namespace Zicht\Tool\Script;
10
11
use Zicht\Tool\Script\Tokenizer\Expression as ExpressionTokenizer;
12
13
/**
14
 * Tokenizer for the script language
15
 */
16
class Tokenizer implements TokenizerInterface
{
    /**
     * Constructor.
     *
     * Intentionally empty; this class declares no properties to initialize.
     */
    public function __construct()
    {
    }


    /**
     * Returns an array of tokens
     *
     * Input outside of an expression is collected into DATA tokens, with consecutive data merged into a
     * single token. A '$(' or '@(' sequence emits an EXPR_START token, after which tokenizing is delegated
     * to the expression tokenizer. The sequence '$$(' is an escape and yields the literal data '$('.
     *
     * @param string $string The input to tokenize
     * @param int &$needle Byte offset into $string; always reset to 0 on entry and advanced while tokenizing
     * @throws \UnexpectedValueException When an iteration of the main loop does not consume any input
     * @return array
     */
    public function getTokens($string, &$needle = 0)
    {
        $exprTokenizer = new ExpressionTokenizer();
        $ret = array();
        $depth = 0;
        $needle = 0;
        $len = strlen($string);
        while ($needle < $len) {
            $before = $needle;
            if ($depth === 0) {
                // Only a short, fixed-length prefix is inspected, so the remaining input does not need to be
                // copied on every iteration.
                $start = substr($string, $needle, 2);

                // match either '$(' or '@(' and mark that as an EXPR_START token.
                if ($start === '$(' || $start === '@(') {
                    $needle += strlen($start);
                    $ret[] = new Token(Token::EXPR_START, $start);

                    // flag that we are inside an expression, so the next iteration hands over to the
                    // expression tokenizer.
                    $depth++;
                } else {
                    // store the current token in a temp var for appending, in case it's a DATA token
                    $token = end($ret);

                    // handle escaping of the $( syntax, '$$(' becomes '$('
                    if (substr($string, $needle, 3) === '$$(') {
                        $value = '$(';
                        $needle += 3;
                    } else {
                        // square-bracket offset access; the curly-brace form is a parse error as of PHP 8.0
                        $value = $string[$needle];
                        $needle += strlen($value);
                    }

                    // if the current token is DATA, and the previous token is DATA, append the value to the previous
                    // and ignore the current.
                    if ($token && $token->match(Token::DATA)) {
                        $token->value .= $value;
                        unset($token);
                    } else {
                        $ret[] = new Token(Token::DATA, $value);
                    }
                }
            } else {
                // the expression tokenizer receives $needle by reference and is expected to advance it
                $ret = array_merge($ret, $exprTokenizer->getTokens($string, $needle));
                $depth = 0;
            }
            if ($before === $needle) {
                // safety net: nothing was consumed, bail out instead of looping forever.
                throw new \UnexpectedValueException(
                    "Unexpected input near token {$string[$needle]}, unsupported character"
                );
            }
        }
        return $ret;
    }
}
89