1 | <?php |
||
8 | class Tokenizer { |
||
// The text to tokenize. Assigned in the constructor and read by getTokens() and doLiterals().
9 | private $str; |
||
// Token type identifiers. doSimpleTokens() stores these values under a token's 'type' key.
// NOTE: the value of NAME is 'LITERAL', not 'NAME'.
10 | const NAME = 'LITERAL'; |
||
11 | const STRING = 'STRING'; |
||
12 | const OPEN_BRACKET = 'OPEN_BRACKET'; |
||
13 | const CLOSE_BRACKET = 'CLOSE_BRACKET'; |
||
// NOTE: the value below is 'SQUARE_BRACKET'; it lacks the OPEN_ prefix carried by the constant name.
14 | const OPEN_SQUARE_BRACKET = 'SQUARE_BRACKET'; |
||
15 | const CLOSE_SQUARE_BRACKET = 'CLOSE_SQUARE_BRACKET'; |
||
// Mapped from the '+' character in $chars.
16 | const CONCAT = 'CONCAT'; |
||
// Mapped from the ',' character in $chars.
17 | const ARG = 'ARG'; |
||
18 | const WHITESPACE = 'WHITESPACE'; |
||
19 | const DOT = 'DOT'; |
||
20 | const NUMERIC = 'NUMERIC'; |
||
21 | const EQUALS = 'EQUALS'; |
||
22 | const NOT = 'NOT'; |
||
23 | const OPEN_BRACE = 'OPEN_BRACE'; |
||
24 | const CLOSE_BRACE = 'CLOSE_BRACE'; |
||
25 | const BOOL = 'BOOL'; |
||
26 | const COLON = 'COLON'; |
||
27 | const SEMI_COLON = 'SEMI_COLON'; |
||
// Mapped from the '#' character in $chars.
28 | const NUM_SIGN = 'NUM_SIGN'; |
||
29 | const GREATER_THAN = 'GREATER_THAN'; |
||
30 | const AT_SIGN = 'AT_SIGN'; |
||
31 | |||
// Lookup table from a single input character to its token type constant.
// Both quote characters map to STRING; space, LF, CR and tab all map to WHITESPACE.
// No entry maps to NAME, NUMERIC or BOOL, so those types must be decided elsewhere
// (presumably in identifyChar()/processLiterals(), whose bodies are not visible here - confirm).
// getChar() passes this array through array_reverse() before using it.
32 | private $chars = [ |
||
33 | '"' => self::STRING, |
||
34 | '\'' => self::STRING, |
||
35 | '(' => self::OPEN_BRACKET, |
||
36 | ')' => self::CLOSE_BRACKET, |
||
37 | '[' => self::OPEN_SQUARE_BRACKET, |
||
38 | ']' => self::CLOSE_SQUARE_BRACKET, |
||
39 | '+' => self::CONCAT, |
||
40 | ',' => self::ARG, |
||
41 | '.' => self::DOT, |
||
42 | '!' => self::NOT, |
||
43 | '=' => self::EQUALS, |
||
44 | '{' => self::OPEN_BRACE, |
||
45 | '}' => self::CLOSE_BRACE, |
||
46 | ':' => self::COLON, |
||
47 | ';' => self::SEMI_COLON, |
||
48 | '#' => self::NUM_SIGN, |
||
49 | '>' => self::GREATER_THAN, |
||
50 | '@' => self::AT_SIGN, |
||
// Whitespace characters: all four collapse to the same WHITESPACE type.
51 | ' ' => self::WHITESPACE, |
||
52 | "\n" => self::WHITESPACE, |
||
53 | "\r" => self::WHITESPACE, |
||
54 | "\t" => self::WHITESPACE |
||
55 | ]; |
||
56 | |||
/**
 * Create a tokenizer for the given input.
 *
 * The value is stored as-is; getTokens() later indexes into it and passes
 * it to strlen(), so it is expected to be a string.
 *
 * @param string $str The text to tokenize.
 */
public function __construct($str) {
    $this->str = $str;
}
||
60 | |||
/**
 * Scan the stored string from start to end and build the token list.
 *
 * Each position is classified with identifyChar() and then offered, in
 * order, to the simple-token, literal, string and bracket handlers.
 * doLiterals() receives the position by reference and may advance it;
 * the return values of doStrings() and doBrackets() are added to the
 * position (presumably the number of extra characters they consumed -
 * their bodies are not visible here).
 *
 * @param bool $returnObj When truthy the tokens are wrapped in a Tokens
 *                        instance; otherwise the raw array is returned.
 * @return Tokens|array
 */
public function getTokens($returnObj = true) {
    $tokens = [];
    $i = 0;

    // The length is re-read on every pass, exactly as the handlers expect.
    while ($i < strlen($this->str)) {
        $type = $this->identifyChar($this->str[$i]);

        $this->doSimpleTokens($tokens, $type);
        $this->doLiterals($tokens, $type, $i);
        $i += $this->doStrings($tokens, $type, $i);
        $i += $this->doBrackets($tokens, $type, $i);

        $i++;
    }

    return $returnObj ? new Tokens($tokens) : $tokens;
}
||
75 | |||
/**
 * Append a token for character types that need no further parsing.
 *
 * A "simple" type is one whose token carries only its type and no value.
 * When $char is one of those types, ['type' => $char] is pushed onto
 * $tokens; any other type is ignored here.
 *
 * @param array  $tokens Token list, modified in place.
 * @param string $char   Token type of the current character (as returned by identifyChar()).
 */
private function doSimpleTokens(&$tokens, $char) {
    $simpleTypes = [
        self::ARG, self::CONCAT, self::DOT, self::NOT,
        self::EQUALS, self::COLON, self::SEMI_COLON, self::WHITESPACE,
        self::NUM_SIGN, self::GREATER_THAN, self::AT_SIGN,
    ];

    // Strict comparison: without it a non-string value such as 0 would
    // loosely equal a type name on PHP < 8 and emit a bogus token.
    if (in_array($char, $simpleTypes, true)) {
        $tokens[] = ['type' => $char];
    }
}
||
83 | |||
/**
 * Collect a run of NAME characters starting at position $i into one literal.
 *
 * Does nothing unless $char is self::NAME. Otherwise it keeps consuming
 * the following characters while identifyChar() classifies them as NAME,
 * leaves $i pointing at the last character of the run, and hands the
 * collected text to processLiterals().
 *
 * @param array  $tokens Token list, passed on to processLiterals() by reference.
 * @param string $char   Token type of the character at position $i.
 * @param int    $i      Current position; advanced in place past the consumed run.
 */
private function doLiterals(&$tokens, $char, &$i) {
    if ($char !== self::NAME) {
        return;
    }

    $name = $this->str[$i];

    // Look one character ahead so $i never moves past the end of the run.
    // The comparison is strict, matching the entry check above.
    while (isset($this->str[$i + 1]) && $this->identifyChar($this->str[$i + 1]) === self::NAME) {
        $i++;
        $name .= $this->str[$i];
    }

    $this->processLiterals($tokens, $name);
}
||
94 | |||
95 | private function processLiterals(&$tokens, $name) { |
||
101 | |||
102 | private function doBrackets(&$tokens, $char, $i) { |
||
118 | |||
119 | private function doStrings(&$tokens, $char, $i) { |
||
129 | |||
130 | private function extractString($pos) { |
||
137 | |||
138 | private function extractBrackets($open, $startBracket = '(', $closeBracket = ')') { |
||
145 | |||
146 | private function identifyChar($chr) { |
||
150 | |||
151 | private function getChar($num) { |
||
152 | $chars = array_reverse($this->chars); |
||
156 | |||
157 | public function serialize($tokens) { |
||
158 | $str = ''; |
||
169 | |||
170 | private function serializeValue($token) { |
||
176 | } |
||
177 |