|
1
|
|
|
<?php |
|
2
|
|
|
namespace Hal\Component\Token; |
|
3
|
|
|
|
|
4
|
|
|
/**
 * Regex-based tokenizer.
 *
 * Turns PHP source text into a flat list of string tokens. Comments, string
 * literals, booleans, some type casts and bare "return;" statements are
 * replaced by the corresponding Token::T_* markers before splitting.
 */
class Tokenizer
{
    /**
     * Splits PHP source code into tokens.
     *
     * The code is normalised by a fixed sequence of regular-expression
     * substitutions (the order matters, each step works on the output of the
     * previous one) and finally split on whitespace, ";", "," and on
     * braces/parentheses.
     *
     * @param string $code PHP source text
     * @return array non-empty string tokens; "{", "}", "(" and ")" are kept as
     *               tokens of their own, whitespace, ";" and "," are dropped
     */
    public function tokenize($code)
    {
        // Insert a space between "}", ")" or ";" and a directly following "?"
        // so that the two characters do not end up glued into one token.
        $code = preg_replace('!(\}|\)|;)\?!', '$1 ?', $code);

        // Replace one-line comments ("//" or "#") by the comment marker.
        // The comment ends at the next newline OR at the end of the input, so
        // a comment on the last line of a file without a trailing newline is
        // handled as well.
        // NOTE(review): this runs before string literals are replaced, so a
        // "//" or "#" inside a string literal is treated as a comment start.
        $code = preg_replace('!((\/\/|#).*(?:\n|$))!', Token::T_COMMENT . ' ', $code);

        // Collapse every run of whitespace (including line breaks) to one space.
        $code = preg_replace('!(\s+)!', ' ', $code);

        // Replace double- and single-quoted string literals (backslash escapes
        // are honoured) by the string-value marker.
        $code = preg_replace('/"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"/s', Token::T_VALUE_STRING, $code);
        $code = preg_replace("/'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'/s", Token::T_VALUE_STRING, $code);

        // Replace boolean literals (case-insensitive). The word boundaries keep
        // identifiers that merely contain "true"/"false" (e.g. $isTrue,
        // assertFalse) intact.
        $code = preg_replace('/\b(true|false)\b/i', Token::T_VALUE_BOOLEAN, $code);

        // Drop everything that is not between PHP open/close tags:
        // the text before the first "<?", then each "?> ... <?" stretch,
        // and finally the open tags themselves.
        // NOTE(review): text following a final "?>" with no later "<?" is kept.
        $code = preg_replace('!^(.*?)<\?!', '<?', $code);
        $code = preg_replace('!\?>(.*?)<\?!', '<?', $code);
        $code = preg_replace('!(<\?php|<\?)!', '', $code);

        // Replace the delimiters of multi-line comments by open/close markers.
        // Only the delimiters are replaced; the text between them stays in the
        // stream.
        $code = preg_replace('!/\*+!', Token::T_COMMENT_OPEN, $code);
        $code = preg_replace('!\*/!', Token::T_COMMENT_CLOSE, $code);

        // Replace type casts by the cast marker.
        // NOTE(review): "(int)" and the long aliases are not in this list.
        $code = preg_replace('!(\((string|bool|float|object|array)\))!', Token::T_CAST, $code);

        // Replace a bare "return;" (case-insensitive) by the return-void marker.
        $code = preg_replace('!return;!i', Token::T_RETURN_VOID, $code);

        // Split into tokens. Only braces and parentheses are captured, so they
        // are returned as tokens; the other separators are discarded.
        // -1 means "no limit"; passing null here is deprecated since PHP 8.1.
        return preg_split(
            '!\s|;|,|(\{|\}|\(|\))!',
            $code,
            -1,
            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
        );
    }
}