1 | <?php |
||
17 | class Lexer extends NativeStateless |
||
18 | { |
||
/**#@+
 * Names of the tokens recognised inside grammar files.
 *
 * The value of every constant is the same string as its own name.
 */

// Whitespace and comments (the entries listed in TOKENS_SKIP).
public const T_WHITESPACE      = 'T_WHITESPACE';
public const T_COMMENT         = 'T_COMMENT';
public const T_BLOCK_COMMENT   = 'T_BLOCK_COMMENT';

// "%"-prefixed directives: %pragma, %token, %skip and %include.
public const T_PRAGMA          = 'T_PRAGMA';
public const T_TOKEN           = 'T_TOKEN';
public const T_SKIP            = 'T_SKIP';
public const T_INCLUDE         = 'T_INCLUDE';

// Definition header: a name, optionally "-> ...", followed by ":".
public const T_NODE_DEFINITION = 'T_NODE_DEFINITION';

// Alternation and repetition operators: | ? + * and the {...} forms.
public const T_OR              = 'T_OR';
public const T_ZERO_OR_ONE     = 'T_ZERO_OR_ONE';
public const T_ONE_OR_MORE     = 'T_ONE_OR_MORE';
public const T_ZERO_OR_MORE    = 'T_ZERO_OR_MORE';
public const T_N_TO_M          = 'T_N_TO_M';
public const T_ZERO_TO_M       = 'T_ZERO_TO_M';
public const T_N_OR_MORE       = 'T_N_OR_MORE';
public const T_EXACTLY_N       = 'T_EXACTLY_N';

// References: ::name::, <name>, name() and #name.
public const T_SKIPPED         = 'T_SKIPPED';
public const T_KEPT            = 'T_KEPT';
public const T_NAMED           = 'T_NAMED';
public const T_NODE            = 'T_NODE';

// Grouping parentheses.
public const T_GROUP_OPEN      = 'T_GROUP_OPEN';
public const T_GROUP_CLOSE     = 'T_GROUP_CLOSE';

/**#@-*/
||
45 | |||
/**
 * Maps each token name to the PCRE fragment that recognises it.
 *
 * The fragments carry no delimiters and no modifiers. Capture groups expose
 * the interesting parts of a lexeme (for example the directive name and its
 * value, or the numeric bounds of a repetition).
 *
 * The four bounded-repetition tokens ({n,m}, {,m}, {n,} and {n}) all accept
 * optional horizontal whitespace around the numbers and the comma, so that
 * "{ 1 , 2 }" and "{ 3 }" are tokenised just like "{1,2}" and "{3}".
 *
 * NOTE(review): the entry order is deliberately left untouched; the consuming
 * lexer presumably tries the patterns in declaration order (e.g.
 * T_NODE_DEFINITION before T_NODE) - confirm before reordering.
 *
 * @var array<string, string>
 */
private const TOKENS_LIST = [
    self::T_WHITESPACE      => '\s+',
    self::T_COMMENT         => '//[^\\n]*',
    self::T_BLOCK_COMMENT   => '/\\*.*?\\*/',
    self::T_PRAGMA          => '%pragma\h+([\w\.]+)\h+(.+?)\s+',
    self::T_TOKEN           => '%token\h+(\w+)\h+(.+?)(?:\s*->\h*\\$(\d+)\h*)?\n',
    self::T_SKIP            => '%skip\h+(\w+)\h+(.+?)\s+',
    self::T_INCLUDE         => '%include\h+(.+?)\s+',
    self::T_NODE_DEFINITION => '(#?\w+)(?:\s*->\h*(.+?)\h*)?\s*:',
    self::T_OR              => '\\|',
    self::T_ZERO_OR_ONE     => '\\?',
    self::T_ONE_OR_MORE     => '\\+',
    self::T_ZERO_OR_MORE    => '\\*',
    // Bounded repetitions: identical whitespace tolerance in all four forms.
    self::T_N_TO_M          => '{\h*(\d+)\h*,\h*(\d+)\h*}',
    self::T_ZERO_TO_M       => '{\h*,\h*(\d+)\h*}',
    self::T_N_OR_MORE       => '{\h*(\d+)\h*,\h*}',
    self::T_EXACTLY_N       => '{\h*(\d+)\h*}',
    self::T_SKIPPED         => '::(\w+)::',
    self::T_KEPT            => '<(\w+)>',
    self::T_NAMED           => '(\w+)\\(\\)',
    self::T_NODE            => '#(\w+)',
    self::T_GROUP_OPEN      => '\\(',
    self::T_GROUP_CLOSE     => '\\)',
];
||
73 | |||
/**
 * Names of the tokens treated as skipped: whitespace, line comments and
 * block comments.
 *
 * @var string[]
 */
private const TOKENS_SKIP = [self::T_WHITESPACE, self::T_COMMENT, self::T_BLOCK_COMMENT];
||
82 | |||
83 | /** |
||
84 | * Lexer constructor. |
||
85 | */ |
||
86 | public function __construct() |
||
94 | } |
||
95 |