Completed
Push — master ( cc2765...340ff8 )
by Richard
08:19
created

Parser::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
dl 0
loc 3
ccs 3
cts 3
cp 1
rs 10
c 1
b 0
f 0
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
/******************************************************************************
3
 * An implementation of dicto (scg.unibe.ch/dicto) in and for PHP.
4
 *
5
 * Copyright (c) 2016 Richard Klees <[email protected]>
6
 * 
7
 * This software is licensed under The MIT License. You should have received
8
 * a copy of the license along with the code.
9
 */
10
11
namespace Lechimp\Dicto\Definition;
12
13
/**
 * Baseclass for Parsers.
 *
 * The SymbolTable is created once in the constructor. The Tokenizer and the
 * current token only exist while parse() is running and are reset afterwards.
 * Concrete parsers implement root() and use the protected helpers of this
 * class to define their grammar and to walk through the tokens.
 */
abstract class Parser {
    /**
     * @var SymbolTable
     */
    protected $symbol_table;

    /**
     * Only set while parse() is running.
     *
     * @var Tokenizer|null
     */
    protected $tokenizer = null;

    /**
     * The current token as (Symbol, array $matches), null outside of parse().
     *
     * @var array|null
     */
    protected $token = null;

    public function __construct() {
        $this->symbol_table = $this->create_symbol_table();
    }

    /**
     * Parse the string according to this parser.
     *
     * Creates a tokenizer for $source, loads its first token and hands over
     * to root(). The tokenizer and the current token are reset afterwards,
     * no matter whether parsing succeeded or an exception was thrown.
     *
     * @param   string  $source
     * @return  mixed   whatever root() returns
     */
    public function parse($source) {
        try {
            $this->tokenizer = $this->create_tokenizer($source);
            $this->token = $this->tokenizer->current();
            return $this->root();
        }
        finally {
            // Drop the per-run state so a failed parse can not leak into
            // the next call.
            $this->tokenizer = null;
            $this->token = null;
        }
    }

    /**
     * The root for the parse tree.
     *
     * @return  mixed
     */
    abstract protected function root();

    // Factory Methods

    /**
     * Build the Tokenizer.
     *
     * @param   string  $source
     * @return  Tokenizer
     */
    public function create_tokenizer($source) {
        // Expression instead of string assertion: string assertions are
        // deprecated since PHP 7.2 and are not evaluated anymore in PHP 8.
        assert(is_string($source));
        return new Tokenizer($this->symbol_table, $source);
    }

    /**
     * Build the SymbolTable
     *
     * @return SymbolTable
     */
    public function create_symbol_table() {
        // TODO: When symbol, operator and stuff were moved to
        //       Symbol table, there could be an add_symbols method
        //       postprocessing the table instead of using this->symbol etc.
        return new SymbolTable();
    }

    // Helpers for defining the grammar.

    /**
     * Add a symbol to the symbol table.
     *
     * TODO: This most probably should go to symbol table.
     *
     * @param   string  $regexp
     * @param   int     $binding_power
     * @throws  \InvalidArgumentException if %$regexp% is not a regexp
     * @throws  \LogicException if there already is a symbol with that $regexp.
     * @return  Symbol
     */
    protected function symbol($regexp, $binding_power = 0) {
        return $this->symbol_table->add_symbol($regexp, $binding_power);
    }

    /**
     * Add an operator to the symbol table.
     *
     * TODO: This most probably should go to symbol table.
     *
     * Convenience, will split the given string and wrap each char in []
     * before passing it to symbol.
     *
     * @param   string  $op
     * @param   int     $binding_power
     * @throws  \InvalidArgumentException if the regexp built from $op is not a regexp
     * @throws  \LogicException if there already is a symbol with that regexp.
     * @return  Symbol
     */
    protected function operator($op, $binding_power = 0) {
        return $this->symbol($this->operator_regexp($op), $binding_power);
    }

    /**
     * Add a literal to the symbol table, where the matches are
     * transformed using the $converter.
     *
     * The symbol is added with the default binding power and the $converter
     * is registered as its null denotation.
     *
     * TODO: This most probably should go to symbol table.
     *
     * @param   string      $regexp
     * @param   \Closure    $converter
     * @throws  \InvalidArgumentException if %$regexp% is not a regexp
     * @throws  \LogicException if there already is a symbol with that $regexp.
     * @return  Symbol
     */
    protected function literal($regexp, $converter) {
        $symbol = $this->symbol($regexp);
        return $symbol->null_denotation_is($converter);
    }

    // Helpers for actual parsing.

    /**
     * Set the current token to the next token from the tokenizer.
     *
     * Must only be used while parse() is running.
     *
     * @return  void
     */
    protected function fetch_next_token() {
        assert(is_array($this->token));
        assert($this->tokenizer !== null);
        $this->tokenizer->next();
        $this->token = $this->tokenizer->current();
    }

    /**
     * Get the current symbol.
     *
     * @return  Symbol
     */
    protected function current_symbol() {
        return $this->token[0];
    }

    /**
     * Get the current match.
     *
     * @return  string[]
     */
    protected function current_match() {
        return $this->token[1];
    }

    /**
     * Advance the tokenizer to the next token if current token
     * was matched by the given regexp.
     *
     * @param   string  $regexp
     * @throws  ParserException if the current token was not matched by $regexp.
     * @return  void
     */
    protected function advance($regexp) {
        assert(is_string($regexp));
        // Checked here as well, so a broken invariant shows up before the
        // current token is inspected.
        assert(is_array($this->token));
        assert($this->tokenizer !== null);
        if (!$this->is_current_token_matched_by($regexp)) {
            throw new ParserException("Syntax Error: Expected '$regexp'");
        }
        $this->fetch_next_token();
    }

    /**
     * Advance the tokenizer to the next token if current token
     * was matched by the given operator.
     *
     * @param   string  $op
     * @throws  ParserException if the current token is not the operator.
     * @return  void
     */
    protected function advance_operator($op) {
        $this->advance($this->operator_regexp($op));
    }

    /**
     * Is the end of the file reached?
     *
     * This is the case when the current token was matched by the empty
     * regexp.
     *
     * @return  bool
     */
    public function is_end_of_file_reached() {
        return $this->is_current_token_matched_by("");
    }

    /**
     * Check if the current token was matched by the given regexp.
     *
     * The regexps are compared as strings, no matching is performed.
     *
     * @param   string  $regexp
     * @return  bool
     */
    protected function is_current_token_matched_by($regexp) {
        assert(is_string($regexp));
        // Strict comparison: with == numeric looking regexps like "10" and
        // "1e1" would be considered equal.
        // NOTE(review): assumes Symbol::regexp() returns a string - confirm.
        return $this->token[0]->regexp() === $regexp;
    }

    /**
     * Check if the current token is the given operator.
     *
     * @param   string  $operator
     * @return  bool
     */
    protected function is_current_token_operator($operator) {
        return $this->is_current_token_matched_by($this->operator_regexp($operator));
    }

    // Internal Helpers

    /**
     * "abc" -> "[a][b][c]"
     *
     * NOTE(review): the chars are wrapped as they are, chars with a special
     * meaning inside a character class (e.g. "^" or "\") are not escaped.
     *
     * @param   string  $op
     * @return  string
     */
    protected function operator_regexp($op) {
        assert(is_string($op));
        $regexp = "";
        foreach (str_split($op, 1) as $c) {
            $regexp .= "[$c]";
        }
        return $regexp;
    }
}
244