1
|
|
|
<?php
|
2
|
|
|
/**
|
3
|
|
|
* @author Niels A.D.
|
4
|
|
|
* @author Todd Burry <[email protected]>
|
5
|
|
|
* @copyright 2010 Niels A.D., 2014 Todd Burry
|
6
|
|
|
* @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1
|
7
|
|
|
* @package pQuery
|
8
|
|
|
*/
|
9
|
|
|
|
10
|
|
|
namespace pQuery;
|
11
|
|
|
|
12
|
|
|
/**
 * Converts a document into tokens
 *
 * Can convert any string into tokens. The base class only supports
 * identifier/whitespace tokens. For more tokens, the class can be
 * easily extended.
 *
 * The constructor already advances to the first token, so read the current
 * token before calling {@link next()}. Use like:
 * <code>
 * <?php
 *  $a = new TokenizerBase('hello word');
 *  while ($a->token !== $a::TOK_NULL) {
 *    echo $a->token, ': ',$a->getTokenString(), "<br>\n";
 *    $a->next();
 *  }
 * ?>
 * </code>
 *
 * @internal The tokenizer works with a character map that connects a certain
 * character to a certain function/token. This class is built with speed in mind.
 */
class TokenizerBase {

    /**
     * NULL Token, used at end of document (parsing should stop after this token)
     */
    const TOK_NULL = 0;
    /**
     * Unknown token, used at unidentified character
     */
    const TOK_UNKNOWN = 1;
    /**
     * Whitespace token, used with whitespace
     */
    const TOK_WHITESPACE = 2;
    /**
     * Identifier token, used with identifiers
     */
    const TOK_IDENTIFIER = 3;

    /**
     * The document that is being tokenized
     * @var string
     * @internal Public for faster access!
     * @see setDoc()
     * @see getDoc()
     * @access private
     */
    public $doc = '';

    /**
     * The size of the document (length of string, as reported by strlen())
     * @var int
     * @internal Public for faster access!
     * @see $doc
     * @access private
     */
    public $size = 0;

    /**
     * Current (character) position in the document
     *
     * After a multi-character token has been parsed this is the index of the
     * LAST character of that token.
     * @var int
     * @internal Public for faster access!
     * @see setPos()
     * @see getPos()
     * @access private
     */
    public $pos = 0;

    /**
     * Current (Line/Column) position in document
     *
     * Index 0 is the zero-based line number. Index 1 is the document index of
     * the line break that started the current line (0 while on the first line).
     * @var array (Current_Line, Line_Starting_Pos)
     * @internal Public for faster access!
     * @see getLinePos()
     * @access private
     */
    public $line_pos = array(0, 0);

    /**
     * Current token
     * @var int
     * @internal Public for faster access!
     * @see getToken()
     * @access private
     */
    public $token = self::TOK_NULL;

    /**
     * Start position of token. If NULL, then current position is used.
     * @var int
     * @internal Public for faster access!
     * @see getTokenString()
     * @access private
     */
    public $token_start = null;

    /**
     * List with all the character that can be considered as whitespace
     * @var array|string
     * @internal Variable is public + associated array for faster access!
     * @internal array(' ' => true) will recognize space (' ') as whitespace
     * @internal String will be converted to array in constructor
     * @internal Result token will be {@link self::TOK_WHITESPACE};
     * @see setWhitespace()
     * @see getWhitespace()
     * @access private
     */
    public $whitespace = " \t\n\r\0\x0B";

    /**
     * List with all the character that can be considered as identifier
     * @var array|string
     * @internal Variable is public + associated array for faster access!
     * @internal array('a' => true) will recognize 'a' as identifier
     * @internal String will be converted to array in constructor
     * @internal Result token will be {@link self::TOK_IDENTIFIER};
     * @see setIdentifiers()
     * @see getIdentifiers()
     * @access private
     */
    public $identifiers = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_';

    /**
     * All characters that should be mapped to a token/function that cannot be considered as whitespace or identifier
     * @var array
     * @internal Variable is public + associated array for faster access!
     * @internal array('a' => 'parse_a') will call $this->parse_a() if it matches the character 'a'
     * @internal array('a' => self::TOK_A) will set token to TOK_A if it matches the character 'a'
     * @see mapChar()
     * @see unmapChar()
     * @access private
     */
    public $custom_char_map = array();

    /**
     * Automatically built character map. Built using {@link $identifiers}, {@link $whitespace} and {@link $custom_char_map}
     * @var array
     * @internal Public for faster access!
     * @access private
     */
    public $char_map = array();

    /**
     * All errors found while parsing the document
     * @var array
     * @see addError()
     */
    public $errors = array();

    /**
     * Class constructor
     *
     * Converts the default whitespace/identifier lists into lookup arrays and
     * loads the document, which also parses the first token.
     * @param string $doc Document to be tokenized
     * @param int $pos Position to start parsing
     * @see setDoc()
     * @see setPos()
     */
    public function __construct($doc = '', $pos = 0) {
        $this->setWhitespace($this->whitespace);
        $this->setIdentifiers($this->identifiers);

        $this->setDoc($doc, $pos);
    }

    /**
     * Sets target document
     * @param string $doc Document to be tokenized
     * @param int $pos Position to start parsing
     * @see getDoc()
     * @see setPos()
     */
    public function setDoc($doc, $pos = 0) {
        $this->doc = $doc;
        $this->size = strlen($doc);
        $this->setPos($pos);
    }

    /**
     * Returns target document
     * @return string
     * @see setDoc()
     */
    public function getDoc() {
        return $this->doc;
    }

    /**
     * Sets position in document
     *
     * Resets the line tracking to the first line and immediately parses the
     * token that starts at $pos, so that token becomes the current one.
     * @param int $pos
     * @see getPos()
     */
    public function setPos($pos = 0) {
        // next() pre-increments, so start one character before the target.
        $this->pos = $pos - 1;
        $this->line_pos = array(0, 0);
        $this->next();
    }

    /**
     * Returns current position in document (Index)
     * @return int
     * @see setPos()
     */
    public function getPos() {
        return $this->pos;
    }

    /**
     * Returns current position in document (Line/Char)
     * @return array array(Line, Column)
     */
    public function getLinePos() {
        return array($this->line_pos[0], $this->pos - $this->line_pos[1]);
    }

    /**
     * Returns current token
     * @return int
     * @see $token
     */
    public function getToken() {
        return $this->token;
    }

    /**
     * Returns current token as string
     * @param int $start_offset Offset from token start
     * @param int $end_offset Offset from token end
     * @return string
     */
    public function getTokenString($start_offset = 0, $end_offset = 0) {
        // Single-character tokens leave $token_start at null; they start at $pos.
        $token_start = ((is_int($this->token_start)) ? $this->token_start : $this->pos) + $start_offset;
        $len = $this->pos - $token_start + 1 + $end_offset;
        return (($len > 0) ? substr($this->doc, $token_start, $len) : '');
    }

    /**
     * Sets characters to be recognized as whitespace
     *
     * Used like: setWhitespace('ab') or setWhitespace(array('a' => true, 'b', 'c'));
     * @param string|array $ws
     * @see getWhitespace();
     */
    public function setWhitespace($ws) {
        $this->whitespace = $this->toCharSet($ws);
        $this->buildCharMap();
    }

    /**
     * Returns whitespace characters as string/array
     * @param bool $as_string Should the result be a string or an array?
     * @return string|array
     * @see setWhitespace()
     */
    public function getWhitespace($as_string = true) {
        $ws = array_keys($this->whitespace);
        return (($as_string) ? implode('', $ws) : $ws);
    }

    /**
     * Sets characters to be recognized as identifier
     *
     * Used like: setIdentifiers('ab') or setIdentifiers(array('a' => true, 'b', 'c'));
     * @param string|array $ident
     * @see getIdentifiers();
     */
    public function setIdentifiers($ident) {
        $this->identifiers = $this->toCharSet($ident);
        $this->buildCharMap();
    }

    /**
     * Returns identifier characters as string/array
     * @param bool $as_string Should the result be a string or an array?
     * @return string|array
     * @see setIdentifiers()
     */
    public function getIdentifiers($as_string = true) {
        $ident = array_keys($this->identifiers);
        return (($as_string) ? implode('', $ident) : $ident);
    }

    /**
     * Maps a custom character to a token/function
     *
     * Used like: mapChar('a', self::{@link TOK_IDENTIFIER}) or mapChar('a', 'parse_identifier');
     * @param string $char Character that should be mapped. If set, it will be overridden
     * @param int|string $map If function name, then $this->function will be called, otherwise token is set to $map
     * @see unmapChar()
     */
    public function mapChar($char, $map) {
        $this->custom_char_map[$char] = $map;
        $this->buildCharMap();
    }

    /**
     * Removes a char mapped with {@link mapChar()}
     * @param string $char Character that should be unmapped
     * @see mapChar()
     */
    public function unmapChar($char) {
        unset($this->custom_char_map[$char]);
        $this->buildCharMap();
    }

    /**
     * Builds the {@link $char_map} array
     * @internal Builds single array that maps all characters. Gets called if {@link $whitespace}, {@link $identifiers} or {@link $custom_char_map} get modified
     * @internal Later assignments win: identifiers override whitespace, and both override custom mappings
     */
    protected function buildCharMap() {
        $this->char_map = $this->custom_char_map;
        // The lists are still plain strings until the constructor converted them.
        if (is_array($this->whitespace)) {
            foreach ($this->whitespace as $w => $v) {
                $this->char_map[$w] = 'parse_whitespace';
            }
        }
        if (is_array($this->identifiers)) {
            foreach ($this->identifiers as $i => $v) {
                $this->char_map[$i] = 'parse_identifier';
            }
        }
    }

    /**
     * Add error to the array and appends current position
     *
     * The stored message is passed through htmlentities() and reports a
     * one-based line number.
     * @param string $error
     */
    public function addError($error) {
        $this->errors[] = htmlentities($error.' at '.($this->line_pos[0] + 1).', '.($this->pos - $this->line_pos[1] + 1).'!');
    }

    /**
     * Parse line breaks and increase line number
     *
     * Treats "\r", "\n" and "\r\n" as one line break each. For "\r\n" the
     * position is advanced onto the "\n" so the pair is only counted once.
     * @internal Gets called to process line breaks
     */
    protected function parse_linebreak() {
        $char = $this->doc[$this->pos];
        if ($char === "\r") {
            ++$this->line_pos[0];
            if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === "\n")) {
                ++$this->pos;
            }
            $this->line_pos[1] = $this->pos;
        } elseif ($char === "\n") {
            ++$this->line_pos[0];
            $this->line_pos[1] = $this->pos;
        }
    }

    /**
     * Parse whitespace
     * @return int Token
     * @internal Gets called with {@link $whitespace} characters
     */
    protected function parse_whitespace() {
        $this->token_start = $this->pos;

        // The character that triggered this call may itself be a line break;
        // count it too, otherwise runs starting with a break lose a line.
        $this->parse_linebreak();

        while (++$this->pos < $this->size) {
            if (!isset($this->whitespace[$this->doc[$this->pos]])) {
                break;
            }
            $this->parse_linebreak();
        }

        // Step back onto the last whitespace character.
        --$this->pos;
        return self::TOK_WHITESPACE;
    }

    /**
     * Parse identifiers
     * @return int Token
     * @internal Gets called with {@link $identifiers} characters
     */
    protected function parse_identifier() {
        $this->token_start = $this->pos;

        while ((++$this->pos < $this->size) && isset($this->identifiers[$this->doc[$this->pos]])) {
            // Consume identifier characters.
        }

        // Step back onto the last identifier character.
        --$this->pos;
        return self::TOK_IDENTIFIER;
    }

    /**
     * Continues to the next token
     * @return int Next token ({@link TOK_NULL} if none)
     */
    public function next() {
        $this->token_start = null;

        if (++$this->pos < $this->size) {
            return $this->tokenFromCurrentChar();
        }
        return ($this->token = self::TOK_NULL);
    }

    /**
     * Finds the next token, but skips whitespace
     * @return int Next token ({@link TOK_NULL} if none)
     */
    public function next_no_whitespace() {
        $this->token_start = null;

        while (++$this->pos < $this->size) {
            if (!isset($this->whitespace[$this->doc[$this->pos]])) {
                return $this->tokenFromCurrentChar();
            }
            // Skipped whitespace still has to be counted for line tracking.
            $this->parse_linebreak();
        }

        return ($this->token = self::TOK_NULL);
    }

    /**
     * Finds the next token using stop characters.
     *
     * Used like: next_search('abc') or next_search(array('a' => true, 'b' => true, 'c' => true));
     * @param string|array $characters Characters to search for
     * @param bool $callback Should the function check the charmap after finding a character?
     * @return int Next token ({@link TOK_NULL} if none)
     */
    public function next_search($characters, $callback = true) {
        $this->token_start = $this->pos;
        if (!is_array($characters)) {
            $characters = array_fill_keys(str_split($characters), true);
        }

        while (++$this->pos < $this->size) {
            if (isset($characters[$this->doc[$this->pos]])) {
                return $this->tokenFromCurrentChar($callback);
            }
            $this->parse_linebreak();
        }

        return ($this->token = self::TOK_NULL);
    }

    /**
     * Finds the next token by searching for a string
     *
     * The search is case-insensitive (stripos) and starts one character after
     * the current position. If the needle is not found the position is moved
     * to the end of the document.
     * @param string $needle The needle that's being searched for
     * @param bool $callback Should the function check the charmap after finding the needle?
     * @return int Next token ({@link TOK_NULL} if none)
     */
    public function next_pos($needle, $callback = true) {
        $this->token_start = $this->pos;

        $found = ($this->pos < $this->size) ? stripos($this->doc, $needle, $this->pos + 1) : false;
        if ($found === false) {
            $this->pos = $this->size;
            return ($this->token = self::TOK_NULL);
        }

        // Account for every "\n" in the text that is jumped over.
        $skipped_len = $found - $this->pos - 1;
        if ($skipped_len > 0) {
            $skipped = substr($this->doc, $this->pos + 1, $skipped_len);
            $last_break = strrpos($skipped, "\n");
            if ($last_break !== false) {
                $this->line_pos[0] += substr_count($skipped, "\n");
                $this->line_pos[1] = $last_break + $this->pos + 1;
            }
        }

        $this->pos = $found;
        return $this->tokenFromCurrentChar($callback);
    }

    /**
     * Expect a specific token or character. Adds error if token doesn't match.
     *
     * A character is compared against the character at the current position.
     * At the end of the document a character never matches.
     * @param string|int $token Character or token to expect
     * @param bool|int $do_next Go to next character before evaluating. 1 for next char, true to ignore whitespace
     * @param bool|int $try_next Try next character if current doesn't match. 1 for next char, true to ignore whitespace
     * @param bool|int $next_on_match Go to next character after evaluating. 1 for next char, true to ignore whitespace
     * @return bool
     */
    protected function expect($token, $do_next = true, $try_next = false, $next_on_match = 1) {
        if ($do_next) {
            $this->step($do_next);
        }

        $matched = $this->isExpected($token);
        if (!$matched && $try_next) {
            $this->step($try_next);
            $matched = $this->isExpected($token);
        }

        if (!$matched) {
            if (is_int($token)) {
                $this->addError('Unexpected "'.$this->getTokenString().'"');
            } else {
                $this->addError('Expected "'.$token.'", but found "'.$this->getTokenString().'"');
            }
            return false;
        }

        if ($next_on_match) {
            $this->step($next_on_match);
        }
        return true;
    }

    /**
     * Converts a character list into a lookup array (character => true)
     *
     * Accepts a string of characters, a list of characters, or a map whose
     * keys are the characters and whose values are true; the forms may be mixed.
     * @param string|array $chars
     * @return array
     */
    private function toCharSet($chars) {
        if (!is_array($chars)) {
            $chars = str_split($chars);
        }

        $set = array();
        foreach ($chars as $key => $value) {
            // array('a' => true) carries the character in the key,
            // array('a') carries it in the value.
            $set[($value === true) ? $key : $value] = true;
        }
        return $set;
    }

    /**
     * Sets the current token from the character at the current position
     *
     * Mapped strings are called as methods on this object and their return
     * value becomes the token; any other mapped value is used as the token.
     * @param bool $use_map Consult {@link $char_map}? If not, the token is {@link TOK_UNKNOWN}
     * @return int Token
     */
    private function tokenFromCurrentChar($use_map = true) {
        $char = $this->doc[$this->pos];
        if (!$use_map || !isset($this->char_map[$char])) {
            return ($this->token = self::TOK_UNKNOWN);
        }

        $map = $this->char_map[$char];
        if (is_string($map)) {
            return ($this->token = $this->{$map}());
        }
        return ($this->token = $map);
    }

    /**
     * Advances one token for {@link expect()}
     * @param bool|int $mode 1 for the next token, anything else skips whitespace first
     * @return int Token
     */
    private function step($mode) {
        return (($mode === 1) ? $this->next() : $this->next_no_whitespace());
    }

    /**
     * Checks whether the current token/character is the one {@link expect()} wants
     * @param string|int $token Token constant, or character to compare with the current position
     * @return bool
     */
    private function isExpected($token) {
        if (is_int($token)) {
            return ($this->token === $token);
        }
        // Guard the string offset: past the end there is no character to compare.
        return (($this->pos < $this->size) && ($this->doc[$this->pos] === $token));
    }
}
|
565
|
|
|
|
566
|
|
|
?> |
|
|
|
|
The PSR-2 coding standard requires that all properties in a class have their visibility explicitly declared. If you declare a property using
the legacy `var` keyword (for example `var $property;`), the property is implicitly public.
To learn more about the PSR-2, please see the PHP-FIG site on the PSR-2.