Failed Conditions
Push — psr2-config ( c6639e )
by Andreas
06:39 queued 03:33
created

Lexer   B

Complexity

Total Complexity 36

Size/Duplication

Total Lines 322
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 2

Importance

Changes 0
Metric Value
dl 0
loc 322
rs 8.8
c 0
b 0
f 0
wmc 36
lcom 1
cbo 2

14 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 8 1
A addPattern() 0 7 2
A addEntryPattern() 0 7 2
A addExitPattern() 0 7 2
A addSpecialPattern() 0 7 2
A mapHandler() 0 4 1
B parse() 0 26 6
C dispatchTokens() 0 24 7
A isModeEnd() 0 4 1
A isSpecialMode() 0 4 1
A decodeSpecial() 0 4 1
B invokeHandler() 0 19 5
A reduce() 0 14 4
B escape() 0 43 1
1
<?php
2
/**
3
 * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
4
 * For an intro to the Lexer see:
5
 * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
6
 *
7
 * @author Marcus Baker http://www.lastcraft.com
8
 */
9
10
namespace dokuwiki\Parsing\Lexer;
11
12
// FIXME move elsewhere
13
14
// Lexer states: the Lexer passes one of these as the second argument of every handler call.
define("DOKU_LEXER_ENTER", 1);       // matched text that switched the lexer into a new mode
15
define("DOKU_LEXER_MATCHED", 2);     // matched text that did not change the mode
16
define("DOKU_LEXER_UNMATCHED", 3);   // text between matches, and the trailing remainder
17
define("DOKU_LEXER_EXIT", 4);        // matched text that left the current mode
18
define("DOKU_LEXER_SPECIAL", 5);     // matched text handled in a mode entered for this one token
19
20
/**
 * Accepts text and breaks it into tokens.
 *
 * Some optimisation to make sure the content is only scanned by the PHP regex
 * parser once. Lexer modes must not start with leading underscores.
 */
class Lexer
{
    /** @var ParallelRegex[] one compound regex per mode, keyed by mode name */
    protected $regexes;
    /** @var \Doku_Handler */
    protected $handler;
    /** @var StateStack */
    protected $mode;
    /** @var array mode "rewrites" (mode name => handler method name) */
    protected $mode_handlers;
    /** @var bool case sensitive? */
    protected $case;

    /**
     * Sets up the lexer in case insensitive matching by default.
     *
     * @param \Doku_Handler $handler  Handling strategy by reference.
     * @param string $start            Starting handler.
     * @param boolean $case            True for case sensitive.
     */
    public function __construct($handler, $start = "accept", $case = false)
    {
        $this->case = $case;
        $this->regexes = array();
        $this->handler = $handler;
        $this->mode = new StateStack($start);
        $this->mode_handlers = array();
    }

    /**
     * Returns the compound regex of a mode, creating it on first use.
     *
     * @param string $mode         Mode name.
     * @return ParallelRegex
     */
    private function regexFor($mode)
    {
        if (!isset($this->regexes[$mode])) {
            $this->regexes[$mode] = new ParallelRegex($this->case);
        }
        return $this->regexes[$mode];
    }

    /**
     * Adds a token search pattern for a particular parsing mode.
     *
     * The pattern does not change the current mode.
     *
     * @param string $pattern      Perl style regex, but ( and )
     *                             lose the usual meaning.
     * @param string $mode         Should only apply this
     *                             pattern when dealing with
     *                             this type of input.
     */
    public function addPattern($pattern, $mode = "accept")
    {
        $this->regexFor($mode)->addPattern($pattern);
    }

    /**
     * Adds a pattern that will enter a new parsing mode.
     *
     * Useful for entering parenthesis, strings, tags, etc.
     *
     * @param string $pattern      Perl style regex, but ( and ) lose the usual meaning.
     * @param string $mode         Should only apply this pattern when dealing with this type of input.
     * @param string $new_mode     Change parsing to this new nested mode.
     */
    public function addEntryPattern($pattern, $mode, $new_mode)
    {
        $this->regexFor($mode)->addPattern($pattern, $new_mode);
    }

    /**
     * Adds a pattern that will exit the current mode and re-enter the previous one.
     *
     * @param string $pattern      Perl style regex, but ( and ) lose the usual meaning.
     * @param string $mode         Mode to leave.
     */
    public function addExitPattern($pattern, $mode)
    {
        // "__exit" is the reserved label recognised by isModeEnd()
        $this->regexFor($mode)->addPattern($pattern, "__exit");
    }

    /**
     * Adds a pattern that has a special mode.
     *
     * Acts as an entry and exit pattern in one go, effectively calling a special
     * parser handler for this token only.
     *
     * @param string $pattern      Perl style regex, but ( and ) lose the usual meaning.
     * @param string $mode         Should only apply this pattern when dealing with this type of input.
     * @param string $special      Use this mode for this one token.
     */
    public function addSpecialPattern($pattern, $mode, $special)
    {
        // a single leading underscore marks a one-token mode, see isSpecialMode()
        $this->regexFor($mode)->addPattern($pattern, "_$special");
    }

    /**
     * Adds a mapping from a mode to another handler.
     *
     * @param string $mode        Mode to be remapped.
     * @param string $handler     New target handler.
     */
    public function mapHandler($mode, $handler)
    {
        $this->mode_handlers[$mode] = $handler;
    }

    /**
     * Splits the page text into tokens.
     *
     * Will fail if the handlers report an error or if no content is consumed. If successful then each
     * unparsed and parsed token invokes a call to the held listener.
     *
     * @param string $raw        Raw page text.
     * @return boolean           True on success, else false.
     */
    public function parse($raw)
    {
        if (!isset($this->handler)) {
            return false;
        }
        $initialLength = strlen($raw);
        $length = $initialLength;
        $pos = 0;
        // reduce() eats the consumed prefix off $raw (passed by reference) on every match
        while (is_array($parsed = $this->reduce($raw))) {
            list($unmatched, $matched, $mode) = $parsed;
            $currentLength = strlen($raw);
            // byte offset at which the matched token starts in the original text
            $matchPos = $initialLength - $currentLength - strlen($matched);
            if (!$this->dispatchTokens($unmatched, $matched, $mode, $pos, $matchPos)) {
                return false;
            }
            if ($currentLength === $length) {
                // nothing was consumed in this round: fail instead of looping forever
                return false;
            }
            $length = $currentLength;
            $pos = $initialLength - $currentLength;
        }
        if (!$parsed) {
            // reduce() returned false: no patterns are registered for the current mode
            return false;
        }
        // whatever is left after the last match is plain unmatched text
        return $this->invokeHandler($raw, DOKU_LEXER_UNMATCHED, $pos);
    }

    /**
     * Sends the matched token and any leading unmatched
     * text to the parser changing the lexer to a new
     * mode if one is listed.
     *
     * $mode is a required argument: it used to carry a "= false" default, but it
     * sits in front of required parameters, so the default could never be used
     * and PHP 8 reports such a signature as deprecated.
     *
     * @param string $unmatched Unmatched leading portion.
     * @param string $matched Actual token match.
     * @param bool|string $mode Mode after match. A non-string mode causes no change.
     * @param int $initialPos Byte index at which the unmatched portion starts
     * @param int $matchPos Current byte index location in raw doc that's being parsed
     * @return boolean             False if there was any error from the parser.
     */
    protected function dispatchTokens($unmatched, $matched, $mode, $initialPos, $matchPos)
    {
        if (!$this->invokeHandler($unmatched, DOKU_LEXER_UNMATCHED, $initialPos)) {
            return false;
        }
        if ($this->isModeEnd($mode)) {
            if (!$this->invokeHandler($matched, DOKU_LEXER_EXIT, $matchPos)) {
                return false;
            }
            return $this->mode->leave();
        }
        if ($this->isSpecialMode($mode)) {
            // enter the one-token mode, hand over the token, and leave again straight away
            $this->mode->enter($this->decodeSpecial($mode));
            if (!$this->invokeHandler($matched, DOKU_LEXER_SPECIAL, $matchPos)) {
                return false;
            }
            return $this->mode->leave();
        }
        if (is_string($mode)) {
            $this->mode->enter($mode);
            return $this->invokeHandler($matched, DOKU_LEXER_ENTER, $matchPos);
        }
        return $this->invokeHandler($matched, DOKU_LEXER_MATCHED, $matchPos);
    }

    /**
     * Tests to see if the new mode is actually to leave the current mode and pop an item from the matching
     * mode stack.
     *
     * @param bool|string $mode Mode to test.
     * @return boolean          True if this is the exit mode.
     */
    protected function isModeEnd($mode)
    {
        return ($mode === "__exit");
    }

    /**
     * Test to see if the mode is one where this mode is entered for this token only and automatically
     * leaves immediately afterwards.
     *
     * @param bool|string $mode Mode to test. Non-string values are never special.
     * @return boolean          True if this is a single-token (special) mode.
     */
    protected function isSpecialMode($mode)
    {
        return is_string($mode) && strncmp($mode, "_", 1) === 0;
    }

    /**
     * Strips the magic underscore marking single token modes.
     *
     * @param string $mode    Mode to decode.
     * @return string         Underlying mode name.
     */
    protected function decodeSpecial($mode)
    {
        return substr($mode, 1);
    }

    /**
     * Calls the parser method named after the current mode.
     *
     * Empty content will be ignored. The lexer has a parser handler for each mode in the lexer.
     *
     * @param string $content Text parsed.
     * @param int $is_match   Lexer state of the content, one of the DOKU_LEXER_* constants.
     * @param int $pos        Current byte index location in raw doc
     *                        that's being parsed
     * @return bool
     */
    protected function invokeHandler($content, $is_match, $pos)
    {
        if (($content === "") || ($content === false)) {
            return true;
        }
        $handler = $this->mode->getCurrent();
        if (isset($this->mode_handlers[$handler])) {
            $handler = $this->mode_handlers[$handler];
        }

        // modes starting with plugin_ are all handled by the same
        // handler but with an additional parameter
        if (substr($handler, 0, 7) === 'plugin_') {
            list($handler, $plugin) = explode('_', $handler, 2);
            return $this->handler->$handler($content, $is_match, $pos, $plugin);
        }

        return $this->handler->$handler($content, $is_match, $pos);
    }

    /**
     * Tries to match a chunk of text and if successful removes the recognised chunk and any leading
     * unparsed data. Empty strings will not be matched.
     *
     * @param string $raw         The subject to parse. This is the content that will be eaten.
     * @return array|bool         Three item list of unparsed content followed by the
     *                            recognised token and finally the action the parser is to take.
     *                            True if no match, false if there is a parsing error.
     */
    protected function reduce(&$raw)
    {
        $current = $this->mode->getCurrent();
        if (!isset($this->regexes[$current])) {
            return false;
        }
        if ($raw === "") {
            return true;
        }
        if ($action = $this->regexes[$current]->split($raw, $split)) {
            // $split is filled by split(): text before the match, the match, the remainder
            list($unparsed, $match, $raw) = $split;
            return array($unparsed, $match, $action);
        }
        return true;
    }

    /**
     * Escapes regex characters other than (, ) and /
     *
     * Every listed character, including the backslash itself, is prefixed with
     * one backslash. strtr() is used instead of preg_replace() because a "\$" in
     * a preg_replace() replacement string is read as an escaped dollar and
     * produces a bare "$", which left "$" unescaped.
     *
     * @param string $str
     * @return string
     */
    public static function escape($str)
    {
        $map = array(
            '\\' => '\\\\',
            '.' => '\\.',
            '+' => '\\+',
            '*' => '\\*',
            '?' => '\\?',
            '[' => '\\[',
            '^' => '\\^',
            ']' => '\\]',
            '$' => '\\$',
            '{' => '\\{',
            '}' => '\\}',
            '=' => '\\=',
            '!' => '\\!',
            '<' => '\\<',
            '>' => '\\>',
            '|' => '\\|',
            ':' => '\\:'
        );

        // single pass over the input, so backslashes added here are never escaped again
        return strtr($str, $map);
    }
}
348