Completed
Push — master ( bc45a2...fe2e97 )
by Andreas
02:56
created

ParallelRegex::apply()   B

Complexity

Conditions 6
Paths 5

Size

Total Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
nc 5
nop 2
dl 0
loc 20
rs 8.9777
c 0
b 0
f 0
1
<?php
2
/**
3
 * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
4
 * For an intro to the Lexer see:
5
 * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
6
 *
7
 * @author Marcus Baker http://www.lastcraft.com
8
 */
9
10
namespace dokuwiki\Parsing\Lexer;
11
12
/**
13
 * Compounded regular expression.
14
 *
15
 * Any of the contained patterns could match and when one does its label is returned.
16
 */
17
class ParallelRegex
{
    /** @var string[] raw patterns, exactly as they were passed to addPattern() */
    protected $patterns;
    /** @var array labels for the patterns above, stored under the same indexes */
    protected $labels;
    /** @var string|null the compound regex matching all patterns, null while it needs (re)building */
    protected $regex;
    /** @var bool case sensitive matching? */
    protected $case;

    /**
     * Constructor. Starts with no patterns.
     *
     * @param boolean $case    True for case sensitive, false
     *                         for insensitive.
     */
    public function __construct($case)
    {
        $this->case = $case;
        $this->patterns = array();
        $this->labels = array();
        $this->regex = null;
    }

    /**
     * Adds a pattern with an optional label.
     *
     * The cached compound regex is discarded so that it is rebuilt on next use.
     *
     * @param mixed       $pattern Perl style regex. Must be UTF-8
     *                             encoded. If it is a string, the (, )
     *                             lose their meaning unless they
     *                             form part of a lookahead or
     *                             lookbehind assertion.
     * @param bool|string $label   Label of regex to be returned
     *                             on a match. Label must be ASCII
     */
    public function addPattern($pattern, $label = true)
    {
        $this->patterns[] = $pattern;
        $this->labels[] = $label;
        $this->regex = null;
    }

    /**
     * Attempts to match all patterns at once against a string.
     *
     * @param string $subject      String to match against.
     * @param string $match        First matched portion of
     *                             subject. Set to "" when nothing matched;
     *                             left untouched when there are no patterns.
     * @return bool|string         False if no match found, label if label exists, true if not
     */
    public function apply($subject, &$match)
    {
        if (count($this->patterns) == 0) {
            return false;
        }
        if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            $match = "";
            return false;
        }

        $match = $matches[0];

        // Every pattern is wrapped in exactly one capturing group and preg_match()
        // drops trailing groups that did not take part in the match, so the last
        // entry belongs to the pattern that matched. Looking it up by position
        // (the same way split() does) also works for matches such as "0", which a
        // truthiness test on the captured text would wrongly skip.
        $idx = count($matches) - 2;

        return isset($this->labels[$idx]) ? $this->labels[$idx] : true;
    }

    /**
     * Attempts to split the string against all patterns at once
     *
     * @param string $subject      String to match against.
     * @param array $split         The split result: array containing, pre-match, match & post-match strings.
     *                             On failure it is array($subject, "", ""); left untouched
     *                             when there are no patterns.
     * @return bool|string         False if no match found, label if label exists, true if not
     *
     * @author Christopher Smith <[email protected]>
     */
    public function split($subject, &$split)
    {
        if (count($this->patterns) == 0) {
            return false;
        }

        if (! preg_match($this->getCompoundedRegex(), $subject, $matches, PREG_OFFSET_CAPTURE)) {
            $this->reportPcreError();

            $split = array($subject, "", "");
            return false;
        }

        // With PREG_OFFSET_CAPTURE each entry is array(text, byte offset), so the
        // text before and after the match can be cut out directly instead of
        // running the single pattern a second time through preg_split().
        list($matched, $offset) = $matches[0];
        $idx = count($matches) - 2;

        $split = array(
            (string) substr($subject, 0, $offset),
            $matched,
            // the cast guards against substr() returning false at the end of the string on old PHP versions
            (string) substr($subject, $offset + strlen($matched)),
        );

        return isset($this->labels[$idx]) ? $this->labels[$idx] : true;
    }

    /**
     * Reports the last PCRE error, if it is one of the known ones, through msg()
     *
     * @return void
     */
    protected function reportPcreError()
    {
        switch (preg_last_error()) {
            case PREG_BACKTRACK_LIMIT_ERROR:
                msg('A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini', -1);
                break;
            case PREG_RECURSION_LIMIT_ERROR:
                msg('A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini', -1);
                break;
            case PREG_BAD_UTF8_ERROR:
                msg('A PCRE UTF-8 error occured. This might be caused by a faulty plugin', -1);
                break;
            case PREG_INTERNAL_ERROR:
                msg('A PCRE internal error occured. This might be caused by a faulty plugin', -1);
                break;
        }
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The stored patterns are left unmodified, so the regex can safely be
     * rebuilt after further patterns have been added.
     *
     * @return string
     */
    protected function getCompoundedRegex()
    {
        if ($this->regex === null) {
            $groups = array();
            foreach ($this->patterns as $pattern) {
                // one capturing group per pattern, its position identifies the label
                $groups[] = '(' . $this->escapePattern($pattern) . ')';
            }
            $this->regex = "/" . implode("|", $groups) . "/" . $this->getPerlMatchingFlags();
        }
        return $this->regex;
    }

    /**
     * Escapes a single pattern for use inside the compound regex
     *
     * Plain "(" and ")" are turned into literals, "(?" groups keep their meaning
     * and "/" is escaped because it is used as the delimiter. Escape sequences
     * and character classes are passed through.
     *
     * NOTE(review): a plain "(" nested inside a "(?" group is escaped, but the next
     * ")" is still taken as the end of the "(?" group.
     *
     * @param string $pattern the pattern as given to addPattern()
     * @return string the escaped pattern, without surrounding group or delimiters
     */
    protected function escapePattern($pattern)
    {
        /*
         * decompose the input pattern into "(", "(?", ")",
         * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
         * elements.
         */
        preg_match_all('/\\\\.|' .
                       '\(\?|' .
                       '[()]|' .
                       '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
                       '[^[()\\\\]+/', $pattern, $elts);

        $escaped = "";
        $level = 0; // number of currently open "(?" groups

        foreach ($elts[0] as $elt) {
            switch ($elt) {
                case '(':
                    $escaped .= '\(';
                    break;
                case ')':
                    if ($level > 0) {
                        $level--; /* closing (? */
                    } else {
                        $escaped .= '\\';
                    }
                    $escaped .= ')';
                    break;
                case '(?':
                    $level++;
                    $escaped .= '(?';
                    break;
                default:
                    if (substr($elt, 0, 1) == '\\') {
                        // escape sequences are kept as they are
                        $escaped .= $elt;
                    } else {
                        $escaped .= str_replace('/', '\/', $elt);
                    }
            }
        }

        return $escaped;
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string       Perl regex flags.
     */
    protected function getPerlMatchingFlags()
    {
        return ($this->case ? "msS" : "msSi");
    }
}
204