Completed
Push — php74_support_25 ( d359d4 )
by
unknown
18:45
created

PcreCompiler   A

Complexity

Total Complexity 26

Size/Duplication

Total Lines 272
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Importance

Changes 0
Metric Value
dl 0
loc 272
rs 10
c 0
b 0
f 0
wmc 26
lcom 1
cbo 1

12 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A compile() 0 12 3
A compileRule() 0 19 5
A compileMap() 0 7 1
A compileTransposeModulo() 0 15 1
A getModuloCharRange() 0 13 2
A getTransposeClosure() 0 11 2
A compileTargetCharacter() 0 32 4
A compileCharacter() 0 13 3
A compileReplace() 0 10 1
A compileTranspose() 0 10 1
A hexdec() 0 8 2
1
<?php
2
3
/**
4
 * File containing the PcreCompiler class.
5
 *
6
 * @copyright Copyright (C) eZ Systems AS. All rights reserved.
7
 * @license For full copyright and license information view LICENSE file distributed with this source code.
8
 */
9
namespace eZ\Publish\Core\Persistence\TransformationProcessor;
10
11
use eZ\Publish\Core\Persistence\Utf8Converter;
12
use eZ\Publish\Core\Persistence\TransformationProcessor;
13
use RuntimeException;
14
15
/**
 * Compiles the AST of parsed transformation rules into a set of PCRE replace
 * regular expressions.
 *
 * Every compiled rule is an array with two keys:
 *  - 'regexp':   a PCRE pattern delimited by parentheses, using the "u" and "s" modifiers
 *  - 'callback': a closure usable with preg_replace_callback()
 */
class PcreCompiler
{
    /**
     * Class for converting UTF-8 characters.
     *
     * @var \eZ\Publish\Core\Persistence\Utf8Converter
     */
    protected $converter;

    /**
     * Construct from UTF8Converter.
     *
     * @param \eZ\Publish\Core\Persistence\Utf8Converter $converter
     */
    public function __construct(Utf8Converter $converter)
    {
        $this->converter = $converter;
    }

    /**
     * Compile AST into a set of regular expressions.
     *
     * The returned array contains a set of regular expressions and their
     * replacement callbacks. The regular expressions can then be applied to
     * strings to execute the transformations.
     *
     * The result is keyed by the section keys of $ast; a section only appears
     * in the result if it contains at least one rule.
     *
     * @param array $ast
     *
     * @return array
     *
     * @throws \RuntimeException if a rule has an unknown type or an invalid character definition
     */
    public function compile(array $ast)
    {
        $transformations = [];

        foreach ($ast as $section => $rules) {
            foreach ($rules as $rule) {
                $transformations[$section][] = $this->compileRule($rule);
            }
        }

        return $transformations;
    }

    /**
     * Compiles a single rule by dispatching on its 'type' entry.
     *
     * @param array $rule
     *
     * @return array
     *
     * @throws \RuntimeException if the rule type is not one of the handled TransformationProcessor::T_* types
     */
    protected function compileRule(array $rule)
    {
        switch ($rule['type']) {
            case TransformationProcessor::T_MAP:
                return $this->compileMap($rule);

            case TransformationProcessor::T_REPLACE:
                return $this->compileReplace($rule);

            case TransformationProcessor::T_TRANSPOSE:
                return $this->compileTranspose($rule);

            case TransformationProcessor::T_TRANSPOSE_MODULO:
                return $this->compileTransposeModulo($rule);

            default:
                throw new RuntimeException('Unknown rule type: ' . $rule['type']);
        }
    }

    /**
     * Compile map rule.
     *
     * Matches the single character in $rule['data']['src'] and replaces it
     * with the target described by $rule['data']['dest'].
     *
     * @param array $rule
     *
     * @return array
     */
    protected function compileMap(array $rule)
    {
        return [
            'regexp' => '(' . preg_quote($this->compileCharacter($rule['data']['src'])) . ')us',
            'callback' => $this->compileTargetCharacter($rule['data']['dest']),
        ];
    }

    /**
     * Compile replace rule.
     *
     * Matches any character between $rule['data']['srcStart'] and
     * $rule['data']['srcEnd'] and replaces it with the target described by
     * $rule['data']['dest'].
     *
     * @param array $rule
     *
     * @return array
     */
    protected function compileReplace(array $rule)
    {
        return [
            'regexp' => $this->compileRangeExpression($rule['data']['srcStart'], $rule['data']['srcEnd']),
            'callback' => $this->compileTargetCharacter($rule['data']['dest']),
        ];
    }

    /**
     * Compile transpose rule.
     *
     * Matches any character between $rule['data']['srcStart'] and
     * $rule['data']['srcEnd'] and shifts its code point by
     * $rule['data']['dest'] in the direction given by $rule['data']['op'].
     *
     * @param array $rule
     *
     * @return array
     */
    protected function compileTranspose(array $rule)
    {
        return [
            'regexp' => $this->compileRangeExpression($rule['data']['srcStart'], $rule['data']['srcEnd']),
            'callback' => $this->getTransposeClosure($rule['data']['op'], $rule['data']['dest']),
        ];
    }

    /**
     * Compile transpose modulo rule.
     *
     * Like a transpose rule, but only every $rule['data']['modulo']-th
     * character of the source range is matched.
     *
     * @param array $rule
     *
     * @return array
     *
     * @throws \RuntimeException if the modulo is not a positive hexadecimal number
     */
    protected function compileTransposeModulo(array $rule)
    {
        $characters = $this->getModuloCharRange(
            $this->compileCharacter($rule['data']['srcStart']),
            $this->compileCharacter($rule['data']['srcEnd']),
            $rule['data']['modulo']
        );

        return [
            'regexp' => '([' . preg_quote($characters) . '])us',
            'callback' => $this->getTransposeClosure($rule['data']['op'], $rule['data']['dest']),
        ];
    }

    /**
     * Get string with all characters defined by parameters.
     *
     * Returns a string containing all UTF-8 characters starting with the
     * specified $start character up to the $end character with the step size
     * defined in $modulo.
     *
     * @param string $start UTF-8 character the range starts with
     * @param string $end UTF-8 character the range may not exceed
     * @param string $modulo step size as a hexadecimal string
     *
     * @return string
     *
     * @throws \RuntimeException if a boundary cannot be converted to a code point
     *         or if the step size is not positive
     */
    protected function getModuloCharRange($start, $end, $modulo)
    {
        $first = $this->converter->toUnicodeCodepoint($start);
        $last = $this->converter->toUnicodeCodepoint($end);

        // NOTE(review): the converter is reported to return false for input it
        // cannot decode; confirm against Utf8Converter::toUnicodeCodepoint().
        if ($first === false || $last === false) {
            throw new RuntimeException('Unable to resolve the code points of the modulo range boundaries.');
        }

        // The sanitising wrapper avoids the PHP 7.4 deprecation raised by the
        // built-in hexdec() on non-hexadecimal characters and always yields an int.
        $step = $this->hexdec($modulo);
        if ($step < 1) {
            // A step of zero would never advance the loop below.
            throw new RuntimeException("Invalid modulo value: $modulo");
        }

        $chars = '';
        for ($codepoint = $first; $codepoint <= $last; $codepoint += $step) {
            $chars .= $this->converter->toUTF8Character($codepoint);
        }

        return $chars;
    }

    /**
     * Returns a closure which modifies the provided character by the given
     * value.
     *
     * The closure shifts the code point of the matched character ($matches[0])
     * by $value; the shift is negative when $operator is '-' and positive for
     * any other operator.
     *
     * @param string $operator
     * @param string $value offset as a hexadecimal string
     *
     * @return callback
     */
    protected function getTransposeClosure($operator, $value)
    {
        $offset = $this->hexdec($value) * ($operator === '-' ? -1 : 1);
        $converter = $this->converter;

        return static function ($matches) use ($offset, $converter) {
            return $converter->toUTF8Character(
                $converter->toUnicodeCodepoint($matches[0]) + $offset
            );
        };
    }

    /**
     * Compile target into a closure, which can be used by
     * preg_replace_callback.
     *
     * Supported targets:
     *  - 'remove': the match is replaced with an empty string
     *  - 'keep': the match is left untouched
     *  - a double-quoted string, in which \\, \" and \' are unescaped
     *  - anything else is treated as a character definition, see compileCharacter()
     *
     * @param string $char
     *
     * @return callback
     *
     * @throws \RuntimeException if $char is none of the supported targets
     */
    protected function compileTargetCharacter($char)
    {
        if ($char === 'remove') {
            // The matched text is irrelevant, so the closure takes no parameter.
            return static function () {
                return '';
            };
        }

        if ($char === 'keep') {
            return static function ($matches) {
                return $matches[0];
            };
        }

        // NOTE(review): the pattern is not anchored, so it accepts any $char
        // that merely contains a quoted string - confirm whether the parser
        // guarantees $char is exactly one quoted token.
        if (preg_match('("(?:[^\\\\"]+|\\\\\\\\|\\\\\'|\\\\")*?")', $char) === 1) {
            // strtr() translates in a single pass. Sequential replacement would
            // unescape twice: \\' would first become \' and then '.
            $string = strtr(
                substr($char, 1, -1),
                ['\\\\' => '\\', '\\"' => '"', "\\'" => "'"]
            );

            return static function () use ($string) {
                return $string;
            };
        }

        $char = $this->compileCharacter($char);

        return static function () use ($char) {
            return $char;
        };
    }

    /**
     * Compile a single source character definition into a plain UTF-8 character.
     *
     * Handles the two formats from the possible character definitions:
     *  - U+xxxx : Unicode value in hexadecimal
     *  - xx: Ascii value in hexadecimal
     *
     * NOTE(review): for the two-digit form, values above 7F produce a single
     * byte via chr(), which is not a complete UTF-8 sequence - confirm that
     * rule files never use them.
     *
     * @param string $char
     *
     * @return string
     *
     * @throws \RuntimeException if $char matches neither format
     */
    protected function compileCharacter($char)
    {
        if (preg_match('(^U\\+[0-9a-fA-F]{4}$)', $char) === 1) {
            return $this->converter->toUTF8Character($this->hexdec(substr($char, 2)));
        }

        if (preg_match('(^[0-9a-fA-F]{2}$)', $char) === 1) {
            return chr($this->hexdec($char));
        }

        throw new RuntimeException("Invalid character definition: $char");
    }

    /**
     * Builds the expression matching every character from $start to $end.
     *
     * Both boundaries are character definitions as accepted by compileCharacter().
     *
     * @param string $start
     * @param string $end
     *
     * @return string
     */
    private function compileRangeExpression($start, $end)
    {
        return '([' .
            preg_quote($this->compileCharacter($start)) . '-' .
            preg_quote($this->compileCharacter($end)) .
            '])us';
    }

    /**
     * Converts a hexadecimal string to a decimal number.
     *
     * Unlike the built-in hexdec() function, non-hexadecimal characters are
     * stripped before the conversion, so none reach the built-in.
     * A null value is converted to 0.
     */
    private function hexdec(?string $value): int
    {
        if ($value === null) {
            return 0;
        }

        // preg_replace() returns null on failure; the cast turns that into ''.
        return \hexdec((string) preg_replace('/[^[:xdigit:]]/', '', $value));
    }
}
291