Completed
Push — master ( c38b7a...6f5e4f )
by
unknown
47:17 queued 24:08
created

PcreCompiler::hexdec()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
nc 2
nop 1
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * @copyright Copyright (C) eZ Systems AS. All rights reserved.
5
 * @license For full copyright and license information view LICENSE file distributed with this source code.
6
 */
7
namespace eZ\Publish\Core\Persistence\TransformationProcessor;
8
9
use eZ\Publish\Core\Persistence\Utf8Converter;
10
use eZ\Publish\Core\Persistence\TransformationProcessor;
11
use RuntimeException;
12
13
/**
14
 * Compiles the AST of parsed transformation rules into a set of PCRE replace
15
 * regular expressions.
16
 */
17
class PcreCompiler
{
    /**
     * Class for converting UTF-8 characters.
     *
     * @var \eZ\Publish\Core\Persistence\Utf8Converter
     */
    protected $converter;

    /**
     * Construct from UTF8Converter.
     *
     * @param \eZ\Publish\Core\Persistence\Utf8Converter $converter
     */
    public function __construct(Utf8Converter $converter)
    {
        $this->converter = $converter;
    }

    /**
     * Compile AST into a set of regular expressions.
     *
     * The returned array is keyed by the section names of the AST. Each
     * section holds a list of compiled rules, and every compiled rule is an
     * array with the keys 'regexp' and 'callback'. The regular expressions
     * can then be applied to strings to execute the transformations.
     *
     * A section without any rule does not appear in the result.
     *
     * @param array $ast
     *
     * @return array
     *
     * @throws \RuntimeException if a rule has an unknown type or an invalid
     *                           character definition
     */
    public function compile(array $ast)
    {
        $transformations = [];

        foreach ($ast as $section => $rules) {
            foreach ($rules as $rule) {
                $transformations[$section][] = $this->compileRule($rule);
            }
        }

        return $transformations;
    }

    /**
     * Compiles a single rule by dispatching on its 'type' entry.
     *
     * @param array $rule
     *
     * @return array
     *
     * @throws \RuntimeException if the rule type is not known
     */
    protected function compileRule(array $rule)
    {
        switch ($rule['type']) {
            case TransformationProcessor::T_MAP:
                return $this->compileMap($rule);

            case TransformationProcessor::T_REPLACE:
                return $this->compileReplace($rule);

            case TransformationProcessor::T_TRANSPOSE:
                return $this->compileTranspose($rule);

            case TransformationProcessor::T_TRANSPOSE_MODULO:
                return $this->compileTransposeModulo($rule);

            default:
                throw new RuntimeException('Unknown rule type: ' . $rule['type']);
        }
    }

    /**
     * Compile map rule.
     *
     * The expression matches the single source character; "(" and ")" act as
     * the pattern delimiters, followed by the "u" and "s" modifiers.
     *
     * @param array $rule
     *
     * @return array
     */
    protected function compileMap(array $rule)
    {
        return [
            'regexp' => '(' . preg_quote($this->compileCharacter($rule['data']['src'])) . ')us',
            'callback' => $this->compileTargetCharacter($rule['data']['dest']),
        ];
    }

    /**
     * Compile replace rule.
     *
     * Every character in the range srcStart-srcEnd is replaced by the
     * compiled destination.
     *
     * @param array $rule
     *
     * @return array
     */
    protected function compileReplace(array $rule)
    {
        return [
            'regexp' => $this->compileRangeExpression($rule['data']['srcStart'], $rule['data']['srcEnd']),
            'callback' => $this->compileTargetCharacter($rule['data']['dest']),
        ];
    }

    /**
     * Compile transpose rule.
     *
     * Every character in the range srcStart-srcEnd is shifted by the
     * rule's 'dest' value in the direction given by 'op'.
     *
     * @param array $rule
     *
     * @return array
     */
    protected function compileTranspose(array $rule)
    {
        return [
            'regexp' => $this->compileRangeExpression($rule['data']['srcStart'], $rule['data']['srcEnd']),
            'callback' => $this->getTransposeClosure($rule['data']['op'], $rule['data']['dest']),
        ];
    }

    /**
     * Compile transpose modulo rule.
     *
     * Unlike the plain transpose rule, the character class lists every
     * affected character explicitly, because only every n-th character of
     * the range takes part.
     *
     * @param array $rule
     *
     * @return array
     *
     * @throws \RuntimeException if the modulo range cannot be built
     */
    protected function compileTransposeModulo(array $rule)
    {
        $characters = $this->getModuloCharRange(
            $this->compileCharacter($rule['data']['srcStart']),
            $this->compileCharacter($rule['data']['srcEnd']),
            $rule['data']['modulo']
        );

        return [
            'regexp' => '([' . preg_quote($characters) . '])us',
            'callback' => $this->getTransposeClosure($rule['data']['op'], $rule['data']['dest']),
        ];
    }

    /**
     * Builds the character-class expression shared by the range based rules.
     *
     * @param string $srcStart character definition of the first character in the range
     * @param string $srcEnd character definition of the last character in the range
     *
     * @return string
     */
    private function compileRangeExpression($srcStart, $srcEnd)
    {
        return '([' .
            preg_quote($this->compileCharacter($srcStart)) . '-' .
            preg_quote($this->compileCharacter($srcEnd)) .
            '])us';
    }

    /**
     * Get string with all characters defined by parameters.
     *
     * Returns a string containing all UTF-8 characters starting with the
     * specified $start character up to the $end character with the step size
     * defined in $modulo.
     *
     * @param string $start
     * @param string $end
     * @param string $modulo step size in hexadecimal notation
     *
     * @return string
     *
     * @throws \RuntimeException if a boundary character cannot be converted
     *                           to a codepoint or the step is not positive
     */
    protected function getModuloCharRange($start, $end, $modulo)
    {
        $first = $this->converter->toUnicodeCodepoint($start);
        $last = $this->converter->toUnicodeCodepoint($end);

        // Static analysis reports that the converter may answer with false;
        // using that as a codepoint would silently start the range at 0.
        if ($first === false || $last === false) {
            throw new RuntimeException('Invalid UTF-8 character in modulo range definition.');
        }

        // Same sanitising conversion as getTransposeClosure(): always an int.
        $step = $this->hexdec($modulo);

        // A step of zero would never advance the loop below.
        if ($step < 1) {
            throw new RuntimeException("Invalid modulo step: $modulo");
        }

        $chars = '';
        for ($codepoint = $first; $codepoint <= $last; $codepoint += $step) {
            $chars .= $this->converter->toUTF8Character($codepoint);
        }

        return $chars;
    }

    /**
     * Returns a closure which modifies the provided character by the given
     * value.
     *
     * The closure shifts the codepoint of the matched character ($matches[0])
     * by $value; the shift is negative when $operator is '-'.
     *
     * @param string $operator
     * @param string $value offset in hexadecimal notation
     *
     * @return callable
     */
    protected function getTransposeClosure($operator, $value)
    {
        $offset = $this->hexdec($value) * ($operator === '-' ? -1 : 1);
        $converter = $this->converter;

        return static function ($matches) use ($offset, $converter) {
            return $converter->toUTF8Character(
                $converter->toUnicodeCodepoint($matches[0]) + $offset
            );
        };
    }

    /**
     * Compile target into a closure, which can be used by
     * preg_replace_callback.
     *
     * Supported targets:
     *  - remove: the match is replaced by an empty string
     *  - keep: the match is left untouched
     *  - "...": a double quoted string in which \\, \" and \' are escapes
     *  - anything else is treated as a character definition
     *
     * @param string $char
     *
     * @return callable
     *
     * @throws \RuntimeException if $char is an invalid character definition
     */
    protected function compileTargetCharacter($char)
    {
        // The closures keep the $matches parameter even when it is unused so
        // that all callbacks share the preg_replace_callback signature.
        if ($char === 'remove') {
            return static function ($matches) {
                return '';
            };
        }

        if ($char === 'keep') {
            return static function ($matches) {
                return $matches[0];
            };
        }

        if (preg_match('("(?:[^\\\\"]+|\\\\\\\\|\\\\\'|\\\\")*?")', $char) === 1) {
            // strtr() decodes all escapes in one pass. Sequential replacement
            // would decode the output of an earlier replacement again and
            // turn an escaped backslash followed by a quote into a bare quote.
            $string = strtr(
                (string)substr($char, 1, -1),
                [
                    '\\\\' => '\\',
                    '\\"' => '"',
                    "\\'" => "'",
                ]
            );

            return static function ($matches) use ($string) {
                return $string;
            };
        }

        $char = $this->compileCharacter($char);

        return static function ($matches) use ($char) {
            return $char;
        };
    }

    /**
     * Compile a single source character definition into a plain UTF-8 character.
     *
     * Handles the two formats from the possible character definitions:
     *  - U+xxxx : Unicode value in hexadecimal
     *  - xx: Ascii value in hexadecimal
     *
     * @param string $char
     *
     * @return string
     *
     * @throws \RuntimeException if $char matches neither format
     */
    protected function compileCharacter($char)
    {
        if (preg_match('(^U\\+[0-9a-fA-F]{4}$)', $char) === 1) {
            return $this->converter->toUTF8Character(hexdec(substr($char, 2)));
        }

        if (preg_match('(^[0-9a-fA-F]{2}$)', $char) === 1) {
            return chr(hexdec($char));
        }

        throw new RuntimeException("Invalid character definition: $char");
    }

    /**
     * Converts a hexadecimal string to a decimal number.
     *
     * In comparison to standard hexdec function it will ignore any non-hexadecimal characters
     * without raising a notice, and it answers 0 for null.
     */
    private function hexdec(?string $value): int
    {
        if ($value === null) {
            return 0;
        }

        // preg_replace() answers null on a PCRE error; treat that as "no digits".
        $digits = (string)preg_replace('/[^[:xdigit:]]/', '', $value);

        return (int)hexdec($digits);
    }
}
289