StringEncoder::supports()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 3
rs 10
1
<?php
2
3
namespace Riimu\Kit\PHPEncoder\Encoder;
4
5
/**
6
 * Encoder for string values.
7
 * @author Riikka Kalliomäki <[email protected]>
8
 * @copyright Copyright (c) 2014-2020 Riikka Kalliomäki
9
 * @license http://opensource.org/licenses/mit-license.php MIT License
10
 */
11
class StringEncoder implements Encoder
{
    /** @var array Default values for options in the encoder */
    private static $defaultOptions = [
        'string.escape' => true,
        'string.binary' => false,
        'string.utf8' => false,
        'string.classes' => [],
        'string.imports' => [],
    ];

    /**
     * Returns the options defined by this encoder together with their default values.
     * @return array Option names mapped to their default values
     */
    public function getDefaultOptions()
    {
        return self::$defaultOptions;
    }

    /**
     * Tells if this encoder is able to encode the given value.
     * @param mixed $value The value to test
     * @return bool True if the value is a string, false otherwise
     */
    public function supports($value)
    {
        return \is_string($value);
    }

    /**
     * Generates the PHP code representation for the given string.
     *
     * The value is encoded as a `::class` constant when it names a configured
     * class, as a complex string when it contains anything other than printable
     * ASCII, and as a plain single quoted string otherwise.
     *
     * @param mixed $value The value to encode, which is cast to a string first
     * @param mixed $depth Not used by this encoder
     * @param array $options The string encoding options
     * @param callable $encode Not used by this encoder
     * @return string The PHP code representation for the string
     */
    public function encode($value, $depth, array $options, callable $encode)
    {
        $value = (string) $value;

        if ($this->isClassName($value, $options)) {
            return $this->getClassName($value, $options);
        }

        // Any byte outside the printable ASCII range 0x20-0x7E needs special handling
        if (preg_match('/[^\x20-\x7E]/', $value)) {
            return $this->getComplexString($value, $options);
        }

        return $this->getSingleQuotedString($value);
    }

    /**
     * Tests if the given value is a string that could be encoded as a class name constant.
     * @param string $value The string to test
     * @param array $options The string encoding options
     * @return bool True if string can be encoded as class constant, false if not
     */
    private function isClassName($value, array $options)
    {
        // The D modifier makes "$" match only at the very end of the subject. Without
        // it, a value such as "Foo\Bar\n" would pass as a class name and the trailing
        // newline would be lost once the generated "::class" expression is evaluated.
        if (preg_match('/^([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)(\\\\(?1))*$/D', $value) !== 1) {
            return false;
        }

        // The name qualifies if it, or any of its enclosing namespaces, is listed
        $candidates = iterator_to_array($this->iterateNamespaces($value));

        return array_intersect($candidates, $options['string.classes']) !== [];
    }

    /**
     * Encodes the given string as a class name constant based on used imports.
     * @param string $value The string to encode
     * @param array $options The string encoding options
     * @return string The class constant PHP code representation
     */
    private function getClassName($value, array $options)
    {
        // Variations are iterated from the most specific to the least specific,
        // so the first import that matches wins
        foreach ($this->iterateNamespaces($value) as $partial) {
            if (isset($options['string.imports'][$partial])) {
                // The part of the name that is not covered by the imported prefix
                $trimmed = substr($value, \strlen(rtrim($partial, '\\')));
                $alias = rtrim($options['string.imports'][$partial], '\\');

                return ltrim(sprintf('%s%s::class', $alias, $trimmed), '\\');
            }
        }

        // No import applies, so use the fully qualified name
        return sprintf('\\%s::class', $value);
    }

    /**
     * Iterates over the variations of the namespace for the given class name.
     *
     * Yields the name itself first, then each enclosing namespace with a trailing
     * backslash from the longest to the shortest, ending with a lone backslash.
     *
     * @param string $value The class name to iterate over
     * @return \Generator|string[] The namespace parts of the string
     */
    private function iterateNamespaces($value)
    {
        yield $value;

        // The leading backslash adds an empty first part, which produces the
        // final lone backslash variation
        $parts = explode('\\', '\\' . $value);
        $count = \count($parts);

        for ($i = 1; $i < $count; $i++) {
            yield ltrim(implode('\\', \array_slice($parts, 0, -$i)), '\\') . '\\';
        }
    }

    /**
     * Returns the PHP code representation for the string that is not just simple ascii characters.
     * @param string $value The string to encode
     * @param array $options The string encoding options
     * @return string The PHP code representation for the complex string
     */
    private function getComplexString($value, array $options)
    {
        if ($this->isBinaryString($value, $options)) {
            return $this->encodeBinaryString($value);
        }

        if ($options['string.escape']) {
            return $this->getDoubleQuotedString($value, $options);
        }

        // Escaping is disabled, so the bytes are written out as they are
        return $this->getSingleQuotedString($value);
    }

    /**
     * Tells if the string is not a valid UTF-8 string.
     *
     * Always returns false when the 'string.binary' option is disabled.
     *
     * @param string $string The string to test
     * @param array $options The string encoding options
     * @return bool True if the string is not valid UTF-8 and false if it is
     */
    private function isBinaryString($string, $options)
    {
        if (!$options['string.binary']) {
            return false;
        }

        // UTF-8 validity test without mbstring extension
        $pattern =
            '/^(?>
                [\x00-\x7F]+                       # ASCII
              | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
              |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding over longs
              | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
              |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
              |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
              | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
              |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
            )*$/x';

        // A failed match (or a PCRE error) means the string is treated as binary
        return !preg_match($pattern, $string);
    }

    /**
     * Encodes the given string into base 64 encoded format.
     * @param string $string The string to encode
     * @return string A base 64 PHP code representation for the string
     */
    private function encodeBinaryString($string)
    {
        return sprintf("base64_decode('%s')", base64_encode($string));
    }

    /**
     * Returns the string wrapped in single quotes and escaped appropriately.
     * @param string $string String to wrap
     * @return string The string wrapped in single quotes
     */
    private function getSingleQuotedString($string)
    {
        // Only the single quote and the backslash are escaped here
        return sprintf("'%s'", strtr($string, ["'" => "\\'", '\\' => '\\\\']));
    }

    /**
     * Returns the string wrapped in double quotes and all but print characters escaped.
     * @param string $string String to wrap and escape
     * @param array $options The string encoding options
     * @return string The string wrapped in double quotes and escaped correctly
     */
    private function getDoubleQuotedString($string, $options)
    {
        $string = strtr($string, [
            "\n" => '\n',
            "\r" => '\r',
            "\t" => '\t',
            '$' => '\$',
            '"' => '\"',
            '\\' => '\\\\',
        ]);

        if ($options['string.utf8']) {
            $string = $this->encodeUtf8($string, $options);
        }

        // 'hex.capitalize' is not one of this encoder's own defaults, so it is read
        // with empty() to keep a missing key from raising an undefined index error
        $format = empty($options['hex.capitalize']) ? '\x%02x' : '\x%02X';

        // Every remaining byte outside printable ASCII becomes a two digit hex escape
        $escaped = preg_replace_callback('/[^\x20-\x7E]/', static function ($matches) use ($format) {
            return sprintf($format, \ord($matches[0]));
        }, $string);

        return sprintf('"%s"', $escaped);
    }

    /**
     * Encodes all multibyte UTF-8 characters into PHP7 string encoding.
     * @param string $string The string to encode
     * @param array $options The string encoding options
     * @return string The string with all the multibyte characters encoded
     */
    private function encodeUtf8($string, $options)
    {
        // Matches only well formed multibyte sequences; other bytes are left untouched
        $pattern =
            '/  [\xC2-\xDF][\x80-\xBF]
              |  \xE0[\xA0-\xBF][\x80-\xBF]
              | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}
              |  \xED[\x80-\x9F][\x80-\xBF]
              |  \xF0[\x90-\xBF][\x80-\xBF]{2}
              | [\xF1-\xF3][\x80-\xBF]{3}
              |  \xF4[\x80-\x8F][\x80-\xBF]{2}/x';

        // 'hex.capitalize' is not one of this encoder's own defaults, so it is read
        // with empty() to keep a missing key from raising an undefined index error
        $format = empty($options['hex.capitalize']) ? '\u{%x}' : '\u{%X}';

        return preg_replace_callback($pattern, function ($match) use ($format) {
            return sprintf($format, $this->getCodePoint($match[0]));
        }, $string);
    }

    /**
     * Returns the unicode code point for the given multibyte UTF-8 character.
     *
     * The leading byte contributes its low 5, 4 or 3 bits for 2, 3 and 4 byte
     * sequences respectively and each following byte contributes its low 6 bits.
     *
     * @param string $bytes The multibyte character
     * @return int The code point for the multibyte character
     */
    private function getCodePoint($bytes)
    {
        if (\strlen($bytes) === 2) {
            return ((\ord($bytes[0]) & 0b11111) << 6)
                | (\ord($bytes[1]) & 0b111111);
        }

        if (\strlen($bytes) === 3) {
            return ((\ord($bytes[0]) & 0b1111) << 12)
                | ((\ord($bytes[1]) & 0b111111) << 6)
                | (\ord($bytes[2]) & 0b111111);
        }

        // Anything else is handled as a 4 byte sequence
        return ((\ord($bytes[0]) & 0b111) << 18)
            | ((\ord($bytes[1]) & 0b111111) << 12)
            | ((\ord($bytes[2]) & 0b111111) << 6)
            | (\ord($bytes[3]) & 0b111111);
    }
}
238