Completed
Branch next (a68875)
by Riikka
02:28
created

StringEncoder::getClassName()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
nc 3
nop 2
dl 0
loc 10
ccs 7
cts 7
cp 1
crap 3
rs 9.9332
c 0
b 0
f 0
1
<?php
2
3
namespace Riimu\Kit\PHPEncoder\Encoder;
4
5
/**
 * Encoder for string values.
 *
 * Depending on the value and the options, a string is encoded as a
 * `::class` constant, a base64_decode() call, a double quoted string with
 * escape sequences or a plain single quoted string.
 *
 * @author Riikka Kalliomäki <[email protected]>
 * @copyright Copyright (c) 2014-2017 Riikka Kalliomäki
 * @license http://opensource.org/licenses/mit-license.php MIT License
 */
class StringEncoder implements Encoder
{
    /** @var array Default values for options in the encoder */
    private static $defaultOptions = [
        'string.escape' => true,
        'string.binary' => false,
        'string.utf8' => false,
        'string.classes' => [],
        'string.imports' => [],
    ];

    /**
     * Returns the default values for the options declared by this encoder.
     * @return array Option names as keys and their default values as values
     */
    public function getDefaultOptions()
    {
        return self::$defaultOptions;
    }

    /**
     * Tells if this encoder can encode the given value.
     * @param mixed $value The value to test
     * @return bool True if the value is a string, false otherwise
     */
    public function supports($value)
    {
        return \is_string($value);
    }

    /**
     * Generates the PHP code representation for the given string.
     *
     * The $depth and $encode arguments are part of the method signature but
     * are not used when encoding strings.
     *
     * @param string $value The string to encode
     * @param int $depth Current nesting depth (unused)
     * @param array $options The string encoding options
     * @param callable $encode Callback for encoding nested values (unused)
     * @return string The PHP code that represents the string
     */
    public function encode($value, $depth, array $options, callable $encode)
    {
        $value = (string) $value;

        if ($this->isClassName($value, $options)) {
            return $this->getClassName($value, $options);
        }

        // Anything outside printable ASCII needs escaping or binary encoding
        if (preg_match('/[^\x20-\x7E]/', $value)) {
            return $this->getComplexString($value, $options);
        }

        return $this->getSingleQuotedString($value);
    }

    /**
     * Tells if the string should be encoded as a class name constant.
     *
     * The string must be a syntactically valid (optionally namespaced) name
     * and either the name itself or one of its parent namespaces must be
     * listed in the 'string.classes' option.
     *
     * @param string $value The string to test
     * @param array $options The string encoding options
     * @return bool True if the string is a recognized class name, false if not
     */
    private function isClassName($value, array $options)
    {
        // Nothing can match an empty list, so skip the regex and the generator
        if ($options['string.classes'] === []) {
            return false;
        }

        // The pattern is anchored with \z instead of $, because $ also matches
        // before a trailing newline, which would let "Foo\n" pass as a name
        // and lose the newline in the generated ::class expression.
        if (preg_match('/^([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)(\\\\(?1))*\z/', $value) !== 1) {
            return false;
        }

        return array_intersect(iterator_to_array($this->iterateNamespaces($value)), $options['string.classes']) !== [];
    }

    /**
     * Returns the class name constant expression for the given class name.
     *
     * The keys of the 'string.imports' option are tested from the most
     * specific (the full name) to the least specific (the global namespace).
     * On the first match, the matched prefix is replaced with the value
     * stored for that key. Without a match, the fully qualified name is used.
     *
     * @param string $value The class name to encode
     * @param array $options The string encoding options
     * @return string The class name followed by `::class`
     */
    private function getClassName($value, array $options)
    {
        foreach ($this->iterateNamespaces($value) as $partial) {
            if (isset($options['string.imports'][$partial])) {
                // The part of the name that remains after the imported prefix
                $trimmed = substr($value, \strlen(rtrim($partial, '\\')));
                return ltrim(sprintf('%s%s::class', rtrim($options['string.imports'][$partial], '\\'), $trimmed), '\\');
            }
        }

        return sprintf('\\%s::class', $value);
    }

    /**
     * Iterates over the name and each of its parent namespaces.
     *
     * For 'Foo\Bar' this yields 'Foo\Bar', 'Foo\' and finally '\', i.e. the
     * name itself followed by each parent namespace with a trailing
     * backslash, ending with the global namespace.
     *
     * @param string $value The class name to iterate
     * @return \Generator Generator that yields the name and its namespaces
     */
    private function iterateNamespaces($value)
    {
        yield $value;

        // The leading backslash adds an empty first part for the global namespace
        $parts = explode('\\', '\\' . $value);
        $count = \count($parts);

        for ($i = 1; $i < $count; $i++) {
            yield ltrim(implode('\\', \array_slice($parts, 0, -$i)), '\\') . '\\';
        }
    }

    /**
     * Encodes a string that contains characters outside printable ASCII.
     * @param string $value The string to encode
     * @param array $options The string encoding options
     * @return string The PHP code that represents the string
     */
    private function getComplexString($value, array $options)
    {
        if ($this->isBinaryString($value, $options)) {
            return $this->encodeBinaryString($value);
        }

        if ($options['string.escape']) {
            return $this->getDoubleQuotedString($value, $options);
        }

        return $this->getSingleQuotedString($value);
    }

    /**
     * Tells if the string should be treated as binary data.
     *
     * Always returns false unless the 'string.binary' option is enabled. When
     * enabled, a string is binary if it is not a valid UTF-8 string. If the
     * pattern matching itself fails, preg_match() returns false and the
     * string is treated as binary as well.
     *
     * @param string $string The string to test
     * @param array $options The string encoding options
     * @return bool True if the string is treated as binary, false if not
     */
    private function isBinaryString($string, $options)
    {
        if (!$options['string.binary']) {
            return false;
        }

        // UTF-8 validity test without mbstring extension
        $pattern =
            '/^(?>
                [\x00-\x7F]+                       # ASCII
              | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
              |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding over longs
              | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
              |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
              |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
              | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
              |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
            )*$/x';

        return !preg_match($pattern, $string);
    }

    /**
     * Encodes the given string into base 64 encoded format.
     * @param string $string The string to encode
     * @return string A base 64 PHP code representation for the string
     */
    private function encodeBinaryString($string)
    {
        return sprintf("base64_decode('%s')", base64_encode($string));
    }

    /**
     * Returns the string wrapped in single quotes and escaped appropriately.
     * @param string $string String to wrap
     * @return string The string wrapped in single quotes
     */
    private function getSingleQuotedString($string)
    {
        return sprintf("'%s'", strtr($string, ["'" => "\\'", '\\' => '\\\\']));
    }

    /**
     * Returns the string wrapped in double quotes and all but print characters escaped.
     *
     * Reads the 'hex.capitalize' option, which is not among the defaults
     * declared by this class and must therefore be supplied by the caller.
     *
     * @param string $string String to wrap and escape
     * @param array $options The string encoding options
     * @return string The string wrapped in double quotes and escaped correctly
     */
    private function getDoubleQuotedString($string, $options)
    {
        $string = strtr($string, [
            "\n" => '\n',
            "\r" => '\r',
            "\t" => '\t',
            '$' => '\$',
            '"' => '\"',
            '\\' => '\\\\',
        ]);

        if ($options['string.utf8']) {
            $string = $this->encodeUtf8($string, $options);
        }

        // Any byte still outside printable ASCII becomes a \xNN escape
        $hexFormat = static function ($matches) use ($options) {
            return sprintf($options['hex.capitalize'] ? '\x%02X' : '\x%02x', \ord($matches[0]));
        };

        return sprintf('"%s"', preg_replace_callback('/[^\x20-\x7E]/', $hexFormat, $string));
    }

    /**
     * Encodes all multibyte UTF-8 characters into PHP7 string encoding.
     *
     * Only well formed multibyte sequences are replaced with \u{...}
     * escapes. Other bytes are left untouched.
     *
     * @param string $string The string to encode
     * @param array $options The string encoding options
     * @return string The string with all the multibyte characters encoded
     */
    private function encodeUtf8($string, $options)
    {
        $pattern =
            '/  [\xC2-\xDF][\x80-\xBF]
              |  \xE0[\xA0-\xBF][\x80-\xBF]
              | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}
              |  \xED[\x80-\x9F][\x80-\xBF]
              |  \xF0[\x90-\xBF][\x80-\xBF]{2}
              | [\xF1-\xF3][\x80-\xBF]{3}
              |  \xF4[\x80-\x8F][\x80-\xBF]{2}/x';

        return preg_replace_callback($pattern, function ($match) use ($options) {
            return sprintf($options['hex.capitalize'] ? '\u{%X}' : '\u{%x}', $this->getCodePoint($match[0]));
        }, $string);
    }

    /**
     * Returns the unicode code point for the given multibyte UTF-8 character.
     * @param string $bytes The multibyte character (2 to 4 bytes long)
     * @return int The code point for the multibyte character
     */
    private function getCodePoint($bytes)
    {
        // 2 bytes: 110xxxxx 10xxxxxx
        if (\strlen($bytes) === 2) {
            return ((\ord($bytes[0]) & 0b11111) << 6)
                | (\ord($bytes[1]) & 0b111111);
        }

        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        if (\strlen($bytes) === 3) {
            return ((\ord($bytes[0]) & 0b1111) << 12)
                | ((\ord($bytes[1]) & 0b111111) << 6)
                | (\ord($bytes[2]) & 0b111111);
        }

        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return ((\ord($bytes[0]) & 0b111) << 18)
            | ((\ord($bytes[1]) & 0b111111) << 12)
            | ((\ord($bytes[2]) & 0b111111) << 6)
            | (\ord($bytes[3]) & 0b111111);
    }
}
215