Completed
Push — master ( 9920b5...aab855 )
by Riikka
11s
created

StringEncoder::isBinaryString()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 21
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 21
ccs 6
cts 6
cp 1
rs 9.3142
c 0
b 0
f 0
cc 2
eloc 6
nc 2
nop 2
crap 2
1
<?php
2
3
namespace Riimu\Kit\PHPEncoder\Encoder;
4
5
/**
6
 * Encoder for string values.
7
 * @author Riikka Kalliomäki <[email protected]>
8
 * @copyright Copyright (c) 2014-2017 Riikka Kalliomäki
9
 * @license http://opensource.org/licenses/mit-license.php MIT License
10
 */
11
class StringEncoder implements Encoder
{
    /** @var array Default values for options in the encoder */
    private static $defaultOptions = [
        'string.escape' => true,
        'string.binary' => false,
        'string.utf8' => false,
    ];

    /**
     * @var string PCRE alternation (extended syntax) that matches exactly one
     *             well-formed multibyte UTF-8 sequence. Shared by the UTF-8
     *             validity test and the multibyte escaping so both always
     *             agree on what counts as a valid character.
     */
    private static $multibytePattern = '
          [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
        |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding over longs
        | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
        |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
        |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
        | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
        |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
    ';

    /**
     * Returns the default values for the options used by this encoder.
     * @return array Option names mapped to their default values
     */
    public function getDefaultOptions()
    {
        return self::$defaultOptions;
    }

    /**
     * Tells if this encoder can encode the given value.
     * @param mixed $value The value to test
     * @return bool True if the value is a string, false otherwise
     */
    public function supports($value)
    {
        return is_string($value);
    }

    /**
     * Generates the PHP code representation for the given string.
     *
     * Strings consisting solely of printable ASCII characters are always
     * single quoted. Other strings are base 64 encoded when 'string.binary'
     * is enabled and the string is not valid UTF-8, double quoted with escape
     * sequences when 'string.escape' is enabled, and single quoted otherwise.
     *
     * @param string $value The string to encode
     * @param int $depth The current nesting depth (not used by this encoder)
     * @param array $options The string encoding options
     * @param callable $encode Callback for encoding nested values (not used by this encoder)
     * @return string The PHP code that represents the string
     */
    public function encode($value, $depth, array $options, callable $encode)
    {
        $value = (string) $value;

        // Only printable ASCII: nothing needs escaping beyond quotes and backslashes.
        if (!preg_match('/[^\x20-\x7E]/', $value)) {
            return $this->getSingleQuotedString($value);
        }

        if ($this->isBinaryString($value, $options)) {
            return $this->encodeBinaryString($value);
        }

        if ($options['string.escape']) {
            return $this->getDoubleQuotedString($value, $options);
        }

        return $this->getSingleQuotedString($value);
    }

    /**
     * Tells if the string is not a valid UTF-8 string.
     *
     * The test is only performed when the 'string.binary' option is enabled.
     *
     * @param string $string The string to test
     * @param array $options The string encoding options
     * @return bool True if the string is not valid UTF-8 and false if it is
     */
    private function isBinaryString($string, $options)
    {
        if (!$options['string.binary']) {
            return false;
        }

        // UTF-8 validity test without mbstring extension
        $pattern = '/^(?> [\x00-\x7F]+ | ' . self::$multibytePattern . ' )*$/x';

        // preg_match() yields 0 for a mismatch and false on an internal PCRE
        // error; both are treated as "binary", which falls back to base 64.
        return !preg_match($pattern, $string);
    }

    /**
     * Encodes the given string into base 64 encoded format.
     * @param string $string The string to encode
     * @return string A base 64 PHP code representation for the string
     */
    private function encodeBinaryString($string)
    {
        return sprintf("base64_decode('%s')", base64_encode($string));
    }

    /**
     * Returns the string wrapped in single quotes and escaped appropriately.
     * @param string $string String to wrap
     * @return string The string wrapped in single quotes
     */
    private function getSingleQuotedString($string)
    {
        // Only the quote itself and the backslash are special in single quotes.
        return sprintf("'%s'", strtr($string, ["'" => "\\'", '\\' => '\\\\']));
    }

    /**
     * Returns the string wrapped in double quotes and all but print characters escaped.
     * @param string $string String to wrap and escape
     * @param array $options The string encoding options
     * @return string The string wrapped in double quotes and escaped correctly
     */
    private function getDoubleQuotedString($string, $options)
    {
        // strtr() replaces all pairs in a single pass, so backslashes that are
        // added by one replacement are never escaped again by another.
        $string = strtr($string, [
            "\n" => '\n',
            "\r" => '\r',
            "\t" => '\t',
            '$'  => '\$',
            '"'  => '\"',
            '\\' => '\\\\',
        ]);

        if ($options['string.utf8']) {
            $string = $this->encodeUtf8($string, $options);
        }

        $format = $this->useCapitalHex($options) ? '\x%02X' : '\x%02x';

        // Whatever is still outside printable ASCII is written as a hex escape.
        $escaped = preg_replace_callback('/[^\x20-\x7E]/', static function ($matches) use ($format) {
            return sprintf($format, ord($matches[0]));
        }, $string);

        return sprintf('"%s"', $escaped);
    }

    /**
     * Encodes all multibyte UTF-8 characters into PHP7 string encoding.
     * @param string $string The string to encode
     * @param array $options The string encoding options
     * @return string The string with all the multibyte characters encoded
     */
    private function encodeUtf8($string, $options)
    {
        $format = $this->useCapitalHex($options) ? '\u{%X}' : '\u{%x}';
        $pattern = '/' . self::$multibytePattern . '/x';

        return preg_replace_callback($pattern, function ($match) use ($format) {
            return sprintf($format, $this->getCodePoint($match[0]));
        }, $string);
    }

    /**
     * Tells if hexadecimal digits in escape sequences should be upper case.
     *
     * The 'hex.capitalize' option is not among the defaults declared by this
     * encoder, so a missing key is treated as disabled instead of triggering
     * an undefined index notice.
     *
     * @param array $options The string encoding options
     * @return bool True to use upper case hexadecimal digits, false for lower case
     */
    private function useCapitalHex($options)
    {
        return !empty($options['hex.capitalize']);
    }

    /**
     * Returns the unicode code point for the given multibyte UTF-8 character.
     * @param string $bytes The multibyte character
     * @return int The code point for the multibyte character
     */
    private function getCodePoint($bytes)
    {
        switch (strlen($bytes)) {
            case 2:
                return ((ord($bytes[0]) & 0b11111) << 6)
                    | (ord($bytes[1]) & 0b111111);
            case 3:
                return ((ord($bytes[0]) & 0b1111) << 12)
                    | ((ord($bytes[1]) & 0b111111) << 6)
                    | (ord($bytes[2]) & 0b111111);
            default:
                // Any other length is decoded as a four byte sequence.
                return ((ord($bytes[0]) & 0b111) << 18)
                    | ((ord($bytes[1]) & 0b111111) << 12)
                    | ((ord($bytes[2]) & 0b111111) << 6)
                    | (ord($bytes[3]) & 0b111111);
        }
    }
}
167