Completed
Push — next ( 35d12b )
by Riikka
01:14
created

StringEncoder::getCodePoint()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 18
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 12
CRAP Score 3

Importance

Changes 0
Metric Value
dl 0
loc 18
ccs 12
cts 12
cp 1
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 12
nc 3
nop 1
crap 3
1
<?php
2
3
namespace Riimu\Kit\PHPEncoder\Encoder;
4
5
/**
 * Encoder for string values.
 *
 * Strings that consist only of printable ASCII characters are written as single
 * quoted literals. Other strings may be written as a base64_decode() call or as
 * a double quoted literal with escape sequences, depending on the options.
 *
 * @author Riikka Kalliomäki <[email protected]>
 * @copyright Copyright (c) 2014-2017 Riikka Kalliomäki
 * @license http://opensource.org/licenses/mit-license.php MIT License
 */
class StringEncoder implements Encoder
{
    /** @var array Default values for the options read by this encoder */
    private static $defaultOptions = [
        'string.escape' => true,
        'string.binary' => false,
        'string.utf8' => false,
    ];

    /**
     * Returns the default values for the options of this encoder.
     * @return array Option names mapped to their default values
     */
    public function getDefaultOptions()
    {
        $defaults = self::$defaultOptions;

        return $defaults;
    }

    /**
     * Tells if this encoder handles the given value.
     * @param mixed $value The value to test
     * @return bool True if the value is a string, false otherwise
     */
    public function supports($value)
    {
        $isString = is_string($value);

        return $isString;
    }

    /**
     * Returns the PHP code representation for the given string.
     * @param mixed $value The value to encode, cast to a string first
     * @param int $depth Not used by this encoder
     * @param array $options Options; 'string.binary', 'string.escape' and 'string.utf8' are read
     * @param callable $encode Not used by this encoder
     * @return string The string as PHP code
     */
    public function encode($value, $depth, array $options, callable $encode)
    {
        $string = (string) $value;

        // Nothing but printable ASCII: a single quoted literal is always enough
        if (!preg_match('/[^\x20-\x7E]/', $string)) {
            return $this->getSingleQuotedString($string);
        }

        if ($this->isBinaryString($string, $options)) {
            return $this->encodeBinaryString($string);
        }

        if ($options['string.escape']) {
            return $this->getDoubleQuotedString($string, $options);
        }

        // Escaping disabled: the bytes are written as is inside single quotes
        return $this->getSingleQuotedString($string);
    }

    /**
     * Tells if the string should be written using the binary representation.
     * @param string $string The string to test
     * @param array $options Options; only 'string.binary' is read
     * @return bool True if 'string.binary' is enabled and the string did not match as valid UTF-8
     */
    private function isBinaryString($string, $options)
    {
        $binaryEnabled = $options['string.binary'];

        if (!$binaryEnabled) {
            return false;
        }

        // UTF-8 validity test without mbstring extension
        $validUtf8 =
            '/^(?>
                [\x00-\x7F]+                       # ASCII
              | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
              |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding over longs
              | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
              |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
              |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
              | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
              |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
            )*$/x';

        $matched = preg_match($validUtf8, $string);

        // Anything that is not a successful match counts as binary
        return !$matched;
    }

    /**
     * Returns a base64_decode() call that reproduces the given string.
     * @param string $string The string to encode
     * @return string PHP code calling base64_decode() on the base64 encoded string
     */
    private function encodeBinaryString($string)
    {
        $payload = base64_encode($string);

        return sprintf("base64_decode('%s')", $payload);
    }

    /**
     * Returns the string wrapped in single quotes with quotes and backslashes escaped.
     * @param string $string String to wrap
     * @return string The string wrapped in single quotes
     */
    private function getSingleQuotedString($string)
    {
        $escapes = ["'" => "\\'", '\\' => '\\\\'];
        $escaped = strtr($string, $escapes);

        return sprintf("'%s'", $escaped);
    }

    /**
     * Returns the string wrapped in double quotes with everything but printable ASCII escaped.
     * @param string $string String to wrap and escape
     * @param array $options Options; only 'string.utf8' is read
     * @return string The escaped string wrapped in double quotes
     */
    private function getDoubleQuotedString($string, $options)
    {
        $escapes = [
            "\n" => '\n',
            "\r" => '\r',
            "\t" => '\t',
            '$'  => '\$',
            '"'  => '\"',
            '\\' => '\\\\',
        ];

        $escaped = strtr($string, $escapes);

        if ($options['string.utf8']) {
            $escaped = $this->encodeUtf8($escaped);
        }

        // Every byte still outside printable ASCII becomes a two digit hex escape
        $toHexEscape = function ($matches) {
            return sprintf('\x%02x', ord($matches[0]));
        };

        $body = preg_replace_callback('/[^\x20-\x7E]/', $toHexEscape, $escaped);

        return sprintf('"%s"', $body);
    }

    /**
     * Replaces multibyte UTF-8 sequences in the string with \u{...} escapes.
     * @param string $string The string to process
     * @return string|null The result of preg_replace_callback() on the string
     */
    private function encodeUtf8($string)
    {
        $multibyte =
            '/  [\xC2-\xDF][\x80-\xBF]
              |  \xE0[\xA0-\xBF][\x80-\xBF]
              | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}
              |  \xED[\x80-\x9F][\x80-\xBF]
              |  \xF0[\x90-\xBF][\x80-\xBF]{2}
              | [\xF1-\xF3][\x80-\xBF]{3}
              |  \xF4[\x80-\x8F][\x80-\xBF]{2}/x';

        $toUnicodeEscape = function ($match) {
            $codePoint = $this->getCodePoint($match[0]);

            return sprintf('\u{%s}', dechex($codePoint));
        };

        return preg_replace_callback($multibyte, $toUnicodeEscape, $string);
    }

    /**
     * Returns the code point for a single multibyte UTF-8 sequence.
     *
     * Sequences of two and three bytes are handled separately; any other
     * length is decoded as a four byte sequence.
     *
     * @param string $bytes The bytes of the UTF-8 sequence
     * @return int The code point of the sequence
     */
    private function getCodePoint($bytes)
    {
        switch (strlen($bytes)) {
            case 2:
                // 110xxxxx 10xxxxxx
                return ((ord($bytes[0]) & 0x1F) << 6)
                    | (ord($bytes[1]) & 0x3F);

            case 3:
                // 1110xxxx 10xxxxxx 10xxxxxx
                return ((ord($bytes[0]) & 0x0F) << 12)
                    | ((ord($bytes[1]) & 0x3F) << 6)
                    | (ord($bytes[2]) & 0x3F);

            default:
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                return ((ord($bytes[0]) & 0x07) << 18)
                    | ((ord($bytes[1]) & 0x3F) << 12)
                    | ((ord($bytes[2]) & 0x3F) << 6)
                    | (ord($bytes[3]) & 0x3F);
        }
    }
}
146