Passed
Push — master ( e8656d...3aca45 )
by Delete
02:10
created

Decoder::getEncoding()   B

Complexity

Conditions 6
Paths 4

Size

Total Lines 26
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 6
eloc 11
c 3
b 0
f 0
nc 4
nop 1
dl 0
loc 26
rs 8.439
1
<?php
2
namespace Crossjoin\Json;
3
4
use Crossjoin\Json\Exception\EncodingNotSupportedException;
5
use Crossjoin\Json\Exception\InvalidArgumentException;
6
use Crossjoin\Json\Exception\JsonException;
7
8
/**
9
 * Class Decoder
10
 *
11
 * @package Crossjoin\Json
12
 * @author Christoph Ziegenberg <[email protected]>
13
 */
14
class Decoder extends Converter
{
    /**
     * Whether byte order marks are removed (via removeByteOrderMark()) before
     * the encoding is detected and before the JSON text is decoded.
     *
     * @var bool
     */
    private $ignoreByteOrderMark = true;

    /**
     * Decoder constructor.
     *
     * @param bool $ignoreByteOrderMark Initial value of the byte order mark flag,
     *                                  validated and stored by setIgnoreByteOrderMark().
     *
     * @throws \Crossjoin\Json\Exception\InvalidArgumentException
     */
    public function __construct($ignoreByteOrderMark = true)
    {
        $this->setIgnoreByteOrderMark($ignoreByteOrderMark);
    }

    /**
     * Returns the current value of the byte order mark flag.
     *
     * @return boolean
     */
    public function getIgnoreByteOrderMark()
    {
        return $this->ignoreByteOrderMark;
    }

    /**
     * Validates and stores the byte order mark flag.
     *
     * @param boolean $ignoreByteOrderMark
     *
     * @throws \Crossjoin\Json\Exception\InvalidArgumentException
     */
    public function setIgnoreByteOrderMark($ignoreByteOrderMark)
    {
        // Check arguments
        InvalidArgumentException::validateArgument(InvalidArgumentException::TYPE_BOOLEAN, 'ignoreByteOrderMark', $ignoreByteOrderMark, 1478195542);

        $this->ignoreByteOrderMark = $ignoreByteOrderMark;
    }

    /**
     * Gets the encoding of the JSON text.
     *
     * The first bytes of the text are tested against the patterns returned by
     * getEncodingPatterns(), in their declared order; the key of the first
     * matching pattern is returned.
     *
     * @param string $json
     *
     * @return string One of the encoding keys used in getEncodingPatterns().
     * @throws \Crossjoin\Json\Exception\InvalidArgumentException
     * @throws \Crossjoin\Json\Exception\EncodingNotSupportedException If no pattern matches, or if the
     *         text starts with a UTF-8 byte order mark while byte order marks are not ignored.
     */
    public function getEncoding($json)
    {
        // Check arguments
        InvalidArgumentException::validateArgument(InvalidArgumentException::TYPE_STRING, 'json', $json, 1478195652);

        // Get the first bytes
        $bytes = $this->getEncodingBytes($json);

        // Check encoding
        foreach ($this->getEncodingPatterns() as $encoding => $regExp) {
            if (preg_match($regExp, $bytes)) {
                // Additional check, because the UTF-8 pattern also matches UTF-8 byte order marks
                // (if byte order marks have not already been removed before).
                if ($encoding !== self::UTF8 || $this->ignoreByteOrderMark || !preg_match('/^\xEF\xBB\xBF/', $bytes)) {
                    return $encoding;
                }
                // UTF-8 text with a byte order mark that must not be ignored: stop searching
                // and report the encoding as unsupported.
                break;
            }
        }

        // No encoding found
        throw new EncodingNotSupportedException(
            'The JSON text is encoded with an unsupported encoding.',
            1478092834
        );
    }

    /**
     * Returns the detection patterns, keyed by encoding, in the order in which
     * getEncoding() tests them.
     *
     * The patterns are applied to raw bytes. End-of-input is anchored with \z
     * instead of $, because $ also matches in front of a trailing newline and
     * would accept a 3-byte input such as "1\x00\n" as 2-byte UTF-16.
     *
     * @return array
     */
    private function getEncodingPatterns()
    {
        return array(
            // It's UTF-8 encoded JSON if you have...
            // - 1 byte and it's not NUL ("xx")
            // - 2 bytes and none of them are NUL ("xx xx")
            // - 3 bytes and they are not NUL ("xx xx xx")
            // - 4 or more bytes and the first 4 bytes are not NUL ("xx xx xx xx")
            self::UTF8    => '/^(?:[^\x00]{1,3}\z|[^\x00]{4})/',
            // It's UTF-16BE encoded JSON if you have...
            // - 2 bytes and only the first is NUL ("00 xx")
            // - 4 or more bytes and only the first byte of the first 2 bytes is NUL ("00 xx")
            self::UTF16BE => '/^(?:\x00[^\x00]{1}\z|\x00[^\x00]{1}.{2})/s',
            // It's UTF-16LE encoded JSON if you have...
            // - 2 bytes and only the second is NUL ("xx 00")
            // - 4 or more bytes and only the second of the first 3 bytes is NUL ("xx 00 xx")
            self::UTF16LE => '/^(?:[^\x00]{1}\x00\z|[^\x00]{1}\x00[^\x00]{1}.{1})/s',
            // It's UTF-32BE encoded JSON if you have...
            // - 4 or more bytes and only the first to third byte of the first 4 bytes are NUL ("00 00 00 xx")
            self::UTF32BE => '/^[\x00]{3}[^\x00]{1}/',
            // It's UTF-32LE encoded JSON if you have...
            // - 4 or more bytes and only the second to fourth byte of the first 4 bytes are NUL ("xx 00 00 00")
            self::UTF32LE => '/^[^\x00]{1}[\x00]{3}/',
        );
    }

    /** @noinspection MoreThanThreeArgumentsInspection */
    /**
     * Parses a valid JSON text that is encoded as UTF-8, UTF-16BE, UTF-16LE, UTF-32BE or UTF-32LE
     * and returns the data as UTF-8.
     *
     * The arguments $assoc, $depth and $options are passed on to the native json_decode()
     * function ($options only on PHP 5.4.0 or newer).
     *
     * @param string $json
     * @param bool $assoc
     * @param int $depth
     * @param int $options
     *
     * @return mixed
     * @throws \Crossjoin\Json\Exception\NativeJsonErrorException
     * @throws \Crossjoin\Json\Exception\ConversionFailedException
     * @throws \Crossjoin\Json\Exception\InvalidArgumentException
     * @throws \Crossjoin\Json\Exception\EncodingNotSupportedException
     * @throws \Crossjoin\Json\Exception\ExtensionRequiredException
     */
    public function decode($json, $assoc = false, $depth = 512, $options = 0)
    {
        // Check arguments
        InvalidArgumentException::validateArgument(InvalidArgumentException::TYPE_STRING, 'json', $json, 1478418105);
        InvalidArgumentException::validateArgument(InvalidArgumentException::TYPE_BOOLEAN, 'assoc', $assoc, 1478418106);
        InvalidArgumentException::validateArgument(InvalidArgumentException::TYPE_INTEGER, 'depth', $depth, 1478418107);
        InvalidArgumentException::validateArgument(InvalidArgumentException::TYPE_INTEGER, 'options', $options, 1478418108);

        // Prepare JSON data (remove BOMs and convert encoding)
        $json = $this->prepareJson($json);

        // Try to decode the json text
        // (the $options argument of json_decode() is only passed on PHP 5.4.0 or newer)
        // @codeCoverageIgnoreStart
        if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
            $data = \json_decode($json, $assoc, $depth, $options);
        } else {
            $data = \json_decode($json, $assoc, $depth);
        }
        // @codeCoverageIgnoreEnd

        // Check if the native JSON decoder created an error
        $this->checkForNativeJsonError();

        return $data;
    }

    /**
     * Returns up to the first 8 bytes of the JSON text, with byte order marks
     * removed if they are to be ignored.
     *
     * @param string $json
     *
     * @return string
     * @throws \Crossjoin\Json\Exception\InvalidArgumentException
     */
    private function getEncodingBytes($json)
    {
        // Do not use str_* function here because of possible mb_str_* overloading
        preg_match('/^(.{0,8})/s', $json, $matches);
        $bytes = array_key_exists(1, $matches) ? $matches[1] : '';

        // Remove byte order marks
        if ($this->ignoreByteOrderMark && $bytes !== '') {
            $bytes = $this->removeByteOrderMark($bytes);
        }

        return $bytes;
    }

    /**
     * Prepares the JSON text for the native decoder: optionally removes byte
     * order marks and converts the text to UTF-8.
     *
     * Any JsonException raised during preparation is deliberately swallowed;
     * the text as it is at that point is returned, so that the native
     * json_decode() call in decode() reports the error.
     *
     * @param string $json
     *
     * @return string
     */
    private function prepareJson($json)
    {
        try {
            // Ignore empty string
            // (will cause a parsing error in the native json_decode function)
            if ($json !== '') {
                // Remove byte order marks
                if ($this->ignoreByteOrderMark) {
                    $json = $this->removeByteOrderMark($json);
                }

                // Convert encoding to UTF-8
                $json = $this->convertEncoding($json, $this->getEncoding($json), self::UTF8);
            }
        } catch (JsonException $e) {
            // Ignore exception here, so that the native json_decode function
            // is called by the decode() method and we get the native error message.
        }

        return $json;
    }

    /**
     * Throws the exception returned by getNativeJsonErrorException() if the
     * last native JSON operation reported an error.
     *
     * @throws \Crossjoin\Json\Exception\NativeJsonErrorException
     */
    private function checkForNativeJsonError()
    {
        if (\json_last_error() !== \JSON_ERROR_NONE) {
            throw $this->getNativeJsonErrorException();
        }
    }
}
219