|
1
|
|
|
<?php |
|
2
|
|
|
declare(strict_types=1); |
|
3
|
|
|
|
|
4
|
|
|
namespace JsonStreamParser; |
|
5
|
|
|
|
|
6
|
|
|
use JsonStreamParser\Exception\ParseException; |
|
7
|
|
|
|
|
8
|
|
|
/**
 * Character-driven JSON stream parser.
 *
 * Reads a stream one character at a time through a Buffer and reports every
 * structural token and every scalar value to a Decoder. The parser itself only
 * tracks the nesting depth of arrays/objects; everything else is delegated to
 * the Decoder callbacks.
 *
 * @author Stefan Pöhner <[email protected]>
 * @license MIT
 *
 * @package JsonStreamParser
 */
class JsonStreamParser
{
    const STATE_NOTHING = 1;
    const STATE_IN_ARRAY = 2;
    const STATE_IN_OBJECT = 3;

    /**
     * @var Configuration
     */
    private $config;

    /**
     * @var Buffer
     */
    private $buffer;

    /**
     * @var Decoder
     */
    private $decoder;

    /**
     * Character generator obtained from the buffer; shared by all consume*() helpers.
     *
     * @var \Generator
     */
    private $generator;

    /**
     * The character that processChar() is currently looking at.
     *
     * @var string
     */
    private $currentChar;

    /**
     * Number of arrays/objects that have been opened but not yet closed.
     *
     * @var int
     */
    private $currentNestingLevel = 0;

    /**
     * JsonStreamParser constructor.
     *
     * @param Configuration $config
     * @param Decoder       $decoder
     */
    public function __construct(Configuration $config, Decoder $decoder)
    {
        $this->config = $config;
        $this->decoder = $decoder;
    }

    /**
     * Parse a JSON stream.
     * This is the entry point to the whole system.
     *
     * @param resource $stream
     *
     * @return void
     *
     * @throws \InvalidArgumentException if $stream is no resource or the configured buffer class
     *                                   is missing or not a Buffer
     * @throws ParseException            if the stream content cannot be parsed
     */
    public function parse($stream)
    {
        if (!is_resource($stream)) {
            throw new \InvalidArgumentException('The stream provided is no resource.');
        }

        $bufferClass = $this->config->bufferClass;
        if (!class_exists($bufferClass)) {
            throw new \InvalidArgumentException('Missing buffer class.');
        }

        // Verify the type BEFORE instantiating, so the constructor of an
        // unrelated class is never executed.
        if (!is_a($bufferClass, Buffer::class, true)) {
            throw new \InvalidArgumentException('Incompatible buffer class.');
        }

        $this->buffer = new $bufferClass();
        $this->buffer->setStream($stream);
        $this->buffer->setSize($this->config->bufferSize);

        $this->doParse();
    }

    /**
     * Feed every character of the buffer through processChar() and notify the
     * decoder once the stream is exhausted.
     *
     * @return void
     *
     * @throws ParseException if arrays/objects are still open when the stream ends
     */
    private function doParse()
    {
        $this->currentNestingLevel = 0;

        $this->generator = $this->buffer->get();
        foreach ($this->generator as $char) {
            $this->currentChar = $char;
            $this->processChar();
        }

        if ($this->currentNestingLevel !== 0) {
            throw new ParseException('Unexpected end of stream');
        }

        $this->decoder->endOfStream();
    }

    /**
     * Dispatch the current character to the matching decoder callback.
     *
     * Strings, keywords and numbers span several characters; for those the
     * consume*() helpers advance the shared generator themselves.
     *
     * @return void
     *
     * @throws ParseException if the character cannot start any JSON token
     */
    private function processChar()
    {
        switch ($this->currentChar) {
            case JsonDefinition::BEGIN_OBJECT:
                $this->currentNestingLevel++;
                $this->decoder->beginObject();
                break;

            case JsonDefinition::END_OBJECT:
                $this->currentNestingLevel--;
                $this->decoder->endObject();
                break;

            case JsonDefinition::BEGIN_ARRAY:
                $this->currentNestingLevel++;
                $this->decoder->beginArray();
                break;

            case JsonDefinition::END_ARRAY:
                $this->currentNestingLevel--;
                $this->decoder->endArray();
                break;

            case JsonDefinition::ARRAY_SEPARATOR:
                $this->decoder->arraySeparator();
                break;

            case JsonDefinition::KEY_VALUE_SEPARATOR:
                $this->decoder->keyValueSeparator();
                break;

            case JsonDefinition::STRING_ENCLOSURE:
                $string = $this->consumeString();
                $this->decoder->appendValue($string);
                break;

            default:
                if ($this->isWhitespace()) {
                    $this->decoder->whitespace($this->currentChar);
                } elseif ($this->isStartOfKeyword($this->currentChar)) {
                    $value = $this->consumeKeyword();
                    $this->decoder->appendValue($value);
                } elseif (is_numeric($this->currentChar) || $this->currentChar === '-') {
                    $value = $this->consumeNumber();
                    $this->decoder->appendValue($value);

                    // if the generator has not closed behind the number,
                    // consumeNumber walks one character too far.
                    // this character needs to be processed
                    if ($this->generator->valid()) {
                        $this->processChar();
                    }
                } else {
                    throw new ParseException("Unknown character: {$this->currentChar}");
                }
                break;
        }
    }

    /**
     * Read a string value; the cursor must be on the opening enclosure.
     *
     * On return the cursor rests on the closing enclosure, so that the foreach
     * in doParse() moves past it with its own next() call.
     *
     * @return string the decoded string (escape sequences resolved by json_decode)
     *
     * @throws ParseException if the stream ends inside the string or the string
     *                        content is not a valid JSON string
     */
    private function consumeString(): string
    {
        $string = '';
        // true while the previous character was an unescaped backslash
        $escaped = false;

        // the cursor is at the opening enclosure, so advance
        $this->generator->next();
        while ($this->generator->valid()) {
            $char = $this->generator->current();

            if ($escaped) {
                // whatever follows a backslash belongs to the string, even an enclosure
                $escaped = false;
            } elseif ($char === '\\') {
                $escaped = true;
            } elseif ($char === JsonDefinition::STRING_ENCLOSURE) {
                // an unescaped enclosure terminates the string
                $decodedString = json_decode('"'.$string.'"');
                if (!is_string($decodedString)) {
                    throw new ParseException('Invalid string: '.json_last_error_msg());
                }

                return $decodedString;
            }

            $string .= $char;

            // keep this after the return; otherwise the foreach of doParse will skip one char
            $this->generator->next();
        }

        // if we end up here, we never got an enclosure
        throw new ParseException('Encountered end of stream while inside a string.');
    }

    /**
     * Tell whether a character can start one of the keywords true, false or null
     * (case-insensitive).
     *
     * @param string $char
     *
     * @return bool
     */
    private function isStartOfKeyword(string $char): bool
    {
        // true, false, null
        return in_array(mb_strtolower($char), ['t', 'f', 'n'], true);
    }

    /**
     * Read a keyword; the cursor must be on its first character.
     *
     * Characters are lower-cased and accumulated until they form a key of
     * JsonDefinition::KEYWORDS. On return the cursor rests on the last
     * character of the keyword.
     *
     * @return bool|null the value JsonDefinition::KEYWORDS maps the keyword to
     *
     * @throws ParseException if the characters cannot form a keyword or the
     *                        stream ends inside the keyword
     */
    private function consumeKeyword()
    {
        $keyword = '';

        // cursor is already on the first character
        do {
            $keyword .= mb_strtolower($this->generator->current());

            if (array_key_exists($keyword, JsonDefinition::KEYWORDS)) {
                return JsonDefinition::KEYWORDS[$keyword];
            }

            // Stop at the first character that rules out every keyword instead
            // of buffering the remainder of the stream.
            if (!$this->isKeywordPrefix($keyword)) {
                throw new ParseException("Unknown keyword: $keyword");
            }

            $this->generator->next();
        } while ($this->generator->valid());

        // the stream ended before the keyword was complete
        throw new ParseException('Encountered end of stream while inside a keyword.');
    }

    /**
     * Tell whether at least one key of JsonDefinition::KEYWORDS begins with the given text.
     *
     * @param string $candidate
     *
     * @return bool
     */
    private function isKeywordPrefix(string $candidate): bool
    {
        $length = strlen($candidate);
        foreach (array_keys(JsonDefinition::KEYWORDS) as $known) {
            if (strncmp((string)$known, $candidate, $length) === 0) {
                return true;
            }
        }

        return false;
    }

    /**
     * Read a number; the cursor must be on its first character.
     *
     * Unless the stream ends with the number, the cursor is left on the first
     * character behind the number and that character is stored in
     * $this->currentChar, so that processChar() can handle it.
     *
     * @return float|int int unless the number contains a fraction or an exponent
     *
     * @throws ParseException if the collected characters are not a number
     */
    private function consumeNumber()
    {
        $number = '';
        $isInt = true;
        // '+' may follow the exponent marker, e.g. 1e+5
        $numberCharacters = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', '-', '+', 'e'];

        // cursor is already on the first character
        do {
            // keep the untouched character for processChar() and its error message
            $this->currentChar = $this->generator->current();
            $char = mb_strtolower($this->currentChar);

            if (!in_array($char, $numberCharacters, true)) {
                // number has ended
                break;
            }

            if ($char === '.' || $char === 'e') {
                $isInt = false;
            }

            $number .= $char;
            $this->generator->next();
        } while ($this->generator->valid());

        // Validate here rather than inside the loop, so that a number which is
        // cut off by the end of the stream (e.g. a lone "-") is rejected too.
        if (!is_numeric($number)) {
            throw new ParseException("Unknown number format: $number");
        }

        if ($isInt) {
            return (int)$number;
        }

        return (float)$number;
    }

    /**
     * Tell whether the current character is listed in JsonDefinition::WHITESPACE.
     *
     * @return bool
     */
    private function isWhitespace(): bool
    {
        // NOTE(review): loose comparison kept as-is because the element types of
        // JsonDefinition::WHITESPACE are not visible from this file - confirm and
        // switch to strict mode if they are plain strings.
        return in_array($this->currentChar, JsonDefinition::WHITESPACE);
    }
}
|
273
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.