1 | <?php |
||
2 | declare(strict_types=1); |
||
3 | |||
4 | namespace JsonDecodeStream; |
||
5 | |||
6 | use Generator; |
||
7 | use JsonDecodeStream\Collector\Collector; |
||
8 | use JsonDecodeStream\Collector\CollectorInterface; |
||
9 | use JsonDecodeStream\Exception\ParserException; |
||
10 | use JsonDecodeStream\Internal\SourceBuffer; |
||
11 | use JsonDecodeStream\Internal\Stack; |
||
12 | use JsonDecodeStream\Internal\StackFrame; |
||
13 | use JsonDecodeStream\Source\FileSource; |
||
14 | use JsonDecodeStream\Source\Psr7Source; |
||
15 | use JsonDecodeStream\Source\SourceInterface; |
||
16 | use JsonDecodeStream\Source\StreamSource; |
||
17 | use JsonDecodeStream\Source\StringSource; |
||
18 | use Psr\Http\Message\StreamInterface; |
||
19 | |||
/**
 * Streaming JSON parser.
 *
 * Wraps a source in a SourceBuffer, tokenizes it with Tokenizer and turns the
 * token stream into a lazy stream of Event objects (see events()). items()
 * feeds those events to one or more collectors and yields whatever the
 * collectors produce.
 *
 * Commas between top-level documents are ignored, so a sequence of JSON
 * documents can be read from a single source.
 */
class Parser
{
    /** @var SourceBuffer buffered view over the source handed to the constructor */
    protected $buffer;

    /**
     * Not referenced anywhere inside this class: events() works on its own
     * local Stack instance. Kept because the property is protected and may be
     * used by subclasses.
     *
     * @var Stack
     */
    protected $stack;

    /**
     * @param SourceInterface $source source of the JSON text to parse
     */
    public function __construct(SourceInterface $source)
    {
        $this->buffer = new SourceBuffer($source);
    }

    /**
     * Creates a parser that reads JSON from an in-memory string.
     *
     * @param string $string
     * @return static
     */
    public static function fromString(string $string)
    {
        return new static(new StringSource($string));
    }

    /**
     * Creates a parser backed by a FileSource for the given path.
     *
     * @param string $path
     * @return static
     */
    public static function fromFile(string $path)
    {
        return new static(new FileSource($path));
    }

    /**
     * Creates a parser backed by a StreamSource.
     *
     * @param mixed $stream passed to StreamSource unchanged
     *                      (presumably a PHP stream resource - confirm against StreamSource)
     * @return static
     */
    public static function fromStream($stream)
    {
        return new static(new StreamSource($stream));
    }

    /**
     * Creates a parser that reads from a PSR-7 stream.
     *
     * @param StreamInterface $stream
     * @return static
     */
    public static function fromPsr7(StreamInterface $stream)
    {
        return new static(new Psr7Source($stream));
    }

    /**
     * Runs the parser and yields the items produced by the collectors.
     *
     * Every event from events() is offered to every collector, in the order the
     * collectors were given. A collector may answer with:
     *  - null: nothing to emit for this event;
     *  - a two-element array [key, value]: emitted as `key => value`;
     *  - a Generator of such two-element arrays: each one is emitted in turn.
     * Anything else is reported as a ParserException.
     *
     * This method is a generator, so nothing (including argument validation)
     * happens until iteration starts.
     *
     * @param null|string|string[]|CollectorInterface|CollectorInterface[] $selectors
     *        single or comma-separated selector string
     *        or custom CollectorInterface implementation
     *        or array of any of both
     *        or null to collect whole documents
     * @param bool $objectsAsAssoc passed to every Collector created from a selector
     * @return iterable|Generator
     * @throws Exception\CollectorException
     * @throws Exception\SelectorException
     * @throws Exception\TokenizerException
     * @throws ParserException
     */
    public function items($selectors = null, bool $objectsAsAssoc = false)
    {
        $collectors = $this->createCollectors($selectors, $objectsAsAssoc);

        foreach ($this->events() as $event) {
            foreach ($collectors as $collector) {
                $yielded = $collector->processEvent($event);

                if ($yielded === null) {
                    // this collector has nothing to emit for this event
                    continue;
                }

                if (is_array($yielded)) {
                    if (count($yielded) != 2) {
                        throw ParserException::unexpectedCollectorReturn($yielded, $event);
                    }
                    [ $key, $value ] = $yielded;
                    yield $key => $value;
                } elseif ($yielded instanceof Generator) {
                    foreach ($yielded as $yieldedSingle) {
                        if (!is_array($yieldedSingle) || count($yieldedSingle) != 2) {
                            // NOTE(review): the whole generator is reported here rather than
                            // the offending element; kept as in the original - confirm intent.
                            throw ParserException::unexpectedCollectorReturn($yielded, $event);
                        }
                        [ $key, $value ] = $yieldedSingle;
                        yield $key => $value;
                    }
                } else {
                    throw ParserException::unexpectedCollectorReturn($yielded, $event);
                }
            }
        }
    }

    /**
     * Normalises the $selectors argument of items() into a list of collectors.
     *
     * Strings are split on "," and every string (or null) becomes a new
     * Collector; CollectorInterface instances are used as they are.
     *
     * @param null|string|array|CollectorInterface $selectors
     * @param bool $objectsAsAssoc
     * @return CollectorInterface[]
     * @throws ParserException when $selectors, or one of its elements, has an unsupported type
     */
    private function createCollectors($selectors, bool $objectsAsAssoc): array
    {
        if (is_string($selectors)) {
            $selectorsArray = explode(',', $selectors);
        } elseif (is_array($selectors)) {
            $selectorsArray = $selectors;
        } elseif ($selectors instanceof CollectorInterface) {
            $selectorsArray = [ $selectors ];
        } elseif ($selectors === null) {
            $selectorsArray = [ null ];
        } else {
            throw new ParserException('Unexpected selectors are provided', ParserException::CODE_INVALID_ARGUMENT);
        }

        $collectors = [];
        foreach ($selectorsArray as $selector) {
            if (is_string($selector) || is_null($selector)) {
                $collectors[] = new Collector($selector, $objectsAsAssoc);
            } elseif ($selector instanceof CollectorInterface) {
                $collectors[] = $selector;
            } else {
                // The parentheses are required: "." binds tighter than "?:", so without
                // them the concatenated string becomes the ternary condition.
                throw new ParserException(
                    'Invalid collector: '
                    . (is_object($selector) ? get_class($selector) : gettype($selector)),
                    ParserException::CODE_INVALID_ARGUMENT
                );
            }
        }

        return $collectors;
    }

    /**
     * Converts the token stream into a stream of parser events.
     *
     * A local Stack tracks the containers that are currently open. Each frame
     * records whether a comma, a key or a key delimiter is expected next, the
     * last key seen and the number of elements so far. Events are created with
     * a reference to that stack and with the line/char number of the token
     * being processed at the time.
     *
     * Emitted events: DOCUMENT_START / DOCUMENT_END around every top-level
     * array or object, OBJECT_START / OBJECT_END, ARRAY_START / ARRAY_END,
     * KEY and VALUE.
     *
     * A closing bracket that does not match the open container ("}" inside an
     * array, "]" inside an object) is rejected as an unexpected token.
     *
     * NOTE(review): an object closed right after a key delimiter (`{"a":}`) and
     * input that ends while containers are still open are not rejected here.
     *
     * @return Generator|Event[]
     * @psalm-return \Generator<Event>
     * @throws ParserException
     * @noinspection PhpStatementHasEmptyBodyInspection
     */
    public function events(): Generator
    {
        $stack = new Stack();
        $tokens = $this->tokens();

        // shortcut to event factory; $token is captured by reference so the
        // closure always sees the token currently being processed by the loop
        $createEvent = function (string $eventId, $value = null) use ($stack, &$token): Event {
            return $this->createEvent($eventId, $value, $stack, $token->getLineNumber(), $token->getCharNumber());
        };

        foreach ($tokens as $token) {
            $tokenId = $token->getId();

            if ($tokenId == Token::WHITESPACE) {
                // ignore whitespaces
                continue;
            }

            if ($stack->isEmpty()) {
                // between documents: only the start of a container is meaningful
                switch ($tokenId) {
                    case Token::OBJECT_START:
                        yield $createEvent(Event::DOCUMENT_START);
                        yield $createEvent(Event::OBJECT_START);

                        $stack->push(StackFrame::object());
                        break;

                    case Token::ARRAY_START:
                        yield $createEvent(Event::DOCUMENT_START);
                        yield $createEvent(Event::ARRAY_START);

                        $stack->push(StackFrame::array());
                        break;

                    case Token::WHITESPACE:
                    case Token::COMA:
                        // this is ignored at top-level to parse json sequences
                        break;

                    default:
                        throw ParserException::unexpectedToken($token);
                }
                continue;
            }

            // The frame is only used before any push()/pop() in this iteration.
            $frame = $stack->current();

            if ($frame->isAwaitsComa()) {
                if ($tokenId == Token::COMA) {
                    $frame->setAwaitsComa(false);
                    if ($frame->isObject()) {
                        // the next token in an object must be a new key
                        $frame->setLastKey(null);
                        $frame->setAwaitsKey(true);
                    }
                    continue;
                }

                // instead of a comma, only the matching closing bracket is allowed
                $closesFrame = ($frame->isObject() && $tokenId == Token::OBJECT_END)
                    || ($frame->isArray() && $tokenId == Token::ARRAY_END);
                if (!$closesFrame) {
                    throw ParserException::expectedButGot('","', $token);
                }
            }

            if ($frame->isAwaitsKeyDelimiter()) {
                if ($tokenId != Token::KEY_DELIMITER) {
                    throw ParserException::expectedButGot('":"', $token);
                }
                $frame->setAwaitsKeyDelimiter(false);
                continue;
            }

            if ($frame->isAwaitsKey()) {
                if ($tokenId != Token::STRING && $tokenId != Token::OBJECT_END) {
                    throw ParserException::expectedButGot('object key', $token);
                }
            }

            if ($frame->isArray()) {
                switch ($tokenId) {
                    case Token::STRING:
                    case Token::NUMBER:
                    case Token::NULL:
                    case Token::TRUE:
                    case Token::FALSE:
                        $frame->setAwaitsComa(true);
                        // the key of an array element is its index
                        $frame->setLastKey($frame->getElementCount());
                        $frame->incrementElementCount();
                        yield $createEvent(Event::VALUE, $token->getValue());
                        break;

                    case Token::ARRAY_START:
                        // NOTE(review): unlike the VALUE and OBJECT_START cases, the event is
                        // yielded before the last key is updated; order kept as in the original.
                        yield $createEvent(Event::ARRAY_START);
                        $frame->setAwaitsComa(true);
                        $frame->setLastKey($frame->getElementCount());
                        $frame->incrementElementCount();
                        $stack->push(StackFrame::array());
                        break;

                    case Token::ARRAY_END:
                        $stack->pop();
                        yield $createEvent(Event::ARRAY_END);
                        if ($stack->isEmpty()) {
                            yield $createEvent(Event::DOCUMENT_END);
                        }
                        break;

                    case Token::OBJECT_START:
                        $frame->setLastKey($frame->getElementCount());
                        yield $createEvent(Event::OBJECT_START);
                        $frame->setAwaitsComa(true);
                        $frame->incrementElementCount();
                        $stack->push(StackFrame::object());
                        break;

                    default:
                        // includes "}" which cannot close an array
                        throw ParserException::unexpectedToken($token);
                }

                continue;
            }

            if ($frame->isObject()) {
                switch ($tokenId) {
                    case Token::STRING:
                        if ($frame->isAwaitsKey()) {
                            yield $createEvent(Event::KEY, $token->getValue());
                            $frame->setLastKey($token->getValue());
                            $frame->setAwaitsKeyDelimiter(true);
                            $frame->setAwaitsKey(false);
                        } else {
                            yield $createEvent(Event::VALUE, $token->getValue());
                            $frame->setAwaitsComa(true);
                            $frame->incrementElementCount();
                        }
                        break;

                    case Token::NUMBER:
                    case Token::NULL:
                    case Token::TRUE:
                    case Token::FALSE:
                        yield $createEvent(Event::VALUE, $token->getValue());
                        $frame->setAwaitsComa(true);
                        $frame->incrementElementCount();
                        break;

                    case Token::ARRAY_START:
                        yield $createEvent(Event::ARRAY_START);
                        $frame->setAwaitsComa(true);
                        $frame->incrementElementCount();
                        $stack->push(StackFrame::array());
                        break;

                    case Token::OBJECT_START:
                        yield $createEvent(Event::OBJECT_START);
                        $frame->setAwaitsComa(true);
                        $frame->incrementElementCount();
                        $stack->push(StackFrame::object());
                        break;

                    case Token::OBJECT_END:
                        $stack->pop();
                        yield $createEvent(Event::OBJECT_END);
                        if ($stack->isEmpty()) {
                            yield $createEvent(Event::DOCUMENT_END);
                        }
                        break;

                    default:
                        // includes "]" which cannot close an object
                        throw ParserException::unexpectedToken($token);
                }

                continue;
            }
        }
    }

    /**
     * Returns a tokenizer over this parser's buffer.
     *
     * A new Tokenizer is created on every call; all of them read from the same
     * SourceBuffer instance.
     *
     * @return iterable|Tokenizer|Token[]
     */
    public function tokens(): iterable
    {
        return new Tokenizer($this->buffer);
    }

    /**
     * Event factory used by events(); a protected method so that subclasses
     * can override how events are built.
     *
     * @param string $eventId    one of the Event::* identifiers
     * @param mixed  $value      value carried by the event (null when there is none)
     * @param Stack  $stack      parser stack at the moment the event is created
     * @param int    $lineNumber line number reported by the current token
     * @param int    $charNumber char number reported by the current token
     * @return Event
     */
    protected function createEvent(string $eventId, $value, Stack $stack, int $lineNumber, int $charNumber): Event
    {
        return new Event($eventId, $value, $stack, $lineNumber, $charNumber);
    }
}
||
334 |