| 1 | <?php |
||
| 2 | declare(strict_types=1); |
||
| 3 | |||
| 4 | namespace JsonDecodeStream; |
||
| 5 | |||
| 6 | use Generator; |
||
| 7 | use JsonDecodeStream\Collector\Collector; |
||
| 8 | use JsonDecodeStream\Collector\CollectorInterface; |
||
| 9 | use JsonDecodeStream\Exception\ParserException; |
||
| 10 | use JsonDecodeStream\Internal\SourceBuffer; |
||
| 11 | use JsonDecodeStream\Internal\Stack; |
||
| 12 | use JsonDecodeStream\Internal\StackFrame; |
||
| 13 | use JsonDecodeStream\Source\FileSource; |
||
| 14 | use JsonDecodeStream\Source\Psr7Source; |
||
| 15 | use JsonDecodeStream\Source\SourceInterface; |
||
| 16 | use JsonDecodeStream\Source\StreamSource; |
||
| 17 | use JsonDecodeStream\Source\StringSource; |
||
| 18 | use Psr\Http\Message\StreamInterface; |
||
| 19 | |||
| 20 | class Parser |
||
| 21 | { |
||
/**
 * Buffer wrapping the input source; created in the constructor and handed to the Tokenizer in tokens().
 *
 * @var SourceBuffer
 */
protected $buffer;

/**
 * NOTE(review): not assigned anywhere in the visible part of this class — events() works on its own
 * local Stack instance. Possibly kept for subclasses; confirm before relying on it.
 *
 * @var Stack
 */
protected $stack;
||
| 27 | |||
/**
 * Wraps the given source in a SourceBuffer, which is later passed to the Tokenizer.
 *
 * @param SourceInterface $source input to be parsed
 */
public function __construct(SourceInterface $source)
{
    $buffer = new SourceBuffer($source);
    $this->buffer = $buffer;
}
|
| 32 | |||
/**
 * Named constructor: builds a parser that reads from an in-memory string.
 *
 * @param string $string content handed to StringSource
 * @return static
 */
public static function fromString(string $string)
{
    $source = new StringSource($string);

    return new static($source);
}
||
| 37 | |||
/**
 * Named constructor: builds a parser that reads from a file.
 *
 * @param string $path path handed to FileSource
 * @return static
 */
public static function fromFile(string $path)
{
    $source = new FileSource($path);

    return new static($source);
}
||
| 42 | |||
/**
 * Named constructor: builds a parser on top of a StreamSource.
 *
 * @param mixed $stream value handed to StreamSource as-is
 *                      (presumably an open PHP stream resource — confirm against StreamSource)
 * @return static
 */
public static function fromStream($stream)
{
    $source = new StreamSource($stream);

    return new static($source);
}
||
| 47 | |||
/**
 * Named constructor: builds a parser that reads from a PSR-7 stream.
 *
 * @param StreamInterface $stream PSR-7 stream handed to Psr7Source
 * @return static
 */
public static function fromPsr7(StreamInterface $stream)
{
    $source = new Psr7Source($stream);

    return new static($source);
}
||
| 52 | |||
/**
 * Feeds every parser event to a set of collectors and yields whatever they produce
 * as `key => value` pairs.
 *
 * String and null selectors are turned into built-in Collector instances;
 * CollectorInterface instances are used as given ($objectsAsAssoc is not applied to them).
 *
 * A collector's processEvent() may return:
 *  - null: nothing to emit for this event;
 *  - a two-element array [key, value]: emitted as a single item;
 *  - a Generator of such two-element arrays: each one is emitted in order.
 * Anything else is reported as a ParserException.
 *
 * @param null|string|string[]|CollectorInterface|CollectorInterface[] $selectors
 *        single or comma-separated selector string
 *        or custom CollectorInterface implementation
 *        or array of any of both
 *        or null to collect whole documents
 * @param bool $objectsAsAssoc forwarded to every built-in Collector created here
 *        (presumably makes JSON objects decode to associative arrays — confirm in Collector)
 * @return iterable|Generator
 * @throws Exception\CollectorException
 * @throws Exception\SelectorException
 * @throws Exception\TokenizerException
 * @throws ParserException
 */
public function items($selectors = null, bool $objectsAsAssoc = false)
{
    // Normalise the accepted argument shapes into a flat list of selectors.
    if (is_string($selectors)) {
        $selectorsArray = explode(',', $selectors);
    } elseif (is_array($selectors)) {
        $selectorsArray = $selectors;
    } elseif ($selectors instanceof CollectorInterface) {
        $selectorsArray = [$selectors];
    } elseif ($selectors === null) {
        $selectorsArray = [null];
    } else {
        throw new ParserException('Unexpected selectors are provided', ParserException::CODE_INVALID_ARGUMENT);
    }

    $collectors = [];
    foreach ($selectorsArray as $selector) {
        if (is_string($selector) || $selector === null) {
            $collectors[] = new Collector($selector, $objectsAsAssoc);
        } elseif ($selector instanceof CollectorInterface) {
            $collectors[] = $selector;
        } else {
            // The parentheses are required: "." binds tighter than "?:", so without them the
            // prefix becomes part of the (always truthy) condition and get_class() is called
            // on non-objects.
            throw new ParserException(
                'Invalid collector: '
                . (is_object($selector) ? get_class($selector) : gettype($selector)),
                ParserException::CODE_INVALID_ARGUMENT
            );
        }
    }

    foreach ($this->events() as $event) {
        foreach ($collectors as $collector) {
            $yielded = $collector->processEvent($event);

            if ($yielded === null) {
                // this collector has nothing to emit for the event
                continue;
            }

            if (is_array($yielded)) {
                if (count($yielded) !== 2) {
                    throw ParserException::unexpectedCollectorReturn($yielded, $event);
                }
                [$key, $value] = $yielded;
                yield $key => $value;
            } elseif ($yielded instanceof Generator) {
                foreach ($yielded as $yieldedSingle) {
                    if (!is_array($yieldedSingle) || count($yieldedSingle) !== 2) {
                        // report the offending item rather than the generator that produced it
                        throw ParserException::unexpectedCollectorReturn($yieldedSingle, $event);
                    }
                    [$key, $value] = $yieldedSingle;
                    yield $key => $value;
                }
            } else {
                throw ParserException::unexpectedCollectorReturn($yielded, $event);
            }
        }
    }
}
|
| 119 | |||
/**
 * Turns the token stream into a stream of structural events.
 *
 * Emitted events: DOCUMENT_START / DOCUMENT_END around every top-level object or array,
 * OBJECT_START / OBJECT_END, ARRAY_START / ARRAY_END, KEY for object keys and VALUE for
 * scalar values. Whitespace tokens are skipped. Commas between top-level documents are
 * ignored, so a sequence of documents in one source is parsed one after another.
 *
 * Every event receives the same Stack instance that this method keeps mutating, so the
 * order of stack updates relative to each `yield` is significant.
 *
 * A closing token that does not match the currently open container ("}" inside an array,
 * "]" inside an object) is rejected with a ParserException.
 *
 * @return Generator|Event[]
 * @psalm-return \Generator<Event>
 * @throws ParserException on a token that is not allowed at the current position
 */
public function events(): Generator
{
    $stack = new Stack();
    $tokens = $this->tokens();

    // Captured by reference below, so the factory always sees the token of the current iteration.
    $token = null;

    // shortcut to event factory
    $createEvent = function (string $eventId, $value = null) use ($stack, &$token): Event {
        return $this->createEvent($eventId, $value, $stack, $token->getLineNumber(), $token->getCharNumber());
    };

    foreach ($tokens as $token) {
        $id = $token->getId();

        if ($id == Token::WHITESPACE) {
            // ignore whitespaces
            continue;
        }

        if ($stack->isEmpty()) {
            // Top level: only the start of a new document (or a separator) is acceptable.
            switch ($id) {
                case Token::OBJECT_START:
                    yield $createEvent(Event::DOCUMENT_START);
                    yield $createEvent(Event::OBJECT_START);

                    $stack->push(StackFrame::object());
                    break;

                case Token::ARRAY_START:
                    yield $createEvent(Event::DOCUMENT_START);
                    yield $createEvent(Event::ARRAY_START);

                    $stack->push(StackFrame::array());
                    break;

                case Token::COMA:
                    // this is ignored at top-level to parse json sequences
                    break;

                default:
                    throw ParserException::unexpectedToken($token);
            }
            continue;
        }

        // Stays valid until the stack is pushed or popped further down in this iteration.
        $frame = $stack->current();

        if ($frame->isAwaitsComa()) {
            if ($id == Token::COMA) {
                $frame->setAwaitsComa(false);
                if ($frame->isObject()) {
                    $frame->setLastKey(null);
                    $frame->setAwaitsKey(true);
                }
                continue;
            }

            // Besides a comma, only the closer matching the open container may follow a value.
            $closesFrame = ($frame->isObject() && $id == Token::OBJECT_END)
                || ($frame->isArray() && $id == Token::ARRAY_END);
            if (!$closesFrame) {
                throw ParserException::expectedButGot('","', $token);
            }
        }

        if ($frame->isAwaitsKeyDelimiter()) {
            if ($id == Token::KEY_DELIMITER) {
                $frame->setAwaitsKeyDelimiter(false);
                continue;
            }
            throw ParserException::expectedButGot('":"', $token);
        }

        if ($frame->isAwaitsKey() && $id != Token::STRING && $id != Token::OBJECT_END) {
            throw ParserException::expectedButGot('object key', $token);
        }

        if ($frame->isArray()) {
            switch ($id) {
                case Token::STRING:
                case Token::NUMBER:
                case Token::NULL:
                case Token::TRUE:
                case Token::FALSE:
                    $frame->setAwaitsComa(true);
                    $frame->setLastKey($frame->getElementCount());
                    $frame->incrementElementCount();
                    yield $createEvent(Event::VALUE, $token->getValue());
                    break;

                case Token::ARRAY_START:
                    // The element index is recorded before the event is yielded, exactly as for
                    // OBJECT_START below, so the event does not see the previous element's key.
                    $frame->setLastKey($frame->getElementCount());
                    yield $createEvent(Event::ARRAY_START);
                    $frame->setAwaitsComa(true);
                    $frame->incrementElementCount();
                    $stack->push(StackFrame::array());
                    break;

                case Token::ARRAY_END:
                    $stack->pop();
                    yield $createEvent(Event::ARRAY_END);
                    if ($stack->isEmpty()) {
                        yield $createEvent(Event::DOCUMENT_END);
                    }
                    break;

                case Token::OBJECT_START:
                    $frame->setLastKey($frame->getElementCount());
                    yield $createEvent(Event::OBJECT_START);
                    $frame->setAwaitsComa(true);
                    $frame->incrementElementCount();
                    $stack->push(StackFrame::object());
                    break;

                case Token::OBJECT_END:
                    // "}" cannot close an array: reject it instead of popping the frame
                    // and emitting a bogus OBJECT_END.
                default:
                    throw ParserException::unexpectedToken($token);
            }

            continue;
        }

        if ($frame->isObject()) {
            switch ($id) {
                case Token::STRING:
                    if ($frame->isAwaitsKey()) {
                        yield $createEvent(Event::KEY, $token->getValue());
                        $frame->setLastKey($token->getValue());
                        $frame->setAwaitsKeyDelimiter(true);
                        $frame->setAwaitsKey(false);
                    } else {
                        yield $createEvent(Event::VALUE, $token->getValue());
                        $frame->setAwaitsComa(true);
                        $frame->incrementElementCount();
                    }
                    break;

                case Token::NUMBER:
                case Token::NULL:
                case Token::TRUE:
                case Token::FALSE:
                    yield $createEvent(Event::VALUE, $token->getValue());
                    $frame->setAwaitsComa(true);
                    $frame->incrementElementCount();
                    break;

                case Token::ARRAY_START:
                    yield $createEvent(Event::ARRAY_START);
                    $frame->setAwaitsComa(true);
                    $frame->incrementElementCount();
                    $stack->push(StackFrame::array());
                    break;

                case Token::OBJECT_START:
                    yield $createEvent(Event::OBJECT_START);
                    $frame->setAwaitsComa(true);
                    $frame->incrementElementCount();
                    $stack->push(StackFrame::object());
                    break;

                case Token::OBJECT_END:
                    $stack->pop();
                    yield $createEvent(Event::OBJECT_END);
                    if ($stack->isEmpty()) {
                        yield $createEvent(Event::DOCUMENT_END);
                    }
                    break;

                case Token::ARRAY_END:
                    // "]" cannot close an object: reject it instead of emitting a bogus
                    // ARRAY_END and popping the frame.
                default:
                    throw ParserException::unexpectedToken($token);
            }

            continue;
        }
    }
}
|
| 312 | |||
/**
 * Creates a new Tokenizer over this parser's source buffer.
 *
 * @return iterable|Tokenizer|Token[]
 */
public function tokens(): iterable
{
    $tokenizer = new Tokenizer($this->buffer);

    return $tokenizer;
}
||
| 320 | |||
/**
 * Event factory used by events(); protected, so a subclass can override how events are built.
 *
 * @param string $eventId    one of the Event::* identifiers
 * @param mixed  $value      payload of the event (null when the event carries none)
 * @param Stack  $stack      parser stack the event is associated with
 * @param int    $lineNumber line number reported by the token that triggered the event
 * @param int    $charNumber char number reported by the token that triggered the event
 * @return Event
 */
protected function createEvent(string $eventId, $value, Stack $stack, int $lineNumber, int $charNumber): Event
{
    $event = new Event(
        $eventId,
        $value,
        $stack,
        $lineNumber,
        $charNumber
    );

    return $event;
}
||
| 333 | } |
||
| 334 |