TurtleParser::xNIL()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 2.0625

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 3
c 1
b 0
f 0
nc 2
nop 1
dl 0
loc 7
ccs 3
cts 4
cp 0.75
crap 2.0625
rs 10
1
<?php
2
3
/**
4
 * This file is part of the sweetrdf/InMemoryStoreSqlite package and licensed under
5
 * the terms of the GPL-2 license.
6
 *
7
 * (c) Konrad Abicht <[email protected]>
8
 * (c) Benjamin Nowack
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace sweetrdf\InMemoryStoreSqlite\Parser;
15
16
use Exception;
17
use sweetrdf\InMemoryStoreSqlite\Log\Logger;
18
use sweetrdf\InMemoryStoreSqlite\NamespaceHelper;
19
use sweetrdf\InMemoryStoreSqlite\StringReader;
20
21
use function sweetrdf\InMemoryStoreSqlite\calcURI;
22
23
class TurtleParser extends BaseParser
24
{
25
    protected int $state;
26
    protected int $max_parsing_loops;
27
    protected string $unparsed_code;
28
29 100
    /**
     * Hands the collaborators to the parent constructor and resets this
     * parser's own bookkeeping fields.
     */
    public function __construct(Logger $logger, NamespaceHelper $namespaceHelper, StringReader $stringReader)
    {
        parent::__construct($logger, $namespaceHelper, $stringReader);

        // Upper bound for consecutive read cycles in parse() that make no progress.
        $this->max_parsing_loops = 500;
        $this->unparsed_code = '';
        $this->state = 0;
    }
37
38 100
    /**
     * Matches $re against the start of $v, skipping leading whitespace and comments.
     *
     * A final "(.*)" group is appended to $re, so on success the last element
     * of the returned match array is the not-yet-consumed remainder of $v.
     *
     * @param string $re      regex fragment (without delimiters)
     * @param string $v       input text
     * @param string $options PCRE modifiers appended after the closing delimiter
     *
     * @return array|false the preg_match groups on success, false otherwise
     */
    protected function x($re, $v, $options = 'si')
    {
        // Collapse a leading run of 0xA0 / 0xC2 bytes into one plain space.
        $input = preg_replace('/^[\xA0\xC2]+/', ' ', $v);

        /* comment removal */
        $comment = [];
        while (1 === preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $input, $comment)) {
            $input = $comment[2];
        }

        $groups = [];
        if (preg_match("/^\s*".$re.'(.*)$/'.$options, $input, $groups)) {
            return $groups;
        }

        return false;
    }
49
50 2
    /**
     * Advances the blank node counter and returns a fresh "_:"-prefixed label
     * built from the blank node prefix and the new counter value.
     */
    private function createBnodeID(): string
    {
        ++$this->bnode_id;

        return sprintf('_:%s%s', $this->bnode_prefix, $this->bnode_id);
    }
56
57 2
    /**
     * Stores a triple at the next free index and bumps the triple counter.
     *
     * @param array $t triple record
     */
    protected function addT(array $t): void
    {
        $this->triples[$this->t_count++] = $t;
    }
62
63
    /**
     * Returns the number of triples stored so far (the triple counter).
     */
    protected function countTriples()
    {
        return $this->t_count;
    }
67
68
    /**
     * Returns the input that the last parse() run could not consume.
     */
    protected function getUnparsedCode()
    {
        return $this->unparsed_code;
    }
72
73 2
    /**
     * Parses Turtle input chunk by chunk and collects the triples in $this->triples.
     *
     * The reader is initialised with $path / $data and read in 8192-byte
     * chunks. Each cycle first tries to consume prologue declarations
     * (base / prefix) and, once the prologue is done, triples blocks.
     * Whatever is left unconsumed afterwards ends up in $this->unparsed_code;
     * if it is more than whitespace/comments and no error was logged yet,
     * its first 30 bytes are reported through the logger.
     *
     * @param string $path passed to the reader
     * @param string $data passed to the reader
     *
     * @throws Exception when more than max_parsing_loops chunks in a row were
     *                   read without parsing a prologue or any triples
     */
    public function parse(string $path, string $data = ''): void
    {
        $this->triples = [];
        $this->t_count = 0;
        $this->reader->init($path, $data);
        $this->base = $this->reader->getBase();
        $this->r = ['vars' => []];
        /* parse */
        $buffer = '';
        $more_triples = [];
        $sub_v = '';
        $sub_v2 = '';
        $loops = 0;
        $prologue_done = 0;
        // NOTE(review): the loop ends on any falsy chunk, which includes the
        // string "0" - confirm readStream() cannot return that on its own.
        while ($d = $this->reader->readStream(8192)) {
            $buffer .= $d;
            $sub_v = $buffer;
            do {
                $proceed = 0;
                if (!$prologue_done) {
                    $proceed = 1;
                    if (([$sub_r, $sub_v] = $this->xPrologue($sub_v)) && $sub_r) {
                        $loops = 0;
                        $sub_v .= $this->reader->readStream(128);
                        /* in case we missed the final DOT in the previous prologue loop */
                        if ($sub_r = $this->x('\.', $sub_v)) {
                            $sub_v = $sub_r[1];
                        }
                        /* more prologue to come, use outer loop */
                        if ($this->x("\@?(base|prefix)", $sub_v)) {
                            $proceed = 0;
                        }
                    } else {
                        $prologue_done = 1;
                    }
                }
                if (
                    $prologue_done
                    && ([$sub_r, $sub_v, $more_triples, $sub_v2] = $this->xTriplesBlock($sub_v))
                    && \is_array($sub_r)
                ) {
                    $proceed = 1;
                    $loops = 0;
                    foreach ($sub_r as $t) {
                        $this->addT($t);
                    }
                }
            } while ($proceed);
            ++$loops;
            $buffer = $sub_v;
            if ($loops > $this->max_parsing_loops) {
                $msg = 'too many loops: '.$loops.'. Could not parse "'.substr($buffer, 0, 200).'..."';
                throw new Exception($msg);
            }
        }

        // Triples the last xTriplesBlock() call parsed but did not see terminated.
        foreach ($more_triples as $t) {
            $this->addT($t);
        }
        $sub_v = \count($more_triples) ? $sub_v2 : $sub_v;
        $buffer = $sub_v;
        $this->unparsed_code = $buffer;

        /* remove trailing comments */
        while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) {
            $this->unparsed_code = $m[2];
        }

        if ($this->unparsed_code && !$this->logger->hasEntries('error')) {
            // Only line breaks are flattened. The former class "[\x0a|\x0d]"
            // also contained a literal "|" and thus altered pipes in the snippet.
            $rest = preg_replace('/[\x0a\x0d]/', ' ', substr($this->unparsed_code, 0, 30));
            if (trim($rest)) {
                $this->logger->error('Could not parse "'.$rest.'"');
            }
        }
    }
147
148 2
    /**
     * Consumes an optional base declaration followed by any number of prefix
     * declarations, but only while no triple has been stored yet.
     *
     * A parsed base replaces $this->base; each prefix is registered with the
     * namespace helper.
     *
     * @return array [1 if any declaration was consumed else 0, remaining input]
     */
    protected function xPrologue($v)
    {
        $found = 0;
        if ($this->t_count) {
            return [$found, $v];
        }

        [$base, $v] = $this->xBaseDecl($v);
        if ($base) {
            $this->base = $base;
            $found = 1;
        }

        while (true) {
            [$decl, $v] = $this->xPrefixDecl($v);
            if (!$decl) {
                break;
            }
            $this->namespaceHelper->setPrefix($decl['prefix'], $decl['uri']);
            $found = 1;
        }

        return [$found, $v];
    }
164
165
    /* 3 */
166
167 100
    /**
     * Parses a base declaration: the keyword "base" (optionally preceded by
     * "@"), an IRI reference and an optional trailing dot.
     *
     * @return array [IRI reference, remaining input] or [0, $v] when not matched
     */
    protected function xBaseDecl($v)
    {
        $keyword = $this->x("\@?base\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$iri, $rest] = $this->xIRI_REF($keyword[1]);
        if (!$iri) {
            return [0, $v];
        }

        // The terminating dot is optional here.
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [$iri, $rest];
    }
181
182
    /* 4 */
183
184 100
    /**
     * Parses a prefix declaration: the keyword "prefix" (optionally preceded
     * by "@"), a namespace prefix, an IRI reference and an optional dot.
     *
     * @return array [['prefix' => ..., 'uri_ref' => ..., 'uri' => ...], remaining input]
     *               or [0, $v] when not matched; 'uri' is 'uri_ref' resolved
     *               against the current base via calcURI()
     */
    protected function xPrefixDecl($v)
    {
        $keyword = $this->x("\@?prefix\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$prefix, $rest] = $this->xPNAME_NS($keyword[1]);
        if (!$prefix) {
            return [0, $v];
        }

        [$uriRef, $rest] = $this->xIRI_REF($rest);
        if (!$uriRef) {
            return [0, $v];
        }

        $uri = calcURI($uriRef, $this->base);

        // The terminating dot is optional here.
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [['prefix' => $prefix, 'uri_ref' => $uriRef, 'uri' => $uri], $rest];
    }
202
203
    /* 21.., 32.. */
204
205 100
    /**
     * Parses consecutive triple statements with a small state machine:
     * 1 = expecting subject, 2 = predicate, 3 = object, 4 = separator.
     *
     * Triples are first gathered in a pending list and only moved to the
     * result when a "." (or one of "}", "{", OPTIONAL, FILTER, GRAPH) is seen.
     *
     * @return array [completed triples or 0 when there are none,
     *                input remaining after the last completed statement,
     *                pending (not yet terminated) triples,
     *                input remaining after the pending triples]
     */
    protected function xTriplesBlock($v)
    {
        $pre_r = [];
        $r = [];
        $state = 1;
        $sub_v = $v;
        $buffer = $sub_v;
        do {
            $proceed = 0;
            if (1 == $state) {/* expecting subject */
                $t = ['type' => 'triple', 's' => '', 'p' => '', 'o' => '', 's_type' => '', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
                if (([$sub_r, $sub_v] = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['value'];
                    $t['s_type'] = $sub_r['type'];
                    $state = 2;
                    $proceed = 1;
                    if ($sub_r = $this->x('(\}|\.)', $sub_v)) {
                        if ('placeholder' == $t['s_type']) {
                            $state = 4;
                        } else {
                            $this->logger->error('"'.$sub_r[1].'" after subject found.');
                        }
                    }
                } elseif (([$sub_r, $sub_v] = $this->xCollection($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                    if ($this->x('\.', $sub_v)) {
                        $this->logger->error('DOT after subject found.');
                    }
                } elseif (([$sub_r, $sub_v] = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    $this->logger->error('Subject expected, DOT found.'.$sub_v);
                }
            }
            if (2 == $state) {/* expecting predicate */
                if ($sub_r = $this->x('a\s+', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                    $t['p_type'] = 'uri';
                    $state = 3;
                    $proceed = 1;
                } elseif (([$sub_r, $sub_v] = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    if ('bnode' == $sub_r['type']) {
                        $this->logger->error('Blank node used as triple predicate');
                    }
                    $t['p'] = $sub_r['value'];
                    $t['p_type'] = $sub_r['type'];
                    $state = 3;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    $state = 4;
                } elseif ($this->x('\}', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
            if (3 == $state) {/* expecting object */
                if (([$sub_r, $sub_v] = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['value'];
                    $t['o_type'] = $sub_r['type'];
                    $t['o_lang'] = $sub_r['lang'] ?? '';
                    $t['o_datatype'] = $sub_r['datatype'] ?? '';
                    $pre_r[] = $t;
                    $state = 4;
                    $proceed = 1;
                } elseif (([$sub_r, $sub_v] = $this->xCollection($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    // $t is reused across "," separated objects: clear both
                    // literal-only fields so a previous literal cannot leak in.
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                } elseif (([$sub_r, $sub_v] = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                }
            }
            if (4 == $state) {/* expecting . or ; or , or } */
                if ($sub_r = $this->x('\.', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $state = 1;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\;', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 2;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\,', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 3;
                    $proceed = 1;
                    if ($this->x('\}', $sub_v)) {
                        $this->logger->error('Object expected, } found.');
                    }
                }
                // A block delimiter or keyword ends this triples block; the
                // delimiter itself is left in the input.
                if ($this->x('(\}|\{|OPTIONAL|FILTER|GRAPH)', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
        } while ($proceed);

        return \count($r) ? [$r, $buffer, $pre_r, $sub_v] : [0, $buffer, $pre_r, $sub_v];
    }
327
328
    /* 39.. */
329
330 98
    /**
     * Parses a blank node property list: "[" predicate/object pairs "]".
     *
     * A fresh blank node becomes the subject of every triple listed inside
     * the brackets; nested collections and property lists contribute their
     * own triples as well.
     *
     * @return array [['id' => bnode label, 'type' => 'bnode', 'triples' => [...]],
     *               remaining input] or [0, $v] when there is no "[" or the
     *               list is never closed by "]"
     */
    protected function xBlankNodePropertyList($v)
    {
        $sub_r = $this->x('\[', $v);
        if (!$sub_r) {
            return [0, $v];
        }

        $sub_v = $sub_r[1];
        $s = $this->createBnodeID();
        $r = ['id' => $s, 'type' => 'bnode', 'triples' => []];
        $t = ['type' => 'triple', 's' => $s, 'p' => '', 'o' => '', 's_type' => 'bnode', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
        $state = 2;
        $closed = 0;
        do {
            $proceed = 0;
            if (2 == $state) {/* expecting predicate */
                if ($sub_r = $this->x('a\s+', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                    $t['p_type'] = 'uri';
                    $state = 3;
                    $proceed = 1;
                } elseif (([$sub_r, $sub_v] = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['p'] = $sub_r['value'];
                    $t['p_type'] = $sub_r['type'];
                    $state = 3;
                    $proceed = 1;
                }
            }
            if (3 == $state) {/* expecting object */
                if (([$sub_r, $sub_v] = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['value'];
                    $t['o_type'] = $sub_r['type'];
                    $t['o_lang'] = $sub_r['lang'] ?? '';
                    $t['o_datatype'] = $sub_r['datatype'] ?? '';
                    $r['triples'][] = $t;
                    $state = 4;
                    $proceed = 1;
                } elseif (([$sub_r, $sub_v] = $this->xCollection($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    // $t is reused across "," separated objects: clear both
                    // literal-only fields so a previous literal cannot leak in.
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                } elseif (([$sub_r, $sub_v] = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                }
            }
            if (4 == $state) {/* expecting . or ; or , or ] */
                // These checks are deliberately independent (not elseif): a
                // separator may be directly followed by the closing bracket.
                if ($sub_r = $this->x('\.', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 1;
                    $proceed = 1;
                }
                if ($sub_r = $this->x('\;', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 2;
                    $proceed = 1;
                }
                if ($sub_r = $this->x('\,', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 3;
                    $proceed = 1;
                }
                if ($sub_r = $this->x('\]', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $proceed = 0;
                    $closed = 1;
                }
            }
        } while ($proceed);

        return $closed ? [$r, $sub_v] : [0, $v];
    }
412
413
    /* 40.. */
414
415 98
    /**
     * Parses a collection "( item ... )" into an rdf:first / rdf:rest chain
     * of blank nodes terminated by rdf:nil.
     *
     * Items may be terms, nested collections or blank node property lists.
     *
     * @return array [['id' => head bnode label, 'type' => 'bnode', 'triples' => [...]],
     *               remaining input] or [0, $v] when there is no "(" or an
     *               item / the closing ")" is missing
     */
    protected function xCollection($v)
    {
        $open = $this->x('\(', $v);
        if (!$open) {
            return [0, $v];
        }

        $rest = $open[1];
        $node = $this->createBnodeID();
        $list = ['id' => $node, 'type' => 'bnode', 'triples' => []];

        while (true) {
            if (([$item, $rest] = $this->xVarOrTerm($rest)) && $item) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $item['value'],
                    'o_type' => $item['type'],
                    'o_lang' => $item['lang'] ?? '',
                    'o_datatype' => $item['datatype'] ?? '',
                ];
            } elseif (([$item, $rest] = $this->xCollection($rest)) && $item) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $item['id'],
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $list['triples'] = array_merge($list['triples'], $item['triples']);
            } elseif (([$item, $rest] = $this->xBlankNodePropertyList($rest)) && $item) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'o' => $item['id'],
                    's_type' => 'bnode',
                    'p_type' => 'uri',
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $list['triples'] = array_merge($list['triples'], $item['triples']);
            } else {
                // Neither an item nor a preceding ")" was found: not a valid collection.
                return [0, $v];
            }

            $close = $this->x('\)', $rest);
            if ($close) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                    'p_type' => 'uri',
                    'o' => NamespaceHelper::NAMESPACE_RDF.'nil',
                    'o_type' => 'uri',
                    'o_lang' => '',
                    'o_datatype' => '',
                ];

                return [$list, $close[1]];
            }

            // More items follow: link the current cell to a new one.
            $next = $this->createBnodeID();
            $list['triples'][] = [
                'type' => 'triple',
                's' => $node,
                'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                'o' => $next,
                's_type' => 'bnode',
                'p_type' => 'uri',
                'o_type' => 'bnode',
                'o_lang' => '',
                'o_datatype' => '',
            ];
            $node = $next;
        }
    }
506
507
    /* 42 */
508
509 100
    /**
     * Tries to read a variable first and a graph term second.
     *
     * @return array [term record, remaining input] or [0, $v] when neither matches
     */
    protected function xVarOrTerm($v)
    {
        foreach (['xVar', 'xGraphTerm'] as $method) {
            [$term, $rest] = $this->{$method}($v);
            if ($term) {
                return [$term, $rest];
            }
        }

        return [0, $v];
    }
519
520
    /* 44, 74.., 75.. */
521
522 100
    /**
     * Parses a variable: "?" or "$" followed by a variable name.
     *
     * Every distinct name is recorded once in $this->r['vars'].
     *
     * @return array [['value' => name, 'type' => 'var'], remaining input]
     *               or [0, $v] when no variable starts here
     */
    protected function xVar($v)
    {
        if ($r = $this->x('(\?|\$)([^\s]+)', $v)) {
            [$name, $rest] = $this->xVARNAME($r[2]);
            // Compare against '' instead of relying on truthiness, otherwise
            // the name "0" would be rejected.
            if (\is_string($name) && '' !== $name) {
                // Strict lookup: loosely compared, numeric-looking names such
                // as "1" and "01" would count as the same variable.
                if (!\in_array($name, $this->r['vars'], true)) {
                    $this->r['vars'][] = $name;
                }

                // $rest is what xVARNAME left of the non-space chunk, $r[3]
                // is everything behind that chunk.
                return [['value' => $name, 'type' => 'var'], $rest.$r[3]];
            }
        }

        return [0, $v];
    }
536
537
    /* 45 */
538
539 100
    /**
     * Tries the term parsers in a fixed order and returns the first hit.
     *
     * Scalar results are wrapped as ['value' => ...]; a 'type' is filled in
     * from the table below unless the sub-parser already supplied one.
     *
     * @return array [term record, remaining input] or [0, $v] when nothing matches
     */
    protected function xGraphTerm($v)
    {
        $parsers = [
            'xIRIref' => 'uri',
            'xRDFLiteral' => 'literal',
            'xNumericLiteral' => 'literal',
            'xBooleanLiteral' => 'literal',
            'xBlankNode' => 'bnode',
            'xNIL' => 'uri',
            'xPlaceholder' => 'placeholder',
        ];

        foreach ($parsers as $method => $defaultType) {
            [$term, $rest] = $this->{$method}($v);
            if (!$term) {
                continue;
            }
            if (!\is_array($term)) {
                $term = ['value' => $term];
            }
            $term['type'] ??= $defaultType;

            return [$term, $rest];
        }

        return [0, $v];
    }
563
564
    /* 60 */
565
566 99
    /**
     * Parses a quoted string literal with an optional language tag or an
     * optional "^^" datatype IRI (both must follow without whitespace).
     *
     * The string value is passed through unescapeNtripleUTF().
     *
     * @return array [literal record (plus 'lang' or 'datatype' when present),
     *               remaining input] or [0, $v] when no string starts here
     */
    protected function xRDFLiteral($v)
    {
        [$literal, $sub_v] = $this->xString($v);
        if (!$literal) {
            return [0, $v];
        }

        $literal['value'] = $this->unescapeNtripleUTF($literal['value']);

        if (([$lang, $sub_v] = $this->xLANGTAG($sub_v)) && $lang) {
            $literal['lang'] = $lang;
        } elseif (
            !$this->x('\s', $sub_v)
            && ($marker = $this->x('\^\^', $sub_v))
            && ([$datatype, $sub_v] = $this->xIRIref($marker[1]))
            // Test the IRI itself. The former "$sub_r[1]" looked at the
            // second character of the IRI string (or indexed the int 0).
            && $datatype
        ) {
            $literal['datatype'] = $datatype;
        }

        return [$literal, $sub_v];
    }
587
588
    /* 61.., 62.., 63.., 64.. */
589
590 98
    /**
     * Parses an optionally signed number, trying double, decimal and integer
     * in that order.
     *
     * @return array [['value' => sign + digits, 'type' => 'literal',
     *               'datatype' => matching XSD type], remaining input]
     *               or [0, $v] when no number starts here
     */
    protected function xNumericLiteral($v)
    {
        // The sign group is optional, so this match only splits off the sign.
        $sign = $this->x('(\-|\+)?', $v);
        $prefix = $sign[1];
        $rest = $sign[2];

        $parsers = ['xDOUBLE' => 'double', 'xDECIMAL' => 'decimal', 'xINTEGER' => 'integer'];
        foreach ($parsers as $method => $xsd) {
            [$number, $rest] = $this->{$method}($rest);
            if (false === $number) {
                continue;
            }

            return [
                [
                    'value' => $prefix.$number,
                    'type' => 'literal',
                    'datatype' => NamespaceHelper::NAMESPACE_XSD.$xsd,
                ],
                $rest,
            ];
        }

        return [0, $v];
    }
610
611
    /* 65.. */
612
613 98
    /**
     * Parses the keyword "true" or "false" at the start of the input.
     *
     * @return array [matched keyword, remaining input] or [0, $v]
     */
    protected function xBooleanLiteral($v)
    {
        $match = $this->x('(true|false)', $v);
        if (!$match) {
            return [0, $v];
        }

        return [$match[1], $match[2]];
    }
621
622
    /* 66.., 87.., 88.., 89.., 90.., 91.. */
623
624 99
    /**
     * Parses a quoted string delimited by ', ", ''' or """.
     *
     * The closing delimiter is the first occurrence that is not preceded by
     * an odd number of backslashes. Largely simplified, may need some tweaks
     * in following revisions.
     *
     * @return array [['value' => raw text between the delimiters,
     *               'type' => 'literal', 'sub_type' => delimiter kind],
     *               remaining input] or [0, $v] when no (closed) string starts here
     */
    protected function xString($v)
    {
        if (!preg_match('/^\s*([\']{3}|\'|[\"]{3}|\")(.*)$/s', $v, $m)) {
            return [0, $v];
        }

        [, $delim, $rest] = $m;
        $sub_types = ["'''" => 'literal_long1', '"""' => 'literal_long2', "'" => 'literal1', '"' => 'literal2'];
        $sub_type = $sub_types[$delim];

        $offset = 0;
        while (false !== ($delimPos = strpos($rest, $delim, $offset))) {
            $after = substr($rest, $delimPos + \strlen($delim));
            $value = substr($rest, 0, $delimPos);

            // An odd run of trailing backslashes escapes this delimiter.
            $escaped = preg_match('/([\x5c]+)$/s', $value, $slashes) && (\strlen($slashes[1]) % 2);
            if (!$escaped) {
                return [['value' => $value, 'type' => 'literal', 'sub_type' => $sub_type], $after];
            }

            $offset = $delimPos + 1;
        }

        return [0, $v];
    }
658
659
    /* 67 */
660
661 100
    /**
     * Parses either an IRI reference in angle brackets (resolved against the
     * current base via calcURI()) or a prefixed name.
     *
     * @return array [IRI, remaining input] or [0, remaining input] when neither matches
     */
    protected function xIRIref($v)
    {
        [$iri, $rest] = $this->xIRI_REF($v);
        if ($iri) {
            return [calcURI($iri, $this->base), $rest];
        }

        [$name, $rest] = $this->xPrefixedName($rest);
        if ($name) {
            return [$name, $rest];
        }

        return [0, $rest];
    }
671
672
    /* 68 */
673
674 100
    /**
     * Parses a prefixed name: "prefix:local", or a bare "prefix:" whose
     * prefix is known to the namespace helper.
     *
     * @return array [expanded IRI, remaining input] or [0, input] when not matched
     */
    protected function xPrefixedName($v)
    {
        [$full, $v] = $this->xPNAME_LN($v);
        if ($full) {
            return [$full, $v];
        }

        [$prefix, $rest] = $this->xPNAME_NS($v);
        if ($prefix && $this->namespaceHelper->hasPrefix($prefix)) {
            return [$this->namespaceHelper->getNamespace($prefix), $rest];
        }

        return [0, $v];
    }
686
687
    /* 69.., 73.., 93, 94..  */
688
689 98
    /**
     * Parses a labelled blank node ("_:" + local name) or an anonymous one
     * ("[" optional whitespace "]"), which receives a generated label.
     *
     * @return array [['type' => 'bnode', 'value' => label], remaining input]
     *               or [0, $v] when not matched
     */
    protected function xBlankNode($v)
    {
        $marker = $this->x('\_\:', $v);
        if ($marker) {
            [$label, $rest] = $this->xPN_LOCAL($marker[1]);
            if ($label) {
                return [['type' => 'bnode', 'value' => '_:'.$label], $rest];
            }
        }

        $anon = $this->x('\[[\x20\x9\xd\xa]*\]', $v);
        if ($anon) {
            return [['type' => 'bnode', 'value' => $this->createBnodeID()], $anon[1]];
        }

        return [0, $v];
    }
700
701
    /* 70.. @@sync with SPARQLParser */
702
703 2
    /**
     * Parses an IRI reference enclosed in angle brackets.
     *
     * Three forms are tried in order: a "${...}" placeholder inside the
     * brackets, the empty reference "<>", and a regular reference.
     *
     * @return array [reference text (true for "<>" or a falsy text such as "0"),
     *               remaining input] or [0, $v] when not matched
     */
    protected function xIRI_REF($v)
    {
        // NOTE(review): xPlaceholder() is not visible here; if it always
        // returns an array this second condition is always true - confirm.
        if (($r = $this->x('\<(\$\{[^\>]*\})\>', $v)) && $this->xPlaceholder($r[1])) {
            return [$r[1], $r[2]];
        }

        if ($r = $this->x('\<\>', $v)) {
            return [true, $r[1]];
        }

        if ($r = $this->x('\<([^\s][^\<\>]*)\>', $v)) {
            return [$r[1] ?: true, $r[2]];
        }

        return [0, $v];
    }
716
717
    /* 71 */
718
719 100
    /**
     * Parses a namespace prefix: an optional prefix name followed by ":".
     *
     * @return array [prefix including the trailing ":", remaining input]
     *               or [0, $v] when no colon follows
     */
    protected function xPNAME_NS($v)
    {
        [$name, $rest] = $this->xPN_PREFIX($v);

        $colon = $this->x("\:", $rest);
        if (!$colon) {
            return [0, $v];
        }

        return [($name ?: '').':', $colon[1]];
    }
726
727
    /* 72 */
728
729 100
    /**
     * Parses "prefix:local" and expands it with the namespace registered for
     * the prefix. No whitespace is allowed between the colon and the local part.
     *
     * @return array [namespace + local name, remaining input] or [0, $v] when
     *               the syntax does not match or the prefix is unknown
     */
    protected function xPNAME_LN($v)
    {
        [$prefix, $rest] = $this->xPNAME_NS($v);
        if (!$prefix || $this->x('\s', $rest)) {
            return [0, $v];
        }

        [$local, $rest] = $this->xPN_LOCAL($rest);
        if (!$local || !$this->namespaceHelper->hasPrefix($prefix)) {
            return [0, $v];
        }

        return [$this->namespaceHelper->getNamespace($prefix).$local, $rest];
    }
743
744
    /* 76 */
745
746 53
    /**
     * Parses a language tag ("@" + tag) that follows without leading whitespace.
     *
     * @return array [tag without "@", remaining input] or [0, $v] when not matched
     */
    protected function xLANGTAG($v)
    {
        if ($this->x('\s', $v)) {
            return [0, $v];
        }

        $match = $this->x('\@([a-z]+(\-[a-z0-9]+)*)', $v);
        if (!$match) {
            return [0, $v];
        }

        // Group 2 is the last subtag; group 3 is the remainder added by x().
        return [$match[1], $match[3]];
    }
754
755
    /* 77.. */
756
757 98
    /**
     * Parses a run of digits.
     *
     * @return array [digits, remaining input] or [false, $v] when not matched
     */
    protected function xINTEGER($v)
    {
        $match = $this->x('([0-9]+)', $v);

        return $match ? [$match[1], $match[2]] : [false, $v];
    }
765
766
    /* 78.. */
767
768 98
    /**
     * Parses a decimal number: digits "." optional digits, or "." digits.
     *
     * @return array [number text, remaining input] or [false, $v] when not matched
     */
    protected function xDECIMAL($v)
    {
        $patterns = ['([0-9]+\.[0-9]*)', '(\.[0-9]+)'];
        foreach ($patterns as $pattern) {
            $match = $this->x($pattern, $v);
            if ($match) {
                return [$match[1], $match[2]];
            }
        }

        return [false, $v];
    }
779
780
    /* 79.., 86.. */
781
782 98
    /**
     * Parses a number with exponent; three mantissa shapes are tried in order.
     *
     * @return array [number text, remaining input] or [false, $v] when not matched
     */
    protected function xDOUBLE($v)
    {
        $patterns = [
            '([0-9]+\.[0-9]*E[\+\-]?[0-9]+)',
            '(\.[0-9]+E[\+\-]?[0-9]+)',
            '([0-9]+E[\+\-]?[0-9]+)',
        ];
        foreach ($patterns as $pattern) {
            $match = $this->x($pattern, $v);
            if ($match) {
                return [$match[1], $match[2]];
            }
        }

        return [false, $v];
    }
796
797
    /* 92 */
798
799 98
    /**
     * Parses an empty pair of parentheses (whitespace allowed inside) as rdf:nil.
     *
     * @return array [['type' => 'uri', 'value' => rdf:nil IRI], remaining input]
     *               or [0, $v] when not matched
     */
    protected function xNIL($v)
    {
        $match = $this->x('\([\x20\x9\xd\xa]*\)', $v);
        if (!$match) {
            return [0, $v];
        }

        return [['type' => 'uri', 'value' => NamespaceHelper::NAMESPACE_RDF.'nil'], $match[1]];
    }
807
808
    /* 95.. */
809
810 99
    /**
     * Parses a run of letters a-z (case-insensitive under x()'s default
     * modifiers) or a single backslash-u escape with one to four hex digits.
     *
     * @return array [matched text, remaining input] or [0, $v] when not matched
     */
    protected function xPN_CHARS_BASE($v)
    {
        $match = $this->x("([a-z]+|\\\u[0-9a-f]{1,4})", $v);

        return $match ? [$match[1], $match[2]] : [0, $v];
    }
818
819
    /* 96 */
820
821 95
    /**
     * Reads whatever xPN_CHARS_BASE() accepts or, failing that, a single
     * underscore from the beginning of $v.
     *
     * @param string $v Remaining input.
     *
     * @return array [matched text, remaining input], or [0, $v] if nothing matched.
     */
    protected function xPN_CHARS_U($v)
    {
        [$chars, $rest] = $this->xPN_CHARS_BASE($v);
        if ($chars) {
            return [$chars, $rest];
        }

        $underscore = $this->x('(_)', $v);
        if ($underscore) {
            return [$underscore[1], $underscore[2]];
        }

        return [0, $v];
    }
831
832
    /* 97.. */
833
834 94
    /**
     * Collects a variable name from the beginning of $v by repeatedly
     * consuming digit runs, xPN_CHARS_U() matches and, once the name is
     * non-empty, characters of the trailing character class.
     *
     * NOTE(review): the class [\xb7\x300-\x36f] is evaluated byte-wise (no "u"
     * modifier), so it presumably does not cover U+0300..U+036F as the
     * notation suggests - confirm before relying on it.
     *
     * @param string $v Remaining input.
     *
     * @return array [collected name (may be ''), remaining input].
     */
    protected function xVARNAME($v)
    {
        $name = '';

        while (true) {
            $digits = $this->x('([0-9]+)', $v);
            if ($digits) {
                $name .= $digits[1];
                $v = $digits[2];
                continue;
            }

            [$chars, $rest] = $this->xPN_CHARS_U($v);
            if ($chars) {
                $name .= $chars;
                $v = $rest;
                continue;
            }

            /* the extra characters are only permitted after the name has started */
            $extra = $name ? $this->x('([\xb7\x300-\x36f]+)', $v) : false;
            if (!$extra) {
                break;
            }
            $name .= $extra[1];
            $v = $extra[2];
        }

        return [$name, $v];
    }
856
857
    /* 98.. */
858
859 6
    /**
     * Reads whatever xPN_CHARS_U() accepts or, failing that, one character of
     * the class "-", digit, \xb7, ... from the beginning of $v.
     *
     * NOTE(review): the class [\-0-9\xb7\x300-\x36f] is evaluated byte-wise
     * (no "u" modifier), so it presumably does not cover U+0300..U+036F as
     * the notation suggests - confirm before relying on it.
     *
     * @param string $v Remaining input.
     *
     * @return array [matched text, remaining input], or [false, $v] if nothing matched.
     */
    protected function xPN_CHARS($v)
    {
        [$chars, $rest] = $this->xPN_CHARS_U($v);
        if ($chars) {
            return [$chars, $rest];
        }

        $single = $this->x('([\-0-9\xb7\x300-\x36f])', $v);

        return $single ? [$single[1], $single[2]] : [false, $v];
    }
869
870
    /* 99 */
871
872 100
    /**
     * Reads a prefix name from the beginning of $v.
     *
     * A fast path takes everything up to the next whitespace or one of
     * ": ( ) { } ; ,". Only when that yields nothing is the prefix assembled
     * piece by piece from xPN_CHARS_BASE(), xPN_CHARS() and dots.
     *
     * @param string $v Remaining input.
     *
     * @return array [prefix, remaining input]; the prefix is 0 and the input unchanged when nothing matched.
     */
    protected function xPN_PREFIX($v)
    {
        /* accelerator */
        $quick = $this->x("([^\s\:\(\)\{\}\;\,]+)", $v, 's');
        if ($quick) {
            return [$quick[1], $quick[2]]; /* @@testing */
        }

        [$prefix, $rest] = $this->xPN_CHARS_BASE($v);
        if (!$prefix) {
            return [$prefix, $rest];
        }

        /* append name characters and dots for as long as either one is found */
        while (true) {
            [$char, $rest] = $this->xPN_CHARS($rest);
            if (false !== $char) {
                $prefix .= $char;
                continue;
            }

            $dot = $this->x("\.", $rest);
            if (!$dot) {
                break;
            }
            $prefix .= '.';
            $rest = $dot[1];
        }

        [$tail, $rest] = $this->xPN_CHARS($rest);
        $prefix .= $tail ?: '';

        return [$prefix, $rest];
    }
896
897
    /* 100 */
898
899 33
    /**
     * Reads the local part of a prefixed name from the beginning of $v.
     *
     * A fast path takes everything up to the next whitespace, bracket, brace,
     * parenthesis, ";", "," or "." - provided the remainder does not start
     * with a dot. Otherwise the name is assembled character by character so
     * that dots inside the name can be told apart from a terminating dot.
     *
     * @param string $v Remaining input.
     *
     * @return array [local name (may be ''), remaining input].
     */
    protected function xPN_LOCAL($v)
    {
        /* accelerator */
        if (($sub_r = $this->x("([^\s\(\)\{\}\[\]\;\,\.]+)", $v, 's')) && !preg_match('/^\./', $sub_r[2])) {
            return [$sub_r[1], $sub_r[2]]; /* @@testing */
        }

        $r = '';
        $sub_v = $v;
        do {
            $proceed = 0;

            /* whitespace ends the name */
            if ($this->x('\s', $sub_v)) {
                return [$r, $sub_v];
            }

            if ($sub_r = $this->x('([0-9])', $sub_v)) {
                $r .= $sub_r[1];
                $sub_v = $sub_r[2];
                $proceed = 1;
            } elseif ((list($sub_r, $sub_v) = $this->xPN_CHARS_U($sub_v)) && $sub_r) {
                $r .= $sub_r;
                $proceed = 1;
            } elseif ('' !== $r) {
                /*
                 * Compare against '' instead of relying on truthiness: a name
                 * collected so far that equals "0" is a valid, non-empty name.
                 */
                /* a dot belongs to the name unless whitespace or "}" follows it */
                if (($sub_r = $this->x('(\.)', $sub_v)) && !preg_match('/^[\s\}]/s', $sub_r[2])) {
                    $r .= $sub_r[1];
                    $sub_v = $sub_r[2];
                }

                /*
                 * xPN_CHARS() signals failure with false. A truthiness check
                 * would treat a matched "0" as failure although the input has
                 * already been advanced past it, silently dropping the digit.
                 */
                list($sub_r, $sub_v) = $this->xPN_CHARS($sub_v);
                if (false !== $sub_r) {
                    $r .= $sub_r;
                    $proceed = 1;
                }
            }
        } while ($proceed);

        return [$r, $sub_v];
    }
932
933 53
    /**
     * Resolves backslash escapes in $v.
     *
     * First the character escapes \t, \n, \r, \" and \' are replaced, then
     * the numeric escapes \uXXXX and \UXXXXXXXX are replaced by the UTF-8
     * encoding of the code point. Hex digits are accepted in either case.
     * Code points of 2097152 (0x200000) and above cannot be encoded and are
     * replaced by an empty string.
     *
     * The numeric escapes are resolved in one pass, so text produced by one
     * escape (e.g. the backslash from \u005C) is never interpreted as the
     * start of another escape.
     *
     * @param string $v Text that may contain escapes.
     *
     * @return string Text with the escapes resolved.
     */
    protected function unescapeNtripleUTF($v)
    {
        if (false === strpos($v, '\\')) {
            return $v;
        }

        $mappings = ['t' => "\t", 'n' => "\n", 'r' => "\r", '\"' => '"', '\'' => "'"];
        foreach ($mappings as $in => $out) {
            $v = preg_replace('/\x5c(['.$in.'])/', $out, $v);
        }

        if (false === stripos($v, '\u')) {
            return $v;
        }

        $decoded = preg_replace_callback(
            '/\x5c(?:U([0-9A-Fa-f]{8})|u([0-9A-Fa-f]{4}))/',
            static function (array $m): string {
                /* group 1 is filled for \U, group 2 for \u (group 1 is '' then) */
                $hex = '' !== $m[1] ? $m[1] : $m[2];
                /* hexdec() may return a float; at most 8 hex digits always fit an int */
                $no = (int) hexdec($hex);

                if ($no < 128) {
                    return \chr($no);
                }
                if ($no < 2048) {
                    return \chr(($no >> 6) + 192).\chr(($no & 63) + 128);
                }
                if ($no < 65536) {
                    return \chr(($no >> 12) + 224).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
                }
                if ($no < 2097152) {
                    return \chr(($no >> 18) + 240).\chr((($no >> 12) & 63) + 128).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
                }

                return '';
            },
            $v
        );

        /* preg_replace_callback() yields null on a PCRE error; keep the input then */
        return $decoded ?? $v;
    }
963
964 98
    /**
     * Reads a placeholder of the form ?{...} or ${...} from the beginning of
     * $v. Braces inside the placeholder may be nested as long as they are
     * balanced.
     *
     * Every distinct placeholder text is recorded once in
     * $this->r['placeholders'].
     *
     * @param string $v Remaining input.
     *
     * @return array [placeholder term, remaining input], or [0, $v] if $v does not start with a placeholder.
     */
    protected function xPlaceholder($v)
    {
        if ($r = $this->x('(\?|\$)', $v)) {
            /*
             * (?R) recurses into the whole pattern to match balanced braces.
             * The strpos() check makes sure the brace group follows the
             * sigil directly rather than appearing somewhere later.
             */
            if (preg_match('/(\{(?:[^{}]+|(?R))*\})/', $r[2], $m) && 0 === strpos(trim($r[2]), $m[1])) {
                $ph = substr($m[1], 1, -1);
                $rest = substr(trim($r[2]), \strlen($m[1]));

                $this->r['placeholders'] ??= [];

                /* strict comparison: names like "1" and "01" must stay distinct */
                if (!\in_array($ph, $this->r['placeholders'], true)) {
                    $this->r['placeholders'][] = $ph;
                }

                return [['value' => $ph, 'type' => 'placeholder'], $rest];
            }
        }

        return [0, $v];
    }
984
}
985