Passed
Push — extract-store ( 206e1d...ec1a3e )
by Konrad
08:13 queued 03:59
created

TurtleParser::xVar()   A

Complexity

Conditions 5
Paths 4

Size

Total Lines 13
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 5

Importance

Changes 0
Metric Value
cc 5
eloc 6
nc 4
nop 1
dl 0
loc 13
ccs 7
cts 7
cp 1
crap 5
rs 9.6111
c 0
b 0
f 0
1
<?php
2
3
/*
4
 * This file is part of the sweetrdf/InMemoryStoreSqlite package and licensed under
5
 * the terms of the GPL-3 license.
6
 *
7
 * (c) Konrad Abicht <[email protected]>
8
 * (c) Benjamin Nowack
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace sweetrdf\InMemoryStoreSqlite\Parser;
15
16
use function sweetrdf\InMemoryStoreSqlite\calcURI;
17
use sweetrdf\InMemoryStoreSqlite\NamespaceHelper;
18
use sweetrdf\InMemoryStoreSqlite\StringReader;
19
20
class TurtleParser extends BaseParser
21
{
22 100
    /**
     * Runs the parent constructor, then seeds this parser's own bookkeeping
     * fields: the state marker, the unparsed-code buffer and the loop limit.
     */
    public function __construct()
    {
        parent::__construct();

        // Initial state marker.
        $this->state = 0;
        // Filled by parse() with whatever input could not be consumed.
        $this->unparsed_code = '';
        // parse() throws once its stalled-read counter exceeds this limit.
        $this->max_parsing_loops = 500;
    }
30
31 100
    /**
     * Matches a regex fragment at the start of a string, skipping leading
     * whitespace and "#" line comments first.
     *
     * @param string $re      regex fragment without delimiters; its capture groups come first in the result
     * @param string $v       text to match against
     * @param string $options PCRE modifiers appended to the assembled pattern
     *
     * @return array|false the preg_match() groups (the last group is the unconsumed remainder), or false
     */
    public function x($re, $v, $options = 'si')
    {
        // A leading run of 0xA0 / 0xC2 bytes is collapsed into one space.
        $subject = preg_replace('/^[\xA0\xC2]+/', ' ', $v);

        /* comment removal */
        $comment = [];
        while (1 === preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $subject, $comment)) {
            $subject = $comment[2];
        }

        $pattern = "/^\s*".$re.'(.*)$/'.$options;
        $groups = [];
        if (preg_match($pattern, $subject, $groups)) {
            return $groups;
        }

        return false;
    }
42
43 25
    /**
     * Advances the blank node counter and returns a fresh blank node label
     * of the form "_:" + bnode_prefix + counter.
     */
    private function createBnodeID(): string
    {
        ++$this->bnode_id;

        return sprintf('_:%s%s', $this->bnode_prefix, $this->bnode_id);
    }
49
50 23
    /**
     * Stores a triple at the next free index of the triple list and
     * advances the triple counter.
     *
     * @param array $t triple record to store
     */
    protected function addT(array $t): void
    {
        $slot = $this->t_count;
        $this->triples[$slot] = $t;
        $this->t_count = $slot + 1;
    }
55
56
    /**
     * Returns the current value of the triple counter maintained by addT().
     */
    protected function countTriples()
    {
        $count = $this->t_count;

        return $count;
    }
60
61
    /**
     * Returns the input remainder that parse() stored as not consumed.
     */
    protected function getUnparsedCode()
    {
        $leftover = $this->unparsed_code;

        return $leftover;
    }
65
66 24
    /**
     * Parses the given input and stores every recognised triple via addT().
     *
     * The input is pulled from a StringReader in chunks. For each chunk the
     * prologue (base/prefix declarations) is consumed first, then triple
     * blocks, until nothing more can be matched. Input that could not be
     * consumed ends up in $this->unparsed_code; if it contains anything but
     * whitespace and comments, and no error was recorded yet, a
     * "Could not parse" error is added.
     *
     * @param string $path passed to StringReader::init() (also the source of the base, via getBase())
     * @param string $data passed to StringReader::init()
     *
     * @throws \Exception when more than max_parsing_loops consecutive chunks yield no progress
     */
    public function parse($path, $data = ''): void
    {
        $this->reader = new StringReader();
        $this->reader->init($path, $data);
        $this->base = $this->reader->getBase();
        $this->r = ['vars' => []];
        /* parse */
        $buffer = '';
        $more_triples = [];
        $sub_v = '';
        $sub_v2 = '';
        $loops = 0;
        $prologue_done = 0;
        while ($d = $this->reader->readStream(8192)) {
            $buffer .= $d;
            $sub_v = $buffer;
            do {
                $proceed = 0;
                if (!$prologue_done) {
                    $proceed = 1;
                    if ((list($sub_r, $sub_v) = $this->xPrologue($sub_v)) && $sub_r) {
                        $loops = 0;
                        $sub_v .= $this->reader->readStream(128);
                        /* in case we missed the final DOT in the previous prologue loop */
                        if ($sub_r = $this->x('\.', $sub_v)) {
                            $sub_v = $sub_r[1];
                        }
                        /* more prologue to come, use outer loop */
                        if ($this->x("\@?(base|prefix)", $sub_v)) {
                            $proceed = 0;
                        }
                    } else {
                        $prologue_done = 1;
                    }
                }
                if (
                    $prologue_done
                    && (list($sub_r, $sub_v, $more_triples, $sub_v2) = $this->xTriplesBlock($sub_v))
                    && \is_array($sub_r)
                ) {
                    $proceed = 1;
                    $loops = 0;
                    foreach ($sub_r as $t) {
                        $this->addT($t);
                    }
                }
            } while ($proceed);
            ++$loops;
            $buffer = $sub_v;
            if ($loops > $this->max_parsing_loops) {
                $msg = 'too many loops: '.$loops.'. Could not parse "'.substr($buffer, 0, 200).'..."';
                // Fully qualified on purpose: inside this namespace a bare "Exception"
                // resolves to a class that does not exist.
                throw new \Exception($msg);
            }
        }
        /* triples that were complete but not yet terminated when the input ended */
        foreach ($more_triples as $t) {
            $this->addT($t);
        }
        $sub_v = \count($more_triples) ? $sub_v2 : $sub_v;
        $buffer = $sub_v;
        $this->unparsed_code = $buffer;

        /* remove trailing comments */
        while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) {
            $this->unparsed_code = $m[2];
        }

        if ($this->unparsed_code && !$this->getErrors()) {
            // NOTE(review): the character class also contains a literal "|", so pipes
            // in the excerpt are blanked as well; left as is to keep error messages stable.
            $rest = preg_replace('/[\x0a|\x0d]/i', ' ', substr($this->unparsed_code, 0, 30));
            if (trim($rest)) {
                $this->addError('Could not parse "'.$rest.'"');
            }
        }
    }
139
140 24
    /**
     * Consumes an optional base declaration followed by any number of prefix
     * declarations, but only while no triple has been stored yet.
     *
     * @param string $v text to parse
     *
     * @return array [1 if at least one declaration was consumed, else 0; remaining text]
     */
    protected function xPrologue($v)
    {
        if ($this->t_count) {
            return [0, $v];
        }

        $found = 0;

        [$base, $v] = $this->xBaseDecl($v);
        if ($base) {
            $this->base = $base;
            $found = 1;
        }

        for (;;) {
            [$decl, $v] = $this->xPrefixDecl($v);
            if (!$decl) {
                break;
            }
            $this->prefixes[$decl['prefix']] = $decl['uri'];
            $found = 1;
        }

        return [$found, $v];
    }
156
157
    /* 3 */
158
159 100
    /**
     * Matches a base declaration: optional "@", the keyword "base", an IRI
     * reference and an optional trailing dot.
     *
     * @param string $v text to parse
     *
     * @return array [value returned by xIRI_REF(), remaining text] on success, otherwise [0, $v]
     */
    protected function xBaseDecl($v)
    {
        $keyword = $this->x("\@?base\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$iri, $rest] = $this->xIRI_REF($keyword[1]);
        if (!$iri) {
            return [0, $v];
        }

        // The terminating dot is optional here.
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [$iri, $rest];
    }
173
174
    /* 4 */
175
176 100
    /**
     * Matches a prefix declaration: optional "@", the keyword "prefix", a
     * namespace prefix, an IRI reference and an optional trailing dot.
     *
     * @param string $v text to parse
     *
     * @return array [['prefix' => ..., 'uri_ref' => ..., 'uri' => ...], remaining text] on success,
     *               otherwise [0, $v]
     */
    protected function xPrefixDecl($v)
    {
        $keyword = $this->x("\@?prefix\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$prefix, $rest] = $this->xPNAME_NS($keyword[1]);
        if (!$prefix) {
            return [0, $v];
        }

        [$uri_ref, $rest] = $this->xIRI_REF($rest);
        if (!$uri_ref) {
            return [0, $v];
        }

        // The IRI reference is passed through calcURI() together with the current base.
        $uri = calcURI($uri_ref, $this->base);

        // The terminating dot is optional here.
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [['prefix' => $prefix, 'uri_ref' => $uri_ref, 'uri' => $uri], $rest];
    }
194
195
    /* 21.., 32.. */
196
197 100
    /**
     * Parses as many triples as possible from the start of $v.
     *
     * Implemented as a small state machine:
     *   1 = expecting subject, 2 = expecting predicate,
     *   3 = expecting object,  4 = expecting ".", ";", "," or a block delimiter.
     * Triples are collected in a pending list and only moved to the result
     * once their statement is terminated (by "." or a block delimiter).
     *
     * @param string $v text to parse
     *
     * @return array [terminated triples (0 if none),
     *                text following the last terminated statement,
     *                pending (not yet terminated) triples,
     *                text following the last consumed token]
     */
    protected function xTriplesBlock($v)
    {
        $pre_r = [];
        $r = [];
        $state = 1;
        $sub_v = $v;
        $buffer = $sub_v;
        do {
            $proceed = 0;
            if (1 == $state) {/* expecting subject */
                $t = ['type' => 'triple', 's' => '', 'p' => '', 'o' => '', 's_type' => '', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
                if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['value'];
                    $t['s_type'] = $sub_r['type'];
                    $state = 2;
                    $proceed = 1;
                    if ($sub_r = $this->x('(\}|\.)', $sub_v)) {
                        if ('placeholder' == $t['s_type']) {
                            $state = 4;
                        } else {
                            $this->addError('"'.$sub_r[1].'" after subject found.');
                        }
                    }
                } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                    if ($this->x('\.', $sub_v)) {
                        $this->addError('DOT after subject found.');
                    }
                } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    $this->addError('Subject expected, DOT found.'.$sub_v);
                }
            }
            if (2 == $state) {/* expecting predicate */
                if ($sub_r = $this->x('a\s+', $sub_v)) {
                    /* "a" is shorthand for rdf:type */
                    $sub_v = $sub_r[1];
                    $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                    $t['p_type'] = 'uri';
                    $state = 3;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    if ('bnode' == $sub_r['type']) {
                        $this->addError('Blank node used as triple predicate');
                    }
                    $t['p'] = $sub_r['value'];
                    $t['p_type'] = $sub_r['type'];
                    $state = 3;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    $state = 4;
                } elseif ($this->x('\}', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
            if (3 == $state) {/* expecting object */
                if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['value'];
                    $t['o_type'] = $sub_r['type'];
                    $t['o_lang'] = $sub_r['lang'] ?? '';
                    $t['o_datatype'] = $sub_r['datatype'] ?? '';
                    $pre_r[] = $t;
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /* $t is reused across ";" and ",": clear literal-only fields of a previous object */
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /* see above: do not inherit lang/datatype from a previous literal object */
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                }
            }
            if (4 == $state) {/* expecting . or ; or , or } */
                if ($sub_r = $this->x('\.', $sub_v)) {
                    /* statement finished: pending triples become final */
                    $sub_v = $sub_r[1];
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $state = 1;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\;', $sub_v)) {
                    /* same subject, new predicate */
                    $sub_v = $sub_r[1];
                    $state = 2;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\,', $sub_v)) {
                    /* same subject and predicate, new object */
                    $sub_v = $sub_r[1];
                    $state = 3;
                    $proceed = 1;
                    if ($this->x('\}', $sub_v)) {
                        $this->addError('Object expected, } found.');
                    }
                }
                /* a block delimiter or one of these keywords ends the triples block */
                if ($this->x('(\}|\{|OPTIONAL|FILTER|GRAPH)', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
        } while ($proceed);

        return \count($r) ? [$r, $buffer, $pre_r, $sub_v] : [0, $buffer, $pre_r, $sub_v];
    }
319
320
    /* 39.. */
321
322 99
    /**
     * Parses a blank node property list: "[" predicate/object pairs "]".
     *
     * A fresh blank node ID is created as soon as the opening bracket is
     * seen (even if the list later turns out to be unterminated). Uses the
     * states 2 = expecting predicate, 3 = expecting object,
     * 4 = expecting ".", ";", "," or "]".
     *
     * @param string $v text to parse
     *
     * @return array [['id' => bnode ID, 'type' => 'bnode', 'triples' => [...]], remaining text]
     *               when the list was closed with "]", otherwise [0, $v]
     */
    protected function xBlankNodePropertyList($v)
    {
        if ($sub_r = $this->x('\[', $v)) {
            $sub_v = $sub_r[1];
            $s = $this->createBnodeID();
            $r = ['id' => $s, 'type' => 'bnode', 'triples' => []];
            $t = ['type' => 'triple', 's' => $s, 'p' => '', 'o' => '', 's_type' => 'bnode', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
            $state = 2;
            $closed = 0;
            do {
                $proceed = 0;
                if (2 == $state) {/* expecting predicate */
                    if ($sub_r = $this->x('a\s+', $sub_v)) {
                        /* "a" is shorthand for rdf:type */
                        $sub_v = $sub_r[1];
                        $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                        $t['p_type'] = 'uri';
                        $state = 3;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                        $t['p'] = $sub_r['value'];
                        $t['p_type'] = $sub_r['type'];
                        $state = 3;
                        $proceed = 1;
                    }
                }
                if (3 == $state) {/* expecting object */
                    if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['value'];
                        $t['o_type'] = $sub_r['type'];
                        $t['o_lang'] = $sub_r['lang'] ?? '';
                        $t['o_datatype'] = $sub_r['datatype'] ?? '';
                        $r['triples'][] = $t;
                        $state = 4;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['id'];
                        $t['o_type'] = $sub_r['type'];
                        /* $t is reused across ";" and ",": clear literal-only fields of a previous object */
                        $t['o_lang'] = '';
                        $t['o_datatype'] = '';
                        $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                        $state = 4;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['id'];
                        $t['o_type'] = $sub_r['type'];
                        /* see above: do not inherit lang/datatype from a previous literal object */
                        $t['o_lang'] = '';
                        $t['o_datatype'] = '';
                        $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                        $state = 4;
                        $proceed = 1;
                    }
                }
                if (4 == $state) {/* expecting . or ; or , or ] */
                    /* the four checks are deliberately independent (no elseif) */
                    if ($sub_r = $this->x('\.', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 1;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\;', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 2;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\,', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 3;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\]', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $proceed = 0;
                        $closed = 1;
                    }
                }
            } while ($proceed);
            if ($closed) {
                return [$r, $sub_v];
            }

            return [0, $v];
        }

        return [0, $v];
    }
404
405
    /* 40.. */
406
407 99
    /**
     * Parses a parenthesised collection and expands it into rdf:first /
     * rdf:rest triples chained over freshly created blank nodes.
     *
     * A blank node ID is created as soon as the opening parenthesis is seen.
     * Each item may be a term, a nested collection or a blank node property
     * list; triples of nested structures are appended to the result.
     *
     * @param string $v text to parse
     *
     * @return array [['id' => head bnode ID, 'type' => 'bnode', 'triples' => [...]], remaining text]
     *               when the closing parenthesis was found, otherwise [0, $v]
     */
    protected function xCollection($v)
    {
        $open = $this->x('\(', $v);
        if (!$open) {
            return [0, $v];
        }

        $rest = $open[1];
        $node = $this->createBnodeID();
        $list = ['id' => $node, 'type' => 'bnode', 'triples' => []];

        for (;;) {
            if (([$item, $rest] = $this->xVarOrTerm($rest)) && $item) {
                /* plain term as list item */
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $item['value'],
                    'o_type' => $item['type'],
                    'o_lang' => $item['lang'] ?? '',
                    'o_datatype' => $item['datatype'] ?? '',
                ];
            } elseif (([$item, $rest] = $this->xCollection($rest)) && $item) {
                /* nested collection as list item */
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $item['id'],
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $list['triples'] = array_merge($list['triples'], $item['triples']);
            } elseif (([$item, $rest] = $this->xBlankNodePropertyList($rest)) && $item) {
                /* blank node property list as list item */
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'o' => $item['id'],
                    's_type' => 'bnode',
                    'p_type' => 'uri',
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $list['triples'] = array_merge($list['triples'], $item['triples']);
            } else {
                /* no further item and no closing parenthesis seen */
                break;
            }

            $close = $this->x('\)', $rest);
            if ($close) {
                /* end of list: terminate the chain with rdf:nil */
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                    'p_type' => 'uri',
                    'o' => NamespaceHelper::NAMESPACE_RDF.'nil',
                    'o_type' => 'uri',
                    'o_lang' => '',
                    'o_datatype' => '',
                ];

                return [$list, $close[1]];
            }

            /* link the current cell to a new cell for the next item */
            $next_node = $this->createBnodeID();
            $list['triples'][] = [
                'type' => 'triple',
                's' => $node,
                'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                'o' => $next_node,
                's_type' => 'bnode',
                'p_type' => 'uri',
                'o_type' => 'bnode',
                'o_lang' => '',
                'o_datatype' => '',
            ];
            $node = $next_node;
        }

        return [0, $v];
    }
498
499
    /* 42 */
500
501 100
    /**
     * Tries to read a variable first and a graph term second.
     *
     * @param string $v text to parse
     *
     * @return array [matched term, remaining text], or [0, $v] if neither matched
     */
    protected function xVarOrTerm($v)
    {
        foreach (['xVar', 'xGraphTerm'] as $rule) {
            [$term, $rest] = $this->$rule($v);
            if ($term) {
                return [$term, $rest];
            }
        }

        return [0, $v];
    }
511
512
    /* 44, 74.., 75.. */
513
514 100
    /**
     * Matches a variable ("?name" or "$name") and registers its name in the
     * list of seen variables ($this->r['vars']).
     *
     * @param string $v text to parse
     *
     * @return array [['value' => name, 'type' => 'var'], remaining text], or [0, $v]
     */
    protected function xVar($v)
    {
        if ($r = $this->x('(\?|\$)([^\s]+)', $v)) {
            if ((list($sub_r, $sub_v) = $this->xVARNAME($r[2])) && $sub_r) {
                // Strict comparison: with loose comparison numeric-looking names
                // such as "10" and "1e1" would be treated as the same variable.
                if (!\in_array($sub_r, $this->r['vars'], true)) {
                    $this->r['vars'][] = $sub_r;
                }

                // Remainder = what xVARNAME() left of the token + the text after the token.
                return [['value' => $sub_r, 'type' => 'var'], $sub_v.$r[3]];
            }
        }

        return [0, $v];
    }
528
529
    /* 45 */
530
531 100
    /**
     * Tries the graph term rules in a fixed order and returns the first match.
     *
     * Each rule name maps to the type that is assigned when the rule's
     * result does not carry a type of its own. Scalar results are wrapped
     * into ['value' => ...].
     *
     * @param string $v text to parse
     *
     * @return array [term array with at least 'value' and 'type', remaining text], or [0, $v]
     */
    protected function xGraphTerm($v)
    {
        $rules = [
            'IRIref' => 'uri',
            'RDFLiteral' => 'literal',
            'NumericLiteral' => 'literal',
            'BooleanLiteral' => 'literal',
            'BlankNode' => 'bnode',
            'NIL' => 'uri',
            'Placeholder' => 'placeholder',
        ];

        foreach ($rules as $rule => $default_type) {
            [$term, $rest] = $this->{'x'.$rule}($v);
            if (!$term) {
                continue;
            }

            if (!\is_array($term)) {
                $term = ['value' => $term];
            }
            if (!isset($term['type'])) {
                $term['type'] = $default_type;
            }

            return [$term, $rest];
        }

        return [0, $v];
    }
555
556
    /* 60 */
557
558 100
    /**
     * Matches a quoted string literal, optionally followed (without
     * intervening whitespace) by a language tag or by "^^" and a datatype IRI.
     *
     * @param string $v text to parse
     *
     * @return array [literal array as returned by xString(), with 'value' passed through
     *               unescapeNtripleUTF() and an optional 'lang' or 'datatype' entry,
     *               remaining text], or [0, $v]
     */
    protected function xRDFLiteral($v)
    {
        if ((list($sub_r, $sub_v) = $this->xString($v)) && $sub_r) {
            $sub_r['value'] = $this->unescapeNtripleUTF($sub_r['value']);
            $r = $sub_r;
            if ((list($sub_r, $sub_v) = $this->xLANGTAG($sub_v)) && $sub_r) {
                $r['lang'] = $sub_r;
            } elseif (!$this->x('\s', $sub_v) && ($marker = $this->x('\^\^', $sub_v))) {
                list($datatype, $sub_v) = $this->xIRIref($marker[1]);
                // xIRIref() yields the IRI string or 0; test the IRI itself rather
                // than an offset into it.
                if ($datatype) {
                    $r['datatype'] = $datatype;
                }
            }

            return [$r, $sub_v];
        }

        return [0, $v];
    }
579
580
    /* 61.., 62.., 63.., 64.. */
581
582 99
    /**
     * Matches an optionally signed numeric literal, trying the double rule
     * first, then decimal, then integer.
     *
     * @param string $v text to parse
     *
     * @return array [['value' => sign + digits, 'type' => 'literal', 'datatype' => XSD type IRI],
     *               remaining text], or [0, $v]
     */
    protected function xNumericLiteral($v)
    {
        $sign = $this->x('(\-|\+)?', $v);
        $prefix = $sign[1];
        $rest = $sign[2];

        $rules = ['DOUBLE' => 'double', 'DECIMAL' => 'decimal', 'INTEGER' => 'integer'];
        foreach ($rules as $rule => $xsd) {
            [$digits, $rest] = $this->{'x'.$rule}($rest);
            // The number rules signal "no match" with false.
            if (false === $digits) {
                continue;
            }

            $literal = [
                'value' => $prefix.$digits,
                'type' => 'literal',
                'datatype' => NamespaceHelper::NAMESPACE_XSD.$xsd,
            ];

            return [$literal, $rest];
        }

        return [0, $v];
    }
602
603
    /* 65.. */
604
605 99
    /**
     * Matches the keyword "true" or "false" at the start of the text
     * (case-insensitively, through x()'s default modifiers).
     *
     * @param string $v text to parse
     *
     * @return array [matched keyword, remaining text], or [0, $v]
     */
    protected function xBooleanLiteral($v)
    {
        $match = $this->x('(true|false)', $v);

        return $match ? [$match[1], $match[2]] : [0, $v];
    }
613
614
    /* 66.., 87.., 88.., 89.., 90.., 91.. */
615
616 100
    /**
     * Matches a quoted string with one of the delimiters ''' , """ , ' or ".
     *
     * The closing delimiter is the first occurrence that is not preceded by
     * an odd number of backslashes.
     *
     * @param string $v text to parse
     *
     * @return array [['value' => raw content, 'type' => 'literal', 'sub_type' => delimiter kind],
     *               remaining text], or [0, $v] if no (terminated) string starts here
     */
    protected function xString($v)
    {/* largely simplified, may need some tweaks in following revisions */
        $opening = [];
        if (!preg_match('/^\s*([\']{3}|\'|[\"]{3}|\")(.*)$/s', $v, $opening)) {
            return [0, $v];
        }

        $delim = $opening[1];
        $rest = $opening[2];
        $kinds = ["'''" => 'literal_long1', '"""' => 'literal_long2', "'" => 'literal1', '"' => 'literal2'];
        $sub_type = $kinds[$delim];

        $value = false;
        $offset = 0;
        while (false !== ($hit = strpos($rest, $delim, $offset))) {
            $candidate = substr($rest, 0, $hit);
            $slashes = [];
            $escaped = preg_match('/([\x5c]+)$/s', $candidate, $slashes) && 1 === \strlen($slashes[1]) % 2;
            if (!$escaped) {
                $value = $candidate;
                $rest = substr($rest, $hit + \strlen($delim));
                break;
            }
            // Delimiter was escaped: keep searching right after it.
            $offset = $hit + 1;
        }

        if (false === $value) {
            return [0, $v];
        }

        return [['value' => $value, 'type' => 'literal', 'sub_type' => $sub_type], $rest];
    }
650
651
    /* 67 */
652
653 100
    /**
     * Matches either an IRI reference in angle brackets (passed through
     * calcURI() with the current base) or a prefixed name.
     *
     * @param string $v text to parse
     *
     * @return array [IRI, remaining text], or [0, remaining text] if neither form matched
     */
    protected function xIRIref($v)
    {
        [$iri, $rest] = $this->xIRI_REF($v);
        if ($iri) {
            return [calcURI($iri, $this->base), $rest];
        }

        [$name, $rest] = $this->xPrefixedName($rest);
        if ($name) {
            return [$name, $rest];
        }

        return [0, $rest];
    }
663
664
    /* 68 */
665
666 100
    /**
     * Matches a prefixed name: either "prefix:local" or a bare "prefix:".
     * A bare prefix only counts when it has been declared.
     *
     * @param string $v text to parse
     *
     * @return array [expanded IRI, remaining text], or [0, remaining text]
     */
    protected function xPrefixedName($v)
    {
        [$full, $v] = $this->xPNAME_LN($v);
        if ($full) {
            return [$full, $v];
        }

        [$ns, $rest] = $this->xPNAME_NS($v);
        if ($ns && isset($this->prefixes[$ns])) {
            return [$this->prefixes[$ns], $rest];
        }

        return [0, $v];
    }
676
677
    /* 69.., 73.., 93, 94..  */
678
679 99
    /**
     * Matches a labelled blank node ("_:label") or an anonymous one ("[]",
     * optionally with whitespace between the brackets). Anonymous nodes get
     * a freshly created ID.
     *
     * @param string $v text to parse
     *
     * @return array [['type' => 'bnode', 'value' => ID], remaining text], or [0, $v]
     */
    protected function xBlankNode($v)
    {
        $marker = $this->x('\_\:', $v);
        if ($marker) {
            [$label, $rest] = $this->xPN_LOCAL($marker[1]);
            if ($label) {
                return [['type' => 'bnode', 'value' => '_:'.$label], $rest];
            }
        }

        $anon = $this->x('\[[\x20\x9\xd\xa]*\]', $v);
        if ($anon) {
            return [['type' => 'bnode', 'value' => $this->createBnodeID()], $anon[1]];
        }

        return [0, $v];
    }
690
691
    /* 70.. @@sync with SPARQLParser */
692
693 24
    /**
     * Matches an IRI reference in angle brackets. Three forms are tried in
     * order: a "${...}" placeholder accepted by xPlaceholder(), the empty
     * reference "<>", and a regular reference that does not start with
     * whitespace.
     *
     * @param string $v text to parse
     *
     * @return array [reference text (true when it is empty or otherwise falsy), remaining text],
     *               or [0, $v]
     */
    protected function xIRI_REF($v)
    {
        $placeholder = $this->x('\<(\$\{[^\>]*\})\>', $v);
        if ($placeholder && $this->xPlaceholder($placeholder[1])) {
            return [$placeholder[1], $placeholder[2]];
        }

        $empty = $this->x('\<\>', $v);
        if ($empty) {
            return [true, $empty[1]];
        }

        $iri = $this->x('\<([^\s][^\<\>]*)\>', $v);
        if ($iri) {
            return [$iri[1] ?: true, $iri[2]];
        }

        return [0, $v];
    }
706
707
    /* 71 */
708
709 100
    /**
     * Matches a namespace prefix followed by a colon; the prefix part may
     * be empty.
     *
     * @param string $v text to parse
     *
     * @return array [prefix including the trailing ":", remaining text], or [0, $v]
     */
    protected function xPNAME_NS($v)
    {
        [$name, $rest] = $this->xPN_PREFIX($v);

        $colon = $this->x("\:", $rest);
        if (!$colon) {
            return [0, $v];
        }

        return [($name ?: '').':', $colon[1]];
    }
716
717
    /* 72 */
718
719 100
    /**
     * Matches "prefix:local" and expands it with the declared namespace.
     * Fails when whitespace follows the colon, when the local part is
     * missing, or when the prefix has not been declared.
     *
     * @param string $v text to parse
     *
     * @return array [namespace IRI + local name, remaining text], or [0, $v]
     */
    protected function xPNAME_LN($v)
    {
        [$ns, $rest] = $this->xPNAME_NS($v);
        if (!$ns || $this->x('\s', $rest)) {
            return [0, $v];
        }

        [$local, $rest] = $this->xPN_LOCAL($rest);
        if (!$local || !isset($this->prefixes[$ns])) {
            return [0, $v];
        }

        return [$this->prefixes[$ns].$local, $rest];
    }
733
734
    /* 76 */
735
736 75
    /**
     * Matches a language tag ("@" + letters, optionally followed by
     * "-"-separated alphanumeric subtags) that starts without leading
     * whitespace.
     *
     * @param string $v text to parse
     *
     * @return array [tag without the "@", remaining text], or [0, $v]
     */
    protected function xLANGTAG($v)
    {
        if ($this->x('\s', $v)) {
            return [0, $v];
        }

        $tag = $this->x('\@([a-z]+(\-[a-z0-9]+)*)', $v);

        // Group 2 is the last subtag; the remainder is therefore group 3.
        return $tag ? [$tag[1], $tag[3]] : [0, $v];
    }
744
745
    /* 77.. */
746
747 99
    /**
     * Matches a run of decimal digits.
     *
     * @param string $v text to parse
     *
     * @return array [digits, remaining text], or [false, $v]
     */
    protected function xINTEGER($v)
    {
        $match = $this->x('([0-9]+)', $v);

        return $match ? [$match[1], $match[2]] : [false, $v];
    }
755
756
    /* 78.. */
757
758 99
    /**
     * Matches a decimal number: digits, a dot and optional digits, or a dot
     * followed by digits.
     *
     * @param string $v text to parse
     *
     * @return array [matched number, remaining text], or [false, $v]
     */
    protected function xDECIMAL($v)
    {
        $forms = ['([0-9]+\.[0-9]*)', '(\.[0-9]+)'];
        foreach ($forms as $form) {
            $match = $this->x($form, $v);
            if ($match) {
                return [$match[1], $match[2]];
            }
        }

        return [false, $v];
    }
769
770
    /* 79.., 86.. */
771
772 99
    /**
     * Matches a number with an exponent part. The three mantissa forms
     * ("1.5E3", ".5E3", "1E3") are tried in that order.
     *
     * @param string $v text to parse
     *
     * @return array [matched number, remaining text], or [false, $v]
     */
    protected function xDOUBLE($v)
    {
        $forms = [
            '([0-9]+\.[0-9]*E[\+\-]?[0-9]+)',
            '(\.[0-9]+E[\+\-]?[0-9]+)',
            '([0-9]+E[\+\-]?[0-9]+)',
        ];
        foreach ($forms as $form) {
            $match = $this->x($form, $v);
            if ($match) {
                return [$match[1], $match[2]];
            }
        }

        return [false, $v];
    }
786
787
    /* 92 */
788
789 99
    /**
     * Tries to read an empty collection "()" (only whitespace allowed between the
     * parentheses) and maps it to the rdf:nil resource.
     *
     * @param string $v Remaining input.
     *
     * @return array Two elements: a URI term array for rdf:nil and the remaining input
     *               on success, otherwise 0 and the untouched input.
     */
    protected function xNIL($v)
    {
        $match = $this->x('\([\x20\x9\xd\xa]*\)', $v);
        if (!$match) {
            return [0, $v];
        }

        $nil = ['type' => 'uri', 'value' => NamespaceHelper::NAMESPACE_RDF.'nil'];

        return [$nil, $match[1]];
    }
797
798
    /* 95.. */
799
800 99
    /**
     * Tries to read a run of letters or a single backslash-u escape with 1 to 4 hex digits.
     *
     * @param string $v Remaining input.
     *
     * @return array Two elements: the matched text and the remaining input on success,
     *               otherwise 0 and the untouched input.
     */
    protected function xPN_CHARS_BASE($v)
    {
        $match = $this->x("([a-z]+|\\\u[0-9a-f]{1,4})", $v);

        return $match ? [$match[1], $match[2]] : [0, $v];
    }
808
809
    /* 96 */
810
811 98
    /**
     * Tries to read base name characters (see xPN_CHARS_BASE) or, failing that, a
     * single underscore.
     *
     * @param string $v Remaining input.
     *
     * @return array Two elements: the matched text and the remaining input on success,
     *               otherwise 0 and the untouched input.
     */
    protected function xPN_CHARS_U($v)
    {
        list($base, $rest) = $this->xPN_CHARS_BASE($v);
        if ($base) {
            return [$base, $rest];
        }

        $underscore = $this->x('(_)', $v);

        return $underscore ? [$underscore[1], $underscore[2]] : [0, $v];
    }
821
822
    /* 97.. */
823
824 98
    /**
     * Reads a variable name by repeatedly consuming digit runs, name characters
     * (see xPN_CHARS_U) and, once the name is non-empty, characters of the extra class.
     *
     * NOTE(review): PCRE reads at most two hex digits after "\x", so "\x300-\x36f"
     * presumably does not describe U+0300..U+036F as it appears to intend - confirm
     * before changing, since x() works on bytes.
     *
     * @param string $v Remaining input.
     *
     * @return array Two elements: the collected name (possibly '') and the remaining input.
     */
    protected function xVARNAME($v)
    {
        $name = '';

        while (true) {
            $digits = $this->x('([0-9]+)', $v);
            if ($digits) {
                $name .= $digits[1];
                $v = $digits[2];
                continue;
            }

            list($chars, $rest) = $this->xPN_CHARS_U($v);
            if ($chars) {
                $name .= $chars;
                $v = $rest;
                continue;
            }

            // The extra characters are only accepted after something was collected.
            if ($name && ($extra = $this->x('([\xb7\x300-\x36f]+)', $v))) {
                $name .= $extra[1];
                $v = $extra[2];
                continue;
            }

            // Nothing matched in this round: the name is complete.
            break;
        }

        return [$name, $v];
    }
846
847
    /* 98.. */
848
849 4
    /**
     * Tries to read name characters (see xPN_CHARS_U) or, failing that, one character
     * of the extra class (hyphen, digit, ...).
     *
     * NOTE(review): PCRE reads at most two hex digits after "\x", so "\x300-\x36f"
     * presumably does not describe U+0300..U+036F as it appears to intend - confirm.
     *
     * @param string $v Remaining input.
     *
     * @return array Two elements: the matched text and the remaining input on success,
     *               otherwise false and the untouched input.
     */
    protected function xPN_CHARS($v)
    {
        list($chars, $rest) = $this->xPN_CHARS_U($v);
        if ($chars) {
            return [$chars, $rest];
        }

        $extra = $this->x('([\-0-9\xb7\x300-\x36f])', $v);

        return $extra ? [$extra[1], $extra[2]] : [false, $v];
    }
859
860
    /* 99 */
861
862 100
    /**
     * Tries to read the prefix part of a prefixed name.
     *
     * A single-regex shortcut is tried first; only when it fails is the prefix
     * assembled piece by piece from base characters, name characters and dots.
     *
     * @param string $v Remaining input.
     *
     * @return array Two elements: the prefix (0 when nothing matched) and the remaining input.
     */
    protected function xPN_PREFIX($v)
    {
        /* accelerator */
        $shortcut = $this->x("([^\s\:\(\)\{\}\;\,]+)", $v, 's');
        if ($shortcut) {
            return [$shortcut[1], $shortcut[2]]; /* @@testing */
        }

        list($prefix, $rest) = $this->xPN_CHARS_BASE($v);
        if (!$prefix) {
            return [$prefix, $rest];
        }

        while (true) {
            list($chars, $rest) = $this->xPN_CHARS($rest);
            if (false !== $chars) {
                $prefix .= $chars;
                continue;
            }

            $dot = $this->x("\.", $rest);
            if ($dot) {
                $prefix .= '.';
                $rest = $dot[1];
                continue;
            }

            break;
        }

        // One more attempt to append trailing name characters.
        list($chars, $rest) = $this->xPN_CHARS($rest);
        $prefix .= $chars ?: '';

        return [$prefix, $rest];
    }
886
887
    /* 100 */
888
889 37
    /**
     * Tries to read the local part of a prefixed name.
     *
     * A single-regex shortcut is used when its match is not directly followed by a
     * dot; otherwise the local part is assembled piece by piece from single digits,
     * name characters and inner dots.
     *
     * @param string $v Remaining input.
     *
     * @return array Two elements: the local part (possibly '') and the remaining input.
     */
    protected function xPN_LOCAL($v)
    {
        /* accelerator */
        $shortcut = $this->x("([^\s\(\)\{\}\[\]\;\,\.]+)", $v, 's');
        if ($shortcut && !preg_match('/^\./', $shortcut[2])) {
            return [$shortcut[1], $shortcut[2]]; /* @@testing */
        }

        $local = '';
        $rest = $v;

        while (true) {
            // Whitespace ends the local part.
            if ($this->x('\s', $rest)) {
                break;
            }

            $digit = $this->x('([0-9])', $rest);
            if ($digit) {
                $local .= $digit[1];
                $rest = $digit[2];
                continue;
            }

            list($chars, $rest) = $this->xPN_CHARS_U($rest);
            if ($chars) {
                $local .= $chars;
                continue;
            }

            // Dots and the extended characters may not start the local part.
            if (!$local) {
                break;
            }

            // A dot is kept only when it is not followed by whitespace or "}".
            $dot = $this->x('(\.)', $rest);
            if ($dot && !preg_match('/^[\s\}]/s', $dot[2])) {
                $local .= $dot[1];
                $rest = $dot[2];
            }

            list($chars, $rest) = $this->xPN_CHARS($rest);
            if (!$chars) {
                break;
            }
            $local .= $chars;
        }

        return [$local, $rest];
    }
922
923 75
    /**
     * Decodes backslash escapes in a string: the short escapes \t, \n, \r, \" and \',
     * followed by the numeric escapes \uXXXX and \UXXXXXXXX, which are replaced by the
     * UTF-8 encoding of the given code point.
     *
     * Hex digits are accepted in upper and lower case. Numeric escapes are decoded in
     * a single pass, so text produced by one escape is never interpreted as the start
     * of another one (e.g. "\u005Cu0041" yields the literal text "\u0041").
     *
     * @param string $v Text that may contain escapes.
     *
     * @return string The text with the escapes decoded; code points of 0x200000 and
     *                above are replaced by an empty string.
     */
    protected function unescapeNtripleUTF($v)
    {
        if (false === strpos($v, '\\')) {
            return $v;
        }

        $mappings = ['t' => "\t", 'n' => "\n", 'r' => "\r", '\"' => '"', '\'' => "'"];
        foreach ($mappings as $in => $out) {
            $v = preg_replace('/\x5c(['.$in.'])/', $out, $v);
        }

        if (false === stripos($v, '\u')) {
            return $v;
        }

        $decoded = preg_replace_callback(
            '/\\\\(?:U([0-9A-Fa-f]{8})|u([0-9A-Fa-f]{4}))/',
            static function (array $m) {
                // Group 1 is filled for \U (8 digits), group 2 for \u (4 digits).
                $hex = '' !== $m[1] ? $m[1] : $m[2];
                // hexdec() may return a float on platforms with small integers.
                $no = (int) hexdec($hex);

                if ($no < 128) {
                    return \chr($no);
                }
                if ($no < 2048) {
                    return \chr(($no >> 6) + 192).\chr(($no & 63) + 128);
                }
                if ($no < 65536) {
                    return \chr(($no >> 12) + 224).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
                }
                if ($no < 2097152) {
                    return \chr(($no >> 18) + 240).\chr((($no >> 12) & 63) + 128).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
                }

                return '';
            },
            $v
        );

        // preg_replace_callback() yields null on a PCRE error; keep the input then.
        return null === $decoded ? $v : $decoded;
    }
953
954 99
    /**
     * Tries to read a placeholder: "?" or "$" directly followed (ignoring surrounding
     * whitespace) by a balanced "{...}" group. Every distinct placeholder name is
     * recorded once in $this->r['placeholders'].
     *
     * @param string $v Remaining input.
     *
     * @return array Two elements: a term array of type "placeholder" and the remaining
     *               input on success, otherwise 0 and the untouched input.
     */
    protected function xPlaceholder($v)
    {
        $sigil = $this->x('(\?|\$)', $v);
        if (!$sigil) {
            return [0, $v];
        }

        $tail = trim($sigil[2]);

        // (?R) recurses into the whole pattern so nested braces stay balanced; the
        // group only counts when it starts right after the sigil.
        if (!preg_match('/(\{(?:[^{}]+|(?R))*\})/', $sigil[2], $m) || 0 !== strpos($tail, $m[1])) {
            return [0, $v];
        }

        $ph = substr($m[1], 1, -1);
        $rest = substr($tail, \strlen($m[1]));

        if (!isset($this->r['placeholders'])) {
            $this->r['placeholders'] = [];
        }
        // Strict comparison: numeric-looking names such as "1e1" and "10" are
        // different placeholders and must not be merged by loose comparison.
        if (!\in_array($ph, $this->r['placeholders'], true)) {
            $this->r['placeholders'][] = $ph;
        }

        return [['value' => $ph, 'type' => 'placeholder'], $rest];
    }
974
}
975