Passed
Push — master ( bec537...66c0c2 )
by Konrad
03:40
created

TurtleParser::xCollection()   C

Complexity

Conditions 12
Paths 37

Size

Total Lines 90
Code Lines 77

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 50
CRAP Score 17.7652

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 12
eloc 77
c 1
b 0
f 0
nc 37
nop 1
dl 0
loc 90
ccs 50
cts 76
cp 0.6579
crap 17.7652
rs 6.0751

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/*
4
 * This file is part of the sweetrdf/InMemoryStoreSqlite package and licensed under
5
 * the terms of the GPL-3 license.
6
 *
7
 * (c) Konrad Abicht <[email protected]>
8
 * (c) Benjamin Nowack
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace sweetrdf\InMemoryStoreSqlite\Parser;
15
16
use Exception;
17
use function sweetrdf\InMemoryStoreSqlite\calcURI;
18
use sweetrdf\InMemoryStoreSqlite\Log\Logger;
19
use sweetrdf\InMemoryStoreSqlite\NamespaceHelper;
20
use sweetrdf\InMemoryStoreSqlite\StringReader;
21
22
class TurtleParser extends BaseParser
23
{
24 88
    /**
     * State marker; set to 0 by the constructor.
     *
     * @var int
     */
    protected $state;

    /**
     * Input that parse() could not consume.
     *
     * @var string
     */
    protected $unparsed_code;

    /**
     * Threshold for the loop counter in parse(); exceeding it raises an Exception.
     *
     * @var int
     */
    protected $max_parsing_loops;

    /**
     * Reader instance created by parse().
     *
     * @var StringReader|null
     */
    protected $reader;

    /**
     * Sets up the parser with its logger and default parsing limits.
     *
     * The properties above are declared explicitly (and left untyped on
     * purpose, so subclasses may redeclare them) because creating them
     * dynamically is deprecated as of PHP 8.2.
     */
    public function __construct(Logger $logger)
    {
        parent::__construct($logger);

        $this->state = 0;
        $this->unparsed_code = '';
        $this->max_parsing_loops = 500;
    }
32
33 88
    /**
     * Matches a pattern fragment at the beginning of the given input.
     *
     * Leading 0xA0/0xC2 bytes are replaced by a single space and leading
     * "#" comments (up to the end of their line) are dropped first. The
     * fragment is then anchored after optional whitespace and followed by a
     * catch-all group, so the last entry of a successful match holds the
     * input that was not consumed.
     *
     * @param string $re      PCRE fragment without delimiters
     * @param string $v       input to match against
     * @param string $options PCRE modifiers appended to the pattern
     *
     * @return array|false match groups on success, false otherwise
     */
    protected function x($re, $v, $options = 'si')
    {
        $input = preg_replace('/^[\xA0\xC2]+/', ' ', $v);

        /* comment removal */
        while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $input, $comment)) {
            $input = $comment[2];
        }

        $pattern = "/^\s*".$re.'(.*)$/'.$options;
        if (preg_match($pattern, $input, $groups)) {
            return $groups;
        }

        return false;
    }
44
45 10
    /**
     * Increments the blank node counter and returns a new blank node label
     * built from "_:", the blank node prefix and the counter value.
     */
    private function createBnodeID(): string
    {
        ++$this->bnode_id;

        return "_:{$this->bnode_prefix}{$this->bnode_id}";
    }
51
52 8
    /**
     * Stores a triple at the current triple index and advances the index.
     *
     * @param array $t triple description
     */
    protected function addT(array $t): void
    {
        $this->triples[$this->t_count++] = $t;
    }
57
58
    /**
     * Returns the current value of the triple counter maintained by addT().
     */
    protected function countTriples()
    {
        return $this->t_count;
    }
62
63
    /**
     * Returns the input that was left unparsed by the last parse() call
     * (an empty string right after construction).
     */
    protected function getUnparsedCode()
    {
        return $this->unparsed_code;
    }
67
68 9
    /**
     * Reads the input chunk by chunk and collects all triples found in it.
     *
     * The prologue (base/prefix declarations) is consumed first, afterwards
     * triple blocks are parsed. Input that could not be consumed ends up in
     * $this->unparsed_code; if it contains anything besides comments and
     * whitespace and no error was logged yet, an error with the first 30
     * bytes of it is logged.
     *
     * @param string $path passed on to StringReader::init()
     * @param string $data passed on to StringReader::init()
     *
     * @throws Exception if the loop counter exceeds max_parsing_loops, i.e.
     *                   too many chunks were read without parsing progress
     */
    public function parse(string $path, string $data = ''): void
    {
        $this->reader = new StringReader();
        $this->reader->init($path, $data);
        $this->base = $this->reader->getBase();
        $this->r = ['vars' => []];

        /* parse */
        $buffer = '';
        $more_triples = [];
        $sub_v = '';
        $sub_v2 = '';
        $loops = 0;
        $prologue_done = 0;
        while ($d = $this->reader->readStream(8192)) {
            $buffer .= $d;
            $sub_v = $buffer;
            do {
                $proceed = 0;
                if (!$prologue_done) {
                    $proceed = 1;
                    if (([$sub_r, $sub_v] = $this->xPrologue($sub_v)) && $sub_r) {
                        $loops = 0;
                        $sub_v .= $this->reader->readStream(128);
                        /* in case we missed the final DOT in the previous prologue loop */
                        if ($sub_r = $this->x('\.', $sub_v)) {
                            $sub_v = $sub_r[1];
                        }
                        /* more prologue to come, use outer loop */
                        if ($this->x("\@?(base|prefix)", $sub_v)) {
                            $proceed = 0;
                        }
                    } else {
                        $prologue_done = 1;
                    }
                }
                if (
                    $prologue_done
                    && ([$sub_r, $sub_v, $more_triples, $sub_v2] = $this->xTriplesBlock($sub_v))
                    && \is_array($sub_r)
                ) {
                    $proceed = 1;
                    $loops = 0;
                    foreach ($sub_r as $t) {
                        $this->addT($t);
                    }
                }
            } while ($proceed);

            ++$loops;
            $buffer = $sub_v;
            if ($loops > $this->max_parsing_loops) {
                $msg = 'too many loops: '.$loops.'. Could not parse "'.substr($buffer, 0, 200).'..."';
                throw new Exception($msg);
            }
        }

        /* triples of the last statement which were not terminated explicitly */
        foreach ($more_triples as $t) {
            $this->addT($t);
        }
        $this->unparsed_code = \count($more_triples) ? $sub_v2 : $sub_v;

        /* remove trailing comments */
        while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) {
            $this->unparsed_code = $m[2];
        }

        if ($this->unparsed_code && !$this->logger->hasEntries('error')) {
            /*
             * Only line breaks are replaced here. The former character class
             * also contained "|", which blanked out literal pipes in the
             * reported excerpt.
             */
            $rest = preg_replace('/[\x0a\x0d]/', ' ', substr($this->unparsed_code, 0, 30));
            if (trim($rest)) {
                $this->logger->error('Could not parse "'.$rest.'"');
            }
        }
    }
141
142 9
    /**
     * Consumes an optional base declaration followed by any number of prefix
     * declarations and registers them on the parser.
     *
     * Nothing is consumed once at least one triple has been added.
     *
     * @param string $v input
     *
     * @return array [1 if a declaration was found, otherwise 0; remaining input]
     */
    protected function xPrologue($v)
    {
        $found = 0;
        if ($this->t_count) {
            return [$found, $v];
        }

        [$base, $v] = $this->xBaseDecl($v);
        if ($base) {
            $this->base = $base;
            $found = 1;
        }

        while (true) {
            [$decl, $v] = $this->xPrefixDecl($v);
            if (!$decl) {
                break;
            }
            $this->prefixes[$decl['prefix']] = $decl['uri'];
            $found = 1;
        }

        return [$found, $v];
    }
158
159
    /* 3 */
160
161 88
    /**
     * Parses a base declaration ("@base" or "base", followed by an IRI
     * reference and an optional DOT).
     *
     * @param string $v input
     *
     * @return array [IRI reference as returned by xIRI_REF(), remaining input]
     *               or [0, $v] if there is no base declaration
     */
    protected function xBaseDecl($v)
    {
        $keyword = $this->x("\@?base\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$iri, $rest] = $this->xIRI_REF($keyword[1]);
        if (!$iri) {
            return [0, $v];
        }

        /* the closing DOT is optional */
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [$iri, $rest];
    }
175
176
    /* 4 */
177
178 88
    /**
     * Parses a prefix declaration ("@prefix" or "prefix", a namespace prefix,
     * an IRI reference and an optional DOT).
     *
     * @param string $v input
     *
     * @return array [['prefix' => ..., 'uri_ref' => ..., 'uri' => ...], remaining input]
     *               or [0, $v] if there is no prefix declaration; 'uri' is
     *               'uri_ref' passed through calcURI() with the current base
     */
    protected function xPrefixDecl($v)
    {
        $keyword = $this->x("\@?prefix\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$prefix, $rest] = $this->xPNAME_NS($keyword[1]);
        if (!$prefix) {
            return [0, $v];
        }

        [$uri_ref, $rest] = $this->xIRI_REF($rest);
        if (!$uri_ref) {
            return [0, $v];
        }

        $uri = calcURI($uri_ref, $this->base);

        /* the closing DOT is optional */
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [['prefix' => $prefix, 'uri_ref' => $uri_ref, 'uri' => $uri], $rest];
    }
196
197
    /* 21.., 32.. */
198
199 88
    /**
     * Parses as many triple statements as possible from the given input.
     *
     * Works as a small state machine:
     *   1 = expecting subject, 2 = expecting predicate,
     *   3 = expecting object,  4 = expecting ".", ";", "," or the block end.
     *
     * Triples of a statement are collected in a pending list and only moved
     * to the result once the statement is terminated (DOT) or the end of the
     * block ("}", "{", OPTIONAL, FILTER, GRAPH) is reached.
     *
     * @param string $v input
     *
     * @return array [
     *                   terminated triples (0 if there are none),
     *                   input remaining after the last terminated statement,
     *                   pending triples of an unterminated statement,
     *                   input remaining after the pending triples,
     *               ]
     */
    protected function xTriplesBlock($v)
    {
        $pre_r = [];
        $r = [];
        $state = 1;
        $sub_v = $v;
        $buffer = $sub_v;
        do {
            $proceed = 0;
            if (1 === $state) {/* expecting subject */
                $t = ['type' => 'triple', 's' => '', 'p' => '', 'o' => '', 's_type' => '', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
                if (([$sub_r, $sub_v] = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['value'];
                    $t['s_type'] = $sub_r['type'];
                    $state = 2;
                    $proceed = 1;
                    if ($sub_r = $this->x('(\}|\.)', $sub_v)) {
                        if ('placeholder' === $t['s_type']) {
                            $state = 4;
                        } else {
                            $this->logger->error('"'.$sub_r[1].'" after subject found.');
                        }
                    }
                } elseif (([$sub_r, $sub_v] = $this->xCollection($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                    if ($this->x('\.', $sub_v)) {
                        $this->logger->error('DOT after subject found.');
                    }
                } elseif (([$sub_r, $sub_v] = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    $this->logger->error('Subject expected, DOT found.'.$sub_v);
                }
            }
            if (2 === $state) {/* expecting predicate */
                if ($sub_r = $this->x('a\s+', $sub_v)) {
                    /* "a" is shorthand for rdf:type */
                    $sub_v = $sub_r[1];
                    $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                    $t['p_type'] = 'uri';
                    $state = 3;
                    $proceed = 1;
                } elseif (([$sub_r, $sub_v] = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    if ('bnode' === $sub_r['type']) {
                        $this->logger->error('Blank node used as triple predicate');
                    }
                    $t['p'] = $sub_r['value'];
                    $t['p_type'] = $sub_r['type'];
                    $state = 3;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    /* let the state 4 handling below consume the DOT */
                    $state = 4;
                } elseif ($this->x('\}', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
            if (3 === $state) {/* expecting object */
                if (([$sub_r, $sub_v] = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['value'];
                    $t['o_type'] = $sub_r['type'];
                    $t['o_lang'] = $sub_r['lang'] ?? '';
                    $t['o_datatype'] = $sub_r['datatype'] ?? '';
                    $pre_r[] = $t;
                    $state = 4;
                    $proceed = 1;
                } elseif (([$sub_r, $sub_v] = $this->xCollection($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /*
                     * $t is reused for all objects of a "," list, so both
                     * literal attributes have to be reset; otherwise the
                     * language tag of a preceding literal would leak into
                     * this blank node object.
                     */
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                } elseif (([$sub_r, $sub_v] = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /* see above: do not inherit attributes of a preceding literal */
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                }
            }
            if (4 === $state) {/* expecting . or ; or , or } */
                if ($sub_r = $this->x('\.', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $state = 1;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\;', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 2;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\,', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 3;
                    $proceed = 1;
                    if ($this->x('\}', $sub_v)) {
                        $this->logger->error('Object expected, } found.');
                    }
                }
                /* end of block: flush pending triples and stop */
                if ($this->x('(\}|\{|OPTIONAL|FILTER|GRAPH)', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
        } while ($proceed);

        return \count($r) ? [$r, $buffer, $pre_r, $sub_v] : [0, $buffer, $pre_r, $sub_v];
    }
321
322
    /* 39.. */
323
324 87
    /**
     * Parses a blank node property list ("[ predicate object ; ... ]").
     *
     * A new blank node is created as subject of all triples found between
     * the brackets. States: 2 = expecting predicate, 3 = expecting object,
     * 4 = expecting ".", ";", "," or "]".
     *
     * @param string $v input
     *
     * @return array [['id' => blank node label, 'type' => 'bnode', 'triples' => [...]], remaining input]
     *               or [0, $v] if $v does not start with a properly closed list
     */
    protected function xBlankNodePropertyList($v)
    {
        if (!($sub_r = $this->x('\[', $v))) {
            return [0, $v];
        }

        $sub_v = $sub_r[1];
        $s = $this->createBnodeID();
        $r = ['id' => $s, 'type' => 'bnode', 'triples' => []];
        $t = ['type' => 'triple', 's' => $s, 'p' => '', 'o' => '', 's_type' => 'bnode', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
        $state = 2;
        $closed = 0;
        do {
            $proceed = 0;
            if (2 === $state) {/* expecting predicate */
                if ($sub_r = $this->x('a\s+', $sub_v)) {
                    /* "a" is shorthand for rdf:type */
                    $sub_v = $sub_r[1];
                    $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                    $t['p_type'] = 'uri';
                    $state = 3;
                    $proceed = 1;
                } elseif (([$sub_r, $sub_v] = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['p'] = $sub_r['value'];
                    $t['p_type'] = $sub_r['type'];
                    $state = 3;
                    $proceed = 1;
                }
            }
            if (3 === $state) {/* expecting object */
                if (([$sub_r, $sub_v] = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['value'];
                    $t['o_type'] = $sub_r['type'];
                    $t['o_lang'] = $sub_r['lang'] ?? '';
                    $t['o_datatype'] = $sub_r['datatype'] ?? '';
                    $r['triples'][] = $t;
                    $state = 4;
                    $proceed = 1;
                } elseif (([$sub_r, $sub_v] = $this->xCollection($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /*
                     * $t is reused for every triple of the list, so both
                     * literal attributes have to be reset; otherwise the
                     * language tag of a preceding literal would leak into
                     * this blank node object.
                     */
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                } elseif (([$sub_r, $sub_v] = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /* see above: do not inherit attributes of a preceding literal */
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                }
            }
            if (4 === $state) {/* expecting . or ; or , or ] */
                /* the checks are not exclusive, e.g. ";" may directly be followed by "]" */
                if ($sub_r = $this->x('\.', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 1;
                    $proceed = 1;
                }
                if ($sub_r = $this->x('\;', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 2;
                    $proceed = 1;
                }
                if ($sub_r = $this->x('\,', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 3;
                    $proceed = 1;
                }
                if ($sub_r = $this->x('\]', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $proceed = 0;
                    $closed = 1;
                }
            }
        } while ($proceed);

        return $closed ? [$r, $sub_v] : [0, $v];
    }
406
407
    /* 40.. */
408
409 87
    /**
     * Parses a collection ("( item item ... )") into rdf:first/rdf:rest triples.
     *
     * Each item gets its own blank node: rdf:first points to the item,
     * rdf:rest to the blank node of the next item, or to rdf:nil after the
     * last one. Items may be terms, nested collections or blank node
     * property lists; triples of nested structures are appended as well.
     *
     * @param string $v input
     *
     * @return array [['id' => label of the first blank node, 'type' => 'bnode', 'triples' => [...]], remaining input]
     *               or [0, $v] if $v does not start with a properly closed,
     *               non-empty collection
     */
    protected function xCollection($v)
    {
        $open = $this->x('\(', $v);
        if (!$open) {
            return [0, $v];
        }

        $rest = $open[1];
        $node = $this->createBnodeID();
        $list = ['id' => $node, 'type' => 'bnode', 'triples' => []];
        $rdf = NamespaceHelper::NAMESPACE_RDF;

        while (true) {
            /* rdf:first of the current list node */
            if (([$item, $rest] = $this->xVarOrTerm($rest)) && $item) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => $rdf.'first',
                    'p_type' => 'uri',
                    'o' => $item['value'],
                    'o_type' => $item['type'],
                    'o_lang' => $item['lang'] ?? '',
                    'o_datatype' => $item['datatype'] ?? '',
                ];
            } elseif (([$item, $rest] = $this->xCollection($rest)) && $item) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => $rdf.'first',
                    'p_type' => 'uri',
                    'o' => $item['id'],
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $list['triples'] = array_merge($list['triples'], $item['triples']);
            } elseif (([$item, $rest] = $this->xBlankNodePropertyList($rest)) && $item) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    'p' => $rdf.'first',
                    'o' => $item['id'],
                    's_type' => 'bnode',
                    'p_type' => 'uri',
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $list['triples'] = array_merge($list['triples'], $item['triples']);
            } else {
                /* neither a further item nor a closing parenthesis */
                return [0, $v];
            }

            /* rdf:rest of the current list node */
            $close = $this->x('\)', $rest);
            if ($close) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => $rdf.'rest',
                    'p_type' => 'uri',
                    'o' => $rdf.'nil',
                    'o_type' => 'uri',
                    'o_lang' => '',
                    'o_datatype' => '',
                ];

                return [$list, $close[1]];
            }

            $next_node = $this->createBnodeID();
            $list['triples'][] = [
                'type' => 'triple',
                's' => $node,
                'p' => $rdf.'rest',
                'o' => $next_node,
                's_type' => 'bnode',
                'p_type' => 'uri',
                'o_type' => 'bnode',
                'o_lang' => '',
                'o_datatype' => '',
            ];
            $node = $next_node;
        }
    }
500
501
    /* 42 */
502
503 88
    /**
     * Parses either a variable or a graph term, in that order.
     *
     * @param string $v input
     *
     * @return array [parsed term, remaining input] or [0, $v]
     */
    protected function xVarOrTerm($v)
    {
        foreach (['xVar', 'xGraphTerm'] as $method) {
            [$term, $rest] = $this->$method($v);
            if ($term) {
                return [$term, $rest];
            }
        }

        return [0, $v];
    }
513
514
    /* 44, 74.., 75.. */
515
516 88
    /**
     * Parses a variable ("?name" or "$name") and remembers its name in
     * $this->r['vars'].
     *
     * @param string $v input
     *
     * @return array [['value' => variable name, 'type' => 'var'], remaining input]
     *               or [0, $v] if $v does not start with a variable
     */
    protected function xVar($v)
    {
        $r = $this->x('(\?|\$)([^\s]+)', $v);
        if (!$r) {
            return [0, $v];
        }

        [$name, $sub_v] = $this->xVARNAME($r[2]);
        if (!$name) {
            return [0, $v];
        }

        /*
         * Strict comparison: a loose in_array() treats numeric looking names
         * such as "1" and "01" as equal, so the second one was never recorded.
         */
        if (!\in_array($name, $this->r['vars'], true)) {
            $this->r['vars'][] = $name;
        }

        /* $sub_v is what xVARNAME() left over from the non-whitespace run */
        return [['value' => $name, 'type' => 'var'], $sub_v.$r[3]];
    }
530
531
    /* 45 */
532
533 88
    /**
     * Tries the graph term parsers (x + key of the map below) one after
     * another and returns the result of the first one that matches.
     *
     * Scalar results are wrapped into ['value' => ...]; the mapped type is
     * only applied if the parser did not provide a type itself.
     *
     * @param string $v input
     *
     * @return array [term description incl. 'type', remaining input] or [0, $v]
     */
    protected function xGraphTerm($v)
    {
        $default_types = [
            'IRIref' => 'uri',
            'RDFLiteral' => 'literal',
            'NumericLiteral' => 'literal',
            'BooleanLiteral' => 'literal',
            'BlankNode' => 'bnode',
            'NIL' => 'uri',
            'Placeholder' => 'placeholder',
        ];

        foreach ($default_types as $term => $type) {
            [$result, $rest] = $this->{'x'.$term}($v);
            if (!$result) {
                continue;
            }

            if (!\is_array($result)) {
                $result = ['value' => $result];
            }
            if (!isset($result['type'])) {
                $result['type'] = $type;
            }

            return [$result, $rest];
        }

        return [0, $v];
    }
557
558
    /* 60 */
559
560 88
    /**
     * Parses a quoted literal with an optional language tag or datatype.
     *
     * The string value is passed through unescapeNtripleUTF(). A language tag
     * or a "^^" datatype is only accepted if it follows the closing quote
     * without whitespace in between.
     *
     * @param string $v input
     *
     * @return array [literal description as returned by xString(), optionally
     *               extended by 'lang' or 'datatype'; remaining input] or [0, $v]
     */
    protected function xRDFLiteral($v)
    {
        if (([$sub_r, $sub_v] = $this->xString($v)) && $sub_r) {
            $sub_r['value'] = $this->unescapeNtripleUTF($sub_r['value']);
            $r = $sub_r;
            if (([$sub_r, $sub_v] = $this->xLANGTAG($sub_v)) && $sub_r) {
                $r['lang'] = $sub_r;
            } elseif (
                !$this->x('\s', $sub_v)
                && ($sub_r = $this->x('\^\^', $sub_v))
                && ([$sub_r, $sub_v] = $this->xIRIref($sub_r[1]))
                /*
                 * $sub_r is the datatype IRI (or 0) at this point. It was
                 * checked via $sub_r[1] before, which is the second character
                 * of the IRI, respectively an invalid offset access on 0.
                 */
                && $sub_r
            ) {
                $r['datatype'] = $sub_r;
            }

            return [$r, $sub_v];
        }

        return [0, $v];
    }
581
582
    /* 61.., 62.., 63.., 64.. */
583
584 87
    /**
     * Parses an optionally signed number; double is tried first, then
     * decimal, then integer.
     *
     * @param string $v input
     *
     * @return array [['value' => sign + digits, 'type' => 'literal', 'datatype' => XSD type IRI], remaining input]
     *               or [0, $v]
     */
    protected function xNumericLiteral($v)
    {
        /* the sign is optional, so this match always succeeds */
        $sign = $this->x('(\-|\+)?', $v);
        $prefix = $sign[1];
        $sub_v = $sign[2];

        $xsd_types = ['DOUBLE' => 'double', 'DECIMAL' => 'decimal', 'INTEGER' => 'integer'];
        foreach ($xsd_types as $token => $xsd) {
            [$number, $sub_v] = $this->{'x'.$token}($sub_v);
            if (false === $number) {
                continue;
            }

            $literal = [
                'value' => $prefix.$number,
                'type' => 'literal',
                'datatype' => NamespaceHelper::NAMESPACE_XSD.$xsd,
            ];

            return [$literal, $sub_v];
        }

        return [0, $v];
    }
604
605
    /* 65.. */
606
607 87
    /**
     * Parses "true" or "false" (matched case-insensitively by x()).
     *
     * @param string $v input
     *
     * @return array [matched keyword, remaining input] or [0, $v]
     */
    protected function xBooleanLiteral($v)
    {
        $match = $this->x('(true|false)', $v);

        return $match ? [$match[1], $match[2]] : [0, $v];
    }
615
616
    /* 66.., 87.., 88.., 89.., 90.., 91.. */
617
618 88
    /**
     * Parses a string enclosed in ', ", ''' or """ (largely simplified).
     *
     * The string ends at the first closing delimiter which is not escaped,
     * i.e. not preceded by an odd number of backslashes.
     *
     * @param string $v input
     *
     * @return array [['value' => raw content, 'type' => 'literal', 'sub_type' => ...], remaining input]
     *               or [0, $v] if there is no properly closed string
     */
    protected function xString($v)
    {
        if (!preg_match('/^\s*([\']{3}|\'|[\"]{3}|\")(.*)$/s', $v, $opening)) {
            return [0, $v];
        }

        $delim = $opening[1];
        $rest = $opening[2];
        $sub_types = ["'''" => 'literal_long1', '"""' => 'literal_long2', "'" => 'literal1', '"' => 'literal2'];
        $sub_type = $sub_types[$delim];

        $offset = 0;
        while (false !== ($delim_pos = strpos($rest, $delim, $offset))) {
            $value = substr($rest, 0, $delim_pos);
            $is_escaped = preg_match('/([\x5c]+)$/s', $value, $slashes) && (\strlen($slashes[1]) % 2);
            if (!$is_escaped) {
                $remaining = substr($rest, $delim_pos + \strlen($delim));

                return [['value' => $value, 'type' => 'literal', 'sub_type' => $sub_type], $remaining];
            }

            /* escaped delimiter, keep looking behind it */
            $offset = $delim_pos + 1;
        }

        return [0, $v];
    }
652
653
    /* 67 */
654
655 88
    /**
     * Parses an IRI given either as IRI reference (passed through calcURI()
     * with the current base) or as prefixed name.
     *
     * @param string $v input
     *
     * @return array [IRI, remaining input] or [0, input]
     */
    protected function xIRIref($v)
    {
        [$iri_ref, $rest] = $this->xIRI_REF($v);
        if ($iri_ref) {
            return [calcURI($iri_ref, $this->base), $rest];
        }

        [$iri, $rest] = $this->xPrefixedName($rest);
        if ($iri) {
            return [$iri, $rest];
        }

        return [0, $rest];
    }
665
666
    /* 68 */
667
668 88
    /**
     * Parses a prefixed name: either prefix plus local part, or a prefix on
     * its own which then has to be a registered prefix.
     *
     * @param string $v input
     *
     * @return array [IRI, remaining input] or [0, input]
     */
    protected function xPrefixedName($v)
    {
        [$iri, $v] = $this->xPNAME_LN($v);
        if ($iri) {
            return [$iri, $v];
        }

        [$ns, $rest] = $this->xPNAME_NS($v);
        if ($ns && isset($this->prefixes[$ns])) {
            return [$this->prefixes[$ns], $rest];
        }

        return [0, $v];
    }
678
679
    /* 69.., 73.., 93, 94..  */
680
681 87
    /**
     * Parses a labelled blank node ("_:label") or an anonymous one ("[]");
     * the latter gets a newly created label.
     *
     * @param string $v input
     *
     * @return array [['type' => 'bnode', 'value' => label], remaining input] or [0, $v]
     */
    protected function xBlankNode($v)
    {
        $marker = $this->x('\_\:', $v);
        if ($marker) {
            [$label, $rest] = $this->xPN_LOCAL($marker[1]);
            if ($label) {
                return [['type' => 'bnode', 'value' => '_:'.$label], $rest];
            }
        }

        $anonymous = $this->x('\[[\x20\x9\xd\xa]*\]', $v);
        if ($anonymous) {
            return [['type' => 'bnode', 'value' => $this->createBnodeID()], $anonymous[1]];
        }

        return [0, $v];
    }
692
693
    /* 70.. @@sync with SPARQLParser */
694
695 9
    /**
     * Parses an IRI reference in angle brackets.
     *
     * Three forms are tried in this order: a placeholder ("<${...}>",
     * accepted if xPlaceholder() accepts its content), the empty reference
     * "<>", and a regular reference.
     *
     * @param string $v input
     *
     * @return array [content between the brackets (true if empty or otherwise
     *               falsy), remaining input] or [0, $v]
     */
    protected function xIRI_REF($v)
    {
        $placeholder = $this->x('\<(\$\{[^\>]*\})\>', $v);
        if ($placeholder && $this->xPlaceholder($placeholder[1])) {
            return [$placeholder[1], $placeholder[2]];
        }

        $empty = $this->x('\<\>', $v);
        if ($empty) {
            return [true, $empty[1]];
        }

        $iri = $this->x('\<([^\s][^\<\>]*)\>', $v);
        if ($iri) {
            return [$iri[1] ?: true, $iri[2]];
        }

        return [0, $v];
    }
708
709
    /* 71 */
710
711 88
    /**
     * Parses a namespace prefix including the colon; the part in front of
     * the colon may be empty.
     *
     * @param string $v input
     *
     * @return array [prefix incl. ":", remaining input] or [0, $v]
     */
    protected function xPNAME_NS($v)
    {
        [$name, $rest] = $this->xPN_PREFIX($v);

        $colon = $this->x("\:", $rest);
        if (!$colon) {
            return [0, $v];
        }

        return [($name ?: '').':', $colon[1]];
    }
718
719
    /* 72 */
720
721 88
    /**
     * Parses a prefixed name with local part and expands it using the
     * registered prefixes. Whitespace between colon and local part is not
     * allowed, unknown prefixes are rejected.
     *
     * @param string $v input
     *
     * @return array [expanded IRI, remaining input] or [0, $v]
     */
    protected function xPNAME_LN($v)
    {
        [$ns, $rest] = $this->xPNAME_NS($v);
        if (!$ns || $this->x('\s', $rest)) {
            return [0, $v];
        }

        [$local, $rest] = $this->xPN_LOCAL($rest);
        if (!$local || !isset($this->prefixes[$ns])) {
            return [0, $v];
        }

        return [$this->prefixes[$ns].$local, $rest];
    }
735
736
    /* 76 */
737
738 62
    /**
     * Parses a language tag ("@en", "@en-us", ...) which has to start right
     * at the beginning of the input, without leading whitespace.
     *
     * @param string $v input
     *
     * @return array [language tag without "@", remaining input] or [0, $v]
     */
    protected function xLANGTAG($v)
    {
        if ($this->x('\s', $v)) {
            return [0, $v];
        }

        $tag = $this->x('\@([a-z]+(\-[a-z0-9]+)*)', $v);

        return $tag ? [$tag[1], $tag[3]] : [0, $v];
    }
746
747
    /* 77.. */
748
749 87
    /**
     * Parses a run of digits.
     *
     * @param string $v input
     *
     * @return array [digits, remaining input] or [false, $v]
     */
    protected function xINTEGER($v)
    {
        $match = $this->x('([0-9]+)', $v);

        return $match ? [$match[1], $match[2]] : [false, $v];
    }
757
758
    /* 78.. */
759
760 87
    /**
     * Parses a decimal number, either with leading digits ("1.", "1.5") or
     * starting with the dot (".5").
     *
     * @param string $v input
     *
     * @return array [number, remaining input] or [false, $v]
     */
    protected function xDECIMAL($v)
    {
        foreach (['([0-9]+\.[0-9]*)', '(\.[0-9]+)'] as $pattern) {
            $match = $this->x($pattern, $v);
            if ($match) {
                return [$match[1], $match[2]];
            }
        }

        return [false, $v];
    }
771
772
    /* 79.., 86.. */
773
774 87
    /**
     * Matches a number with an exponent part. Three spellings are tried in
     * order: "1.5E3" / "1.E3", ".5E3" and "1E3" (sign after E is optional).
     *
     * @param string $v Input to match against.
     *
     * @return array [number text, remaining input] on success, [false, $v] otherwise.
     */
    protected function xDOUBLE($v)
    {
        $patterns = [
            '([0-9]+\.[0-9]*E[\+\-]?[0-9]+)',
            '(\.[0-9]+E[\+\-]?[0-9]+)',
            '([0-9]+E[\+\-]?[0-9]+)',
        ];

        foreach ($patterns as $pattern) {
            $match = $this->x($pattern, $v);
            if ($match) {
                return [$match[1], $match[2]];
            }
        }

        return [false, $v];
    }
788
789
    /* 92 */
790
791 87
    /**
     * Matches an empty pair of parentheses (only space, tab, CR or LF allowed
     * in between) and maps it to the rdf:nil resource.
     *
     * @param string $v Input to match against.
     *
     * @return array [['type' => 'uri', 'value' => rdf:nil URI], remaining input]
     *               on success, [0, $v] otherwise.
     */
    protected function xNIL($v)
    {
        $match = $this->x('\([\x20\x9\xd\xa]*\)', $v);
        if (!$match) {
            return [0, $v];
        }

        $nil = ['type' => 'uri', 'value' => NamespaceHelper::NAMESPACE_RDF.'nil'];

        return [$nil, $match[1]];
    }
799
800
    /* 95.. */
801
802 87
    /**
     * Matches either a run of letters a-z or a single "\u" escape followed by
     * one to four hex digits.
     *
     * NOTE(review): case sensitivity depends on the flags x() applies, which
     * are not visible in this excerpt.
     *
     * @param string $v Input to match against.
     *
     * @return array [matched text, remaining input] on success, [0, $v] otherwise.
     */
    protected function xPN_CHARS_BASE($v)
    {
        $match = $this->x("([a-z]+|\\\u[0-9a-f]{1,4})", $v);

        return $match ? [$match[1], $match[2]] : [0, $v];
    }
810
811
    /* 96 */
812
813 85
    /**
     * Matches whatever xPN_CHARS_BASE() accepts, or a single underscore.
     *
     * @param string $v Input to match against.
     *
     * @return array [matched text, remaining input] on success, [0, $v] otherwise.
     */
    protected function xPN_CHARS_U($v)
    {
        list($base, $rest) = $this->xPN_CHARS_BASE($v);
        if ($base) {
            return [$base, $rest];
        }

        $underscore = $this->x('(_)', $v);

        return $underscore ? [$underscore[1], $underscore[2]] : [0, $v];
    }
823
824
    /* 97.. */
825
826 85
    /**
     * Collects a variable name by repeatedly consuming digit runs, the
     * characters accepted by xPN_CHARS_U() and, once something has been
     * collected, characters of a third "extra" class.
     *
     * NOTE(review): the extra class '[\xb7\x300-\x36f]' looks like it was
     * meant to cover U+00B7 and U+0300..U+036F; as written "\x30" is parsed
     * on its own. Confirm against the flags x() passes to PCRE before changing.
     *
     * @param string $v Input to match against.
     *
     * @return array [collected name (possibly ''), remaining input].
     */
    protected function xVARNAME($v)
    {
        $name = '';

        while (true) {
            $digits = $this->x('([0-9]+)', $v);
            if ($digits) {
                $name .= $digits[1];
                $v = $digits[2];
                continue;
            }

            list($chars, $rest) = $this->xPN_CHARS_U($v);
            if ($chars) {
                $name .= $chars;
                $v = $rest;
                continue;
            }

            // Extra characters are only tried after the name has (truthy) content.
            if ($name && ($extra = $this->x('([\xb7\x300-\x36f]+)', $v))) {
                $name .= $extra[1];
                $v = $extra[2];
                continue;
            }

            break;
        }

        return [$name, $v];
    }
848
849
    /* 98.. */
850
851 4
    /**
     * Matches whatever xPN_CHARS_U() accepts, or one character of the class
     * '[\-0-9\xb7\x300-\x36f]'.
     *
     * NOTE(review): the "\x300-\x36f" part of the class probably does not
     * express U+0300..U+036F as intended; verify together with x()'s flags.
     *
     * @param string $v Input to match against.
     *
     * @return array [matched text, remaining input] on success, [false, $v] otherwise.
     */
    protected function xPN_CHARS($v)
    {
        list($chars, $rest) = $this->xPN_CHARS_U($v);
        if ($chars) {
            return [$chars, $rest];
        }

        $single = $this->x('([\-0-9\xb7\x300-\x36f])', $v);

        return $single ? [$single[1], $single[2]] : [false, $v];
    }
861
862
    /* 99 */
863
864 88
    /**
     * Matches a prefix name.
     *
     * A single "accelerator" pattern is tried first: any run of characters
     * that are not whitespace or one of : ( ) { } ; , is accepted as the
     * prefix. Only if that fails, the prefix is assembled piecewise from
     * xPN_CHARS_BASE(), xPN_CHARS() and literal dots.
     *
     * @param string $v Input to match against.
     *
     * @return array [prefix, remaining input]; when nothing matches the first
     *               element is the falsy result of xPN_CHARS_BASE().
     */
    protected function xPN_PREFIX($v)
    {
        $quick = $this->x("([^\s\:\(\)\{\}\;\,]+)", $v, 's'); /* accelerator */
        if ($quick) {
            return [$quick[1], $quick[2]]; /* @@testing */
        }

        list($prefix, $rest) = $this->xPN_CHARS_BASE($v);
        if (!$prefix) {
            return [$prefix, $rest];
        }

        while (true) {
            list($chunk, $rest) = $this->xPN_CHARS($rest);
            if (false !== $chunk) {
                $prefix .= $chunk;
                continue;
            }

            // No name character: a literal dot may still continue the prefix.
            $dot = $this->x("\.", $rest);
            if (!$dot) {
                break;
            }
            $prefix .= '.';
            $rest = $dot[1];
        }

        list($tail, $rest) = $this->xPN_CHARS($rest);
        $prefix .= $tail ?: '';

        return [$prefix, $rest];
    }
888
889
    /* 100 */
890
891 23
    /**
     * Matches the local part of a prefixed name.
     *
     * An "accelerator" pattern is tried first: a run of characters that are
     * not whitespace or one of ( ) { } [ ] ; , . is accepted as long as the
     * remaining input does not start with a dot. Otherwise the local part is
     * assembled piecewise from single digits, xPN_CHARS_U() matches and, once
     * something has been collected, dots and xPN_CHARS() matches.
     *
     * @param string $v Input to match against.
     *
     * @return array [local part (possibly ''), remaining input].
     */
    protected function xPN_LOCAL($v)
    {
        $quick = $this->x("([^\s\(\)\{\}\[\]\;\,\.]+)", $v, 's'); /* accelerator */
        if ($quick && !preg_match('/^\./', $quick[2])) {
            return [$quick[1], $quick[2]]; /* @@testing */
        }

        $local = '';
        $rest = $v;

        while (true) {
            // Whitespace ends the local part.
            if ($this->x('\s', $rest)) {
                break;
            }

            $digit = $this->x('([0-9])', $rest);
            if ($digit) {
                $local .= $digit[1];
                $rest = $digit[2];
                continue;
            }

            list($chars, $rest) = $this->xPN_CHARS_U($rest);
            if ($chars) {
                $local .= $chars;
                continue;
            }

            // Dots and the wider xPN_CHARS() set are only allowed after a (truthy) start.
            if (!$local) {
                break;
            }

            // A dot is taken unless it is directly followed by whitespace or "}".
            $dot = $this->x('(\.)', $rest);
            if ($dot && !preg_match('/^[\s\}]/s', $dot[2])) {
                $local .= $dot[1];
                $rest = $dot[2];
            }

            list($chars, $rest) = $this->xPN_CHARS($rest);
            if (!$chars) {
                break;
            }
            $local .= $chars;
        }

        return [$local, $rest];
    }
924
925 62
    /**
     * Resolves backslash escapes in a string value.
     *
     * Handled escapes: \t, \n, \r, \" and \' as well as the numeric escapes
     * \uXXXX (4 hex digits) and \UXXXXXXXX (8 hex digits), which are replaced
     * by the UTF-8 encoding of the given code point. Hex digits are accepted
     * in upper and lower case. Code points of 0x200000 and above are replaced
     * by an empty string. An escaped backslash ("\\") is not resolved here.
     *
     * Numeric escapes are decoded in a single left-to-right pass, so the
     * output of one escape is never interpreted as the start of another one
     * (e.g. "\u005Cu0041" becomes "\u0041", not "A").
     *
     * @param string $v Value that may contain escape sequences.
     *
     * @return string Value with the escapes listed above resolved.
     */
    protected function unescapeNtripleUTF($v)
    {
        if (false === strpos($v, '\\')) {
            return $v;
        }

        // Single-character escapes. None of the replacements contains a
        // backslash, so one combined pass equals replacing them one by one.
        $v = strtr($v, [
            '\\t' => "\t",
            '\\n' => "\n",
            '\\r' => "\r",
            '\\"' => '"',
            '\\\'' => "'",
        ]);

        if (false === stripos($v, '\u')) {
            return $v;
        }

        $decoded = preg_replace_callback(
            '/\\\\(?:U([0-9A-Fa-f]{8})|u([0-9A-Fa-f]{4}))/',
            static function (array $m) {
                // Group 2 only exists when the 4-digit form matched.
                $no = hexdec(isset($m[2]) ? $m[2] : $m[1]);
                if ($no >= 2097152) {
                    return '';
                }

                // hexdec() may return a float; below 0x200000 the cast is lossless.
                $no = (int) $no;
                if ($no < 128) {
                    return \chr($no);
                }
                if ($no < 2048) {
                    return \chr(($no >> 6) + 192).\chr(($no & 63) + 128);
                }
                if ($no < 65536) {
                    return \chr(($no >> 12) + 224).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
                }

                return \chr(($no >> 18) + 240).\chr((($no >> 12) & 63) + 128).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
            },
            $v
        );

        // preg_replace_callback() yields null on a PCRE error; keep the input then.
        return null === $decoded ? $v : $decoded;
    }
955
956 87
    /**
     * Matches a placeholder of the form "?{...}" or "${...}" (braces may be
     * nested) and registers its name in $this->r['placeholders'].
     *
     * The sigil is matched with the x() helper (defined outside this
     * excerpt); the brace expression must be the first thing in the trimmed
     * input that follows the sigil.
     *
     * @param string $v Input to match against.
     *
     * @return array [['value' => name, 'type' => 'placeholder'], remaining input]
     *               on success, [0, $v] otherwise.
     */
    protected function xPlaceholder($v)
    {
        $sigil = $this->x('(\?|\$)', $v);
        if (!$sigil) {
            return [0, $v];
        }

        $afterSigil = trim($sigil[2]);
        if (!preg_match('/(\{(?:[^{}]+|(?R))*\})/', $sigil[2], $m) || 0 !== strpos($afterSigil, $m[1])) {
            return [0, $v];
        }

        // Strip the outer braces to get the placeholder name.
        $ph = substr($m[1], 1, -1);
        $rest = substr($afterSigil, \strlen($m[1]));

        if (!isset($this->r['placeholders'])) {
            $this->r['placeholders'] = [];
        }
        // Strict comparison: with loose comparison numeric-looking names such
        // as "1" and "01" would be considered equal and the second one dropped.
        if (!\in_array($ph, $this->r['placeholders'], true)) {
            $this->r['placeholders'][] = $ph;
        }

        return [['value' => $ph, 'type' => 'placeholder'], $rest];
    }
976
}
977