Passed
Branch extract-store (f24e42)
by Konrad
04:37
created

TurtleParser::parse()   F

Complexity

Conditions 19
Paths 324

Size

Total Lines 70
Code Lines 51

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 43
CRAP Score 19.6619

Importance

Changes 0
Metric Value
cc 19
eloc 51
nc 324
nop 2
dl 0
loc 70
ccs 43
cts 49
cp 0.8776
crap 19.6619
rs 2.1333
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/*
4
 * This file is part of the sweetrdf/InMemoryStoreSqlite package and licensed under
5
 * the terms of the GPL-3 license.
6
 *
7
 * (c) Konrad Abicht <[email protected]>
8
 * (c) Benjamin Nowack
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace sweetrdf\InMemoryStoreSqlite\Parser;
15
16
use function sweetrdf\InMemoryStoreSqlite\calcURI;
17
use sweetrdf\InMemoryStoreSqlite\NamespaceHelper;
18
use sweetrdf\InMemoryStoreSqlite\StringReader;
19
20
class TurtleParser extends BaseParser
21
{
22 105
    /**
     * Set to 0 by the constructor.
     *
     * @var int
     */
    public $state;

    /**
     * Input that parse() could not consume; returned by getUnparsedCode().
     *
     * @var string
     */
    public $unparsed_code;

    /**
     * parse() throws once its loop counter exceeds this value. The counter
     * is reset whenever a prologue or a triples block was parsed.
     *
     * @var int
     */
    public $max_parsing_loops;

    /**
     * Input reader; assigned by parse().
     *
     * @var StringReader|null
     */
    public $reader;

    /**
     * Initialises the parser state.
     *
     * The properties above are declared explicitly (instead of being created
     * dynamically on first assignment) because dynamic properties are
     * deprecated as of PHP 8.2. They are public to keep the visibility that
     * dynamically created properties had.
     */
    public function __construct()
    {
        parent::__construct();

        $this->state = 0;
        $this->unparsed_code = '';
        $this->max_parsing_loops = 500;
    }
30
31 105
    /**
     * Tries to match a regular expression fragment at the start of the input.
     *
     * Leading whitespace and leading "#" comments are skipped first. The
     * fragment is wrapped so that the last capture group of a successful
     * match always holds the remaining, unconsumed input.
     *
     * @param string $re      regular expression fragment (without delimiters)
     * @param string $v       input to match against
     * @param string $options PCRE modifiers appended to the pattern
     *
     * @return array|false match groups on success, false otherwise
     */
    public function x($re, $v, $options = 'si')
    {
        $v = preg_replace('/^[\xA0\xC2]+/', ' ', $v);

        // Strip every leading "#..." comment up to its line end.
        while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $v, $comment)) {
            $v = $comment[2];
        }

        $pattern = "/^\s*".$re.'(.*)$/'.$options;
        if (preg_match($pattern, $v, $groups)) {
            return $groups;
        }

        return false;
    }
42
43 25
    /**
     * Increments the blank node counter and returns a new blank node label
     * built from "_:", the configured prefix and the counter value.
     */
    private function createBnodeID(): string
    {
        $this->bnode_id++;

        return "_:{$this->bnode_prefix}{$this->bnode_id}";
    }
49
50 23
    /**
     * Stores a triple at the current triple index and advances the counter.
     *
     * @param array $t triple to store
     */
    protected function addT(array $t): void
    {
        // Post-increment: the triple lands at the old index.
        $this->triples[$this->t_count++] = $t;
    }
55
56
    /**
     * Returns the current value of the triple counter maintained by addT().
     */
    protected function countTriples()
    {
        return $this->t_count;
    }
60
61
    /**
     * Returns the input that was left over after the last parse() call.
     */
    protected function getUnparsedCode()
    {
        return $this->unparsed_code;
    }
65
66 24
    /**
     * Reads the input in chunks and collects the triples found in it.
     *
     * For every chunk the prologue (base / prefix declarations) is parsed
     * first; once no more prologue is found, triples blocks are parsed and
     * their triples stored via addT(). Input that cannot be consumed is kept
     * in $this->unparsed_code and, unless it is only whitespace or comments,
     * reported via addError().
     *
     * @param mixed $path passed to StringReader::init()
     * @param mixed $data passed to StringReader::init()
     *
     * @throws \Exception if the loop counter exceeds $this->max_parsing_loops
     */
    public function parse($path, $data = ''): void
    {
        $this->reader = new StringReader();
        $this->reader->init($path, $data);
        $this->base = $this->reader->getBase();
        $this->r = ['vars' => []];
        /* parse */
        $buffer = '';
        $more_triples = [];
        $sub_v = '';
        $sub_v2 = '';
        $loops = 0;
        $prologue_done = 0;
        while ($d = $this->reader->readStream(8192)) {
            $buffer .= $d;
            $sub_v = $buffer;
            do {
                $proceed = 0;
                if (!$prologue_done) {
                    $proceed = 1;
                    if ((list($sub_r, $sub_v) = $this->xPrologue($sub_v)) && $sub_r) {
                        $loops = 0;
                        $sub_v .= $this->reader->readStream(128);
                        /* in case we missed the final DOT in the previous prologue loop */
                        if ($sub_r = $this->x('\.', $sub_v)) {
                            $sub_v = $sub_r[1];
                        }
                        /* more prologue to come, use outer loop */
                        if ($this->x("\@?(base|prefix)", $sub_v)) {
                            $proceed = 0;
                        }
                    } else {
                        $prologue_done = 1;
                    }
                }
                if (
                    $prologue_done
                    && (list($sub_r, $sub_v, $more_triples, $sub_v2) = $this->xTriplesBlock($sub_v))
                    && \is_array($sub_r)
                ) {
                    $proceed = 1;
                    $loops = 0;
                    foreach ($sub_r as $t) {
                        $this->addT($t);
                    }
                }
            } while ($proceed);
            ++$loops;
            $buffer = $sub_v;
            if ($loops > $this->max_parsing_loops) {
                $msg = 'too many loops: '.$loops.'. Could not parse "'.substr($buffer, 0, 200).'..."';
                // Leading backslash: an unqualified "Exception" would resolve
                // to a non-existent class inside this namespace.
                throw new \Exception($msg);
            }
        }
        /* triples that were still pending when the input ended */
        foreach ($more_triples as $t) {
            $this->addT($t);
        }
        $sub_v = \count($more_triples) ? $sub_v2 : $sub_v;
        $buffer = $sub_v;
        $this->unparsed_code = $buffer;

        /* remove trailing comments */
        while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) {
            $this->unparsed_code = $m[2];
        }

        if ($this->unparsed_code && !$this->getErrors()) {
            $rest = preg_replace('/[\x0a|\x0d]/i', ' ', substr($this->unparsed_code, 0, 30));
            if (trim($rest)) {
                $this->addError('Could not parse "'.$rest.'"');
            }
        }
    }
139
140 24
    /**
     * Parses an optional base declaration followed by any number of prefix
     * declarations. Nothing is parsed once triples have been counted.
     *
     * @param string $v input
     *
     * @return array [1 if any declaration was found else 0, remaining input]
     */
    protected function xPrologue($v)
    {
        $found = 0;
        if ($this->t_count) {
            return [$found, $v];
        }

        [$base, $v] = $this->xBaseDecl($v);
        if ($base) {
            $this->base = $base;
            $found = 1;
        }

        while (true) {
            [$decl, $v] = $this->xPrefixDecl($v);
            if (!$decl) {
                break;
            }
            $this->prefixes[$decl['prefix']] = $decl['uri'];
            $found = 1;
        }

        return [$found, $v];
    }
156
157
    /* 3 */
158
159 105
    /**
     * Parses a base declaration: the keyword "base" (optionally prefixed
     * with "@"), an IRI reference and an optional trailing dot.
     *
     * @param string $v input
     *
     * @return array [IRI reference result, remaining input] or [0, $v]
     */
    protected function xBaseDecl($v)
    {
        $keyword = $this->x("\@?base\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$iri, $rest] = $this->xIRI_REF($keyword[1]);
        if (!$iri) {
            return [0, $v];
        }

        // The closing dot is optional.
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [$iri, $rest];
    }
173
174
    /* 4 */
175
176 105
    /**
     * Parses a prefix declaration: the keyword "prefix" (optionally prefixed
     * with "@"), a namespace prefix, an IRI reference and an optional dot.
     *
     * @param string $v input
     *
     * @return array [['prefix' => ..., 'uri_ref' => ..., 'uri' => ...], remaining input]
     *               or [0, $v]
     */
    protected function xPrefixDecl($v)
    {
        $keyword = $this->x("\@?prefix\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$prefix, $rest] = $this->xPNAME_NS($keyword[1]);
        if (!$prefix) {
            return [0, $v];
        }

        [$uri_ref, $rest] = $this->xIRI_REF($rest);
        if (!$uri_ref) {
            return [0, $v];
        }

        $uri = calcURI($uri_ref, $this->base);

        // The closing dot is optional.
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [['prefix' => $prefix, 'uri_ref' => $uri_ref, 'uri' => $uri], $rest];
    }
194
195
    /* 21.., 32.. */
196
197 105
    /**
     * Parses a sequence of triples using a small state machine:
     * 1 = expecting subject, 2 = expecting predicate, 3 = expecting object,
     * 4 = expecting a separator.
     *
     * Triples are first gathered in a pending list and only moved to the
     * result list when a statement is terminated.
     *
     * @param string $v input
     *
     * @return array [terminated triples (or 0 if none), input after the last
     *               terminated statement, still pending triples, input after
     *               the pending triples]
     */
    protected function xTriplesBlock($v)
    {
        $pending = [];
        $done = [];
        $state = 1;
        $rest = $v;
        $buffer = $rest;
        do {
            $proceed = 0;

            /* expecting subject */
            if ($state == 1) {
                $triple = [
                    'type' => 'triple',
                    's' => '',
                    'p' => '',
                    'o' => '',
                    's_type' => '',
                    'p_type' => '',
                    'o_type' => '',
                    'o_datatype' => '',
                    'o_lang' => '',
                ];
                if (([$m, $rest] = $this->xVarOrTerm($rest)) && $m) {
                    $triple['s'] = $m['value'];
                    $triple['s_type'] = $m['type'];
                    $state = 2;
                    $proceed = 1;
                    $m = $this->x('(\}|\.)', $rest);
                    if ($m) {
                        if ($triple['s_type'] == 'placeholder') {
                            $state = 4;
                        } else {
                            $this->addError('"'.$m[1].'" after subject found.');
                        }
                    }
                } elseif (([$m, $rest] = $this->xCollection($rest)) && $m) {
                    $triple['s'] = $m['id'];
                    $triple['s_type'] = $m['type'];
                    $pending = array_merge($pending, $m['triples']);
                    $state = 2;
                    $proceed = 1;
                    if ($this->x('\.', $rest)) {
                        $this->addError('DOT after subject found.');
                    }
                } elseif (([$m, $rest] = $this->xBlankNodePropertyList($rest)) && $m) {
                    $triple['s'] = $m['id'];
                    $triple['s_type'] = $m['type'];
                    $pending = array_merge($pending, $m['triples']);
                    $state = 2;
                    $proceed = 1;
                } elseif ($this->x('\.', $rest)) {
                    $this->addError('Subject expected, DOT found.'.$rest);
                }
            }

            /* expecting predicate */
            if ($state == 2) {
                if ($m = $this->x('a\s+', $rest)) {
                    // "a" is shorthand for rdf:type.
                    $rest = $m[1];
                    $triple['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                    $triple['p_type'] = 'uri';
                    $state = 3;
                    $proceed = 1;
                } elseif (([$m, $rest] = $this->xVarOrTerm($rest)) && $m) {
                    if ($m['type'] == 'bnode') {
                        $this->addError('Blank node used as triple predicate');
                    }
                    $triple['p'] = $m['value'];
                    $triple['p_type'] = $m['type'];
                    $state = 3;
                    $proceed = 1;
                } elseif ($this->x('\.', $rest)) {
                    $state = 4;
                } elseif ($this->x('\}', $rest)) {
                    $buffer = $rest;
                    $done = array_merge($done, $pending);
                    $pending = [];
                    $proceed = 0;
                }
            }

            /* expecting object */
            if ($state == 3) {
                if (([$m, $rest] = $this->xVarOrTerm($rest)) && $m) {
                    $triple['o'] = $m['value'];
                    $triple['o_type'] = $m['type'];
                    $triple['o_lang'] = $this->v('lang', '', $m);
                    $triple['o_datatype'] = $this->v('datatype', '', $m);
                    $pending[] = $triple;
                    $state = 4;
                    $proceed = 1;
                } elseif (([$m, $rest] = $this->xCollection($rest)) && $m) {
                    $triple['o'] = $m['id'];
                    $triple['o_type'] = $m['type'];
                    $triple['o_datatype'] = '';
                    $pending = array_merge($pending, [$triple], $m['triples']);
                    $state = 4;
                    $proceed = 1;
                } elseif (([$m, $rest] = $this->xBlankNodePropertyList($rest)) && $m) {
                    $triple['o'] = $m['id'];
                    $triple['o_type'] = $m['type'];
                    $triple['o_datatype'] = '';
                    $pending = array_merge($pending, [$triple], $m['triples']);
                    $state = 4;
                    $proceed = 1;
                }
            }

            /* expecting . or ; or , or } */
            if ($state == 4) {
                if ($m = $this->x('\.', $rest)) {
                    // Statement terminated: pending triples become final.
                    $rest = $m[1];
                    $buffer = $rest;
                    $done = array_merge($done, $pending);
                    $pending = [];
                    $state = 1;
                    $proceed = 1;
                } elseif ($m = $this->x('\;', $rest)) {
                    $rest = $m[1];
                    $state = 2;
                    $proceed = 1;
                } elseif ($m = $this->x('\,', $rest)) {
                    $rest = $m[1];
                    $state = 3;
                    $proceed = 1;
                    if ($this->x('\}', $rest)) {
                        $this->addError('Object expected, } found.');
                    }
                }
                if ($this->x('(\}|\{|OPTIONAL|FILTER|GRAPH)', $rest)) {
                    $buffer = $rest;
                    $done = array_merge($done, $pending);
                    $pending = [];
                    $proceed = 0;
                }
            }
        } while ($proceed);

        return [\count($done) ? $done : 0, $buffer, $pending, $rest];
    }
319
320
    /* 39.. */
321
322 103
    /**
     * Parses a blank node property list ("[ predicate object ; ... ]").
     *
     * A new blank node ID is created as soon as the opening bracket is seen,
     * even if the list later turns out not to be closed.
     *
     * @param string $v input
     *
     * @return array [['id' => ..., 'type' => 'bnode', 'triples' => [...]], remaining input]
     *               or [0, $v] if there is no opening bracket or the list is not closed
     */
    protected function xBlankNodePropertyList($v)
    {
        $open = $this->x('\[', $v);
        if (!$open) {
            return [0, $v];
        }

        $rest = $open[1];
        $s = $this->createBnodeID();
        $result = ['id' => $s, 'type' => 'bnode', 'triples' => []];
        $triple = [
            'type' => 'triple',
            's' => $s,
            'p' => '',
            'o' => '',
            's_type' => 'bnode',
            'p_type' => '',
            'o_type' => '',
            'o_datatype' => '',
            'o_lang' => '',
        ];
        $state = 2;
        $closed = 0;
        do {
            $proceed = 0;

            /* expecting predicate */
            if ($state == 2) {
                if ($m = $this->x('a\s+', $rest)) {
                    // "a" is shorthand for rdf:type.
                    $rest = $m[1];
                    $triple['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                    $triple['p_type'] = 'uri';
                    $state = 3;
                    $proceed = 1;
                } elseif (([$m, $rest] = $this->xVarOrTerm($rest)) && $m) {
                    $triple['p'] = $m['value'];
                    $triple['p_type'] = $m['type'];
                    $state = 3;
                    $proceed = 1;
                }
            }

            /* expecting object */
            if ($state == 3) {
                if (([$m, $rest] = $this->xVarOrTerm($rest)) && $m) {
                    $triple['o'] = $m['value'];
                    $triple['o_type'] = $m['type'];
                    $triple['o_lang'] = $this->v('lang', '', $m);
                    $triple['o_datatype'] = $this->v('datatype', '', $m);
                    $result['triples'][] = $triple;
                    $state = 4;
                    $proceed = 1;
                } elseif (([$m, $rest] = $this->xCollection($rest)) && $m) {
                    $triple['o'] = $m['id'];
                    $triple['o_type'] = $m['type'];
                    $triple['o_datatype'] = '';
                    $result['triples'] = array_merge($result['triples'], [$triple], $m['triples']);
                    $state = 4;
                    $proceed = 1;
                } elseif (([$m, $rest] = $this->xBlankNodePropertyList($rest)) && $m) {
                    $triple['o'] = $m['id'];
                    $triple['o_type'] = $m['type'];
                    $triple['o_datatype'] = '';
                    $result['triples'] = array_merge($result['triples'], [$triple], $m['triples']);
                    $state = 4;
                    $proceed = 1;
                }
            }

            /* expecting . or ; or , or ] (checked one after another, not exclusively) */
            if ($state == 4) {
                if ($m = $this->x('\.', $rest)) {
                    $rest = $m[1];
                    $state = 1;
                    $proceed = 1;
                }
                if ($m = $this->x('\;', $rest)) {
                    $rest = $m[1];
                    $state = 2;
                    $proceed = 1;
                }
                if ($m = $this->x('\,', $rest)) {
                    $rest = $m[1];
                    $state = 3;
                    $proceed = 1;
                }
                if ($m = $this->x('\]', $rest)) {
                    $rest = $m[1];
                    $proceed = 0;
                    $closed = 1;
                }
            }
        } while ($proceed);

        return $closed ? [$result, $rest] : [0, $v];
    }
404
405
    /* 40.. */
406
407 103
    /**
     * Parses a collection ("( item item ... )") and expresses it as a chain
     * of rdf:first / rdf:rest triples ending in rdf:nil.
     *
     * Items may be terms, nested collections or blank node property lists.
     *
     * @param string $v input
     *
     * @return array [['id' => ..., 'type' => 'bnode', 'triples' => [...]], remaining input]
     *               or [0, $v] if there is no opening parenthesis or the
     *               collection is not closed
     */
    protected function xCollection($v)
    {
        $open = $this->x('\(', $v);
        if (!$open) {
            return [0, $v];
        }

        $rest = $open[1];
        $s = $this->createBnodeID();
        $result = ['id' => $s, 'type' => 'bnode', 'triples' => []];
        $closed = 0;
        do {
            $proceed = 0;
            if (([$m, $rest] = $this->xVarOrTerm($rest)) && $m) {
                $result['triples'][] = [
                    'type' => 'triple',
                    's' => $s,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $m['value'],
                    'o_type' => $m['type'],
                    'o_lang' => $this->v('lang', '', $m),
                    'o_datatype' => $this->v('datatype', '', $m),
                ];
                $proceed = 1;
            } elseif (([$m, $rest] = $this->xCollection($rest)) && $m) {
                $result['triples'][] = [
                    'type' => 'triple',
                    's' => $s,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $m['id'],
                    'o_type' => $m['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $result['triples'] = array_merge($result['triples'], $m['triples']);
                $proceed = 1;
            } elseif (([$m, $rest] = $this->xBlankNodePropertyList($rest)) && $m) {
                $result['triples'][] = [
                    'type' => 'triple',
                    's' => $s,
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'o' => $m['id'],
                    's_type' => 'bnode',
                    'p_type' => 'uri',
                    'o_type' => $m['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $result['triples'] = array_merge($result['triples'], $m['triples']);
                $proceed = 1;
            }

            if (!$proceed) {
                continue;
            }

            $close = $this->x('\)', $rest);
            if ($close) {
                // End of the collection: terminate the chain with rdf:nil.
                $rest = $close[1];
                $result['triples'][] = [
                    'type' => 'triple',
                    's' => $s,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                    'p_type' => 'uri',
                    'o' => NamespaceHelper::NAMESPACE_RDF.'nil',
                    'o_type' => 'uri',
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $closed = 1;
                $proceed = 0;
            } else {
                // More items expected: link to a fresh blank node.
                $next_s = $this->createBnodeID();
                $result['triples'][] = [
                    'type' => 'triple',
                    's' => $s,
                    'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                    'o' => $next_s,
                    's_type' => 'bnode',
                    'p_type' => 'uri',
                    'o_type' => 'bnode',
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $s = $next_s;
            }
        } while ($proceed);

        return $closed ? [$result, $rest] : [0, $v];
    }
498
499
    /* 42 */
500
501 105
    /**
     * Parses a variable or, failing that, a graph term.
     *
     * @param string $v input
     *
     * @return array [parsed term, remaining input] or [0, $v]
     */
    protected function xVarOrTerm($v)
    {
        foreach (['xVar', 'xGraphTerm'] as $method) {
            [$term, $rest] = $this->$method($v);
            if ($term) {
                return [$term, $rest];
            }
        }

        return [0, $v];
    }
511
512
    /* 44, 74.., 75.. */
513
514 105
    /**
     * Parses a variable ("?name" or "$name") and registers its name in
     * $this->r['vars'] if it is not listed there yet.
     *
     * @param string $v input
     *
     * @return array [['value' => name, 'type' => 'var'], remaining input] or [0, $v]
     */
    protected function xVar($v)
    {
        if ($r = $this->x('(\?|\$)([^\s]+)', $v)) {
            if ((list($sub_r, $sub_v) = $this->xVARNAME($r[2])) && $sub_r) {
                // Strict comparison: with loose comparison numeric-looking
                // names such as "1e1" and "10" would count as the same variable.
                if (!\in_array($sub_r, $this->r['vars'], true)) {
                    $this->r['vars'][] = $sub_r;
                }

                // $sub_v is what xVARNAME left of the token, $r[3] the input after it.
                return [['value' => $sub_r, 'type' => 'var'], $sub_v.$r[3]];
            }
        }

        return [0, $v];
    }
528
529
    /* 45 */
530
531 105
    /**
     * Tries each graph term rule in turn and returns the first match.
     *
     * Scalar results are wrapped into ['value' => ...]. The 'type' entry is
     * set via v1() with the type listed below for the matching rule.
     *
     * @param string $v input
     *
     * @return array [term array, remaining input] or [0, $v]
     */
    protected function xGraphTerm($v)
    {
        $term_types = [
            'IRIref' => 'uri',
            'RDFLiteral' => 'literal',
            'NumericLiteral' => 'literal',
            'BooleanLiteral' => 'literal',
            'BlankNode' => 'bnode',
            'NIL' => 'uri',
            'Placeholder' => 'placeholder',
        ];

        foreach ($term_types as $term => $type) {
            $method = 'x'.$term;
            [$parsed, $rest] = $this->$method($v);
            if (!$parsed) {
                continue;
            }
            if (!\is_array($parsed)) {
                $parsed = ['value' => $parsed];
            }
            $parsed['type'] = $this->v1('type', $type, $parsed);

            return [$parsed, $rest];
        }

        return [0, $v];
    }
555
556
    /* 60 */
557
558 104
    /**
     * Parses a string literal with an optional language tag or datatype.
     *
     * @param string $v input
     *
     * @return array [literal array (optionally with 'lang' or 'datatype'), remaining input]
     *               or [0, $v]
     */
    protected function xRDFLiteral($v)
    {
        if ((list($sub_r, $sub_v) = $this->xString($v)) && $sub_r) {
            $sub_r['value'] = $this->unescapeNtripleUTF($sub_r['value']);
            $r = $sub_r;
            if ((list($sub_r, $sub_v) = $this->xLANGTAG($sub_v)) && $sub_r) {
                $r['lang'] = $sub_r;
            } elseif (
                !$this->x('\s', $sub_v)
                && ($sub_r = $this->x('\^\^', $sub_v))
                && (list($sub_r, $sub_v) = $this->xIRIref($sub_r[1]))
                // xIRIref yields the IRI as a string, or 0 on failure, so test
                // the value itself. Indexing it with [1] checked the IRI's
                // second character and warned on failure.
                && $sub_r
            ) {
                $r['datatype'] = $sub_r;
            }

            return [$r, $sub_v];
        }

        return [0, $v];
    }
574
575
    /* 61.., 62.., 63.., 64.. */
576
577 103
    /**
     * Parses a numeric literal with an optional sign. The double, decimal
     * and integer rules are tried in that order.
     *
     * @param string $v input
     *
     * @return array [['value' => ..., 'type' => 'literal', 'datatype' => XSD type], remaining input]
     *               or [0, $v]
     */
    protected function xNumericLiteral($v)
    {
        $sign_match = $this->x('(\-|\+)?', $v);
        $sign = $sign_match[1];
        $rest = $sign_match[2];

        $xsd_types = ['DOUBLE' => 'double', 'DECIMAL' => 'decimal', 'INTEGER' => 'integer'];
        foreach ($xsd_types as $rule => $xsd) {
            $method = 'x'.$rule;
            [$number, $rest] = $this->$method($rest);
            // The number rules signal failure with false.
            if (false === $number) {
                continue;
            }

            $literal = [
                'value' => $sign.$number,
                'type' => 'literal',
                'datatype' => NamespaceHelper::NAMESPACE_XSD.$xsd,
            ];

            return [$literal, $rest];
        }

        return [0, $v];
    }
597
598
    /* 65.. */
599
600 103
    /**
     * Parses the keywords "true" or "false".
     *
     * @param string $v input
     *
     * @return array [matched keyword, remaining input] or [0, $v]
     */
    protected function xBooleanLiteral($v)
    {
        $keyword = $this->x('(true|false)', $v);

        return $keyword ? [$keyword[1], $keyword[2]] : [0, $v];
    }
608
609
    /* 66.., 87.., 88.., 89.., 90.., 91.. */
610
611 104
    /**
     * Parses a quoted string delimited by ', ", ''' or """.
     *
     * Largely simplified, may need some tweaks in following revisions.
     *
     * @param string $v input
     *
     * @return array [['value' => ..., 'type' => 'literal', 'sub_type' => ...], remaining input]
     *               or [0, $v] if there is no string or no closing delimiter
     */
    protected function xString($v)
    {
        if (!preg_match('/^\s*([\']{3}|\'|[\"]{3}|\")(.*)$/s', $v, $opening)) {
            return [0, $v];
        }

        $delim = $opening[1];
        $rest = $opening[2];
        $sub_types = ["'''" => 'literal_long1', '"""' => 'literal_long2', "'" => 'literal1', '"' => 'literal2'];
        $sub_type = $sub_types[$delim];

        $pos = 0;
        while (false !== ($delim_pos = strpos($rest, $delim, $pos))) {
            $value = substr($rest, 0, $delim_pos);

            // An odd number of trailing backslashes escapes this delimiter:
            // continue searching behind it.
            $escaped = preg_match('/([\x5c]+)$/s', $value, $slashes) && (\strlen($slashes[1]) % 2);
            if ($escaped) {
                $pos = $delim_pos + 1;
                continue;
            }

            $after = substr($rest, $delim_pos + \strlen($delim));

            return [['value' => $value, 'type' => 'literal', 'sub_type' => $sub_type], $after];
        }

        return [0, $v];
    }
645
646
    /* 67 */
647
648 105
    /**
     * Parses an IRI given either as an IRI reference (resolved against the
     * current base via calcURI()) or as a prefixed name.
     *
     * @param string $v input
     *
     * @return array [IRI, remaining input] or [0, remaining input]
     */
    protected function xIRIref($v)
    {
        [$iri_ref, $v] = $this->xIRI_REF($v);
        if ($iri_ref) {
            return [calcURI($iri_ref, $this->base), $v];
        }

        [$name, $v] = $this->xPrefixedName($v);
        if ($name) {
            return [$name, $v];
        }

        return [0, $v];
    }
658
659
    /* 68 */
660
661 104
    /**
     * Parses a prefixed name: either "prefix:local" or just "prefix:". The
     * bare prefix form only succeeds if the prefix is registered.
     *
     * @param string $v input
     *
     * @return array [expanded IRI, remaining input] or [0, remaining input]
     */
    protected function xPrefixedName($v)
    {
        [$full, $v] = $this->xPNAME_LN($v);
        if ($full) {
            return [$full, $v];
        }

        [$ns, $rest] = $this->xPNAME_NS($v);
        if ($ns && isset($this->prefixes[$ns])) {
            return [$this->prefixes[$ns], $rest];
        }

        return [0, $v];
    }
671
672
    /* 69.., 73.., 93, 94..  */
673
674 103
    /**
     * Parses a blank node: either a labelled one ("_:label") or an anonymous
     * one ("[]", possibly with whitespace inside), for which a new ID is created.
     *
     * @param string $v input
     *
     * @return array [['type' => 'bnode', 'value' => ...], remaining input] or [0, $v]
     */
    protected function xBlankNode($v)
    {
        $labelled = $this->x('\_\:', $v);
        if ($labelled) {
            [$label, $rest] = $this->xPN_LOCAL($labelled[1]);
            if ($label) {
                return [['type' => 'bnode', 'value' => '_:'.$label], $rest];
            }
        }

        $anonymous = $this->x('\[[\x20\x9\xd\xa]*\]', $v);
        if ($anonymous) {
            return [['type' => 'bnode', 'value' => $this->createBnodeID()], $anonymous[1]];
        }

        return [0, $v];
    }
685
686
    /* 70.. @@sync with SPARQLParser */
687
688 24
    /**
     * Parses an IRI reference in angle brackets.
     *
     * Three forms are tried in order: a "${...}" placeholder accepted by
     * xPlaceholder(), the empty reference "<>", and a regular reference.
     *
     * @param string $v input
     *
     * @return array [reference content (true if empty), remaining input] or [0, $v]
     */
    protected function xIRI_REF($v)
    {
        $placeholder = $this->x('\<(\$\{[^\>]*\})\>', $v);
        if ($placeholder && $this->xPlaceholder($placeholder[1])) {
            return [$placeholder[1], $placeholder[2]];
        }

        $empty = $this->x('\<\>', $v);
        if ($empty) {
            return [true, $empty[1]];
        }

        $reference = $this->x('\<([^\s][^\<\>]*)\>', $v);
        if ($reference) {
            // A falsy reference (e.g. "0") is reported as true.
            return [$reference[1] ?: true, $reference[2]];
        }

        return [0, $v];
    }
701
702
    /* 71 */
703
704 104
    /**
     * Parses a namespace prefix: an optional prefix name followed by a colon.
     *
     * @param string $v input
     *
     * @return array [prefix including the colon, remaining input] or [0, $v]
     */
    protected function xPNAME_NS($v)
    {
        [$name, $rest] = $this->xPN_PREFIX($v);

        $colon = $this->x("\:", $rest);
        if (!$colon) {
            return [0, $v];
        }

        // The prefix name may be empty (default namespace).
        $prefix = $name ?: '';

        return [$prefix.':', $colon[1]];
    }
711
712
    /* 72 */
713
714 104
    /**
     * Parses "prefix:local" and expands it using the registered prefixes.
     * No whitespace is allowed between the colon and the local part.
     *
     * @param string $v input
     *
     * @return array [expanded IRI, remaining input] or [0, $v]
     */
    protected function xPNAME_LN($v)
    {
        [$ns, $rest] = $this->xPNAME_NS($v);
        if (!$ns) {
            return [0, $v];
        }

        if ($this->x('\s', $rest)) {
            return [0, $v];
        }

        [$local, $rest] = $this->xPN_LOCAL($rest);
        if (!$local) {
            return [0, $v];
        }

        if (!isset($this->prefixes[$ns])) {
            return [0, $v];
        }

        return [$this->prefixes[$ns].$local, $rest];
    }
728
729
    /* 76 */
730
731 78
    /**
     * Parses a language tag ("@en", "@en-gb", ...) that directly follows the
     * preceding token, i.e. without leading whitespace.
     *
     * @param string $v input
     *
     * @return array [language tag without "@", remaining input] or [0, $v]
     */
    protected function xLANGTAG($v)
    {
        if ($this->x('\s', $v)) {
            return [0, $v];
        }

        $tag = $this->x('\@([a-z]+(\-[a-z0-9]+)*)', $v);

        // Group 2 is the last subtag, group 3 the remaining input.
        return $tag ? [$tag[1], $tag[3]] : [0, $v];
    }
739
740
    /* 77.. */
741
742 103
    /**
     * Parses an unsigned integer.
     *
     * @param string $v input
     *
     * @return array [digits, remaining input] or [false, $v]
     */
    protected function xINTEGER($v)
    {
        $digits = $this->x('([0-9]+)', $v);

        return $digits ? [$digits[1], $digits[2]] : [false, $v];
    }
750
751
    /* 78.. */
752
753 103
    /**
     * Matches a decimal number, either `1.` / `1.5` or `.5`.
     *
     * @param string $v unparsed input
     *
     * @return array [number, remaining input], or [false, $v]
     */
    protected function xDECIMAL($v)
    {
        // Tried in this order: digits with a dot first, then a leading dot.
        $patterns = ['([0-9]+\.[0-9]*)', '(\.[0-9]+)'];

        foreach ($patterns as $pattern) {
            $number = $this->x($pattern, $v);
            if ($number) {
                return [$number[1], $number[2]];
            }
        }

        return [false, $v];
    }
764
765
    /* 79.., 86.. */
766
767 103
    /**
     * Matches a number with an exponent: `1.5E3`, `.5E-3` or `1E3`.
     *
     * @param string $v unparsed input
     *
     * @return array [number, remaining input], or [false, $v]
     */
    protected function xDOUBLE($v)
    {
        // The order matters: the most specific form is tried first.
        $patterns = [
            '([0-9]+\.[0-9]*E[\+\-]?[0-9]+)',
            '(\.[0-9]+E[\+\-]?[0-9]+)',
            '([0-9]+E[\+\-]?[0-9]+)',
        ];

        foreach ($patterns as $pattern) {
            $number = $this->x($pattern, $v);
            if ($number) {
                return [$number[1], $number[2]];
            }
        }

        return [false, $v];
    }
781
782
    /* 92 */
783
784 103
    /**
     * Matches an empty collection `()`, optionally with whitespace inside.
     *
     * @param string $v unparsed input
     *
     * @return array [rdf:nil as a 'uri' term array, remaining input], or [0, $v]
     */
    protected function xNIL($v)
    {
        $nil = $this->x('\([\x20\x9\xd\xa]*\)', $v);
        if (!$nil) {
            return [0, $v];
        }

        $term = ['type' => 'uri', 'value' => NamespaceHelper::NAMESPACE_RDF.'nil'];

        return [$term, $nil[1]];
    }
792
793
    /* 95.. */
794
795 103
    /**
     * Matches a run of letters or a single `\u` escape with 1-4 hex digits.
     *
     * @param string $v unparsed input
     *
     * @return array [matched characters, remaining input], or [0, $v]
     */
    protected function xPN_CHARS_BASE($v)
    {
        $chars = $this->x("([a-z]+|\\\u[0-9a-f]{1,4})", $v);

        return $chars ? [$chars[1], $chars[2]] : [0, $v];
    }
803
804
    /* 96 */
805
806 102
    /**
     * Matches what xPN_CHARS_BASE() accepts, or a single underscore.
     *
     * @param string $v unparsed input
     *
     * @return array [matched characters, remaining input], or [0, $v]
     */
    protected function xPN_CHARS_U($v)
    {
        [$base, $afterBase] = $this->xPN_CHARS_BASE($v);
        if ($base) {
            return [$base, $afterBase];
        }

        $underscore = $this->x('(_)', $v);

        return $underscore ? [$underscore[1], $underscore[2]] : [0, $v];
    }
816
817
    /* 97.. */
818
819 102
    /**
     * Collects a variable name by consuming digits, xPN_CHARS_U() matches and,
     * once the name is non-empty, the extra characters from the class below.
     *
     * @param string $v unparsed input
     *
     * @return array [name (possibly ''), remaining input]
     */
    protected function xVARNAME($v)
    {
        $name = '';

        while (true) {
            $digits = $this->x('([0-9]+)', $v);
            if ($digits) {
                $name .= $digits[1];
                $v = $digits[2];
                continue;
            }

            [$chars, $afterChars] = $this->xPN_CHARS_U($v);
            if ($chars) {
                $name .= $chars;
                $v = $afterChars;
                continue;
            }

            // NOTE(review): `\x300-\x36f` is probably meant as U+0300..U+036F, but
            // PCRE reads `\x30` followed by `0-\x36` and `f` - confirm the intent.
            if ($name && ($extra = $this->x('([\xb7\x300-\x36f]+)', $v))) {
                $name .= $extra[1];
                $v = $extra[2];
                continue;
            }

            break;
        }

        return [$name, $v];
    }
841
842
    /* 98.. */
843
844 4
    /**
     * Matches what xPN_CHARS_U() accepts, or one character out of the extra
     * class (hyphen, digit, ...).
     *
     * @param string $v unparsed input
     *
     * @return array [matched characters, remaining input], or [false, $v]
     */
    protected function xPN_CHARS($v)
    {
        [$chars, $afterChars] = $this->xPN_CHARS_U($v);
        if ($chars) {
            return [$chars, $afterChars];
        }

        $extra = $this->x('([\-0-9\xb7\x300-\x36f])', $v);

        return $extra ? [$extra[1], $extra[2]] : [false, $v];
    }
854
855
    /* 99 */
856
857 104
    /**
     * Matches the prefix part of a prefixed name.
     *
     * A single permissive regular expression is tried first; only if it does
     * not match is the prefix assembled piece by piece.
     *
     * @param string $v unparsed input
     *
     * @return array [prefix, remaining input]; the prefix is falsy (0) when
     *               nothing matched
     */
    protected function xPN_PREFIX($v)
    {
        /* accelerator (@@testing) */
        $fast = $this->x("([^\s\:\(\)\{\}\;\,]+)", $v, 's');
        if ($fast) {
            return [$fast[1], $fast[2]];
        }

        [$prefix, $rest] = $this->xPN_CHARS_BASE($v);
        if (!$prefix) {
            return [$prefix, $rest];
        }

        // Keep appending name characters and dots for as long as either matches.
        while (true) {
            [$chars, $rest] = $this->xPN_CHARS($rest);
            if (false !== $chars) {
                $prefix .= $chars;
                continue;
            }

            $dot = $this->x("\.", $rest);
            if (!$dot) {
                break;
            }
            $prefix .= '.';
            $rest = $dot[1];
        }

        [$chars, $rest] = $this->xPN_CHARS($rest);
        $prefix .= $chars ?: '';

        return [$prefix, $rest];
    }
881
882
    /* 100 */
883
884 38
    /**
     * Matches the local part of a prefixed name.
     *
     * A single permissive regular expression is tried first. It is rejected
     * when the remainder starts with a dot, because the dot may belong to the
     * name; in that case the name is assembled piece by piece.
     *
     * @param string $v unparsed input
     *
     * @return array [local name (possibly ''), remaining input]
     */
    protected function xPN_LOCAL($v)
    {
        /* accelerator (@@testing) */
        $fast = $this->x("([^\s\(\)\{\}\[\]\;\,\.]+)", $v, 's');
        if ($fast && !preg_match('/^\./', $fast[2])) {
            return [$fast[1], $fast[2]];
        }

        $local = '';
        $rest = $v;

        while (true) {
            // Whitespace ends the local name.
            if ($this->x('\s', $rest)) {
                break;
            }

            $digit = $this->x('([0-9])', $rest);
            if ($digit) {
                $local .= $digit[1];
                $rest = $digit[2];
                continue;
            }

            [$chars, $rest] = $this->xPN_CHARS_U($rest);
            if ($chars) {
                $local .= $chars;
                continue;
            }

            if (!$local) {
                break;
            }

            // A dot is part of the name unless whitespace or '}' follows it.
            $dot = $this->x('(\.)', $rest);
            if ($dot && !preg_match('/^[\s\}]/s', $dot[2])) {
                $local .= $dot[1];
                $rest = $dot[2];
            }

            // xPN_CHARS() signals "no match" with false. Comparing against false
            // (instead of testing truthiness) keeps a matched "0": previously the
            // "0" in e.g. `a.0` was removed from the input but never appended.
            [$chars, $rest] = $this->xPN_CHARS($rest);
            if (false === $chars) {
                break;
            }
            $local .= $chars;
        }

        return [$local, $rest];
    }
917
918 78
    /**
     * Replaces backslash escapes in a string with the characters they denote.
     *
     * Handled are `\t`, `\n`, `\r`, `\"`, `\'` as well as the numeric escapes
     * `\uXXXX` and `\UXXXXXXXX`, which are emitted as UTF-8. Hex digits are
     * accepted in upper and lower case. Numeric escapes are decoded in a
     * single pass, so decoded output is never scanned for escapes again.
     *
     * NOTE(review): an escaped backslash (`\\`) is not treated specially, so
     * `\\n` is read as a backslash followed by the `\n` escape - confirm
     * whether callers handle `\\` elsewhere.
     *
     * @param string $v string that may contain escapes
     *
     * @return string the unescaped string; code points of 0x200000 and above
     *                are removed
     */
    protected function unescapeNtripleUTF($v)
    {
        if (false === strpos($v, '\\')) {
            return $v;
        }

        $mappings = ['t' => "\t", 'n' => "\n", 'r' => "\r", '\"' => '"', '\'' => "'"];
        foreach ($mappings as $in => $out) {
            $v = preg_replace('/\x5c(['.$in.'])/', $out, $v);
        }

        if (false === stripos($v, '\u')) {
            return $v;
        }

        // Branch reset (?|...) puts the hex digits into group 1 for both forms.
        $decoded = preg_replace_callback(
            '/\\\\(?|U([0-9A-Fa-f]{8})|u([0-9A-Fa-f]{4}))/',
            static function (array $m): string {
                $no = hexdec($m[1]);
                if ($no >= 2097152) {
                    return '';
                }
                // hexdec() may return a float; the value is small enough here.
                $no = (int) $no;

                if ($no < 128) {
                    return \chr($no);
                }
                if ($no < 2048) {
                    return \chr(($no >> 6) + 192).\chr(($no & 63) + 128);
                }
                if ($no < 65536) {
                    return \chr(($no >> 12) + 224)
                        .\chr((($no >> 6) & 63) + 128)
                        .\chr(($no & 63) + 128);
                }

                return \chr(($no >> 18) + 240)
                    .\chr((($no >> 12) & 63) + 128)
                    .\chr((($no >> 6) & 63) + 128)
                    .\chr(($no & 63) + 128);
            },
            $v
        );

        // preg_replace_callback() returns null on a PCRE error; keep the input then.
        return $decoded ?? $v;
    }
948
949 103
    /**
     * Matches a placeholder of the form `?{...}` or `${...}`; braces inside
     * the placeholder may be nested as long as they are balanced.
     *
     * Every distinct placeholder name found is recorded once in
     * $this->r['placeholders'].
     *
     * @param string $v unparsed input
     *
     * @return array [['value' => name, 'type' => 'placeholder'], remaining
     *               input], or [0, $v] when the input is not a placeholder
     */
    protected function xPlaceholder($v)
    {
        $r = $this->x('(\?|\$)', $v);
        if (!$r) {
            return [0, $v];
        }

        // The balanced-brace group must start right after the '?' / '$' sigil.
        $tail = trim($r[2]);
        if (!preg_match('/(\{(?:[^{}]+|(?R))*\})/', $r[2], $m) || 0 !== strpos($tail, $m[1])) {
            return [0, $v];
        }

        $ph = substr($m[1], 1, -1);
        $rest = substr($tail, \strlen($m[1]));

        if (!isset($this->r['placeholders'])) {
            $this->r['placeholders'] = [];
        }
        // Strict comparison: loosely compared, numeric-looking names such as
        // "10" and "1e1" would count as the same placeholder.
        if (!\in_array($ph, $this->r['placeholders'], true)) {
            $this->r['placeholders'][] = $ph;
        }

        return [['value' => $ph, 'type' => 'placeholder'], $rest];
    }
969
}
970