TurtleParser::xPN_LOCAL()   C
last analyzed

Complexity

Conditions 13
Paths 9

Size

Total Lines 32
Code Lines 24

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 14.5274

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 13
eloc 24
c 1
b 0
f 0
nc 9
nop 1
dl 0
loc 32
ccs 19
cts 24
cp 0.7917
crap 14.5274
rs 6.6166

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * This file is part of the sweetrdf/InMemoryStoreSqlite package and licensed under
5
 * the terms of the GPL-2 license.
6
 *
7
 * (c) Konrad Abicht <[email protected]>
8
 * (c) Benjamin Nowack
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace sweetrdf\InMemoryStoreSqlite\Parser;
15
16
use Exception;
17
use sweetrdf\InMemoryStoreSqlite\Log\Logger;
18
use sweetrdf\InMemoryStoreSqlite\NamespaceHelper;
19
use sweetrdf\InMemoryStoreSqlite\StringReader;
20
21
use function sweetrdf\InMemoryStoreSqlite\calcURI;
22
23
class TurtleParser extends BaseParser
24
{
25
    protected int $state;
26
    protected int $max_parsing_loops;
27
    protected string $unparsed_code;
28
29 100
    /**
     * Hands the collaborators to the base parser and resets this parser's own fields.
     */
    public function __construct(Logger $logger, NamespaceHelper $namespaceHelper, StringReader $stringReader)
    {
        parent::__construct($logger, $namespaceHelper, $stringReader);

        // upper bound for consecutive read iterations in parse() that make no progress
        $this->max_parsing_loops = 500;
        $this->unparsed_code = '';
        $this->state = 0;
    }
37
38 100
    /**
     * Matches the regex fragment $re at the start of $v, skipping leading whitespace and "#" comments.
     *
     * @param string $re      regex fragment (without delimiters); its capture groups come first in the result
     * @param string $v       text to match against
     * @param string $options PCRE modifiers appended to the pattern
     *
     * @return array|false preg_match groups, where the last group holds the text after the match;
     *                     false if $re does not match
     */
    protected function x($re, $v, $options = 'si')
    {
        // a leading run of 0xA0/0xC2 bytes is collapsed into one blank
        $input = preg_replace('/^[\xA0\xC2]+/', ' ', $v);

        /* comment removal */
        $groups = [];
        while (1 === preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $input, $groups)) {
            $input = $groups[2];
        }

        $pattern = "/^\s*".$re.'(.*)$/'.$options;
        if (preg_match($pattern, $input, $groups)) {
            return $groups;
        }

        return false;
    }
49
50 2
    /**
     * Advances the blank node counter and returns a new "_:"-prefixed blank node label.
     */
    private function createBnodeID(): string
    {
        $next = ++$this->bnode_id;

        return '_:'.$this->bnode_prefix.$next;
    }
56
57 2
    /**
     * Appends one triple to the result list and bumps the triple counter.
     */
    protected function addT(array $t): void
    {
        // the counter doubles as the next free index
        $this->triples[$this->t_count++] = $t;
    }
62
63
    /**
     * Returns the number of triples collected so far.
     */
    protected function countTriples()
    {
        return $this->t_count;
    }
67
68
    /**
     * Returns the input text that parse() left unconsumed.
     */
    protected function getUnparsedCode()
    {
        return $this->unparsed_code;
    }
72
73 2
    /**
     * Reads the input in chunks and collects the triples found in it.
     *
     * Each chunk is appended to a buffer; prologue declarations are consumed first,
     * then triple blocks. Text that could not be consumed is kept in $unparsed_code and,
     * unless an error was already logged, reported through the logger.
     *
     * @param string $path passed to the reader's init()
     * @param string $data passed to the reader's init()
     *
     * @throws Exception when more than $max_parsing_loops consecutive read iterations
     *                   produce neither a prologue declaration nor a triple
     */
    public function parse(string $path, string $data = ''): void
    {
        $this->triples = [];
        $this->t_count = 0;
        $this->reader->init($path, $data);
        $this->base = $this->reader->getBase();
        $this->r = ['vars' => []];
        /* parse */
        $buffer = '';
        $more_triples = [];
        $sub_v = '';
        $sub_v2 = '';
        $loops = 0;
        $prologue_done = 0;
        while ($d = $this->reader->readStream(8192)) {
            $buffer .= $d;
            $sub_v = $buffer;
            do {
                $proceed = 0;
                if (!$prologue_done) {
                    $proceed = 1;
                    if ((list($sub_r, $sub_v) = $this->xPrologue($sub_v)) && $sub_r) {
                        $loops = 0;
                        $sub_v .= $this->reader->readStream(128);
                        /* in case we missed the final DOT in the previous prologue loop */
                        if ($sub_r = $this->x('\.', $sub_v)) {
                            $sub_v = $sub_r[1];
                        }
                        /* more prologue to come, use outer loop */
                        if ($this->x("\@?(base|prefix)", $sub_v)) {
                            $proceed = 0;
                        }
                    } else {
                        $prologue_done = 1;
                    }
                }
                if (
                    $prologue_done
                    && (list($sub_r, $sub_v, $more_triples, $sub_v2) = $this->xTriplesBlock($sub_v))
                    && \is_array($sub_r)
                ) {
                    $proceed = 1;
                    $loops = 0;
                    foreach ($sub_r as $t) {
                        $this->addT($t);
                    }
                }
            } while ($proceed);
            ++$loops;
            $buffer = $sub_v;
            if ($loops > $this->max_parsing_loops) {
                $msg = 'too many loops: '.$loops.'. Could not parse "'.substr($buffer, 0, 200).'..."';
                throw new Exception($msg);
            }
        }
        /* triples that were complete but not yet terminated when the input ended */
        foreach ($more_triples as $t) {
            $this->addT($t);
        }
        $sub_v = \count($more_triples) ? $sub_v2 : $sub_v;
        $buffer = $sub_v;
        $this->unparsed_code = $buffer;

        /* remove trailing comments */
        while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) {
            $this->unparsed_code = $m[2];
        }

        if ($this->unparsed_code && !$this->logger->hasEntries('error')) {
            // only line breaks are flattened; the former class "[\x0a|\x0d]" also replaced "|"
            $rest = preg_replace('/[\x0a\x0d]/', ' ', substr($this->unparsed_code, 0, 30));
            if (trim($rest)) {
                $this->logger->error('Could not parse "'.$rest.'"');
            }
        }
    }
147
148 2
    /**
     * Consumes an optional base declaration followed by any number of prefix declarations.
     *
     * Nothing is consumed once triples have already been collected.
     *
     * @return array [1|0 whether any declaration was read, remaining text]
     */
    protected function xPrologue($v)
    {
        if ($this->t_count) {
            return [0, $v];
        }

        $found = 0;

        [$base, $v] = $this->xBaseDecl($v);
        if ($base) {
            $this->base = $base;
            $found = 1;
        }

        for (;;) {
            [$decl, $v] = $this->xPrefixDecl($v);
            if (!$decl) {
                break;
            }
            $this->namespaceHelper->setPrefix($decl['prefix'], $decl['uri']);
            $found = 1;
        }

        return [$found, $v];
    }
164
165
    /* 3 */
166
167 100
    /**
     * Matches a "base" / "@base" declaration with its IRI and an optional trailing dot.
     *
     * @return array [IRI as returned by xIRI_REF, remaining text] or [0, $v] if there is no base declaration
     */
    protected function xBaseDecl($v)
    {
        $keyword = $this->x("\@?base\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$iri, $rest] = $this->xIRI_REF($keyword[1]);
        if (!$iri) {
            return [0, $v];
        }

        // the terminating dot is optional here
        $dot = $this->x('\.', $rest);

        return [$iri, $dot ? $dot[1] : $rest];
    }
181
182
    /* 4 */
183
184 100
    /**
     * Matches a "prefix" / "@prefix" declaration: prefix name, IRI and an optional trailing dot.
     *
     * @return array [['prefix' => ..., 'uri_ref' => raw IRI, 'uri' => IRI resolved against the base], remaining text]
     *               or [0, $v] if there is no complete prefix declaration
     */
    protected function xPrefixDecl($v)
    {
        $keyword = $this->x("\@?prefix\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$prefix, $rest] = $this->xPNAME_NS($keyword[1]);
        if (!$prefix) {
            return [0, $v];
        }

        [$iri, $rest] = $this->xIRI_REF($rest);
        if (!$iri) {
            return [0, $v];
        }

        $uri = calcURI($iri, $this->base);

        // the terminating dot is optional here
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [['prefix' => $prefix, 'uri_ref' => $iri, 'uri' => $uri], $rest];
    }
202
203
    /* 21.., 32.. */
204
205 100
    /**
     * Parses consecutive triples from the start of $v with a small state machine
     * (1 = subject, 2 = predicate, 3 = object, 4 = separator).
     *
     * Triples are first gathered in a pending list and only moved to the result once a
     * terminating "." (or a block keyword / brace) has been seen.
     *
     * @return array [finished triples or 0 if none, text after the last finished statement,
     *               pending triples that were not terminated yet, text after the pending triples]
     */
    protected function xTriplesBlock($v)
    {
        $pre_r = [];
        $r = [];
        $state = 1;
        $sub_v = $v;
        $buffer = $sub_v;
        do {
            $proceed = 0;
            if (1 == $state) {/* expecting subject */
                $t = ['type' => 'triple', 's' => '', 'p' => '', 'o' => '', 's_type' => '', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
                if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['value'];
                    $t['s_type'] = $sub_r['type'];
                    $state = 2;
                    $proceed = 1;
                    if ($sub_r = $this->x('(\}|\.)', $sub_v)) {
                        if ('placeholder' == $t['s_type']) {
                            $state = 4;
                        } else {
                            $this->logger->error('"'.$sub_r[1].'" after subject found.');
                        }
                    }
                } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                    if ($this->x('\.', $sub_v)) {
                        $this->logger->error('DOT after subject found.');
                    }
                } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    $this->logger->error('Subject expected, DOT found.'.$sub_v);
                }
            }
            if (2 == $state) {/* expecting predicate */
                if ($sub_r = $this->x('a\s+', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                    $t['p_type'] = 'uri';
                    $state = 3;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    if ('bnode' == $sub_r['type']) {
                        $this->logger->error('Blank node used as triple predicate');
                    }
                    $t['p'] = $sub_r['value'];
                    $t['p_type'] = $sub_r['type'];
                    $state = 3;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    $state = 4;
                } elseif ($this->x('\}', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
            if (3 == $state) {/* expecting object */
                if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['value'];
                    $t['o_type'] = $sub_r['type'];
                    $t['o_lang'] = $sub_r['lang'] ?? '';
                    $t['o_datatype'] = $sub_r['datatype'] ?? '';
                    $pre_r[] = $t;
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    // $t is reused after "," and ";": clear both literal attributes of a previous object
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    // see above: do not inherit the language tag of a previous literal object
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                }
            }
            if (4 == $state) {/* expecting . or ; or , or } */
                if ($sub_r = $this->x('\.', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $state = 1;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\;', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 2;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\,', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 3;
                    $proceed = 1;
                    if ($this->x('\}', $sub_v)) {
                        $this->logger->error('Object expected, } found.');
                    }
                }
                /* a brace or block keyword ends the triples block; pending triples count as finished */
                if ($this->x('(\}|\{|OPTIONAL|FILTER|GRAPH)', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
        } while ($proceed);

        return \count($r) ? [$r, $buffer, $pre_r, $sub_v] : [0, $buffer, $pre_r, $sub_v];
    }
327
328
    /* 39.. */
329
330 98
    /**
     * Parses a bracketed blank node property list ("[ predicate object ; ... ]").
     *
     * A fresh blank node becomes the subject of every triple inside the brackets.
     *
     * @return array [['id' => blank node label, 'type' => 'bnode', 'triples' => list of triples], remaining text]
     *               or [0, $v] if $v does not start with "[" or the list is never closed with "]"
     */
    protected function xBlankNodePropertyList($v)
    {
        if ($sub_r = $this->x('\[', $v)) {
            $sub_v = $sub_r[1];
            $s = $this->createBnodeID();
            $r = ['id' => $s, 'type' => 'bnode', 'triples' => []];
            $t = ['type' => 'triple', 's' => $s, 'p' => '', 'o' => '', 's_type' => 'bnode', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
            $state = 2;
            $closed = 0;
            do {
                $proceed = 0;
                if (2 == $state) {/* expecting predicate */
                    if ($sub_r = $this->x('a\s+', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                        $t['p_type'] = 'uri';
                        $state = 3;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                        $t['p'] = $sub_r['value'];
                        $t['p_type'] = $sub_r['type'];
                        $state = 3;
                        $proceed = 1;
                    }
                }
                if (3 == $state) {/* expecting object */
                    if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['value'];
                        $t['o_type'] = $sub_r['type'];
                        $t['o_lang'] = $sub_r['lang'] ?? '';
                        $t['o_datatype'] = $sub_r['datatype'] ?? '';
                        $r['triples'][] = $t;
                        $state = 4;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['id'];
                        $t['o_type'] = $sub_r['type'];
                        // $t is reused after "," and ";": clear both literal attributes of a previous object
                        $t['o_lang'] = '';
                        $t['o_datatype'] = '';
                        $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                        $state = 4;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['id'];
                        $t['o_type'] = $sub_r['type'];
                        // see above: do not inherit the language tag of a previous literal object
                        $t['o_lang'] = '';
                        $t['o_datatype'] = '';
                        $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                        $state = 4;
                        $proceed = 1;
                    }
                }
                if (4 == $state) {/* expecting . or ; or , or ] */
                    if ($sub_r = $this->x('\.', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 1;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\;', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 2;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\,', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 3;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\]', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $proceed = 0;
                        $closed = 1;
                    }
                }
            } while ($proceed);
            if ($closed) {
                return [$r, $sub_v];
            }

            return [0, $v];
        }

        return [0, $v];
    }
412
413
    /* 40.. */
414
415 98
    /**
     * Parses a parenthesised collection and expands it into rdf:first / rdf:rest triples.
     *
     * Every item gets its own blank node; the last one points to rdf:nil.
     *
     * @return array [['id' => head blank node label, 'type' => 'bnode', 'triples' => list of triples], remaining text]
     *               or [0, $v] if $v does not start with "(" or the collection is never closed with ")"
     */
    protected function xCollection($v)
    {
        $open = $this->x('\(', $v);
        if (!$open) {
            return [0, $v];
        }

        $sub_v = $open[1];
        $s = $this->createBnodeID();
        $r = ['id' => $s, 'type' => 'bnode', 'triples' => []];

        while (true) {
            /* item: plain term, nested collection or blank node property list */
            [$item, $sub_v] = $this->xVarOrTerm($sub_v);
            if ($item) {
                $r['triples'][] = [
                    'type' => 'triple',
                    's' => $s,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $item['value'],
                    'o_type' => $item['type'],
                    'o_lang' => $item['lang'] ?? '',
                    'o_datatype' => $item['datatype'] ?? '',
                ];
            } else {
                [$item, $sub_v] = $this->xCollection($sub_v);
                if ($item) {
                    $r['triples'][] = [
                        'type' => 'triple',
                        's' => $s,
                        's_type' => 'bnode',
                        'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                        'p_type' => 'uri',
                        'o' => $item['id'],
                        'o_type' => $item['type'],
                        'o_lang' => '',
                        'o_datatype' => '',
                    ];
                } else {
                    [$item, $sub_v] = $this->xBlankNodePropertyList($sub_v);
                    if (!$item) {
                        /* neither an item nor ")" -> not a well-formed collection */
                        return [0, $v];
                    }
                    $r['triples'][] = [
                        'type' => 'triple',
                        's' => $s,
                        'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                        'o' => $item['id'],
                        's_type' => 'bnode',
                        'p_type' => 'uri',
                        'o_type' => $item['type'],
                        'o_lang' => '',
                        'o_datatype' => '',
                    ];
                }
                /* triples of the nested structure follow the triple that links to it */
                $r['triples'] = array_merge($r['triples'], $item['triples']);
            }

            $close = $this->x('\)', $sub_v);
            if ($close) {
                $r['triples'][] = [
                    'type' => 'triple',
                    's' => $s,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                    'p_type' => 'uri',
                    'o' => NamespaceHelper::NAMESPACE_RDF.'nil',
                    'o_type' => 'uri',
                    'o_lang' => '',
                    'o_datatype' => '',
                ];

                return [$r, $close[1]];
            }

            /* more items to come: chain a new list node */
            $next_s = $this->createBnodeID();
            $r['triples'][] = [
                'type' => 'triple',
                's' => $s,
                'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                'o' => $next_s,
                's_type' => 'bnode',
                'p_type' => 'uri',
                'o_type' => 'bnode',
                'o_lang' => '',
                'o_datatype' => '',
            ];
            $s = $next_s;
        }
    }
506
507
    /* 42 */
508
509 100
    /**
     * Tries a variable first, then a graph term.
     *
     * @return array [term array, remaining text] or [0, $v] if neither matches
     */
    protected function xVarOrTerm($v)
    {
        foreach (['xVar', 'xGraphTerm'] as $matcher) {
            [$term, $rest] = $this->$matcher($v);
            if ($term) {
                return [$term, $rest];
            }
        }

        return [0, $v];
    }
519
520
    /* 44, 74.., 75.. */
521
522 100
    /**
     * Matches a variable ("?name" or "$name") and registers its name in $this->r['vars'].
     *
     * @return array [['value' => variable name, 'type' => 'var'], remaining text] or [0, $v]
     */
    protected function xVar($v)
    {
        if ($r = $this->x('(\?|\$)([^\s]+)', $v)) {
            list($sub_r, $sub_v) = $this->xVARNAME($r[2]);
            // compare against '' instead of relying on truthiness, so that the name "0" is accepted
            if ('' !== $sub_r) {
                // strict lookup: loosely compared numeric names such as "1" and "01" would collide
                if (!\in_array($sub_r, $this->r['vars'], true)) {
                    $this->r['vars'][] = $sub_r;
                }

                // $sub_v is what xVARNAME left of the token, $r[3] the text after the token
                return [['value' => $sub_r, 'type' => 'var'], $sub_v.$r[3]];
            }
        }

        return [0, $v];
    }
536
537
    /* 45 */
538
539 100
    /**
     * Tries the graph term matchers in a fixed order and returns the first hit.
     *
     * Scalar results are wrapped into ['value' => ...]; a result without its own 'type'
     * receives the default type listed for its matcher.
     *
     * @return array [term array, remaining text] or [0, $v] if no matcher succeeds
     */
    protected function xGraphTerm($v)
    {
        $default_types = [
            'IRIref' => 'uri',
            'RDFLiteral' => 'literal',
            'NumericLiteral' => 'literal',
            'BooleanLiteral' => 'literal',
            'BlankNode' => 'bnode',
            'NIL' => 'uri',
            'Placeholder' => 'placeholder',
        ];

        foreach ($default_types as $term => $type) {
            [$hit, $rest] = $this->{'x'.$term}($v);
            if (!$hit) {
                continue;
            }
            if (!\is_array($hit)) {
                $hit = ['value' => $hit];
            }
            $hit['type'] ??= $type;

            return [$hit, $rest];
        }

        return [0, $v];
    }
563
564
    /* 60 */
565
566 99
    /**
     * Matches a quoted string literal with an optional language tag or "^^" datatype.
     *
     * @return array [['value' => ..., 'type' => 'literal', 'sub_type' => ..., optional 'lang' / 'datatype'], remaining text]
     *               or [0, $v] if $v does not start with a string
     */
    protected function xRDFLiteral($v)
    {
        list($r, $sub_v) = $this->xString($v);
        if (!$r) {
            return [0, $v];
        }
        $r['value'] = $this->unescapeNtripleUTF($r['value']);

        list($lang, $sub_v) = $this->xLANGTAG($sub_v);
        if ($lang) {
            $r['lang'] = $lang;
        } elseif (!$this->x('\s', $sub_v) && ($marker = $this->x('\^\^', $sub_v))) {
            // the "^^" marker stays consumed even if no IRI follows, as before
            list($datatype, $sub_v) = $this->xIRIref($marker[1]);
            // test the IRI itself; the former "$sub_r[1]" looked at its second character
            // (or at an offset of the integer 0 when no IRI was found)
            if ($datatype) {
                $r['datatype'] = $datatype;
            }
        }

        return [$r, $sub_v];
    }
587
588
    /* 61.., 62.., 63.., 64.. */
589
590 98
    /**
     * Matches an optionally signed number, trying double, decimal and integer in that order.
     *
     * @return array [['value' => sign + digits, 'type' => 'literal', 'datatype' => XSD type IRI], remaining text]
     *               or [0, $v] if no number is found
     */
    protected function xNumericLiteral($v)
    {
        // the sign group is optional, so group 1 may be empty
        $signed = $this->x('(\-|\+)?', $v);
        $sign = $signed[1];
        $rest = $signed[2];

        $xsd_by_matcher = ['DOUBLE' => 'double', 'DECIMAL' => 'decimal', 'INTEGER' => 'integer'];
        foreach ($xsd_by_matcher as $matcher => $xsd) {
            [$digits, $rest] = $this->{'x'.$matcher}($rest);
            if (false === $digits) {
                continue;
            }

            return [
                [
                    'value' => $sign.$digits,
                    'type' => 'literal',
                    'datatype' => NamespaceHelper::NAMESPACE_XSD.$xsd,
                ],
                $rest,
            ];
        }

        return [0, $v];
    }
610
611
    /* 65.. */
612
613 98
    /**
     * Matches "true" or "false" at the start of $v.
     *
     * @return array [matched keyword, remaining text] or [0, $v]
     */
    protected function xBooleanLiteral($v)
    {
        $hit = $this->x('(true|false)', $v);

        return $hit ? [$hit[1], $hit[2]] : [0, $v];
    }
621
622
    /* 66.., 87.., 88.., 89.., 90.., 91.. */
623
624 99
    /**
     * Matches a string in single, double or triple quotes (largely simplified).
     *
     * The closing delimiter is the first occurrence that is not preceded by an odd
     * number of backslashes.
     *
     * @return array [['value' => raw content, 'type' => 'literal', 'sub_type' => quote style], remaining text]
     *               or [0, $v] if $v does not start with a quote or the string is never closed
     */
    protected function xString($v)
    {
        $opening = [];
        if (1 !== preg_match('/^\s*([\']{3}|\'|[\"]{3}|\")(.*)$/s', $v, $opening)) {
            return [0, $v];
        }
        [, $delim, $rest] = $opening;

        $sub_type = ["'''" => 'literal_long1', '"""' => 'literal_long2', "'" => 'literal1', '"' => 'literal2'][$delim];
        $delim_len = \strlen($delim);

        $offset = 0;
        while (false !== ($at = strpos($rest, $delim, $offset))) {
            $value = substr($rest, 0, $at);

            /* an odd run of trailing backslashes escapes this delimiter */
            $trail = [];
            $escaped = preg_match('/([\x5c]+)$/s', $value, $trail) && 1 === \strlen($trail[1]) % 2;
            if (!$escaped) {
                return [
                    ['value' => $value, 'type' => 'literal', 'sub_type' => $sub_type],
                    substr($rest, $at + $delim_len),
                ];
            }

            $offset = $at + 1;
        }

        return [0, $v];
    }
658
659
    /* 67 */
660
661 100
    /**
     * Matches either an IRI in angle brackets (resolved against the base) or a prefixed name.
     *
     * @return array [IRI, remaining text] or [0, text] if neither form matches
     */
    protected function xIRIref($v)
    {
        [$iri, $rest] = $this->xIRI_REF($v);
        if ($iri) {
            return [calcURI($iri, $this->base), $rest];
        }

        [$name, $rest] = $this->xPrefixedName($rest);
        if ($name) {
            return [$name, $rest];
        }

        return [0, $rest];
    }
671
672
    /* 68 */
673
674 100
    /**
     * Matches "prefix:local" or a bare "prefix:" and expands it through the namespace helper.
     *
     * @return array [expanded IRI, remaining text] or [0, text] if nothing matches
     *               or the bare prefix is unknown
     */
    protected function xPrefixedName($v)
    {
        [$full, $v] = $this->xPNAME_LN($v);
        if ($full) {
            return [$full, $v];
        }

        [$prefix, $rest] = $this->xPNAME_NS($v);
        if ($prefix && $this->namespaceHelper->hasPrefix($prefix)) {
            return [$this->namespaceHelper->getNamespace($prefix), $rest];
        }

        return [0, $v];
    }
686
687
    /* 69.., 73.., 93, 94..  */
688
689 98
    /**
     * Matches a labelled blank node ("_:label") or an empty "[]", which gets a generated label.
     *
     * @return array [['type' => 'bnode', 'value' => label], remaining text] or [0, $v]
     */
    protected function xBlankNode($v)
    {
        $marker = $this->x('\_\:', $v);
        if ($marker) {
            [$label, $rest] = $this->xPN_LOCAL($marker[1]);
            if ($label) {
                return [['type' => 'bnode', 'value' => '_:'.$label], $rest];
            }
        }

        $anonymous = $this->x('\[[\x20\x9\xd\xa]*\]', $v);
        if ($anonymous) {
            return [['type' => 'bnode', 'value' => $this->createBnodeID()], $anonymous[1]];
        }

        return [0, $v];
    }
700
701
    /* 70.. @@sync with SPARQLParser */
702
703 2
    /**
     * Matches an IRI in angle brackets.
     *
     * Three forms are tried in order: a "${...}" placeholder inside the brackets, the empty
     * IRI "<>", and a regular IRI.
     *
     * @return array [IRI text, remaining text]; true takes the place of the IRI text for "<>"
     *               and for an IRI text that PHP treats as falsy; [0, $v] if no form matches
     */
    protected function xIRI_REF($v)
    {
        //if ($r = $this->x('\<([^\<\>\"\{\}\|\^\'[:space:]]*)\>', $v)) {
        // only the success of xPlaceholder matters here, its result is not needed
        if (($r = $this->x('\<(\$\{[^\>]*\})\>', $v)) && $this->xPlaceholder($r[1])) {
            return [$r[1], $r[2]];
        } elseif ($r = $this->x('\<\>', $v)) {
            return [true, $r[1]];
        } elseif ($r = $this->x('\<([^\s][^\<\>]*)\>', $v)) {
            return [$r[1] ? $r[1] : true, $r[2]];
        }

        return [0, $v];
    }
716
717
    /* 71 */
718
719 100
    /**
     * Matches an optional prefix followed by a colon.
     *
     * @return array [prefix including the trailing ":", remaining text] or [0, $v] if no colon follows
     */
    protected function xPNAME_NS($v)
    {
        [$name, $rest] = $this->xPN_PREFIX($v);

        $colon = $this->x("\:", $rest);
        if (!$colon) {
            return [0, $v];
        }

        // an absent prefix yields just ":"
        return [($name ?: '').':', $colon[1]];
    }
726
727
    /* 72 */
728
729 100
    /**
     * Matches "prefix:local" and expands the prefix through the namespace helper.
     *
     * @return array [namespace IRI + local part, remaining text] or [0, $v] if the name is
     *               incomplete, separated by whitespace, or uses an unknown prefix
     */
    protected function xPNAME_LN($v)
    {
        [$prefix, $rest] = $this->xPNAME_NS($v);
        if (!$prefix) {
            return [0, $v];
        }

        // the local part has to follow the colon directly
        if ($this->x('\s', $rest)) {
            return [0, $v];
        }

        [$local, $rest] = $this->xPN_LOCAL($rest);
        if (!$local) {
            return [0, $v];
        }

        if (!$this->namespaceHelper->hasPrefix($prefix)) {
            return [0, $v];
        }

        return [$this->namespaceHelper->getNamespace($prefix).$local, $rest];
    }
743
744
    /* 76 */
745
746 53
    /**
     * Matches a language tag ("@en", "@en-gb") that directly follows the preceding token.
     *
     * @return array [tag without "@", remaining text] or [0, $v]
     */
    protected function xLANGTAG($v)
    {
        // leading whitespace means the tag does not belong to the preceding literal
        if ($this->x('\s', $v)) {
            return [0, $v];
        }

        $tag = $this->x('\@([a-z]+(\-[a-z0-9]+)*)', $v);

        // group 2 is the last subtag, group 3 the remaining text
        return $tag ? [$tag[1], $tag[3]] : [0, $v];
    }
754
755
    /* 77.. */
756
757 98
    /**
     * Matches a run of digits.
     *
     * @return array [digits, remaining text] or [false, $v]
     */
    protected function xINTEGER($v)
    {
        $hit = $this->x('([0-9]+)', $v);

        return $hit ? [$hit[1], $hit[2]] : [false, $v];
    }
765
766
    /* 78.. */
767
768 98
    /**
     * Matches a decimal number, either "digits.digits*" or ".digits".
     *
     * @return array [number text, remaining text] or [false, $v]
     */
    protected function xDECIMAL($v)
    {
        foreach (['([0-9]+\.[0-9]*)', '(\.[0-9]+)'] as $form) {
            $hit = $this->x($form, $v);
            if ($hit) {
                return [$hit[1], $hit[2]];
            }
        }

        return [false, $v];
    }
779
780
    /* 79.., 86.. */
781
782 98
    /**
     * Matches a number with an exponent part in one of three notations.
     *
     * @return array [number text, remaining text] or [false, $v]
     */
    protected function xDOUBLE($v)
    {
        $forms = [
            '([0-9]+\.[0-9]*E[\+\-]?[0-9]+)',
            '(\.[0-9]+E[\+\-]?[0-9]+)',
            '([0-9]+E[\+\-]?[0-9]+)',
        ];

        foreach ($forms as $form) {
            $hit = $this->x($form, $v);
            if ($hit) {
                return [$hit[1], $hit[2]];
            }
        }

        return [false, $v];
    }
796
797
    /* 92 */
798
799 98
    /**
     * Tries to read an empty pair of parentheses and maps it to rdf:nil.
     *
     * Only space, tab, CR and LF are allowed between the parentheses.
     *
     * @param string $v input to parse
     *
     * @return array [['type' => 'uri', 'value' => rdf:nil URI], first match element] on
     *               success, [0, $v] otherwise
     */
    protected function xNIL($v)
    {
        $match = $this->x('\([\x20\x9\xd\xa]*\)', $v);
        if (!$match) {
            return [0, $v];
        }

        $nil = ['type' => 'uri', 'value' => NamespaceHelper::NAMESPACE_RDF.'nil'];

        // The pattern has no capture group of its own, so index 1 is used as the rest.
        return [$nil, $match[1]];
    }
807
808
    /* 95.. */
809
810 99
    /**
     * Tries to read a run of basic name characters.
     *
     * Accepts either one or more letters a-z or a single "\u" escape followed by
     * one to four hex digits. x() is called with its default 'si' options.
     *
     * @param string $v input to parse
     *
     * @return array [matched text, second match element] on success, [0, $v] otherwise
     */
    protected function xPN_CHARS_BASE($v)
    {
        $match = $this->x("([a-z]+|\\\u[0-9a-f]{1,4})", $v);

        return $match ? [$match[1], $match[2]] : [0, $v];
    }
818
819
    /* 96 */
820
821 95
    /**
     * Tries to read basic name characters or, failing that, a single underscore.
     *
     * @param string $v input to parse
     *
     * @return array [matched text, rest] on success, [0, $v] otherwise
     */
    protected function xPN_CHARS_U($v)
    {
        [$chars, $rest] = $this->xPN_CHARS_BASE($v);
        if ($chars) {
            return [$chars, $rest];
        }

        $underscore = $this->x('(_)', $v);
        if ($underscore) {
            return [$underscore[1], $underscore[2]];
        }

        return [0, $v];
    }
831
832
    /* 97.. */
833
834 94
    /**
     * Reads a variable name by repeatedly consuming digits, name characters
     * (xPN_CHARS_U) and, once something has been collected, characters from an
     * extra character class.
     *
     * NOTE(review): PCRE reads at most two hex digits after "\x", so the class
     * '[\xb7\x300-\x36f]' is probably not the U+0300-U+036F range it appears
     * to intend - confirm before relying on that branch.
     *
     * @param string $v input to parse
     *
     * @return array [collected name (may be ''), remaining input]
     */
    protected function xVARNAME($v)
    {
        $name = '';

        while (true) {
            $digits = $this->x('([0-9]+)', $v);
            if ($digits) {
                $name .= $digits[1];
                $v = $digits[2];

                continue;
            }

            [$chars, $rest] = $this->xPN_CHARS_U($v);
            if ($chars) {
                $name .= $chars;
                $v = $rest;

                continue;
            }

            // The extra class is only tried after at least one character was read.
            if ($name && ($extra = $this->x('([\xb7\x300-\x36f]+)', $v))) {
                $name .= $extra[1];
                $v = $extra[2];

                continue;
            }

            break;
        }

        return [$name, $v];
    }
856
857
    /* 98.. */
858
859 6
    /**
     * Tries to read name characters: whatever xPN_CHARS_U() accepts, or a single
     * character from the class '[\-0-9\xb7\x300-\x36f]'.
     *
     * NOTE(review): PCRE reads at most two hex digits after "\x", so
     * '\x300-\x36f' is probably not the U+0300-U+036F range it appears to
     * intend - confirm.
     *
     * @param string $v input to parse
     *
     * @return array [matched text, rest] on success, [false, $v] otherwise
     */
    protected function xPN_CHARS($v)
    {
        [$chars, $rest] = $this->xPN_CHARS_U($v);
        if ($chars) {
            return [$chars, $rest];
        }

        $single = $this->x('([\-0-9\xb7\x300-\x36f])', $v);
        if ($single) {
            return [$single[1], $single[2]];
        }

        return [false, $v];
    }
869
870
    /* 99 */
871
872 100
    /**
     * Tries to read the prefix part of a prefixed name.
     *
     * A single "accelerator" pattern is tried first: any run of characters other
     * than whitespace and : ( ) { } ; , is accepted as the prefix. Only when that
     * fails is the prefix assembled piece by piece from xPN_CHARS_BASE(),
     * xPN_CHARS() and literal dots.
     *
     * @param string $v input to parse
     *
     * @return array [prefix, rest]; the prefix is whatever xPN_CHARS_BASE()
     *               returned when the slow path finds no leading character
     */
    protected function xPN_PREFIX($v)
    {
        /* accelerator, marked "@@testing" in the original code */
        $fast = $this->x("([^\s\:\(\)\{\}\;\,]+)", $v, 's');
        if ($fast) {
            return [$fast[1], $fast[2]];
        }

        [$prefix, $rest] = $this->xPN_CHARS_BASE($v);
        if (!$prefix) {
            return [$prefix, $rest];
        }

        // Keep appending name characters and dots until neither matches.
        while (true) {
            [$chars, $rest] = $this->xPN_CHARS($rest);
            if (false !== $chars) {
                $prefix .= $chars;

                continue;
            }

            $dot = $this->x("\.", $rest);
            if (!$dot) {
                break;
            }
            $prefix .= '.';
            // The dot pattern has no capture group, so index 1 is used as the rest.
            $rest = $dot[1];
        }

        [$chars, $rest] = $this->xPN_CHARS($rest);
        $prefix .= $chars ?: '';

        return [$prefix, $rest];
    }
896
897
    /* 100 */
898
899 33
    /**
     * Tries to read the local part of a prefixed name.
     *
     * A single "accelerator" pattern is tried first: a run of characters other than
     * whitespace and ( ) { } [ ] ; , . is accepted, provided the text after it does
     * not start with a dot. Otherwise the name is assembled step by step from
     * single digits, xPN_CHARS_U() matches, dots and xPN_CHARS() matches.
     *
     * @param string $v input to parse
     *
     * @return array [local name (may be ''), rest]
     */
    protected function xPN_LOCAL($v)
    {
        /* accelerator, marked "@@testing" in the original code */
        $fast = $this->x("([^\s\(\)\{\}\[\]\;\,\.]+)", $v, 's');
        if ($fast && !preg_match('/^\./', $fast[2])) {
            return [$fast[1], $fast[2]];
        }

        $local = '';
        $rest = $v;

        while (true) {
            // Stop as soon as the '\s' pattern matches the remaining input.
            if ($this->x('\s', $rest)) {
                break;
            }

            $digit = $this->x('([0-9])', $rest);
            if ($digit) {
                $local .= $digit[1];
                $rest = $digit[2];

                continue;
            }

            [$chars, $rest] = $this->xPN_CHARS_U($rest);
            if ($chars) {
                $local .= $chars;

                continue;
            }

            // Dots and the wider character set are only allowed after a first character.
            if (!$local) {
                break;
            }

            // A dot is taken only when the text after it does not start with whitespace or "}".
            $dot = $this->x('(\.)', $rest);
            if ($dot && !preg_match('/^[\s\}]/s', $dot[2])) {
                $local .= $dot[1];
                $rest = $dot[2];
            }

            [$chars, $rest] = $this->xPN_CHARS($rest);
            if (!$chars) {
                break;
            }
            $local .= $chars;
        }

        return [$local, $rest];
    }
932
933 53
    /**
     * Decodes backslash escape sequences in a string.
     *
     * Handles the single-character escapes \t, \n, \r, \" and \' and the numeric
     * escapes \uXXXX and \UXXXXXXXX, which are replaced by the UTF-8 byte sequence
     * of the code point. Hex digits are accepted in upper and lower case. Code
     * points of 0x200000 and above are replaced by an empty string.
     *
     * Numeric escapes are decoded in one left-to-right pass, so text produced by
     * decoding one escape is never interpreted as the start of another.
     *
     * NOTE(review): there is no mapping for an escaped backslash ("\\"), so
     * "\\t" has its trailing "\t" turned into a tab - confirm whether the
     * caller handles that case.
     *
     * @param string $v string that may contain escape sequences
     *
     * @return string the decoded string
     */
    protected function unescapeNtripleUTF($v)
    {
        // Nothing to do when the string contains no backslash at all.
        if (false === strpos($v, '\\')) {
            return $v;
        }

        $mappings = ['t' => "\t", 'n' => "\n", 'r' => "\r", '\"' => '"', '\'' => "'"];
        foreach ($mappings as $in => $out) {
            $v = preg_replace('/\x5c(['.$in.'])/', $out, $v);
        }

        if (false === strpos(strtolower($v), '\u')) {
            return $v;
        }

        $decoded = preg_replace_callback(
            '/\x5c(?:U([0-9A-Fa-f]{8})|u([0-9A-Fa-f]{4}))/',
            static function (array $m): string {
                // Group 2 only exists for the 4-digit form; otherwise group 1 holds the digits.
                $no = (int) hexdec($m[2] ?? $m[1]);

                if ($no < 128) {
                    return \chr($no);
                }
                if ($no < 2048) {
                    return \chr(($no >> 6) + 192).\chr(($no & 63) + 128);
                }
                if ($no < 65536) {
                    return \chr(($no >> 12) + 224)
                        .\chr((($no >> 6) & 63) + 128)
                        .\chr(($no & 63) + 128);
                }
                if ($no < 2097152) {
                    return \chr(($no >> 18) + 240)
                        .\chr((($no >> 12) & 63) + 128)
                        .\chr((($no >> 6) & 63) + 128)
                        .\chr(($no & 63) + 128);
                }

                return '';
            },
            $v
        );

        // preg_replace_callback() yields null on a PCRE error; keep the input in that case.
        return $decoded ?? $v;
    }
963
964 98
    /**
     * Tries to read a placeholder of the form "?{...}" or "${...}".
     *
     * The braces may be nested; the text between the outermost pair is the
     * placeholder name. Each distinct name is recorded once in
     * $this->r['placeholders'].
     *
     * @param string $v input to parse
     *
     * @return array [['value' => name, 'type' => 'placeholder'], rest] on success,
     *               [0, $v] otherwise
     */
    protected function xPlaceholder($v)
    {
        $marker = $this->x('(\?|\$)', $v);
        if (!$marker) {
            return [0, $v];
        }

        $afterMarker = trim($marker[2]);

        // Recursive pattern: a balanced {...} group, which must start the trimmed remainder.
        if (!preg_match('/(\{(?:[^{}]+|(?R))*\})/', $marker[2], $m) || 0 !== strpos($afterMarker, $m[1])) {
            return [0, $v];
        }

        $ph = substr($m[1], 1, -1);
        $rest = substr($afterMarker, \strlen($m[1]));

        $this->r['placeholders'] ??= [];
        // Strict comparison: loosely compared, numeric-looking names such as "1" and "01" are equal.
        if (!\in_array($ph, $this->r['placeholders'], true)) {
            $this->r['placeholders'][] = $ph;
        }

        return [['value' => $ph, 'type' => 'placeholder'], $rest];
    }
984
}
985