Passed
Push — master ( 5f3d2e...2722b4 )
by Konrad
21:26 queued 17:04
created

TurtleParser::xPN_LOCAL()   C

Complexity

Conditions 13
Paths 9

Size

Total Lines 32
Code Lines 24

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 14.5274

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 13
eloc 24
c 1
b 0
f 0
nc 9
nop 1
dl 0
loc 32
ccs 19
cts 24
cp 0.7917
crap 14.5274
rs 6.6166

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include: Extract Method.

1
<?php
2
3
/**
4
 * This file is part of the sweetrdf/InMemoryStoreSqlite package and licensed under
5
 * the terms of the GPL-2 license.
6
 *
7
 * (c) Konrad Abicht <[email protected]>
8
 * (c) Benjamin Nowack
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace sweetrdf\InMemoryStoreSqlite\Parser;
15
16
use Exception;
17
use sweetrdf\InMemoryStoreSqlite\Log\Logger;
18
use sweetrdf\InMemoryStoreSqlite\NamespaceHelper;
19
use sweetrdf\InMemoryStoreSqlite\StringReader;
20
21
use function sweetrdf\InMemoryStoreSqlite\calcURI;
22
23
class TurtleParser extends BaseParser
24
{
25
    protected int $state;
26
    protected int $max_parsing_loops;
27
    protected string $unparsed_code;
28
29 98
    /**
     * Hands the shared collaborators to the base parser and resets this parser's own state.
     *
     * @param Logger          $logger          receives parse errors
     * @param NamespaceHelper $namespaceHelper keeps track of declared prefixes
     * @param StringReader    $stringReader    provides the input to parse
     */
    public function __construct(Logger $logger, NamespaceHelper $namespaceHelper, StringReader $stringReader)
    {
        parent::__construct($logger, $namespaceHelper, $stringReader);

        $this->max_parsing_loops = 500;
        $this->unparsed_code = '';
        $this->state = 0;
    }
37
38 98
    /**
     * Matches a regular expression fragment against the start of the given input.
     *
     * Leading \xA0/\xC2 bytes are replaced by a single space and leading "#" comments are
     * skipped before matching. The fragment is anchored at the start (after optional
     * whitespace) and followed by a catch-all group, so the last entry of the returned
     * match array always holds the remaining input.
     *
     * @param string $re      regular expression fragment (without delimiters)
     * @param string $v       input to match against
     * @param string $options PCRE modifiers appended to the pattern
     *
     * @return array|false the preg_match groups on success, false otherwise
     */
    protected function x($re, $v, $options = 'si')
    {
        $input = preg_replace('/^[\xA0\xC2]+/', ' ', $v);

        /* strip leading comment lines, one per iteration */
        $comment = [];
        while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $input, $comment)) {
            $input = $comment[2];
        }

        $groups = [];
        if (preg_match("/^\s*".$re.'(.*)$/'.$options, $input, $groups)) {
            return $groups;
        }

        return false;
    }
49
50 2
    /**
     * Advances the blank node counter and returns a fresh blank node label.
     *
     * @return string label of the form "_:" + bnode prefix + counter
     */
    private function createBnodeID(): string
    {
        ++$this->bnode_id;

        return sprintf('_:%s%s', $this->bnode_prefix, $this->bnode_id);
    }
56
57 2
    /**
     * Stores a triple at the next free index and increases the triple counter.
     *
     * @param array $t triple to store
     */
    protected function addT(array $t): void
    {
        $this->triples[$this->t_count++] = $t;
    }
62
63
    /**
     * Returns the number of triples stored so far (the value of the triple counter).
     */
    protected function countTriples()
    {
        return $this->t_count;
    }
67
68
    /**
     * Returns the input that was left over after the last call to parse().
     */
    protected function getUnparsedCode()
    {
        return $this->unparsed_code;
    }
72
73 2
    /**
     * Parses Turtle input and collects the resulting triples.
     *
     * The input is pulled from the reader in chunks. Prologue directives are consumed first
     * (xPrologue), afterwards triples blocks (xTriplesBlock). Input that could not be parsed
     * is kept in $this->unparsed_code and, unless an error was logged already, reported
     * through the logger.
     *
     * @param string $path handed to the reader's init()
     * @param string $data handed to the reader's init()
     *
     * @throws Exception if more than max_parsing_loops chunks in a row were read without
     *                   any prologue or triple being recognized
     */
    public function parse(string $path, string $data = ''): void
    {
        $this->triples = [];
        $this->t_count = 0;
        $this->reader->init($path, $data);
        $this->base = $this->reader->getBase();
        $this->r = ['vars' => []];
        /* parse */
        $buffer = '';
        $more_triples = [];
        $sub_v = '';
        $sub_v2 = '';
        $loops = 0;
        $prologue_done = 0;
        while ($d = $this->reader->readStream(8192)) {
            $buffer .= $d;
            $sub_v = $buffer;
            do {
                $proceed = 0;
                if (!$prologue_done) {
                    $proceed = 1;
                    if ((list($sub_r, $sub_v) = $this->xPrologue($sub_v)) && $sub_r) {
                        $loops = 0;
                        $sub_v .= $this->reader->readStream(128);
                        /* in case we missed the final DOT in the previous prologue loop */
                        if ($sub_r = $this->x('\.', $sub_v)) {
                            $sub_v = $sub_r[1];
                        }
                        /* more prologue to come, use outer loop */
                        if ($this->x("\@?(base|prefix)", $sub_v)) {
                            $proceed = 0;
                        }
                    } else {
                        $prologue_done = 1;
                    }
                }
                if (
                    $prologue_done
                    && (list($sub_r, $sub_v, $more_triples, $sub_v2) = $this->xTriplesBlock($sub_v))
                    && \is_array($sub_r)
                ) {
                    $proceed = 1;
                    $loops = 0;
                    foreach ($sub_r as $t) {
                        $this->addT($t);
                    }
                }
            } while ($proceed);
            ++$loops;
            $buffer = $sub_v;
            if ($loops > $this->max_parsing_loops) {
                $msg = 'too many loops: '.$loops.'. Could not parse "'.substr($buffer, 0, 200).'..."';
                throw new Exception($msg);
            }
        }

        /* triples of a final statement that was not terminated explicitly */
        foreach ($more_triples as $t) {
            $this->addT($t);
        }
        $sub_v = \count($more_triples) ? $sub_v2 : $sub_v;
        $buffer = $sub_v;
        $this->unparsed_code = $buffer;

        /* remove trailing comments */
        while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) {
            $this->unparsed_code = $m[2];
        }

        if ($this->unparsed_code && !$this->logger->hasEntries('error')) {
            /*
             * Only line breaks are turned into spaces. The former class "[\x0a|\x0d]" also
             * contained a literal "|" and therefore altered pipes in the reported snippet.
             */
            $rest = preg_replace('/[\x0a\x0d]/', ' ', substr($this->unparsed_code, 0, 30));
            if (trim($rest)) {
                $this->logger->error('Could not parse "'.$rest.'"');
            }
        }
    }
147
148 2
    /**
     * Consumes an optional base declaration followed by any number of prefix declarations.
     *
     * Nothing is consumed once at least one triple has been stored.
     *
     * @param string $v input to parse
     *
     * @return array [1 if any declaration was consumed (0 otherwise), remaining input]
     */
    protected function xPrologue($v)
    {
        if ($this->t_count) {
            return [0, $v];
        }

        $found = 0;

        [$base, $v] = $this->xBaseDecl($v);
        if ($base) {
            $this->base = $base;
            $found = 1;
        }

        while (true) {
            [$decl, $v] = $this->xPrefixDecl($v);
            if (!$decl) {
                break;
            }
            $this->namespaceHelper->setPrefix($decl['prefix'], $decl['uri']);
            $found = 1;
        }

        return [$found, $v];
    }
164
165
    /* 3 */
166
167 98
    /**
     * Parses a base declaration ("@base" or "base", an IRI reference, optional final DOT).
     *
     * @param string $v input to parse
     *
     * @return array [result of xIRI_REF, remaining input] on success, [0, $v] otherwise
     */
    protected function xBaseDecl($v)
    {
        $keyword = $this->x("\@?base\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$iri, $rest] = $this->xIRI_REF($keyword[1]);
        if (!$iri) {
            return [0, $v];
        }

        /* the terminating DOT is optional */
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [$iri, $rest];
    }
181
182
    /* 4 */
183
184 98
    /**
     * Parses a prefix declaration ("@prefix" or "prefix", a prefix name, an IRI reference,
     * optional final DOT).
     *
     * @param string $v input to parse
     *
     * @return array on success [['prefix' => ..., 'uri_ref' => ..., 'uri' => ...], remaining input],
     *               where 'uri' is 'uri_ref' passed through calcURI with the current base;
     *               [0, $v] otherwise
     */
    protected function xPrefixDecl($v)
    {
        $keyword = $this->x("\@?prefix\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$prefix, $rest] = $this->xPNAME_NS($keyword[1]);
        if (!$prefix) {
            return [0, $v];
        }

        [$uriRef, $rest] = $this->xIRI_REF($rest);
        if (!$uriRef) {
            return [0, $v];
        }

        $uri = calcURI($uriRef, $this->base);

        /* the terminating DOT is optional */
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [['prefix' => $prefix, 'uri_ref' => $uriRef, 'uri' => $uri], $rest];
    }
202
203
    /* 21.., 32.. */
204
205 98
    /**
     * Parses a sequence of triple statements using a small state machine
     * (1 = subject, 2 = predicate, 3 = object, 4 = separator expected).
     *
     * Triples of a statement are first gathered in $pre_r and only moved to the result once
     * the statement is terminated by a DOT or one of "}", "{", OPTIONAL, FILTER, GRAPH follows.
     *
     * @param string $v input to parse
     *
     * @return array [completed triples (0 if none), input remaining after the last completed
     *               statement, triples of a statement not yet completed, input remaining
     *               after those pending triples]
     */
    protected function xTriplesBlock($v)
    {
        $pre_r = [];
        $r = [];
        $state = 1;
        $sub_v = $v;
        $buffer = $sub_v;
        do {
            $proceed = 0;
            if (1 == $state) {/* expecting subject */
                $t = ['type' => 'triple', 's' => '', 'p' => '', 'o' => '', 's_type' => '', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
                if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['value'];
                    $t['s_type'] = $sub_r['type'];
                    $state = 2;
                    $proceed = 1;
                    if ($sub_r = $this->x('(\}|\.)', $sub_v)) {
                        if ('placeholder' == $t['s_type']) {
                            $state = 4;
                        } else {
                            $this->logger->error('"'.$sub_r[1].'" after subject found.');
                        }
                    }
                } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                    if ($this->x('\.', $sub_v)) {
                        $this->logger->error('DOT after subject found.');
                    }
                } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    $this->logger->error('Subject expected, DOT found.'.$sub_v);
                }
            }
            if (2 == $state) {/* expecting predicate */
                if ($sub_r = $this->x('a\s+', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                    $t['p_type'] = 'uri';
                    $state = 3;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    if ('bnode' == $sub_r['type']) {
                        $this->logger->error('Blank node used as triple predicate');
                    }
                    $t['p'] = $sub_r['value'];
                    $t['p_type'] = $sub_r['type'];
                    $state = 3;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    $state = 4;
                } elseif ($this->x('\}', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
            if (3 == $state) {/* expecting object */
                if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['value'];
                    $t['o_type'] = $sub_r['type'];
                    $t['o_lang'] = $sub_r['lang'] ?? '';
                    $t['o_datatype'] = $sub_r['datatype'] ?? '';
                    $pre_r[] = $t;
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /* $t is reused within a statement: drop lang/datatype of a previous object */
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /* $t is reused within a statement: drop lang/datatype of a previous object */
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                }
            }
            if (4 == $state) {/* expecting . or ; or , or } */
                if ($sub_r = $this->x('\.', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $state = 1;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\;', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 2;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\,', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 3;
                    $proceed = 1;
                    if ($this->x('\}', $sub_v)) {
                        $this->logger->error('Object expected, } found.');
                    }
                }
                /* a following block keyword or brace ends the triples block */
                if ($this->x('(\}|\{|OPTIONAL|FILTER|GRAPH)', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
        } while ($proceed);

        return \count($r) ? [$r, $buffer, $pre_r, $sub_v] : [0, $buffer, $pre_r, $sub_v];
    }
327
328
    /* 39.. */
329
330 96
    /**
     * Parses a blank node property list ("[ predicate object ; ... ]").
     *
     * A new blank node ID is created as the subject of all contained triples. Nested
     * collections and property lists contribute their own triples to the result.
     *
     * @param string $v input to parse
     *
     * @return array [['id' => blank node ID, 'type' => 'bnode', 'triples' => [...]], remaining input]
     *               if the list was closed with "]", [0, $v] otherwise
     */
    protected function xBlankNodePropertyList($v)
    {
        if ($sub_r = $this->x('\[', $v)) {
            $sub_v = $sub_r[1];
            $s = $this->createBnodeID();
            $r = ['id' => $s, 'type' => 'bnode', 'triples' => []];
            $t = ['type' => 'triple', 's' => $s, 'p' => '', 'o' => '', 's_type' => 'bnode', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
            $state = 2;
            $closed = 0;
            do {
                $proceed = 0;
                if (2 == $state) {/* expecting predicate */
                    if ($sub_r = $this->x('a\s+', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                        $t['p_type'] = 'uri';
                        $state = 3;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                        $t['p'] = $sub_r['value'];
                        $t['p_type'] = $sub_r['type'];
                        $state = 3;
                        $proceed = 1;
                    }
                }
                if (3 == $state) {/* expecting object */
                    if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['value'];
                        $t['o_type'] = $sub_r['type'];
                        $t['o_lang'] = $sub_r['lang'] ?? '';
                        $t['o_datatype'] = $sub_r['datatype'] ?? '';
                        $r['triples'][] = $t;
                        $state = 4;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['id'];
                        $t['o_type'] = $sub_r['type'];
                        /* $t is reused for every triple: drop lang/datatype of a previous object */
                        $t['o_lang'] = '';
                        $t['o_datatype'] = '';
                        $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                        $state = 4;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['id'];
                        $t['o_type'] = $sub_r['type'];
                        /* $t is reused for every triple: drop lang/datatype of a previous object */
                        $t['o_lang'] = '';
                        $t['o_datatype'] = '';
                        $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                        $state = 4;
                        $proceed = 1;
                    }
                }
                if (4 == $state) {/* expecting . or ; or , or ] */
                    if ($sub_r = $this->x('\.', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 1;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\;', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 2;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\,', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 3;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\]', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $proceed = 0;
                        $closed = 1;
                    }
                }
            } while ($proceed);
            if ($closed) {
                return [$r, $sub_v];
            }

            return [0, $v];
        }

        return [0, $v];
    }
412
413
    /* 40.. */
414
415 96
    /**
     * Parses a collection ("( item item ... )") into rdf:first / rdf:rest triples.
     *
     * Each member gets its own blank node; the last one points to rdf:nil. Members may be
     * terms, nested collections or blank node property lists.
     *
     * @param string $v input to parse
     *
     * @return array [['id' => ID of the first list node, 'type' => 'bnode', 'triples' => [...]],
     *               remaining input] if the collection was closed with ")", [0, $v] otherwise
     */
    protected function xCollection($v)
    {
        $open = $this->x('\(', $v);
        if (!$open) {
            return [0, $v];
        }

        $rest = $open[1];
        $node = $this->createBnodeID();
        $list = ['id' => $node, 'type' => 'bnode', 'triples' => []];

        while (true) {
            if ((list($item, $rest) = $this->xVarOrTerm($rest)) && $item) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $item['value'],
                    'o_type' => $item['type'],
                    'o_lang' => $item['lang'] ?? '',
                    'o_datatype' => $item['datatype'] ?? '',
                ];
            } elseif ((list($item, $rest) = $this->xCollection($rest)) && $item) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $item['id'],
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $list['triples'] = array_merge($list['triples'], $item['triples']);
            } elseif ((list($item, $rest) = $this->xBlankNodePropertyList($rest)) && $item) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'o' => $item['id'],
                    's_type' => 'bnode',
                    'p_type' => 'uri',
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $list['triples'] = array_merge($list['triples'], $item['triples']);
            } else {
                /* neither a further member nor a closing parenthesis was found */
                return [0, $v];
            }

            $close = $this->x('\)', $rest);
            if ($close) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                    'p_type' => 'uri',
                    'o' => NamespaceHelper::NAMESPACE_RDF.'nil',
                    'o_type' => 'uri',
                    'o_lang' => '',
                    'o_datatype' => '',
                ];

                return [$list, $close[1]];
            }

            /* not closed yet: link the current list node to a fresh one for the next member */
            $next = $this->createBnodeID();
            $list['triples'][] = [
                'type' => 'triple',
                's' => $node,
                'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                'o' => $next,
                's_type' => 'bnode',
                'p_type' => 'uri',
                'o_type' => 'bnode',
                'o_lang' => '',
                'o_datatype' => '',
            ];
            $node = $next;
        }
    }
506
507
    /* 42 */
508
509 98
    /**
     * Parses either a variable (tried first) or a graph term.
     *
     * @param string $v input to parse
     *
     * @return array [parsed term, remaining input] on success, [0, $v] otherwise
     */
    protected function xVarOrTerm($v)
    {
        [$term, $rest] = $this->xVar($v);
        if ($term) {
            return [$term, $rest];
        }

        [$term, $rest] = $this->xGraphTerm($v);
        if ($term) {
            return [$term, $rest];
        }

        return [0, $v];
    }
519
520
    /* 44, 74.., 75.. */
521
522 98
    /**
     * Parses a variable ("?name" or "$name") and registers its name in $this->r['vars'].
     *
     * @param string $v input to parse
     *
     * @return array [['value' => name, 'type' => 'var'], remaining input] on success,
     *               [0, $v] otherwise
     */
    protected function xVar($v)
    {
        if ($r = $this->x('(\?|\$)([^\s]+)', $v)) {
            if ((list($sub_r, $sub_v) = $this->xVARNAME($r[2])) && $sub_r) {
                /*
                 * Strict comparison: names may consist of digits only, and a loose in_array
                 * would treat numeric strings such as "1" and "01" as the same variable.
                 */
                if (!\in_array($sub_r, $this->r['vars'], true)) {
                    $this->r['vars'][] = $sub_r;
                }

                /* remaining input = unconsumed part of the token + everything after it */
                return [['value' => $sub_r, 'type' => 'var'], $sub_v.$r[3]];
            }
        }

        return [0, $v];
    }
536
537
    /* 45 */
538
539 98
    /**
     * Tries the graph term parsers one after another and returns the first match.
     *
     * Scalar results are wrapped into ['value' => ...]; a missing 'type' is filled with the
     * default type associated with the parser that matched.
     *
     * @param string $v input to parse
     *
     * @return array [term array, remaining input] on success, [0, $v] otherwise
     */
    protected function xGraphTerm($v)
    {
        /* parser suffix => default term type, tried in this order */
        $defaultTypes = [
            'IRIref' => 'uri',
            'RDFLiteral' => 'literal',
            'NumericLiteral' => 'literal',
            'BooleanLiteral' => 'literal',
            'BlankNode' => 'bnode',
            'NIL' => 'uri',
            'Placeholder' => 'placeholder',
        ];

        foreach ($defaultTypes as $suffix => $defaultType) {
            [$term, $rest] = $this->{'x'.$suffix}($v);
            if (!$term) {
                continue;
            }

            if (!\is_array($term)) {
                $term = ['value' => $term];
            }
            $term['type'] ??= $defaultType;

            return [$term, $rest];
        }

        return [0, $v];
    }
563
564
    /* 60 */
565
566 97
    /**
     * Parses a quoted literal with an optional language tag or "^^" datatype.
     *
     * @param string $v input to parse
     *
     * @return array [literal array as returned by xString (value unescaped) plus optional
     *               'lang' or 'datatype', remaining input] on success, [0, $v] otherwise
     */
    protected function xRDFLiteral($v)
    {
        [$literal, $rest] = $this->xString($v);
        if (!$literal) {
            return [0, $v];
        }

        $literal['value'] = $this->unescapeNtripleUTF($literal['value']);

        [$lang, $rest] = $this->xLANGTAG($rest);
        if ($lang) {
            $literal['lang'] = $lang;
        } elseif (!$this->x('\s', $rest) && ($marker = $this->x('\^\^', $rest))) {
            [$datatype, $rest] = $this->xIRIref($marker[1]);
            /*
             * Check the IRI itself. The former check "$sub_r[1]" looked at the second
             * character of the IRI string (dropping datatypes whose second character is "0")
             * and accessed an offset on the integer 0 when no IRI could be parsed.
             */
            if ($datatype) {
                $literal['datatype'] = $datatype;
            }
        }

        return [$literal, $rest];
    }
587
588
    /* 61.., 62.., 63.., 64.. */
589
590 96
    /**
     * Parses an optionally signed numeric literal; double is tried before decimal before integer.
     *
     * @param string $v input to parse
     *
     * @return array [['value' => sign + digits, 'type' => 'literal', 'datatype' => XSD type IRI],
     *               remaining input] on success, [0, $v] otherwise
     */
    protected function xNumericLiteral($v)
    {
        $signed = $this->x('(\-|\+)?', $v);
        $prefix = $signed[1];
        $rest = $signed[2];

        $xsdTypes = ['DOUBLE' => 'double', 'DECIMAL' => 'decimal', 'INTEGER' => 'integer'];
        foreach ($xsdTypes as $suffix => $xsd) {
            [$number, $rest] = $this->{'x'.$suffix}($rest);
            /* the number parsers signal failure with false */
            if (false === $number) {
                continue;
            }

            $literal = [
                'value' => $prefix.$number,
                'type' => 'literal',
                'datatype' => NamespaceHelper::NAMESPACE_XSD.$xsd,
            ];

            return [$literal, $rest];
        }

        return [0, $v];
    }
610
611
    /* 65.. */
612
613 96
    /**
     * Parses the keyword "true" or "false".
     *
     * @param string $v input to parse
     *
     * @return array [matched keyword, remaining input] on success, [0, $v] otherwise
     */
    protected function xBooleanLiteral($v)
    {
        $match = $this->x('(true|false)', $v);
        if (!$match) {
            return [0, $v];
        }

        return [$match[1], $match[2]];
    }
621
622
    /* 66.., 87.., 88.., 89.., 90.., 91.. */
623
624 97
    /**
     * Parses a quoted string delimited by ', ", ''' or """.
     *
     * The closing delimiter is the first occurrence that is not preceded by an odd number
     * of backslashes. Largely simplified, may need some tweaks in following revisions.
     *
     * @param string $v input to parse
     *
     * @return array [['value' => raw content, 'type' => 'literal', 'sub_type' => ...],
     *               remaining input] on success, [0, $v] otherwise
     */
    protected function xString($v)
    {
        $opening = [];
        if (!preg_match('/^\s*([\']{3}|\'|[\"]{3}|\")(.*)$/s', $v, $opening)) {
            return [0, $v];
        }

        $delim = $opening[1];
        $body = $opening[2];
        $subTypes = ["'''" => 'literal_long1', '"""' => 'literal_long2', "'" => 'literal1', '"' => 'literal2'];
        $subType = $subTypes[$delim];

        $offset = 0;
        while (false !== ($end = strpos($body, $delim, $offset))) {
            $value = substr($body, 0, $end);
            $slashes = [];
            /* an odd number of trailing backslashes escapes the delimiter */
            $escaped = preg_match('/([\x5c]+)$/s', $value, $slashes) && (\strlen($slashes[1]) % 2);
            if (!$escaped) {
                $after = substr($body, $end + \strlen($delim));

                return [['value' => $value, 'type' => 'literal', 'sub_type' => $subType], $after];
            }
            $offset = $end + 1;
        }

        return [0, $v];
    }
658
659
    /* 67 */
660
661 98
    /**
     * Parses an IRI, given either as "<...>" reference (passed through calcURI with the
     * current base) or as prefixed name.
     *
     * @param string $v input to parse
     *
     * @return array [IRI, remaining input] on success, [0, input] otherwise
     */
    protected function xIRIref($v)
    {
        [$iri, $rest] = $this->xIRI_REF($v);
        if ($iri) {
            return [calcURI($iri, $this->base), $rest];
        }

        [$iri, $rest] = $this->xPrefixedName($rest);
        if ($iri) {
            return [$iri, $rest];
        }

        return [0, $rest];
    }
671
672
    /* 68 */
673
674 98
    /**
     * Parses a prefixed name: "prefix:local" (tried first) or a bare "prefix:".
     *
     * A bare prefix is only accepted if it is known to the namespace helper.
     *
     * @param string $v input to parse
     *
     * @return array [IRI, remaining input] on success, [0, input] otherwise
     */
    protected function xPrefixedName($v)
    {
        [$iri, $rest] = $this->xPNAME_LN($v);
        if ($iri) {
            return [$iri, $rest];
        }

        [$prefix, $afterPrefix] = $this->xPNAME_NS($rest);
        if ($prefix && $this->namespaceHelper->hasPrefix($prefix)) {
            return [$this->namespaceHelper->getNamespace($prefix), $afterPrefix];
        }

        return [0, $rest];
    }
686
687
    /* 69.., 73.., 93, 94..  */
688
689 96
    /**
     * Parses a blank node: a label ("_:name") or an anonymous node ("[]"), for which a
     * new blank node ID is created.
     *
     * @param string $v input to parse
     *
     * @return array [['type' => 'bnode', 'value' => ...], remaining input] on success,
     *               [0, $v] otherwise
     */
    protected function xBlankNode($v)
    {
        $marker = $this->x('\_\:', $v);
        if ($marker) {
            [$label, $rest] = $this->xPN_LOCAL($marker[1]);
            if ($label) {
                return [['type' => 'bnode', 'value' => '_:'.$label], $rest];
            }
        }

        $anonymous = $this->x('\[[\x20\x9\xd\xa]*\]', $v);
        if ($anonymous) {
            return [['type' => 'bnode', 'value' => $this->createBnodeID()], $anonymous[1]];
        }

        return [0, $v];
    }
700
701
    /* 70.. @@sync with SPARQLParser */
702
703 2
    /**
     * Parses an IRI reference in angle brackets.
     *
     * Three forms are recognized: a placeholder ("<${...}>"), the empty reference ("<>")
     * and a regular reference that starts with a non-whitespace character and contains no
     * angle brackets.
     *
     * @param string $v input to parse
     *
     * @return array [content between the brackets (true if it is empty or otherwise falsy),
     *               remaining input] on success, [0, $v] otherwise
     */
    protected function xIRI_REF($v)
    {
        //if ($r = $this->x('\<([^\<\>\"\{\}\|\^\'[:space:]]*)\>', $v)) {
        /* only the truthiness of xPlaceholder()'s result matters, so it is not stored */
        if (($r = $this->x('\<(\$\{[^\>]*\})\>', $v)) && $this->xPlaceholder($r[1])) {
            return [$r[1], $r[2]];
        } elseif ($r = $this->x('\<\>', $v)) {
            return [true, $r[1]];
        } elseif ($r = $this->x('\<([^\s][^\<\>]*)\>', $v)) {
            return [$r[1] ? $r[1] : true, $r[2]];
        }

        return [0, $v];
    }
716
717
    /* 71 */
718
719 98
    /**
     * Parses a namespace prefix including its colon; the part before the colon may be empty.
     *
     * @param string $v input to parse
     *
     * @return array [prefix + ":", remaining input] on success, [0, $v] otherwise
     */
    protected function xPNAME_NS($v)
    {
        [$name, $rest] = $this->xPN_PREFIX($v);

        $colon = $this->x("\:", $rest);
        if (!$colon) {
            return [0, $v];
        }

        return [($name ?: '').':', $colon[1]];
    }
726
727
    /* 72 */
728
729 98
    /**
     * Parses "prefix:local" and expands it with the namespace registered for the prefix.
     *
     * No whitespace is allowed between the colon and the local part, and the prefix has to
     * be known to the namespace helper.
     *
     * @param string $v input to parse
     *
     * @return array [namespace + local part, remaining input] on success, [0, $v] otherwise
     */
    protected function xPNAME_LN($v)
    {
        [$prefix, $rest] = $this->xPNAME_NS($v);
        if (!$prefix || $this->x('\s', $rest)) {
            return [0, $v];
        }

        [$local, $rest] = $this->xPN_LOCAL($rest);
        if (!$local) {
            return [0, $v];
        }

        if (!$this->namespaceHelper->hasPrefix($prefix)) {
            return [0, $v];
        }

        return [$this->namespaceHelper->getNamespace($prefix).$local, $rest];
    }
743
744
    /* 76 */
745
746 53
    /**
     * Parses a language tag ("@en", "@en-gb", ...) that follows without leading whitespace.
     *
     * @param string $v input to parse
     *
     * @return array [tag without "@", remaining input] on success, [0, $v] otherwise
     */
    protected function xLANGTAG($v)
    {
        if ($this->x('\s', $v)) {
            return [0, $v];
        }

        /* group 1 = whole tag, group 2 = last subtag, group 3 = remaining input */
        $match = $this->x('\@([a-z]+(\-[a-z0-9]+)*)', $v);
        if (!$match) {
            return [0, $v];
        }

        return [$match[1], $match[3]];
    }
754
755
    /* 77.. */
756
757 96
    /**
     * Parses a run of digits.
     *
     * @param string $v input to parse
     *
     * @return array [digits, remaining input] on success, [false, $v] otherwise
     */
    protected function xINTEGER($v)
    {
        $match = $this->x('([0-9]+)', $v);

        return $match ? [$match[1], $match[2]] : [false, $v];
    }
765
766
    /* 78.. */
767
768 96
    /**
     * Parses a decimal number ("1.", "1.5" or ".5").
     *
     * @param string $v input to parse
     *
     * @return array [number, remaining input] on success, [false, $v] otherwise
     */
    protected function xDECIMAL($v)
    {
        /* tried in this order */
        foreach (['([0-9]+\.[0-9]*)', '(\.[0-9]+)'] as $pattern) {
            $match = $this->x($pattern, $v);
            if ($match) {
                return [$match[1], $match[2]];
            }
        }

        return [false, $v];
    }
779
780
    /* 79.., 86.. */
781
782 96
    /**
     * Parses a number with exponent ("1.5E3", ".5E-3" or "1E3").
     *
     * @param string $v input to parse
     *
     * @return array [number, remaining input] on success, [false, $v] otherwise
     */
    protected function xDOUBLE($v)
    {
        /* tried in this order */
        $patterns = [
            '([0-9]+\.[0-9]*E[\+\-]?[0-9]+)',
            '(\.[0-9]+E[\+\-]?[0-9]+)',
            '([0-9]+E[\+\-]?[0-9]+)',
        ];

        foreach ($patterns as $pattern) {
            $match = $this->x($pattern, $v);
            if ($match) {
                return [$match[1], $match[2]];
            }
        }

        return [false, $v];
    }
796
797
    /* 92 */
798
799 96
    /**
     * Matches an empty pair of parentheses (optionally containing only
     * space, tab, CR or LF) and maps it to the rdf:nil URI.
     *
     * @param string $v Remaining input.
     *
     * @return array [term array for rdf:nil, unconsumed input] on success,
     *               [0, $v] otherwise.
     */
    protected function xNIL($v)
    {
        $match = $this->x('\([\x20\x9\xd\xa]*\)', $v);
        if (!$match) {
            return [0, $v];
        }

        $nil = ['type' => 'uri', 'value' => NamespaceHelper::NAMESPACE_RDF.'nil'];

        return [$nil, $match[1]];
    }
807
808
    /* 95.. */
809
810 97
    /**
     * Matches a run of letters a-z or a single "\u" escape with one to four
     * hex digits.
     *
     * @param string $v Remaining input.
     *
     * @return array [matched text, unconsumed input] on success, [0, $v] otherwise.
     */
    protected function xPN_CHARS_BASE($v)
    {
        $match = $this->x("([a-z]+|\\\u[0-9a-f]{1,4})", $v);

        return $match ? [$match[1], $match[2]] : [0, $v];
    }
818
819
    /* 96 */
820
821 93
    /**
     * Matches whatever xPN_CHARS_BASE accepts, or a single underscore.
     *
     * @param string $v Remaining input.
     *
     * @return array [matched text, unconsumed input] on success, [0, $v] otherwise.
     */
    protected function xPN_CHARS_U($v)
    {
        list($chars, $rest) = $this->xPN_CHARS_BASE($v);
        if ($chars) {
            return [$chars, $rest];
        }

        $underscore = $this->x('(_)', $v);

        return $underscore ? [$underscore[1], $underscore[2]] : [0, $v];
    }
831
832
    /* 97.. */
833
834 92
    /**
     * Collects a variable name: digits, xPN_CHARS_U matches and, once at least
     * one character has been collected, middle dots and combining marks
     * (U+00B7, U+0300-U+036F).
     *
     * @param string $v Remaining input.
     *
     * @return array [collected name (may be ''), unconsumed input].
     */
    protected function xVARNAME($v)
    {
        $name = '';

        while (true) {
            $digits = $this->x('([0-9]+)', $v);
            if ($digits) {
                $name .= $digits[1];
                $v = $digits[2];
                continue;
            }

            list($chars, $rest) = $this->xPN_CHARS_U($v);
            if ($chars) {
                $name .= $chars;
                $v = $rest;
                continue;
            }

            // Marks may only continue a name, never start one. Compare with ''
            // so that a name consisting of "0" is not mistaken for "empty".
            if ('' === $name) {
                break;
            }

            // PCRE reads "\x300" as "\x30" followed by "0", so the former class
            // "[\xb7\x300-\x36f]" only covered byte B7, "0"-"6" and "f". Code
            // points above FF need the \x{...} form together with the "u"
            // modifier. "s" and "i" are passed along on the assumption that they
            // are x()'s defaults (x() is defined outside this excerpt) - TODO confirm.
            $marks = $this->x('([\x{b7}\x{300}-\x{36f}]+)', $v, 'siu');
            if (!$marks) {
                break;
            }
            $name .= $marks[1];
            $v = $marks[2];
        }

        return [$name, $v];
    }
856
857
    /* 98.. */
858
859 6
    /**
     * Matches one PN_CHARS item: whatever xPN_CHARS_U accepts, a hyphen, a
     * digit, a middle dot (U+00B7) or a combining mark (U+0300-U+036F).
     *
     * @param string $v Remaining input.
     *
     * @return array [matched text, unconsumed input] on success, [false, $v]
     *               otherwise. The matched text can be the string '0', so
     *               callers must compare against false instead of testing
     *               truthiness.
     */
    protected function xPN_CHARS($v)
    {
        list($chars, $rest) = $this->xPN_CHARS_U($v);
        if ($chars) {
            return [$chars, $rest];
        }

        // ASCII part, matched byte-wise so it keeps working on input that is
        // not valid UTF-8.
        $ascii = $this->x('([\-0-9])', $v);
        if ($ascii) {
            return [$ascii[1], $ascii[2]];
        }

        // PCRE reads "\x300" as "\x30" followed by "0", so the former class
        // "[\-0-9\xb7\x300-\x36f]" never covered U+0300-U+036F. Code points above
        // FF need the \x{...} form together with the "u" modifier. "s" and "i"
        // are passed along on the assumption that they are x()'s defaults
        // (x() is defined outside this excerpt) - TODO confirm.
        $mark = $this->x('([\x{b7}\x{300}-\x{36f}])', $v, 'siu');
        if ($mark) {
            return [$mark[1], $mark[2]];
        }

        return [false, $v];
    }
869
870
    /* 99 */
871
872 98
    /**
     * Matches the prefix part of a prefixed name.
     *
     * A single permissive pattern is tried first; the character-by-character
     * path below only runs when that pattern does not match.
     *
     * @param string $v Remaining input.
     *
     * @return array [matched prefix, unconsumed input]; the first element is 0
     *               when nothing matched.
     */
    protected function xPN_PREFIX($v)
    {
        /* accelerator */
        $fast = $this->x("([^\s\:\(\)\{\}\;\,]+)", $v, 's');
        if ($fast) {
            return [$fast[1], $fast[2]]; /* @@testing */
        }

        list($prefix, $rest) = $this->xPN_CHARS_BASE($v);
        if (!$prefix) {
            return [$prefix, $rest];
        }

        // Keep appending PN_CHARS items and dots for as long as either is found.
        while (true) {
            list($chunk, $rest) = $this->xPN_CHARS($rest);
            if (false !== $chunk) {
                $prefix .= $chunk;
                continue;
            }

            $dot = $this->x("\.", $rest);
            if (!$dot) {
                break;
            }
            $prefix .= '.';
            $rest = $dot[1];
        }

        list($chunk, $rest) = $this->xPN_CHARS($rest);
        $prefix .= $chunk ?: '';

        return [$prefix, $rest];
    }
896
897
    /* 100 */
898
899 32
    /**
     * Matches the local part of a prefixed name.
     *
     * A single permissive pattern is tried first and accepted unless the text
     * following the match starts with a dot. Otherwise the name is assembled
     * piece by piece from digits, xPN_CHARS_U matches, dots and xPN_CHARS items.
     *
     * @param string $v Remaining input.
     *
     * @return array [local name (may be ''), unconsumed input].
     */
    protected function xPN_LOCAL($v)
    {
        /* accelerator */
        $fast = $this->x("([^\s\(\)\{\}\[\]\;\,\.]+)", $v, 's');
        if ($fast && !preg_match('/^\./', $fast[2])) {
            return [$fast[1], $fast[2]]; /* @@testing */
        }

        $local = '';
        $rest = $v;

        while (true) {
            // Stop as soon as the whitespace probe matches.
            if ($this->x('\s', $rest)) {
                return [$local, $rest];
            }

            $digit = $this->x('([0-9])', $rest);
            if ($digit) {
                $local .= $digit[1];
                $rest = $digit[2];
                continue;
            }

            list($chunk, $rest) = $this->xPN_CHARS_U($rest);
            if ($chunk) {
                $local .= $chunk;
                continue;
            }

            // Dots and the remaining PN_CHARS may only continue a name. Compare
            // with '' because a name that so far is just "0" is falsy in PHP.
            if ('' === $local) {
                break;
            }

            // Take a dot unless it is followed by whitespace or "}".
            $dot = $this->x('(\.)', $rest);
            if ($dot && !preg_match('/^[\s\}]/s', $dot[2])) {
                $local .= $dot[1];
                $rest = $dot[2];
            }

            list($chunk, $rest) = $this->xPN_CHARS($rest);
            // xPN_CHARS signals failure with false and may legitimately return
            // the string '0'. The former truthiness test consumed that '0' from
            // the input without appending it (e.g. "v1.0" came back as "v1.").
            if (false === $chunk) {
                break;
            }
            $local .= $chunk;
        }

        return [$local, $rest];
    }
932
933 53
    /**
     * Replaces backslash escapes in a string by the characters they stand for.
     *
     * Handles \t, \n, \r, \" and \' as well as \uXXXX and \UXXXXXXXX, which are
     * replaced by the UTF-8 encoding of the given code point. Hex digits are
     * accepted in upper and lower case. Code points of 0x200000 and above are
     * replaced by an empty string.
     *
     * @param string $v Text that may contain escape sequences.
     *
     * @return string The text with the escapes listed above resolved.
     */
    protected function unescapeNtripleUTF($v)
    {
        if (false === strpos($v, '\\')) {
            return $v;
        }

        $mappings = ['t' => "\t", 'n' => "\n", 'r' => "\r", '\"' => '"', '\'' => "'"];
        foreach ($mappings as $in => $out) {
            $v = preg_replace('/\x5c(['.$in.'])/', $out, $v);
        }

        if (false === stripos($v, '\u')) {
            return $v;
        }

        // One pass over the string: text produced by a replacement is never
        // scanned again, so "\u005Cu0041" yields "\u0041" and not "A".
        $decoded = preg_replace_callback(
            '/\\\\(?:U([0-9A-Fa-f]{8})|u([0-9A-Fa-f]{4}))/',
            static function (array $m): string {
                // Group 1 holds the 8-digit form; it is '' when the 4-digit form matched.
                $hex = '' !== $m[1] ? $m[1] : $m[2];
                $no = hexdec($hex);

                // Checked before the int cast because hexdec() may return a
                // float for values that do not fit into a platform integer.
                if ($no >= 2097152) {
                    return '';
                }
                $no = (int) $no;

                if ($no < 128) {
                    return \chr($no);
                }
                if ($no < 2048) {
                    return \chr(($no >> 6) + 192).\chr(($no & 63) + 128);
                }
                if ($no < 65536) {
                    return \chr(($no >> 12) + 224).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
                }

                return \chr(($no >> 18) + 240).\chr((($no >> 12) & 63) + 128).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
            },
            $v
        );

        // preg_replace_callback() returns null on a PCRE error; keep the input then.
        return $decoded ?? $v;
    }
963
964 96
    /**
     * Matches a placeholder written as "?{...}" or "${...}" (braces may nest)
     * and records its name in $this->r['placeholders'].
     *
     * @param string $v Remaining input.
     *
     * @return array [['value' => name, 'type' => 'placeholder'], unconsumed
     *               input] on success, [0, $v] otherwise.
     */
    protected function xPlaceholder($v)
    {
        $r = $this->x('(\?|\$)', $v);
        if (!$r) {
            return [0, $v];
        }

        // Look for a balanced {...} group; it only counts when it sits at the
        // very start of the (trimmed) text after the "?" or "$".
        $after = trim($r[2]);
        if (!preg_match('/(\{(?:[^{}]+|(?R))*\})/', $r[2], $m) || 0 !== strpos($after, $m[1])) {
            return [0, $v];
        }

        $ph = substr($m[1], 1, -1);
        $rest = substr($after, \strlen($m[1]));

        if (!isset($this->r['placeholders'])) {
            $this->r['placeholders'] = [];
        }
        // Strict comparison: with loose comparison numeric-looking names such
        // as "1" and "01" count as equal and the second one is never recorded.
        if (!\in_array($ph, $this->r['placeholders'], true)) {
            $this->r['placeholders'][] = $ph;
        }

        return [['value' => $ph, 'type' => 'placeholder'], $rest];
    }
984
}
985