Passed
Push — master ( 079a9e...374fd3 )
by Konrad
24:04
created

TurtleParser::xPN_LOCAL()   C

Complexity

Conditions 13
Paths 9

Size

Total Lines 32
Code Lines 24

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 14.5274

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 13
eloc 24
c 1
b 0
f 0
nc 9
nop 1
dl 0
loc 32
ccs 19
cts 24
cp 0.7917
crap 14.5274
rs 6.6166

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method, i.e. moving a cohesive part of the method body into its own well-named method.

1
<?php
2
3
/**
4
 * This file is part of the sweetrdf/InMemoryStoreSqlite package and licensed under
5
 * the terms of the GPL-2 license.
6
 *
7
 * (c) Konrad Abicht <[email protected]>
8
 * (c) Benjamin Nowack
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace sweetrdf\InMemoryStoreSqlite\Parser;
15
16
use Exception;
17
use sweetrdf\InMemoryStoreSqlite\Log\Logger;
18
use sweetrdf\InMemoryStoreSqlite\NamespaceHelper;
19
use sweetrdf\InMemoryStoreSqlite\StringReader;
20
21
use function sweetrdf\InMemoryStoreSqlite\calcURI;
22
23
class TurtleParser extends BaseParser
24
{
25
    protected int $state;
26
    protected int $max_parsing_loops;
27
    protected string $unparsed_code;
28
29 99
    /**
     * Hands the collaborators to the parent constructor and resets this parser's own state.
     */
    public function __construct(Logger $logger, NamespaceHelper $namespaceHelper, StringReader $stringReader)
    {
        parent::__construct($logger, $namespaceHelper, $stringReader);

        // parse() throws once its loop counter exceeds this value.
        $this->max_parsing_loops = 500;
        $this->unparsed_code = '';
        $this->state = 0;
    }
37
38 99
    /**
     * Matches $re at the start of $v, skipping leading whitespace and leading "#" comments.
     *
     * @param string $re      regular expression fragment (without delimiters)
     * @param string $v       text to match against
     * @param string $options PCRE modifiers appended after the closing delimiter
     *
     * @return array|false the preg_match groups (the last group holds the remaining text), or false
     */
    protected function x($re, $v, $options = 'si')
    {
        // A leading run of \xA0 / \xC2 bytes is collapsed into a single space.
        $text = preg_replace('/^[\xA0\xC2]+/', ' ', $v);

        /* comment removal */
        while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $text, $comment)) {
            $text = $comment[2];
        }

        $pattern = "/^\s*".$re.'(.*)$/'.$options;
        if (preg_match($pattern, $text, $groups)) {
            return $groups;
        }

        return false;
    }
49
50 2
    /**
     * Advances the blank node counter and returns the label "_:" + bnode_prefix + counter.
     */
    private function createBnodeID(): string
    {
        return '_:'.$this->bnode_prefix.(++$this->bnode_id);
    }
56
57 2
    /**
     * Stores a triple at the next free index and advances the triple counter.
     *
     * @param array $t triple structure as built by the x* methods
     */
    protected function addT(array $t): void
    {
        $index = $this->t_count;
        $this->triples[$index] = $t;
        $this->t_count = $index + 1;
    }
62
63
    /**
     * Returns the triple counter maintained by addT() and reset by parse().
     */
    protected function countTriples()
    {
        return $this->t_count;
    }
67
68
    /**
     * Returns the input that parse() could not consume (empty string if nothing was left over).
     */
    protected function getUnparsedCode()
    {
        return $this->unparsed_code;
    }
72
73 2
    /**
     * Parses the input provided via $path / $data and collects the triples in $this->triples.
     *
     * The input is read in chunks. For every chunk the prologue (base / prefix declarations) is
     * consumed first, afterwards as many triples as possible. Text that could not be parsed ends
     * up in $this->unparsed_code and is reported through the logger (unless the logger already
     * holds an error).
     *
     * @throws Exception when the loop counter exceeds max_parsing_loops, i.e. too many chunks were
     *                   read in a row without a prologue entry or a triple being recognized
     */
    public function parse(string $path, string $data = ''): void
    {
        $this->triples = [];
        $this->t_count = 0;
        $this->reader->init($path, $data);
        $this->base = $this->reader->getBase();
        $this->r = ['vars' => []];
        /* parse */
        $buffer = '';
        $more_triples = [];
        $sub_v = '';
        $sub_v2 = '';
        $loops = 0;
        $prologue_done = 0;
        while ($d = $this->reader->readStream(8192)) {
            $buffer .= $d;
            $sub_v = $buffer;
            do {
                $proceed = 0;
                if (!$prologue_done) {
                    $proceed = 1;
                    if ((list($sub_r, $sub_v) = $this->xPrologue($sub_v)) && $sub_r) {
                        $loops = 0;
                        $sub_v .= $this->reader->readStream(128);
                        /* in case we missed the final DOT in the previous prologue loop */
                        if ($sub_r = $this->x('\.', $sub_v)) {
                            $sub_v = $sub_r[1];
                        }
                        /* more prologue to come, use outer loop */
                        if ($this->x("\@?(base|prefix)", $sub_v)) {
                            $proceed = 0;
                        }
                    } else {
                        $prologue_done = 1;
                    }
                }
                if (
                    $prologue_done
                    && (list($sub_r, $sub_v, $more_triples, $sub_v2) = $this->xTriplesBlock($sub_v))
                    && \is_array($sub_r)
                ) {
                    $proceed = 1;
                    $loops = 0;
                    foreach ($sub_r as $t) {
                        $this->addT($t);
                    }
                }
            } while ($proceed);
            ++$loops;
            $buffer = $sub_v;
            if ($loops > $this->max_parsing_loops) {
                $msg = 'too many loops: '.$loops.'. Could not parse "'.substr($buffer, 0, 200).'..."';
                throw new Exception($msg);
            }
        }
        /* triples that were recognized but not yet terminated when the input ended */
        foreach ($more_triples as $t) {
            $this->addT($t);
        }
        $sub_v = \count($more_triples) ? $sub_v2 : $sub_v;
        $buffer = $sub_v;
        $this->unparsed_code = $buffer;

        /* remove trailing comments */
        while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) {
            $this->unparsed_code = $m[2];
        }

        if ($this->unparsed_code && !$this->logger->hasEntries('error')) {
            // Only line breaks are flattened; the previous class "[\x0a|\x0d]" also replaced
            // literal "|" characters and thereby altered the reported snippet.
            $rest = preg_replace('/[\x0a\x0d]/', ' ', substr($this->unparsed_code, 0, 30));
            if (trim($rest)) {
                $this->logger->error('Could not parse "'.$rest.'"');
            }
        }
    }
147
148 2
    /**
     * Consumes an optional base declaration followed by any number of prefix declarations.
     *
     * Nothing is consumed once triples have been collected ($this->t_count is non-zero).
     *
     * @return array [1|0, remaining text] - 1 if at least one declaration was applied
     */
    protected function xPrologue($v)
    {
        if ($this->t_count) {
            return [0, $v];
        }

        $found = 0;

        list($base, $v) = $this->xBaseDecl($v);
        if ($base) {
            $this->base = $base;
            $found = 1;
        }

        while (true) {
            list($decl, $v) = $this->xPrefixDecl($v);
            if (!$decl) {
                break;
            }
            $this->namespaceHelper->setPrefix($decl['prefix'], $decl['uri']);
            $found = 1;
        }

        return [$found, $v];
    }
164
165
    /* 3 */
166
167 99
    /**
     * Matches "@base <iri>" (the "@" and the trailing "." are optional).
     *
     * @return array [IRI reference as returned by xIRI_REF(), remaining text] or [0, $v]
     */
    protected function xBaseDecl($v)
    {
        $keyword = $this->x("\@?base\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        list($iri, $rest) = $this->xIRI_REF($keyword[1]);
        if (!$iri) {
            return [0, $v];
        }

        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [$iri, $rest];
    }
181
182
    /* 4 */
183
184 99
    /**
     * Matches "@prefix ns: <iri>" (the "@" and the trailing "." are optional).
     *
     * @return array [['prefix' => ..., 'uri_ref' => ..., 'uri' => ...], remaining text] or [0, $v];
     *               'uri' is 'uri_ref' resolved against $this->base via calcURI()
     */
    protected function xPrefixDecl($v)
    {
        $keyword = $this->x("\@?prefix\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        list($prefix, $rest) = $this->xPNAME_NS($keyword[1]);
        if (!$prefix) {
            return [0, $v];
        }

        list($uriRef, $rest) = $this->xIRI_REF($rest);
        if (!$uriRef) {
            return [0, $v];
        }

        $uri = calcURI($uriRef, $this->base);

        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [['prefix' => $prefix, 'uri_ref' => $uriRef, 'uri' => $uri], $rest];
    }
202
203
    /* 21.., 32.. */
204
205 99
    /**
     * Parses as many triples as possible from $v using a small state machine
     * (1 = subject, 2 = predicate, 3 = object, 4 = separator / terminator).
     *
     * Triples are first collected in a pending list and only moved to the result once a "."
     * or one of the block tokens checked at the end of state 4 is seen.
     *
     * @return array [triples|0, text after the last completed statement, pending (not yet
     *               terminated) triples, text after the pending triples]
     */
    protected function xTriplesBlock($v)
    {
        $pre_r = [];
        $r = [];
        $state = 1;
        $sub_v = $v;
        $buffer = $sub_v;
        do {
            $proceed = 0;
            if (1 == $state) {/* expecting subject */
                $t = ['type' => 'triple', 's' => '', 'p' => '', 'o' => '', 's_type' => '', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
                if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['value'];
                    $t['s_type'] = $sub_r['type'];
                    $state = 2;
                    $proceed = 1;
                    if ($sub_r = $this->x('(\}|\.)', $sub_v)) {
                        if ('placeholder' == $t['s_type']) {
                            $state = 4;
                        } else {
                            $this->logger->error('"'.$sub_r[1].'" after subject found.');
                        }
                    }
                } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                    if ($this->x('\.', $sub_v)) {
                        $this->logger->error('DOT after subject found.');
                    }
                } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    $this->logger->error('Subject expected, DOT found.'.$sub_v);
                }
            }
            if (2 == $state) {/* expecting predicate */
                if ($sub_r = $this->x('a\s+', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                    $t['p_type'] = 'uri';
                    $state = 3;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    if ('bnode' == $sub_r['type']) {
                        $this->logger->error('Blank node used as triple predicate');
                    }
                    $t['p'] = $sub_r['value'];
                    $t['p_type'] = $sub_r['type'];
                    $state = 3;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    $state = 4;
                } elseif ($this->x('\}', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
            if (3 == $state) {/* expecting object */
                if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['value'];
                    $t['o_type'] = $sub_r['type'];
                    $t['o_lang'] = $sub_r['lang'] ?? '';
                    $t['o_datatype'] = $sub_r['datatype'] ?? '';
                    $pre_r[] = $t;
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /* $t is reused within an object list: clear what a previous literal may have set */
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /* $t is reused within an object list: clear what a previous literal may have set */
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                }
            }
            if (4 == $state) {/* expecting . or ; or , or } */
                if ($sub_r = $this->x('\.', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $state = 1;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\;', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 2;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\,', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 3;
                    $proceed = 1;
                    if ($this->x('\}', $sub_v)) {
                        $this->logger->error('Object expected, } found.');
                    }
                }
                /* block tokens end the triples block; pending triples are flushed */
                if ($this->x('(\}|\{|OPTIONAL|FILTER|GRAPH)', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
        } while ($proceed);

        return \count($r) ? [$r, $buffer, $pre_r, $sub_v] : [0, $buffer, $pre_r, $sub_v];
    }
327
328
    /* 39.. */
329
330 97
    /**
     * Parses a blank node property list "[ predicate object ... ]".
     *
     * A fresh blank node ID is created as soon as the opening "[" is seen; every
     * predicate / object pair inside the brackets becomes a triple with that node as subject.
     *
     * @return array [['id' => bnode ID, 'type' => 'bnode', 'triples' => [...]], remaining text],
     *               or [0, $v] if $v does not start with "[" or the closing "]" was not reached
     */
    protected function xBlankNodePropertyList($v)
    {
        if ($sub_r = $this->x('\[', $v)) {
            $sub_v = $sub_r[1];
            $s = $this->createBnodeID();
            $r = ['id' => $s, 'type' => 'bnode', 'triples' => []];
            $t = ['type' => 'triple', 's' => $s, 'p' => '', 'o' => '', 's_type' => 'bnode', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
            $state = 2;
            $closed = 0;
            do {
                $proceed = 0;
                if (2 == $state) {/* expecting predicate */
                    if ($sub_r = $this->x('a\s+', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                        $t['p_type'] = 'uri';
                        $state = 3;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                        $t['p'] = $sub_r['value'];
                        $t['p_type'] = $sub_r['type'];
                        $state = 3;
                        $proceed = 1;
                    }
                }
                if (3 == $state) {/* expecting object */
                    if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['value'];
                        $t['o_type'] = $sub_r['type'];
                        $t['o_lang'] = $sub_r['lang'] ?? '';
                        $t['o_datatype'] = $sub_r['datatype'] ?? '';
                        $r['triples'][] = $t;
                        $state = 4;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['id'];
                        $t['o_type'] = $sub_r['type'];
                        /* $t is reused for every pair: clear what a previous literal may have set */
                        $t['o_lang'] = '';
                        $t['o_datatype'] = '';
                        $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                        $state = 4;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['id'];
                        $t['o_type'] = $sub_r['type'];
                        /* $t is reused for every pair: clear what a previous literal may have set */
                        $t['o_lang'] = '';
                        $t['o_datatype'] = '';
                        $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                        $state = 4;
                        $proceed = 1;
                    }
                }
                if (4 == $state) {/* expecting . or ; or , or ] */
                    if ($sub_r = $this->x('\.', $sub_v)) {
                        $sub_v = $sub_r[1];
                        /* no branch handles state 1 here; unless one of the checks below matches, the loop ends unclosed */
                        $state = 1;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\;', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 2;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\,', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 3;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\]', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $proceed = 0;
                        $closed = 1;
                    }
                }
            } while ($proceed);
            if ($closed) {
                return [$r, $sub_v];
            }

            return [0, $v];
        }

        return [0, $v];
    }
412
413
    /* 40.. */
414
415 97
    /**
     * Parses a collection "( item item ... )" into a chain of rdf:first / rdf:rest triples.
     *
     * Every item (term, nested collection or blank node property list) yields an rdf:first
     * triple on the current list node. The node is then linked either to rdf:nil (closing
     * parenthesis reached) or to a freshly created blank node for the next item.
     *
     * @return array [['id' => ID of the first list node, 'type' => 'bnode', 'triples' => [...]],
     *               remaining text], or [0, $v] if no complete collection could be read
     */
    protected function xCollection($v)
    {
        $open = $this->x('\(', $v);
        if (!$open) {
            return [0, $v];
        }

        $sub_v = $open[1];
        $node = $this->createBnodeID();
        $r = ['id' => $node, 'type' => 'bnode', 'triples' => []];

        while (true) {
            if ((list($item, $sub_v) = $this->xVarOrTerm($sub_v)) && $item) {
                $r['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $item['value'],
                    'o_type' => $item['type'],
                    'o_lang' => $item['lang'] ?? '',
                    'o_datatype' => $item['datatype'] ?? '',
                ];
            } elseif ((list($item, $sub_v) = $this->xCollection($sub_v)) && $item) {
                $r['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $item['id'],
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $r['triples'] = array_merge($r['triples'], $item['triples']);
            } elseif ((list($item, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $item) {
                $r['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'o' => $item['id'],
                    's_type' => 'bnode',
                    'p_type' => 'uri',
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $r['triples'] = array_merge($r['triples'], $item['triples']);
            } else {
                /* neither an item nor a closing parenthesis: not a complete collection */
                return [0, $v];
            }

            $close = $this->x('\)', $sub_v);
            if ($close) {
                $r['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                    'p_type' => 'uri',
                    'o' => NamespaceHelper::NAMESPACE_RDF.'nil',
                    'o_type' => 'uri',
                    'o_lang' => '',
                    'o_datatype' => '',
                ];

                return [$r, $close[1]];
            }

            /* more items expected: link the current node to a new one */
            $next = $this->createBnodeID();
            $r['triples'][] = [
                'type' => 'triple',
                's' => $node,
                'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                'o' => $next,
                's_type' => 'bnode',
                'p_type' => 'uri',
                'o_type' => 'bnode',
                'o_lang' => '',
                'o_datatype' => '',
            ];
            $node = $next;
        }
    }
506
507
    /* 42 */
508
509 99
    /**
     * Tries xVar() first and xGraphTerm() second on the same input.
     *
     * @return array [result of the first successful matcher, remaining text] or [0, $v]
     */
    protected function xVarOrTerm($v)
    {
        list($var, $rest) = $this->xVar($v);
        if ($var) {
            return [$var, $rest];
        }

        list($term, $rest) = $this->xGraphTerm($v);
        if ($term) {
            return [$term, $rest];
        }

        return [0, $v];
    }
519
520
    /* 44, 74.., 75.. */
521
522 99
    /**
     * Matches a variable ("?name" or "$name") and registers its name in $this->r['vars'].
     *
     * @return array [['value' => name, 'type' => 'var'], remaining text] or [0, $v]
     */
    protected function xVar($v)
    {
        $r = $this->x('(\?|\$)([^\s]+)', $v);
        if (!$r) {
            return [0, $v];
        }

        list($name, $rest) = $this->xVARNAME($r[2]);
        // Compare against '' instead of relying on truthiness, otherwise the name "0" is rejected.
        if ('' === (string) $name) {
            return [0, $v];
        }

        // Strict comparison: loosely compared, numeric-looking names such as "1e1" and "10" are equal.
        if (!\in_array($name, $this->r['vars'], true)) {
            $this->r['vars'][] = $name;
        }

        // $rest is what xVARNAME() left of the token, $r[3] is the text after the token.
        return [['value' => $name, 'type' => 'var'], $rest.$r[3]];
    }
536
537
    /* 45 */
538
539 99
    /**
     * Runs the term matchers (x + term name) in a fixed order and returns the first hit.
     *
     * Scalar results are wrapped as ['value' => ...]; a missing 'type' is filled with the
     * type associated with the matcher.
     *
     * @return array [term array, remaining text] or [0, $v]
     */
    protected function xGraphTerm($v)
    {
        $matchers = [
            'IRIref' => 'uri',
            'RDFLiteral' => 'literal',
            'NumericLiteral' => 'literal',
            'BooleanLiteral' => 'literal',
            'BlankNode' => 'bnode',
            'NIL' => 'uri',
            'Placeholder' => 'placeholder',
        ];

        foreach ($matchers as $term => $type) {
            list($found, $rest) = $this->{'x'.$term}($v);
            if (!$found) {
                continue;
            }
            if (!\is_array($found)) {
                $found = ['value' => $found];
            }
            $found['type'] ??= $type;

            return [$found, $rest];
        }

        return [0, $v];
    }
563
564
    /* 60 */
565
566 98
    /**
     * Matches a string literal with an optional language tag or "^^" datatype.
     *
     * @return array [literal array as returned by xString() with an unescaped 'value' and
     *               optionally 'lang' or 'datatype', remaining text] or [0, $v]
     */
    protected function xRDFLiteral($v)
    {
        list($literal, $rest) = $this->xString($v);
        if (!$literal) {
            return [0, $v];
        }
        $literal['value'] = $this->unescapeNtripleUTF($literal['value']);

        list($lang, $afterLang) = $this->xLANGTAG($rest);
        if ($lang) {
            $literal['lang'] = $lang;

            return [$literal, $afterLang];
        }

        // "^^" has to follow the closing quote directly (no whitespace in between).
        if (!$this->x('\s', $rest) && ($marker = $this->x('\^\^', $rest))) {
            list($datatype, $afterType) = $this->xIRIref($marker[1]);
            // Test the IRI itself. The former check read offset 1 of the result, i.e. the second
            // character of the IRI string - or an array offset on int 0 when no IRI was found.
            if ($datatype) {
                $literal['datatype'] = $datatype;
                $rest = $afterType;
            }
            // Without a valid datatype IRI the "^^" stays in the remaining text, so that it
            // shows up as unparsed input instead of being dropped silently.
        }

        return [$literal, $rest];
    }
587
588
    /* 61.., 62.., 63.., 64.. */
589
590 97
    /**
     * Matches an optionally signed number, trying xDOUBLE(), xDECIMAL() and xINTEGER() in that order.
     *
     * @return array [['value' => sign + digits, 'type' => 'literal', 'datatype' => XSD type IRI],
     *               remaining text] or [0, $v]
     */
    protected function xNumericLiteral($v)
    {
        $sign = $this->x('(\-|\+)?', $v);
        $prefix = $sign[1];
        $sub_v = $sign[2];

        $xsdTypes = ['DOUBLE' => 'double', 'DECIMAL' => 'decimal', 'INTEGER' => 'integer'];
        foreach ($xsdTypes as $type => $xsd) {
            list($number, $sub_v) = $this->{'x'.$type}($sub_v);
            if (false === $number) {
                continue;
            }

            $literal = [
                'value' => $prefix.$number,
                'type' => 'literal',
                'datatype' => NamespaceHelper::NAMESPACE_XSD.$xsd,
            ];

            return [$literal, $sub_v];
        }

        return [0, $v];
    }
610
611
    /* 65.. */
612
613 97
    /**
     * Matches the keyword "true" or "false" at the start of $v.
     *
     * @return array [matched keyword, remaining text] or [0, $v]
     */
    protected function xBooleanLiteral($v)
    {
        $match = $this->x('(true|false)', $v);

        return $match ? [$match[1], $match[2]] : [0, $v];
    }
621
622
    /* 66.., 87.., 88.., 89.., 90.., 91.. */
623
624 98
    /**
     * Matches a quoted string (', ", ''' or """) at the start of $v.
     *
     * Largely simplified, may need some tweaks in following revisions.
     *
     * @return array [['value' => raw content, 'type' => 'literal', 'sub_type' => literal1 |
     *               literal2 | literal_long1 | literal_long2], remaining text] or [0, $v]
     */
    protected function xString($v)
    {
        if (!preg_match('/^\s*([\']{3}|\'|[\"]{3}|\")(.*)$/s', $v, $m)) {
            return [0, $v];
        }

        $delim = $m[1];
        $rest = $m[2];
        $sub_types = ["'''" => 'literal_long1', '"""' => 'literal_long2', "'" => 'literal1', '"' => 'literal2'];
        $sub_type = $sub_types[$delim];

        $offset = 0;
        while (false !== ($end = strpos($rest, $delim, $offset))) {
            $value = substr($rest, 0, $end);

            // An odd number of backslashes right before the delimiter means it is escaped.
            $escaped = preg_match('/([\x5c]+)$/s', $value, $slashes) && 1 === \strlen($slashes[1]) % 2;
            if (!$escaped) {
                $remaining = substr($rest, $end + \strlen($delim));

                return [['value' => $value, 'type' => 'literal', 'sub_type' => $sub_type], $remaining];
            }

            $offset = $end + 1;
        }

        /* no unescaped closing delimiter found */
        return [0, $v];
    }
658
659
    /* 67 */
660
661 99
    /**
     * Matches either "<iri>" (resolved against $this->base via calcURI()) or a prefixed name.
     *
     * @return array [IRI, remaining text] or [0, $v]
     */
    protected function xIRIref($v)
    {
        list($iri, $rest) = $this->xIRI_REF($v);
        if ($iri) {
            return [calcURI($iri, $this->base), $rest];
        }

        list($name, $rest) = $this->xPrefixedName($rest);
        if ($name) {
            return [$name, $rest];
        }

        return [0, $rest];
    }
671
672
    /* 68 */
673
674 99
    /**
     * Matches "prefix:local" (via xPNAME_LN()) or a bare "prefix:" whose prefix is registered.
     *
     * @return array [expanded IRI / namespace, remaining text] or [0, $v]
     */
    protected function xPrefixedName($v)
    {
        list($full, $v) = $this->xPNAME_LN($v);
        if ($full) {
            return [$full, $v];
        }

        list($prefix, $rest) = $this->xPNAME_NS($v);
        if ($prefix && $this->namespaceHelper->hasPrefix($prefix)) {
            return [$this->namespaceHelper->getNamespace($prefix), $rest];
        }

        return [0, $v];
    }
686
687
    /* 69.., 73.., 93, 94..  */
688
689 97
    /**
     * Matches a labelled blank node ("_:label") or an empty "[]", for which a new ID is created.
     *
     * @return array [['type' => 'bnode', 'value' => blank node ID], remaining text] or [0, $v]
     */
    protected function xBlankNode($v)
    {
        $marker = $this->x('\_\:', $v);
        if ($marker) {
            list($label, $rest) = $this->xPN_LOCAL($marker[1]);
            if ($label) {
                return [['type' => 'bnode', 'value' => '_:'.$label], $rest];
            }
        }

        $anonymous = $this->x('\[[\x20\x9\xd\xa]*\]', $v);
        if ($anonymous) {
            return [['type' => 'bnode', 'value' => $this->createBnodeID()], $anonymous[1]];
        }

        return [0, $v];
    }
700
701
    /* 70.. @@sync with SPARQLParser */
702
703 2
    /**
     * Matches an IRI reference in angle brackets.
     *
     * Three forms are tried in order: a placeholder ("<${...}>" accepted by xPlaceholder()),
     * the empty reference "<>" and a regular "<...>" that does not start with whitespace.
     *
     * @return array [IRI string, or true where the string would evaluate to false (callers test
     *               the first element for truthiness), remaining text] or [0, $v]
     */
    protected function xIRI_REF($v)
    {
        // stricter variant, kept for reference:
        //if ($r = $this->x('\<([^\<\>\"\{\}\|\^\'[:space:]]*)\>', $v)) {
        $placeholder = $this->x('\<(\$\{[^\>]*\})\>', $v);
        if ($placeholder && $this->xPlaceholder($placeholder[1])) {
            return [$placeholder[1], $placeholder[2]];
        }

        $empty = $this->x('\<\>', $v);
        if ($empty) {
            return [true, $empty[1]];
        }

        $iri = $this->x('\<([^\s][^\<\>]*)\>', $v);
        if ($iri) {
            return [$iri[1] ?: true, $iri[2]];
        }

        return [0, $v];
    }
716
717
    /* 71 */
718
719 99
    /**
     * Matches an optional prefix (xPN_PREFIX()) followed by ":".
     *
     * @return array [prefix including the trailing ":", remaining text] or [0, $v]
     */
    protected function xPNAME_NS($v)
    {
        list($name, $rest) = $this->xPN_PREFIX($v);
        $prefix = $name ?: '';

        $colon = $this->x("\:", $rest);
        if ($colon) {
            return [$prefix.':', $colon[1]];
        }

        return [0, $v];
    }
726
727
    /* 72 */
728
729 99
    /**
     * Matches "prefix:local" and expands it using the namespace registered for the prefix.
     *
     * The local part has to follow the colon directly (no whitespace in between).
     *
     * @return array [namespace + local part, remaining text], or [0, $v] if the input does not
     *               match or the prefix is not registered
     */
    protected function xPNAME_LN($v)
    {
        list($prefix, $rest) = $this->xPNAME_NS($v);
        if (!$prefix || $this->x('\s', $rest)) {
            return [0, $v];
        }

        list($local, $rest) = $this->xPN_LOCAL($rest);
        if (!$local) {
            return [0, $v];
        }

        if (!$this->namespaceHelper->hasPrefix($prefix)) {
            return [0, $v];
        }

        return [$this->namespaceHelper->getNamespace($prefix).$local, $rest];
    }
743
744
    /* 76 */
745
746 53
    /**
     * Matches a language tag ("@en", "@en-us", ...) that starts right at the beginning of $v.
     *
     * @return array [tag without "@", remaining text] or [0, $v]
     */
    protected function xLANGTAG($v)
    {
        // The tag must not be preceded by whitespace.
        if ($this->x('\s', $v)) {
            return [0, $v];
        }

        $tag = $this->x('\@([a-z]+(\-[a-z0-9]+)*)', $v);
        if (!$tag) {
            return [0, $v];
        }

        // group 2 is the last "-subtag" repetition, group 3 the remaining text
        return [$tag[1], $tag[3]];
    }
754
755
    /* 77.. */
756
757 97
    /**
     * Matches a run of digits.
     *
     * @return array [digits, remaining text] or [false, $v]
     */
    protected function xINTEGER($v)
    {
        $digits = $this->x('([0-9]+)', $v);

        return $digits ? [$digits[1], $digits[2]] : [false, $v];
    }
765
766
    /* 78.. */
767
768 97
    /**
     * Matches a decimal number: digits "." optional digits, or "." digits.
     *
     * @return array [matched number, remaining text] or [false, $v]
     */
    protected function xDECIMAL($v)
    {
        $patterns = ['([0-9]+\.[0-9]*)', '(\.[0-9]+)'];
        foreach ($patterns as $pattern) {
            $match = $this->x($pattern, $v);
            if ($match) {
                return [$match[1], $match[2]];
            }
        }

        return [false, $v];
    }
779
780
    /* 79.., 86.. */
781
782 97
    /**
     * Matches a number in exponent notation (e.g. "1.5E3", ".5E-3", "1E3").
     *
     * @return array [matched number, remaining text] or [false, $v]
     */
    protected function xDOUBLE($v)
    {
        $patterns = [
            '([0-9]+\.[0-9]*E[\+\-]?[0-9]+)',
            '(\.[0-9]+E[\+\-]?[0-9]+)',
            '([0-9]+E[\+\-]?[0-9]+)',
        ];
        foreach ($patterns as $pattern) {
            $match = $this->x($pattern, $v);
            if ($match) {
                return [$match[1], $match[2]];
            }
        }

        return [false, $v];
    }
796
797
    /* 92 */
798
799 97
    /**
     * Matches an empty collection "()" (optionally containing only whitespace) and
     * maps it to the rdf:nil resource.
     *
     * @param string $v Remaining unparsed input
     *
     * @return array [['type' => 'uri', 'value' => rdf:nil IRI], remaining input] on success,
     *               [0, $v] otherwise
     */
    protected function xNIL($v)
    {
        $match = $this->x('\([\x20\x9\xd\xa]*\)', $v);
        if (!$match) {
            return [0, $v];
        }

        $nil = ['type' => 'uri', 'value' => NamespaceHelper::NAMESPACE_RDF.'nil'];

        // The pattern has no capture group of its own, so the remaining input is at index 1.
        return [$nil, $match[1]];
    }
807
808
    /* 95.. */
809
810 98
    /**
     * Matches a run of basic name characters: either letters a-z or a single
     * backslash-u escape followed by one to four hex digits.
     *
     * @param string $v Remaining unparsed input
     *
     * @return array [matched characters, remaining input] on success, [0, $v] otherwise
     */
    protected function xPN_CHARS_BASE($v)
    {
        $match = $this->x("([a-z]+|\\\u[0-9a-f]{1,4})", $v);

        return $match ? [$match[1], $match[2]] : [0, $v];
    }
818
819
    /* 96 */
820
821 94
    /**
     * Matches what xPN_CHARS_BASE() accepts or, failing that, a single underscore.
     *
     * @param string $v Remaining unparsed input
     *
     * @return array [matched characters, remaining input] on success, [0, $v] otherwise
     */
    protected function xPN_CHARS_U($v)
    {
        [$chars, $rest] = $this->xPN_CHARS_BASE($v);
        if ($chars) {
            return [$chars, $rest];
        }

        $underscore = $this->x('(_)', $v);

        return $underscore ? [$underscore[1], $underscore[2]] : [0, $v];
    }
831
832
    /* 97.. */
833
834 93
    /**
     * Collects a variable name from the start of the input.
     *
     * Repeatedly consumes digit runs, xPN_CHARS_U() matches and, once the name is
     * non-empty, characters from an extra character class, until nothing matches.
     *
     * @param string $v Remaining unparsed input
     *
     * @return array [collected name ('' if nothing matched), remaining input]
     */
    protected function xVARNAME($v)
    {
        $name = '';

        while (true) {
            $digits = $this->x('([0-9]+)', $v);
            if ($digits) {
                $name .= $digits[1];
                $v = $digits[2];
                continue;
            }

            [$chars, $rest] = $this->xPN_CHARS_U($v);
            if ($chars) {
                $name .= $chars;
                $v = $rest;
                continue;
            }

            // The extra characters are only allowed after the name has started.
            // NOTE(review): PCRE reads `\x300` as `\x30` followed by `0`, so this class
            // probably does not cover U+0300-U+036F as seemingly intended - confirm
            // against x() before changing it.
            $extra = $name ? $this->x('([\xb7\x300-\x36f]+)', $v) : false;
            if (!$extra) {
                break;
            }
            $name .= $extra[1];
            $v = $extra[2];
        }

        return [$name, $v];
    }
856
857
    /* 98.. */
858
859 6
    /**
     * Matches what xPN_CHARS_U() accepts or, failing that, one character out of an
     * extra class (hyphen, digit, ...).
     *
     * @param string $v Remaining unparsed input
     *
     * @return array [matched characters, remaining input] on success, [false, $v] otherwise
     */
    protected function xPN_CHARS($v)
    {
        [$chars, $rest] = $this->xPN_CHARS_U($v);
        if ($chars) {
            return [$chars, $rest];
        }

        // NOTE(review): PCRE reads `\x300` as `\x30` followed by `0`, so this class
        // probably does not cover U+0300-U+036F as seemingly intended - confirm
        // against x() before changing it.
        $extra = $this->x('([\-0-9\xb7\x300-\x36f])', $v);

        return $extra ? [$extra[1], $extra[2]] : [false, $v];
    }
869
870
    /* 99 */
871
872 99
    /**
     * Reads a namespace prefix (the part in front of the colon) from the input.
     *
     * A permissive single-regex shortcut is tried first; only if it fails is the
     * prefix assembled piece by piece from xPN_CHARS_BASE(), xPN_CHARS() and dots.
     *
     * @param string $v Remaining unparsed input
     *
     * @return array [prefix, remaining input]; when nothing matches, the first element
     *               is the falsy value returned by xPN_CHARS_BASE()
     */
    protected function xPN_PREFIX($v)
    {
        /* accelerator (@@testing): everything up to whitespace, colon or punctuation */
        $quick = $this->x("([^\s\:\(\)\{\}\;\,]+)", $v, 's');
        if ($quick) {
            return [$quick[1], $quick[2]];
        }

        [$prefix, $rest] = $this->xPN_CHARS_BASE($v);
        if (!$prefix) {
            return [$prefix, $rest];
        }

        // Keep appending name characters and dots for as long as either one matches.
        while (true) {
            [$chars, $rest] = $this->xPN_CHARS($rest);
            if (false !== $chars) {
                $prefix .= $chars;
                continue;
            }

            $dot = $this->x("\.", $rest);
            if (!$dot) {
                break;
            }
            $prefix .= '.';
            $rest = $dot[1];
        }

        // One more attempt for a closing name character, as in the original flow.
        [$chars, $rest] = $this->xPN_CHARS($rest);
        $prefix .= $chars ?: '';

        return [$prefix, $rest];
    }
896
897
    /* 100 */
898
899 33
    /**
     * Reads the local part of a prefixed name (the part after the colon).
     *
     * A permissive single-regex shortcut is tried first. It is skipped when the match
     * is directly followed by a dot, because that dot may either belong to the name
     * or terminate the statement; the character-wise fallback below decides which.
     *
     * Fixes compared to the previous implementation:
     *  - a name consisting of / starting with "0" is no longer cut short by a
     *    truthiness test,
     *  - a "0" returned by xPN_CHARS() is appended instead of being silently dropped,
     *  - a dot is only consumed when a name character follows it, so a terminating
     *    dot (e.g. at the very end of the input) stays in the remaining input.
     *
     * @param string $v Remaining unparsed input
     *
     * @return array [local name ('' if nothing matched), remaining input]
     */
    protected function xPN_LOCAL($v)
    {
        /* accelerator (@@testing) */
        $quick = $this->x("([^\s\(\)\{\}\[\]\;\,\.]+)", $v, 's');
        if ($quick && !preg_match('/^\./', $quick[2])) {
            return [$quick[1], $quick[2]];
        }

        $local = '';
        $rest = $v;

        while (true) {
            // Whitespace ends the local name.
            if ($this->x('\s', $rest)) {
                break;
            }

            $digit = $this->x('([0-9])', $rest);
            if ($digit) {
                $local .= $digit[1];
                $rest = $digit[2];
                continue;
            }

            [$chars, $afterChars] = $this->xPN_CHARS_U($rest);
            if ($chars) {
                $local .= $chars;
                $rest = $afterChars;
                continue;
            }

            // Dots and the extra xPN_CHARS() characters may not start a name.
            if ('' === $local) {
                break;
            }

            // Tentatively step over one dot that is not followed by whitespace or "}" ...
            $pendingDot = '';
            $candidate = $rest;
            $dot = $this->x('(\.)', $rest);
            if ($dot && !preg_match('/^[\s\}]/s', $dot[2])) {
                $pendingDot = $dot[1];
                $candidate = $dot[2];
            }

            // ... but commit it only if a name character really follows. On failure
            // $rest is left untouched, so the dot remains available as terminator.
            [$chars, $afterChars] = $this->xPN_CHARS($candidate);
            if (false === $chars) {
                break;
            }
            $local .= $pendingDot.$chars;
            $rest = $afterChars;
        }

        return [$local, $rest];
    }
932
933 53
    /**
     * Decodes N-Triples/Turtle backslash escapes in a string.
     *
     * Handles the single-character escapes \t \b \n \r \f \" \' \\ as well as the
     * numeric escapes \uXXXX and \UXXXXXXXX (hex digits in either case), which are
     * replaced by the UTF-8 encoding of the code point.
     *
     * The input is decoded in a single left-to-right pass, so text produced by one
     * escape is never interpreted as the start of another one (e.g. "\u005Cu0041"
     * yields the six characters \u0041, not "A"), and an escaped backslash does not
     * combine with the character after it.
     *
     * @param string $v Raw string, possibly containing escapes
     *
     * @return string Decoded string; unchanged if it contains no backslash
     */
    protected function unescapeNtripleUTF($v)
    {
        if (false === strpos($v, '\\')) {
            return $v;
        }

        $echars = [
            't' => "\t",
            'b' => "\x08",
            'n' => "\n",
            'r' => "\r",
            'f' => "\f",
            '"' => '"',
            '\'' => "'",
            '\\' => '\\',
        ];

        $decoded = preg_replace_callback(
            '/\\\\(?:U([0-9A-Fa-f]{8})|u([0-9A-Fa-f]{4})|([tbnrf"\'\\\\]))/',
            static function (array $m) use ($echars): string {
                // Group 3 is only present for single-character escapes.
                if (isset($m[3])) {
                    return $echars[$m[3]];
                }

                // hexdec() may return a float; at most 8 hex digits are passed here.
                $no = (int) hexdec('' !== $m[1] ? $m[1] : $m[2]);

                if ($no < 128) {
                    return \chr($no);
                }
                if ($no < 2048) {
                    return \chr(($no >> 6) + 192).\chr(($no & 63) + 128);
                }
                if ($no < 65536) {
                    return \chr(($no >> 12) + 224).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
                }
                if ($no < 2097152) {
                    return \chr(($no >> 18) + 240).\chr((($no >> 12) & 63) + 128).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
                }

                // Values that do not fit into four UTF-8 bytes are dropped.
                return '';
            },
            $v
        );

        // preg_replace_callback() yields null on a PCRE error; keep the input then.
        return $decoded ?? $v;
    }
963
964 97
    /**
     * Matches a placeholder of the form ?{...} or ${...}; braces inside may be nested.
     *
     * On success the placeholder text (without the outer braces) is recorded once in
     * $this->r['placeholders'].
     *
     * @param string $v Remaining unparsed input
     *
     * @return array [['value' => placeholder text, 'type' => 'placeholder'], remaining input]
     *               on success, [0, $v] otherwise
     */
    protected function xPlaceholder($v)
    {
        $sigil = $this->x('(\?|\$)', $v);
        if (!$sigil) {
            return [0, $v];
        }

        $tail = trim($sigil[2]);

        // (?R) recurses into the whole pattern to balance nested braces. The balanced
        // group has to start right after the sigil, not somewhere later in the input.
        if (!preg_match('/(\{(?:[^{}]+|(?R))*\})/', $sigil[2], $m) || 0 !== strpos($tail, $m[1])) {
            return [0, $v];
        }

        $ph = substr($m[1], 1, -1);
        $rest = substr($tail, \strlen($m[1]));

        if (!isset($this->r['placeholders'])) {
            $this->r['placeholders'] = [];
        }
        // Strict comparison: with loose comparison numeric-looking names such as
        // "1e1" and "10" would be treated as the same placeholder.
        if (!\in_array($ph, $this->r['placeholders'], true)) {
            $this->r['placeholders'][] = $ph;
        }

        return [['value' => $ph, 'type' => 'placeholder'], $rest];
    }
984
}
985