Passed
Push — master ( 4e8b13...a3a48f )
by Konrad
04:16
created

TurtleParser::xPN_LOCAL()   C

Complexity

Conditions 13
Paths 9

Size

Total Lines 32
Code Lines 24

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 14.5274

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 13
eloc 24
c 1
b 0
f 0
nc 9
nop 1
dl 0
loc 32
ccs 19
cts 24
cp 0.7917
crap 14.5274
rs 6.6166

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/*
4
 * This file is part of the sweetrdf/InMemoryStoreSqlite package and licensed under
5
 * the terms of the GPL-3 license.
6
 *
7
 * (c) Konrad Abicht <[email protected]>
8
 * (c) Benjamin Nowack
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace sweetrdf\InMemoryStoreSqlite\Parser;
15
16
use Exception;
17
use function sweetrdf\InMemoryStoreSqlite\calcURI;
18
use sweetrdf\InMemoryStoreSqlite\Log\Logger;
19
use sweetrdf\InMemoryStoreSqlite\NamespaceHelper;
20
use sweetrdf\InMemoryStoreSqlite\StringReader;
21
22
class TurtleParser extends BaseParser
23
{
24 96
    /**
     * State marker; only ever set to 0 by the constructor in this class.
     *
     * Declared explicitly because creating properties dynamically is deprecated
     * as of PHP 8.2. Kept public to match the visibility the dynamically
     * created property had before.
     *
     * @var int
     */
    public $state = 0;

    /**
     * Input that was left over after the last call to parse().
     *
     * @var string
     */
    public $unparsed_code = '';

    /**
     * Number of consecutive read iterations without progress after which
     * parse() gives up with an exception.
     *
     * @var int
     */
    public $max_parsing_loops = 500;

    /**
     * Wires up the collaborators via the parent constructor and resets the
     * parser-specific settings to their defaults.
     */
    public function __construct(Logger $logger, NamespaceHelper $namespaceHelper, StringReader $stringReader)
    {
        parent::__construct($logger, $namespaceHelper, $stringReader);

        $this->state = 0;
        $this->unparsed_code = '';
        $this->max_parsing_loops = 500;
    }
32
33 96
    /**
     * Matches a regular-expression fragment against the beginning of a string.
     *
     * Leading whitespace and leading "#" comments are skipped first. The
     * fragment is then wrapped as /^\s*<fragment>(.*)$/ so that the last
     * capture group of the result always holds the remaining input.
     *
     * @param string $re      regex fragment (without delimiters)
     * @param string $v       input to match against
     * @param string $options PCRE modifiers appended to the pattern
     *
     * @return array|false the preg_match groups on success, false otherwise
     */
    protected function x($re, $v, $options = 'si')
    {
        // A leading run of 0xA0 / 0xC2 bytes is collapsed into one plain space.
        $subject = preg_replace('/^[\xA0\xC2]+/', ' ', $v);

        /* comment removal: strip one leading "#..." line per iteration */
        $comment = [];
        while (1 === preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $subject, $comment)) {
            $subject = $comment[2];
        }

        $pattern = "/^\s*".$re.'(.*)$/'.$options;
        $groups = [];
        if (preg_match($pattern, $subject, $groups)) {
            return $groups;
        }

        return false;
    }
44
45 2
    /**
     * Advances the blank node counter and returns a fresh blank node label
     * of the form "_:<prefix><counter>".
     */
    private function createBnodeID(): string
    {
        $this->bnode_id++;
        $label = $this->bnode_prefix.$this->bnode_id;

        return '_:'.$label;
    }
51
52 2
    /**
     * Appends a triple to the result list and bumps the triple counter.
     *
     * @param array $t triple description
     */
    protected function addT(array $t): void
    {
        // The current counter value is the slot of the new triple.
        $this->triples[$this->t_count++] = $t;
    }
57
58
    /**
     * Returns the number of triples collected so far.
     *
     * @return int
     */
    protected function countTriples()
    {
        return $this->t_count;
    }
62
63
    /**
     * Returns the input that parse() could not consume.
     *
     * @return string
     */
    protected function getUnparsedCode()
    {
        return $this->unparsed_code;
    }
67
68 2
    /**
     * Parses Turtle input and collects the resulting triples.
     *
     * The input is read from the reader in chunks. Each chunk is appended to
     * a buffer; first the prologue (base/prefix declarations) is consumed,
     * afterwards as many complete triple statements as possible. Whatever
     * cannot be parsed stays in the buffer for the next chunk. Input that is
     * still left at the end is stored as unparsed code and reported through
     * the logger, unless the logger already holds an error.
     *
     * @param string $path passed on to the reader's init()
     * @param string $data passed on to the reader's init()
     *
     * @throws Exception when more than max_parsing_loops consecutive chunks
     *                   were read without any parsing progress
     */
    public function parse(string $path, string $data = ''): void
    {
        $this->triples = [];
        $this->t_count = 0;
        $this->reader->init($path, $data);
        $this->base = $this->reader->getBase();
        $this->r = ['vars' => []];

        /* parse */
        $buffer = '';
        $more_triples = [];
        $sub_v = '';
        $sub_v2 = '';
        $loops = 0;
        $prologue_done = false;

        // Compare against '' explicitly: a plain truthiness test would end the
        // loop early (and drop data) for a chunk that is exactly "0".
        // NOTE(review): assumes readStream() yields a string per chunk and a
        // non-string or empty string at end of input - confirm against StringReader.
        while (\is_string($d = $this->reader->readStream(8192)) && '' !== $d) {
            $buffer .= $d;
            $sub_v = $buffer;
            do {
                $proceed = false;
                if (!$prologue_done) {
                    $proceed = true;
                    list($sub_r, $sub_v) = $this->xPrologue($sub_v);
                    if ($sub_r) {
                        $loops = 0;
                        $sub_v .= $this->reader->readStream(128);
                        /* in case we missed the final DOT in the previous prologue loop */
                        if ($sub_r = $this->x('\.', $sub_v)) {
                            $sub_v = $sub_r[1];
                        }
                        /* more prologue to come, use outer loop */
                        if ($this->x("\@?(base|prefix)", $sub_v)) {
                            $proceed = false;
                        }
                    } else {
                        $prologue_done = true;
                    }
                }
                if ($prologue_done) {
                    list($sub_r, $sub_v, $more_triples, $sub_v2) = $this->xTriplesBlock($sub_v);
                    if (\is_array($sub_r)) {
                        $proceed = true;
                        $loops = 0;
                        foreach ($sub_r as $t) {
                            $this->addT($t);
                        }
                    }
                }
            } while ($proceed);

            ++$loops;
            $buffer = $sub_v;
            if ($loops > $this->max_parsing_loops) {
                $msg = 'too many loops: '.$loops.'. Could not parse "'.substr($buffer, 0, 200).'..."';
                throw new Exception($msg);
            }
        }

        // Triples of a final statement that was never terminated are still kept.
        foreach ($more_triples as $t) {
            $this->addT($t);
        }
        $this->unparsed_code = \count($more_triples) ? $sub_v2 : $sub_v;

        /* remove trailing comments */
        while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) {
            $this->unparsed_code = $m[2];
        }

        // Explicit '' comparisons so that a left-over "0" is reported as well.
        if ('' !== (string) $this->unparsed_code && !$this->logger->hasEntries('error')) {
            // Only line breaks are replaced; the former class "[\x0a|\x0d]"
            // also turned literal "|" characters into spaces.
            $rest = preg_replace('/[\x0a\x0d]/', ' ', substr($this->unparsed_code, 0, 30));
            if ('' !== trim($rest)) {
                $this->logger->error('Could not parse "'.$rest.'"');
            }
        }
    }
142
143 2
    /**
     * Consumes an optional base declaration followed by any number of prefix
     * declarations. Nothing is consumed once triples have been collected.
     *
     * @param string $v input
     *
     * @return array [1 if at least one declaration was read, else 0; remaining input]
     */
    protected function xPrologue($v)
    {
        $found = 0;
        if ($this->t_count) {
            return [$found, $v];
        }

        [$base, $v] = $this->xBaseDecl($v);
        if ($base) {
            $this->base = $base;
            $found = 1;
        }

        while (true) {
            [$decl, $v] = $this->xPrefixDecl($v);
            if (!$decl) {
                break;
            }
            $this->namespaceHelper->setPrefix($decl['prefix'], $decl['uri']);
            $found = 1;
        }

        return [$found, $v];
    }
159
160
    /* 3 */
161
162 96
    /**
     * Reads a base declaration: "@base" or "base", an IRI reference and an
     * optional trailing DOT.
     *
     * @param string $v input
     *
     * @return array [IRI reference as returned by xIRI_REF(), remaining input] or [0, $v]
     */
    protected function xBaseDecl($v)
    {
        $keyword = $this->x("\@?base\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$iri, $rest] = $this->xIRI_REF($keyword[1]);
        if (!$iri) {
            return [0, $v];
        }

        // The terminating DOT is optional here.
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [$iri, $rest];
    }
176
177
    /* 4 */
178
179 96
    /**
     * Reads a prefix declaration: "@prefix" or "prefix", a namespace prefix,
     * an IRI reference and an optional trailing DOT.
     *
     * @param string $v input
     *
     * @return array [['prefix' => ..., 'uri_ref' => ..., 'uri' => ...], remaining input]
     *               or [0, $v]; 'uri' is the reference resolved against the current base
     */
    protected function xPrefixDecl($v)
    {
        $keyword = $this->x("\@?prefix\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$prefix, $rest] = $this->xPNAME_NS($keyword[1]);
        if (!$prefix) {
            return [0, $v];
        }

        [$ref, $rest] = $this->xIRI_REF($rest);
        if (!$ref) {
            return [0, $v];
        }

        $uri = calcURI($ref, $this->base);

        // The terminating DOT is optional here.
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [['prefix' => $prefix, 'uri_ref' => $ref, 'uri' => $uri], $rest];
    }
197
198
    /* 21.., 32.. */
199
200 96
    /**
     * Parses as many triple statements as possible from the input.
     *
     * Works as a small state machine:
     *   1 = expecting subject, 2 = expecting predicate,
     *   3 = expecting object,  4 = expecting ".", ";", "," or "}".
     * Triples of the statement in progress are gathered in a pending list and
     * only moved to the result once the statement is terminated.
     *
     * @param string $v input
     *
     * @return array [completed triples (0 if there are none),
     *                input after the last completed statement,
     *                pending triples of an unterminated statement,
     *                input after the last consumed token]
     */
    protected function xTriplesBlock($v)
    {
        $pre_r = [];
        $r = [];
        $state = 1;
        $sub_v = $v;
        $buffer = $sub_v;
        do {
            $proceed = 0;
            if (1 == $state) {/* expecting subject */
                $t = ['type' => 'triple', 's' => '', 'p' => '', 'o' => '', 's_type' => '', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
                if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['value'];
                    $t['s_type'] = $sub_r['type'];
                    $state = 2;
                    $proceed = 1;
                    if ($sub_r = $this->x('(\}|\.)', $sub_v)) {
                        if ('placeholder' == $t['s_type']) {
                            $state = 4;
                        } else {
                            $this->logger->error('"'.$sub_r[1].'" after subject found.');
                        }
                    }
                } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                    if ($this->x('\.', $sub_v)) {
                        $this->logger->error('DOT after subject found.');
                    }
                } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    $this->logger->error('Subject expected, DOT found.'.$sub_v);
                }
            }
            if (2 == $state) {/* expecting predicate */
                if ($sub_r = $this->x('a\s+', $sub_v)) {
                    /* "a" is shorthand for rdf:type */
                    $sub_v = $sub_r[1];
                    $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                    $t['p_type'] = 'uri';
                    $state = 3;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    if ('bnode' == $sub_r['type']) {
                        $this->logger->error('Blank node used as triple predicate');
                    }
                    $t['p'] = $sub_r['value'];
                    $t['p_type'] = $sub_r['type'];
                    $state = 3;
                    $proceed = 1;
                } elseif ($this->x('\.', $sub_v)) {
                    $state = 4;
                } elseif ($this->x('\}', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
            if (3 == $state) {/* expecting object */
                if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['value'];
                    $t['o_type'] = $sub_r['type'];
                    $t['o_lang'] = $sub_r['lang'] ?? '';
                    $t['o_datatype'] = $sub_r['datatype'] ?? '';
                    $pre_r[] = $t;
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /* $t is reused across objects: clear literal-only fields so a
                       language tag of a previous literal object does not leak */
                    $t['o_datatype'] = '';
                    $t['o_lang'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /* see above: reset literal-only fields of the reused $t */
                    $t['o_datatype'] = '';
                    $t['o_lang'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                }
            }
            if (4 == $state) {/* expecting . or ; or , or } */
                if ($sub_r = $this->x('\.', $sub_v)) {
                    /* statement complete: commit pending triples */
                    $sub_v = $sub_r[1];
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $state = 1;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\;', $sub_v)) {
                    /* same subject, next predicate */
                    $sub_v = $sub_r[1];
                    $state = 2;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\,', $sub_v)) {
                    /* same subject and predicate, next object */
                    $sub_v = $sub_r[1];
                    $state = 3;
                    $proceed = 1;
                    if ($this->x('\}', $sub_v)) {
                        $this->logger->error('Object expected, } found.');
                    }
                }
                /* these tokens end the block: commit pending triples and stop */
                if ($this->x('(\}|\{|OPTIONAL|FILTER|GRAPH)', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
        } while ($proceed);

        return \count($r) ? [$r, $buffer, $pre_r, $sub_v] : [0, $buffer, $pre_r, $sub_v];
    }
322
323
    /* 39.. */
324
325 94
    /**
     * Parses a blank node property list "[ predicate object ; ... ]".
     *
     * A fresh blank node becomes the subject of all contained triples. The
     * states mirror xTriplesBlock(): 2 = expecting predicate, 3 = expecting
     * object, 4 = expecting ".", ";", "," or "]".
     *
     * @param string $v input
     *
     * @return array [['id' => blank node label, 'type' => 'bnode', 'triples' => [...]],
     *               remaining input], or [0, $v] if there is no "[" or the
     *               list is never closed
     */
    protected function xBlankNodePropertyList($v)
    {
        $open = $this->x('\[', $v);
        if (!$open) {
            return [0, $v];
        }

        $sub_v = $open[1];
        $s = $this->createBnodeID();
        $r = ['id' => $s, 'type' => 'bnode', 'triples' => []];
        $t = ['type' => 'triple', 's' => $s, 'p' => '', 'o' => '', 's_type' => 'bnode', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
        $state = 2;
        $closed = 0;
        do {
            $proceed = 0;
            if (2 == $state) {/* expecting predicate */
                if ($sub_r = $this->x('a\s+', $sub_v)) {
                    /* "a" is shorthand for rdf:type */
                    $sub_v = $sub_r[1];
                    $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                    $t['p_type'] = 'uri';
                    $state = 3;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['p'] = $sub_r['value'];
                    $t['p_type'] = $sub_r['type'];
                    $state = 3;
                    $proceed = 1;
                }
            }
            if (3 == $state) {/* expecting object */
                if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['value'];
                    $t['o_type'] = $sub_r['type'];
                    $t['o_lang'] = $sub_r['lang'] ?? '';
                    $t['o_datatype'] = $sub_r['datatype'] ?? '';
                    $r['triples'][] = $t;
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /* $t is reused across objects: clear literal-only fields so a
                       language tag of a previous literal object does not leak */
                    $t['o_datatype'] = '';
                    $t['o_lang'] = '';
                    $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /* see above: reset literal-only fields of the reused $t */
                    $t['o_datatype'] = '';
                    $t['o_lang'] = '';
                    $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                }
            }
            if (4 == $state) {/* expecting . or ; or , or ] */
                /* the checks below are deliberately independent (not elseif):
                   e.g. ";" directly followed by "]" is accepted in one pass */
                if ($sub_r = $this->x('\.', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 1;
                    $proceed = 1;
                }
                if ($sub_r = $this->x('\;', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 2;
                    $proceed = 1;
                }
                if ($sub_r = $this->x('\,', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $state = 3;
                    $proceed = 1;
                }
                if ($sub_r = $this->x('\]', $sub_v)) {
                    $sub_v = $sub_r[1];
                    $proceed = 0;
                    $closed = 1;
                }
            }
        } while ($proceed);

        return $closed ? [$r, $sub_v] : [0, $v];
    }
407
408
    /* 40.. */
409
410 94
    /**
     * Parses a collection "( item item ... )" into rdf:first / rdf:rest
     * triples chained through fresh blank nodes.
     *
     * Items may be terms, nested collections or blank node property lists.
     *
     * @param string $v input
     *
     * @return array [['id' => label of the head node, 'type' => 'bnode', 'triples' => [...]],
     *               remaining input], or [0, $v] if there is no "(", no item,
     *               or the collection is never closed
     */
    protected function xCollection($v)
    {
        $open = $this->x('\(', $v);
        if (!$open) {
            return [0, $v];
        }

        $rest = $open[1];
        $node = $this->createBnodeID();
        $result = ['id' => $node, 'type' => 'bnode', 'triples' => []];

        while (true) {
            /* one item: rdf:first triple for the current list node */
            if (([$item, $rest] = $this->xVarOrTerm($rest)) && $item) {
                $result['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $item['value'],
                    'o_type' => $item['type'],
                    'o_lang' => $item['lang'] ?? '',
                    'o_datatype' => $item['datatype'] ?? '',
                ];
            } elseif (([$item, $rest] = $this->xCollection($rest)) && $item) {
                $result['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $item['id'],
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $result['triples'] = array_merge($result['triples'], $item['triples']);
            } elseif (([$item, $rest] = $this->xBlankNodePropertyList($rest)) && $item) {
                $result['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'o' => $item['id'],
                    's_type' => 'bnode',
                    'p_type' => 'uri',
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $result['triples'] = array_merge($result['triples'], $item['triples']);
            } else {
                /* no further item and no closing parenthesis seen: give up */
                return [0, $v];
            }

            /* closing parenthesis: terminate the list with rdf:nil */
            $close = $this->x('\)', $rest);
            if ($close) {
                $result['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                    'p_type' => 'uri',
                    'o' => NamespaceHelper::NAMESPACE_RDF.'nil',
                    'o_type' => 'uri',
                    'o_lang' => '',
                    'o_datatype' => '',
                ];

                return [$result, $close[1]];
            }

            /* otherwise link to a new list node and continue with it */
            $next = $this->createBnodeID();
            $result['triples'][] = [
                'type' => 'triple',
                's' => $node,
                'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                'o' => $next,
                's_type' => 'bnode',
                'p_type' => 'uri',
                'o_type' => 'bnode',
                'o_lang' => '',
                'o_datatype' => '',
            ];
            $node = $next;
        }
    }
501
502
    /* 42 */
503
504 96
    /**
     * Reads either a variable or a graph term, trying the variable first.
     *
     * @param string $v input
     *
     * @return array [term description, remaining input] or [0, $v]
     */
    protected function xVarOrTerm($v)
    {
        [$var, $rest] = $this->xVar($v);
        if ($var) {
            return [$var, $rest];
        }

        [$term, $rest] = $this->xGraphTerm($v);
        if ($term) {
            return [$term, $rest];
        }

        return [0, $v];
    }
514
515
    /* 44, 74.., 75.. */
516
517 96
    /**
     * Reads a variable ("?name" or "$name") and registers its name in the
     * list of known variables.
     *
     * @param string $v input
     *
     * @return array [['value' => name, 'type' => 'var'], remaining input] or [0, $v]
     */
    protected function xVar($v)
    {
        $m = $this->x('(\?|\$)([^\s]+)', $v);
        if (!$m) {
            return [0, $v];
        }

        /* $m[2] is the non-whitespace run after the sigil, $m[3] everything after it */
        [$name, $tail] = $this->xVARNAME($m[2]);
        if (!$name) {
            return [0, $v];
        }

        /* strict comparison: a loose in_array() treats numeric-looking names
           such as "1e1" and "10" as equal and would skip registering one of them */
        if (!\in_array($name, $this->r['vars'], true)) {
            $this->r['vars'][] = $name;
        }

        /* whatever xVARNAME() did not consume is put back in front of the rest */
        return [['value' => $name, 'type' => 'var'], $tail.$m[3]];
    }
531
532
    /* 45 */
533
534 96
    /**
     * Tries the graph term sub-parsers in a fixed order and returns the first
     * successful result.
     *
     * Scalar results are wrapped as ['value' => ...]; a missing 'type' is
     * filled in with the default type of the sub-parser that matched.
     *
     * @param string $v input
     *
     * @return array [term description, remaining input] or [0, $v]
     */
    protected function xGraphTerm($v)
    {
        /* sub-parser name (without the "x" prefix) => default term type */
        $parsers = [
            'IRIref' => 'uri',
            'RDFLiteral' => 'literal',
            'NumericLiteral' => 'literal',
            'BooleanLiteral' => 'literal',
            'BlankNode' => 'bnode',
            'NIL' => 'uri',
            'Placeholder' => 'placeholder',
        ];

        foreach ($parsers as $term => $defaultType) {
            $method = 'x'.$term;
            [$result, $rest] = $this->$method($v);
            if (!$result) {
                continue;
            }

            if (!\is_array($result)) {
                $result = ['value' => $result];
            }
            if (!isset($result['type'])) {
                $result['type'] = $defaultType;
            }

            return [$result, $rest];
        }

        return [0, $v];
    }
558
559
    /* 60 */
560
561 95
    /**
     * Reads a quoted string literal with an optional language tag or an
     * optional "^^" datatype IRI.
     *
     * @param string $v input
     *
     * @return array [literal description (with 'lang' or 'datatype' when
     *               present), remaining input] or [0, $v]
     */
    protected function xRDFLiteral($v)
    {
        list($literal, $rest) = $this->xString($v);
        if (!$literal) {
            return [0, $v];
        }

        $literal['value'] = $this->unescapeNtripleUTF($literal['value']);

        list($lang, $rest) = $this->xLANGTAG($rest);
        if ($lang) {
            $literal['lang'] = $lang;
        } elseif (!$this->x('\s', $rest) && ($marker = $this->x('\^\^', $rest))) {
            /* "^^" must follow the string without whitespace in between */
            list($datatype, $rest) = $this->xIRIref($marker[1]);
            /* Test the IRI itself. The previous check looked at offset 1 of the
               result, i.e. the second character of the IRI string (or an
               offset on the integer 0 when no IRI was found). */
            if ($datatype) {
                $literal['datatype'] = $datatype;
            }
        }

        return [$literal, $rest];
    }
582
583
    /* 61.., 62.., 63.., 64.. */
584
585 94
    /**
     * Reads an optionally signed numeric literal, trying double, decimal and
     * integer notation in that order.
     *
     * @param string $v input
     *
     * @return array [['value' => ..., 'type' => 'literal', 'datatype' => XSD type IRI],
     *               remaining input] or [0, $v]
     */
    protected function xNumericLiteral($v)
    {
        /* the sign group is optional, so this match always succeeds */
        $signMatch = $this->x('(\-|\+)?', $v);
        $sign = $signMatch[1];
        $rest = $signMatch[2];

        /* sub-parser name (without the "x" prefix) => XSD local name */
        $notations = ['DOUBLE' => 'double', 'DECIMAL' => 'decimal', 'INTEGER' => 'integer'];
        foreach ($notations as $notation => $xsd) {
            $method = 'x'.$notation;
            [$number, $rest] = $this->$method($rest);
            if (false === $number) {
                continue;
            }

            $literal = [
                'value' => $sign.$number,
                'type' => 'literal',
                'datatype' => NamespaceHelper::NAMESPACE_XSD.$xsd,
            ];

            return [$literal, $rest];
        }

        return [0, $v];
    }
605
606
    /* 65.. */
607
608 94
    /**
     * Reads a boolean literal ("true" or "false").
     *
     * @param string $v input
     *
     * @return array [matched keyword, remaining input] or [0, $v]
     */
    protected function xBooleanLiteral($v)
    {
        $match = $this->x('(true|false)', $v);

        return $match ? [$match[1], $match[2]] : [0, $v];
    }
616
617
    /* 66.., 87.., 88.., 89.., 90.., 91.. */
618
619 95
    /**
     * Reads a quoted string delimited by ', ", ''' or """.
     *
     * The closing delimiter is the first occurrence of the opening delimiter
     * that is not preceded by an odd number of backslashes.
     *
     * @param string $v input
     *
     * @return array [['value' => raw content, 'type' => 'literal', 'sub_type' => ...],
     *               remaining input] or [0, $v]
     */
    protected function xString($v)
    {/* largely simplified, may need some tweaks in following revisions */
        if (!preg_match('/^\s*([\']{3}|\'|[\"]{3}|\")(.*)$/s', $v, $opening)) {
            return [0, $v];
        }

        $delim = $opening[1];
        $rest = $opening[2];
        $sub_types = ["'''" => 'literal_long1', '"""' => 'literal_long2', "'" => 'literal1', '"' => 'literal2'];
        $sub_type = $sub_types[$delim];

        $offset = 0;
        while (false !== ($end = strpos($rest, $delim, $offset))) {
            $content = substr($rest, 0, $end);

            /* an odd number of trailing backslashes escapes the delimiter */
            $escaped = preg_match('/([\x5c]+)$/s', $content, $slashes) && 1 === \strlen($slashes[1]) % 2;
            if (!$escaped) {
                $remaining = substr($rest, $end + \strlen($delim));

                return [['value' => $content, 'type' => 'literal', 'sub_type' => $sub_type], $remaining];
            }

            /* escaped delimiter: keep searching right behind it */
            $offset = $end + 1;
        }

        return [0, $v];
    }
653
654
    /* 67 */
655
656 96
    /**
     * Reads an IRI, given either as "<...>" reference (resolved against the
     * current base) or as prefixed name.
     *
     * @param string $v input
     *
     * @return array [IRI, remaining input] or [0, $v]
     */
    protected function xIRIref($v)
    {
        [$ref, $rest] = $this->xIRI_REF($v);
        if ($ref) {
            return [calcURI($ref, $this->base), $rest];
        }

        [$name, $rest] = $this->xPrefixedName($v);
        if ($name) {
            return [$name, $rest];
        }

        return [0, $v];
    }
666
667
    /* 68 */
668
669 96
    /**
     * Reads a prefixed name: either "prefix:local" or a bare "prefix:" whose
     * prefix is known to the namespace helper.
     *
     * @param string $v input
     *
     * @return array [expanded IRI, remaining input] or [0, $v]
     */
    protected function xPrefixedName($v)
    {
        [$iri, $rest] = $this->xPNAME_LN($v);
        if ($iri) {
            return [$iri, $rest];
        }

        [$prefix, $rest] = $this->xPNAME_NS($v);
        if ($prefix && $this->namespaceHelper->hasPrefix($prefix)) {
            return [$this->namespaceHelper->getNamespace($prefix), $rest];
        }

        return [0, $v];
    }
681
682
    /* 69.., 73.., 93, 94..  */
683
684 94
    /**
     * Reads a blank node: a labelled one ("_:name") or an anonymous one
     * ("[]", possibly with whitespace inside), which gets a generated label.
     *
     * @param string $v input
     *
     * @return array [['type' => 'bnode', 'value' => label], remaining input] or [0, $v]
     */
    protected function xBlankNode($v)
    {
        $labelled = $this->x('\_\:', $v);
        if ($labelled) {
            [$name, $rest] = $this->xPN_LOCAL($labelled[1]);
            if ($name) {
                return [['type' => 'bnode', 'value' => '_:'.$name], $rest];
            }
        }

        $anonymous = $this->x('\[[\x20\x9\xd\xa]*\]', $v);
        if ($anonymous) {
            return [['type' => 'bnode', 'value' => $this->createBnodeID()], $anonymous[1]];
        }

        return [0, $v];
    }
695
696
    /* 70.. @@sync with SPARQLParser */
697
698 2
    /**
     * Reads an IRI reference enclosed in angle brackets.
     *
     * Three forms are tried in order: a "<${...}>" placeholder accepted by
     * xPlaceholder(), the empty reference "<>", and a regular "<...>"
     * reference that does not start with whitespace.
     *
     * @param string $v input
     *
     * @return array [reference, remaining input] or [0, $v]; the reference is
     *               true for "<>" and for content that evaluates to false
     */
    protected function xIRI_REF($v)
    {
        $placeholder = $this->x('\<(\$\{[^\>]*\})\>', $v);
        if ($placeholder && $this->xPlaceholder($placeholder[1])) {
            return [$placeholder[1], $placeholder[2]];
        }

        $empty = $this->x('\<\>', $v);
        if ($empty) {
            return [true, $empty[1]];
        }

        $reference = $this->x('\<([^\s][^\<\>]*)\>', $v);
        if ($reference) {
            return [$reference[1] ?: true, $reference[2]];
        }

        return [0, $v];
    }
711
712
    /* 71 */
713
714 96
    /**
     * Reads a namespace prefix including its colon; the part before the
     * colon may be empty.
     *
     * @param string $v input
     *
     * @return array ["prefix:", remaining input] or [0, $v]
     */
    protected function xPNAME_NS($v)
    {
        [$name, $rest] = $this->xPN_PREFIX($v);

        $colon = $this->x("\:", $rest);
        if (!$colon) {
            return [0, $v];
        }

        return [($name ?: '').':', $colon[1]];
    }
721
722
    /* 72 */
723
724 96
    /**
     * Reads a prefixed name with local part ("prefix:local") and expands it
     * using the namespace registered for the prefix.
     *
     * @param string $v input
     *
     * @return array [expanded IRI, remaining input] or [0, $v]; fails if
     *               whitespace follows the colon or the prefix is unknown
     */
    protected function xPNAME_LN($v)
    {
        [$prefix, $rest] = $this->xPNAME_NS($v);
        if (!$prefix || $this->x('\s', $rest)) {
            return [0, $v];
        }

        [$local, $rest] = $this->xPN_LOCAL($rest);
        if (!$local || !$this->namespaceHelper->hasPrefix($prefix)) {
            return [0, $v];
        }

        return [$this->namespaceHelper->getNamespace($prefix).$local, $rest];
    }
738
739
    /* 76 */
740
741 53
    /**
     * Reads a language tag ("@en", "@en-us", ...) that directly follows the
     * previous token without whitespace.
     *
     * @param string $v input
     *
     * @return array [tag without "@", remaining input] or [0, $v]
     */
    protected function xLANGTAG($v)
    {
        if ($this->x('\s', $v)) {
            return [0, $v];
        }

        /* group 1 = whole tag, group 2 = last subtag, group 3 = rest */
        $tag = $this->x('\@([a-z]+(\-[a-z0-9]+)*)', $v);

        return $tag ? [$tag[1], $tag[3]] : [0, $v];
    }
749
750
    /* 77.. */
751
752 94
    /**
     * Reads an unsigned integer.
     *
     * @param string $v input
     *
     * @return array [digits, remaining input] or [false, $v]
     */
    protected function xINTEGER($v)
    {
        $match = $this->x('([0-9]+)', $v);

        return $match ? [$match[1], $match[2]] : [false, $v];
    }
760
761
    /* 78.. */
762
763 94
    /**
     * Reads an unsigned decimal number.
     *
     * At least one digit is required after the dot, as in the DECIMAL
     * production of Turtle 1.1 / SPARQL 1.1 ([0-9]* '.' [0-9]+). The former
     * pattern also accepted "5." and thereby swallowed the DOT terminating a
     * statement such as ":s :p 5."; such input is now left to xINTEGER().
     *
     * @param string $v input
     *
     * @return array [number, remaining input] or [false, $v]
     */
    protected function xDECIMAL($v)
    {
        if ($r = $this->x('([0-9]*\.[0-9]+)', $v)) {
            return [$r[1], $r[2]];
        }

        return [false, $v];
    }
774
775
    /* 79.., 86.. */
776
777 94
    /**
     * Tries to read a number with an exponent part from the start of $v.
     *
     * Three shapes are tried in order: "digits.digits*E±digits",
     * ".digitsE±digits" and "digitsE±digits". The first shape that matches wins.
     *
     * @param string $v input to scan
     *
     * @return array [matched number, second element of the x() result], or [false, $v] on failure
     */
    protected function xDOUBLE($v)
    {
        $patterns = [
            '([0-9]+\.[0-9]*E[\+\-]?[0-9]+)',
            '(\.[0-9]+E[\+\-]?[0-9]+)',
            '([0-9]+E[\+\-]?[0-9]+)',
        ];

        foreach ($patterns as $pattern) {
            $match = $this->x($pattern, $v);
            if ($match) {
                return [$match[1], $match[2]];
            }
        }

        return [false, $v];
    }
791
792
    /* 92 */
793
794 94
    /**
     * Tries to read an empty pair of parentheses - optionally containing
     * spaces, tabs, carriage returns or line feeds - from the start of $v.
     *
     * @param string $v input to scan
     *
     * @return array [term array of type 'uri' whose value is NAMESPACE_RDF.'nil',
     *               second element of the x() result], or [0, $v] on failure
     */
    protected function xNIL($v)
    {
        $match = $this->x('\([\x20\x9\xd\xa]*\)', $v);
        if (!$match) {
            return [0, $v];
        }

        $nil = ['type' => 'uri', 'value' => NamespaceHelper::NAMESPACE_RDF.'nil'];

        return [$nil, $match[1]];
    }
802
803
    /* 95.. */
804
805 95
    /**
     * Tries to read either a run of letters a-z or one "\u" escape with one to
     * four hex digits from the start of $v.
     *
     * NOTE(review): whether upper-case letters match depends on the options
     * x() applies by default, which are not visible in this chunk.
     *
     * @param string $v input to scan
     *
     * @return array [matched text, second element of the x() result], or [0, $v] on failure
     */
    protected function xPN_CHARS_BASE($v)
    {
        $match = $this->x("([a-z]+|\\\u[0-9a-f]{1,4})", $v);

        return $match ? [$match[1], $match[2]] : [0, $v];
    }
813
814
    /* 96 */
815
816 91
    /**
     * Tries to read whatever xPN_CHARS_BASE() accepts or, failing that, a
     * single underscore from the start of $v.
     *
     * @param string $v input to scan
     *
     * @return array [matched text, remainder], or [0, $v] on failure
     */
    protected function xPN_CHARS_U($v)
    {
        list($base, $afterBase) = $this->xPN_CHARS_BASE($v);
        if ($base) {
            return [$base, $afterBase];
        }

        $underscore = $this->x('(_)', $v);
        if ($underscore) {
            return [$underscore[1], $underscore[2]];
        }

        return [0, $v];
    }
826
827
    /* 97.. */
828
829 90
    /**
     * Collects a variable name from the start of $v.
     *
     * On every round the first alternative that matches is appended to the name:
     *  1. a run of digits,
     *  2. whatever xPN_CHARS_U() accepts,
     *  3. a run from the extra character class - tried only once the name
     *     collected so far is truthy.
     * Collection stops as soon as none of them matches.
     *
     * NOTE(review): PCRE reads "\x300-\x36f" as "\x30", the range "0-\x36" and
     * "f", so the class in step 3 does not cover U+0300..U+036F, which looks
     * like the intent - confirm.
     *
     * @param string $v input to scan
     *
     * @return array [collected name ('' when nothing matched), remainder]
     */
    protected function xVARNAME($v)
    {
        $name = '';

        while (true) {
            $digits = $this->x('([0-9]+)', $v);
            if ($digits) {
                $name .= $digits[1];
                $v = $digits[2];
                continue;
            }

            list($chars, $afterChars) = $this->xPN_CHARS_U($v);
            if ($chars) {
                $name .= $chars;
                $v = $afterChars;
                continue;
            }

            if ($name) {
                $extra = $this->x('([\xb7\x300-\x36f]+)', $v);
                if ($extra) {
                    $name .= $extra[1];
                    $v = $extra[2];
                    continue;
                }
            }

            break;
        }

        return [$name, $v];
    }
851
852
    /* 98.. */
853
854 6
    /**
     * Tries to read whatever xPN_CHARS_U() accepts or, failing that, a single
     * character from the extra class (hyphen, digit, ...) from the start of $v.
     *
     * Failure is signalled with false (not 0), so callers can tell it apart
     * from a matched "0".
     *
     * NOTE(review): PCRE reads "\x300-\x36f" as "\x30", the range "0-\x36" and
     * "f", so the class does not cover U+0300..U+036F, which looks like the
     * intent - confirm.
     *
     * @param string $v input to scan
     *
     * @return array [matched text, remainder], or [false, $v] on failure
     */
    protected function xPN_CHARS($v)
    {
        list($chars, $afterChars) = $this->xPN_CHARS_U($v);
        if ($chars) {
            return [$chars, $afterChars];
        }

        $single = $this->x('([\-0-9\xb7\x300-\x36f])', $v);
        if ($single) {
            return [$single[1], $single[2]];
        }

        return [false, $v];
    }
864
865
    /* 99 */
866
867 96
    /**
     * Tries to read a namespace prefix from the start of $v.
     *
     * A shortcut pattern is tried first: any run of characters that are not
     * whitespace or one of : ( ) { } ; , is returned as the prefix right away.
     * Only when that fails is the prefix assembled piece by piece: one
     * xPN_CHARS_BASE() match, then any sequence of xPN_CHARS() matches and
     * dots, then one more optional xPN_CHARS() match.
     *
     * @param string $v input to scan
     *
     * @return array [prefix, remainder]; when nothing matches, the result of
     *               xPN_CHARS_BASE() is passed through unchanged
     */
    protected function xPN_PREFIX($v)
    {
        // Shortcut; the original code marks this path as "accelerator" / "@@testing".
        $quick = $this->x("([^\s\:\(\)\{\}\;\,]+)", $v, 's');
        if ($quick) {
            return [$quick[1], $quick[2]];
        }

        list($prefix, $rest) = $this->xPN_CHARS_BASE($v);
        if (!$prefix) {
            return [$prefix, $rest];
        }

        while (true) {
            list($chars, $rest) = $this->xPN_CHARS($rest);
            if (false !== $chars) {
                $prefix .= $chars;
                continue;
            }

            $dot = $this->x("\.", $rest);
            if ($dot) {
                $prefix .= '.';
                $rest = $dot[1];
                continue;
            }

            break;
        }

        list($tail, $rest) = $this->xPN_CHARS($rest);
        $prefix .= $tail ? $tail : '';

        return [$prefix, $rest];
    }
891
892
    /* 100 */
893
894 31
    /**
     * Tries to read the local part of a prefixed name from the start of $v.
     *
     * A shortcut pattern is tried first: a run of characters that are not
     * whitespace or one of ( ) { } [ ] ; , . is returned right away, unless the
     * text after it starts with a dot. Otherwise the name is assembled piece by
     * piece from single digits, xPN_CHARS_U() matches and - once the name is
     * non-empty - xPN_CHARS() matches that may be preceded by one dot.
     *
     * Differences to the previous implementation:
     *  - The xPN_CHARS() result is compared against false instead of being
     *    tested for truthiness. A matched "0" used to be dropped while the
     *    remainder was still advanced, so "v1.0" was read as "v1.".
     *  - "Name is non-empty" is checked against '' for the same reason (a name
     *    consisting of "0" is falsy in PHP).
     *  - A dot is only added to the name when an xPN_CHARS() match follows it.
     *    Before, the dot was consumed first, so input like "o.<http://...>"
     *    produced the name "o." and swallowed the dot.
     *
     * @param string $v input to scan
     *
     * @return array [local name ('' when nothing matched), remainder]
     */
    protected function xPN_LOCAL($v)
    {
        // Shortcut; the original code marks this path as "accelerator" / "@@testing".
        $quick = $this->x("([^\s\(\)\{\}\[\]\;\,\.]+)", $v, 's');
        if ($quick && !preg_match('/^\./', $quick[2])) {
            return [$quick[1], $quick[2]];
        }

        $local = '';
        $rest = $v;

        while (true) {
            // Stop as soon as x('\s', ...) matches on the remaining text.
            if ($this->x('\s', $rest)) {
                break;
            }

            $digit = $this->x('([0-9])', $rest);
            if ($digit) {
                $local .= $digit[1];
                $rest = $digit[2];
                continue;
            }

            list($chars, $afterChars) = $this->xPN_CHARS_U($rest);
            if ($chars) {
                $local .= $chars;
                $rest = $afterChars;
                continue;
            }

            // The remaining alternatives may not start a local name.
            if ('' === $local) {
                break;
            }

            // Look at an optional dot, but do not commit it yet.
            $pendingDot = '';
            $candidate = $rest;
            $dot = $this->x('(\.)', $rest);
            if ($dot && !preg_match('/^[\s\}]/s', $dot[2])) {
                $pendingDot = $dot[1];
                $candidate = $dot[2];
            }

            list($chars, $afterChars) = $this->xPN_CHARS($candidate);
            if (false === $chars) {
                // Nothing follows: leave $rest untouched so the dot stays in the remainder.
                break;
            }

            $local .= $pendingDot.$chars;
            $rest = $afterChars;
        }

        return [$local, $rest];
    }
927
928 53
    /**
     * Replaces backslash escapes in $v with the characters they stand for.
     *
     * Handled are \t, \n, \r, \" and \' as well as the numeric escapes
     * \uXXXX (4 hex digits) and \UXXXXXXXX (8 hex digits), which are replaced
     * by the UTF-8 encoding of the code point. Code points of 2097152 and
     * above are replaced by the empty string.
     *
     * Hex digits are accepted in upper and lower case; previously only upper
     * case digits were recognised, so an escape such as \u00e9 stayed in the
     * output unchanged.
     *
     * @param string $v text that may contain escapes
     *
     * @return string text with the escapes listed above resolved
     */
    protected function unescapeNtripleUTF($v)
    {
        // Fast exit: nothing to do without a backslash.
        if (false === strpos($v, '\\')) {
            return $v;
        }

        $mappings = ['t' => "\t", 'n' => "\n", 'r' => "\r", '\"' => '"', '\'' => "'"];
        foreach ($mappings as $in => $out) {
            $v = preg_replace('/\x5c(['.$in.'])/', $out, $v);
        }

        if (false === stripos($v, '\u')) {
            return $v;
        }

        // 8-digit escapes are tried before 4-digit ones; each round replaces
        // every occurrence of the escape that was found.
        while (preg_match('/\\\(U)([0-9A-Fa-f]{8})/', $v, $m) || preg_match('/\\\(u)([0-9A-Fa-f]{4})/', $v, $m)) {
            // hexdec() is declared as int|float; at most 8 hex digits are
            // matched, so the cast is not expected to lose information.
            $no = (int) hexdec($m[2]);

            if ($no < 128) {
                $char = \chr($no);
            } elseif ($no < 2048) {
                $char = \chr(($no >> 6) + 192).\chr(($no & 63) + 128);
            } elseif ($no < 65536) {
                $char = \chr(($no >> 12) + 224).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
            } elseif ($no < 2097152) {
                $char = \chr(($no >> 18) + 240).\chr((($no >> 12) & 63) + 128).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
            } else {
                $char = '';
            }

            $v = str_replace('\\'.$m[1].$m[2], $char, $v);
        }

        return $v;
    }
958
959 94
    /**
     * Tries to read a placeholder - "?" or "$" followed by a balanced
     * "{...}" group - from the start of $v.
     *
     * The brace group must be the first thing in the trimmed text after the
     * "?" / "$". The text between the outer braces is the placeholder name;
     * each distinct name is recorded once in $this->r['placeholders'].
     *
     * Names are compared strictly when checking for duplicates. With loose
     * comparison PHP treats numeric-looking strings such as "1e3" and "1000"
     * as equal, which would drop the second of two distinct placeholders.
     *
     * @param string $v input to scan
     *
     * @return array [['value' => name, 'type' => 'placeholder'], remainder], or [0, $v] on failure
     */
    protected function xPlaceholder($v)
    {
        $r = $this->x('(\?|\$)', $v);
        if (!$r) {
            return [0, $v];
        }

        // (?R) recurses into the whole pattern so nested braces stay balanced.
        if (!preg_match('/(\{(?:[^{}]+|(?R))*\})/', $r[2], $m) || 0 !== strpos(trim($r[2]), $m[1])) {
            return [0, $v];
        }

        $ph = substr($m[1], 1, -1);
        $rest = substr(trim($r[2]), \strlen($m[1]));

        if (!isset($this->r['placeholders'])) {
            $this->r['placeholders'] = [];
        }
        if (!\in_array($ph, $this->r['placeholders'], true)) {
            $this->r['placeholders'][] = $ph;
        }

        return [['value' => $ph, 'type' => 'placeholder'], $rest];
    }
979
}
980