Passed
Push — master ( 4e8b13...a3a48f )
by Konrad
04:16
created

TurtleParser::xDOUBLE()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 13
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 4.0312

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 4
eloc 7
c 1
b 0
f 0
nc 4
nop 1
dl 0
loc 13
ccs 7
cts 8
cp 0.875
crap 4.0312
rs 10
1
<?php
2
3
/*
4
 * This file is part of the sweetrdf/InMemoryStoreSqlite package and licensed under
5
 * the terms of the GPL-3 license.
6
 *
7
 * (c) Konrad Abicht <[email protected]>
8
 * (c) Benjamin Nowack
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace sweetrdf\InMemoryStoreSqlite\Parser;
15
16
use Exception;
17
use function sweetrdf\InMemoryStoreSqlite\calcURI;
18
use sweetrdf\InMemoryStoreSqlite\Log\Logger;
19
use sweetrdf\InMemoryStoreSqlite\NamespaceHelper;
20
use sweetrdf\InMemoryStoreSqlite\StringReader;
21
22
class TurtleParser extends BaseParser
23
{
24 96
    /**
     * Parser state marker; set to 0 on construction.
     *
     * Declared explicitly (instead of being created dynamically in the
     * constructor) because dynamic properties are deprecated since PHP 8.2.
     * Kept public so that the accessibility the dynamic property had is preserved.
     *
     * @var int
     */
    public $state = 0;

    /**
     * Input that was left over after the last parse() run.
     *
     * @var string
     */
    public $unparsed_code = '';

    /**
     * Upper bound for consecutive read iterations in parse() that yield no progress.
     *
     * @var int
     */
    public $max_parsing_loops = 500;

    /**
     * Forwards the collaborators to the base parser and resets the parser-local state.
     */
    public function __construct(Logger $logger, NamespaceHelper $namespaceHelper, StringReader $stringReader)
    {
        parent::__construct($logger, $namespaceHelper, $stringReader);

        $this->state = 0;
        $this->unparsed_code = '';
        $this->max_parsing_loops = 500;
    }
32
33 96
    /**
     * Anchored regex helper used by all grammar rules.
     *
     * Leading \xA0/\xC2 bytes are collapsed into one space and leading
     * "#..." comments are skipped. Then $re is matched at the start of the
     * input (after optional whitespace); the untouched remainder is captured
     * as the last group.
     *
     * @param string $re      pattern fragment (without delimiters)
     * @param string $v       input to match against
     * @param string $options PCRE modifiers appended to the pattern
     *
     * @return array|false the preg_match groups on success, false otherwise
     */
    protected function x($re, $v, $options = 'si')
    {
        $v = preg_replace('/^[\xA0\xC2]+/', ' ', $v);

        /* skip leading comments, one line at a time */
        $groups = [];
        while (1 === preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $v, $groups)) {
            $v = $groups[2];
        }

        $pattern = "/^\s*".$re.'(.*)$/'.$options;
        if (preg_match($pattern, $v, $groups)) {
            return $groups;
        }

        return false;
    }
44
45 2
    /**
     * Advances the blank node counter and returns the next blank node label.
     *
     * @return string label of the form "_:" + bnode prefix + counter
     */
    private function createBnodeID(): string
    {
        $this->bnode_id++;

        $label = '_:'.$this->bnode_prefix;

        return $label.$this->bnode_id;
    }
51
52 2
    /**
     * Appends one triple to the result list and bumps the triple counter.
     *
     * @param array $t triple to store
     */
    protected function addT(array $t): void
    {
        /* the counter doubles as the next free index */
        $this->triples[$this->t_count++] = $t;
    }
57
58
    /**
     * Returns the current value of the triple counter maintained by addT().
     */
    protected function countTriples()
    {
        return $this->t_count;
    }
62
63
    /**
     * Returns the input that parse() could not consume (empty string if none).
     */
    protected function getUnparsedCode()
    {
        return $this->unparsed_code;
    }
67
68 2
    /**
     * Parses Turtle input and stores the resulting triples via addT().
     *
     * The input is pulled from the reader in chunks. For each chunk the
     * prologue (base/prefix declarations) is consumed first, afterwards
     * triples blocks are consumed until no further progress is possible.
     * Whatever remains is carried over into the next chunk. Input that is
     * still left at the end is stored in $this->unparsed_code and reported
     * through the logger (unless an error was logged already).
     *
     * @param string $path handed to the reader's init()
     * @param string $data handed to the reader's init()
     *
     * @throws Exception if more than max_parsing_loops consecutive chunks were read without progress
     */
    public function parse(string $path, string $data = ''): void
    {
        $this->triples = [];
        $this->t_count = 0;
        $this->reader->init($path, $data);
        $this->base = $this->reader->getBase();
        $this->r = ['vars' => []];
        /* parse */
        $buffer = '';
        $more_triples = [];
        $sub_v = '';
        $sub_v2 = '';
        $loops = 0;
        $prologue_done = 0;
        while ($d = $this->reader->readStream(8192)) {
            $buffer .= $d;
            $sub_v = $buffer;
            do {
                $proceed = 0;
                if (!$prologue_done) {
                    $proceed = 1;
                    if ((list($sub_r, $sub_v) = $this->xPrologue($sub_v)) && $sub_r) {
                        $loops = 0;
                        $sub_v .= $this->reader->readStream(128);
                        /* in case we missed the final DOT in the previous prologue loop */
                        if ($sub_r = $this->x('\.', $sub_v)) {
                            $sub_v = $sub_r[1];
                        }
                        /* more prologue to come, use outer loop */
                        if ($this->x("\@?(base|prefix)", $sub_v)) {
                            $proceed = 0;
                        }
                    } else {
                        $prologue_done = 1;
                    }
                }
                if (
                    $prologue_done
                    && (list($sub_r, $sub_v, $more_triples, $sub_v2) = $this->xTriplesBlock($sub_v))
                    && \is_array($sub_r)
                ) {
                    $proceed = 1;
                    $loops = 0;
                    foreach ($sub_r as $t) {
                        $this->addT($t);
                    }
                }
            } while ($proceed);
            ++$loops;
            $buffer = $sub_v;
            /* guard against input that never yields a complete statement */
            if ($loops > $this->max_parsing_loops) {
                $msg = 'too many loops: '.$loops.'. Could not parse "'.substr($buffer, 0, 200).'..."';
                throw new Exception($msg);
            }
        }
        /* triples of the last, not DOT-terminated statement */
        foreach ($more_triples as $t) {
            $this->addT($t);
        }
        $sub_v = \count($more_triples) ? $sub_v2 : $sub_v;
        $buffer = $sub_v;
        $this->unparsed_code = $buffer;

        /* remove trailing comments */
        while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) {
            $this->unparsed_code = $m[2];
        }

        if ($this->unparsed_code && !$this->logger->hasEntries('error')) {
            /*
             * Flatten line breaks for the log excerpt. The class must only
             * contain CR and LF; a "|" inside a character class is a literal
             * pipe and would wrongly be replaced, too.
             */
            $rest = preg_replace('/[\x0a\x0d]/', ' ', substr($this->unparsed_code, 0, 30));
            if (trim($rest)) {
                $this->logger->error('Could not parse "'.$rest.'"');
            }
        }
    }
142
143 2
    /**
     * Consumes an optional base declaration followed by any number of prefix declarations.
     *
     * Declarations are only processed while no triple has been stored yet.
     *
     * @param string $v input
     *
     * @return array [1 if at least one declaration was consumed, otherwise 0; remaining input]
     */
    protected function xPrologue($v)
    {
        $found = 0;
        if ($this->t_count) {
            return [$found, $v];
        }

        [$base, $v] = $this->xBaseDecl($v);
        if ($base) {
            $this->base = $base;
            $found = 1;
        }

        for (;;) {
            [$decl, $v] = $this->xPrefixDecl($v);
            if (!$decl) {
                break;
            }
            $this->namespaceHelper->setPrefix($decl['prefix'], $decl['uri']);
            $found = 1;
        }

        return [$found, $v];
    }
159
160
    /* 3 */
161
162 96
    /**
     * Matches a base declaration ("@base <iri>" or "base <iri>"), optionally followed by a DOT.
     *
     * @param string $v input
     *
     * @return array [IRI reference as returned by xIRI_REF(), remaining input] or [0, $v] if nothing matched
     */
    protected function xBaseDecl($v)
    {
        $keyword = $this->x("\@?base\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$iri, $rest] = $this->xIRI_REF($keyword[1]);
        if (!$iri) {
            return [0, $v];
        }

        /* swallow the terminating DOT if it is present */
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [$iri, $rest];
    }
176
177
    /* 4 */
178
179 96
    /**
     * Matches a prefix declaration ("@prefix ns: <iri>" or "prefix ns: <iri>"), optionally followed by a DOT.
     *
     * @param string $v input
     *
     * @return array [['prefix' => ..., 'uri_ref' => ..., 'uri' => ...], remaining input]
     *               or [0, $v] if nothing matched; 'uri' is 'uri_ref' run through calcURI() with the current base
     */
    protected function xPrefixDecl($v)
    {
        $keyword = $this->x("\@?prefix\s+", $v);
        if (!$keyword) {
            return [0, $v];
        }

        [$prefix, $rest] = $this->xPNAME_NS($keyword[1]);
        if (!$prefix) {
            return [0, $v];
        }

        [$iriRef, $rest] = $this->xIRI_REF($rest);
        if (!$iriRef) {
            return [0, $v];
        }

        $uri = calcURI($iriRef, $this->base);

        /* swallow the terminating DOT if it is present */
        $dot = $this->x('\.', $rest);
        if ($dot) {
            $rest = $dot[1];
        }

        return [['prefix' => $prefix, 'uri_ref' => $iriRef, 'uri' => $uri], $rest];
    }
197
198
    /* 21.., 32.. */
199
200 96
    /**
     * Parses as many triples as possible from the start of $v.
     *
     * Implemented as a small state machine:
     *   1 = expecting subject, 2 = expecting predicate,
     *   3 = expecting object,  4 = expecting ".", ";", "," or "}".
     *
     * Triples are collected in $pre_r first and only moved to the result once
     * their statement has been terminated.
     *
     * @param string $v input
     *
     * @return array four elements:
     *               - list of completed triples, or 0 if there are none
     *               - remaining input after the last completed statement
     *               - triples collected for the current, not yet terminated statement
     *               - remaining input after those pending triples
     */
    protected function xTriplesBlock($v)
    {
        $pre_r = [];
        $r = [];
        $state = 1;
        $sub_v = $v;
        $buffer = $sub_v;
        do {
            $proceed = 0;
            if (1 == $state) {/* expecting subject */
                $t = ['type' => 'triple', 's' => '', 'p' => '', 'o' => '', 's_type' => '', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
                if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['value'];
                    $t['s_type'] = $sub_r['type'];
                    $state = 2;
                    $proceed = 1;
                    if ($sub_r = $this->x('(\}|\.)', $sub_v)) {
                        if ('placeholder' == $t['s_type']) {
                            $state = 4;
                        } else {
                            $this->logger->error('"'.$sub_r[1].'" after subject found.');
                        }
                    }
                } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                    if ($this->x('\.', $sub_v)) {
                        $this->logger->error('DOT after subject found.');
                    }
                } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['s'] = $sub_r['id'];
                    $t['s_type'] = $sub_r['type'];
                    $pre_r = array_merge($pre_r, $sub_r['triples']);
                    $state = 2;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\.', $sub_v)) {
                    $this->logger->error('Subject expected, DOT found.'.$sub_v);
                }
            }
            if (2 == $state) {/* expecting predicate */
                if ($sub_r = $this->x('a\s+', $sub_v)) {
                    /* "a" is shorthand for rdf:type */
                    $sub_v = $sub_r[1];
                    $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                    $t['p_type'] = 'uri';
                    $state = 3;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    if ('bnode' == $sub_r['type']) {
                        $this->logger->error('Blank node used as triple predicate');
                    }
                    $t['p'] = $sub_r['value'];
                    $t['p_type'] = $sub_r['type'];
                    $state = 3;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\.', $sub_v)) {
                    $state = 4;
                } elseif ($sub_r = $this->x('\}', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
            if (3 == $state) {/* expecting object */
                if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['value'];
                    $t['o_type'] = $sub_r['type'];
                    $t['o_lang'] = $sub_r['lang'] ?? '';
                    $t['o_datatype'] = $sub_r['datatype'] ?? '';
                    $pre_r[] = $t;
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /*
                     * $t is reused across ";" and "," within one statement, so
                     * both literal-only fields have to be cleared; otherwise a
                     * language tag of a previous literal object would leak
                     * onto this blank node object.
                     */
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                    $t['o'] = $sub_r['id'];
                    $t['o_type'] = $sub_r['type'];
                    /* see above: clear literal-only fields of the reused triple template */
                    $t['o_lang'] = '';
                    $t['o_datatype'] = '';
                    $pre_r = array_merge($pre_r, [$t], $sub_r['triples']);
                    $state = 4;
                    $proceed = 1;
                }
            }
            if (4 == $state) {/* expecting . or ; or , or } */
                if ($sub_r = $this->x('\.', $sub_v)) {
                    /* statement complete: flush pending triples, start over with a new subject */
                    $sub_v = $sub_r[1];
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $state = 1;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\;', $sub_v)) {
                    /* same subject, next predicate */
                    $sub_v = $sub_r[1];
                    $state = 2;
                    $proceed = 1;
                } elseif ($sub_r = $this->x('\,', $sub_v)) {
                    /* same subject and predicate, next object */
                    $sub_v = $sub_r[1];
                    $state = 3;
                    $proceed = 1;
                    if ($sub_r = $this->x('\}', $sub_v)) {
                        $this->logger->error('Object expected, } found.');
                    }
                }
                if ($sub_r = $this->x('(\}|\{|OPTIONAL|FILTER|GRAPH)', $sub_v)) {
                    $buffer = $sub_v;
                    $r = array_merge($r, $pre_r);
                    $pre_r = [];
                    $proceed = 0;
                }
            }
        } while ($proceed);

        return \count($r) ? [$r, $buffer, $pre_r, $sub_v] : [0, $buffer, $pre_r, $sub_v];
    }
322
323
    /* 39.. */
324
325 94
    /**
     * Parses a blank node property list: "[" predicate/object pairs "]".
     *
     * A fresh blank node ID is created as subject for all contained triples.
     * States: 2 = expecting predicate, 3 = expecting object,
     * 4 = expecting ".", ";", "," or "]".
     *
     * @param string $v input
     *
     * @return array [['id' => bnode ID, 'type' => 'bnode', 'triples' => [...]], remaining input]
     *               or [0, $v] if $v does not start with "[" or the list is never closed
     */
    protected function xBlankNodePropertyList($v)
    {
        if ($sub_r = $this->x('\[', $v)) {
            $sub_v = $sub_r[1];
            $s = $this->createBnodeID();
            $r = ['id' => $s, 'type' => 'bnode', 'triples' => []];
            $t = ['type' => 'triple', 's' => $s, 'p' => '', 'o' => '', 's_type' => 'bnode', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => ''];
            $state = 2;
            $closed = 0;
            do {
                $proceed = 0;
                if (2 == $state) {/* expecting predicate */
                    if ($sub_r = $this->x('a\s+', $sub_v)) {
                        /* "a" is shorthand for rdf:type */
                        $sub_v = $sub_r[1];
                        $t['p'] = NamespaceHelper::NAMESPACE_RDF.'type';
                        $t['p_type'] = 'uri';
                        $state = 3;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                        $t['p'] = $sub_r['value'];
                        $t['p_type'] = $sub_r['type'];
                        $state = 3;
                        $proceed = 1;
                    }
                }
                if (3 == $state) {/* expecting object */
                    if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['value'];
                        $t['o_type'] = $sub_r['type'];
                        $t['o_lang'] = $sub_r['lang'] ?? '';
                        $t['o_datatype'] = $sub_r['datatype'] ?? '';
                        $r['triples'][] = $t;
                        $state = 4;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['id'];
                        $t['o_type'] = $sub_r['type'];
                        /*
                         * $t is reused for every pair inside the brackets, so
                         * both literal-only fields have to be cleared;
                         * otherwise a language tag of a previous literal
                         * object would leak onto this blank node object.
                         */
                        $t['o_lang'] = '';
                        $t['o_datatype'] = '';
                        $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                        $state = 4;
                        $proceed = 1;
                    } elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
                        $t['o'] = $sub_r['id'];
                        $t['o_type'] = $sub_r['type'];
                        /* see above: clear literal-only fields of the reused triple template */
                        $t['o_lang'] = '';
                        $t['o_datatype'] = '';
                        $r['triples'] = array_merge($r['triples'], [$t], $sub_r['triples']);
                        $state = 4;
                        $proceed = 1;
                    }
                }
                if (4 == $state) {/* expecting . or ; or , or ] */
                    if ($sub_r = $this->x('\.', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 1;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\;', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 2;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\,', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $state = 3;
                        $proceed = 1;
                    }
                    if ($sub_r = $this->x('\]', $sub_v)) {
                        $sub_v = $sub_r[1];
                        $proceed = 0;
                        $closed = 1;
                    }
                }
            } while ($proceed);
            if ($closed) {
                return [$r, $sub_v];
            }

            /* opening bracket without matching closing bracket: nothing is consumed */
            return [0, $v];
        }

        return [0, $v];
    }
407
408
    /* 40.. */
409
410 94
    /**
     * Parses a collection "( item item ... )" into an rdf:first / rdf:rest chain.
     *
     * Each item gets its own blank node; the last node points to rdf:nil.
     *
     * @param string $v input
     *
     * @return array [['id' => head bnode ID, 'type' => 'bnode', 'triples' => [...]], remaining input]
     *               or [0, $v] if $v does not start with "(" or the collection is never closed
     */
    protected function xCollection($v)
    {
        $open = $this->x('\(', $v);
        if (!$open) {
            return [0, $v];
        }

        $rest = $open[1];
        $node = $this->createBnodeID();
        $list = ['id' => $node, 'type' => 'bnode', 'triples' => []];

        while (true) {
            /* one list item: plain term, nested collection or blank node property list */
            if (([$item, $rest] = $this->xVarOrTerm($rest)) && $item) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $item['value'],
                    'o_type' => $item['type'],
                    'o_lang' => $item['lang'] ?? '',
                    'o_datatype' => $item['datatype'] ?? '',
                ];
            } elseif (([$item, $rest] = $this->xCollection($rest)) && $item) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'p_type' => 'uri',
                    'o' => $item['id'],
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $list['triples'] = array_merge($list['triples'], $item['triples']);
            } elseif (([$item, $rest] = $this->xBlankNodePropertyList($rest)) && $item) {
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    'p' => NamespaceHelper::NAMESPACE_RDF.'first',
                    'o' => $item['id'],
                    's_type' => 'bnode',
                    'p_type' => 'uri',
                    'o_type' => $item['type'],
                    'o_lang' => '',
                    'o_datatype' => '',
                ];
                $list['triples'] = array_merge($list['triples'], $item['triples']);
            } else {
                /* no further item and no closing parenthesis seen: give up */
                break;
            }

            $close = $this->x('\)', $rest);
            if ($close) {
                /* end of list: terminate the chain with rdf:nil */
                $list['triples'][] = [
                    'type' => 'triple',
                    's' => $node,
                    's_type' => 'bnode',
                    'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                    'p_type' => 'uri',
                    'o' => NamespaceHelper::NAMESPACE_RDF.'nil',
                    'o_type' => 'uri',
                    'o_lang' => '',
                    'o_datatype' => '',
                ];

                return [$list, $close[1]];
            }

            /* link to a fresh node that will carry the next item */
            $nextNode = $this->createBnodeID();
            $list['triples'][] = [
                'type' => 'triple',
                's' => $node,
                'p' => NamespaceHelper::NAMESPACE_RDF.'rest',
                'o' => $nextNode,
                's_type' => 'bnode',
                'p_type' => 'uri',
                'o_type' => 'bnode',
                'o_lang' => '',
                'o_datatype' => '',
            ];
            $node = $nextNode;
        }

        return [0, $v];
    }
501
502
    /* 42 */
503
504 96
    /**
     * Tries to read a variable first and a graph term second.
     *
     * @param string $v input
     *
     * @return array [term array, remaining input] or [0, $v] if neither matched
     */
    protected function xVarOrTerm($v)
    {
        foreach (['xVar', 'xGraphTerm'] as $rule) {
            [$term, $rest] = $this->$rule($v);
            if ($term) {
                return [$term, $rest];
            }
        }

        return [0, $v];
    }
514
515
    /* 44, 74.., 75.. */
516
517 96
    /**
     * Matches a variable ("?name" or "$name") and registers its name in $this->r['vars'].
     *
     * @param string $v input
     *
     * @return array [['value' => variable name, 'type' => 'var'], remaining input] or [0, $v] if nothing matched
     */
    protected function xVar($v)
    {
        if ($r = $this->x('(\?|\$)([^\s]+)', $v)) {
            if ((list($sub_r, $sub_v) = $this->xVARNAME($r[2])) && $sub_r) {
                /*
                 * Strict comparison: variable names are strings, and a loose
                 * in_array() would treat numeric-looking names such as "1"
                 * and "01" as the same variable.
                 */
                if (!\in_array($sub_r, $this->r['vars'], true)) {
                    $this->r['vars'][] = $sub_r;
                }

                /* remainder = what xVARNAME() left of the token + everything after the token */
                return [['value' => $sub_r, 'type' => 'var'], $sub_v.$r[3]];
            }
        }

        return [0, $v];
    }
531
532
    /* 45 */
533
534 96
    /**
     * Tries the graph term rules in a fixed order and returns the first match.
     *
     * Rules returning a plain string get wrapped into ['value' => ...]; a
     * missing 'type' is filled in with the rule's default type.
     *
     * @param string $v input
     *
     * @return array [term array with at least 'value' and 'type', remaining input] or [0, $v]
     */
    protected function xGraphTerm($v)
    {
        $defaultTypes = [
            'IRIref' => 'uri',
            'RDFLiteral' => 'literal',
            'NumericLiteral' => 'literal',
            'BooleanLiteral' => 'literal',
            'BlankNode' => 'bnode',
            'NIL' => 'uri',
            'Placeholder' => 'placeholder',
        ];

        foreach ($defaultTypes as $rule => $defaultType) {
            $method = 'x'.$rule;
            [$term, $rest] = $this->$method($v);
            if (!$term) {
                continue;
            }

            if (!\is_array($term)) {
                $term = ['value' => $term];
            }
            if (!isset($term['type'])) {
                $term['type'] = $defaultType;
            }

            return [$term, $rest];
        }

        return [0, $v];
    }
558
559
    /* 60 */
560
561 95
    /**
     * Matches a quoted string with an optional language tag or "^^" datatype.
     *
     * @param string $v input
     *
     * @return array [literal array ('value', 'type', 'sub_type' and optionally 'lang' or 'datatype'), remaining input]
     *               or [0, $v] if $v does not start with a string
     */
    protected function xRDFLiteral($v)
    {
        if ((list($sub_r, $sub_v) = $this->xString($v)) && $sub_r) {
            $sub_r['value'] = $this->unescapeNtripleUTF($sub_r['value']);
            $r = $sub_r;
            if ((list($sub_r, $sub_v) = $this->xLANGTAG($sub_v)) && $sub_r) {
                $r['lang'] = $sub_r;
            } elseif (
                /* the datatype marker must follow the string without whitespace */
                !$this->x('\s', $sub_v)
                && ($sub_r = $this->x('\^\^', $sub_v))
                && (list($sub_r, $sub_v) = $this->xIRIref($sub_r[1]))
                /*
                 * xIRIref() yields the IRI as a string (or 0 on failure), so
                 * the value itself has to be tested. Indexing it with [1]
                 * would inspect the second character of the IRI instead and
                 * raise a warning for the failure value 0.
                 */
                && $sub_r
            ) {
                $r['datatype'] = $sub_r;
            }

            return [$r, $sub_v];
        }

        return [0, $v];
    }
582
583
    /* 61.., 62.., 63.., 64.. */
584
585 94
    /**
     * Matches an optionally signed number and tags it with the matching XSD datatype.
     *
     * The rules are tried from most to least specific: double, decimal, integer.
     *
     * @param string $v input
     *
     * @return array [['value' => ..., 'type' => 'literal', 'datatype' => ...], remaining input] or [0, $v]
     */
    protected function xNumericLiteral($v)
    {
        $sign = $this->x('(\-|\+)?', $v);
        $prefix = $sign[1];
        $rest = $sign[2];

        $rules = ['DOUBLE' => 'double', 'DECIMAL' => 'decimal', 'INTEGER' => 'integer'];
        foreach ($rules as $rule => $xsd) {
            $method = 'x'.$rule;
            [$number, $rest] = $this->$method($rest);
            /* the number rules signal "no match" with false, so "0" stays a valid result */
            if (false === $number) {
                continue;
            }

            $literal = [
                'value' => $prefix.$number,
                'type' => 'literal',
                'datatype' => NamespaceHelper::NAMESPACE_XSD.$xsd,
            ];

            return [$literal, $rest];
        }

        return [0, $v];
    }
605
606
    /* 65.. */
607
608 94
    /**
     * Matches the keywords "true" or "false" (case-insensitive through x()'s default modifiers).
     *
     * @param string $v input
     *
     * @return array [matched keyword, remaining input] or [0, $v]
     */
    protected function xBooleanLiteral($v)
    {
        $match = $this->x('(true|false)', $v);
        if (!$match) {
            return [0, $v];
        }

        return [$match[1], $match[2]];
    }
616
617
    /* 66.., 87.., 88.., 89.., 90.., 91.. */
618
619 95
    /**
     * Matches a quoted string: '...', "...", '''...''' or """...""".
     *
     * Largely simplified, may need some tweaks in following revisions.
     *
     * @param string $v input
     *
     * @return array [['value' => raw content, 'type' => 'literal', 'sub_type' => ...], remaining input]
     *               or [0, $v] if there is no opening or no unescaped closing delimiter
     */
    protected function xString($v)
    {
        $opening = [];
        if (!preg_match('/^\s*([\']{3}|\'|[\"]{3}|\")(.*)$/s', $v, $opening)) {
            return [0, $v];
        }

        $delim = $opening[1];
        $rest = $opening[2];
        $sub_types = ["'''" => 'literal_long1', '"""' => 'literal_long2', "'" => 'literal1', '"' => 'literal2'];
        $sub_type = $sub_types[$delim];

        $offset = 0;
        while (false !== ($end = strpos($rest, $delim, $offset))) {
            $tail = substr($rest, $end + \strlen($delim));
            $content = substr($rest, 0, $end);

            /* an odd number of trailing backslashes means the delimiter itself is escaped */
            $backslashes = [];
            $escaped = preg_match('/([\x5c]+)$/s', $content, $backslashes)
                && 0 !== \strlen($backslashes[1]) % 2;

            if ($escaped) {
                $offset = $end + 1;
                continue;
            }

            if (false !== $content) {
                return [['value' => $content, 'type' => 'literal', 'sub_type' => $sub_type], $tail];
            }

            return [0, $v];
        }

        return [0, $v];
    }
653
654
    /* 67 */
655
656 96
    /**
     * Matches either an "<...>" IRI reference (resolved with calcURI() against the current base) or a prefixed name.
     *
     * @param string $v input
     *
     * @return array [IRI, remaining input] or [0, $v]
     */
    protected function xIRIref($v)
    {
        [$ref, $v] = $this->xIRI_REF($v);
        if ($ref) {
            return [calcURI($ref, $this->base), $v];
        }

        [$name, $v] = $this->xPrefixedName($v);
        if ($name) {
            return [$name, $v];
        }

        return [0, $v];
    }
666
667
    /* 68 */
668
669 96
    /**
     * Matches "prefix:local" or a bare "prefix:" and expands it using the namespace helper.
     *
     * @param string $v input
     *
     * @return array [expanded IRI, remaining input] or [0, $v] (also when the prefix is unknown)
     */
    protected function xPrefixedName($v)
    {
        [$expanded, $v] = $this->xPNAME_LN($v);
        if ($expanded) {
            return [$expanded, $v];
        }

        [$prefix, $rest] = $this->xPNAME_NS($v);
        if (!$prefix) {
            return [0, $v];
        }

        if ($this->namespaceHelper->hasPrefix($prefix)) {
            return [$this->namespaceHelper->getNamespace($prefix), $rest];
        }

        return [0, $v];
    }
681
682
    /* 69.., 73.., 93, 94..  */
683
684 94
    /**
     * Matches a labelled blank node ("_:label") or an anonymous one ("[]").
     *
     * @param string $v input
     *
     * @return array [['type' => 'bnode', 'value' => ...], remaining input] or [0, $v]
     */
    protected function xBlankNode($v)
    {
        $labelled = $this->x('\_\:', $v);
        if ($labelled) {
            [$local, $rest] = $this->xPN_LOCAL($labelled[1]);
            if ($local) {
                return [['type' => 'bnode', 'value' => '_:'.$local], $rest];
            }
        }

        /* empty brackets: generate a new blank node ID */
        $anonymous = $this->x('\[[\x20\x9\xd\xa]*\]', $v);
        if ($anonymous) {
            return [['type' => 'bnode', 'value' => $this->createBnodeID()], $anonymous[1]];
        }

        return [0, $v];
    }
695
696
    /* 70.. @@sync with SPARQLParser */
697
698 2
    /**
     * Matches an IRI reference in angle brackets.
     *
     * Three forms are recognised, in this order: a placeholder "<${...}>"
     * (accepted only if xPlaceholder() accepts its content), the empty
     * reference "<>", and a regular "<...>" reference.
     *
     * @param string $v input
     *
     * @return array [content between the brackets (true if it is empty or falsy), remaining input] or [0, $v]
     */
    protected function xIRI_REF($v)
    {
        //if ($r = $this->x('\<([^\<\>\"\{\}\|\^\'[:space:]]*)\>', $v)) {
        $placeholder = $this->x('\<(\$\{[^\>]*\})\>', $v);
        if ($placeholder && $this->xPlaceholder($placeholder[1])) {
            return [$placeholder[1], $placeholder[2]];
        }

        $empty = $this->x('\<\>', $v);
        if ($empty) {
            return [true, $empty[1]];
        }

        $regular = $this->x('\<([^\s][^\<\>]*)\>', $v);
        if ($regular) {
            return [$regular[1] ?: true, $regular[2]];
        }

        return [0, $v];
    }
711
712
    /* 71 */
713
714 96
    /**
     * Matches a namespace prefix including its colon; the prefix part may be empty.
     *
     * @param string $v input
     *
     * @return array [prefix + ":", remaining input] or [0, $v] if no colon follows
     */
    protected function xPNAME_NS($v)
    {
        [$name, $rest] = $this->xPN_PREFIX($v);

        $colon = $this->x("\:", $rest);
        if (!$colon) {
            return [0, $v];
        }

        $prefix = $name ?: '';

        return [$prefix.':', $colon[1]];
    }
721
722
    /* 72 */
723
724 96
    /**
     * Matches "prefix:local" and expands it with the namespace registered for the prefix.
     *
     * The local part must follow the colon without whitespace.
     *
     * @param string $v input
     *
     * @return array [namespace + local name, remaining input] or [0, $v] (also when the prefix is unknown)
     */
    protected function xPNAME_LN($v)
    {
        [$prefix, $rest] = $this->xPNAME_NS($v);
        if (!$prefix || $this->x('\s', $rest)) {
            return [0, $v];
        }

        [$local, $rest] = $this->xPN_LOCAL($rest);
        if (!$local) {
            return [0, $v];
        }

        if (!$this->namespaceHelper->hasPrefix($prefix)) {
            return [0, $v];
        }

        return [$this->namespaceHelper->getNamespace($prefix).$local, $rest];
    }
738
739
    /* 76 */
740
741 53
    /**
     * Matches a language tag ("@en", "@en-gb", ...) that follows without leading whitespace.
     *
     * @param string $v input
     *
     * @return array [tag without "@", remaining input] or [0, $v]
     */
    protected function xLANGTAG($v)
    {
        if ($this->x('\s', $v)) {
            return [0, $v];
        }

        $tag = $this->x('\@([a-z]+(\-[a-z0-9]+)*)', $v);
        if (!$tag) {
            return [0, $v];
        }

        /* group 2 is the last "-subtag" repetition, group 3 the remaining input */
        return [$tag[1], $tag[3]];
    }
749
750
    /* 77.. */
751
752 94
    /**
     * Matches a run of digits.
     *
     * @param string $v input
     *
     * @return array [digits, remaining input] or [false, $v]
     */
    protected function xINTEGER($v)
    {
        $digits = $this->x('([0-9]+)', $v);

        return $digits ? [$digits[1], $digits[2]] : [false, $v];
    }
760
761
    /* 78.. */
762
763 94
    /**
     * Matches a decimal number: digits followed by "." and optional digits, or "." followed by digits.
     *
     * @param string $v input
     *
     * @return array [matched number, remaining input] or [false, $v]
     */
    protected function xDECIMAL($v)
    {
        $forms = [
            '([0-9]+\.[0-9]*)',
            '(\.[0-9]+)',
        ];

        foreach ($forms as $form) {
            $match = $this->x($form, $v);
            if ($match) {
                return [$match[1], $match[2]];
            }
        }

        return [false, $v];
    }
774
775
    /* 79.., 86.. */
776
777 94
    /**
     * Matches a number with exponent; three mantissa forms are accepted: "1.5E3", ".5E3" and "15E3".
     *
     * @param string $v input
     *
     * @return array [matched number, remaining input] or [false, $v]
     */
    protected function xDOUBLE($v)
    {
        $forms = [
            '([0-9]+\.[0-9]*E[\+\-]?[0-9]+)',
            '(\.[0-9]+E[\+\-]?[0-9]+)',
            '([0-9]+E[\+\-]?[0-9]+)',
        ];

        foreach ($forms as $form) {
            $match = $this->x($form, $v);
            if ($match) {
                return [$match[1], $match[2]];
            }
        }

        return [false, $v];
    }
791
792
    /* 92 */
793
794 94
    /**
     * Parses an empty collection "()" (optionally containing whitespace only)
     * and maps it to the rdf:nil resource.
     *
     * @param string $v remaining unparsed input
     *
     * @return array two elements: a term array of type "uri" pointing to rdf:nil
     *               and the remaining input, or 0 and the untouched input
     */
    protected function xNIL($v)
    {
        $match = $this->x('\([\x20\x9\xd\xa]*\)', $v);
        if (!$match) {
            return [0, $v];
        }

        $nil = ['type' => 'uri', 'value' => NamespaceHelper::NAMESPACE_RDF.'nil'];

        // The pattern has no capture group of its own, so index 1 is the rest.
        return [$nil, $match[1]];
    }
802
803
    /* 95.. */
804
805 95
    /**
     * Parses a run of base name characters: letters a-z or a "\u" escape
     * followed by one to four hex digits.
     *
     * @param string $v remaining unparsed input
     *
     * @return array two elements: the matched characters and the remaining input,
     *               or 0 and the untouched input if nothing matched
     */
    protected function xPN_CHARS_BASE($v)
    {
        $match = $this->x("([a-z]+|\\\u[0-9a-f]{1,4})", $v);

        return $match ? [$match[1], $match[2]] : [0, $v];
    }
813
814
    /* 96 */
815
816 91
    /**
     * Parses base name characters (see xPN_CHARS_BASE()) or a single underscore.
     *
     * @param string $v remaining unparsed input
     *
     * @return array two elements: the matched characters and the remaining input,
     *               or 0 and the untouched input if nothing matched
     */
    protected function xPN_CHARS_U($v)
    {
        list($chars, $rest) = $this->xPN_CHARS_BASE($v);
        if ($chars) {
            return [$chars, $rest];
        }

        $underscore = $this->x('(_)', $v);

        return $underscore ? [$underscore[1], $underscore[2]] : [0, $v];
    }
826
827
    /* 97.. */
828
829 90
    /**
     * Parses a variable name by repeatedly consuming digits, name characters
     * (see xPN_CHARS_U()) and, once the name is non-empty, a few extra characters.
     *
     * @param string $v remaining unparsed input
     *
     * @return array two elements: the collected name (possibly an empty string)
     *               and the remaining input
     */
    protected function xVARNAME($v)
    {
        $name = '';

        while (true) {
            $digits = $this->x('([0-9]+)', $v);
            if ($digits) {
                $name .= $digits[1];
                $v = $digits[2];
                continue;
            }

            list($chars, $rest) = $this->xPN_CHARS_U($v);
            if ($chars) {
                $name .= $chars;
                $v = $rest;
                continue;
            }

            // The extra characters are only allowed after the first character.
            if (!$name) {
                break;
            }

            // NOTE(review): PCRE reads "\x300" as "\x30" followed by "0", so this class
            // probably does not cover U+0300-U+036F as seemingly intended - confirm.
            $extra = $this->x('([\xb7\x300-\x36f]+)', $v);
            if (!$extra) {
                break;
            }

            $name .= $extra[1];
            $v = $extra[2];
        }

        return [$name, $v];
    }
851
852
    /* 98.. */
853
854 6
    /**
     * Parses name characters: either what xPN_CHARS_U() accepts or one
     * character out of an additional class (hyphen, digits and a few others).
     *
     * @param string $v remaining unparsed input
     *
     * @return array two elements: the matched characters and the remaining input,
     *               or false and the untouched input if nothing matched
     */
    protected function xPN_CHARS($v)
    {
        list($chars, $rest) = $this->xPN_CHARS_U($v);
        if ($chars) {
            return [$chars, $rest];
        }

        // NOTE(review): PCRE reads "\x300" as "\x30" followed by "0", so this class
        // probably does not cover U+0300-U+036F as seemingly intended - confirm.
        $single = $this->x('([\-0-9\xb7\x300-\x36f])', $v);

        return $single ? [$single[1], $single[2]] : [false, $v];
    }
864
865
    /* 99 */
866
867 96
    /**
     * Parses the prefix part of a prefixed name.
     *
     * A broad "accelerator" pattern is tried first. Only if it does not match,
     * the prefix is assembled character class by character class.
     *
     * @param string $v remaining unparsed input
     *
     * @return array two elements: the prefix and the remaining input; the first
     *               element is 0 (with the untouched input) if nothing matched
     */
    protected function xPN_PREFIX($v)
    {
        /* accelerator, marked as "@@testing" in the original code */
        $quick = $this->x("([^\s\:\(\)\{\}\;\,]+)", $v, 's');
        if ($quick) {
            return [$quick[1], $quick[2]];
        }

        list($prefix, $rest) = $this->xPN_CHARS_BASE($v);
        if (!$prefix) {
            // xPN_CHARS_BASE() already produced the failure result
            return [$prefix, $rest];
        }

        // Consume name characters and dots for as long as either of them matches.
        while (true) {
            list($chunk, $rest) = $this->xPN_CHARS($rest);
            if (false !== $chunk) {
                $prefix .= $chunk;
                continue;
            }

            $dot = $this->x("\.", $rest);
            if (!$dot) {
                break;
            }

            $prefix .= '.';
            // The pattern has no capture group of its own, so index 1 is the rest.
            $rest = $dot[1];
        }

        list($tail, $rest) = $this->xPN_CHARS($rest);
        $prefix .= $tail ?: '';

        return [$prefix, $rest];
    }
891
892
    /* 100 */
893
894 31
    /**
     * Parses the local part of a prefixed name.
     *
     * A broad "accelerator" pattern is tried first and accepted unless the
     * remaining input starts with a dot. Otherwise the local name is assembled
     * piece by piece from digits, name characters and dots.
     *
     * @param string $v remaining unparsed input
     *
     * @return array two elements: the local name (possibly an empty string)
     *               and the remaining input
     */
    protected function xPN_LOCAL($v)
    {
        /* accelerator, marked as "@@testing" in the original code */
        $quick = $this->x("([^\s\(\)\{\}\[\]\;\,\.]+)", $v, 's');
        if ($quick && !preg_match('/^\./', $quick[2])) {
            return [$quick[1], $quick[2]];
        }

        $local = '';
        $rest = $v;

        while (true) {
            // A hit of the whitespace probe ends the local name.
            if ($this->x('\s', $rest)) {
                return [$local, $rest];
            }

            $digit = $this->x('([0-9])', $rest);
            if ($digit) {
                $local .= $digit[1];
                $rest = $digit[2];
                continue;
            }

            list($chars, $rest) = $this->xPN_CHARS_U($rest);
            if ($chars) {
                $local .= $chars;
                continue;
            }

            // Dots and the extended character class must not start a local name.
            if (!$local) {
                break;
            }

            // A dot only belongs to the name if it is not followed by whitespace or "}".
            $dot = $this->x('(\.)', $rest);
            if ($dot && !preg_match('/^[\s\}]/s', $dot[2])) {
                $local .= $dot[1];
                $rest = $dot[2];
            }

            list($chars, $rest) = $this->xPN_CHARS($rest);
            if (!$chars) {
                break;
            }
            $local .= $chars;
        }

        return [$local, $rest];
    }
927
928 53
    /**
     * Resolves backslash escapes inside a string.
     *
     * Handled are the simple escapes \t, \n, \r, \" and \' as well as the numeric
     * escapes \uXXXX and \UXXXXXXXX, which get replaced by the UTF-8 encoding of
     * the given code point. Hex digits are accepted in upper and lower case.
     * Code points of 2097152 and above are replaced by an empty string.
     *
     * @param string $v string which may contain escape sequences
     *
     * @return string string with the escape sequences resolved
     */
    protected function unescapeNtripleUTF($v)
    {
        // Nothing to do without a backslash.
        if (false === strpos($v, '\\')) {
            return $v;
        }

        $mappings = ['t' => "\t", 'n' => "\n", 'r' => "\r", '\"' => '"', '\'' => "'"];
        foreach ($mappings as $in => $out) {
            $v = preg_replace('/\x5c(['.$in.'])/', $out, $v);
        }

        if (false === strpos(strtolower($v), '\u')) {
            return $v;
        }

        // All numeric escapes are resolved in one pass, so that characters produced
        // by an escape (e.g. a backslash from \u005C) are never interpreted again.
        return preg_replace_callback(
            '/\\\\(?:U([0-9A-Fa-f]{8})|u([0-9A-Fa-f]{4}))/',
            static function (array $m) {
                // Group 1 is filled for \U (8 digits), group 2 for \u (4 digits).
                $hex = '' !== $m[1] ? $m[1] : $m[2];
                // hexdec() may return a float, chr() and the bit operations need an int.
                $no = (int) hexdec($hex);

                if ($no < 128) {
                    return \chr($no);
                }
                if ($no < 2048) {
                    return \chr(($no >> 6) + 192).\chr(($no & 63) + 128);
                }
                if ($no < 65536) {
                    return \chr(($no >> 12) + 224).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
                }
                if ($no < 2097152) {
                    return \chr(($no >> 18) + 240).\chr((($no >> 12) & 63) + 128).\chr((($no >> 6) & 63) + 128).\chr(($no & 63) + 128);
                }

                return '';
            },
            $v
        );
    }
958
959 94
    /**
     * Parses a placeholder of the form ?{...} or ${...}; the braces may be nested.
     *
     * Each placeholder name found is additionally registered once in
     * $this->r['placeholders'].
     *
     * @param string $v remaining unparsed input
     *
     * @return array two elements: a term array of type "placeholder" and the
     *               remaining input, or 0 and the untouched input
     */
    protected function xPlaceholder($v)
    {
        $sigil = $this->x('(\?|\$)', $v);
        if (!$sigil) {
            return [0, $v];
        }

        $afterSigil = $sigil[2];

        // The (recursively balanced) brace group has to start right after the sigil.
        if (
            !preg_match('/(\{(?:[^{}]+|(?R))*\})/', $afterSigil, $m)
            || 0 !== strpos(trim($afterSigil), $m[1])
        ) {
            return [0, $v];
        }

        $ph = substr($m[1], 1, -1);
        $rest = substr(trim($afterSigil), \strlen($m[1]));

        if (!isset($this->r['placeholders'])) {
            $this->r['placeholders'] = [];
        }
        // Strict comparison: otherwise numeric looking names such as "1" and "1.0"
        // would be treated as one and the same placeholder.
        if (!\in_array($ph, $this->r['placeholders'], true)) {
            $this->r['placeholders'][] = $ph;
        }

        return [['value' => $ph, 'type' => 'placeholder'], $rest];
    }
979
}
980