Passed
Branch extract-store (f24e42)
by Konrad
04:37
created

LoadQueryHandler::getTripleID()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 30
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 3.0416

Importance

Changes 0
Metric Value
cc 3
eloc 18
nc 3
nop 1
dl 0
loc 30
ccs 15
cts 18
cp 0.8333
crap 3.0416
rs 9.6666
c 0
b 0
f 0
1
<?php
2
3
/*
4
 * This file is part of the sweetrdf/InMemoryStoreSqlite package and licensed under
5
 * the terms of the GPL-3 license.
6
 *
7
 * (c) Konrad Abicht <[email protected]>
8
 * (c) Benjamin Nowack
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace sweetrdf\InMemoryStoreSqlite\Store\QueryHandler;
15
16
use function sweetrdf\InMemoryStoreSqlite\calcURI;
17
use sweetrdf\InMemoryStoreSqlite\Store\TurtleLoader;
18
19
class LoadQueryHandler extends QueryHandler
20
{
21
    /**
     * URI of the graph the current load run writes to (set by runQuery()).
     */
    private string $target_graph;

    /**
     * Number of triples handled by addT() during the current load run.
     *
     * @todo required?
     */
    private int $t_count;

    /**
     * addT() forces a write of the SQL buffers every time this many triples were added.
     */
    private int $write_buffer_size = 2500;

    /**
     * If truthy, addT() stores blank node labels as given instead of rewriting them.
     *
     * Left untyped on purpose: runQuery() stores whatever the caller passed in.
     *
     * @var int|bool
     */
    private $keep_bnode_ids = 0;

    /**
     * Reset to 0 by runQuery(); not read anywhere in this class.
     */
    private int $t_start = 0;

    /**
     * Term ID handed out to the next term that is neither cached nor found in the DB.
     */
    private int $max_term_id = 1;

    /**
     * Triple ID handed out to the next triple that is neither cached nor found in the DB.
     */
    private int $max_triple_id = 1;

    /**
     * Term ID cache of the current run: term value => [table shortcut ('id'|'s'|'o') => term ID].
     *
     * @var array<int|string, array<string, int|string>>
     */
    private array $term_ids = [];

    /**
     * Triple ID cache of the current run: serialized triple => triple ID.
     *
     * @var array<string, int|string>
     */
    private array $triple_ids = [];

    /**
     * Pending multi-row INSERT statements, keyed by table name.
     *
     * @var array<string, string>
     */
    private array $sql_buffers = [];
29
30 24
    /**
     * Runs a load: lets a TurtleLoader parse the given source and writes the
     * triples it reports (via addT()) to the database.
     *
     * @param array    $infos          Reads $infos['query']['url'] and $infos['query']['target_graph'].
     *                                 The target graph falls back to the URL if it is empty.
     * @param string   $data           Passed to the loader together with the URL.
     * @param int|bool $keep_bnode_ids If truthy, blank node labels are stored unchanged.
     *
     * @return array 't_count' holds the number of triples added, 'load_time' is always 0
     */
    public function runQuery($infos, $data = '', $keep_bnode_ids = 0)
    {
        $source = $infos['query']['url'];
        $targetGraph = $infos['query']['target_graph'];

        $this->target_graph = calcURI($targetGraph ?: $source);
        $this->keep_bnode_ids = $keep_bnode_ids;

        /* the loader reports each parsed triple back to this handler */
        $turtleLoader = new TurtleLoader();
        $turtleLoader->setCaller($this);

        /* counters */
        $this->t_count = 0;
        $this->t_start = 0;

        /* first free IDs for new terms and triples */
        $this->max_term_id = $this->getMaxTermID();
        $this->max_triple_id = $this->getMaxTripleID();

        /* per-run caches and pending INSERT statements start empty */
        $this->term_ids = [];
        $this->triple_ids = [];
        $this->sql_buffers = [];

        $turtleLoader->parse($source, $data);

        /* write whatever is still buffered */
        $this->checkSQLBuffers(1);

        $result = [];
        $result['t_count'] = $this->t_count;
        $result['load_time'] = 0;

        return $result;
    }
61
62 24
    /**
     * Buffers one triple (plus its graph assignment) for insertion.
     *
     * Unless bnode IDs are to be kept, blank node labels of subject and object
     * are rewritten to "_:b<crc32 of graph ID and label>_<label suffix>".
     * After every $write_buffer_size triples the SQL buffers are written;
     * after every second write the ID caches are reset as well.
     *
     * @param string $s      Subject value
     * @param string $p      Predicate value
     * @param string $o      Object value
     * @param string $s_type 'uri' or 'bnode' ('literal' is accepted by the type map, too)
     * @param string $o_type 'uri', 'bnode' or 'literal'
     * @param string $o_dt   Datatype of the object, if any
     * @param string $o_lang Language of the object, if any
     */
    public function addT($s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '')
    {
        $type_ids = ['uri' => '0', 'bnode' => '1', 'literal' => '2'];
        $g = $this->getStoredTermID($this->target_graph, '0', 'id');

        /* builds the graph-specific replacement for a blank node label */
        $scopeBnode = static function ($label) use ($g) {
            $suffix = \strlen($label) > 12 ? substr(substr($label, 2), -10) : substr($label, 2);

            return '_:b'.abs(crc32($g.$label)).'_'.$suffix;
        };

        if (!$this->keep_bnode_ids) {
            if ('bnode' === $s_type) {
                $s = $scopeBnode($s);
            }
            if ('bnode' === $o_type) {
                $o = $scopeBnode($o);
            }
        }

        /* triple; the order of the lookups matters because new term IDs are handed out sequentially */
        $t = [
            's' => $this->getStoredTermID($s, $type_ids[$s_type], 's'),
            'p' => $this->getStoredTermID($p, '0', 'id'),
            'o' => $this->getStoredTermID($o, $type_ids[$o_type], 'o'),
            'o_lang_dt' => $this->getStoredTermID($o_dt.$o_lang, $o_dt ? '0' : '2', 'id'),
            'o_comp' => $this->getOComp($o),
            's_type' => $type_ids[$s_type],
            'o_type' => $type_ids[$o_type],
        ];

        /* getTripleID() returns [id] for a known triple and a plain ID for a new one */
        $t['t'] = $this->getTripleID($t);
        if (\is_array($t['t'])) {
            $t['t'] = $t['t'][0];
        } else {
            $this->bufferTripleSQL($t);
        }

        /* g2t */
        $this->bufferGraphSQL(['g' => $g, 't' => $t['t']]);
        ++$this->t_count;

        /* check buffers: checkSQLBuffers() takes exactly two arguments (force write, reset ID caches) */
        if (0 === ($this->t_count % $this->write_buffer_size)) {
            $reset_buffers = (0 === ($this->t_count % ($this->write_buffer_size * 2)));
            $this->checkSQLBuffers(1, $reset_buffers);
        }
    }
97
98 24
    /**
     * Returns the next free term ID: the highest ID found in id2val, s2val
     * and o2val plus one (1 if none of the tables yields an ID).
     */
    public function getMaxTermID(): int
    {
        $selects = [];
        foreach (['id2val', 's2val', 'o2val'] as $table) {
            $selects[] = 'SELECT MAX(id) as id FROM '.$table;
        }

        $rows = $this->store->getDBObject()->fetchList(implode(' UNION ', $selects));

        $highest = 0;
        if (\is_array($rows)) {
            foreach ($rows as $row) {
                if ($row['id'] > $highest) {
                    $highest = $row['id'];
                }
            }
        }

        return $highest + 1;
    }
117
118
    /**
     * Returns the next free triple ID: the highest value of triple.t plus one,
     * or 1 if the query yields no ID.
     *
     * @todo change DB schema and avoid using this function because it does not protect against race conditions
     *
     * @return int
     */
    public function getMaxTripleID()
    {
        $row = $this->store->getDBObject()->fetchRow('SELECT MAX(t) AS `id` FROM triple');

        return isset($row['id']) ? $row['id'] + 1 : 1;
    }
134
135 24
    /**
     * Returns the ID of a term and makes sure the term is (going to be) stored
     * in the table belonging to $tbl.
     *
     * Resolution order:
     *  1. the term ID cache of the current run,
     *  2. the tables id2val, s2val and o2val (the table belonging to $tbl first),
     *  3. a new ID taken from $this->max_term_id.
     * Whenever the term is not yet known for $tbl, an INSERT is buffered via
     * bufferIDSQL().
     *
     * @param string $val     Term value
     * @param string $type_id Value type, forwarded to bufferIDSQL()
     * @param string $tbl     Table shortcut: 'id', 's' or 'o'
     *
     * @return int|string Term ID (as given by the cache, the DB or the counter)
     */
    public function getStoredTermID($val, $type_id, $tbl)
    {
        /* buffered */
        if (isset($this->term_ids[$val])) {
            if (!isset($this->term_ids[$val][$tbl])) {
                /* known for another table: reuse that ID and register it for $tbl, too */
                foreach (['id', 's', 'o'] as $other_tbl) {
                    if (isset($this->term_ids[$val][$other_tbl])) {
                        $this->term_ids[$val][$tbl] = $this->term_ids[$val][$other_tbl];
                        $this->bufferIDSQL($tbl, $this->term_ids[$val][$tbl], $val, $type_id);
                        break;
                    }
                }
            }

            return $this->term_ids[$val][$tbl];
        }

        /* db: look into the table belonging to $tbl first */
        if ('id' == $tbl) {
            $sub_tbls = ['id2val', 's2val', 'o2val'];
        } elseif ('s' == $tbl) {
            $sub_tbls = ['s2val', 'id2val', 'o2val'];
        } else {
            $sub_tbls = ['o2val', 'id2val', 's2val'];
        }

        foreach ($sub_tbls as $sub_tbl) {
            $id = 0;

            if ('id2val' === $sub_tbl) {
                /*
                 * Direct lookup by value. It has to run for every value: the
                 * former guard only let empty values through, so existing
                 * terms were never found and received a second ID.
                 */
                $escapedValue = $this->store->getDBObject()->escape($val);
                $sql = 'SELECT id FROM '.$sub_tbl." WHERE val = '".$escapedValue."'";

                $row = $this->store->getDBObject()->fetchRow($sql);
                if (\is_array($row) && isset($row['id'])) {
                    $id = $row['id'];
                }
            } else {
                /*
                 * s2val / o2val: lookup via hash. Single quotes mark the hash as
                 * a string literal (same quoting as in bufferIDSQL()).
                 */
                $sql = 'SELECT id, val FROM '.$sub_tbl." WHERE val_hash = '".$this->getValueHash($val)."'";

                $rows = $this->store->getDBObject()->fetchList($sql);
                if (\is_array($rows)) {
                    foreach ($rows as $row) {
                        /* strict string comparison: different values may share a hash */
                        if ((string) $row['val'] === (string) $val) {
                            $id = $row['id'];
                            break;
                        }
                    }
                }
            }

            if (0 < $id) {
                $this->term_ids[$val] = [$tbl => $id];
                /* found in another table: the term still has to be added to the one belonging to $tbl */
                if ($sub_tbl != $tbl.'2val') {
                    $this->bufferIDSQL($tbl, $id, $val, $type_id);
                }
                break;
            }
        }

        /* new */
        if (!isset($this->term_ids[$val])) {
            $this->term_ids[$val] = [$tbl => $this->max_term_id];
            $this->bufferIDSQL($tbl, $this->max_term_id, $val, $type_id);
            ++$this->max_term_id;
        }

        return $this->term_ids[$val][$tbl];
    }
204
205 24
    /**
     * Determines the ID of a triple.
     *
     * The return type tells the caller whether the triple has to be inserted:
     * a known triple (cached during this run or found in the DB) is returned
     * as a one-element array [id], a new triple as a plain ID.
     *
     * @param array $t Triple with the keys s, p, o, o_lang_dt, s_type and o_type
     *                 (the whole array is serialized to form the cache key)
     *
     * @return array|int|string
     */
    public function getTripleID($t)
    {
        $cacheKey = serialize($t);

        /* seen during this run: wrap the ID to signal "don't insert this triple" */
        if (isset($this->triple_ids[$cacheKey])) {
            return [$this->triple_ids[$cacheKey]];
        }

        /* stored in the DB already? */
        $sql = 'SELECT t
                  FROM triple
                 WHERE s = '.$t['s'].'
                    AND p = '.$t['p'].'
                    AND o = '.$t['o'].'
                    AND o_lang_dt = '.$t['o_lang_dt'].'
                    AND s_type = '.$t['s_type'].'
                    AND o_type = '.$t['o_type'].'
                 LIMIT 1';
        $row = $this->store->getDBObject()->fetchRow($sql);

        if (isset($row['t'])) {
            /* remember it and wrap the ID to signal "don't insert this triple" */
            $this->triple_ids[$cacheKey] = $row['t'];

            return [$row['t']];
        }

        /* unknown triple: hand out the next free ID */
        $newId = $this->max_triple_id++;
        $this->triple_ids[$cacheKey] = $newId;

        return $newId;
    }
237
238 25
    /**
     * Derives the comparison value (column o_comp) for an object value.
     *
     *  - Values strtotime() understands become a timestamp string
     *    ("Y-m-dTH:i:s" for dates like "21 August 2007", "Y-m-dTH:i:sZ" otherwise).
     *  - Remaining numeric values become a signed, zero-padded fixed-width decimal.
     *  - Any other value is stripped of tags, runs of whitespace and quote
     *    characters are replaced by "-", and the result is shortened to its
     *    first and last 17 characters if it is longer than 35.
     *
     * @param string $val
     *
     * @return string
     */
    public function getOComp($val)
    {
        /* parse once; false if $val is no date/time expression */
        $uts = strtotime($val);

        /* try date (e.g. 21 August 2007) */
        if (
            $uts
            && (-1 !== $uts)
            && preg_match('/^[0-9]{1,2}\s+[a-z]+\s+[0-9]{4}/i', $val)
        ) {
            return date("Y-m-d\TH:i:s", $uts);
        }

        /* xsd date (e.g. 2009-05-28T18:03:38+09:00 2009-05-28T18:03:38GMT) */
        if ($uts) {
            return date('Y-m-d\TH:i:s\Z', $uts);
        }

        if (is_numeric($val)) {
            $val = sprintf('%f', $val);
            if (preg_match('/([\-\+])([0-9]*)\.([0-9]*)/', $val, $m)) {
                return $m[1].sprintf('%018s', $m[2]).'.'.sprintf('%-015s', $m[3]);
            }
            if (preg_match('/([0-9]*)\.([0-9]*)/', $val, $m)) {
                return '+'.sprintf('%018s', $m[1]).'.'.sprintf('%-015s', $m[2]);
            }

            return $val;
        }

        /*
         * Any other string: remove tags, linebreaks etc., but keep MB-chars.
         * The "u" modifier is required for that: without it the multi-byte
         * acute accent in the character class is matched byte by byte, which
         * damages every character sharing one of its bytes.
         */
        $stripped = strip_tags($val);
        $cleaned = preg_replace('/[\s\'"´`]+/u', '-', $stripped);
        if (null === $cleaned) {
            /* $stripped is not valid UTF-8: only replace the single-byte separators */
            $cleaned = (string) preg_replace('/[\s\'"`]+/', '-', $stripped);
        }
        $val = trim($cleaned);

        /* measure and cut in the same unit (characters if mbstring is available, bytes otherwise) */
        $multibyte = \function_exists('mb_substr');
        $length = $multibyte ? mb_strlen($val) : \strlen($val);
        if ($length > 35) {
            $fnc = $multibyte ? 'mb_substr' : 'substr';
            $val = $fnc($val, 0, 17).'-'.$fnc($val, -17);
        }

        return $val;
    }
278
279 24
    /**
     * Appends one row to the pending multi-row INSERT for table "triple".
     * The statement head is written when the buffer does not exist yet.
     *
     * @param array $t Triple with the keys t, s, p, o, o_lang_dt, o_comp, s_type and o_type
     */
    public function bufferTripleSQL($t)
    {
        $tbl = 'triple';

        $startsStatement = !isset($this->sql_buffers[$tbl]);
        if ($startsStatement) {
            $this->sql_buffers[$tbl] = 'INSERT OR IGNORE INTO '.$tbl
                .' (t, s, p, o, o_lang_dt, o_comp, s_type, o_type) VALUES';
        }

        /* o_comp is the only text column, therefore the only value that is escaped and quoted */
        $columns = [
            $t['t'],
            $t['s'],
            $t['p'],
            $t['o'],
            $t['o_lang_dt'],
            "'".$this->store->getDBObject()->escape($t['o_comp'])."'",
            $t['s_type'],
            $t['o_type'],
        ];

        $separator = $startsStatement ? ' ' : ', ';
        $this->sql_buffers[$tbl] .= $separator.'('.implode(', ', $columns).')';
    }
299
300 24
    /**
     * Appends one graph-to-triple assignment to the pending multi-row INSERT
     * for table "g2t". The statement head is written when the buffer does not
     * exist yet.
     *
     * @param array $g2t Keys: 'g' (term ID of the graph) and 't' (triple ID)
     */
    public function bufferGraphSQL($g2t)
    {
        $tbl = 'g2t';
        $tuple = '('.$g2t['g'].', '.$g2t['t'].')';

        if (isset($this->sql_buffers[$tbl])) {
            $this->sql_buffers[$tbl] .= ', '.$tuple;

            return;
        }

        $this->sql_buffers[$tbl] = 'INSERT OR IGNORE INTO '.$tbl.' (g, t) VALUES '.$tuple;
    }
316
317 24
    /**
     * Appends one term to the pending multi-row INSERT of a term table.
     *
     * id2val rows consist of (id, val, val_type), s2val and o2val rows of
     * (id, val_hash, val), rows of any other table of (id, val).
     *
     * @param string     $tbl      Table shortcut; "2val" is appended to get the table name
     * @param int|string $id       Term ID
     * @param string     $val      Term value (escaped here)
     * @param string     $val_type Value type, only written to id2val
     */
    public function bufferIDSQL($tbl, $id, $val, $val_type)
    {
        $table = $tbl.'2val';
        $escapedVal = $this->store->getDBObject()->escape($val);

        if ('id2val' == $table) {
            $cols = 'id, val, val_type';
            $vals = '('.$id.", '".$escapedVal."', ".$val_type.')';
        } elseif ('s2val' == $table || 'o2val' == $table) {
            $cols = 'id, val_hash, val';
            $vals = '('.$id.", '".$this->getValueHash($val)."', '".$escapedVal."')";
        } else {
            $cols = 'id, val';
            $vals = '('.$id.", '".$escapedVal."')";
        }

        if (isset($this->sql_buffers[$table])) {
            /* statement already started: add a further row */
            $this->sql_buffers[$table] .= ', '.$vals;
        } else {
            $this->sql_buffers[$table] = 'INSERT OR IGNORE INTO '.$table.'('.$cols.') VALUES '.$vals;
        }
    }
345
346 24
    /**
     * Writes the pending INSERT statements to the database.
     *
     * Nothing happens unless $force_write is truthy. Each existing buffer
     * (triple, g2t, id2val, s2val, o2val - in that order) is executed and
     * removed afterwards. Errors of the executed statements are not evaluated
     * here.
     *
     * @param int|bool $force_write      Buffers are only written if this is truthy
     * @param int|bool $reset_id_buffers If truthy and at least one buffer was written,
     *                                   the term and triple ID caches are emptied
     *
     * @return int Always 1
     */
    public function checkSQLBuffers($force_write = 0, $reset_id_buffers = 0)
    {
        if (!$force_write) {
            return 1;
        }

        $wroteBuffer = false;
        foreach (['triple', 'g2t', 'id2val', 's2val', 'o2val'] as $tbl) {
            if (!isset($this->sql_buffers[$tbl])) {
                continue;
            }

            $this->store->getDBObject()->simpleQuery($this->sql_buffers[$tbl]);
            unset($this->sql_buffers[$tbl]);
            $wroteBuffer = true;
        }

        /* reset term id buffers */
        if ($wroteBuffer && $reset_id_buffers) {
            $this->term_ids = [];
            $this->triple_ids = [];
        }

        return 1;
    }
366
}
367