Passed
Push — extract-store ( 38a23e...9fc033 )
by Konrad
05:01
created

InsertQueryHandler::addTriplesToGraph()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 9
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 4
nc 2
nop 2
dl 0
loc 9
ccs 5
cts 5
cp 1
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
/*
4
 * This file is part of the sweetrdf/InMemoryStoreSqlite package and licensed under
5
 * the terms of the GPL-3 license.
6
 *
7
 * (c) Konrad Abicht <[email protected]>
8
 * (c) Benjamin Nowak
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace sweetrdf\InMemoryStoreSqlite\Store\QueryHandler;
15
16
use sweetrdf\InMemoryStoreSqlite\KeyValueBag;
17
18
class InsertQueryHandler extends QueryHandler
19
{
20
    /**
21
     * When set, it is used to cache term information to speed up INSERT INTO operations.
22
     */
23
    private KeyValueBag $rowCache;
24
25
    /**
26
     * Is being used for blank nodes to generate a hash which is not only dependent on
27
     * blank node ID and graph, but also on a random value.
28
     * Otherwise blank nodes inserted in different "insert-sessions" will have the same reference.
29
     */
30
    private ?string $sessionId = null;
31
32 75
    /**
     * Injects the cache in which getIdOfExistingTerm() keeps rows of terms it already looked up.
     */
    public function setRowCache(KeyValueBag $rowCache): void
    {
        $this->rowCache = $rowCache;
    }
36
37 75
    /**
     * Adds every triple of the given query to the query's target graph.
     *
     * A random session ID is set for the duration of the call and cleared afterwards;
     * prepareTriple() mixes it into the hash of blank node IDs.
     *
     * @param array $infos reads $infos['query']['construct_triples'] (list of triples) and
     *                     $infos['query']['target_graph']
     */
    public function runQuery(array $infos)
    {
        // 8 random bytes, hex encoded
        $this->sessionId = bin2hex(random_bytes(8));

        $query = $infos['query'];
        foreach ($query['construct_triples'] as $constructTriple) {
            $this->addTripleToGraph($constructTriple, $query['target_graph']);
        }

        $this->sessionId = null;
    }
47
48
    /**
     * Stores a single triple in the given graph.
     *
     * Graph, subject, predicate, object and the object's datatype/language are looked up first
     * and only inserted if no ID exists for them yet. Afterwards the triple itself and its link
     * to the graph (table g2t) are inserted, again only if they are missing.
     *
     * Column information:
     *
     *  + val_hash: hashed version of given value
     *  + val_type: type of the term; one of: bnode, uri, literal
     *
     * @todo cache once loaded triples/quads
     *
     * @param array  $triple reads the keys s, s_type, p, o, o_type, o_datatype and o_lang
     * @param string $graph  value of the target graph, stored in id2val with val_type 0 (= uri)
     */
    private function addTripleToGraph(array $triple, string $graph): void
    {
        // adds s_type_int/o_type_int and rewrites blank node IDs
        $triple = $this->prepareTriple($triple, $graph);

        // order matters: each new term gets the next free ID at the time of its insert
        $graphId = $this->getOrCreateIdTerm($graph, 0); // 0 = uri
        $subjectId = $this->getOrCreateHashedTerm($triple['s'], 'subject');
        $predicateId = $this->getOrCreateIdTerm($triple['p'], 0); // 0 = uri
        $objectId = $this->getOrCreateHashedTerm($triple['o'], 'object');

        /*
         * o_lang_dt
         */
        // notice: only one of these two is set
        $oLangDt = $triple['o_datatype'].$triple['o_lang'];
        $oLangDtId = $this->getOrCreateIdTerm($oLangDt, !empty($triple['o_datatype']) ? 0 : 2);

        $db = $this->store->getDBObject();

        /*
         * triple
         */
        $sql = 'SELECT * FROM triple WHERE s = ? AND p = ? AND o = ?';
        $existingTriple = $db->fetchRow($sql, [$subjectId, $predicateId, $objectId]);
        if (false === $existingTriple) {
            $tripleId = $db->insert('triple', [
                's' => $subjectId,
                's_type' => $triple['s_type_int'],
                'p' => $predicateId,
                'o' => $objectId,
                'o_type' => $triple['o_type_int'],
                'o_lang_dt' => $oLangDtId,
                'o_comp' => $this->getOComp($triple['o']),
            ]);
        } else {
            $tripleId = $existingTriple['t'];
        }

        /*
         * triple to graph
         */
        $sql = 'SELECT * FROM g2t WHERE g = ? AND t = ?';
        $existingLink = $db->fetchRow($sql, [$graphId, $tripleId]);
        // loose check kept on purpose: any empty result counts as "no link yet"
        if (false == $existingLink) {
            $db->insert('g2t', [
                'g' => $graphId,
                't' => $tripleId,
            ]);
        }
    }

    /**
     * Returns the ID of a term of table id2val (graph, predicate, datatype or language).
     * If the term is not stored yet, it gets inserted with the next free ID.
     *
     * @param int $valType value for column val_type, only used on insert
     */
    private function getOrCreateIdTerm(string $value, int $valType): int
    {
        $termId = $this->getIdOfExistingTerm($value, 'id');

        // strict check: only a missing term (null) triggers an insert
        if (null === $termId) {
            $termId = $this->getMaxTermId();
            $this->store->getDBObject()->insert('id2val', [
                'id' => $termId,
                'val' => $value,
                'val_type' => $valType,
            ]);
        }

        return $termId;
    }

    /**
     * Returns the ID of a subject (table s2val) or object (table o2val).
     * If the term is not stored yet, it gets inserted with the next free ID.
     *
     * @param string $quadPart One of: subject, object
     */
    private function getOrCreateHashedTerm(string $value, string $quadPart): int
    {
        $termId = $this->getIdOfExistingTerm($value, $quadPart);

        // strict check: only a missing term (null) triggers an insert
        if (null === $termId) {
            $termId = $this->getMaxTermId();
            $this->store->getDBObject()->insert('subject' === $quadPart ? 's2val' : 'o2val', [
                'id' => $termId,
                'val' => $value,
                'val_hash' => $this->getValueHash($value),
            ]);
        }

        return $termId;
    }
159
160 75
    /**
     * Adds the integer type of subject and object to the triple and rewrites blank node IDs.
     *
     * Integer types: 0 = uri (default), 1 = bnode, 2 = literal.
     *
     * @param array  $triple reads the keys s, s_type, o and o_type
     * @param string $graph  goes into the hash of rewritten blank node IDs
     *
     * @return array given triple, extended by s_type_int and o_type_int
     */
    private function prepareTriple(array $triple, string $graph): array
    {
        // subject and object are treated the same way
        foreach (['s', 'o'] as $part) {
            $termType = $triple[$part.'_type'];
            $typeIntKey = $part.'_type_int';

            if ('bnode' == $termType) {
                $triple[$typeIntKey] = 1;

                // transforms _:foo to _:b671320391_foo
                // TODO make bnode ID only unique for this session, not in general
                $bnodeId = $triple[$part];
                $hash = $this->getValueHash($this->sessionId.$graph.$bnodeId);
                $triple[$part] = '_:b'.$hash.'_'.substr($bnodeId, 2);
            } elseif ('literal' == $termType) {
                $triple[$typeIntKey] = 2;
            } else {
                $triple[$typeIntKey] = 0; // uri
            }
        }

        return $triple;
    }
206
207
    /**
     * Get normalized value for ORDER BY operations.
     *
     * Rules, first match wins:
     *
     *  + textual date (e.g. 21 August 2007): formatted as Y-m-d\TH:i:s
     *  + anything else strtotime() can parse: formatted as Y-m-d\TH:i:s\Z
     *  + numeric value: sign, integer part zero-padded to 18 digits, fraction padded to 15 digits
     *  + any other string: tags removed, runs of whitespace/quotes replaced by "-" and, if longer
     *    than 35 bytes, reduced to its first and last 17 characters joined by "-"
     *
     * @param mixed $val value of the object; presumably always a string (TODO confirm)
     */
    private function getOComp($val): string
    {
        $looksLikeTextualDate = preg_match('/^[0-9]{1,2}\s+[a-z]+\s+[0-9]{4}/i', $val);

        // parsed once, the result serves both date checks
        $uts = strtotime($val);

        /* try date (e.g. 21 August 2007) */
        if ($looksLikeTextualDate && $uts && (-1 !== $uts)) {
            return date("Y-m-d\TH:i:s", $uts);
        }

        /* xsd date (e.g. 2009-05-28T18:03:38+09:00 2009-05-28T18:03:38GMT) */
        if (true === (bool) $uts) {
            return date('Y-m-d\TH:i:s\Z', $uts);
        }

        if (is_numeric($val)) {
            $val = sprintf('%f', $val);

            // explicitly signed number
            if (preg_match("/([\-\+])([0-9]*)\.([0-9]*)/", $val, $m)) {
                return $m[1].sprintf('%018s', $m[2]).'.'.sprintf('%-015s', $m[3]);
            }

            // unsigned number, gets a leading +
            if (preg_match("/([0-9]*)\.([0-9]*)/", $val, $m)) {
                return '+'.sprintf('%018s', $m[1]).'.'.sprintf('%-015s', $m[2]);
            }

            return $val;
        }

        /* any other string: remove tags, linebreaks etc., but keep MB-chars */
        // a pattern like [\PL\s]+ ( = non-Letters) is not used here, because it kills digits
        $re = '/[\s\'\"\´\`]+/is';
        $val = trim(preg_replace($re, '-', strip_tags($val)));
        if (\strlen($val) > 35) {
            $fnc = \function_exists('mb_substr') ? 'mb_substr' : 'substr';
            $val = $fnc($val, 0, 17).'-'.$fnc($val, -17);
        }

        return $val;
    }
250
251
    /**
     * Determines the next free term ID: the highest ID found in id2val, s2val and o2val plus 1.
     *
     * @return int returns 1 or higher
     */
    private function getMaxTermId(): int
    {
        // one MAX(id) query per term table, combined via UNION
        $selects = array_map(
            static function (string $table): string {
                return 'SELECT MAX(id) as id FROM '.$table;
            },
            ['id2val', 's2val', 'o2val']
        );

        $rows = $this->store->getDBObject()->fetchList(implode(' UNION ', $selects));

        $highestId = 0;
        if (\is_array($rows)) {
            foreach ($rows as $row) {
                if ($highestId < $row['id']) {
                    $highestId = $row['id'];
                }
            }
        }

        return $highestId + 1;
    }
275
276
    /**
     * Looks up the ID of an already stored term.
     *
     * Rows read from the database are kept in the row cache, so that the same lookup
     * does not hit the database again. Lookups without a result are not cached.
     *
     * @param string $value    Value of the term to look for
     * @param string $quadPart One of: id, subject, object. "id" reads from id2val (by value),
     *                         "subject" from s2val and anything else from o2val (both by value hash).
     *
     * @return int|null ID of the term, if available, or null
     */
    private function getIdOfExistingTerm(string $value, string $quadPart): ?int
    {
        $isIdLookup = 'id' === $quadPart;

        if ($isIdLookup) {
            // id (predicate or graph)
            $sql = 'SELECT id, val FROM id2val WHERE val = ?';
            $params = [$value];
        } else {
            // subject or object
            $table = 'subject' === $quadPart ? 's2val' : 'o2val';
            $sql = 'SELECT id, val FROM '.$table.' WHERE val_hash = ?';
            $params = [$this->getValueHash($value)];
        }

        // query + parameters identify the cache entry
        $hashKey = md5($sql.json_encode($params));
        if (false === $this->rowCache->has($hashKey)) {
            $row = $this->store->getDBObject()->fetchRow($sql, $params);
            if (\is_array($row)) {
                $this->rowCache->set($hashKey, $row);
            }
        }

        $entry = $this->rowCache->get($hashKey);
        if (!\is_array($entry)) {
            return null;
        }

        if ($isIdLookup) {
            // row was selected by the value itself
            return $entry['id'];
        }

        // Row was selected by hash only, therefore make sure it really belongs to the given
        // value. Compared as strings, so that different numeric strings are never mixed up.
        // NOTE(review): only one row per hash is inspected; a second value with the same hash
        // would never be found. Confirm whether getValueHash() can produce collisions.
        if (isset($entry['val']) && (string) $entry['val'] === $value) {
            return $entry['id'];
        }

        return null;
    }
328
}
329