Writer::createIndexes()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 9
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 9
rs 9.6666
cc 1
eloc 5
nc 1
nop 0
1
<?php
2
declare(strict_types=1);
3
4
namespace Crossjoin\Browscap\Parser\Sqlite;
5
6
use Crossjoin\Browscap\Exception\ParserConditionNotSatisfiedException;
7
use Crossjoin\Browscap\Exception\ParserRuntimeException;
8
use Crossjoin\Browscap\Exception\UnexpectedValueException;
9
use Crossjoin\Browscap\Parser\Sqlite\Adapter\AdapterFactory;
10
use Crossjoin\Browscap\Parser\Sqlite\Adapter\AdapterInterface;
11
use Crossjoin\Browscap\Parser\Sqlite\Adapter\PreparedStatementInterface;
12
use Crossjoin\Browscap\Parser\WriterInterface;
13
use Crossjoin\Browscap\PropertyFilter\PropertyFilterTrait;
14
use Crossjoin\Browscap\Source\DataSet;
15
use Crossjoin\Browscap\Source\Ini\File;
16
use Crossjoin\Browscap\Source\Ini\ParseHeaderSectionTrait;
17
use Crossjoin\Browscap\Source\SourceInterface;
18
19
/**
20
 * Class Writer
21
 *
22
 * @package Crossjoin\Browscap\Parser\Sqlite
23
 * @author Christoph Ziegenberg <[email protected]>
24
 * @link https://github.com/crossjoin/browscap
25
 */
26
class Writer implements WriterInterface
27
{
28
    use DataDirectoryTrait;
29
    use DataVersionHashTrait;
30
    use PropertyFilterTrait;
31
    use ParseHeaderSectionTrait;
32
33
    /**
34
     * @var AdapterInterface
35
     */
36
    protected $adapter;
37
38
    /**
39
     * @var SourceInterface
40
     */
41
    protected $source;
42
43
    /**
44
     * @var string
45
     */
46
    protected $temporaryFileName;
47
48
    /**
49
     * @var int[]
50
     */
51
    protected $primaryIds = [];
52
53
    /**
54
     * @var PreparedStatementInterface[]
55
     */
56
    protected $statements = [];
57
58
    /**
59
     * @var string[]
60
     */
61
    protected $parentPatterns = [];
62
63
    /**
64
     * @var string[]
65
     */
66
    protected $propertyNames = [];
67
68
    /**
69
     * @var string[]
70
     */
71
    protected $propertyValues = [];
72
73
    /**
74
     * @var string[]
75
     */
76
    protected $keywords = [];
77
78
    /**
79
     * Writer constructor.
80
     *
81
     * @param string $dataDirectory
82
     * @param SourceInterface $source
83
     */
84
    public function __construct(string $dataDirectory, SourceInterface $source)
85
    {
86
        $this->setDataDirectory($dataDirectory);
87
        $this->setSource($source);
88
    }
89
90
    /**
91
     * @return SourceInterface
92
     */
93
    protected function getSource() : SourceInterface
94
    {
95
        return $this->source;
96
    }
97
98
    /**
99
     * @param SourceInterface $source
100
     */
101
    protected function setSource(SourceInterface $source)
102
    {
103
        $this->source = $source;
104
    }
105
106
    /**
107
     * @return string
108
     */
109
    protected function getTemporaryFileName() : string
110
    {
111
        if ($this->temporaryFileName === null) {
112
            $this->temporaryFileName = $this->getDataDirectory() . DIRECTORY_SEPARATOR .
113
                'browscap_' . microtime(true) . '.sqlite';
114
        }
115
116
        return $this->temporaryFileName;
117
    }
118
119
    /**
120
     * @inheritdoc
121
     *
122
     * @throws ParserConditionNotSatisfiedException
123
     * @throws UnexpectedValueException
124
     */
125 View Code Duplication
    protected function getAdapter() : AdapterInterface
126
    {
127
        if ($this->adapter === null) {
128
            $databaseFile = $this->getTemporaryFileName();
129
            $adapter = AdapterFactory::getInstance($databaseFile);
130
            $this->setAdapter($adapter);
131
        }
132
133
        return $this->adapter;
134
    }
135
136
    /**
137
     * @param AdapterInterface $adapter
138
     */
139
    protected function setAdapter(AdapterInterface $adapter)
140
    {
141
        $this->adapter = $adapter;
142
    }
143
144
    /**
145
     * @inheritdoc
146
     *
147
     * @throws ParserRuntimeException
148
     */
149
    public function generate() : WriterInterface
150
    {
151
        // Disable time limit (generation is done within a few seconds, but if data have to be
152
        // downloaded it can take some more time)
153
        set_time_limit(0);
154
155
        $this->initializeDatabase();
156
157
        foreach ($this->getSource()->getDataSets() as $dataSet) {
0 ignored issues
show
Bug introduced by
The expression $this->getSource()->getDataSets() of type object<Iterator>|object<...rowscap\Source\DataSet> is not guaranteed to be traversable. How about adding an additional type check?

There are different options of fixing this problem.

  1. If you want to be on the safe side, you can add an additional type-check:

    $collection = json_decode($data, true);
    if ( ! is_array($collection)) {
        throw new \RuntimeException('$collection must be an array.');
    }
    
    foreach ($collection as $item) { /** ... */ }
    
  2. If you are sure that the expression is traversable, you might want to add a doc comment cast to improve IDE auto-completion and static analysis:

    /** @var array $collection */
    $collection = json_decode($data, true);
    
    foreach ($collection as $item) { /** .. */ }
    
  3. Mark the issue as a false-positive: Just hover the remove button, in the top-right corner of this issue for more options.

Loading history...
158
            $this->processDataSet($dataSet);
159
        }
160
161
        $this->finalizeDatabase();
162
163
        return $this;
164
    }
165
166
    protected function initializeDatabase()
167
    {
168
        $adapter = $this->getAdapter();
169
170
        // Enable writes
171
        $adapter->exec('PRAGMA query_only = OFF');
172
173
        // Data generation optimization
174
        $adapter->exec('PRAGMA synchronous = OFF'); // Reduces time by about 35% - 40%
175
        $adapter->exec('PRAGMA journal_mode = OFF'); // Reduces time by about 10% - 15%
176
        $adapter->exec('PRAGMA cache_size = -70000'); // Reduces time by about 2% - 3%
177
178
        // Data query optimization
179
        $adapter->exec('PRAGMA temp_store = MEMORY');
180
        $adapter->exec('PRAGMA automatic_index = OFF');
181
182
        // Create tables
183
        $adapter->exec(
184
            'CREATE TABLE IF NOT EXISTS info (version_id INTEGER PRIMARY KEY ASC, release_time INTEGER NOT NULL, ' .
185
            'type_id INTEGER NOT NULL, data_hash TEXT NOT NULL)'
186
        );
187
        $adapter->exec(
188
            'CREATE TABLE IF NOT EXISTS browser (browser_id INTEGER PRIMARY KEY ASC, browser_parent_id INTEGER, ' .
189
            'browser_pattern TEXT NOT NULL)'
190
        );
191
        $adapter->exec(
192
            'CREATE TABLE IF NOT EXISTS search (browser_id INTEGER PRIMARY KEY ASC, ' .
193
            'browser_pattern_length INTEGER NOT NULL, browser_pattern TEXT NOT NULL)'
194
        );
195
196
        $adapter->exec(
197
            'CREATE TABLE IF NOT EXISTS browser_property (browser_property_id INTEGER PRIMARY KEY ASC, ' .
198
            'browser_id INTEGER NOT NULL, property_key_id INTEGER NOT NULL, property_value_id INTEGER NOT NULL)'
199
        );
200
        $adapter->exec(
201
            'CREATE TABLE IF NOT EXISTS browser_property_key (property_key_id INTEGER PRIMARY KEY ASC, ' .
202
            'property_key TEXT NOT NULL)'
203
        );
204
        $adapter->exec(
205
            'CREATE TABLE IF NOT EXISTS browser_property_value (property_value_id INTEGER PRIMARY KEY ASC, ' .
206
            'property_value TEXT NOT NULL)'
207
        );
208
209
        // Prepare insert statements
210
        $this->statements = [
211
            'info'          => $adapter->prepare('INSERT INTO info VALUES (:version, :time, :type, :hash)'),
212
            'browser'       => $adapter->prepare('INSERT INTO browser VALUES (:id, :parent, :pattern)'),
213
            'search'        => $adapter->prepare('INSERT INTO search VALUES (:id, :length, :pattern)'),
214
            'property'      => $adapter->prepare('INSERT INTO browser_property VALUES (NULL, :id, :key, :value)'),
215
            'propertyKey'   => $adapter->prepare('INSERT INTO browser_property_key VALUES (:id, :key)'),
216
            'propertyValue' => $adapter->prepare('INSERT INTO browser_property_value VALUES (:id, :value)'),
217
        ];
218
219
        $adapter->beginTransaction();
220
    }
221
222
    /**
223
     * @param string $tableName
224
     *
225
     * @return integer
226
     */
227
    public function getNextId(string $tableName)
228
    {
229
        if (!array_key_exists($tableName, $this->primaryIds)) {
230
            $this->primaryIds[$tableName] = 1;
231
        } else {
232
            $this->primaryIds[$tableName]++;
233
        }
234
235
        return $this->primaryIds[$tableName];
236
    }
237
238
    /**
239
     * @param DataSet $dataSet
240
     *
241
     * @throws ParserRuntimeException
242
     */
243
    protected function processDataSet(DataSet $dataSet)
244
    {
245
        if ($dataSet->getPattern() === File::HEADER_PATTERN) {
246
            $this->processHeader($dataSet);
247
        } else {
248
            $this->processBrowserData($dataSet);
249
        }
250
    }
251
252
    /**
253
     * @param DataSet $dataSet
254
     */
255
    protected function processHeader(DataSet $dataSet)
256
    {
257
        $result = $this->parseHeaderDataSet($dataSet);
258
        $versionId = $result['version'];
259
        $releaseTime = $result['released'];
260
        $typeId = $result['type'];
261
262
        $this->statements['info']->execute([
263
            'version' => $versionId,
264
            'time' => $releaseTime,
265
            'type' => $typeId,
266
            'hash' => $this->getDataVersionHash(),
267
        ]);
268
    }
269
270
    /**
271
     * @param DataSet $dataSet
272
     *
273
     * @throws ParserRuntimeException
274
     */
275
    protected function processBrowserData(DataSet $dataSet)
276
    {
277
        // Get properties and filter them
278
        $pattern = $dataSet->getPattern();
279
        $propertiesOriginal = $dataSet->getProperties();
280
        $properties = $propertiesOriginal;
281
        $propertyFilter = $this->getPropertyFilter();
282
        foreach ($properties as $propertyName => $propertyValue) {
283
            if ($propertyFilter->isFiltered($propertyName)) {
284
                unset($properties[$propertyName]);
285
            }
286
        }
287
        $browserId = $this->getNextId('browser');
288
289
        // Check for placeholders
290
        $hasBrowscapPlaceholders = (strpos($pattern, '*') !== false || strpos($pattern, '?') !== false);
291
292
        // Parent patterns do not contain browscap placeholders, so we only need to save some of them for referencing.
293
        // (Use unmodified pattern here to find them later.)
294
        if ($hasBrowscapPlaceholders === false) {
295
            $this->addParentPattern($pattern, $browserId);
296
        }
297
298
        // Get parent id
299
        $parentId = $this->getParentPatternId($propertiesOriginal);
300
301
        // Get property ids (and generate new entries for new properties)
302
        $propertyIds = $this->getIdsForProperties($properties);
303
304
        // Filter the keywords from the pattern (all strings containing of the characters a-z,
305
        // with at least 4 characters) and count them to check for the most important keywords
306
        // later.
307
        $this->extractKeywordsFromPattern($pattern);
308
309
        // Save browser entry
310
        $this->statements['browser']->execute(['id' => $browserId, 'parent' => $parentId, 'pattern' => $pattern]);
311
312
        // Removed 'optimization': Previously patterns without a browscap placeholder like '*' were
313
        // ignored, assuming that these are only used as parents, but there are about 20 'real'
314
        // patterns without palceholders.
315
        //
316
        // We use the GLOB function in Sqlite, but this is case-sensitive. So we lower-case the pattern here
317
        // (and later, when searching for it).
318
        $this->statements['search']->execute([
319
            'id' => $browserId,
320
            'length' => strlen(str_replace('*', '', $pattern)),
321
            'pattern' => strtolower($pattern)]
322
        );
323
324
        // Save all properties for the current pattern
325
        foreach ($propertyIds as $keyId => $valueId) {
326
            $this->statements['property']->execute(['id' => $browserId, 'key' => $keyId, 'value' => $valueId]);
327
        }
328
    }
329
330
    /**
331
     * @param string $pattern
332
     * @param int $browserId
333
     */
334
    protected function addParentPattern(string $pattern, int $browserId)
335
    {
336
        $this->parentPatterns[$pattern] = $browserId;
337
    }
338
339
    /**
340
     * @param array $properties
341
     *
342
     * @return int|null
0 ignored issues
show
Documentation introduced by
Should the return type not be string|null?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
343
     * @throws ParserRuntimeException
344
     */
345
    protected function getParentPatternId(array &$properties)
346
    {
347
        $parentId = null;
348
        /** @noinspection UnSafeIsSetOverArrayInspection */
349
        if (isset($properties['Parent'])) {
350
            if (array_key_exists($properties['Parent'], $this->parentPatterns)) {
351
                $parentId = $this->parentPatterns[$properties['Parent']];
352
                unset($properties['Parent']);
353
            } else {
354
                throw new ParserRuntimeException("Parent '" . $properties['Parent'] . "' not found.");
355
            }
356
        }
357
358
        return $parentId;
359
    }
360
361
    /**
362
     * @param array $properties
363
     *
364
     * @return array
365
     */
366
    protected function getIdsForProperties(array $properties)
367
    {
368
        $browserPropertyIds = [];
369
370
        foreach ($properties as $propertyKey => $propertyValue) {
371
            if ($propertyKey === 'Parent') {
372
                continue;
373
            }
374
            if (!array_key_exists($propertyKey, $this->propertyNames)) {
375
                $propertyKeyId = $this->getNextId('property_key');
376
                $this->propertyNames[$propertyKey] = $propertyKeyId;
377
378
                $this->statements['propertyKey']->execute([
379
                    'id' => $propertyKeyId,
380
                    'key' => $propertyKey,
381
                ]);
382
            }
383
384
            if (!array_key_exists($propertyValue, $this->propertyValues)) {
385
                $propertyValueId = $this->getNextId('property_value');
386
                $this->propertyValues[$propertyValue] = $propertyValueId;
387
388
                $this->statements['propertyValue']->execute([
389
                    'id' => $propertyValueId,
390
                    'value' => $propertyValue,
391
                ]);
392
            }
393
394
            $propertyKeyId = $this->propertyNames[$propertyKey];
395
            $propertyValueId = $this->propertyValues[$propertyValue];
396
397
            $browserPropertyIds[$propertyKeyId] = $propertyValueId;
398
        }
399
400
        return $browserPropertyIds;
401
    }
402
403
    /**
404
     * @param string $pattern
405
     */
406
    protected function extractKeywordsFromPattern(string $pattern)
407
    {
408
        // Filter the keywords from the pattern (all strings containing of the characters a-z,
409
        // with at least 4 characters) and count them to check for the most important keywords
410
        // later.
411
        preg_match_all('#[a-z][a-z0-9]{3,}#', strtolower($pattern), $matches);
412
        foreach ($matches[0] as $keyword) {
413
            $this->keywords[$keyword] = array_key_exists($keyword, $this->keywords) ? ++$this->keywords[$keyword] : 1;
414
        }
415
    }
416
417
    protected function finalizeDatabase()
418
    {
419
        $adapter = $this->getAdapter();
420
421
        // Commit transaction (has to be done before changing the structure for search optimization)
422
        $adapter->commitTransaction();
423
424
        // Optimizations
425
        $this->generateKeywordSearchTables();
426
        $this->createIndexes();
427
        $this->optimizeTableForReading();
428
429
        // Replace the link to the table with the new one
430
        $this->saveLink();
431
432
        // Delete old database files
433
        $this->cleanUp();
434
    }
435
436
    protected function createIndexes()
437
    {
438
        $adapter = $this->getAdapter();
439
440
        $adapter->exec(
441
            'CREATE UNIQUE INDEX IF NOT EXISTS u_brpr_brid_prkeid ON browser_property(browser_id, property_key_id)'
442
        );
443
        $adapter->exec('CREATE INDEX IF NOT EXISTS i_se_brpale ON search (browser_pattern_length)');
444
    }
445
446
    protected function generateKeywordSearchTables()
447
    {
448
        $adapter = $this->getAdapter();
449
450
        // Create keyword table
451
        $adapter->exec(
452
            'CREATE TABLE IF NOT EXISTS keyword (keyword_id INTEGER PRIMARY KEY ASC, keyword_value TEXT NOT NULL)'
453
        );
454
455
        // Use only the top keywords - if we'd use all, this would create thousands of tables,
456
        // which would be very bad for the performance.
457
        arsort($this->keywords);
458
        $keywords = array_slice($this->keywords, 0, 100);
459
460
        // Important: Now re-sort the array again to start with the most used keywords having the lowest count
461
        asort($keywords);
462
463
        $keywordId = 1;
464
        foreach ($keywords as $keywordValue => $keywordCount) {
465
            // Do NOT use "CREATE TABLE ... AS" here, because this would automatically add an extra id column,
466
            // which requires additional space
467
            $adapter->exec(
468
                'CREATE TABLE IF NOT EXISTS "search_' . $keywordValue . '" ' .
469
                '(browser_id INTEGER PRIMARY KEY ASC, browser_pattern_length INTEGER NOT NULL, ' .
470
                'browser_pattern TEXT NOT NULL)'
471
            );
472
            /** @noinspection DisconnectedForeachInstructionInspection */
473
            $adapter->beginTransaction();
474
            $adapter->exec(
475
                'INSERT INTO "search_' . $keywordValue . '" ' .
476
                'SELECT browser_id, browser_pattern_length, browser_pattern ' .
477
                "FROM search WHERE browser_pattern GLOB '*$keywordValue*'"
0 ignored issues
show
Coding Style Best Practice introduced by
As per coding-style, please use concatenation or sprintf for the variable $keywordValue instead of interpolation.

It is generally a best practice as it is often more readable to use concatenation instead of interpolation for variables inside strings.

// Instead of
$x = "foo $bar $baz";

// Better use either
$x = "foo " . $bar . " " . $baz;
$x = sprintf("foo %s %s", $bar, $baz);
Loading history...
478
            );
479
            $adapter->exec("INSERT INTO keyword VALUES ($keywordId, '$keywordValue')");
0 ignored issues
show
Coding Style Best Practice introduced by
As per coding-style, please use concatenation or sprintf for the variable $keywordId instead of interpolation.

It is generally a best practice as it is often more readable to use concatenation instead of interpolation for variables inside strings.

// Instead of
$x = "foo $bar $baz";

// Better use either
$x = "foo " . $bar . " " . $baz;
$x = sprintf("foo %s %s", $bar, $baz);
Loading history...
Coding Style Best Practice introduced by
As per coding-style, please use concatenation or sprintf for the variable $keywordValue instead of interpolation.

It is generally a best practice as it is often more readable to use concatenation instead of interpolation for variables inside strings.

// Instead of
$x = "foo $bar $baz";

// Better use either
$x = "foo " . $bar . " " . $baz;
$x = sprintf("foo %s %s", $bar, $baz);
Loading history...
480
            $adapter->exec("DELETE FROM search WHERE browser_pattern GLOB '*$keywordValue*'");
0 ignored issues
show
Coding Style Best Practice introduced by
As per coding-style, please use concatenation or sprintf for the variable $keywordValue instead of interpolation.

It is generally a best practice as it is often more readable to use concatenation instead of interpolation for variables inside strings.

// Instead of
$x = "foo $bar $baz";

// Better use either
$x = "foo " . $bar . " " . $baz;
$x = sprintf("foo %s %s", $bar, $baz);
Loading history...
481
            /** @noinspection DisconnectedForeachInstructionInspection */
482
            $adapter->commitTransaction();
483
484
            $adapter->exec(
485
                'CREATE INDEX IF NOT EXISTS i_se' . $keywordId . '_brpale ON "search_' . $keywordValue .
486
                '"(browser_pattern_length)'
487
            );
488
489
            $keywordId++;
490
        }
491
    }
492
493
    protected function optimizeTableForReading()
494
    {
495
        $adapter = $this->getAdapter();
496
497
        $adapter->exec('VACUUM');
498
        $adapter->exec('PRAGMA query_only = ON');
499
    }
500
501
    /**
502
     * @throws ParserRuntimeException
503
     */
504
    protected function saveLink()
505
    {
506
        // Update the link to the new database
507
        $linkFile = $this->getLinkPath();
508
        $databaseFile = basename($this->getTemporaryFileName());
509
        if (@file_put_contents($linkFile, $databaseFile) === false) {
510
            throw new ParserRuntimeException("Could not create/update link file '$linkFile'.");
0 ignored issues
show
Coding Style Best Practice introduced by
As per coding-style, please use concatenation or sprintf for the variable $linkFile instead of interpolation.

It is generally a best practice as it is often more readable to use concatenation instead of interpolation for variables inside strings.

// Instead of
$x = "foo $bar $baz";

// Better use either
$x = "foo " . $bar . " " . $baz;
$x = sprintf("foo %s %s", $bar, $baz);
Loading history...
511
        }
512
    }
513
514
    protected function cleanUp()
515
    {
516
        $currentDatabaseFile = basename($this->getTemporaryFileName());
517
        foreach (glob($this->getDataDirectory() . DIRECTORY_SEPARATOR . 'browscap_*.sqlite') as $file) {
518
            if (basename($file) !== $currentDatabaseFile) {
519
                @unlink($file);
520
            }
521
        }
522
    }
523
524
    /**
525
     * @return string
526
     */
527
    protected function getLinkPath() : string
528
    {
529
        return $this->getDataDirectory() . DIRECTORY_SEPARATOR . Parser::LINK_FILENAME;
530
    }
531
}
532