Completed
Pull Request — master (#16)
by Gordon
02:18
created

BulkIndexingHelper   A

Complexity

Total Complexity 11

Size/Duplication

Total Lines 94
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 11
eloc 50
c 1
b 0
f 0
dl 0
loc 94
rs 10

1 Method

Rating   Name   Duplication   Size   Complexity  
B bulkIndex() 0 87 11
1
<?php declare(strict_types = 1);
2
3
/**
4
 * Created by PhpStorm.
5
 * User: gordon
6
 * Date: 25/3/2561
7
 * Time: 17:01 น.
8
 */
9
10
namespace Suilven\FreeTextSearch\Helper;
11
12
use League\CLImate\CLImate;
0 ignored issues
show
Bug introduced by
The type League\CLImate\CLImate was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
13
use SilverStripe\CMS\Model\SiteTree;
14
use SilverStripe\Core\Config\Config;
15
use SilverStripe\ORM\DataObject;
16
use SilverStripe\ORM\DB;
17
use SilverStripe\ORM\Queries\SQLUpdate;
18
use SilverStripe\SiteConfig\SiteConfig;
19
use Suilven\FreeTextSearch\Factory\BulkIndexerFactory;
20
use Suilven\FreeTextSearch\Indexes;
21
22
class BulkIndexingHelper
{
    /**
     * Bulk index the DataObjects of the class configured for the named index.
     *
     * Objects are selected with ShowInSearch = true (and additionally
     * IsDirtyFreeTextSearch = true when $dirty is set), added to the bulk indexer
     * in batches of SiteConfig's BulkSize, and each batch is then submitted via
     * indexDataObjects(). Afterwards the IsDirtyFreeTextSearch flag is reset on the
     * class's table and on its "_Live" counterpart.
     *
     * @param string $indexName Name of the index to populate
     * @param boolean $dirty Set this to true to only index 'dirty' DataObjects, false to reindex all
     * @param CLImate|null $climate Optional CLI helper used for progress and summary output
     * @throws \UnexpectedValueException if the configured bulk size is not a positive integer
     */
    public function bulkIndex($indexName, $dirty = false, $climate = null)
    {
        $indexes = new Indexes();
        $index = $indexes->getIndex($indexName);

        /** @var string $clazz */
        $clazz = $index->getClass();

        $startTime = \microtime(true);

        if (!\is_null($climate)) {
            $climate->border('*');
            $climate->green()->bold('Indexing sitetree');
            $climate->border();
        }

        $filters = ['ShowInSearch' => true];
        if ($dirty) {
            $filters['IsDirtyFreeTextSearch'] = true;
        }

        // Count against the same class that is paged through below, so that the page
        // count and progress total match the objects that are actually indexed.
        $nDocuments = $clazz::get()->filter($filters)->count();

        \error_log('N DOCUMENTS: ' . $nDocuments);

        if ($nDocuments > 0) {
            $config = SiteConfig::current_site_config();

            // @phpstan-ignore-next-line
            $bulkSize = (int) $config->BulkSize;

            // A missing or zero bulk size would otherwise cause a division by zero
            // (or an unbounded page count) below, so fail early with a clear message.
            if ($bulkSize < 1) {
                throw new \UnexpectedValueException(
                    'SiteConfig BulkSize must be a positive integer, got ' . \var_export($config->BulkSize, true)
                );
            }

            // Ceiling division: exactly as many batches as are needed, no trailing empty batch.
            $pages = (int) \ceil($nDocuments / $bulkSize);
            $progress = !\is_null($climate) ? $climate->progress()->total($nDocuments) : null;

            if (!\is_null($climate)) {
                $climate->green('Pages: ' . $pages);
                $climate->green()->info('Indexing ' . $nDocuments .' objects');
            }

            $factory = new BulkIndexerFactory();
            $bulkIndexer = $factory->getBulkIndexer();
            $bulkIndexer->setIndex($indexName);

            for ($i = 0; $i < $pages; $i++) {
                // Sort on the unique ID so that limit/offset paging is deterministic and
                // consecutive batches neither overlap nor skip records.
                $dataObjects = $clazz::get()
                    ->filter($filters)
                    ->sort('ID')
                    ->limit($bulkSize, $i * $bulkSize);

                foreach ($dataObjects as $do) {
                    // Note: this only adds data to the payload, it does not index against
                    // the third party search engine yet
                    $bulkIndexer->addDataObject($do);
                }

                // index objects up to configured bulk size
                $bulkIndexer->indexDataObjects();

                if (!\is_null($progress)) {
                    // The final batch may be partial, so cap progress at the total
                    $progress->current(\min($bulkSize * ($i + 1), $nDocuments));
                }
            }
        }

        $endTime = \microtime(true);
        $delta = $endTime - $startTime;

        // Guard against a zero elapsed time to avoid a division by zero
        $rate = $delta > 0 ? \round($nDocuments / $delta, 2) : 0.0;
        $elapsedStr = \round($delta, 2);

        if (!\is_null($climate)) {
            $climate->bold()->blue()->inline("{$nDocuments}");
            $climate->blue()->inline(' objects indexed in ');
            $climate->bold()->blue()->inline("{$elapsedStr}");
            $climate->blue()->inline('s, ');
            $climate->bold()->blue()->inline("{$rate}");
            $climate->blue(' per second ');
        }

        $table = Config::inst()->get($clazz, 'table_name');

        // Reset the dirty flag now that indexing has completed
        DB::query("UPDATE \"{$table}\" SET \"IsDirtyFreeTextSearch\" = 0");

        // @todo How to get the table name from versions?
        DB::query("UPDATE \"{$table}_Live\" SET \"IsDirtyFreeTextSearch\" = 0");
    }
}
120