Issues (186)

src/Solr/Reindex/Handlers/SolrReindexBase.php (1 issue)

1
<?php
2
3
namespace SilverStripe\FullTextSearch\Solr\Reindex\Handlers;
4
5
use Psr\Log\LoggerInterface;
6
use SilverStripe\Core\Environment;
7
use SilverStripe\FullTextSearch\Search\Services\IndexableService;
8
use SilverStripe\FullTextSearch\Solr\Solr;
9
use SilverStripe\FullTextSearch\Solr\SolrIndex;
10
use SilverStripe\FullTextSearch\Search\Variants\SearchVariant;
11
use SilverStripe\FullTextSearch\Search\Queries\SearchQuery;
12
use SilverStripe\ORM\DataObject;
13
use SilverStripe\ORM\DataList;
14
use SilverStripe\ORM\DB;
15
16
/**
17
 * Base class for re-indexing of solr content
18
 */
19
abstract class SolrReindexBase implements SolrReindexHandler
20
{
21
    /**
     * Re-index every configured Solr index.
     *
     * Each index returned by Solr::get_indexes() is handed to processIndex()
     * together with the options supplied by the caller.
     *
     * @param LoggerInterface $logger
     * @param int $batchSize Passed through to processIndex()
     * @param string $taskName Passed through to processIndex()
     * @param string|array|null $classes Optional class filter, passed through to processIndex()
     */
    public function runReindex(LoggerInterface $logger, $batchSize, $taskName, $classes = null)
    {
        $indexes = Solr::get_indexes();
        foreach ($indexes as $index) {
            $this->processIndex($logger, $index, $batchSize, $taskName, $classes);
        }
    }
27
28
    /**
     * Re-index the content of a single SolrIndex instance.
     *
     * Resolves the (optionally filtered) classes for the index, asks the index
     * to clear obsolete classes, then runs processVariant() for every reindex
     * state of every resolved class.
     *
     * @param LoggerInterface $logger
     * @param SolrIndex $indexInstance
     * @param int $batchSize
     * @param string $taskName
     * @param string|array|null $classes Optional class or classes to limit to
     */
    protected function processIndex(
        LoggerInterface $logger,
        SolrIndex $indexInstance,
        $batchSize,
        $taskName,
        $classes = null
    ) {
        // Classes (and their options) that apply to this index after filtering
        $classOptions = $this->getClassesForIndex($indexInstance, $classes);

        // Let the index clear out anything not belonging to these classes
        $indexName = $indexInstance->getIndexName();
        $logger->info("Clearing obsolete classes from " . $indexName);
        $indexInstance->clearObsoleteClasses($classOptions);

        // Queue every variant state of every class
        foreach ($classOptions as $class => $options) {
            $withChildren = $options['include_children'];
            $states = SearchVariant::reindex_states($class, $withChildren);

            foreach ($states as $state) {
                $this->processVariant(
                    $logger,
                    $indexInstance,
                    $state,
                    $class,
                    $withChildren,
                    $batchSize,
                    $taskName
                );
            }
        }
    }
60
61
    /**
     * Get valid classes and options for an index with an optional filter
     *
     * When a filter is given, only classes that are both configured on the
     * index and named in the filter are returned. A string filter is treated
     * as a comma-separated list; whitespace around each name is ignored so
     * that "Page, File" behaves the same as "Page,File".
     *
     * @param SolrIndex $index
     * @param string|array $filterClasses Optional class or classes to limit to
     * @return array List of classes, where the key is the classname and value is list of options
     */
    protected function getClassesForIndex(SolrIndex $index, $filterClasses = null)
    {
        // Get base classes
        $classes = $index->getClasses();
        if (!$filterClasses) {
            return $classes;
        }

        // Normalise a comma-separated string into a list of class names.
        // Names are trimmed: an untrimmed " File" would never match a key of
        // $classes and the class would silently be dropped from the reindex.
        if (!is_array($filterClasses)) {
            $filterClasses = array_map('trim', explode(',', $filterClasses));
        }

        // Keep only the configured classes that are named in the filter
        return array_intersect_key($classes, array_combine($filterClasses, $filterClasses));
    }
82
83
    /**
     * Process re-index for a given variant state and class
     *
     * Activates the given variant state, counts the records of the class in
     * that state and either clears the class from the index (nothing to do, or
     * the state is excluded by the index) or dispatches one processGroup()
     * call per batch. The variant state that was active on entry is always
     * restored before returning, including on the early-exit path and when an
     * exception is thrown.
     *
     * @param LoggerInterface $logger
     * @param SolrIndex $indexInstance
     * @param array $state Variant state
     * @param string $class
     * @param bool $includeSubclasses
     * @param int $batchSize
     * @param string $taskName
     */
    protected function processVariant(
        LoggerInterface $logger,
        SolrIndex $indexInstance,
        $state,
        $class,
        $includeSubclasses,
        $batchSize,
        $taskName
    ) {
        // Remember the current state so it can be restored afterwards
        $originalState = SearchVariant::current_state();
        SearchVariant::activate_state($state);

        try {
            // Count records
            $query = $class::get();
            if (!$includeSubclasses) {
                $query = $query->filter('ClassName', $class);
            }
            $total = $query->count();

            // Skip this variant if nothing to process, or if there are no records
            if ($total == 0 || $indexInstance->variantStateExcluded($state)) {
                // Remove all records in the current state, since there are no groups to process
                $logger->info("Clearing all records of type {$class} in the current state: " . json_encode($state));
                $this->clearRecords($indexInstance, $class);
                return;
            }

            // For each group, run processing (number of groups = ceil(total / batchSize))
            $groups = (int)(($total + $batchSize - 1) / $batchSize);
            for ($group = 0; $group < $groups; $group++) {
                $this->processGroup($logger, $indexInstance, $state, $class, $groups, $group, $taskName);
            }
        } finally {
            // Reset state to originalState on every exit path, so the
            // activated variant state never leaks into the caller
            SearchVariant::activate_state($originalState);
        }
    }
132
133
    /**
     * Start processing for one group (batch) of records.
     *
     * Called by processVariant() once per group index in the range
     * 0 .. $groups - 1. How the group is actually run is left to the
     * concrete handler.
     *
     * @param LoggerInterface $logger
     * @param SolrIndex $indexInstance Index instance
     * @param array $state Variant state
     * @param string $class Class to index
     * @param int $groups Total groups
     * @param int $group Index of group to process
     * @param string $taskName Name of task script to run
     */
    abstract protected function processGroup(
        LoggerInterface $logger,
        SolrIndex $indexInstance,
        $state,
        $class,
        $groups,
        $group,
        $taskName
    );
153
154
    /**
     * Perform the actual processing of one group of records.
     * Can be run either by a background task or a queuedjob.
     *
     * Existing index entries for the group are cleared, then every record
     * returned by getRecordsInGroup() is added to the index.
     *
     * Does not commit changes to the index, so this must be controlled externally.
     *
     * @param LoggerInterface $logger
     * @param SolrIndex $indexInstance
     * @param array $state
     * @param string $class
     * @param int $groups
     * @param int $group
     */
    public function runGroup(
        LoggerInterface $logger,
        SolrIndex $indexInstance,
        $state,
        $class,
        $groups,
        $group
    ) {
        // Lift the time limit before doing any work
        Environment::increaseTimeLimitTo();

        // Switch to the requested variant state, remembering the previous one
        $previousState = SearchVariant::current_state();
        SearchVariant::activate_state($state);

        $logger->info("Adding $class");

        // Delete the existing solr records of this group before re-adding them
        $this->clearRecords($indexInstance, $class, $groups, $group);

        // Add every selected record of this class to the index
        $updatedIds = [];
        $records = $this->getRecordsInGroup($indexInstance, $class, $groups, $group);
        foreach ($records as $record) {
            $updatedIds[] = $record->ID;

            // Stale entries for this group were already deleted by
            // clearRecords() above, so the record can simply be added
            $indexInstance->add($record);
            $record->destroy();
        }
        $logger->info("Updated " . implode(',', $updatedIds));

        // Switch back to the state that was active on entry
        SearchVariant::activate_state($previousState);

        // This will slow down things a tiny bit, but it is done so that we don't timeout to the database during a reindex
        DB::query('SELECT 1');

        $logger->info("Done");
    }
208
209
    /**
     * Build the list of records belonging to one group in the current state.
     *
     * A record belongs to group $group when its ID modulo $groups equals
     * $group. Records that IndexableService reports as not indexable are
     * excluded from the returned list.
     *
     * Assumes that the desired variant state is in effect.
     *
     * @param SolrIndex $indexInstance
     * @param string $class
     * @param int $groups
     * @param int $group
     * @return DataList
     */
    protected function getRecordsInGroup(SolrIndex $indexInstance, $class, $groups, $group)
    {
        // Stripe condition on the base table: ID % groups = group
        $schema = DataObject::getSchema();
        $baseTable = $schema->tableName($schema->baseDataClass($class));
        $stripeCondition = sprintf(
            '"%s"."ID" %% \'%d\' = \'%d\'',
            $baseTable,
            intval($groups),
            intval($group)
        );

        /** @var DataList $records */
        $records = DataList::create($class)
            ->where($stripeCondition)
            ->sort("ID");

        // Restrict to the exact class unless the index includes children
        $indexedClasses = $indexInstance->getClasses();
        $classOptions = $indexedClasses[$class];
        if (!$classOptions['include_children']) {
            $records = $records->filter('ClassName', $class);
        }

        // Collect the IDs of records that must not be indexed.
        // $records->remove($record) cannot be used, as that deletes the
        // record from the database.
        $indexableService = IndexableService::singleton();
        $excludedIds = [];
        foreach ($records as $record) {
            if ($indexableService->isIndexable($record)) {
                continue;
            }
            $excludedIds[] = $record->ID;
        }

        if (empty($excludedIds)) {
            return $records;
        }

        sort($excludedIds);
        return $records->exclude(['ID' => $excludedIds]);
    }
257
258
    /**
     * Clear all records of the given class in the current state ONLY.
     *
     * Optionally delete from a given group (where the group is defined as the ID % total groups).
     * Striping is applied only when $groups is given and non-zero; a null or
     * zero $groups clears every record of the class in the current state.
     *
     * @param SolrIndex $indexInstance Index instance
     * @param string $class Class name
     * @param int|null $groups Number of groups, if clearing from a striped group
     * @param int|null $group Group number, if clearing from a striped group
     */
    protected function clearRecords(SolrIndex $indexInstance, $class, $groups = null, $group = null)
    {
        // Clear by classname
        $conditions = ["+(ClassHierarchy:{$class})"];

        // If grouping, delete from this group only.
        // The check is explicit rather than relying on truthiness, and zero is
        // still treated as "not striped" so that mod(ID, 0) is never emitted.
        // Both values are formatted as integers, matching how
        // getRecordsInGroup() builds the equivalent database condition.
        if ($groups !== null && intval($groups) !== 0) {
            $conditions[] = sprintf(
                '+_query_:"{!frange l=%1$d u=%1$d}mod(ID, %2$d)"',
                intval($group),
                intval($groups)
            );
        }

        // Also filter by state (suffix on document ID)
        $query = new SearchQuery();
        SearchVariant::with($class)
            ->call('alterQuery', $query, $indexInstance);
        if ($query->isfiltered()) {
            $conditions = array_merge($conditions, $indexInstance->getFiltersComponent($query));
        }

        // Invoke delete on index
        $deleteQuery = implode(' ', $conditions);
        $indexInstance
            ->getService()
            ->deleteByQuery($deleteQuery);
    }
292
}
293