Completed
Push — EZP-31287 ( af9523...59859e )
by
unknown
18:52
created

DoctrineDatabase::index()   C

Complexity

Conditions 11
Paths 74

Size

Total Lines 75

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 11
nc 74
nop 1
dl 0
loc 75
rs 6.3987
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * @copyright Copyright (C) eZ Systems AS. All rights reserved.
5
 * @license For full copyright and license information view LICENSE file distributed with this source code.
6
 */
7
namespace eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Gateway;
8
9
use eZ\Publish\Core\Persistence\Legacy\Content\Language\MaskGenerator;
10
use eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Gateway;
11
use eZ\Publish\Core\Persistence\Database\DatabaseHandler;
12
use eZ\Publish\Core\Persistence\TransformationProcessor;
13
use eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Repository\SearchIndex;
14
use eZ\Publish\Core\Search\Legacy\Content\FullTextData;
15
use eZ\Publish\SPI\Persistence\Content;
16
use eZ\Publish\SPI\Persistence\Content\Type\Handler as SPITypeHandler;
17
use eZ\Publish\SPI\Search\Field;
18
19
/**
20
 * WordIndexer gateway implementation using the Doctrine database.
21
 */
22
class DoctrineDatabase extends Gateway
23
{
24
    /**
25
     * Max acceptable by any DBMS INT value.
26
     *
27
     * Note: 2^31-1 seems to be the most reasonable value that should work in any setup.
28
     */
29
    const DB_INT_MAX = 2147483647;
30
31
    /**
32
     * Database handler.
33
     *
34
     * @var \eZ\Publish\Core\Persistence\Database\DatabaseHandler
35
     *
36
     * @deprecated Start to use DBAL $connection instead.
37
     */
38
    protected $dbHandler;
39
40
    /**
41
     * SPI Content Type Handler.
42
     *
43
     * Need this for being able to pick fields that are searchable.
44
     *
45
     * @var \eZ\Publish\SPI\Persistence\Content\Type\Handler
46
     */
47
    protected $typeHandler;
48
49
    /**
50
     * Transformation processor.
51
     *
52
     * Need this for being able to transform text to searchable value
53
     *
54
     * @var \eZ\Publish\Core\Persistence\TransformationProcessor
55
     */
56
    protected $transformationProcessor;
57
58
    /**
59
     * LegacySearchService.
60
     *
61
     * Need this for queries on ezsearch* tables
62
     *
63
     * @var \eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Repository\SearchIndex
64
     */
65
    protected $searchIndex;
66
67
    /** @var \eZ\Publish\Core\Persistence\Legacy\Content\Language\MaskGenerator */
68
    private $languageMaskGenerator;
69
70
    /**
71
     * Full text search configuration options.
72
     *
73
     * @var array
74
     */
75
    protected $fullTextSearchConfiguration;
76
77
    /**
78
     * Construct from handler handler.
79
     *
80
     * @param \eZ\Publish\Core\Persistence\Database\DatabaseHandler $dbHandler
81
     * @param \eZ\Publish\SPI\Persistence\Content\Type\Handler $typeHandler
82
     * @param \eZ\Publish\Core\Persistence\TransformationProcessor $transformationProcessor
83
     * @param \eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Repository\SearchIndex $searchIndex
84
     * @param \eZ\Publish\Core\Persistence\Legacy\Content\Language\MaskGenerator $languageMaskGenerator
85
     * @param array $fullTextSearchConfiguration
86
     */
87
    public function __construct(
88
        DatabaseHandler $dbHandler,
89
        SPITypeHandler $typeHandler,
90
        TransformationProcessor $transformationProcessor,
91
        SearchIndex $searchIndex,
92
        MaskGenerator $languageMaskGenerator,
93
        array $fullTextSearchConfiguration
94
    ) {
95
        $this->dbHandler = $dbHandler;
96
        $this->typeHandler = $typeHandler;
97
        $this->transformationProcessor = $transformationProcessor;
98
        $this->searchIndex = $searchIndex;
99
        $this->fullTextSearchConfiguration = $fullTextSearchConfiguration;
100
        $this->languageMaskGenerator = $languageMaskGenerator;
101
    }
102
103
    /**
104
     * Index search engine full text data corresponding to content object field values.
105
     *
106
     * Ported from the legacy code
107
     *
108
     * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L45
109
     *
110
     * @param \eZ\Publish\Core\Search\Legacy\Content\FullTextData $fullTextData
111
     */
112
    public function index(FullTextData $fullTextData)
113
    {
114
        $indexArray = [];
115
        $indexArrayOnlyWords = [];
116
        $wordCount = 0;
117
        $placement = 0;
118
119
        // Remove previously indexed content if exists to avoid keeping in index removed field values
120
        $this->remove($fullTextData->id);
121
        foreach ($fullTextData->values as $fullTextValue) {
122
            /** @var \eZ\Publish\Core\Search\Legacy\Content\FullTextValue $fullTextValue */
123
            if (is_numeric(trim($fullTextValue->value))) {
124
                $integerValue = (int)$fullTextValue->value;
125
                if ($integerValue > self::DB_INT_MAX) {
126
                    $integerValue = 0;
127
                }
128
            } else {
129
                $integerValue = 0;
130
            }
131
            $text = $this->transformationProcessor->transform(
132
                $fullTextValue->value,
133
                !empty($fullTextValue->transformationRules)
134
                    ? $fullTextValue->transformationRules
135
                    : $this->fullTextSearchConfiguration['commands']
136
            );
137
            // split by non-words
138
            $wordArray = $fullTextValue->splitFlag ? preg_split('/\W/u', $text, -1, PREG_SPLIT_NO_EMPTY) : [$text];
139
            foreach ($wordArray as $word) {
140
                if (trim($word) === '') {
141
                    continue;
142
                }
143
                // words stored in search index are limited to 150 characters
144
                if (mb_strlen($word) > 150) {
145
                    $word = mb_substr($word, 0, 150);
146
                }
147
                $indexArray[] = [
148
                    'Word' => $word,
149
                    'ContentClassAttributeID' => $fullTextValue->fieldDefinitionId,
150
                    'identifier' => $fullTextValue->fieldDefinitionIdentifier,
151
                    'integer_value' => $integerValue,
152
                    'language_code' => $fullTextValue->languageCode,
153
                    'is_main_and_always_available' => $fullTextValue->isMainAndAlwaysAvailable,
154
                ];
155
                $indexArrayOnlyWords[$word] = 1;
156
                ++$wordCount;
157
                // if we have "www." before word than
158
                // treat it as url and add additional entry to the index
159
                if (mb_strtolower(mb_substr($word, 0, 4)) === 'www.') {
160
                    $additionalUrlWord = substr($word, 4);
161
                    $indexArray[] = [
162
                        'Word' => $additionalUrlWord,
163
                        'ContentClassAttributeID' => $fullTextValue->fieldDefinitionId,
164
                        'identifier' => $fullTextValue->fieldDefinitionIdentifier,
165
                        'integer_value' => $integerValue,
166
                        'language_code' => $fullTextValue->languageCode,
167
                        'is_main_and_always_available' => $fullTextValue->isMainAndAlwaysAvailable,
168
                    ];
169
                    $indexArrayOnlyWords[$additionalUrlWord] = 1;
170
                    ++$wordCount;
171
                }
172
            }
173
        }
174
175
        $wordIDArray = $this->buildWordIDArray(array_keys($indexArrayOnlyWords));
176
        $this->dbHandler->beginTransaction();
177
        for ($arrayCount = 0; $arrayCount < $wordCount; $arrayCount += 1000) {
178
            $placement = $this->indexWords(
179
                $fullTextData,
180
                array_slice($indexArray, $arrayCount, 1000),
181
                $wordIDArray,
182
                $placement
183
            );
184
        }
185
        $this->dbHandler->commit();
186
    }
187
188
    /**
189
     * Indexes an array of FullTextData objects.
190
     *
191
     * Note: on large amounts of data make sure to iterate with several calls to this function with
192
     * a limited set of FullTextData objects. Amount you have memory for depends on server, size
193
     * of FullTextData objects & PHP version.
194
     *
195
     * @param \eZ\Publish\Core\Search\Legacy\Content\FullTextData[] $fullTextBulkData
196
     */
197
    public function bulkIndex(array $fullTextBulkData)
198
    {
199
        foreach ($fullTextBulkData as $fullTextData) {
200
            $this->index($fullTextData);
201
        }
202
    }
203
204
    /**
205
     * Remove whole content or a specific version from index.
206
     *
207
     * Ported from the legacy code
208
     *
209
     * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L386
210
     *
211
     * @param mixed $contentId
212
     * @param mixed|null $versionId
213
     *
214
     * @return bool
215
     */
216
    public function remove($contentId, $versionId = null)
217
    {
218
        $doDelete = false;
219
        $this->dbHandler->beginTransaction();
220
        // fetch all the words and decrease the object count on all the words
221
        $wordIDList = $this->searchIndex->getContentObjectWords($contentId);
222
        if (count($wordIDList) > 0) {
223
            $this->searchIndex->decrementWordObjectCount($wordIDList);
224
            $doDelete = true;
225
        }
226
        if ($doDelete) {
227
            $this->searchIndex->deleteWordsWithoutObjects();
228
            $this->searchIndex->deleteObjectWordsLink($contentId);
229
        }
230
        $this->dbHandler->commit();
231
232
        return true;
233
    }
234
235
    /**
236
     * Remove entire search index.
237
     */
238
    public function purgeIndex()
239
    {
240
        $this->searchIndex->purge();
241
    }
242
243
    /**
244
     * Index wordIndex.
245
     *
246
     * Ported from the legacy code
247
     *
248
     * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L255
249
     *
250
     * @param \eZ\Publish\Core\Search\Legacy\Content\FullTextData $fullTextData
251
     * @param array $indexArray
252
     * @param array $wordIDArray
253
     * @param int $placement
254
     *
255
     * @return int last placement
256
     */
257
    private function indexWords(FullTextData $fullTextData, array $indexArray, array $wordIDArray, $placement = 0)
258
    {
259
        $contentId = $fullTextData->id;
260
261
        $prevWordId = 0;
262
263
        for ($i = 0; $i < count($indexArray); ++$i) {
0 ignored issues
show
Performance Best Practice introduced by
It seems like you are calling the size function count() as part of the test condition. You might want to compute the size beforehand, and not on each iteration.

If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration:

for ($i=0; $i<count($array); $i++) { // calls count() on each iteration
}

// Better
for ($i=0, $c=count($array); $i<$c; $i++) { // calls count() just once
}
Loading history...
264
            $indexWord = $indexArray[$i]['Word'];
265
            $contentFieldId = $indexArray[$i]['ContentClassAttributeID'];
266
            $identifier = $indexArray[$i]['identifier'];
267
            $integerValue = $indexArray[$i]['integer_value'];
268
            $languageCode = $indexArray[$i]['language_code'];
269
            $wordId = $wordIDArray[$indexWord];
270
            $isMainAndAlwaysAvailable = $indexArray[$i]['is_main_and_always_available'];
271
            $languageMask = $this->languageMaskGenerator->generateLanguageMaskFromLanguageCodes(
272
                [$languageCode],
273
                $isMainAndAlwaysAvailable
274
            );
275
276
            if (isset($indexArray[$i + 1])) {
277
                $nextIndexWord = $indexArray[$i + 1]['Word'];
278
                $nextWordId = $wordIDArray[$nextIndexWord];
279
            } else {
280
                $nextWordId = 0;
281
            }
282
            $frequency = 0;
283
            $this->searchIndex->addObjectWordLink(
284
                $wordId,
285
                $contentId,
286
                $frequency,
287
                $placement,
288
                $nextWordId,
289
                $prevWordId,
290
                $fullTextData->contentTypeId,
291
                $contentFieldId,
292
                $fullTextData->published,
293
                $fullTextData->sectionId,
294
                $identifier,
295
                $integerValue,
296
                $languageMask
297
            );
298
            $prevWordId = $wordId;
299
            ++$placement;
300
        }
301
302
        return $placement;
303
    }
304
305
    /**
306
     * Build WordIDArray and update ezsearch_word table.
307
     *
308
     * Ported from the legacy code
309
     *
310
     * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L155
311
     *
312
     * @param array $indexArrayOnlyWords words for object to add
313
     *
314
     * @return array wordIDArray
315
     */
316
    private function buildWordIDArray(array $indexArrayOnlyWords)
317
    {
318
        $wordCount = count($indexArrayOnlyWords);
319
        $wordIDArray = [];
320
        $wordArray = [];
321
322
        // store the words in the index and remember the ID
323
        $this->dbHandler->beginTransaction();
324
        for ($arrayCount = 0; $arrayCount < $wordCount; $arrayCount += 500) {
325
            // Fetch already indexed words from database
326
            $wordArrayChuck = array_slice($indexArrayOnlyWords, $arrayCount, 500);
327
            $wordRes = $this->searchIndex->getWords($wordArrayChuck);
328
329
            // Build a has of the existing words
330
            $wordResCount = count($wordRes);
331
            $existingWordArray = [];
332
            for ($i = 0; $i < $wordResCount; ++$i) {
333
                $wordIDArray[] = $wordRes[$i]['id'];
334
                $existingWordArray[] = $wordRes[$i]['word'];
335
                $wordArray[$wordRes[$i]['word']] = $wordRes[$i]['id'];
336
            }
337
338
            // Update the object count of existing words by one
339
            if (count($wordIDArray) > 0) {
340
                $this->searchIndex->incrementWordObjectCount($wordIDArray);
341
            }
342
343
            // Insert if there is any news words
344
            $newWordArray = array_diff($wordArrayChuck, $existingWordArray);
345
            if (count($newWordArray) > 0) {
346
                $this->searchIndex->addWords($newWordArray);
347
                $newWordRes = $this->searchIndex->getWords($newWordArray);
348
                $newWordCount = count($newWordRes);
349
                for ($i = 0; $i < $newWordCount; ++$i) {
350
                    $wordLowercase = $this->transformationProcessor->transformByGroup($newWordRes[$i]['word'], 'lowercase');
351
                    $wordArray[$wordLowercase] = $newWordRes[$i]['id'];
352
                }
353
            }
354
        }
355
        $this->dbHandler->commit();
356
357
        return $wordArray;
358
    }
359
}
360