Completed
Push — master ( 7afdbb...e391f2 )
by André
24:54
created

DoctrineDatabase::index()   C

Complexity

Conditions 9
Paths 38

Size

Total Lines 60
Code Lines 42

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 9
eloc 42
c 1
b 0
f 0
nc 38
nop 1
dl 0
loc 60
rs 6.8358

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

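Commonly applied refactorings include Extract Method; when many parameters or temporary variables are involved, Replace Temp with Query, Introduce Parameter Object and Preserve Whole Object can help as well.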

1
<?php
2
/**
3
 * File containing the DoctrineDatabase Content search Gateway class.
4
 *
5
 * @copyright Copyright (C) eZ Systems AS. All rights reserved.
6
 * @license For full copyright and license information view LICENSE file distributed with this source code.
7
 */
8
namespace eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Gateway;
9
10
use eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Gateway;
11
use eZ\Publish\Core\Persistence\Database\DatabaseHandler;
12
use eZ\Publish\Core\Persistence\TransformationProcessor;
13
use eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Repository\SearchIndex;
14
use eZ\Publish\Core\Search\Legacy\Content\FullTextData;
15
use eZ\Publish\SPI\Persistence\Content;
16
use eZ\Publish\SPI\Persistence\Content\Type\Handler as SPITypeHandler;
17
use eZ\Publish\SPI\Search\Field;
18
19
/**
20
 * WordIndexer gateway implementation using the Doctrine database.
21
 */
22
class DoctrineDatabase extends Gateway
23
{
24
    /**
     * Upper bound for values written to integer columns.
     *
     * Note: 2^31-1 seems to be the most reasonable value that should work in any setup.
     */
    const DB_INT_MAX = 2147483647;

    /**
     * Database handler, used by this gateway to open and commit transactions.
     *
     * @var \eZ\Publish\Core\Persistence\Database\DatabaseHandler
     */
    protected $dbHandler;

    /**
     * SPI Content Type Handler.
     *
     * Intended for picking the fields that are searchable.
     *
     * @var \eZ\Publish\SPI\Persistence\Content\Type\Handler
     */
    protected $typeHandler;

    /**
     * Transformation processor.
     *
     * Used to transform text into its searchable (lowercase) form.
     *
     * @var \eZ\Publish\Core\Persistence\TransformationProcessor
     */
    protected $transformationProcessor;

    /**
     * Search index repository.
     *
     * All queries on the ezsearch* tables go through this object.
     *
     * @var \eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Repository\SearchIndex
     */
    protected $searchIndex;
64
65
    /**
     * Creates the gateway from the services it collaborates with.
     *
     * @param \eZ\Publish\Core\Persistence\Database\DatabaseHandler $dbHandler
     * @param \eZ\Publish\SPI\Persistence\Content\Type\Handler $typeHandler
     * @param \eZ\Publish\Core\Persistence\TransformationProcessor $transformationProcessor
     * @param \eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Repository\SearchIndex $searchIndex
     */
    public function __construct(
        DatabaseHandler $dbHandler,
        SPITypeHandler $typeHandler,
        TransformationProcessor $transformationProcessor,
        SearchIndex $searchIndex
    ) {
        $this->searchIndex = $searchIndex;
        $this->transformationProcessor = $transformationProcessor;
        $this->typeHandler = $typeHandler;
        $this->dbHandler = $dbHandler;
    }
84
85
    /**
     * Index search engine full text data corresponding to content object field values.
     *
     * Each field value is split into words; every word is lowercased, cut to 150 characters and
     * linked to the content item. A word starting with "www." is additionally indexed without
     * that prefix.
     *
     * Ported from the legacy code
     * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L45
     *
     * @param \eZ\Publish\Core\Search\Legacy\Content\FullTextData $fullTextData
     */
    public function index(FullTextData $fullTextData)
    {
        $indexArray = [];
        $indexArrayOnlyWords = [];

        foreach ($fullTextData->values as $fullTextValue) {
            /** @var \eZ\Publish\Core\Search\Legacy\Content\FullTextValue $fullTextValue */
            $integerValue = $this->extractIntegerValue($fullTextValue->value);

            foreach ($this->extractIndexableWords($fullTextValue->value) as $word) {
                $indexArray[] = [
                    'Word' => $word,
                    'ContentClassAttributeID' => $fullTextValue->fieldDefinitionId,
                    'identifier' => $fullTextValue->id,
                    'integer_value' => $integerValue,
                ];
                $indexArrayOnlyWords[$word] = 1;
            }
        }

        $wordIDArray = $this->buildWordIDArray(array_keys($indexArrayOnlyWords));

        // NOTE(review): no rollback is issued here if linking throws - confirm that callers
        // deal with the transaction that is left open in that case.
        $this->dbHandler->beginTransaction();
        // All entries are linked in one pass: indexWords() restarts its previous/next word
        // tracking on every call, so feeding it slices would break the neighbour links at
        // every slice boundary.
        $this->indexWords($fullTextData, $indexArray, $wordIDArray, 0);
        $this->dbHandler->commit();
    }

    /**
     * Returns the value to store as "integer_value" for a field value.
     *
     * @param string $value field value text
     *
     * @return int 0 when the value is not numeric or lies outside the signed 32-bit range
     */
    private function extractIntegerValue($value)
    {
        if (!is_numeric(trim($value))) {
            return 0;
        }

        $integerValue = (int)$value;
        if ($integerValue > self::DB_INT_MAX || $integerValue < -self::DB_INT_MAX - 1) {
            return 0;
        }

        return $integerValue;
    }

    /**
     * Splits a field value into the normalized words that go into the index.
     *
     * @param string $value field value text
     *
     * @return string[] words in the order they have to be indexed
     */
    private function extractIndexableWords($value)
    {
        $words = [];

        // Only the space character separates words; other whitespace stays part of a word
        foreach (explode(' ', $value) as $word) {
            if (trim($word) === '') {
                continue;
            }

            $word = $this->transformationProcessor->transformByGroup($word, 'lowercase');
            // words stored in search index are limited to 150 characters
            if (mb_strlen($word) > 150) {
                $word = mb_substr($word, 0, 150);
            }
            $words[] = $word;

            // a word starting with "www." is treated as a URL and gets an additional
            // entry without that prefix
            if (mb_substr(mb_strtolower($word), 0, 4) === 'www.') {
                $additionalUrlWord = mb_substr($word, 4);
                // a bare "www." leaves nothing behind; do not index an empty word
                if (trim($additionalUrlWord) !== '') {
                    $words[] = $additionalUrlWord;
                }
            }
        }

        return $words;
    }
153
154
    /**
     * Indexes every FullTextData object of the given array, one index() call per object.
     *
     * Note: on large amounts of data make sure to iterate with several calls to this function with
     * a limited set of FullTextData objects. Amount you have memory for depends on server, size
     * of FullTextData objects & PHP version.
     *
     * @param \eZ\Publish\Core\Search\Legacy\Content\FullTextData[] $fullTextBulkData
     */
    public function bulkIndex(array $fullTextBulkData)
    {
        foreach ($fullTextBulkData as $contentFullTextData) {
            $this->index($contentFullTextData);
        }
    }
169
170
    /**
     * Remove a content item from the index.
     *
     * Ported from the legacy code
     * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L386
     *
     * @param mixed $contentId
     * @param mixed|null $versionId not used by this implementation; the index is cleaned per content id
     *
     * @return bool always true
     */
    public function remove($contentId, $versionId = null)
    {
        $this->dbHandler->beginTransaction();

        // the words linked to the content get their object count decreased before the
        // words nobody uses any more and the links themselves are deleted
        $linkedWordIds = $this->searchIndex->getContentObjectWords($contentId);
        if (count($linkedWordIds) > 0) {
            $this->searchIndex->decrementWordObjectCount($linkedWordIds);
            $this->searchIndex->deleteWordsWithoutObjects();
            $this->searchIndex->deleteObjectWordsLink($contentId);
        }

        $this->dbHandler->commit();

        return true;
    }
199
200
    /**
     * Purges the whole search index by delegating to the search index repository.
     */
    public function purgeIndex()
    {
        $this->searchIndex->purge();
    }
207
208
    /**
     * Links the given words to the content item, one link per entry.
     *
     * Each link records the ids of the neighbouring words of the given list; the first entry
     * gets 0 as previous word id and the last entry gets 0 as next word id. The frequency is
     * always stored as 0.
     *
     * Ported from the legacy code
     *
     * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L255
     *
     * @param \eZ\Publish\Core\Search\Legacy\Content\FullTextData $fullTextData
     * @param array $indexArray zero-based list of entries with the keys 'Word',
     *        'ContentClassAttributeID', 'identifier' and 'integer_value'
     * @param array $wordIDArray map of word => word id, must contain every word of $indexArray
     * @param int $placement placement of the first entry
     *
     * @return int last placement, i.e. the placement the next entry would get
     */
    private function indexWords(FullTextData $fullTextData, array $indexArray, array $wordIDArray, $placement = 0)
    {
        $contentId = $fullTextData->id;
        // the list does not change while iterating, so its size is computed only once
        $entryCount = count($indexArray);
        $prevWordId = 0;
        $frequency = 0;

        for ($i = 0; $i < $entryCount; ++$i) {
            $entry = $indexArray[$i];
            $wordId = $wordIDArray[$entry['Word']];

            $nextWordId = 0;
            if (isset($indexArray[$i + 1])) {
                $nextWordId = $wordIDArray[$indexArray[$i + 1]['Word']];
            }

            $this->searchIndex->addObjectWordLink(
                $wordId,
                $contentId,
                $frequency,
                $placement,
                $nextWordId,
                $prevWordId,
                $fullTextData->contentTypeId,
                $entry['ContentClassAttributeID'],
                $fullTextData->published,
                $fullTextData->sectionId,
                $entry['identifier'],
                $entry['integer_value']
            );

            $prevWordId = $wordId;
            ++$placement;
        }

        return $placement;
    }
249
250
    /**
     * Build WordIDArray and update ezsearch_word table.
     *
     * Words are processed in chunks of 500: words that are already stored get their object
     * count incremented, all other words are added. Both kinds end up in the returned map.
     *
     * Ported from the legacy code
     *
     * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L155
     *
     * @param array $indexArrayOnlyWords words for object to add
     *
     * @return array wordIDArray, a map of word => word id
     */
    private function buildWordIDArray(array $indexArrayOnlyWords)
    {
        $wordArray = [];

        // store the words in the index and remember the ID
        $this->dbHandler->beginTransaction();
        foreach (array_chunk($indexArrayOnlyWords, 500) as $wordArrayChunk) {
            // Build a hash of the words of this chunk that are already indexed. The id list
            // is local to the chunk: carrying it over to the next chunk would increment the
            // object count of the same words once more for every following chunk.
            $existingWordIds = [];
            $existingWordArray = [];
            foreach ($this->searchIndex->getWords($wordArrayChunk) as $wordRow) {
                $existingWordIds[] = $wordRow['id'];
                $existingWordArray[] = $wordRow['word'];
                $wordArray[$wordRow['word']] = $wordRow['id'];
            }

            // Update the object count of existing words by one
            if (count($existingWordIds) > 0) {
                $this->searchIndex->incrementWordObjectCount($existingWordIds);
            }

            // Insert the new words, if there are any, and read back their ids
            $newWordArray = array_diff($wordArrayChunk, $existingWordArray);
            if (count($newWordArray) > 0) {
                $this->searchIndex->addWords($newWordArray);
                foreach ($this->searchIndex->getWords($newWordArray) as $wordRow) {
                    $wordLowercase = $this->transformationProcessor->transformByGroup($wordRow['word'], 'lowercase');
                    $wordArray[$wordLowercase] = $wordRow['id'];
                }
            }
        }
        $this->dbHandler->commit();

        return $wordArray;
    }
304
}
305