Completed
Push — ezp26175-exception_on_non_defa... ( 77d2f3...ca5fc8 )
by
unknown
39:33
created

DoctrineDatabase::index()   C

Complexity

Conditions 9
Paths 38

Size

Total Lines 61
Code Lines 42

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 9
eloc 42
nc 38
nop 1
dl 0
loc 61
rs 6.7603
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * File containing the DoctrineDatabase Content search Gateway class.
4
 *
5
 * @copyright Copyright (C) eZ Systems AS. All rights reserved.
6
 * @license For full copyright and license information view LICENSE file distributed with this source code.
7
 */
8
namespace eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Gateway;
9
10
use eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Gateway;
11
use eZ\Publish\Core\Persistence\Database\DatabaseHandler;
12
use eZ\Publish\Core\Persistence\TransformationProcessor;
13
use eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Repository\SearchIndex;
14
use eZ\Publish\Core\Search\Legacy\Content\FullTextData;
15
use eZ\Publish\SPI\Persistence\Content;
16
use eZ\Publish\SPI\Persistence\Content\Type\Handler as SPITypeHandler;
17
use eZ\Publish\SPI\Search\Field;
18
19
/**
20
 * WordIndexer gateway implementation using the Doctrine database.
21
 */
22
class DoctrineDatabase extends Gateway
23
{
24
    /**
25
     * Maximum INT value accepted by any DBMS.
26
     *
27
     * Note: 2^31-1 seems to be the most reasonable value that should work in any setup.
28
     */
29
    const DB_INT_MAX = 2147483647;
30
31
    /**
32
     * Database handler.
33
     *
34
     * @var \eZ\Publish\Core\Persistence\Database\DatabaseHandler
35
     */
36
    protected $dbHandler;
37
38
    /**
39
     * SPI Content Type Handler.
40
     *
41
     * Need this for being able to pick fields that are searchable.
42
     *
43
     * @var \eZ\Publish\SPI\Persistence\Content\Type\Handler
44
     */
45
    protected $typeHandler;
46
47
    /**
48
     * Transformation processor.
49
     *
50
     * Need this for being able to transform text to a searchable value.
51
     *
52
     * @var \eZ\Publish\Core\Persistence\TransformationProcessor
53
     */
54
    protected $transformationProcessor;
55
56
    /**
57
     * LegacySearchService.
58
     *
59
     * Need this for queries on ezsearch* tables
60
     *
61
     * @var \eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Repository\SearchIndex
62
     */
63
    protected $searchIndex;
64
65
    /**
66
     * Full text search configuration options.
67
     *
68
     * @var array
69
     */
70
    protected $fullTextSearchConfiguration;
71
72
    /**
     * Creates the gateway and stores its collaborators.
     *
     * @param \eZ\Publish\Core\Persistence\Database\DatabaseHandler $dbHandler Used by this class to open and commit transactions.
     * @param \eZ\Publish\SPI\Persistence\Content\Type\Handler $typeHandler Content type handler kept on the gateway.
     * @param \eZ\Publish\Core\Persistence\TransformationProcessor $transformationProcessor Transforms field text before it is split into words.
     * @param \eZ\Publish\Core\Search\Legacy\Content\WordIndexer\Repository\SearchIndex $searchIndex Repository through which the word index is read and written.
     * @param array $fullTextSearchConfiguration Full text search options; the 'commands' entry is read when indexing.
     */
    public function __construct(
        DatabaseHandler $dbHandler,
        SPITypeHandler $typeHandler,
        TransformationProcessor $transformationProcessor,
        SearchIndex $searchIndex,
        array $fullTextSearchConfiguration
    ) {
        // Plain assignments without side effects, so their order is irrelevant.
        $this->fullTextSearchConfiguration = $fullTextSearchConfiguration;
        $this->searchIndex = $searchIndex;
        $this->transformationProcessor = $transformationProcessor;
        $this->typeHandler = $typeHandler;
        $this->dbHandler = $dbHandler;
    }
94
95
    /**
     * Index search engine full text data corresponding to content object field values.
     *
     * Any previously indexed data for the content is removed first, then every
     * field value is transformed, split into words and linked to the content.
     *
     * Ported from the legacy code
     * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L45
     *
     * @param \eZ\Publish\Core\Search\Legacy\Content\FullTextData $fullTextData
     */
    public function index(FullTextData $fullTextData)
    {
        $indexArray = [];
        $indexArrayOnlyWords = [];

        // Remove previously indexed content if exists to avoid keeping in index removed field values
        $this->remove($fullTextData->id);

        foreach ($fullTextData->values as $fullTextValue) {
            /** @var \eZ\Publish\Core\Search\Legacy\Content\FullTextValue $fullTextValue */
            $integerValue = $this->extractIntegerValue($fullTextValue->value);
            $transformedText = $this->transformationProcessor->transform(
                $fullTextValue->value,
                $this->fullTextSearchConfiguration['commands']
            );

            foreach ($this->extractIndexWords($transformedText) as $word) {
                $indexArray[] = [
                    'Word' => $word,
                    'ContentClassAttributeID' => $fullTextValue->fieldDefinitionId,
                    'identifier' => $fullTextValue->id,
                    'integer_value' => $integerValue,
                ];
                $indexArrayOnlyWords[$word] = 1;
            }
        }

        $wordIDArray = $this->buildWordIDArray(array_keys($indexArrayOnlyWords));

        // Link the words to the content in batches of 1000, carrying the placement over between batches.
        $placement = 0;
        $this->dbHandler->beginTransaction();
        foreach (array_chunk($indexArray, 1000) as $indexArrayChunk) {
            $placement = $this->indexWords(
                $fullTextData,
                $indexArrayChunk,
                $wordIDArray,
                $placement
            );
        }
        $this->dbHandler->commit();
    }

    /**
     * Returns the integer stored next to each word of a field value.
     *
     * @param mixed $value raw field value
     *
     * @return int the integer cast of a numeric value, or 0 when the value is not numeric
     *             or does not fit into a signed 32 bit database INT column
     */
    private function extractIntegerValue($value)
    {
        if (!is_numeric(trim($value))) {
            return 0;
        }

        $integerValue = (int)$value;
        // Guard both ends of the range, not only the upper one.
        if ($integerValue > self::DB_INT_MAX || $integerValue < -self::DB_INT_MAX - 1) {
            return 0;
        }

        return $integerValue;
    }

    /**
     * Splits transformed text on spaces into the list of words to index.
     *
     * @param string $transformedText text already processed by the transformation processor
     *
     * @return string[] words in their original order; a word starting with "www." is followed
     *                  by an additional entry holding the same word without that prefix
     */
    private function extractIndexWords($transformedText)
    {
        $words = [];

        foreach (explode(' ', $transformedText) as $word) {
            if (trim($word) === '') {
                continue;
            }
            // words stored in search index are limited to 150 characters
            if (mb_strlen($word) > 150) {
                $word = mb_substr($word, 0, 150);
            }
            $words[] = $word;

            // if we have "www." before the word then
            // treat it as url and add additional entry to the index
            if (mb_substr(mb_strtolower($word), 0, 4) === 'www.') {
                $additionalUrlWord = mb_substr($word, 4);
                // A bare "www." leaves nothing behind; do not index an empty word.
                if (trim($additionalUrlWord) !== '') {
                    $words[] = $additionalUrlWord;
                }
            }
        }

        return $words;
    }
164
165
    /**
     * Indexes several FullTextData objects, one after another, by delegating to index().
     *
     * Note: for large amounts of data, call this method repeatedly with a limited set of
     * FullTextData objects. How many fit in memory depends on the server, the size of the
     * FullTextData objects and the PHP version.
     *
     * @param \eZ\Publish\Core\Search\Legacy\Content\FullTextData[] $fullTextBulkData
     */
    public function bulkIndex(array $fullTextBulkData)
    {
        foreach ($fullTextBulkData as $singleContentData) {
            // Each item is indexed on its own, exactly as a direct index() call would do.
            $this->index($singleContentData);
        }
    }
180
181
    /**
     * Remove the words linked to the given content from the index.
     *
     * Ported from the legacy code
     * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L386
     *
     * @param mixed $contentId
     * @param mixed|null $versionId accepted for interface compatibility; this implementation never reads it
     *
     * @return bool always true
     */
    public function remove($contentId, $versionId = null)
    {
        $this->dbHandler->beginTransaction();

        // Fetch all the words linked to the content; nothing needs cleaning up when there are none.
        $linkedWordIds = $this->searchIndex->getContentObjectWords($contentId);
        if (count($linkedWordIds) > 0) {
            // Decrease the object count on those words, then drop orphaned words and the content's links.
            $this->searchIndex->decrementWordObjectCount($linkedWordIds);
            $this->searchIndex->deleteWordsWithoutObjects();
            $this->searchIndex->deleteObjectWordsLink($contentId);
        }

        $this->dbHandler->commit();

        return true;
    }
210
211
    /**
     * Removes the entire search index by delegating to the search index repository.
     */
    public function purgeIndex()
    {
        $searchIndex = $this->searchIndex;
        $searchIndex->purge();
    }
218
219
    /**
     * Links a batch of words to the content, one index row per word occurrence.
     *
     * Previous/next word links are computed within the given batch only: the first
     * entry gets 0 as its previous word id and the last entry gets 0 as its next word id.
     *
     * Ported from the legacy code
     *
     * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L255
     *
     * @param \eZ\Publish\Core\Search\Legacy\Content\FullTextData $fullTextData
     * @param array $indexArray list (keys 0..n-1) of entries with the keys 'Word',
     *                          'ContentClassAttributeID', 'identifier' and 'integer_value'
     * @param array $wordIDArray map of word => word id
     * @param int $placement placement of the first word of this batch
     *
     * @return int last placement
     */
    private function indexWords(FullTextData $fullTextData, array $indexArray, array $wordIDArray, $placement = 0)
    {
        $contentId = $fullTextData->id;
        // Computed once: the batch does not change while it is being iterated.
        $entryCount = count($indexArray);
        // Frequency is not calculated here; every link is stored with 0.
        $frequency = 0;
        $prevWordId = 0;

        for ($i = 0; $i < $entryCount; ++$i) {
            $entry = $indexArray[$i];
            $wordId = $wordIDArray[$entry['Word']];

            $nextWordId = 0;
            if (isset($indexArray[$i + 1])) {
                $nextWordId = $wordIDArray[$indexArray[$i + 1]['Word']];
            }

            $this->searchIndex->addObjectWordLink(
                $wordId,
                $contentId,
                $frequency,
                $placement,
                $nextWordId,
                $prevWordId,
                $fullTextData->contentTypeId,
                $entry['ContentClassAttributeID'],
                $fullTextData->published,
                $fullTextData->sectionId,
                $entry['identifier'],
                $entry['integer_value']
            );

            $prevWordId = $wordId;
            ++$placement;
        }

        return $placement;
    }
260
261
    /**
     * Build WordIDArray and update ezsearch_word table.
     *
     * Words are processed in chunks of 500. For each chunk the object count of the
     * words already present is incremented and the missing words are added.
     *
     * Ported from the legacy code
     *
     * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L155
     *
     * @param array $indexArrayOnlyWords words for object to add
     *
     * @return array wordIDArray, a map of word => word id
     */
    private function buildWordIDArray(array $indexArrayOnlyWords)
    {
        $wordArray = [];

        // store the words in the index and remember the ID
        $this->dbHandler->beginTransaction();
        foreach (array_chunk($indexArrayOnlyWords, 500) as $wordChunk) {
            // Fetch already indexed words from database
            $existingWordIds = [];
            $existingWords = [];
            foreach ($this->searchIndex->getWords($wordChunk) as $wordRow) {
                $existingWordIds[] = $wordRow['id'];
                $existingWords[] = $wordRow['word'];
                $wordArray[$wordRow['word']] = $wordRow['id'];
            }

            // Update the object count of existing words by one. Only the ids found for
            // this chunk are passed on, so words from earlier chunks are not counted again.
            if (count($existingWordIds) > 0) {
                $this->searchIndex->incrementWordObjectCount($existingWordIds);
            }

            // Insert any new words and read them back to learn their ids
            $newWordArray = array_diff($wordChunk, $existingWords);
            if (count($newWordArray) > 0) {
                $this->searchIndex->addWords($newWordArray);
                foreach ($this->searchIndex->getWords($newWordArray) as $newWordRow) {
                    $wordLowercase = $this->transformationProcessor->transformByGroup($newWordRow['word'], 'lowercase');
                    $wordArray[$wordLowercase] = $newWordRow['id'];
                }
            }
        }
        $this->dbHandler->commit();

        return $wordArray;
    }
315
}
316