Passed
Pull Request — master (#195)
by
unknown
06:45
created

ElasticSearch::index()   F

Complexity

Conditions 19
Paths 3841

Size

Total Lines 113
Code Lines 66

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 19
eloc 66
c 1
b 0
f 0
nc 3841
nop 1
dl 0
loc 113
rs 0.3499

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

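Commonly applied refactorings include Extract Method: move a cohesive part of the long method into a new, well-named method and call it from the original.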

1
<?php
2
namespace EWW\Dpf\Services\ElasticSearch;
3
4
/*
5
 * This file is part of the TYPO3 CMS project.
6
 *
7
 * It is free software; you can redistribute it and/or modify it under
8
 * the terms of the GNU General Public License, either version 2
9
 * of the License, or any later version.
10
 *
11
 * For the full copyright and license information, please read the
12
 * LICENSE.txt file that was distributed with this source code.
13
 *
14
 * The TYPO3 project - inspiring people to share!
15
 */
16
17
use Elasticsearch\ClientBuilder;
18
use EWW\Dpf\Domain\Workflow\DocumentWorkflow;
19
use TYPO3\CMS\Extbase\Object\ObjectManager;
20
use EWW\Dpf\Configuration\ClientConfigurationManager;
21
use EWW\Dpf\Domain\Model\Document;
22
use TYPO3\CMS\Core\Utility\GeneralUtility;
23
use TYPO3\CMS\Core\Log\LogManager;
24
25
class ElasticSearch
26
{
27
    /**
     * Extbase configuration manager; getSettings() reads the framework
     * configuration through it.
     *
     * @var \TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface
     * @inject
     */
    protected $configurationManager;

    /**
     * Repository used by index() to load the frontend user that created a
     * document.
     *
     * @var \EWW\Dpf\Domain\Repository\FrontendUserRepository
     * @inject
     */
    protected $frontendUserRepository;

    /**
     * Elasticsearch client, built in the constructor.
     */
    protected $client;

    /**
     * Elasticsearch host. The constructor replaces this default with the
     * host returned by the client configuration.
     */
    protected $server = 'host.docker.internal';

    /**
     * Elasticsearch port. The constructor replaces this default with the
     * port returned by the client configuration.
     */
    protected $port = '9200';

    /**
     * Name of the index all operations of this service work on.
     */
    protected $indexName = 'kitodo_publication';

    /**
     * Response of the most recent search() call.
     */
    protected $results;

    /**
     * Mapper that turns a document into the JSON that gets indexed.
     */
    protected $elasticsearchMapper;
57
58
59
    /**
     * Builds the Elasticsearch client and makes sure the index exists.
     *
     * Host and port are read from the ClientConfigurationManager and replace
     * the default values of $server and $port.
     */
    public function __construct()
    {
        $objects = GeneralUtility::makeInstance(ObjectManager::class);

        $this->elasticsearchMapper = $objects->get(ElasticsearchMapper::class);

        $configuration = $objects->get(ClientConfigurationManager::class);
        $this->server = $configuration->getElasticSearchHost();
        $this->port = $configuration->getElasticSearchPort();

        // A single node, addressed as "host:port".
        $this->client = ClientBuilder::create()
            ->setHosts([$this->server . ':' . $this->port])
            ->build();

        $this->initializeIndex($this->indexName);
    }
84
85
    /**
     * Returns the 'settings' part of the Extbase framework configuration
     * (the TypoScript settings).
     *
     * @return mixed
     */
    public function getSettings()
    {
        $type = \TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface::CONFIGURATION_TYPE_FRAMEWORK;
        $configuration = $this->configurationManager->getConfiguration($type);

        return $configuration['settings'];
    }
97
98
99
    /**
     * Creates the index named by $indexName unless it already exists.
     *
     * A new index is created with the analysis settings (ngram filter,
     * keyword_lowercase analyzer, lowercase_normalizer) and the field
     * mappings assembled below.
     *
     * @param string $indexName
     */
    protected function initializeIndex($indexName)
    {
        if ($this->client->indices()->exists(['index' => $indexName])) {
            return;
        }

        // Field definitions that occur more than once in the mapping.
        $keyword = ['type' => 'keyword'];
        $disabled = ['enabled' => false];
        $day = ['type' => 'date', 'format' => 'yyyy-MM-dd'];
        $textWithNormalizedKeyword = [
            'type' => 'text',
            'fields' => [
                'keyword' => [
                    'type' => 'keyword',
                    'normalizer' => 'lowercase_normalizer',
                ],
            ],
        ];

        $analysis = [
            'filter' => [
                'ngram' => [
                    'type' => 'ngram',
                    'min_gram' => 3,
                    'max_gram' => 3,
                    'token_chars' => ['letter', 'digit'],
                ],
            ],
            'analyzer' => [
                'keyword_lowercase' => [
                    'tokenizer' => 'keyword',
                    'filter' => ['lowercase'],
                ],
            ],
            'normalizer' => [
                'lowercase_normalizer' => [
                    'type' => 'custom',
                    'char_filter' => [],
                    'filter' => ['lowercase', 'asciifolding'],
                ],
            ],
        ];

        $properties = [
            'title' => $textWithNormalizedKeyword,
            'state' => $keyword,
            'aliasState' => $keyword,
            'year' => ['type' => 'integer'],
            'persons' => $keyword,
            'personsSort' => $textWithNormalizedKeyword,
            'doctype' => $keyword,
            'collections' => $keyword,
            'hasFiles' => $keyword,
            'creator' => $keyword,
            'creatorRole' => $keyword,
            'source' => ['type' => 'text'],
            'fobIdentifiers' => $keyword,
            'personData' => [
                'properties' => [
                    'name' => $keyword,
                    'fobId' => $disabled,
                    'index' => $disabled,
                ],
            ],
            'affiliation' => $keyword,
            'process_number' => $keyword,
            'creationDate' => $day,
            'embargoDate' => $day,
        ];

        $this->client->indices()->create([
            'index' => $indexName,
            'body' => [
                'settings' => [
                    'analysis' => $analysis,
                ],
                'mappings' => [
                    '_source' => ['enabled' => true],
                    'properties' => $properties,
                ],
            ],
        ]);
    }
238
239
    /**
     * Adds a document to the index under its document identifier.
     *
     * The JSON produced by the Elasticsearch mapper is the base record. It is
     * enriched with workflow state, identifiers, creator data, dates, notes,
     * file flag, person data, source details and the university collection
     * flag. Nothing is indexed when the mapper output decodes to an empty
     * value.
     *
     * @param Document $document
     */
    public function index($document)
    {
        $data = json_decode($this->elasticsearchMapper->getElasticsearchJson($document));

        if (!$data) {
            return;
        }

        $state = $document->getState();
        $data->state = $state;
        $data->aliasState = DocumentWorkflow::STATE_TO_ALIASSTATE_MAPPING[$state];

        $objectIdentifier = $document->getObjectIdentifier();
        $data->objectIdentifier = $objectIdentifier;

        // isset() avoids an undefined-property notice when the mapper
        // output carries no identifier list.
        if (!isset($data->identifier) || !is_array($data->identifier)) {
            $data->identifier = [];
        }
        $data->identifier[] = $objectIdentifier;
        $data->identifier[] = $document->getProcessNumber();

        $creator = $document->getCreator();
        $data->creator = $creator ?: null;
        $data->creatorRole = $this->resolveCreatorRole($creator);

        $creationDate = new \DateTime($document->getCreationDate());
        $data->creationDate = $creationDate->format('Y-m-d');

        $data->year = $document->getPublicationYear();

        $notes = $document->getNotes();
        $data->notes = ($notes && is_array($notes)) ? $notes : [];

        $data->hasFiles = $document->getFile()->count() > 0;

        $internalFormat = new \EWW\Dpf\Helper\InternalFormat($document->getXmlData());

        $this->addPersonData($data, $internalFormat->getPersons());

        $data->source = $document->getSourceDetails();

        $data->universityCollection = $this->isInUniversityCollection($data);

        // This assignment used to be commented out, which left $embargoDate
        // undefined and the indexed embargo date always null.
        $embargoDate = $document->getEmbargoDate();
        $data->embargoDate = ($embargoDate instanceof \DateTimeInterface)
            ? $embargoDate->format('Y-m-d')
            : null;

        $data->originalSourceTitle = $internalFormat->getOriginalSourceTitle();

        // Single source for this field: the fobIds previously collected
        // from the person list were always overwritten by this value.
        $data->fobIdentifiers = $internalFormat->getPersonFisIdentifiers();

        $this->client->index([
            'refresh' => 'wait_for',
            'index' => $this->indexName,
            'id' => $document->getDocumentIdentifier(),
            'body' => $data,
        ]);
    }

    /**
     * Returns the role of the frontend user with the given uid, or an empty
     * string when there is no creator or the user cannot be found.
     *
     * @param mixed $creator Value of Document::getCreator()
     * @return mixed
     */
    private function resolveCreatorRole($creator)
    {
        if (!$creator) {
            return '';
        }

        /** @var \EWW\Dpf\Domain\Model\FrontendUser $creatorFeUser */
        $creatorFeUser = $this->frontendUserRepository->findByUid($creator);

        // The lookup may yield nothing, e.g. for a user that no longer exists.
        return $creatorFeUser ? $creatorFeUser->getUserRole() : '';
    }

    /**
     * Copies the person list into the record: fobIds into 'persons',
     * affiliations and affiliation identifiers into 'affiliation', the
     * complete entries into 'personData' and the family name of the first
     * person into 'personsSort'.
     *
     * @param object $data Record being prepared for indexing
     * @param array $persons Person entries as returned by InternalFormat::getPersons()
     */
    private function addPersonData($data, $persons)
    {
        $personData = [];

        foreach ($persons as $person) {
            $personData[] = $person;
            $data->persons[] = $person['fobId'];

            foreach ($person['affiliations'] as $affiliation) {
                $data->affiliation[] = $affiliation;
            }

            foreach ($person['affiliationIdentifiers'] as $affiliationIdentifier) {
                $data->affiliation[] = $affiliationIdentifier;
            }
        }

        $data->personData = $personData;

        // $personData[0] is the first person regardless of how the incoming
        // list is keyed.
        if ($personData && array_key_exists('family', $personData[0])) {
            $data->personsSort = $personData[0]['family'];
        }
    }

    /**
     * Tells whether the record's collections contain the collection
     * configured in the 'universityCollection' setting.
     *
     * @param object $data Record being prepared for indexing
     * @return bool
     */
    private function isInUniversityCollection($data)
    {
        if (!isset($data->collections) || !is_array($data->collections) || !$data->collections) {
            return false;
        }

        // Read the setting once instead of once per collection.
        $universityCollection = $this->getSettings()['universityCollection'];

        // Loose comparison on purpose: it matches the former "==" check.
        return in_array($universityCollection, $data->collections);
    }
362
363
364
    /**
     * Deletes a document from the index.
     *
     * Deleting is best effort: a failure is not propagated to the caller but
     * written to the log as a warning, together with the cause.
     *
     * @param string $identifier
     */
    public function delete($identifier)
    {
        try {
            $this->client->delete([
                'refresh' => 'wait_for',
                'index' => $this->indexName,
                'id' => $identifier,
            ]);
        } catch (\Exception $e) {
            /** @var \TYPO3\CMS\Core\Log\Logger $logger */
            $logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(__CLASS__);
            $logger->warning(
                'Document could not be deleted from the index.',
                [
                    'Document identifier' => $identifier,
                    // Without the cause the log entry cannot be acted upon.
                    'Exception' => get_class($e) . ': ' . $e->getMessage(),
                ]
            );
        }
    }
391
392
393
    /**
     * Fetches a single entry from the index by its id.
     *
     * @param mixed $identifier Id the entry is stored under
     * @return mixed Whatever the client's get() call returns
     */
    public function getDocument($identifier)
    {
        return $this->client->get([
            'index' => $this->indexName,
            'id' => $identifier,
        ]);
    }
405
406
407
    /**
     * Performs a search request and keeps the response for getResults().
     *
     * When the query names no index, the index of this service is used.
     *
     * @param  array $query search query
     * @param  mixed $type  not evaluated
     * @return array        result list
     * @throws \EWW\Dpf\Exceptions\ElasticSearchConnectionErrorException
     *         when the host cannot be reached or resolved
     */
    public function search($query, $type = null)
    {
        if (empty($query['index'])) {
            $query['index'] = $this->indexName;
        }

        try {
            $this->results = $this->client->search($query);
        } catch (\Elasticsearch\Common\Exceptions\Curl\CouldNotConnectToHost $exception) {
            throw new \EWW\Dpf\Exceptions\ElasticSearchConnectionErrorException("Could not connect to repository server.");
        } catch (\Elasticsearch\Common\Exceptions\Curl\CouldNotResolveHostException $exception) {
            throw new \EWW\Dpf\Exceptions\ElasticSearchConnectionErrorException("Could not connect to repository server.");
        }

        return $this->results;
    }
440
441
    /**
     * Returns the response kept by the most recent search() call.
     *
     * @return mixed null as long as no search response has been stored
     */
    public function getResults()
    {
        return $this->results;
    }
450
}
451