Passed
Pull Request — master (#195)
by
unknown
07:04
created

ElasticSearch   A

Complexity

Total Complexity 33

Size/Duplication

Total Lines 429
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 203
dl 0
loc 429
rs 9.76
c 1
b 0
f 0
wmc 33

8 Methods

Rating   Name   Duplication   Size   Complexity  
A getResults() 0 4 1
A search() 0 26 5
B initializeIndex() 0 131 2
A delete() 0 18 2
A getDocument() 0 8 1
A getSettings() 0 6 1
F index() 0 113 19
A __construct() 0 26 2
1
<?php
2
namespace EWW\Dpf\Services\ElasticSearch;
3
4
/*
5
 * This file is part of the TYPO3 CMS project.
6
 *
7
 * It is free software; you can redistribute it and/or modify it under
8
 * the terms of the GNU General Public License, either version 2
9
 * of the License, or any later version.
10
 *
11
 * For the full copyright and license information, please read the
12
 * LICENSE.txt file that was distributed with this source code.
13
 *
14
 * The TYPO3 project - inspiring people to share!
15
 */
16
17
use Elasticsearch\ClientBuilder;
18
use EWW\Dpf\Domain\Workflow\DocumentWorkflow;
19
use TYPO3\CMS\Extbase\Object\ObjectManager;
20
use EWW\Dpf\Configuration\ClientConfigurationManager;
21
use EWW\Dpf\Domain\Model\Document;
22
use TYPO3\CMS\Core\Utility\GeneralUtility;
23
use TYPO3\CMS\Core\Log\LogManager;
24
use TYPO3\CMS\Extbase\Utility\LocalizationUtility;
25
26
class ElasticSearch
27
{
28
    /**
     * Extbase configuration manager; getSettings() reads the framework configuration from it.
     *
     * @var \TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface
     * @inject
     */
    protected $configurationManager;

    /**
     * Repository used by index() to look up the frontend user that created a document.
     *
     * @var \EWW\Dpf\Domain\Repository\FrontendUserRepository
     * @inject
     */
    protected $frontendUserRepository = null;

    /**
     * Elasticsearch client, built in the constructor via ClientBuilder.
     */
    protected $client;

    /**
     * Elasticsearch host. This default is replaced in the constructor by the
     * value from the ClientConfigurationManager.
     */
    protected $server = 'host.docker.internal';

    /**
     * Elasticsearch port. This default is replaced in the constructor by the
     * value from the ClientConfigurationManager.
     */
    protected $port = '9200';

    /**
     * Name of the index all requests of this service are sent to.
     */
    protected $indexName = 'kitodo_publication';

    /**
     * Raw result of the most recent search() call.
     */
    protected $results;

    /**
     * Mapper that produces the Elasticsearch JSON representation of a document.
     */
    protected $elasticsearchMapper;
58
59
60
    /**
     * Builds the Elasticsearch client and makes sure the index exists.
     *
     * Host and port are read from the ClientConfigurationManager. If the index
     * cannot be initialized, the cause is logged and a connection exception is
     * thrown. Earlier versions terminated the whole request with die(), which
     * callers could neither catch nor test and which discarded the cause.
     *
     * @throws \EWW\Dpf\Exceptions\ElasticSearchConnectionErrorException if the index could not be initialized
     */
    public function __construct()
    {
        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);

        $this->elasticsearchMapper = $objectManager->get(ElasticsearchMapper::class);

        $clientConfigurationManager = $objectManager->get(ClientConfigurationManager::class);

        $this->server = $clientConfigurationManager->getElasticSearchHost();
        $this->port = $clientConfigurationManager->getElasticSearchPort();

        $hosts = [
            $this->server . ':' . $this->port,
        ];

        $clientBuilder = ClientBuilder::create();
        $clientBuilder->setHosts($hosts);
        $this->client = $clientBuilder->build();

        try {
            $this->initializeIndex($this->indexName);
        } catch (\Throwable $e) {
            // Keep the technical cause in the log; the exception below only
            // carries the localized, user-facing message.
            /** @var $logger \TYPO3\CMS\Core\Log\Logger */
            $logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(__CLASS__);
            $logger->error('Elasticsearch index could not be initialized.',
                [
                    'Host' => $this->server . ':' . $this->port,
                    'Index' => $this->indexName,
                    'Exception' => get_class($e),
                    'Message' => $e->getMessage()
                ]
            );

            $message = LocalizationUtility::translate(
                'elasticsearch.notRunning', 'dpf'
            );

            // translate() may return null when the label is missing; fall back
            // to the message search() uses for connection failures.
            throw new \EWW\Dpf\Exceptions\ElasticSearchConnectionErrorException(
                $message ?: 'Could not connect to repository server.'
            );
        }
    }
91
92
    /**
     * Returns the TypoScript settings of the extension.
     *
     * Reads the Extbase framework configuration through the injected
     * configuration manager and hands back its 'settings' entry.
     *
     * @return mixed
     */
    public function getSettings()
    {
        $configurationType = \TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface::CONFIGURATION_TYPE_FRAMEWORK;
        $configuration = $this->configurationManager->getConfiguration($configurationType);

        return $configuration['settings'];
    }
104
105
106
    /**
     * Creates the index named $indexName, including analysis settings and
     * field mappings, unless an index of that name already exists.
     *
     * @param $indexName
     */
    protected function initializeIndex($indexName)
    {
        // Nothing to do when the index is already there.
        if ($this->client->indices()->exists(['index' => $indexName])) {
            return;
        }

        // Field definitions that are shared by several properties.
        $keyword = ['type' => 'keyword'];
        $sortableText = [
            'type' => 'text',
            'fields' => [
                'keyword' => [
                    'type' => 'keyword',
                    'normalizer' => 'lowercase_normalizer',
                ],
            ],
        ];
        $day = [
            'type' => 'date',
            'format' => "yyyy-MM-dd",
        ];
        $disabled = ['enabled' => false];

        $settings = [
            'analysis' => [
                'filter' => [
                    'ngram' => [
                        'type' => 'ngram',
                        'min_gram' => 3,
                        'max_gram' => 3,
                        'token_chars' => ['letter', 'digit'],
                    ],
                ],
                'analyzer' => [
                    'keyword_lowercase' => [
                        'tokenizer' => 'keyword',
                        'filter' => ['lowercase'],
                    ],
                ],
                'normalizer' => [
                    'lowercase_normalizer' => [
                        'type' => 'custom',
                        'char_filter' => [],
                        'filter' => ['lowercase', 'asciifolding'],
                    ],
                ],
            ],
        ];

        $mappings = [
            '_source' => ['enabled' => true],
            'properties' => [
                'title' => $sortableText,
                'state' => $keyword,
                'aliasState' => $keyword,
                'year' => ['type' => 'integer'],
                'persons' => $keyword,
                'personsSort' => $sortableText,
                'doctype' => $keyword,
                'collections' => $keyword,
                'hasFiles' => $keyword,
                'creator' => $keyword,
                'creatorRole' => $keyword,
                'source' => ['type' => 'text'],
                'fobIdentifiers' => $keyword,
                'personData' => [
                    'properties' => [
                        'name' => $keyword,
                        'fobId' => $disabled,
                        'index' => $disabled,
                    ],
                ],
                'affiliation' => $keyword,
                'process_number' => $keyword,
                'creationDate' => $day,
                'embargoDate' => $day,
            ],
        ];

        $this->client->indices()->create([
            'index' => $indexName,
            'body' => [
                'settings' => $settings,
                'mappings' => $mappings,
            ],
        ]);
    }
244
245
    /**
     * Adds a document to the index.
     *
     * The JSON produced by the Elasticsearch mapper is decoded, enriched with
     * values taken from the document itself (state, identifiers, creator,
     * dates, notes, files, persons, affiliations, collection flag) and sent to
     * the index under the document identifier. Nothing is indexed when the
     * mapper output does not decode to a JSON object.
     *
     * @param Document $document
     */
    public function index($document)
    {
        $data = json_decode($this->elasticsearchMapper->getElasticsearchJson($document));

        // Only a decoded JSON object (\stdClass) can be enriched below; empty
        // or invalid mapper output is skipped, as before.
        if (!($data instanceof \stdClass)) {
            return;
        }

        $state = $document->getState();
        $data->state = $state;
        // A state without an alias yields null instead of an undefined-index notice.
        $data->aliasState = DocumentWorkflow::STATE_TO_ALIASSTATE_MAPPING[$state] ?? null;

        $data->objectIdentifier = $document->getObjectIdentifier();

        // empty() also covers a missing property without raising a notice.
        if (empty($data->identifier) || !is_array($data->identifier)) {
            $data->identifier = [];
        }
        $data->identifier[] = $document->getObjectIdentifier();
        $data->identifier[] = $document->getProcessNumber();

        $creator = $document->getCreator();
        $data->creator = $creator ?: null;
        $data->creatorRole = '';
        if ($creator) {
            /** @var \EWW\Dpf\Domain\Model\FrontendUser $creatorFeUser */
            $creatorFeUser = $this->frontendUserRepository->findByUid($creator);
            // The creator may no longer exist; keep the empty role instead of
            // calling a method on null.
            if ($creatorFeUser) {
                $data->creatorRole = $creatorFeUser->getUserRole();
            }
        }

        $creationDate = new \DateTime($document->getCreationDate());
        $data->creationDate = $creationDate->format('Y-m-d');

        $data->year = $document->getPublicationYear();

        $notes = $document->getNotes();
        $data->notes = ($notes && is_array($notes)) ? $notes : [];

        $files = $document->getFile();
        $data->hasFiles = ($files !== null && $files->count() > 0);

        $internalFormat = new \EWW\Dpf\Helper\InternalFormat($document->getXmlData());

        $persons = $internalFormat->getPersons();

        $personData = [];
        foreach ($persons as $person) {
            $personData[] = $person;
            $data->persons[] = $person['fobId'] ?? null;

            // Affiliation names and affiliation identifiers share one field.
            foreach ($person['affiliations'] ?? [] as $affiliation) {
                $data->affiliation[] = $affiliation;
            }

            foreach ($person['affiliationIdentifiers'] ?? [] as $affiliationIdentifier) {
                $data->affiliation[] = $affiliationIdentifier;
            }
        }

        $data->personData = $personData;

        // The family name of the first person is used as sort value.
        if (
            isset($persons[0])
            && is_array($persons[0])
            && array_key_exists('family', $persons[0])
        ) {
            $data->personsSort = $persons[0]['family'];
        }

        $data->source = $document->getSourceDetails();

        $data->universityCollection = false;
        if (!empty($data->collections) && is_array($data->collections)) {
            // Read the setting once instead of once per collection. The loose
            // comparison of the original loop is kept on purpose.
            $universityCollection = $this->getSettings()['universityCollection'] ?? null;
            $data->universityCollection = in_array($universityCollection, $data->collections);
        }

        // NOTE(review): the inspection tool reports $embargoDate as always
        // being a DateTime; the check is kept because the getter presumably
        // returns null when no embargo is set - confirm against Document.
        $embargoDate = $document->getEmbargoDate();
        if ($embargoDate instanceof \DateTime) {
            $data->embargoDate = $embargoDate->format("Y-m-d");
        } else {
            $data->embargoDate = null;
        }

        $data->originalSourceTitle = $internalFormat->getOriginalSourceTitle();

        // This is the only source of fobIdentifiers. The list formerly
        // collected in the persons loop was always overwritten here.
        $data->fobIdentifiers = $internalFormat->getPersonFisIdentifiers();

        $this->client->index([
            'refresh' => 'wait_for',
            'index' => $this->indexName,
            'id' => $document->getDocumentIdentifier(),
            'body' => $data
        ]);
    }
368
369
370
    /**
     * Deletes a document from the index.
     *
     * Failures are treated as non-fatal: they are logged as a warning,
     * including the exception class and message, and not rethrown.
     *
     * @param string $identifier
     */
    public function delete($identifier)
    {
        try {
            $this->client->delete([
                'refresh' => 'wait_for',
                'index' => $this->indexName,
                'id' => $identifier
            ]);
        } catch (\Exception $e) {
            /** @var $logger \TYPO3\CMS\Core\Log\Logger */
            $logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(__CLASS__);
            // Record why the deletion failed; the bare identifier alone does
            // not allow the problem to be diagnosed.
            $logger->warning('Document could not be deleted from the index.',
                [
                    'Document identifier' => $identifier,
                    'Exception' => get_class($e),
                    'Message' => $e->getMessage()
                ]
            );
        }
    }
397
398
399
    /**
     * Fetches a single entry from the index by its identifier.
     *
     * @param $identifier
     * @return mixed the unmodified response of the client's get() call
     */
    public function getDocument($identifier)
    {
        return $this->client->get([
            'index' => $this->indexName,
            'id'    => $identifier,
        ]);
    }
411
412
413
    /**
     * Performs a search request and remembers its result for getResults().
     *
     * If the query does not name an index, the index of this service is used.
     *
     * @param  array $query search query
     * @param  mixed $type  currently not evaluated
     * @return array        result list
     * @throws \EWW\Dpf\Exceptions\ElasticSearchConnectionErrorException if the host cannot be reached or resolved
     */
    public function search($query, $type = null)
    {
        try {
            // Fall back to the default index of this service.
            if (empty($query['index'])) {
                $query['index'] = $this->indexName;
            }

            $this->results = $this->client->search($query);

            return $this->results;
        } catch (
            \Elasticsearch\Common\Exceptions\Curl\CouldNotConnectToHost
            | \Elasticsearch\Common\Exceptions\Curl\CouldNotResolveHostException $exception
        ) {
            throw new \EWW\Dpf\Exceptions\ElasticSearchConnectionErrorException("Could not connect to repository server.");
        }
    }
446
447
    /**
     * Returns the result of the most recent search() call.
     *
     * @return mixed the stored search result; not set before the first successful search
     */
    public function getResults()
    {
        return $this->results;
    }
456
}
457