Passed
Push — master ( 134b43...f02d33 )
by Ralf
09:50
created

ElasticSearch::getIndexName()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 1
c 0
b 0
f 0
dl 0
loc 3
rs 10
cc 1
nc 1
nop 0
1
<?php
2
namespace EWW\Dpf\Services\ElasticSearch;
3
4
/*
5
 * This file is part of the TYPO3 CMS project.
6
 *
7
 * It is free software; you can redistribute it and/or modify it under
8
 * the terms of the GNU General Public License, either version 2
9
 * of the License, or any later version.
10
 *
11
 * For the full copyright and license information, please read the
12
 * LICENSE.txt file that was distributed with this source code.
13
 *
14
 * The TYPO3 project - inspiring people to share!
15
 */
16
17
use Elasticsearch\ClientBuilder;
18
use Elasticsearch\Common\Exceptions\Curl\CouldNotConnectToHost;
19
use Elasticsearch\Common\Exceptions\Curl\CouldNotResolveHostException;
20
use EWW\Dpf\Domain\Repository\FrontendUserRepository;
21
use EWW\Dpf\Domain\Workflow\DocumentWorkflow;
22
use EWW\Dpf\Exceptions\ElasticSearchConnectionErrorException;
23
use EWW\Dpf\Exceptions\ElasticSearchMissingIndexNameException;
24
use TYPO3\CMS\Extbase\Object\ObjectManager;
25
use EWW\Dpf\Configuration\ClientConfigurationManager;
26
use EWW\Dpf\Domain\Model\Document;
27
use TYPO3\CMS\Core\Utility\GeneralUtility;
28
use TYPO3\CMS\Core\Log\LogManager;
29
use TYPO3\CMS\Extbase\Utility\LocalizationUtility;
30
31
class ElasticSearch
32
{
33
    /**
34
     * @var \EWW\Dpf\Configuration\ClientConfigurationManager
35
     */
36
    protected $clientConfigurationManager;
37
38
    /**
39
     * @var \Elasticsearch\Client
40
     */
41
    protected $client;
42
43
    protected $server = 'host.docker.internal'; //127.0.0.1';
44
45
    protected $port = '9200';
46
47
    protected $indexName = 'kitodo_publication';
48
49
    protected $results;
50
51
52
    protected $elasticsearchMapper;
53
54
    /**
55
     * @var int
56
     */
57
    protected $clientPid = 0;
58
59
    /**
     * Builds the Elasticsearch client for a DPF client and makes sure the search index exists.
     *
     * Host, port and index name are read from the ClientConfigurationManager. If a client
     * pid is passed, the configuration manager is switched to that pid before reading.
     *
     * @param int|null $clientPid Pid of the client configuration to use; null or 0 leaves the
     *                            configuration manager untouched.
     * @throws ElasticSearchMissingIndexNameException if no index name is configured
     * @throws ElasticSearchConnectionErrorException if the index could not be checked or created
     */
    public function __construct($clientPid = null)
    {
        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);

        $this->elasticsearchMapper = $objectManager->get(ElasticsearchMapper::class);
        $this->clientConfigurationManager = $objectManager->get(ClientConfigurationManager::class);

        // Explicit check instead of a loose truthiness test: both null and 0 mean
        // "no client pid given", exactly as before.
        if ($clientPid !== null && (int) $clientPid !== 0) {
            $this->clientConfigurationManager->setConfigurationPid($clientPid);
            $this->clientPid = $clientPid;
        }

        $this->server = $this->clientConfigurationManager->getElasticSearchHost();
        $this->port = $this->clientConfigurationManager->getElasticSearchPort();
        $this->indexName = $this->clientConfigurationManager->getElasticSearchIndexName();

        if (empty($this->indexName)) {
            throw new ElasticSearchMissingIndexNameException('Missing search index name.');
        }

        $clientBuilder = ClientBuilder::create();
        $clientBuilder->setHosts([$this->server . ':' . $this->port]);
        $this->client = $clientBuilder->build();

        try {
            $this->initializeIndex($this->indexName);
        } catch (\Throwable $e) {
            // Keep the technical cause in the log; the exception thrown below only
            // carries the translated, user-facing message.
            /** @var $logger \TYPO3\CMS\Core\Log\Logger */
            $logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(__CLASS__);
            $logger->error('Elasticsearch index could not be initialized.',
                [
                    'Index name' => $this->indexName,
                    'Exception' => get_class($e),
                    'Message' => $e->getMessage()
                ]
            );

            $message = LocalizationUtility::translate(
                'elasticsearch.notRunning', 'dpf'
            );

            // Throw instead of die(): terminating the process inside a constructor
            // bypasses all error handling of the caller and of the framework.
            throw new ElasticSearchConnectionErrorException(
                $message ?: 'Could not connect to repository server.'
            );
        }
    }
102
103
    /**
     * Returns the name of the Elasticsearch index this service works on.
     *
     * @return string|null
     */
    protected function getIndexName()
    {
        return $this->indexName;
    }
110
111
    /**
     * Makes sure the index named by $indexName exists; if it is missing it is created
     * with the analysis settings and field mappings defined here.
     *
     * @param string $indexName Name of the index to check and, if necessary, create.
     */
    protected function initializeIndex($indexName)
    {
        // Nothing to do when the index is already there.
        if ($this->client->indices()->exists(['index' => $indexName])) {
            return;
        }

        // Field definitions that are used for several properties.
        $keywordField = ['type' => 'keyword'];
        $disabledField = ['enabled' => false];
        $dateField = [
            'type' => 'date',
            'format' => "yyyy-MM-dd"
        ];
        // Full text field with a normalized keyword sub-field.
        $sortableTextField = [
            'type' => 'text',
            'fields' => [
                'keyword' => [
                    'type' => 'keyword',
                    'normalizer' => 'lowercase_normalizer'
                ]
            ]
        ];

        $analysis = [
            'filter' => [
                'ngram' => [
                    'type' => 'ngram',
                    'min_gram' => 3,
                    'max_gram' => 3,
                    'token_chars' => ['letter', 'digit']
                ]
            ],
            'analyzer' => [
                'keyword_lowercase' => [
                    'tokenizer' => 'keyword',
                    'filter' => ['lowercase']
                ]
            ],
            'normalizer' => [
                'lowercase_normalizer' => [
                    'type' => 'custom',
                    'char_filter' => [],
                    'filter' => ['lowercase', 'asciifolding']
                ]
            ]
        ];

        $properties = [
            'title' => $sortableTextField,
            'state' => $keywordField,
            'aliasState' => $keywordField,
            'year' => ['type' => 'integer'],
            'persons' => $keywordField,
            'personsSort' => $sortableTextField,
            'doctype' => $keywordField,
            'collections' => $keywordField,
            'hasFiles' => $keywordField,
            'creator' => $keywordField,
            'creatorRole' => $keywordField,
            'source' => ['type' => 'text'],
            'fobIdentifiers' => $keywordField,
            'personData' => [
                'properties' => [
                    'name' => $keywordField,
                    'fobId' => $disabledField,
                    'index' => $disabledField
                ]
            ],
            'affiliation' => $keywordField,
            'process_number' => $keywordField,
            'creationDate' => $dateField,
            'embargoDate' => $dateField
        ];

        $this->client->indices()->create([
            'index' => $indexName,
            'body' => [
                'settings' => [
                    'analysis' => $analysis
                ],
                'mappings' => [
                    '_source' => [
                        'enabled' => true
                    ],
                    'properties' => $properties
                ]
            ]
        ]);
    }
249
250
    /**
     * Adds a document to the index.
     *
     * The index entry is based on the JSON produced by the ElasticsearchMapper. If that
     * JSON cannot be produced or decoded into an object, a minimal entry holding only
     * title and document type is used instead. In both cases the entry is then completed
     * with data read from the document and from its internal XML format.
     *
     * @param Document $document
     */
    public function index($document)
    {
        $data = null;

        try {
            $data = json_decode($this->elasticsearchMapper->getElasticsearchJson($document));
        } catch (\Throwable $throwable) {
            // Fixme: The solution via json_decode and the XSLT file needs to be replaced.
            // Best effort: log the failure and continue with the minimal entry below.
            /** @var $logger \TYPO3\CMS\Core\Log\Logger */
            $logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(__CLASS__);
            $logger->warning('Document could not be mapped to elasticsearch json.',
                [
                    'Document identifier' => $document->getDocumentIdentifier(),
                    'Message' => $throwable->getMessage()
                ]
            );
        }

        // json_decode() yields null for invalid JSON; assigning properties on null is a
        // fatal error on PHP 8, so the fallback object has to be created explicitly.
        if (!is_object($data)) {
            $data = new \stdClass();
            $data->title = [$document->getTitle()];
            $data->doctype = $document->getDocumentType()->getName();
        }

        $state = $document->getState();
        $data->state = $state;
        // Unknown states map to null instead of raising an undefined index notice.
        $data->aliasState = DocumentWorkflow::STATE_TO_ALIASSTATE_MAPPING[$state] ?? null;

        $data->objectIdentifier = $document->getObjectIdentifier();

        if (empty($data->identifier) || !is_array($data->identifier)) {
            $data->identifier = [];
        }
        $data->identifier[] = $document->getObjectIdentifier();
        $data->identifier[] = $document->getProcessNumber();

        $creator = $document->getCreator();
        $data->creator = $creator ? $creator : null;
        $data->creatorRole = '';

        if ($creator) {
            $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
            $frontendUserRepository = $objectManager->get(FrontendUserRepository::class);

            /** @var \EWW\Dpf\Domain\Model\FrontendUser $creatorFeUser */
            $creatorFeUser = $frontendUserRepository->findByUid($creator);
            if ($creatorFeUser) {
                $data->creatorRole = $creatorFeUser->getUserRole();
            }
        }

        $creationDate = new \DateTime($document->getCreationDate());
        $data->creationDate = $creationDate->format('Y-m-d');

        $data->year = $document->getPublicationYear();

        $notes = $document->getNotes();
        $data->notes = ($notes && is_array($notes)) ? $notes : [];

        $data->hasFiles = $document->getFile()->count() > 0;

        $internalFormat = new \EWW\Dpf\Helper\InternalFormat($document->getXmlData(), $this->clientPid);

        $persons = $internalFormat->getPersons();

        $personData = [];
        foreach ($persons as $person) {
            $personData[] = $person;
            $data->persons[] = $person['fobId'];

            // Affiliation names and affiliation identifiers share the same field.
            foreach (($person['affiliations'] ?? []) as $affiliation) {
                $data->affiliation[] = $affiliation;
            }

            foreach (($person['affiliationIdentifiers'] ?? []) as $affiliationIdentifier) {
                $data->affiliation[] = $affiliationIdentifier;
            }
        }

        $data->personData = $personData;

        // The family name of the first person is used for sorting by person.
        if (isset($persons[0]) && is_array($persons[0]) && array_key_exists('family', $persons[0])) {
            $data->personsSort = $persons[0]['family'];
        }

        $data->source = $document->getSourceDetails();

        $data->universityCollection = false;
        if (!empty($data->collections) && is_array($data->collections)) {
            $universityCollection = $this->clientConfigurationManager->getUniversityCollection();
            // Loose comparison kept on purpose: the value types on both sides are not
            // visible here (decoded JSON vs. configuration value).
            $data->universityCollection = in_array($universityCollection, $data->collections);
        }

        $embargoDate = $document->getEmbargoDate();
        if ($embargoDate instanceof \DateTime) {
            $data->embargoDate = $embargoDate->format("Y-m-d");
        } else {
            $data->embargoDate = null;
        }

        $data->originalSourceTitle = $internalFormat->getOriginalSourceTitle();

        // This is the only value fobIdentifiers ever ends up with; the list formerly
        // collected from the persons loop was always overwritten here.
        $data->fobIdentifiers = $internalFormat->getPersonFisIdentifiers();

        $this->client->index([
            'refresh' => 'wait_for',
            'index' => $this->getIndexName(),
            'id' => $document->getDocumentIdentifier(),
            'body' => $data
        ]);
    }
387
388
389
    /**
     * Deletes a document from the index.
     *
     * A failing delete request does not raise an error for the caller; it is only
     * written to the log as a warning.
     *
     * @param string $identifier Id of the index entry to delete.
     */
    public function delete($identifier)
    {
        try {
            $this->client->delete([
                'refresh' => 'wait_for',
                'index' => $this->getIndexName(),
                'id' => $identifier
            ]);
        } catch (\Exception $e) {
            /** @var $logger \TYPO3\CMS\Core\Log\Logger */
            $logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(__CLASS__);
            // Include the cause, otherwise the warning cannot be diagnosed.
            $logger->warning('Document could not be deleted from the index.',
                [
                    'Document identifier' => $identifier,
                    'Exception' => get_class($e),
                    'Message' => $e->getMessage()
                ]
            );
        }
    }
416
417
418
    /**
     * Fetches a single entry from the configured index.
     *
     * @param string $identifier Id of the index entry to fetch.
     * @return mixed The response of the client's get request.
     */
    public function getDocument($identifier)
    {
        return $this->client->get([
            'index' => $this->getIndexName(),
            'id'    => $identifier
        ]);
    }
430
431
432
    /**
     * Performs a search request and keeps the response for getResults().
     *
     * If the query does not name an index, the configured index is used.
     *
     * @param  array $query search query
     * @param  mixed $type  not evaluated
     * @return array        result list
     * @throws ElasticSearchConnectionErrorException if the server cannot be reached or resolved
     */
    public function search($query, $type = null)
    {
        // Fall back to the configured index.
        if (empty($query['index'])) {
            $query['index'] = $this->getIndexName();
        }

        try {
            $this->results = $this->client->search($query);
        } catch (CouldNotConnectToHost | CouldNotResolveHostException $exception) {
            throw new ElasticSearchConnectionErrorException("Could not connect to repository server.");
        }

        return $this->results;
    }
465
466
    /**
     * Returns the response stored by the most recent search() call.
     *
     * @return mixed
     */
    public function getResults()
    {
        return $this->results;
    }
475
}
476