Passed
Pull Request — master (#103)
by Alexander
03:37
created

Indexer::processLogical()   F

Complexity

Conditions 23
Paths 675

Size

Total Lines 101
Code Lines 69

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 23
eloc 69
c 2
b 0
f 0
nc 675
nop 2
dl 0
loc 101
rs 0.4513

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
5
 *
6
 * This file is part of the Kitodo and TYPO3 projects.
7
 *
8
 * @license GNU General Public License version 3 or later.
9
 * For the full copyright and license information, please read the
10
 * LICENSE.txt file that was distributed with this source code.
11
 */
12
13
namespace Kitodo\Dlf\Common;
14
15
use Kitodo\Dlf\Domain\Repository\DocumentRepository;
16
use Kitodo\Dlf\Domain\Model\Document;
17
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
18
use TYPO3\CMS\Core\Database\ConnectionPool;
19
use TYPO3\CMS\Core\Localization\LanguageService;
20
use TYPO3\CMS\Core\Messaging\FlashMessage;
21
use TYPO3\CMS\Core\Utility\GeneralUtility;
22
use TYPO3\CMS\Core\Utility\MathUtility;
23
use TYPO3\CMS\Extbase\Object\ObjectManager;
24
use Ubl\Iiif\Presentation\Common\Model\Resources\AnnotationContainerInterface;
25
use Ubl\Iiif\Tools\IiifHelper;
26
27
/**
28
 * Indexer class for the 'dlf' extension
29
 *
30
 * @author Sebastian Meyer <[email protected]>
31
 * @package TYPO3
32
 * @subpackage dlf
33
 * @access public
34
 */
35
class Indexer
36
{
37
    /**
38
     * The extension key
39
     *
40
     * @var string
41
     * @access public
42
     */
43
    public static $extKey = 'dlf';
44
45
    /**
46
     * Array of metadata fields' configuration
47
     * @see loadIndexConf()
48
     *
49
     * @var array
50
     * @access protected
51
     */
52
    protected static $fields = [
53
        'autocomplete' => [],
54
        'facets' => [],
55
        'sortables' => [],
56
        'indexed' => [],
57
        'stored' => [],
58
        'tokenized' => [],
59
        'fieldboost' => []
60
    ];
61
62
    /**
63
     * Is the index configuration loaded?
64
     * @see $fields
65
     *
66
     * @var bool
67
     * @access protected
68
     */
69
    protected static $fieldsLoaded = false;
70
71
    /**
72
     * List of already processed documents
73
     *
74
     * @var array
75
     * @access protected
76
     */
77
    protected static $processedDocs = [];
78
79
    /**
80
     * Instance of \Kitodo\Dlf\Common\Solr class
81
     *
82
     * @var \Kitodo\Dlf\Common\Solr
83
     * @access protected
84
     */
85
    protected static $solr;
86
87
    /**
88
     * Insert given document into Solr index
89
     *
90
     * @access public
91
     *
92
     * @param \Kitodo\Dlf\Domain\Model\Document $document: The document to add
93
     *
94
     * @return bool true on success or false on failure
95
     */
96
    public static function add(Document $document)
97
    {
98
        if (in_array($document->getUid(), self::$processedDocs)) {
99
            return true;
100
        } elseif (self::solrConnect($document->getSolrcore(), $document->getPid())) {
101
            $success = true;
102
            Helper::getLanguageService()->includeLLFile('EXT:dlf/Resources/Private/Language/locallang_be.xlf');
103
            // Handle multi-volume documents.
104
            if ($parentId = $document->getPartof()) {
105
                // initialize documentRepository
106
                // TODO: When we drop support for TYPO3v9, we needn't/shouldn't use ObjectManager anymore
107
                $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
108
                $documentRepository = $objectManager->get(DocumentRepository::class);
109
                // get parent document
110
                $parent = $documentRepository->findByUid($parentId);
111
                if ($parent) {
112
                    // get XML document of parent
113
                    $doc = Doc::getInstance($parent->getLocation(), ['storagePid' => $parent->getPid()], true);
114
                    if ($doc !== null) {
115
                        $parent->setDoc($doc);
116
                        $success = self::add($parent);
117
                    } else {
118
                        Helper::log('Could not load parent document with UID ' . $document->getDoc()->parentId, LOG_SEVERITY_ERROR);
119
                        return false;
120
                    }
121
                }
122
            }
123
            try {
124
                // Add document to list of processed documents.
125
                self::$processedDocs[] = $document->getUid();
126
                // Delete old Solr documents.
127
                $updateQuery = self::$solr->service->createUpdate();
128
                $updateQuery->addDeleteQuery('uid:' . $document->getUid());
129
                self::$solr->service->update($updateQuery);
130
131
                // Index every logical unit as separate Solr document.
132
                foreach ($document->getDoc()->tableOfContents as $logicalUnit) {
133
                    if ($success) {
134
                        $success = self::processLogical($document, $logicalUnit);
135
                    } else {
136
                        break;
137
                    }
138
                }
139
                // Index full text files if available.
140
                if ($document->getDoc()->hasFulltext) {
141
                    foreach ($document->getDoc()->physicalStructure as $pageNumber => $xmlId) {
142
                        if ($success) {
143
                            $success = self::processPhysical($document, $pageNumber, $document->getDoc()->physicalStructureInfo[$xmlId]);
144
                        } else {
145
                            break;
146
                        }
147
                    }
148
                }
149
                // Commit all changes.
150
                $updateQuery = self::$solr->service->createUpdate();
151
                $updateQuery->addCommit();
152
                self::$solr->service->update($updateQuery);
153
154
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
155
                    if ($success) {
156
                        Helper::addMessage(
157
                            sprintf(Helper::getLanguageService()->getLL('flash.documentIndexed'), $document->getTitle(), $document->getUid()),
158
                            Helper::getLanguageService()->getLL('flash.done'),
159
                            FlashMessage::OK,
160
                            true,
161
                            'core.template.flashMessages'
162
                        );
163
                    } else {
164
                        Helper::addMessage(
165
                            sprintf(Helper::getLanguageService()->getLL('flash.documentNotIndexed'), $document->getTitle(), $document->getUid()),
166
                            Helper::getLanguageService()->getLL('flash.error'),
167
                            FlashMessage::ERROR,
168
                            true,
169
                            'core.template.flashMessages'
170
                        );
171
                    }
172
                }
173
                return $success;
174
            } catch (\Exception $e) {
175
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
176
                    Helper::addMessage(
177
                        Helper::getLanguageService()->getLL('flash.solrException') . ' ' . htmlspecialchars($e->getMessage()),
178
                        Helper::getLanguageService()->getLL('flash.error'),
179
                        FlashMessage::ERROR,
180
                        true,
181
                        'core.template.flashMessages'
182
                    );
183
                }
184
                Helper::log('Apache Solr threw exception: "' . $e->getMessage() . '"', LOG_SEVERITY_ERROR);
185
                return false;
186
            }
187
        } else {
188
            if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
189
                Helper::addMessage(
190
                    Helper::getLanguageService()->getLL('flash.solrNoConnection'),
191
                    Helper::getLanguageService()->getLL('flash.warning'),
192
                    FlashMessage::WARNING,
193
                    true,
194
                    'core.template.flashMessages'
195
                );
196
            }
197
            Helper::log('Could not connect to Apache Solr server', LOG_SEVERITY_ERROR);
198
            return false;
199
        }
200
    }
201
202
    /**
203
     * Returns the dynamic index field name for the given metadata field.
204
     *
205
     * @access public
206
     *
207
     * @param string $index_name: The metadata field's name in database
208
     * @param int $pid: UID of the configuration page
209
     *
210
     * @return string The field's dynamic index name
211
     */
212
    public static function getIndexFieldName($index_name, $pid = 0)
213
    {
214
        // Sanitize input.
215
        $pid = max(intval($pid), 0);
216
        if (!$pid) {
217
            Helper::log('Invalid PID ' . $pid . ' for metadata configuration', LOG_SEVERITY_ERROR);
218
            return '';
219
        }
220
        // Load metadata configuration.
221
        self::loadIndexConf($pid);
222
        // Build field's suffix.
223
        $suffix = (in_array($index_name, self::$fields['tokenized']) ? 't' : 'u');
224
        $suffix .= (in_array($index_name, self::$fields['stored']) ? 's' : 'u');
225
        $suffix .= (in_array($index_name, self::$fields['indexed']) ? 'i' : 'u');
226
        $index_name .= '_' . $suffix;
227
        return $index_name;
228
    }
229
230
    /**
231
     * Load indexing configuration
232
     *
233
     * @access protected
234
     *
235
     * @param int $pid: The configuration page's UID
236
     *
237
     * @return void
238
     */
239
    protected static function loadIndexConf($pid)
240
    {
241
        if (!self::$fieldsLoaded) {
242
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
243
                ->getQueryBuilderForTable('tx_dlf_metadata');
244
245
            // Get the metadata indexing options.
246
            $result = $queryBuilder
247
                ->select(
248
                    'tx_dlf_metadata.index_name AS index_name',
249
                    'tx_dlf_metadata.index_tokenized AS index_tokenized',
250
                    'tx_dlf_metadata.index_stored AS index_stored',
251
                    'tx_dlf_metadata.index_indexed AS index_indexed',
252
                    'tx_dlf_metadata.is_sortable AS is_sortable',
253
                    'tx_dlf_metadata.is_facet AS is_facet',
254
                    'tx_dlf_metadata.is_listed AS is_listed',
255
                    'tx_dlf_metadata.index_autocomplete AS index_autocomplete',
256
                    'tx_dlf_metadata.index_boost AS index_boost'
257
                )
258
                ->from('tx_dlf_metadata')
259
                ->where(
260
                    $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($pid)),
261
                    Helper::whereExpression('tx_dlf_metadata')
262
                )
263
                ->execute();
264
265
            while ($indexing = $result->fetch()) {
266
                if ($indexing['index_tokenized']) {
267
                    self::$fields['tokenized'][] = $indexing['index_name'];
268
                }
269
                if (
270
                    $indexing['index_stored']
271
                    || $indexing['is_listed']
272
                ) {
273
                    self::$fields['stored'][] = $indexing['index_name'];
274
                }
275
                if (
276
                    $indexing['index_indexed']
277
                    || $indexing['index_autocomplete']
278
                ) {
279
                    self::$fields['indexed'][] = $indexing['index_name'];
280
                }
281
                if ($indexing['is_sortable']) {
282
                    self::$fields['sortables'][] = $indexing['index_name'];
283
                }
284
                if ($indexing['is_facet']) {
285
                    self::$fields['facets'][] = $indexing['index_name'];
286
                }
287
                if ($indexing['index_autocomplete']) {
288
                    self::$fields['autocomplete'][] = $indexing['index_name'];
289
                }
290
                if ($indexing['index_boost'] > 0.0) {
291
                    self::$fields['fieldboost'][$indexing['index_name']] = floatval($indexing['index_boost']);
292
                } else {
293
                    self::$fields['fieldboost'][$indexing['index_name']] = false;
294
                }
295
            }
296
            self::$fieldsLoaded = true;
297
        }
298
    }
299
300
    /**
301
     * Processes a logical unit (and its children) for the Solr index
302
     *
303
     * @access protected
304
     *
305
     * @param \Kitodo\Dlf\Domain\Model\Document $document: The METS document
306
     * @param array $logicalUnit: Array of the logical unit to process
307
     *
308
     * @return bool true on success or false on failure
309
     */
310
    protected static function processLogical(Document $document, array $logicalUnit)
311
    {
312
        $success = true;
313
        $doc = $document->getDoc();
314
        $doc->cPid = $document->getPid();
315
        // Get metadata for logical unit.
316
        $metadata = $doc->metadataArray[$logicalUnit['id']];
317
        if (!empty($metadata)) {
318
            $metadata['author'] = self::removeAppendsFromAuthor($metadata['author']);
319
            // set Owner if available
320
            if ($document->getOwner()) {
321
                $metadata['owner'][0] = $document->getOwner()->getIndexName();
322
            }
323
            // Create new Solr document.
324
            $updateQuery = self::$solr->service->createUpdate();
325
            $solrDoc = $updateQuery->createDocument();
0 ignored issues
show
Unused Code introduced by
The assignment to $solrDoc is dead and can be removed.
Loading history...
326
            $solrDoc = self::getSolrDocument($updateQuery, $document, $logicalUnit);
327
            if (MathUtility::canBeInterpretedAsInteger($logicalUnit['points'])) {
328
                $solrDoc->setField('page', $logicalUnit['points']);
0 ignored issues
show
Bug introduced by
The method setField() does not exist on Solarium\Core\Query\DocumentInterface. It seems like you code against a sub-type of Solarium\Core\Query\DocumentInterface such as Solarium\Plugin\MinimumScoreFilter\Document or Solarium\QueryType\Update\Query\Document. ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-call  annotation

328
                $solrDoc->/** @scrutinizer ignore-call */ 
329
                          setField('page', $logicalUnit['points']);
Loading history...
329
            }
330
            if ($logicalUnit['id'] == $doc->toplevelId) {
331
                $solrDoc->setField('thumbnail', $doc->thumbnail);
332
            } elseif (!empty($logicalUnit['thumbnailId'])) {
333
                $solrDoc->setField('thumbnail', $doc->getFileLocation($logicalUnit['thumbnailId']));
334
            }
335
            // There can be only one toplevel unit per UID, independently of backend configuration
336
            $solrDoc->setField('toplevel', $logicalUnit['id'] == $doc->toplevelId ? true : false);
337
            $solrDoc->setField('title', $metadata['title'][0], self::$fields['fieldboost']['title']);
338
            $solrDoc->setField('volume', $metadata['volume'][0], self::$fields['fieldboost']['volume']);
339
            $solrDoc->setField('record_id', $metadata['record_id'][0]);
340
            $solrDoc->setField('purl', $metadata['purl'][0]);
341
            $solrDoc->setField('location', $document->getLocation());
342
            $solrDoc->setField('urn', $metadata['urn']);
343
            $solrDoc->setField('license', $metadata['license']);
344
            $solrDoc->setField('terms', $metadata['terms']);
345
            $solrDoc->setField('restrictions', $metadata['restrictions']);
346
            $coordinates = json_decode($metadata['coordinates'][0]);
347
            if (is_object($coordinates)) {
348
                $solrDoc->setField('geom', json_encode($coordinates->features[0]));
349
            }
350
            $autocomplete = [];
351
            foreach ($metadata as $index_name => $data) {
352
                if (
353
                    !empty($data)
354
                    && substr($index_name, -8) !== '_sorting'
355
                ) {
356
                    $solrDoc->setField(self::getIndexFieldName($index_name, $document->getPid()), $data, self::$fields['fieldboost'][$index_name]);
357
                    if (in_array($index_name, self::$fields['sortables'])) {
358
                        // Add sortable fields to index.
359
                        $solrDoc->setField($index_name . '_sorting', $metadata[$index_name . '_sorting'][0]);
360
                    }
361
                    if (in_array($index_name, self::$fields['facets'])) {
362
                        // Add facets to index.
363
                        $solrDoc->setField($index_name . '_faceting', $data);
364
                    }
365
                    if (in_array($index_name, self::$fields['autocomplete'])) {
366
                        $autocomplete = array_merge($autocomplete, $data);
367
                    }
368
                }
369
            }
370
            // Add autocomplete values to index.
371
            if (!empty($autocomplete)) {
372
                $solrDoc->setField('autocomplete', $autocomplete);
373
            }
374
            // Add collection information to logical sub-elements if applicable.
375
            if (
376
                in_array('collection', self::$fields['facets'])
377
                && empty($metadata['collection'])
378
                && !empty($doc->metadataArray[$doc->toplevelId]['collection'])
379
            ) {
380
                $solrDoc->setField('collection_faceting', $doc->metadataArray[$doc->toplevelId]['collection']);
381
            }
382
            try {
383
                $updateQuery->addDocument($solrDoc);
384
                self::$solr->service->update($updateQuery);
385
            } catch (\Exception $e) {
386
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
387
                    Helper::addMessage(
388
                        Helper::getLanguageService()->getLL('flash.solrException') . '<br />' . htmlspecialchars($e->getMessage()),
389
                        Helper::getLanguageService()->getLL('flash.error'),
390
                        FlashMessage::ERROR,
391
                        true,
392
                        'core.template.flashMessages'
393
                    );
394
                }
395
                Helper::log('Apache Solr threw exception: "' . $e->getMessage() . '"', LOG_SEVERITY_ERROR);
396
                return false;
397
            }
398
        }
399
        // Check for child elements...
400
        if (!empty($logicalUnit['children'])) {
401
            foreach ($logicalUnit['children'] as $child) {
402
                if ($success) {
403
                    // ...and process them, too.
404
                    $success = self::processLogical($document, $child);
405
                } else {
406
                    break;
407
                }
408
            }
409
        }
410
        return $success;
411
    }
412
413
    /**
414
     * Processes a physical unit for the Solr index
415
     *
416
     * @access protected
417
     *
418
     * @param \Kitodo\Dlf\Domain\Model\Document $document: The METS document
419
     * @param int $page: The page number
420
     * @param array $physicalUnit: Array of the physical unit to process
421
     *
422
     * @return bool true on success or false on failure
423
     */
424
    protected static function processPhysical(Document $document, $page, array $physicalUnit)
425
    {
426
        $doc = $document->getDoc();
427
        $doc->cPid = $document->getPid();
428
        if ($doc->hasFulltext && $fullText = $doc->getFullText($physicalUnit['id'])) {
429
            // Read extension configuration.
430
            $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
431
            // Create new Solr document.
432
            $updateQuery = self::$solr->service->createUpdate();
433
            $solrDoc = self::getSolrDocument($updateQuery, $document, $physicalUnit, $fullText);
434
            $solrDoc->setField('page', $page);
435
            $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
436
            while ($fileGrpThumb = array_shift($fileGrpsThumb)) {
437
                if (!empty($physicalUnit['files'][$fileGrpThumb])) {
438
                    $solrDoc->setField('thumbnail', $doc->getFileLocation($physicalUnit['files'][$fileGrpThumb]));
439
                    break;
440
                }
441
            }
442
            $solrDoc->setField('toplevel', false);
443
            $solrDoc->setField('type', $physicalUnit['type'], self::$fields['fieldboost']['type']);
444
            $solrDoc->setField('collection', $doc->metadataArray[$doc->toplevelId]['collection']);
445
446
            $solrDoc->setField('fulltext', $fullText);
447
            if (is_array($doc->metadataArray[$doc->toplevelId])) {
448
                // Add faceting information to physical sub-elements if applicable.
449
                foreach ($doc->metadataArray[$doc->toplevelId] as $index_name => $data) {
450
                    if (
451
                        !empty($data)
452
                        && substr($index_name, -8) !== '_sorting'
453
                    ) {
454
455
                        if (in_array($index_name, self::$fields['facets'])) {
456
                            // Remove appended "valueURI" from authors' names for indexing.
457
                            if ($index_name == 'author') {
458
                                $data = self::removeAppendsFromAuthor($data);
459
                            }
460
                            // Add facets to index.
461
                            $solrDoc->setField($index_name . '_faceting', $data);
462
                        }
463
                    }
464
                }
465
            }
466
            // Add collection information to physical sub-elements if applicable.
467
            if (
468
                in_array('collection', self::$fields['facets'])
469
                && !empty($doc->metadataArray[$doc->toplevelId]['collection'])
470
            ) {
471
                $solrDoc->setField('collection_faceting', $doc->metadataArray[$doc->toplevelId]['collection']);
472
            }
473
            try {
474
                $updateQuery->addDocument($solrDoc);
475
                self::$solr->service->update($updateQuery);
476
            } catch (\Exception $e) {
477
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
478
                    Helper::addMessage(
479
                        Helper::getLanguageService()->getLL('flash.solrException') . '<br />' . htmlspecialchars($e->getMessage()),
480
                        Helper::getLanguageService()->getLL('flash.error'),
481
                        FlashMessage::ERROR,
482
                        true,
483
                        'core.template.flashMessages'
484
                    );
485
                }
486
                Helper::log('Apache Solr threw exception: "' . $e->getMessage() . '"', LOG_SEVERITY_ERROR);
487
                return false;
488
            }
489
        }
490
        return true;
491
    }
492
493
    /**
494
     * Connects to Solr server.
495
     *
496
     * @access protected
497
     *
498
     * @param int $core: UID of the Solr core
499
     * @param int $pid: UID of the configuration page
500
     *
501
     * @return bool true on success or false on failure
502
     */
503
    protected static function solrConnect($core, $pid = 0)
504
    {
505
        // Get Solr instance.
506
        if (!self::$solr) {
507
            // Connect to Solr server.
508
            $solr = Solr::getInstance($core);
509
            if ($solr->ready) {
510
                self::$solr = $solr;
511
                // Load indexing configuration if needed.
512
                if ($pid) {
513
                    self::loadIndexConf($pid);
514
                }
515
            } else {
516
                return false;
517
            }
518
        }
519
        return true;
520
    }
521
522
    /**
523
     * Get SOLR document with set standard fields (identical for logical and physical unit)
524
     *
525
     * @access private
526
     *
527
     * @param \Solarium\QueryType\Update\Query\Query $updateQuery solarium query
528
     * @param \Kitodo\Dlf\Domain\Model\Document $document: The METS document
529
     * @param array $unit: Array of the logical or physical unit to process
530
     * @param string $fullText: Text containing full text for indexing
531
     *
532
     * @return \Solarium\Core\Query\DocumentInterface
533
     */
534
    private static function getSolrDocument($updateQuery, $document, $unit, $fullText = '') {
535
        $solrDoc = $updateQuery->createDocument();
536
        // Create unique identifier from document's UID and unit's XML ID.
537
        $solrDoc->setField('id', $document->getUid() . $unit['id']);
538
        $solrDoc->setField('uid', $document->getUid());
539
        $solrDoc->setField('pid', $document->getPid());
540
        $solrDoc->setField('partof', $document->getPartof());
541
        $solrDoc->setField('root', $document->getDoc()->rootId);
542
        $solrDoc->setField('sid', $unit['id']);
543
        $solrDoc->setField('type', $unit['type'], self::$fields['fieldboost']['type']);
544
        $solrDoc->setField('collection', $document->getDoc()->metadataArray[$document->getDoc()->toplevelId]['collection']);
545
        $solrDoc->setField('fulltext', $fullText);
546
        return $solrDoc;
547
    }
548
549
    /**
550
     * Remove appended "valueURI" from authors' names for indexing.
551
     *
552
     * @access private
553
     *
554
     * @param array|string $authors: Array or string containing author/authors
555
     *
556
     * @return array|string
557
     */
558
    private static function removeAppendsFromAuthor($authors) {
559
        if (is_array($authors)) {
560
            foreach ($authors as $i => $author) {
561
                $splitName = explode(chr(31), $author);
562
                $authors[$i] = $splitName[0];
563
            }
564
        }
565
        return $authors;
566
    }
567
568
    /**
569
     * Prevent instantiation by hiding the constructor
570
     *
571
     * @access private
572
     */
573
    private function __construct()
574
    {
575
        // This is a static class, thus no instances should be created.
576
    }
577
}
578