Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Pull Request — master (#673)
by Alexander
08:19 queued 05:35
created

Document::getTitledata()   A

Complexity

Conditions 6
Paths 6

Size

Total Lines 20
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 11
c 0
b 0
f 0
dl 0
loc 20
rs 9.2222
cc 6
nc 6
nop 1
1
<?php
2
3
/**
4
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
5
 *
6
 * This file is part of the Kitodo and TYPO3 projects.
7
 *
8
 * @license GNU General Public License version 3 or later.
9
 * For the full copyright and license information, please read the
10
 * LICENSE.txt file that was distributed with this source code.
11
 */
12
13
namespace Kitodo\Dlf\Common;
14
15
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
16
use TYPO3\CMS\Core\Database\ConnectionPool;
17
use TYPO3\CMS\Core\Log\LogManager;
18
use TYPO3\CMS\Core\Utility\GeneralUtility;
19
use TYPO3\CMS\Core\Utility\MathUtility;
20
use Ubl\Iiif\Presentation\Common\Model\Resources\IiifResourceInterface;
21
use Ubl\Iiif\Tools\IiifHelper;
22
23
/**
24
 * Document class for the 'dlf' extension
25
 *
26
 * @author Sebastian Meyer <[email protected]>
27
 * @author Henrik Lochmann <[email protected]>
28
 * @package TYPO3
29
 * @subpackage dlf
30
 * @access public
31
 * @property int $cPid This holds the PID for the configuration
32
 * @property-read bool $hasFulltext Are there any fulltext files available?
33
 * @property-read string $location This holds the documents location
34
 * @property-read array $metadataArray This holds the documents' parsed metadata array
35
 * @property-read int $numPages This holds the total number of pages
36
 * @property-read int $parentId This holds the UID of the parent document or zero if not multi-volumed
37
 * @property-read array $physicalStructure This holds the physical structure
38
 * @property-read array $physicalStructureInfo This holds the physical structure metadata
39
 * @property-read int $pid This holds the PID of the document or zero if not in database
40
 * @property-read bool $ready Is the document instantiated successfully?
41
 * @property-read string $recordId The METS file's / IIIF manifest's record identifier
42
 * @property-read int $rootId This holds the UID of the root document or zero if not multi-volumed
43
 * @property-read array $smLinks This holds the smLinks between logical and physical structMap
44
 * @property-read array $tableOfContents This holds the logical structure
45
 * @property-read string $thumbnail This holds the document's thumbnail location
46
 * @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
47
 * @property-read mixed $uid This holds the UID or the URL of the document
48
 * @abstract
49
 */
50
abstract class Document
51
{
52
    /**
53
     * This holds the logger
54
     *
55
     * @var \TYPO3\CMS\Core\Log\Logger
56
     * @access protected
57
     */
58
    protected $logger;
59
60
    /**
61
     * This holds the PID for the configuration
62
     *
63
     * @var int
64
     * @access protected
65
     */
66
    protected $cPid = 0;
67
68
    /**
69
     * The extension key
70
     *
71
     * @var string
72
     * @access public
73
     */
74
    public static $extKey = 'dlf';
75
76
    /**
77
     * This holds the configuration for all supported metadata encodings
78
     * @see loadFormats()
79
     *
80
     * @var array
81
     * @access protected
82
     */
83
    protected $formats = [
84
        'OAI' => [
85
            'rootElement' => 'OAI-PMH',
86
            'namespaceURI' => 'http://www.openarchives.org/OAI/2.0/',
87
        ],
88
        'METS' => [
89
            'rootElement' => 'mets',
90
            'namespaceURI' => 'http://www.loc.gov/METS/',
91
        ],
92
        'XLINK' => [
93
            'rootElement' => 'xlink',
94
            'namespaceURI' => 'http://www.w3.org/1999/xlink',
95
        ]
96
    ];
97
98
    /**
99
     * Are the available metadata formats loaded?
100
     * @see $formats
101
     *
102
     * @var bool
103
     * @access protected
104
     */
105
    protected $formatsLoaded = false;
106
107
    /**
108
     * Are there any fulltext files available? This also includes IIIF text annotations
109
     * with motivation 'painting' if Kitodo.Presentation is configured to store text
110
     * annotations as fulltext.
111
     *
112
     * @var bool
113
     * @access protected
114
     */
115
    protected $hasFulltext = false;
116
117
    /**
118
     * Last searched logical and physical page
119
     *
120
     * @var array
121
     * @access protected
122
     */
123
    protected $lastSearchedPhysicalPage = ['logicalPage' => null, 'physicalPage' => null];
124
125
    /**
126
     * This holds the documents location
127
     *
128
     * @var string
129
     * @access protected
130
     */
131
    protected $location = '';
132
133
    /**
134
     * This holds the logical units
135
     *
136
     * @var array
137
     * @access protected
138
     */
139
    protected $logicalUnits = [];
140
141
    /**
142
     * This holds the documents' parsed metadata array with their corresponding
143
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
144
     *
145
     * @var array
146
     * @access protected
147
     */
148
    protected $metadataArray = [];
149
150
    /**
151
     * Is the metadata array loaded?
152
     * @see $metadataArray
153
     *
154
     * @var bool
155
     * @access protected
156
     */
157
    protected $metadataArrayLoaded = false;
158
159
    /**
160
     * This holds the total number of pages
161
     *
162
     * @var int
163
     * @access protected
164
     */
165
    protected $numPages = 0;
166
167
    /**
168
     * This holds the UID of the parent document or zero if not multi-volumed
169
     *
170
     * @var int
171
     * @access protected
172
     */
173
    protected $parentId = 0;
174
175
    /**
176
     * This holds the physical structure
177
     *
178
     * @var array
179
     * @access protected
180
     */
181
    protected $physicalStructure = [];
182
183
    /**
184
     * This holds the physical structure metadata
185
     *
186
     * @var array
187
     * @access protected
188
     */
189
    protected $physicalStructureInfo = [];
190
191
    /**
192
     * Is the physical structure loaded?
193
     * @see $physicalStructure
194
     *
195
     * @var bool
196
     * @access protected
197
     */
198
    protected $physicalStructureLoaded = false;
199
200
    /**
201
     * This holds the PID of the document or zero if not in database
202
     *
203
     * @var int
204
     * @access protected
205
     */
206
    protected $pid = 0;
207
208
    /**
209
     * This holds the documents' raw text pages with their corresponding
210
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
211
     *
212
     * @var array
213
     * @access protected
214
     */
215
    protected $rawTextArray = [];
216
217
    /**
218
     * Is the document instantiated successfully?
219
     *
220
     * @var bool
221
     * @access protected
222
     */
223
    protected $ready = false;
224
225
    /**
226
     * The METS file's / IIIF manifest's record identifier
227
     *
228
     * @var string
229
     * @access protected
230
     */
231
    protected $recordId;
232
233
    /**
234
     * This holds the singleton object of the document
235
     *
236
     * @var array (\Kitodo\Dlf\Common\Document)
237
     * @static
238
     * @access protected
239
     */
240
    protected static $registry = [];
241
242
    /**
243
     * This holds the UID of the root document or zero if not multi-volumed
244
     *
245
     * @var int
246
     * @access protected
247
     */
248
    protected $rootId = 0;
249
250
    /**
251
     * Is the root id loaded?
252
     * @see $rootId
253
     *
254
     * @var bool
255
     * @access protected
256
     */
257
    protected $rootIdLoaded = false;
258
259
    /**
260
     * This holds the smLinks between logical and physical structMap
261
     *
262
     * @var array
263
     * @access protected
264
     */
265
    protected $smLinks = ['l2p' => [], 'p2l' => []];
266
267
    /**
268
     * Are the smLinks loaded?
269
     * @see $smLinks
270
     *
271
     * @var bool
272
     * @access protected
273
     */
274
    protected $smLinksLoaded = false;
275
276
    /**
277
     * This holds the logical structure
278
     *
279
     * @var array
280
     * @access protected
281
     */
282
    protected $tableOfContents = [];
283
284
    /**
285
     * Is the table of contents loaded?
286
     * @see $tableOfContents
287
     *
288
     * @var bool
289
     * @access protected
290
     */
291
    protected $tableOfContentsLoaded = false;
292
293
    /**
294
     * This holds the document's thumbnail location
295
     *
296
     * @var string
297
     * @access protected
298
     */
299
    protected $thumbnail = '';
300
301
    /**
302
     * Is the document's thumbnail location loaded?
303
     * @see $thumbnail
304
     *
305
     * @var bool
306
     * @access protected
307
     */
308
    protected $thumbnailLoaded = false;
309
310
    /**
311
     * This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
312
     *
313
     * @var string
314
     * @access protected
315
     */
316
    protected $toplevelId = '';
317
318
    /**
319
     * This holds the UID or the URL of the document
320
     *
321
     * @var mixed
322
     * @access protected
323
     */
324
    protected $uid = 0;
325
326
    /**
327
     * This holds the whole XML file as \SimpleXMLElement object
328
     *
329
     * @var \SimpleXMLElement
330
     * @access protected
331
     */
332
    protected $xml;
333
334
    /**
     * Resets the static registry of document instances to an empty array.
     * Intended to prevent memory exhaustion.
     *
     * @access public
     *
     * @static
     *
     * @return void
     */
    public static function clearRegistry()
    {
        // Forget every instance registered so far.
        self::$registry = [];
    }
348
349
    /**
     * Makes sure the record identifier, if there is one, gets retrieved from the document
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $pid: ID of the configuration page holding the recordId config
     */
    abstract protected function establishRecordId($pid);
360
361
    /**
     * Returns the source document PHP object that a Document instance represents
     *
     * @access protected
     *
     * @abstract
     *
     * @return \SimpleXMLElement|IiifResourceInterface A PHP object representation of
     * the current document. SimpleXMLElement for METS, IiifResourceInterface for IIIF
     */
    abstract protected function getDocument();
372
373
    /**
     * Returns the location of a downloadable file for a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
     *
     * @return string The file's location as URL
     */
    abstract public function getDownloadLocation($id);
385
386
    /**
     * Returns the location of a file representing a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
     *
     * @return string The file's location as URL
     */
    abstract public function getFileLocation($id);
398
399
    /**
     * Returns the MIME type of a file representing a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node
     *
     * @return string The file's MIME type
     */
    abstract public function getFileMimeType($id);
411
412
    /**
     * This is a singleton class, thus an instance must be created by this method
     *
     * Unless a reload is forced, a matching instance is taken from the static
     * registry or, failing that, from the user's session. Otherwise the document
     * format is looked up in the database (integer $uid) or detected from the
     * fetched content (URL $uid) and a MetsDocument or IiifManifest is created.
     *
     * @access public
     *
     * @static
     *
     * @param mixed $uid: The unique identifier of the document to parse, the URL of XML file or the IRI of the IIIF resource
     * @param int $pid: If > 0, then only document with this PID gets loaded
     * @param bool $forceReload: Force reloading the document instead of returning the cached instance
     *
     * @return \Kitodo\Dlf\Common\Document|null Instance of this class, either MetsDocument or IiifManifest,
     * or null if the document format could not be determined
     */
    public static function &getInstance($uid, $pid = 0, $forceReload = false)
    {
        // Sanitize input.
        $pid = max(intval($pid), 0);
        if (!$forceReload) {
            $regObj = Helper::digest($uid);
            // isset() avoids an "undefined index" notice for documents not yet registered.
            if (
                isset(self::$registry[$regObj])
                && self::$registry[$regObj] instanceof self
            ) {
                // Check if instance has given PID.
                if (
                    !$pid
                    || !self::$registry[$regObj]->pid
                    || $pid == self::$registry[$regObj]->pid
                ) {
                    // Return singleton instance if available.
                    return self::$registry[$regObj];
                }
            } else {
                // Check the user's session...
                $sessionData = Helper::loadFromSession(get_called_class());
                // isset() also covers the case of no (or unrelated) session data.
                if (
                    isset($sessionData[$regObj])
                    && $sessionData[$regObj] instanceof self
                ) {
                    // Check if instance has given PID.
                    if (
                        !$pid
                        || !$sessionData[$regObj]->pid
                        || $pid == $sessionData[$regObj]->pid
                    ) {
                        // ...and restore registry.
                        self::$registry[$regObj] = $sessionData[$regObj];
                        return self::$registry[$regObj];
                    }
                }
            }
        }
        // Create new instance depending on format (METS or IIIF) ...
        $instance = null;
        $documentFormat = null;
        $xml = null;
        $iiif = null;
        // Try to get document format from database
        if (MathUtility::canBeInterpretedAsInteger($uid)) {
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('tx_dlf_documents');

            $queryBuilder
                ->select(
                    'tx_dlf_documents.location AS location',
                    'tx_dlf_documents.document_format AS document_format'
                )
                ->from('tx_dlf_documents');

            // Restrict the lookup to the given PID if one was requested.
            if ($pid) {
                $queryBuilder
                    ->where(
                        $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($uid)),
                        $queryBuilder->expr()->eq('tx_dlf_documents.pid', intval($pid)),
                        Helper::whereExpression('tx_dlf_documents')
                    );
            } else {
                $queryBuilder
                    ->where(
                        $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($uid)),
                        Helper::whereExpression('tx_dlf_documents')
                    );
            }

            $result = $queryBuilder
                ->setMaxResults(1)
                ->execute();

            if ($resArray = $result->fetch()) {
                $documentFormat = $resArray['document_format'];
            }
        } else {
            // Get document format from content of remote document
            // Cast to string for safety reasons.
            $location = (string) $uid;
            // Try to load a file from the url
            if (GeneralUtility::isValidUrl($location)) {
                // Load extension configuration
                $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
                // Set user-agent to identify self when fetching XML data.
                if (!empty($extConf['useragent'])) {
                    @ini_set('user_agent', $extConf['useragent']);
                }
                $content = GeneralUtility::getUrl($location);
                if ($content !== false) {
                    // TODO use single place to load xml
                    // Turn off libxml's error logging.
                    $libxmlErrors = libxml_use_internal_errors(true);
                    // Disable loading of external entities while parsing, must be kept.
                    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so it is
                    // only called on older versions.
                    $previousValueOfEntityLoader = null;
                    if (\PHP_VERSION_ID < 80000) {
                        $previousValueOfEntityLoader = libxml_disable_entity_loader(true);
                    }
                    // Try to load XML from file.
                    $xml = simplexml_load_string($content);
                    // Reset entity loader setting.
                    if (\PHP_VERSION_ID < 80000) {
                        libxml_disable_entity_loader($previousValueOfEntityLoader);
                    }
                    // Reset libxml's error logging.
                    libxml_use_internal_errors($libxmlErrors);
                    if ($xml !== false) {
                        /* @var $xml \SimpleXMLElement */
                        $xml->registerXPathNamespace('mets', 'http://www.loc.gov/METS/');
                        $xpathResult = $xml->xpath('//mets:mets');
                        $documentFormat = !empty($xpathResult) ? 'METS' : null;
                    } else {
                        // Try to load file as IIIF resource instead.
                        $contentAsJsonArray = json_decode($content, true);
                        if ($contentAsJsonArray !== null) {
                            // The extension configuration loaded above is reused here.
                            IiifHelper::setUrlReader(IiifUrlReader::getInstance());
                            IiifHelper::setMaxThumbnailHeight($extConf['iiifThumbnailHeight']);
                            IiifHelper::setMaxThumbnailWidth($extConf['iiifThumbnailWidth']);
                            $iiif = IiifHelper::loadIiifResource($contentAsJsonArray);
                            if ($iiif instanceof IiifResourceInterface) {
                                $documentFormat = 'IIIF';
                            }
                        }
                    }
                }
            }
        }
        // $pid has already been sanitized at the top of this method.
        if ($documentFormat == 'METS') {
            $instance = new MetsDocument($uid, $pid, $xml);
        } elseif ($documentFormat == 'IIIF') {
            $instance = new IiifManifest($uid, $pid, $iiif);
        }
        // Save instance to registry.
        if (
            $instance instanceof self
            && $instance->ready
        ) {
            self::$registry[Helper::digest($instance->uid)] = $instance;
            // Register the instance under its location as well, so both keys resolve to it.
            if ($instance->uid != $instance->location) {
                self::$registry[Helper::digest($instance->location)] = $instance;
            }
            // Load extension configuration
            $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
            // Save registry to session if caching is enabled.
            if (!empty($extConf['caching'])) {
                Helper::saveToSession(self::$registry, get_class($instance));
            }
            $instance->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($instance));
        }
        // Return new instance (null if no supported format was detected).
        return $instance;
    }
578
579
    /**
     * Returns details about a logical structure element
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the logical structure node (METS) or
     * the @id property of the Manifest / Range (IIIF)
     * @param bool $recursive: Whether to include the child elements / resources
     *
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
     */
    abstract public function getLogicalStructure($id, $recursive = false);
593
594
    /**
     * Extracts all the metadata for a logical structure node
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the logical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     * @param int $cPid: The PID for the metadata definitions
     *                       (defaults to $this->cPid or $this->pid)
     *
     * @return array The logical structure node's / the IIIF resource's parsed metadata array
     */
    abstract public function getMetadata($id, $cPid = 0);
609
610
    /**
     * Looks up the first physical page whose order label contains the given logical page label
     *
     * The result is the zero-based position of the first matching entry in
     * $this->physicalStructureInfo. The last successful lookup is remembered so
     * that asking for the same label again skips the search. If no entry
     * matches, 1 is returned.
     *
     * @access public
     *
     * @param string $logicalPage: The label (or a part of the label) of the logical page
     *
     * @return int The physical page number
     */
    public function getPhysicalPage($logicalPage)
    {
        $cachedLabel = $this->lastSearchedPhysicalPage['logicalPage'];
        // Same label as last time: answer from the remembered result.
        if (!empty($cachedLabel) && $cachedLabel == $logicalPage) {
            return $this->lastSearchedPhysicalPage['physicalPage'];
        }
        // array_values() yields the running position as the loop key.
        foreach (array_values($this->physicalStructureInfo) as $position => $entry) {
            if (strpos($entry['orderlabel'], $logicalPage) === false) {
                continue;
            }
            // Remember this lookup for subsequent calls.
            $this->lastSearchedPhysicalPage['logicalPage'] = $logicalPage;
            $this->lastSearchedPhysicalPage['physicalPage'] = $position;
            return $position;
        }
        // Nothing matched.
        return 1;
    }
639
640
    /**
     * Extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas. The text
     * might be given as ALTO for METS or as annotations or ALTO for IIIF resources.
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     *
     * @return string The OCR full text
     */
    abstract public function getFullText($id);
654
655
    /**
     * This extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas from an
     * XML full text representation (currently only ALTO). For IIIF manifests, ALTO documents have
     * to be given in the Canvas' / Manifest's "seeAlso" property.
     *
     * The first configured full text file group that has a file for the node is used.
     * An empty string is returned (and a warning logged) if the node is unknown, the
     * file cannot be loaded, or its format is not supported.
     *
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     *
     * @return string The OCR full text
     */
    protected function getFullTextFromXml($id)
    {
        $fullText = '';
        // Initialised up front so that they are defined on every path below,
        // including the case where no configured file group matches.
        $fileContent = '';
        $textFormat = '';
        // Load available text formats, ...
        $this->loadFormats();
        // ... physical structure ...
        $this->_getPhysicalStructure();
        // ... and extension configuration.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        $fileGrpsFulltext = GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext']);
        if (empty($this->physicalStructureInfo[$id])) {
            $this->logger->warning('Invalid structure node @ID "' . $id . '"');
            return $fullText;
        }
        foreach ($fileGrpsFulltext as $fileGrpFulltext) {
            if (empty($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext])) {
                continue;
            }
            // Get full text file.
            $fileContent = GeneralUtility::getUrl($this->getFileLocation($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext]));
            if ($fileContent === false) {
                $this->logger->warning('Couldn\'t load full text file for structure node @ID "' . $id . '"');
                return $fullText;
            }
            $textFormat = $this->getTextFormat($fileContent);
            // Only the first file group with a file is considered.
            break;
        }
        // Is this text format supported?
        // This part actually differs from previous version of indexed OCR
        if (!empty($fileContent) && !empty($this->formats[$textFormat])) {
            $textMiniOcr = '';
            if (!empty($this->formats[$textFormat]['class'])) {
                $class = $this->formats[$textFormat]['class'];
                // Get the raw text from class.
                if (
                    class_exists($class)
                    && ($obj = GeneralUtility::makeInstance($class)) instanceof FulltextInterface
                ) {
                    // Load XML from file.
                    $ocrTextXml = $this->getXmlObject($fileContent);
                    $textMiniOcr = $obj->getTextAsMiniOcr($ocrTextXml);
                    $this->rawTextArray[$id] = $textMiniOcr;
                } else {
                    // The message names the method that is actually called above.
                    $this->logger->warning('Invalid class/method "' . $class . '->getTextAsMiniOcr()" for text format "' . $textFormat . '"');
                }
            }
            $fullText = $textMiniOcr;
        } else {
            $this->logger->warning('Unsupported text format "' . $textFormat . '" in physical node with @ID "' . $id . '"');
        }
        return $fullText;
    }
718
719
    /**
     * Get format of the OCR full text
     *
     * The format is the upper-cased name of the XML root element.
     *
     * @access private
     *
     * @param string $fileContent: content of the XML file
     *
     * @return string The format of the OCR full text, or an empty string
     * if the content could not be parsed as XML
     */
    private function getTextFormat($fileContent)
    {
        $xml = $this->getXmlObject($fileContent);
        // Parsing fails with false; calling getName() on it would be a fatal error.
        if (!($xml instanceof \SimpleXMLElement)) {
            return '';
        }
        // Get the root element's name as text format.
        return strtoupper($xml->getName());
    }
733
734
    /**
     * Get the OCR full text as object
     *
     * libxml's error reporting is switched to internal errors while parsing and
     * restored to its previous setting afterwards.
     *
     * @access private
     *
     * @param string $fileContent: content of the XML file
     *
     * @return \SimpleXMLElement|false The OCR full text as object, or false
     * if the content could not be parsed
     */
    private function getXmlObject($fileContent)
    {
        // Turn off libxml's error logging.
        $libxmlErrors = libxml_use_internal_errors(true);
        // Disable loading of external entities while parsing, must be kept.
        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so it is
        // only called on older versions.
        $previousValueOfEntityLoader = null;
        if (\PHP_VERSION_ID < 80000) {
            $previousValueOfEntityLoader = libxml_disable_entity_loader(true);
        }
        // Load XML from file.
        $ocrTextXml = simplexml_load_string($fileContent);
        // Reset entity loader setting.
        if (\PHP_VERSION_ID < 80000) {
            libxml_disable_entity_loader($previousValueOfEntityLoader);
        }
        // Reset libxml's error logging.
        libxml_use_internal_errors($libxmlErrors);
        // Hand back the parsed document (false on parse failure).
        return $ocrTextXml;
    }
758
759
    /**
     * This determines a title for the given document
     *
     * With $recursive enabled, the chain of "partof" parents is followed as long
     * as the current document has no title. Every UID is visited at most once, so
     * a circular "partof" chain ends the search instead of looping forever.
     *
     * @access public
     *
     * @static
     *
     * @param int $uid: The UID of the document
     * @param bool $recursive: Search superior documents for a title, too?
     *
     * @return string The title of the document itself or a parent document
     */
    public static function getTitle($uid, $recursive = false)
    {
        $logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(__CLASS__);

        $title = '';
        // UIDs already looked at, used to detect circular "partof" chains.
        $visited = [];
        // Sanitize input.
        $uid = max(intval($uid), 0);
        while (true) {
            if (!$uid) {
                $logger->error('Invalid UID ' . $uid . ' for document');
                break;
            }
            if (isset($visited[$uid])) {
                $logger->warning('Circular "partof" reference detected at document with UID ' . $uid);
                break;
            }
            $visited[$uid] = true;

            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('tx_dlf_documents');

            $result = $queryBuilder
                ->select(
                    'tx_dlf_documents.title',
                    'tx_dlf_documents.partof'
                )
                ->from('tx_dlf_documents')
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_documents.uid', $uid),
                    Helper::whereExpression('tx_dlf_documents')
                )
                ->setMaxResults(1)
                ->execute();

            $resArray = $result->fetch();
            if (!$resArray) {
                $logger->warning('No document with UID ' . $uid . ' found or document not accessible');
                break;
            }
            // Get title information.
            $title = $resArray['title'];
            $partof = $resArray['partof'];
            // Stop unless parent documents should be searched for a title.
            if (
                !$recursive
                || !empty($title)
                || !intval($partof)
                || $partof == $uid
            ) {
                break;
            }
            // Continue with the parent; its result replaces the empty title.
            $title = '';
            $uid = max(intval($partof), 0);
        }
        return $title;
    }
816
817
    /**
     * Collects all the metadata for the toplevel logical structure node / resource
     *
     * For METS documents, information from the METS structural map is added. If the
     * metadata has a "record_id" entry and this document's record identifier is not
     * yet listed there, it is prepended.
     *
     * @access public
     *
     * @param int $cPid: The PID for the metadata definitions
     *
     * @return array The logical structure node's / resource's parsed metadata array
     */
    public function getTitledata($cPid = 0)
    {
        $titledata = $this->getMetadata($this->_getToplevelId(), $cPid);
        // METS documents contribute extra information from the structural map.
        if ($this instanceof MetsDocument) {
            $this->addMetadataFromMets($titledata, $this->_getToplevelId());
        }
        // Put the record identifier of the METS file / IIIF manifest first if it is missing.
        $recordIdMissing = is_array($titledata)
            && array_key_exists('record_id', $titledata)
            && !empty($this->recordId)
            && !in_array($this->recordId, $titledata['record_id']);
        if ($recordIdMissing) {
            array_unshift($titledata['record_id'], $this->recordId);
        }
        return $titledata;
    }
847
848
    /**
     * Searches a logical (sub-) structure tree depth-first for the structure with
     * the requested logical id and returns its depth.
     *
     * @access protected
     *
     * @param array $structure: logical structure array
     * @param int $depth: current tree depth
     * @param string $logId: ID of the logical structure whose depth is requested
     *
     * @return int|bool false if structure with $logId is not a child of this substructure,
     * or the actual depth.
     */
    protected function getTreeDepth($structure, $depth, $logId)
    {
        foreach ($structure as $node) {
            if ($node['id'] == $logId) {
                return $depth;
            }
            if (!array_key_exists('children', $node)) {
                continue;
            }
            // Descend one level; only a real hit ends the search early.
            $childDepth = $this->getTreeDepth($node['children'], $depth + 1, $logId);
            if ($childDepth !== false) {
                return $childDepth;
            }
        }
        return false;
    }
874
875
    /**
     * Determines how deep a logical structure element is nested within the
     * table of contents (elements on the first level have depth 1).
     *
     * @access public
     *
     * @param string $logId: The id of the logical structure element whose depth is requested
     *
     * @return int|bool tree depth as integer or false if no element with $logId exists within the TOC.
     */
    public function getStructureDepth($logId)
    {
        $tableOfContents = $this->_getTableOfContents();
        return $this->getTreeDepth($tableOfContents, 1, $logId);
    }
887
888
    /**
889
     * This sets some basic class properties
890
     *
891
     * @access protected
892
     *
893
     * @abstract
894
     *
895
     * @return void
896
     */
897
    abstract protected function init();
898
899
    /**
900
     * Reuse any document object that might have been already loaded to determine wether document is METS or IIIF
901
     *
902
     * @access protected
903
     *
904
     * @abstract
905
     *
906
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: any instance that has already been loaded
907
     *
908
     * @return bool true if $preloadedDocument can actually be reused, false if it has to be loaded again
909
     */
910
    abstract protected function setPreloadedDocument($preloadedDocument);
911
912
    /**
913
     * METS/IIIF specific part of loading a location
914
     *
915
     * @access protected
916
     *
917
     * @abstract
918
     *
919
     * @param string $location: The URL of the file to load
920
     *
921
     * @return bool true on success or false on failure
922
     */
923
    abstract protected function loadLocation($location);
924
925
    /**
     * Load XML file / IIIF resource from URL
     *
     * Validates the location, applies the user-agent configured for the extension
     * (if any) and delegates the actual loading to the format specific
     * loadLocation() implementation.
     *
     * @access protected
     *
     * @param string $location: The URL of the file to load
     *
     * @return bool true on success or false on failure
     */
    protected function load($location)
    {
        // Refuse anything that is not a valid URL.
        if (!GeneralUtility::isValidUrl($location)) {
            $this->logger->error('Invalid file location "' . $location . '" for document loading');
            return false;
        }
        // Load extension configuration
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        // Set user-agent to identify self when fetching XML / JSON-LD data.
        if (!empty($extConf['useragent'])) {
            @ini_set('user_agent', $extConf['useragent']);
        }
        // the actual loading is format specific
        return $this->loadLocation($location);
    }
951
952
    /**
953
     * Analyze the document if it contains any fulltext that needs to be indexed.
954
     *
955
     * @access protected
956
     *
957
     * @abstract
958
     */
959
    abstract protected function ensureHasFulltextIsSet();
960
961
    /**
     * Fills the format registry ($this->formats) from the globally stored
     * (pid = 0) records of table "tx_dlf_formats". Runs only once per instance.
     *
     * @access protected
     *
     * @return void
     */
    protected function loadFormats()
    {
        // Nothing to do if the registry has been populated before.
        if ($this->formatsLoaded) {
            return;
        }
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_formats');

        $onlyGlobalRecords = $queryBuilder->expr()->eq('tx_dlf_formats.pid', 0);

        // Get available data formats from database.
        $statement = $queryBuilder
            ->select(
                'tx_dlf_formats.type AS type',
                'tx_dlf_formats.root AS root',
                'tx_dlf_formats.namespace AS namespace',
                'tx_dlf_formats.class AS class'
            )
            ->from('tx_dlf_formats')
            ->where($onlyGlobalRecords)
            ->execute();

        while ($row = $statement->fetch()) {
            // Update format registry, keyed by the format's type.
            $this->formats[$row['type']] = [
                'rootElement' => $row['root'],
                'namespaceURI' => $row['namespace'],
                'class' => $row['class']
            ];
        }
        $this->formatsLoaded = true;
    }
999
1000
    /**
     * Registers the namespaces of all known metadata formats on the given
     * XML object, using the lower-cased format type as prefix.
     *
     * @access public
     *
     * @param \SimpleXMLElement|\DOMXPath &$obj: \SimpleXMLElement or \DOMXPath object
     *
     * @return void
     */
    public function registerNamespaces(&$obj)
    {
        // TODO Check usage. XML specific method does not seem to be used anywhere outside this class within the project, but it is public and may be used by extensions.
        $this->loadFormats();
        // Only \SimpleXMLElement and \DOMXPath objects are supported.
        $isSimpleXml = $obj instanceof \SimpleXMLElement;
        if (!$isSimpleXml && !($obj instanceof \DOMXPath)) {
            $this->logger->error('Given object is neither a SimpleXMLElement nor a DOMXPath instance');
            return;
        }
        // Both classes offer the same functionality under different method names.
        $method = $isSimpleXml ? 'registerXPathNamespace' : 'registerNamespace';
        // Register metadata format's namespaces.
        foreach ($this->formats as $prefix => $format) {
            $obj->$method(strtolower($prefix), $format['namespaceURI']);
        }
    }
1027
1028
    /**
     * This saves the document to the database and index
     *
     * The structure type named in the metadata has to exist already; collections
     * and the owning library are created on the fly if they are missing. After the
     * "tx_dlf_documents" record has been written the document is passed to the indexer.
     *
     * @access public
     *
     * @param int $pid: The PID of the saved record
     * @param int $core: The UID of the Solr core for indexing
     * @param int|string $owner: UID or index_name of owner to set while indexing
     *
     * @return bool true on success or false on failure (also false if no Solr core
     * is given, although the database record has been written in that case)
     */
    public function save($pid = 0, $core = 0, $owner = null)
    {
        if (\TYPO3_MODE !== 'BE') {
            $this->logger->error('Saving a document is only allowed in the backend');
            return false;
        }
        // Make sure $pid is a non-negative integer.
        $pid = max(intval($pid), 0);
        // Make sure $core is a non-negative integer.
        $core = max(intval($core), 0);
        // If $pid is not given, try to get it elsewhere.
        if (
            !$pid
            && $this->pid
        ) {
            // Retain current PID.
            $pid = $this->pid;
        } elseif (!$pid) {
            $this->logger->error('Invalid PID ' . $pid . ' for document saving');
            return false;
        }
        // Set PID for metadata definitions.
        $this->cPid = $pid;
        // Set UID placeholder if not updating existing record.
        if ($pid != $this->pid) {
            $this->uid = uniqid('NEW');
        }
        // Get metadata array.
        $metadata = $this->getTitledata($pid);
        // Check for record identifier.
        if (empty($metadata['record_id'][0])) {
            $this->logger->error('No record identifier found to avoid duplication');
            return false;
        }
        // Load plugin configuration.
        $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_structures');

        // Get UID for structure type.
        $result = $queryBuilder
            ->select('tx_dlf_structures.uid AS uid')
            ->from('tx_dlf_structures')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($pid)),
                $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($metadata['type'][0])),
                Helper::whereExpression('tx_dlf_structures')
            )
            ->setMaxResults(1)
            ->execute();

        if ($resArray = $result->fetch()) {
            $structure = $resArray['uid'];
        } else {
            $this->logger->error('Could not identify document/structure type "' . $queryBuilder->expr()->literal($metadata['type'][0]) . '"');
            return false;
        }
        // From here on the type is referenced by the UID of its structure record.
        $metadata['type'][0] = $structure;

        // Remove appended "valueURI" from authors' names for storing in database.
        // (Name and appended part are separated by the ASCII unit separator, chr(31).)
        foreach ($metadata['author'] as $i => $author) {
            $splitName = explode(chr(31), $author);
            $metadata['author'][$i] = $splitName[0];
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_collections');

        // Get UIDs for collections.
        $result = $queryBuilder
            ->select(
                'tx_dlf_collections.index_name AS index_name',
                'tx_dlf_collections.uid AS uid'
            )
            ->from('tx_dlf_collections')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_collections.pid', intval($pid)),
                $queryBuilder->expr()->in('tx_dlf_collections.sys_language_uid', [-1, 0]),
                Helper::whereExpression('tx_dlf_collections')
            )
            ->execute();

        $collUid = [];
        while ($resArray = $result->fetch()) {
            $collUid[$resArray['index_name']] = $resArray['uid'];
        }
        $collections = [];
        foreach ($metadata['collection'] as $collection) {
            if (!empty($collUid[$collection])) {
                // Add existing collection's UID.
                $collections[] = $collUid[$collection];
            } else {
                // Insert new collection.
                $collNewUid = uniqid('NEW');
                $collData['tx_dlf_collections'][$collNewUid] = [
                    'pid' => $pid,
                    'label' => $collection,
                    'index_name' => $collection,
                    'oai_name' => (!empty($conf['publishNewCollections']) ? Helper::getCleanString($collection) : ''),
                    'description' => '',
                    'documents' => 0,
                    'owner' => 0,
                    'status' => 0,
                ];
                $substUid = Helper::processDBasAdmin($collData);
                // Prevent double insertion.
                unset($collData);
                // Add new collection's UID.
                $collections[] = $substUid[$collNewUid];
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newCollection'), $collection, $substUid[$collNewUid])),
                        Helper::getMessage('flash.attention', true),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        true
                    );
                }
            }
        }
        // Collections are stored by UID, not by index name.
        $metadata['collection'] = $collections;

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_libraries');

        // Get UID for owner.
        if (empty($owner)) {
            // Prefer the owner given in the metadata and fall back to "default" only
            // if there is none. (The condition used to be inverted, which discarded
            // an existing owner and produced an empty one otherwise.)
            $owner = !empty($metadata['owner'][0]) ? $metadata['owner'][0] : 'default';
        }
        if (!MathUtility::canBeInterpretedAsInteger($owner)) {
            // $owner is an index name: look up the matching library record.
            $result = $queryBuilder
                ->select('tx_dlf_libraries.uid AS uid')
                ->from('tx_dlf_libraries')
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_libraries.pid', intval($pid)),
                    $queryBuilder->expr()->eq('tx_dlf_libraries.index_name', $queryBuilder->expr()->literal($owner)),
                    Helper::whereExpression('tx_dlf_libraries')
                )
                ->setMaxResults(1)
                ->execute();

            if ($resArray = $result->fetch()) {
                $ownerUid = $resArray['uid'];
            } else {
                // Insert new library.
                $libNewUid = uniqid('NEW');
                $libData['tx_dlf_libraries'][$libNewUid] = [
                    'pid' => $pid,
                    'label' => $owner,
                    'index_name' => $owner,
                    'website' => '',
                    'contact' => '',
                    'image' => '',
                    'oai_label' => '',
                    'oai_base' => '',
                    'opac_label' => '',
                    'opac_base' => '',
                    'union_label' => '',
                    'union_base' => '',
                ];
                $substUid = Helper::processDBasAdmin($libData);
                // Add new library's UID.
                $ownerUid = $substUid[$libNewUid];
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newLibrary'), $owner, $ownerUid)),
                        Helper::getMessage('flash.attention', true),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        true
                    );
                }
            }
            $owner = $ownerUid;
        }
        $metadata['owner'][0] = $owner;
        // Get UID of parent document.
        $partof = $this->getParentDocumentUidForSaving($pid, $core, $owner);
        // Use the date of publication or title as alternative sorting metric for parts of multi-part works.
        if (!empty($partof)) {
            if (
                empty($metadata['volume'][0])
                && !empty($metadata['year'][0])
            ) {
                $metadata['volume'] = $metadata['year'];
            }
            if (empty($metadata['volume_sorting'][0])) {
                // If METS @ORDER is given it is preferred over year_sorting and year.
                if (!empty($metadata['mets_order'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['mets_order'][0];
                } elseif (!empty($metadata['year_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year_sorting'][0];
                } elseif (!empty($metadata['year'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year'][0];
                }
            }
            // If volume_sorting is still empty, try to use title_sorting or METS @ORDERLABEL finally (workaround for newspapers)
            if (empty($metadata['volume_sorting'][0])) {
                if (!empty($metadata['title_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['title_sorting'][0];
                } elseif (!empty($metadata['mets_orderlabel'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['mets_orderlabel'][0];
                }
            }
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');

        // Get metadata for lists and sorting.
        $result = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadata.is_listed AS is_listed',
                'tx_dlf_metadata.is_sortable AS is_sortable'
            )
            ->from('tx_dlf_metadata')
            ->where(
                $queryBuilder->expr()->orX(
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_listed', 1),
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_sortable', 1)
                ),
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($pid)),
                Helper::whereExpression('tx_dlf_metadata')
            )
            ->execute();

        $listed = [];
        $sortable = [];

        while ($resArray = $result->fetch()) {
            if (!empty($metadata[$resArray['index_name']])) {
                if ($resArray['is_listed']) {
                    // Listed metadata keeps all of its values ...
                    $listed[$resArray['index_name']] = $metadata[$resArray['index_name']];
                }
                if ($resArray['is_sortable']) {
                    // ... whereas only the first value is used for sorting.
                    $sortable[$resArray['index_name']] = $metadata[$resArray['index_name']][0];
                }
            }
        }
        // Fill data array.
        $data['tx_dlf_documents'][$this->uid] = [
            'pid' => $pid,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['starttime'] => 0,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['endtime'] => 0,
            'prod_id' => $metadata['prod_id'][0],
            'location' => $this->location,
            'record_id' => $metadata['record_id'][0],
            'opac_id' => $metadata['opac_id'][0],
            'union_id' => $metadata['union_id'][0],
            'urn' => $metadata['urn'][0],
            'purl' => $metadata['purl'][0],
            'title' => $metadata['title'][0],
            'title_sorting' => $metadata['title_sorting'][0],
            'author' => implode('; ', $metadata['author']),
            'year' => implode('; ', $metadata['year']),
            'place' => implode('; ', $metadata['place']),
            'thumbnail' => $this->_getThumbnail(true),
            'metadata' => serialize($listed),
            'metadata_sorting' => serialize($sortable),
            'structure' => $metadata['type'][0],
            'partof' => $partof,
            'volume' => $metadata['volume'][0],
            'volume_sorting' => $metadata['volume_sorting'][0],
            'license' => $metadata['license'][0],
            'terms' => $metadata['terms'][0],
            'restrictions' => $metadata['restrictions'][0],
            'out_of_print' => $metadata['out_of_print'][0],
            'rights_info' => $metadata['rights_info'][0],
            'collections' => $metadata['collection'],
            'mets_label' => $metadata['mets_label'][0],
            'mets_orderlabel' => $metadata['mets_orderlabel'][0],
            'mets_order' => $metadata['mets_order'][0],
            'owner' => $metadata['owner'][0],
            'solrcore' => $core,
            'status' => 0,
            'document_format' => $metadata['document_format'][0],
        ];
        // Unhide hidden documents.
        if (!empty($conf['unhideOnIndex'])) {
            $data['tx_dlf_documents'][$this->uid][$GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['disabled']] = 0;
        }
        // Process data.
        $newIds = Helper::processDBasAdmin($data);
        // Replace placeholder with actual UID.
        if (strpos($this->uid, 'NEW') === 0) {
            $this->uid = $newIds[$this->uid];
            $this->pid = $pid;
            $this->parentId = $partof;
        }
        if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
            Helper::addMessage(
                htmlspecialchars(sprintf(Helper::getMessage('flash.documentSaved'), $metadata['title'][0], $this->uid)),
                Helper::getMessage('flash.done', true),
                \TYPO3\CMS\Core\Messaging\FlashMessage::OK,
                true
            );
        }
        // Add document to index.
        if ($core) {
            return Indexer::add($this, $core);
        } else {
            $this->logger->notice('Invalid UID "' . $core . '" for Solr core');
            return false;
        }
    }
1344
1345
    /**
     * Get the ID of the parent document if the current document has one. Also save a parent document
     * to the database and the Solr index if their $pid and the current $pid differ.
     * Currently only applies to METS documents.
     *
     * The arguments are the values save() works with when it calls this method.
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $pid: The PID the document is saved to
     * @param int $core: The UID of the Solr core for indexing
     * @param int|string $owner: The owner as resolved by save()
     *
     * @return int The parent document's id.
     */
    abstract protected function getParentDocumentUidForSaving($pid, $core, $owner);
1357
1358
    /**
     * Getter behind the magic property $cPid (accessed via __get())
     *
     * @access protected
     *
     * @return int The PID of the metadata definitions
     */
    protected function _getCPid()
    {
        return $this->cPid;
    }
1369
1370
    /**
     * Getter behind the magic property $hasFulltext (accessed via __get())
     *
     * @access protected
     *
     * @return bool Are there any fulltext files available?
     */
    protected function _getHasFulltext()
    {
        // Give the format specific implementation the chance to determine the value first.
        $this->ensureHasFulltextIsSet();
        return $this->hasFulltext;
    }
1382
1383
    /**
     * Getter behind the magic property $location (accessed via __get())
     *
     * @access protected
     *
     * @return string The location of the document
     */
    protected function _getLocation()
    {
        return $this->location;
    }
1394
1395
    /**
1396
     * Format specific part of building the document's metadata array
1397
     *
1398
     * @access protected
1399
     *
1400
     * @abstract
1401
     *
1402
     * @param int $cPid
1403
     */
1404
    abstract protected function prepareMetadataArray($cPid);
1405
1406
    /**
     * Returns the document's metadata array and (re-)builds it first if it has not
     * been loaded yet or was built for a different metadata definition PID.
     *
     * @access protected
     *
     * @return array Array of metadata with their corresponding logical structure node ID as key
     */
    protected function _getMetadataArray()
    {
        // Set metadata definitions' PID (falls back to the document's own PID).
        $cPid = $this->cPid ?: $this->pid;
        if (!$cPid) {
            $this->logger->error('Invalid PID ' . $cPid . ' for metadata definitions');
            return [];
        }
        // Index 0 of the array remembers the PID the array was prepared for.
        $needsRebuild = !$this->metadataArrayLoaded
            || $this->metadataArray[0] != $cPid;
        if ($needsRebuild) {
            $this->prepareMetadataArray($cPid);
            $this->metadataArray[0] = $cPid;
            $this->metadataArrayLoaded = true;
        }
        return $this->metadataArray;
    }
1431
1432
    /**
     * Getter behind the magic property $numPages (accessed via __get())
     *
     * @access protected
     *
     * @return int The total number of pages and/or tracks
     */
    protected function _getNumPages()
    {
        // The physical structure is requested before the counter is read.
        $this->_getPhysicalStructure();
        return $this->numPages;
    }
1444
1445
    /**
     * Getter behind the magic property $parentId (accessed via __get())
     *
     * @access protected
     *
     * @return int The UID of the parent document or zero if not applicable
     */
    protected function _getParentId()
    {
        return $this->parentId;
    }
1456
1457
    /**
1458
     * This builds an array of the document's physical structure
1459
     *
1460
     * @access protected
1461
     *
1462
     * @abstract
1463
     *
1464
     * @return array Array of physical elements' id, type, label and file representations ordered
1465
     * by @ORDER attribute / IIIF Sequence's Canvases
1466
     */
1467
    abstract protected function _getPhysicalStructure();
1468
1469
    /**
     * Returns the metadata of the document's physical structure and triggers
     * building the physical structure first if that has not happened yet.
     *
     * @access protected
     *
     * @return array Array of elements' type, label and file representations ordered by @ID attribute / Canvas order
     */
    protected function _getPhysicalStructureInfo()
    {
        $alreadyBuilt = (bool) $this->physicalStructureLoaded;
        if (!$alreadyBuilt) {
            // Build physical structure array.
            $this->_getPhysicalStructure();
        }
        return $this->physicalStructureInfo;
    }
1485
1486
    /**
     * Getter behind the magic property $pid (accessed via __get())
     *
     * @access protected
     *
     * @return int The PID of the document or zero if not in database
     */
    protected function _getPid()
    {
        return $this->pid;
    }
1497
1498
    /**
     * Getter behind the magic property $ready (accessed via __get())
     *
     * @access protected
     *
     * @return bool Is the document instantiated successfully?
     */
    protected function _getReady()
    {
        return $this->ready;
    }
1509
1510
    /**
     * Getter behind the magic property $recordId (accessed via __get())
     *
     * @access protected
     *
     * @return mixed The METS file's / IIIF manifest's record identifier
     */
    protected function _getRecordId()
    {
        return $this->recordId;
    }
1521
1522
    /**
     * Getter behind the magic property $rootId (accessed via __get()).
     * The value is resolved through the parent document on first access only.
     *
     * @access protected
     *
     * @return int The UID of the root document or zero if not applicable
     */
    protected function _getRootId()
    {
        // Resolved before: hand out the remembered value.
        if ($this->rootIdLoaded) {
            return $this->rootId;
        }
        if ($this->parentId) {
            // Ask the parent document for its root.
            $parentDocument = self::getInstance($this->parentId, $this->pid);
            $this->rootId = $parentDocument->rootId;
        }
        $this->rootIdLoaded = true;
        return $this->rootId;
    }
1540
1541
    /**
1542
     * This returns the smLinks between logical and physical structMap (METS) and models the
1543
     * relation between IIIF Canvases and Manifests / Ranges in the same way
1544
     *
1545
     * @access protected
1546
     *
1547
     * @abstract
1548
     *
1549
     * @return array The links between logical and physical nodes / Range, Manifest and Canvas
1550
     */
1551
    abstract protected function _getSmLinks();
1552
1553
    /**
     * Returns the document's logical structure (table of contents) and requests
     * all logical structures on first access.
     *
     * @access protected
     *
     * @return array Array of structure nodes' id, label, type and physical page indexes/mptr / Canvas link with original hierarchy preserved
     */
    protected function _getTableOfContents()
    {
        // The logical structure array is already there.
        if ($this->tableOfContentsLoaded) {
            return $this->tableOfContents;
        }
        // Get all logical structures.
        $this->getLogicalStructure('', true);
        $this->tableOfContentsLoaded = true;
        return $this->tableOfContents;
    }
1570
1571
    /**
     * Getter for the location of the document's thumbnail
     *
     * @access protected
     *
     * @abstract
     *
     * @param bool $forceReload: Force reloading the thumbnail instead of returning the cached value
     *
     * @return string The document's thumbnail location
     */
    abstract protected function _getThumbnail($forceReload = false);
1583
1584
    /**
     * Getter for the ID of the toplevel logical structure node
     *
     * @access protected
     *
     * @abstract
     *
     * @return string The logical structure node's ID
     */
    abstract protected function _getToplevelId();
1594
1595
    /**
     * Getter for $this->uid, invoked through __get()
     *
     * @access protected
     *
     * @return mixed The UID or the URL of the document
     */
    protected function _getUid()
    {
        return $this->uid;
    }
1606
1607
    /**
     * Setter for $this->cPid, invoked through __set()
     *
     * The value is converted to an integer; negative values are stored as 0.
     *
     * @access protected
     *
     * @param int $value: The new PID for the metadata definitions
     *
     * @return void
     */
    protected function _setCPid($value)
    {
        $pid = intval($value);
        // Never store a negative PID.
        $this->cPid = $pid > 0 ? $pid : 0;
    }
1620
1621
    /**
     * Magic method invoked whenever "clone" is applied to an instance
     *
     * @access protected
     *
     * @return void
     */
    protected function __clone()
    {
        // Intentionally empty and protected: singleton objects should not be cloned.
    }
1632
1633
    /**
     * Protected constructor of this singleton class
     * (Get an instance of this class by calling \Kitodo\Dlf\Common\Document::getInstance())
     *
     * A numeric $uid is looked up in "tx_dlf_documents" by UID. Any other value is
     * treated as the location of a METS file / IIIF manifest, which is loaded first
     * (or taken from $preloadedDocument) and then matched against the table by
     * location or record identifier. The instance is flagged as ready as soon as a
     * document object is available.
     *
     * @access protected
     *
     * @param int $uid: The UID of the document to parse or URL to XML file
     * @param int $pid: If > 0, then only document with this PID gets loaded
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: Either null or the \SimpleXMLElement
     * or IiifResourceInterface that has been loaded to determine the basic document format.
     *
     * @return void
     */
    protected function __construct($uid, $pid, $preloadedDocument)
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');
        $expr = $queryBuilder->expr();
        $location = '';

        if (MathUtility::canBeInterpretedAsInteger($uid)) {
            // Numeric input: prepare to check the database for this UID.
            $whereClause = $expr->andX(
                $expr->eq('tx_dlf_documents.uid', intval($uid)),
                Helper::whereExpression('tx_dlf_documents')
            );
        } else {
            // Try to use the preloaded document or to load the METS file / IIIF manifest.
            $isLoaded = $this->setPreloadedDocument($preloadedDocument)
                || (GeneralUtility::isValidUrl($uid) && $this->load($uid));
            if (!$isLoaded) {
                // Loading failed.
                return;
            }
            // Initialize core METS object.
            $this->init();
            if ($this->getDocument() === null) {
                // No METS / IIIF part found.
                return;
            }
            // Cast to string for safety reasons.
            $location = (string) $uid;
            $this->establishRecordId($pid);

            if (empty($location) || empty($this->recordId)) {
                // Can't persistently identify document, don't try to match at all.
                $whereClause = '1=-1';
            } else {
                // Try to match record identifier or location (both should be unique).
                $matchesLocation = $expr->eq(
                    'tx_dlf_documents.location',
                    $expr->literal($location)
                );
                $matchesRecordId = $expr->eq(
                    'tx_dlf_documents.record_id',
                    $expr->literal($this->recordId)
                );
                $whereClause = $expr->andX(
                    $expr->orX($matchesLocation, $matchesRecordId),
                    Helper::whereExpression('tx_dlf_documents')
                );
            }
        }

        // Restrict to the given PID if needed.
        if ($pid) {
            $whereClause = $expr->andX(
                $whereClause,
                $expr->eq('tx_dlf_documents.pid', intval($pid))
            );
        }

        // Get the document's record (at most one row) from the database.
        $row = $queryBuilder
            ->select(
                'tx_dlf_documents.uid AS uid',
                'tx_dlf_documents.pid AS pid',
                'tx_dlf_documents.record_id AS record_id',
                'tx_dlf_documents.partof AS partof',
                'tx_dlf_documents.thumbnail AS thumbnail',
                'tx_dlf_documents.location AS location'
            )
            ->from('tx_dlf_documents')
            ->where($whereClause)
            ->setMaxResults(1)
            ->execute()
            ->fetch();

        if ($row) {
            // Take over the properties stored in the database.
            $this->uid = $row['uid'];
            $this->pid = $row['pid'];
            $this->recordId = $row['record_id'];
            $this->parentId = $row['partof'];
            $this->thumbnail = $row['thumbnail'];
            $this->location = $row['location'];
            $this->thumbnailLoaded = true;
            // Load the file from the stored location if nothing is loaded yet...
            if (
                $this->getDocument() === null
                && $this->load($this->location)
            ) {
                // ...and set some basic properties.
                $this->init();
            }
            // Do we have a METS / IIIF object now?
            if ($this->getDocument() !== null) {
                // The location passed in takes precedence over the stored one.
                if (!empty($location)) {
                    $this->location = $location;
                }
                // Document ready!
                $this->ready = true;
            }
            return;
        }

        if ($this->getDocument() !== null) {
            // Document is not in the database: use its location as UID.
            $this->uid = $location;
            $this->location = $location;
            // Document ready!
            $this->ready = true;
            return;
        }

        $this->logger->error('No document with UID ' . $uid . ' found or document not accessible');
    }
1749
1750
    /**
     * Magic method invoked whenever an inaccessible property is read
     *
     * Delegates to the matching "_get<Property>()" method. If the property or its
     * getter does not exist, a warning is logged and null is returned.
     *
     * @access public
     *
     * @param string $var: Name of variable to get
     *
     * @return mixed Value of $this->$var
     */
    public function __get($var)
    {
        $getter = '_get' . ucfirst($var);
        if (
            property_exists($this, $var)
            && method_exists($this, $getter)
        ) {
            return $this->$getter();
        }
        $this->logger->warning('There is no getter function for property "' . $var . '"');
        return null;
    }
1772
1773
    /**
     * Magic method invoked whenever isset() or empty() is applied to an inaccessible property
     *
     * The value is fetched through __get() and tested with empty().
     *
     * @access public
     *
     * @param string $var: Name of variable to check
     *
     * @return bool true if variable is set and not empty, false otherwise
     */
    public function __isset($var)
    {
        $value = $this->__get($var);
        return !empty($value);
    }
1786
1787
    /**
     * Magic method invoked whenever an inaccessible property is written
     *
     * Delegates to the matching "_set<Property>()" method. If the property or its
     * setter does not exist, a warning is logged and nothing is changed.
     *
     * @access public
     *
     * @param string $var: Name of variable to set
     * @param mixed $value: New value of variable
     *
     * @return void
     */
    public function __set($var, $value)
    {
        $setter = '_set' . ucfirst($var);
        if (
            property_exists($this, $var)
            && method_exists($this, $setter)
        ) {
            $this->$setter($value);
            return;
        }
        $this->logger->warning('There is no setter function for property "' . $var . '"');
    }
1809
}
1810