Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Pull Request — master (#680)
by
unknown
02:46
created

Document::getDocumentFormatFromDatabase()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 36
Code Lines 24

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 24
c 1
b 0
f 0
dl 0
loc 36
rs 9.536
cc 3
nc 4
nop 2
1
<?php
2
3
/**
4
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
5
 *
6
 * This file is part of the Kitodo and TYPO3 projects.
7
 *
8
 * @license GNU General Public License version 3 or later.
9
 * For the full copyright and license information, please read the
10
 * LICENSE.txt file that was distributed with this source code.
11
 */
12
13
namespace Kitodo\Dlf\Common;
14
15
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
16
use TYPO3\CMS\Core\Database\ConnectionPool;
17
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
18
use TYPO3\CMS\Core\Log\LogManager;
19
use TYPO3\CMS\Core\Utility\GeneralUtility;
20
use TYPO3\CMS\Core\Utility\MathUtility;
21
use Ubl\Iiif\Presentation\Common\Model\Resources\IiifResourceInterface;
22
use Ubl\Iiif\Tools\IiifHelper;
23
24
/**
25
 * Document class for the 'dlf' extension
26
 *
27
 * @author Sebastian Meyer <[email protected]>
28
 * @author Henrik Lochmann <[email protected]>
29
 * @package TYPO3
30
 * @subpackage dlf
31
 * @access public
32
 * @property int $cPid This holds the PID for the configuration
33
 * @property-read bool $hasFulltext Are there any fulltext files available?
34
 * @property-read string $location This holds the document's location
35
 * @property-read array $metadataArray This holds the documents' parsed metadata array
36
 * @property-read int $numPages This holds the total number of pages
37
 * @property-read int $parentId This holds the UID of the parent document or zero if not multi-volumed
38
 * @property-read array $physicalStructure This holds the physical structure
39
 * @property-read array $physicalStructureInfo This holds the physical structure metadata
40
 * @property-read int $pid This holds the PID of the document or zero if not in database
41
 * @property-read bool $ready Is the document instantiated successfully?
42
 * @property-read string $recordId The METS file's / IIIF manifest's record identifier
43
 * @property-read int $rootId This holds the UID of the root document or zero if not multi-volumed
44
 * @property-read array $smLinks This holds the smLinks between logical and physical structMap
45
 * @property-read array $tableOfContents This holds the logical structure
46
 * @property-read string $thumbnail This holds the document's thumbnail location
47
 * @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
48
 * @property-read mixed $uid This holds the UID or the URL of the document
49
 * @abstract
50
 */
51
abstract class Document
52
{
53
    /**
54
     * This holds the logger
55
     *
56
     * @var \TYPO3\CMS\Core\Log\Logger
57
     * @access protected
58
     */
59
    protected $logger;
60
61
    /**
62
     * This holds the PID for the configuration
63
     *
64
     * @var int
65
     * @access protected
66
     */
67
    protected $cPid = 0;
68
69
    /**
70
     * The extension key
71
     *
72
     * @var string
73
     * @access public
74
     */
75
    public static $extKey = 'dlf';
76
77
    /**
78
     * This holds the configuration for all supported metadata encodings
79
     * @see loadFormats()
80
     *
81
     * @var array
82
     * @access protected
83
     */
84
    protected $formats = [
85
        'OAI' => [
86
            'rootElement' => 'OAI-PMH',
87
            'namespaceURI' => 'http://www.openarchives.org/OAI/2.0/',
88
        ],
89
        'METS' => [
90
            'rootElement' => 'mets',
91
            'namespaceURI' => 'http://www.loc.gov/METS/',
92
        ],
93
        'XLINK' => [
94
            'rootElement' => 'xlink',
95
            'namespaceURI' => 'http://www.w3.org/1999/xlink',
96
        ]
97
    ];
98
99
    /**
100
     * Are the available metadata formats loaded?
101
     * @see $formats
102
     *
103
     * @var bool
104
     * @access protected
105
     */
106
    protected $formatsLoaded = false;
107
108
    /**
109
     * Are there any fulltext files available? This also includes IIIF text annotations
110
     * with motivation 'painting' if Kitodo.Presentation is configured to store text
111
     * annotations as fulltext.
112
     *
113
     * @var bool
114
     * @access protected
115
     */
116
    protected $hasFulltext = false;
117
118
    /**
119
     * Last searched logical and physical page
120
     *
121
     * @var array
122
     * @access protected
123
     */
124
    protected $lastSearchedPhysicalPage = ['logicalPage' => null, 'physicalPage' => null];
125
126
    /**
127
     * This holds the document's location
128
     *
129
     * @var string
130
     * @access protected
131
     */
132
    protected $location = '';
133
134
    /**
135
     * This holds the logical units
136
     *
137
     * @var array
138
     * @access protected
139
     */
140
    protected $logicalUnits = [];
141
142
    /**
143
     * This holds the documents' parsed metadata array with their corresponding
144
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
145
     *
146
     * @var array
147
     * @access protected
148
     */
149
    protected $metadataArray = [];
150
151
    /**
152
     * Is the metadata array loaded?
153
     * @see $metadataArray
154
     *
155
     * @var bool
156
     * @access protected
157
     */
158
    protected $metadataArrayLoaded = false;
159
160
    /**
161
     * This holds the total number of pages
162
     *
163
     * @var int
164
     * @access protected
165
     */
166
    protected $numPages = 0;
167
168
    /**
169
     * This holds the UID of the parent document or zero if not multi-volumed
170
     *
171
     * @var int
172
     * @access protected
173
     */
174
    protected $parentId = 0;
175
176
    /**
177
     * This holds the physical structure
178
     *
179
     * @var array
180
     * @access protected
181
     */
182
    protected $physicalStructure = [];
183
184
    /**
185
     * This holds the physical structure metadata
186
     *
187
     * @var array
188
     * @access protected
189
     */
190
    protected $physicalStructureInfo = [];
191
192
    /**
193
     * Is the physical structure loaded?
194
     * @see $physicalStructure
195
     *
196
     * @var bool
197
     * @access protected
198
     */
199
    protected $physicalStructureLoaded = false;
200
201
    /**
202
     * This holds the PID of the document or zero if not in database
203
     *
204
     * @var int
205
     * @access protected
206
     */
207
    protected $pid = 0;
208
209
    /**
210
     * This holds the documents' raw text pages with their corresponding
211
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
212
     *
213
     * @var array
214
     * @access protected
215
     */
216
    protected $rawTextArray = [];
217
218
    /**
219
     * Is the document instantiated successfully?
220
     *
221
     * @var bool
222
     * @access protected
223
     */
224
    protected $ready = false;
225
226
    /**
227
     * The METS file's / IIIF manifest's record identifier
228
     *
229
     * @var string
230
     * @access protected
231
     */
232
    protected $recordId;
233
234
    /**
235
     * This holds the singleton object of the document
236
     *
237
     * @var array (\Kitodo\Dlf\Common\Document)
238
     * @static
239
     * @access protected
240
     */
241
    protected static $registry = [];
242
243
    /**
244
     * This holds the UID of the root document or zero if not multi-volumed
245
     *
246
     * @var int
247
     * @access protected
248
     */
249
    protected $rootId = 0;
250
251
    /**
252
     * Is the root id loaded?
253
     * @see $rootId
254
     *
255
     * @var bool
256
     * @access protected
257
     */
258
    protected $rootIdLoaded = false;
259
260
    /**
261
     * This holds the smLinks between logical and physical structMap
262
     *
263
     * @var array
264
     * @access protected
265
     */
266
    protected $smLinks = ['l2p' => [], 'p2l' => []];
267
268
    /**
269
     * Are the smLinks loaded?
270
     * @see $smLinks
271
     *
272
     * @var bool
273
     * @access protected
274
     */
275
    protected $smLinksLoaded = false;
276
277
    /**
278
     * This holds the logical structure
279
     *
280
     * @var array
281
     * @access protected
282
     */
283
    protected $tableOfContents = [];
284
285
    /**
286
     * Is the table of contents loaded?
287
     * @see $tableOfContents
288
     *
289
     * @var bool
290
     * @access protected
291
     */
292
    protected $tableOfContentsLoaded = false;
293
294
    /**
295
     * This holds the document's thumbnail location
296
     *
297
     * @var string
298
     * @access protected
299
     */
300
    protected $thumbnail = '';
301
302
    /**
303
     * Is the document's thumbnail location loaded?
304
     * @see $thumbnail
305
     *
306
     * @var bool
307
     * @access protected
308
     */
309
    protected $thumbnailLoaded = false;
310
311
    /**
312
     * This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
313
     *
314
     * @var string
315
     * @access protected
316
     */
317
    protected $toplevelId = '';
318
319
    /**
320
     * This holds the UID or the URL of the document
321
     *
322
     * @var mixed
323
     * @access protected
324
     */
325
    protected $uid = 0;
326
327
    /**
328
     * This holds the whole XML file as \SimpleXMLElement object
329
     *
330
     * @var \SimpleXMLElement
331
     * @access protected
332
     */
333
    protected $xml;
334
335
    /**
     * Empties the static instance registry so that long-running processes
     * do not accumulate document instances in memory
     *
     * @access public
     *
     * @static
     *
     * @return void
     */
    public static function clearRegistry()
    {
        // Drop every cached document instance at once.
        self::$registry = [];
    }
349
350
    /**
351
     * This ensures that the recordId, if existent, is retrieved from the document
352
     *
353
     * @access protected
354
     *
355
     * @abstract
356
     *
357
     * @param int $pid: ID of the configuration page with the recordId config
358
     *
359
     */
360
    abstract protected function establishRecordId($pid);
361
362
    /**
363
     * Source document PHP object which is represented by a Document instance
364
     *
365
     * @access protected
366
     *
367
     * @abstract
368
     *
369
     * @return \SimpleXMLElement|IiifResourceInterface A PHP object representation of
370
     * the current document. SimpleXMLElement for METS, IiifResourceInterface for IIIF
371
     */
372
    abstract protected function getDocument();
373
374
    /**
375
     * This gets the location of a downloadable file for a physical page or track
376
     *
377
     * @access public
378
     *
379
     * @abstract
380
     *
381
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
382
     *
383
     * @return string    The file's location as URL
384
     */
385
    abstract public function getDownloadLocation($id);
386
387
    /**
388
     * This gets the location of a file representing a physical page or track
389
     *
390
     * @access public
391
     *
392
     * @abstract
393
     *
394
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
395
     *
396
     * @return string The file's location as URL
397
     */
398
    abstract public function getFileLocation($id);
399
400
    /**
401
     * This gets the MIME type of a file representing a physical page or track
402
     *
403
     * @access public
404
     *
405
     * @abstract
406
     *
407
     * @param string $id: The @ID attribute of the file node
408
     *
409
     * @return string The file's MIME type
410
     */
411
    abstract public function getFileMimeType($id);
412
413
    /**
     * This is a singleton class, thus an instance must be created by this method
     *
     * Lookup order: the static registry, then the user's session, and only
     * then a freshly constructed MetsDocument or IiifManifest. A cached
     * instance is reused only if no PID was requested, the cached instance
     * has no PID, or both PIDs match.
     *
     * @access public
     *
     * @static
     *
     * @param mixed $uid: The unique identifier of the document to parse, the URL of XML file or the IRI of the IIIF resource
     * @param int $pid: If > 0, then only document with this PID gets loaded
     * @param bool $forceReload: Force reloading the document instead of returning the cached instance
     *
     * @return \Kitodo\Dlf\Common\Document|null Instance of this class, either MetsDocument or IiifManifest,
     * or null if the document format could not be determined
     */
    public static function &getInstance($uid, $pid = 0, $forceReload = false)
    {
        // Sanitize input.
        $pid = max(intval($pid), 0);
        if (!$forceReload) {
            $regObj = Helper::digest($uid);
            // isset() instead of is_object(): the registry usually has no entry
            // for this key yet, and reading a missing offset raises a notice/warning.
            if (
                isset(self::$registry[$regObj])
                && self::$registry[$regObj] instanceof self
            ) {
                // Check if instance has given PID.
                if (
                    !$pid
                    || !self::$registry[$regObj]->pid
                    || $pid == self::$registry[$regObj]->pid
                ) {
                    // Return singleton instance if available.
                    return self::$registry[$regObj];
                }
            } else {
                // Check the user's session...
                $sessionData = Helper::loadFromSession(get_called_class());
                // isset() also copes with an empty session (no array to index into).
                if (
                    isset($sessionData[$regObj])
                    && $sessionData[$regObj] instanceof self
                ) {
                    // Check if instance has given PID.
                    if (
                        !$pid
                        || !$sessionData[$regObj]->pid
                        || $pid == $sessionData[$regObj]->pid
                    ) {
                        // ...and restore registry.
                        self::$registry[$regObj] = $sessionData[$regObj];
                        return self::$registry[$regObj];
                    }
                }
            }
        }
        // Create new instance depending on format (METS or IIIF) ...
        $instance = null;
        // Kept as variables (not literal nulls) in case the constructors
        // take their third argument by reference.
        $xml = null;
        $iiif = null;
        if (MathUtility::canBeInterpretedAsInteger($uid)) {
            // Try to get document format from database
            $documentFormat = self::getDocumentFormatFromDatabase($uid, $pid);
        } else {
            // Get document format from content of remote document
            $documentFormat = self::getDocumentFormatFromRemote($uid);
        }
        if ($documentFormat === 'METS') {
            $instance = new MetsDocument($uid, $pid, $xml);
        } elseif ($documentFormat === 'IIIF') {
            $instance = new IiifManifest($uid, $pid, $iiif);
        }
        // Save instance to registry.
        if (
            $instance instanceof self
            && $instance->ready
        ) {
            self::$registry[Helper::digest($instance->uid)] = $instance;
            // Register under the location as well so lookups by URL hit the cache.
            if ($instance->uid != $instance->location) {
                self::$registry[Helper::digest($instance->location)] = $instance;
            }
            // Load extension configuration
            $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
            // Save registry to session if caching is enabled.
            if (!empty($extConf['caching'])) {
                Helper::saveToSession(self::$registry, get_class($instance));
            }
            $instance->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($instance));
        }
        // Return new instance (null if no supported format was detected).
        return $instance;
    }
503
504
    /**
     * This gets the document format for documents stored in database.
     *
     * Looks up the record in "tx_dlf_documents" by UID, optionally restricted
     * to a PID, and returns its "document_format" column.
     *
     * @access private
     *
     * @static
     *
     * @param mixed $uid: The unique identifier of the document; it is cast to integer for the query
     * @param int $pid: If > 0, then only document with this PID gets loaded
     *
     * @return string The document format or an empty string if no record matches
     */
    private static function getDocumentFormatFromDatabase($uid, $pid)
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');

        $queryBuilder
            ->select(
                'tx_dlf_documents.location AS location',
                'tx_dlf_documents.document_format AS document_format'
            )
            ->from('tx_dlf_documents');

        // Collect the constraints once; the PID restriction is optional.
        $constraints = [
            $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($uid)),
        ];
        if ($pid) {
            $constraints[] = $queryBuilder->expr()->eq('tx_dlf_documents.pid', intval($pid));
        }
        $constraints[] = Helper::whereExpression('tx_dlf_documents');

        $row = $queryBuilder
            ->where(...$constraints)
            ->setMaxResults(1)
            ->execute()
            ->fetch();

        return $row ? $row['document_format'] : '';
    }
553
554
    /**
     * This gets the document format for remote documents.
     *
     * The resource is fetched and first parsed as XML; if that succeeds it is
     * reported as METS when it contains a mets:mets element. Only if the
     * content is not XML, it is decoded as JSON and loaded as IIIF resource.
     *
     * @access private
     *
     * @static
     *
     * @param mixed $uid: The URL of the XML file or the IRI of the IIIF resource
     *
     * @return string The document format ('METS' or 'IIIF') or an empty string
     * if the location is invalid, unreachable or of an unrecognized format
     */
    private static function getDocumentFormatFromRemote($uid)
    {
        // Cast to string for safety reasons.
        $location = (string) $uid;
        // Try to load a file from the url
        if (!GeneralUtility::isValidUrl($location)) {
            return '';
        }
        // Load extension configuration
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        // Set user-agent to identify self when fetching XML data.
        if (!empty($extConf['useragent'])) {
            @ini_set('user_agent', $extConf['useragent']);
        }
        $content = GeneralUtility::getUrl($location);
        if ($content === false) {
            return '';
        }
        $xml = Helper::getXmlFileAsString($content);
        if ($xml !== false) {
            /* @var $xml \SimpleXMLElement */
            $xml->registerXPathNamespace('mets', 'http://www.loc.gov/METS/');
            // Always return a string: XML without a METS root is "unknown", not null.
            return !empty($xml->xpath('//mets:mets')) ? 'METS' : '';
        }
        // Try to load file as IIIF resource instead.
        $contentAsJsonArray = json_decode($content, true);
        if ($contentAsJsonArray === null) {
            return '';
        }
        // The extension configuration loaded above also carries the IIIF settings.
        IiifHelper::setUrlReader(IiifUrlReader::getInstance());
        IiifHelper::setMaxThumbnailHeight($extConf['iiifThumbnailHeight']);
        IiifHelper::setMaxThumbnailWidth($extConf['iiifThumbnailWidth']);
        $iiif = IiifHelper::loadIiifResource($contentAsJsonArray);

        return $iiif instanceof IiifResourceInterface ? 'IIIF' : '';
    }
606
607
    /**
608
     * This gets details about a logical structure element
609
     *
610
     * @access public
611
     *
612
     * @abstract
613
     *
614
     * @param string $id: The @ID attribute of the logical structure node (METS) or
615
     * the @id property of the Manifest / Range (IIIF)
616
     * @param bool $recursive: Whether to include the child elements / resources
617
     *
618
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
619
     */
620
    abstract public function getLogicalStructure($id, $recursive = false);
621
622
    /**
623
     * This extracts all the metadata for a logical structure node
624
     *
625
     * @access public
626
     *
627
     * @abstract
628
     *
629
     * @param string $id: The @ID attribute of the logical structure node (METS) or the @id property
630
     * of the Manifest / Range (IIIF)
631
     * @param int $cPid: The PID for the metadata definitions
632
     *                       (defaults to $this->cPid or $this->pid)
633
     *
634
     * @return array The logical structure node's / the IIIF resource's parsed metadata array
635
     */
636
    abstract public function getMetadata($id, $cPid = 0);
637
638
    /**
     * This returns the first corresponding physical page number of a given logical page label
     *
     * The position of the first entry in $this->physicalStructureInfo whose
     * "orderlabel" contains the given label is returned, counting from zero.
     * The last successful lookup is cached in $this->lastSearchedPhysicalPage.
     *
     * @access public
     *
     * @param string $logicalPage: The label (or a part of the label) of the logical page
     *
     * @return int The physical page number or 1 if no page matches
     */
    public function getPhysicalPage($logicalPage)
    {
        $logicalPage = (string) $logicalPage;
        // An empty label cannot identify a page; strpos() would warn (PHP 7)
        // or match everything (PHP 8), so fall back to the default right away.
        if ($logicalPage === '') {
            return 1;
        }
        // Strict comparison: with "==" numeric-looking labels such as "07" and "7"
        // are considered equal and the cached page of the wrong label is returned.
        if (
            !empty($this->lastSearchedPhysicalPage['logicalPage'])
            && (string) $this->lastSearchedPhysicalPage['logicalPage'] === $logicalPage
        ) {
            return $this->lastSearchedPhysicalPage['physicalPage'];
        }
        $physicalPage = 0;
        foreach ($this->physicalStructureInfo as $page) {
            // Entries without an order label can never match.
            $orderLabel = isset($page['orderlabel']) ? (string) $page['orderlabel'] : '';
            if (strpos($orderLabel, $logicalPage) !== false) {
                $this->lastSearchedPhysicalPage['logicalPage'] = $logicalPage;
                $this->lastSearchedPhysicalPage['physicalPage'] = $physicalPage;
                return $physicalPage;
            }
            $physicalPage++;
        }
        // Nothing found: default to the first page (not cached).
        return 1;
    }
667
668
    /**
669
     * This extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas. Text might be
670
     * given as ALTO for METS or as annotations or ALTO for IIIF resources.
671
     *
672
     * @access public
673
     *
674
     * @abstract
675
     *
676
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
677
     * of the Manifest / Range (IIIF)
678
     *
679
     * @return string The OCR full text
680
     */
681
    abstract public function getFullText($id);
682
683
    /**
     * This extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas from an
     * XML full text representation (currently only ALTO). For IIIF manifests, ALTO documents have
     * to be given in the Canvas' / Manifest's "seeAlso" property.
     *
     * The first configured full text file group ("fileGrpFulltext") that has a file
     * for the node is used. The converted text is also stored in $this->rawTextArray.
     *
     * @access protected
     *
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     *
     * @return string The OCR full text or an empty string if it could not be extracted
     */
    protected function getFullTextFromXml($id)
    {
        $fullText = '';
        // Defined up front: if no file group has a file for this node, the checks
        // and the log message below would otherwise read undefined variables.
        $fileContent = '';
        $textFormat = '';
        // Load available text formats, ...
        $this->loadFormats();
        // ... physical structure ...
        $this->_getPhysicalStructure();
        // ... and extension configuration.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        $fileGrpsFulltext = GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext']);
        if (empty($this->physicalStructureInfo[$id])) {
            $this->logger->warning('Invalid structure node @ID "' . $id . '"');
            return $fullText;
        }
        // foreach instead of while (array_shift()): a falsy group name must not
        // end the search for the remaining file groups.
        foreach ($fileGrpsFulltext as $fileGrpFulltext) {
            if (empty($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext])) {
                continue;
            }
            // Get full text file.
            $fileContent = GeneralUtility::getUrl($this->getFileLocation($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext]));
            if ($fileContent === false) {
                $this->logger->warning('Couldn\'t load full text file for structure node @ID "' . $id . '"');
                return $fullText;
            }
            $textFormat = $this->getTextFormat($fileContent);
            // Only the first file group with a file is considered.
            break;
        }
        // Is this text format supported?
        // This part actually differs from previous version of indexed OCR
        if (empty($fileContent) || empty($this->formats[$textFormat])) {
            $this->logger->warning('Unsupported text format "' . $textFormat . '" in physical node with @ID "' . $id . '"');
            return $fullText;
        }
        // A known format without a converter class yields no text (and no warning).
        if (empty($this->formats[$textFormat]['class'])) {
            return $fullText;
        }
        $class = $this->formats[$textFormat]['class'];
        // Get the raw text from class.
        if (
            class_exists($class)
            && ($obj = GeneralUtility::makeInstance($class)) instanceof FulltextInterface
        ) {
            // Load XML from file.
            $ocrTextXml = Helper::getXmlFileAsString($fileContent);
            $fullText = $obj->getTextAsMiniOcr($ocrTextXml);
            $this->rawTextArray[$id] = $fullText;
        } else {
            // The message names the method that is actually invoked above.
            $this->logger->warning('Invalid class/method "' . $class . '->getTextAsMiniOcr()" for text format "' . $textFormat . '"');
        }
        return $fullText;
    }
746
747
    /**
     * Get format of the OCR full text
     *
     * @access private
     *
     * @param string $fileContent: content of the XML file
     *
     * @return string The upper-cased name of the XML root element, or an empty
     * string if the content cannot be parsed as XML
     */
    private function getTextFormat($fileContent)
    {
        $xml = Helper::getXmlFileAsString($fileContent);
        // The helper does not return an XML object for unparsable content;
        // calling getName() on that result would be a fatal error.
        if (!($xml instanceof \SimpleXMLElement)) {
            return '';
        }
        // Get the root element's name as text format.
        return strtoupper($xml->getName());
    }
761
762
    /**
     * This determines a title for the given document
     *
     * The title is read from table "tx_dlf_documents". If it is empty and
     * $recursive is set, the document referenced by "partof" is asked instead.
     *
     * @access public
     *
     * @static
     *
     * @param int $uid: The UID of the document
     * @param bool $recursive: Search superior documents for a title, too?
     *
     * @return string The title of the document itself or a parent document
     */
    public static function getTitle($uid, $recursive = false)
    {
        $logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(__CLASS__);

        // Sanitize input.
        $uid = max(intval($uid), 0);
        if (!$uid) {
            $logger->error('Invalid UID ' . $uid . ' for document');
            return '';
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');

        $row = $queryBuilder
            ->select(
                'tx_dlf_documents.title',
                'tx_dlf_documents.partof'
            )
            ->from('tx_dlf_documents')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_documents.uid', $uid),
                Helper::whereExpression('tx_dlf_documents')
            )
            ->setMaxResults(1)
            ->execute()
            ->fetch();

        if (!$row) {
            $logger->warning('No document with UID ' . $uid . ' found or document not accessible');
            return '';
        }

        // Get title information.
        $title = $row['title'];
        $partof = $row['partof'];
        // Search parent documents recursively for a title?
        $askParent = $recursive
            && empty($title)
            && intval($partof)
            && $partof != $uid;
        if ($askParent) {
            return self::getTitle($partof, true);
        }
        return $title;
    }
819
820
    /**
     * This extracts all the metadata for the toplevel logical structure node / resource
     *
     * For METS documents the information from the structural map is added, and
     * the document's record identifier is prepended to "record_id" if missing.
     *
     * @access public
     *
     * @param int $cPid: The PID for the metadata definitions
     *
     * @return array The logical structure node's / resource's parsed metadata array
     */
    public function getTitledata($cPid = 0)
    {
        $titledata = $this->getMetadata($this->_getToplevelId(), $cPid);
        // Add information from METS structural map to titledata array.
        if ($this instanceof MetsDocument) {
            $this->addMetadataFromMets($titledata, $this->_getToplevelId());
        }
        // Set record identifier for METS file / IIIF manifest if not present.
        $recordIdMissing = is_array($titledata)
            && array_key_exists('record_id', $titledata)
            && !empty($this->recordId)
            && !in_array($this->recordId, $titledata['record_id']);
        if ($recordIdMissing) {
            array_unshift($titledata['record_id'], $this->recordId);
        }
        return $titledata;
    }
850
851
    /**
852
     * Traverse a logical (sub-) structure tree to find the structure with the requested logical id and return its depth.
853
     *
854
     * @access protected
855
     *
856
     * @param array $structure: logical structure array
857
     * @param int $depth: current tree depth
858
     * @param string $logId: ID of the logical structure whose depth is requested
859
     *
860
     * @return int|bool: false if structure with $logId is not a child of this substructure,
861
     * or the actual depth.
862
     */
863
    protected function getTreeDepth($structure, $depth, $logId)
    {
        foreach ($structure as $node) {
            // Match on this level: the current depth is the answer.
            if ($node['id'] == $logId) {
                return $depth;
            }
            // Nodes without children cannot contain the requested id.
            if (!array_key_exists('children', $node)) {
                continue;
            }
            // Descend one level; only a real hit (not false) ends the search.
            $childDepth = $this->getTreeDepth($node['children'], $depth + 1, $logId);
            if ($childDepth !== false) {
                return $childDepth;
            }
        }
        // Nothing in this (sub-)tree carries the requested id.
        return false;
    }
877
878
    /**
879
     * Get the tree depth of a logical structure element within the table of content
880
     *
881
     * @access public
882
     *
883
     * @param string $logId: The id of the logical structure element whose depth is requested
884
     * @return int|bool tree depth as integer or false if no element with $logId exists within the TOC.
885
     */
886
    public function getStructureDepth($logId)
    {
        // Search the whole table of contents; its top level counts as depth 1.
        $toc = $this->_getTableOfContents();
        return $this->getTreeDepth($toc, 1, $logId);
    }
890
891
    /**
892
     * This sets some basic class properties
893
     *
894
     * @access protected
895
     *
896
     * @abstract
897
     *
898
     * @return void
899
     */
900
    abstract protected function init();
901
902
    /**
903
     * Reuse any document object that might have been already loaded to determine whether document is METS or IIIF
904
     *
905
     * @access protected
906
     *
907
     * @abstract
908
     *
909
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: any instance that has already been loaded
910
     *
911
     * @return bool true if $preloadedDocument can actually be reused, false if it has to be loaded again
912
     */
913
    abstract protected function setPreloadedDocument($preloadedDocument);
914
915
    /**
916
     * METS/IIIF specific part of loading a location
917
     *
918
     * @access protected
919
     *
920
     * @abstract
921
     *
922
     * @param string $location: The URL of the file to load
923
     *
924
     * @return bool true on success or false on failure
925
     */
926
    abstract protected function loadLocation($location);
927
928
    /**
929
     * Load XML file / IIIF resource from URL
930
     *
931
     * @access protected
932
     *
933
     * @param string $location: The URL of the file to load
934
     *
935
     * @return bool true on success or false on failure
936
     */
937
    protected function load($location)
    {
        // Refuse anything that is not a valid URL and report the failure.
        if (!GeneralUtility::isValidUrl($location)) {
            $this->logger->error('Invalid file location "' . $location . '" for document loading');
            return false;
        }
        // Load extension configuration.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        // Set user-agent to identify self when fetching XML / JSON-LD data.
        if (!empty($extConf['useragent'])) {
            // Errors from ini_set() are deliberately suppressed; the
            // user agent is not required for loading.
            @ini_set('user_agent', $extConf['useragent']);
        }
        // The actual loading is format specific (METS / IIIF).
        return $this->loadLocation($location);
    }
954
955
    /**
956
     * Analyze the document if it contains any fulltext that needs to be indexed.
957
     *
958
     * @access protected
959
     *
960
     * @abstract
961
     */
962
    abstract protected function ensureHasFulltextIsSet();
963
964
    /**
965
     * Register all available data formats
966
     *
967
     * @access protected
968
     *
969
     * @return void
970
     */
971
    protected function loadFormats()
    {
        // The registry is filled at most once per instance.
        if ($this->formatsLoaded) {
            return;
        }
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_formats');

        // Fetch every format record stored on the root page (pid 0).
        $statement = $queryBuilder
            ->select(
                'tx_dlf_formats.type AS type',
                'tx_dlf_formats.root AS root',
                'tx_dlf_formats.namespace AS namespace',
                'tx_dlf_formats.class AS class'
            )
            ->from('tx_dlf_formats')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_formats.pid', 0)
            )
            ->execute();

        // Index the registry by format type; later rows overwrite earlier ones.
        while ($row = $statement->fetch()) {
            $this->formats[$row['type']] = [
                'rootElement' => $row['root'],
                'namespaceURI' => $row['namespace'],
                'class' => $row['class']
            ];
        }
        $this->formatsLoaded = true;
    }
1002
1003
    /**
1004
     * Register all available namespaces for a \SimpleXMLElement object
1005
     *
1006
     * @access public
1007
     *
1008
     * @param \SimpleXMLElement|\DOMXPath &$obj: \SimpleXMLElement or \DOMXPath object
1009
     *
1010
     * @return void
1011
     */
1012
    public function registerNamespaces(&$obj)
    {
        // TODO Check usage. XML specific method does not seem to be used anywhere outside this class within the project, but it is public and may be used by extensions.
        $this->loadFormats();
        // Pick the registration method matching the object's type.
        $registrar = null;
        if ($obj instanceof \SimpleXMLElement) {
            $registrar = 'registerXPathNamespace';
        } elseif ($obj instanceof \DOMXPath) {
            $registrar = 'registerNamespace';
        }
        if ($registrar === null) {
            $this->logger->error('Given object is neither a SimpleXMLElement nor a DOMXPath instance');
            return;
        }
        // Each format type (lower-cased) serves as the prefix for its namespace URI.
        foreach ($this->formats as $prefix => $format) {
            $obj->$registrar(strtolower($prefix), $format['namespaceURI']);
        }
    }
1030
1031
    /**
1032
     * This saves the document to the database and index
1033
     *
1034
     * @access public
1035
     *
1036
     * @param int $pid: The PID of the saved record
1037
     * @param int $core: The UID of the Solr core for indexing
1038
     * @param int|string $owner: UID or index_name of owner to set while indexing
1039
     *
1040
     * @return bool true on success or false on failure
1041
     */
1042
    public function save($pid = 0, $core = 0, $owner = null)
    {
        if (\TYPO3_MODE !== 'BE') {
            $this->logger->error('Saving a document is only allowed in the backend');
            return false;
        }
        // Make sure $pid is a non-negative integer.
        $pid = max(intval($pid), 0);
        // Make sure $core is a non-negative integer.
        $core = max(intval($core), 0);
        // If $pid is not given, try to get it elsewhere.
        if (
            !$pid
            && $this->pid
        ) {
            // Retain current PID.
            $pid = $this->pid;
        } elseif (!$pid) {
            $this->logger->error('Invalid PID ' . $pid . ' for document saving');
            return false;
        }
        // Set PID for metadata definitions.
        $this->cPid = $pid;
        // Set UID placeholder if not updating existing record.
        if ($pid != $this->pid) {
            $this->uid = uniqid('NEW');
        }
        // Get metadata array.
        $metadata = $this->getTitledata($pid);
        // Check for record identifier.
        if (empty($metadata['record_id'][0])) {
            $this->logger->error('No record identifier found to avoid duplication');
            return false;
        }
        // Load plugin configuration.
        $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_structures');

        // Get UID for structure type.
        $result = $queryBuilder
            ->select('tx_dlf_structures.uid AS uid')
            ->from('tx_dlf_structures')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($pid)),
                $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($metadata['type'][0])),
                Helper::whereExpression('tx_dlf_structures')
            )
            ->setMaxResults(1)
            ->execute();

        if ($resArray = $result->fetch()) {
            $structure = $resArray['uid'];
        } else {
            $this->logger->error('Could not identify document/structure type "' . $queryBuilder->expr()->literal($metadata['type'][0]) . '"');
            return false;
        }
        // From here on 'type' holds the structure's UID instead of its index name.
        $metadata['type'][0] = $structure;

        // Remove appended "valueURI" from authors' names for storing in database.
        foreach ($metadata['author'] as $i => $author) {
            // Name and URI are separated by the ASCII unit separator (0x1F).
            $splitName = explode(chr(31), $author);
            $metadata['author'][$i] = $splitName[0];
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_collections');
        // Get hidden records, too.
        $queryBuilder
            ->getRestrictions()
            ->removeByType(HiddenRestriction::class);

        // Get UIDs for collections.
        $result = $queryBuilder
            ->select(
                'tx_dlf_collections.index_name AS index_name',
                'tx_dlf_collections.uid AS uid'
            )
            ->from('tx_dlf_collections')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_collections.pid', intval($pid)),
                $queryBuilder->expr()->in('tx_dlf_collections.sys_language_uid', [-1, 0])
            )
            ->execute();

        $collUid = [];
        while ($resArray = $result->fetch()) {
            $collUid[$resArray['index_name']] = $resArray['uid'];
        }
        $collections = [];
        foreach ($metadata['collection'] as $collection) {
            if (!empty($collUid[$collection])) {
                // Add existing collection's UID.
                $collections[] = $collUid[$collection];
            } else {
                // Insert new collection.
                $collNewUid = uniqid('NEW');
                $collData['tx_dlf_collections'][$collNewUid] = [
                    'pid' => $pid,
                    'label' => $collection,
                    'index_name' => $collection,
                    'oai_name' => (!empty($conf['publishNewCollections']) ? Helper::getCleanString($collection) : ''),
                    'description' => '',
                    'documents' => 0,
                    'owner' => 0,
                    'status' => 0,
                ];
                $substUid = Helper::processDBasAdmin($collData);
                // Prevent double insertion.
                unset($collData);
                // Add new collection's UID.
                $collections[] = $substUid[$collNewUid];
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newCollection'), $collection, $substUid[$collNewUid])),
                        Helper::getMessage('flash.attention', true),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        true
                    );
                }
            }
        }
        // From here on 'collection' holds UIDs instead of index names.
        $metadata['collection'] = $collections;

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_libraries');

        // Get UID for owner.
        if (empty($owner)) {
            // Prefer the owner named in the metadata; fall back to 'default'
            // only when the metadata does not provide one.
            $owner = !empty($metadata['owner'][0]) ? $metadata['owner'][0] : 'default';
        }
        if (!MathUtility::canBeInterpretedAsInteger($owner)) {
            // $owner is an index name here and has to be resolved to a UID.
            $result = $queryBuilder
                ->select('tx_dlf_libraries.uid AS uid')
                ->from('tx_dlf_libraries')
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_libraries.pid', intval($pid)),
                    $queryBuilder->expr()->eq('tx_dlf_libraries.index_name', $queryBuilder->expr()->literal($owner)),
                    Helper::whereExpression('tx_dlf_libraries')
                )
                ->setMaxResults(1)
                ->execute();

            if ($resArray = $result->fetch()) {
                $ownerUid = $resArray['uid'];
            } else {
                // Insert new library.
                $libNewUid = uniqid('NEW');
                $libData['tx_dlf_libraries'][$libNewUid] = [
                    'pid' => $pid,
                    'label' => $owner,
                    'index_name' => $owner,
                    'website' => '',
                    'contact' => '',
                    'image' => '',
                    'oai_label' => '',
                    'oai_base' => '',
                    'opac_label' => '',
                    'opac_base' => '',
                    'union_label' => '',
                    'union_base' => '',
                ];
                $substUid = Helper::processDBasAdmin($libData);
                // Add new library's UID.
                $ownerUid = $substUid[$libNewUid];
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newLibrary'), $owner, $ownerUid)),
                        Helper::getMessage('flash.attention', true),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        true
                    );
                }
            }
            $owner = $ownerUid;
        }
        $metadata['owner'][0] = $owner;
        // Get UID of parent document.
        $partof = $this->getParentDocumentUidForSaving($pid, $core, $owner);
        // Use the date of publication or title as alternative sorting metric for parts of multi-part works.
        if (!empty($partof)) {
            if (
                empty($metadata['volume'][0])
                && !empty($metadata['year'][0])
            ) {
                $metadata['volume'] = $metadata['year'];
            }
            if (empty($metadata['volume_sorting'][0])) {
                // If METS @ORDER is given it is preferred over year_sorting and year.
                if (!empty($metadata['mets_order'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['mets_order'][0];
                } elseif (!empty($metadata['year_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year_sorting'][0];
                } elseif (!empty($metadata['year'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year'][0];
                }
            }
            // If volume_sorting is still empty, try to use title_sorting or METS @ORDERLABEL finally (workaround for newspapers)
            if (empty($metadata['volume_sorting'][0])) {
                if (!empty($metadata['title_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['title_sorting'][0];
                } elseif (!empty($metadata['mets_orderlabel'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['mets_orderlabel'][0];
                }
            }
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');

        // Get metadata for lists and sorting.
        $result = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadata.is_listed AS is_listed',
                'tx_dlf_metadata.is_sortable AS is_sortable'
            )
            ->from('tx_dlf_metadata')
            ->where(
                $queryBuilder->expr()->orX(
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_listed', 1),
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_sortable', 1)
                ),
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($pid)),
                Helper::whereExpression('tx_dlf_metadata')
            )
            ->execute();

        $listed = [];
        $sortable = [];

        while ($resArray = $result->fetch()) {
            if (!empty($metadata[$resArray['index_name']])) {
                if ($resArray['is_listed']) {
                    // Listed fields keep all their values.
                    $listed[$resArray['index_name']] = $metadata[$resArray['index_name']];
                }
                if ($resArray['is_sortable']) {
                    // Sortable fields keep only their first value.
                    $sortable[$resArray['index_name']] = $metadata[$resArray['index_name']][0];
                }
            }
        }
        // Fill data array.
        $data['tx_dlf_documents'][$this->uid] = [
            'pid' => $pid,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['starttime'] => 0,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['endtime'] => 0,
            'prod_id' => $metadata['prod_id'][0],
            'location' => $this->location,
            'record_id' => $metadata['record_id'][0],
            'opac_id' => $metadata['opac_id'][0],
            'union_id' => $metadata['union_id'][0],
            'urn' => $metadata['urn'][0],
            'purl' => $metadata['purl'][0],
            'title' => $metadata['title'][0],
            'title_sorting' => $metadata['title_sorting'][0],
            'author' => implode('; ', $metadata['author']),
            'year' => implode('; ', $metadata['year']),
            'place' => implode('; ', $metadata['place']),
            'thumbnail' => $this->_getThumbnail(true),
            'metadata' => serialize($listed),
            'metadata_sorting' => serialize($sortable),
            'structure' => $metadata['type'][0],
            'partof' => $partof,
            'volume' => $metadata['volume'][0],
            'volume_sorting' => $metadata['volume_sorting'][0],
            'license' => $metadata['license'][0],
            'terms' => $metadata['terms'][0],
            'restrictions' => $metadata['restrictions'][0],
            'out_of_print' => $metadata['out_of_print'][0],
            'rights_info' => $metadata['rights_info'][0],
            'collections' => $metadata['collection'],
            'mets_label' => $metadata['mets_label'][0],
            'mets_orderlabel' => $metadata['mets_orderlabel'][0],
            'mets_order' => $metadata['mets_order'][0],
            'owner' => $metadata['owner'][0],
            'solrcore' => $core,
            'status' => 0,
            'document_format' => $metadata['document_format'][0],
        ];
        // Unhide hidden documents.
        if (!empty($conf['unhideOnIndex'])) {
            $data['tx_dlf_documents'][$this->uid][$GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['disabled']] = 0;
        }
        // Process data.
        $newIds = Helper::processDBasAdmin($data);
        // Replace placeholder with actual UID.
        if (strpos($this->uid, 'NEW') === 0) {
            $this->uid = $newIds[$this->uid];
            $this->pid = $pid;
            $this->parentId = $partof;
        }
        if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
            Helper::addMessage(
                htmlspecialchars(sprintf(Helper::getMessage('flash.documentSaved'), $metadata['title'][0], $this->uid)),
                Helper::getMessage('flash.done', true),
                \TYPO3\CMS\Core\Messaging\FlashMessage::OK,
                true
            );
        }
        // Add document to index.
        if ($core) {
            return Indexer::add($this, $core);
        } else {
            // Without a Solr core the method reports failure even though
            // the database record has already been processed above.
            $this->logger->notice('Invalid UID "' . $core . '" for Solr core');
            return false;
        }
    }
1350
1351
    /**
1352
     * Get the ID of the parent document if the current document has one. Also save a parent document
1353
     * to the database and the Solr index if their $pid and the current $pid differ.
1354
     * Currently only applies to METS documents.
1355
     *
1356
     * @access protected
1357
     *
1358
     * @abstract
1359
     *
1360
     * @return int The parent document's id.
1361
     */
1362
    abstract protected function getParentDocumentUidForSaving($pid, $core, $owner);
1363
1364
    /**
1365
     * This returns $this->cPid via __get()
1366
     *
1367
     * @access protected
1368
     *
1369
     * @return int The PID of the metadata definitions
1370
     */
1371
    protected function _getCPid()
    {
        // Plain accessor for the PID of the metadata definitions.
        return $this->cPid;
    }
1375
1376
    /**
1377
     * This returns $this->hasFulltext via __get()
1378
     *
1379
     * @access protected
1380
     *
1381
     * @return bool Are there any fulltext files available?
1382
     */
1383
    protected function _getHasFulltext()
    {
        // Give the format-specific implementation the chance to set the
        // flag before it is read.
        $this->ensureHasFulltextIsSet();
        $fulltextAvailable = $this->hasFulltext;
        return $fulltextAvailable;
    }
1388
1389
    /**
1390
     * This returns $this->location via __get()
1391
     *
1392
     * @access protected
1393
     *
1394
     * @return string The location of the document
1395
     */
1396
    protected function _getLocation()
    {
        // Plain accessor for the document's location.
        return $this->location;
    }
1400
1401
    /**
1402
     * Format specific part of building the document's metadata array
1403
     *
1404
     * @access protected
1405
     *
1406
     * @abstract
1407
     *
1408
     * @param int $cPid
1409
     */
1410
    abstract protected function prepareMetadataArray($cPid);
1411
1412
    /**
1413
     * This builds an array of the document's metadata
1414
     *
1415
     * @access protected
1416
     *
1417
     * @return array Array of metadata with their corresponding logical structure node ID as key
1418
     */
1419
    protected function _getMetadataArray()
    {
        // The metadata definitions' PID falls back to the document's own PID.
        $cPid = $this->cPid ?: $this->pid;
        if (!$cPid) {
            $this->logger->error('Invalid PID ' . $cPid . ' for metadata definitions');
            return [];
        }
        // Index 0 of the array remembers the PID it was built for; rebuild
        // when nothing is loaded yet or that PID differs.
        $needsRebuild = !$this->metadataArrayLoaded
            || $this->metadataArray[0] != $cPid;
        if ($needsRebuild) {
            $this->prepareMetadataArray($cPid);
            $this->metadataArray[0] = $cPid;
            $this->metadataArrayLoaded = true;
        }
        return $this->metadataArray;
    }
1437
1438
    /**
1439
     * This returns $this->numPages via __get()
1440
     *
1441
     * @access protected
1442
     *
1443
     * @return int The total number of pages and/or tracks
1444
     */
1445
    protected function _getNumPages()
    {
        // The physical structure is requested first; its result is not
        // used here, only the page counter read afterwards.
        $this->_getPhysicalStructure();
        $pageCount = $this->numPages;
        return $pageCount;
    }
1450
1451
    /**
1452
     * This returns $this->parentId via __get()
1453
     *
1454
     * @access protected
1455
     *
1456
     * @return int The UID of the parent document or zero if not applicable
1457
     */
1458
    protected function _getParentId()
    {
        // Plain accessor for the parent document's UID.
        return $this->parentId;
    }
1462
1463
    /**
1464
     * This builds an array of the document's physical structure
1465
     *
1466
     * @access protected
1467
     *
1468
     * @abstract
1469
     *
1470
     * @return array Array of physical elements' id, type, label and file representations ordered
1471
     * by @ORDER attribute / IIIF Sequence's Canvases
1472
     */
1473
    abstract protected function _getPhysicalStructure();
1474
1475
    /**
1476
     * This gives an array of the document's physical structure metadata
1477
     *
1478
     * @access protected
1479
     *
1480
     * @return array Array of elements' type, label and file representations ordered by @ID attribute / Canvas order
1481
     */
1482
    protected function _getPhysicalStructureInfo()
    {
        // Trigger the build of the physical structure on first access only.
        if (!$this->physicalStructureLoaded) {
            $this->_getPhysicalStructure();
        }
        $info = $this->physicalStructureInfo;
        return $info;
    }
1491
1492
    /**
1493
     * This returns $this->pid via __get()
1494
     *
1495
     * @access protected
1496
     *
1497
     * @return int The PID of the document or zero if not in database
1498
     */
1499
    protected function _getPid()
    {
        // Plain accessor for the document's PID.
        return $this->pid;
    }
1503
1504
    /**
1505
     * This returns $this->ready via __get()
1506
     *
1507
     * @access protected
1508
     *
1509
     * @return bool Is the document instantiated successfully?
1510
     */
1511
    protected function _getReady()
    {
        // Plain accessor for the instantiation status flag.
        return $this->ready;
    }
1515
1516
    /**
1517
     * This returns $this->recordId via __get()
1518
     *
1519
     * @access protected
1520
     *
1521
     * @return mixed The METS file's / IIIF manifest's record identifier
1522
     */
1523
    protected function _getRecordId()
    {
        // Plain accessor for the record identifier.
        return $this->recordId;
    }
1527
1528
    /**
1529
     * This returns $this->rootId via __get()
1530
     *
1531
     * @access protected
1532
     *
1533
     * @return int The UID of the root document or zero if not applicable
1534
     */
1535
    protected function _getRootId()
    {
        // Resolved before: hand out the remembered value.
        if ($this->rootIdLoaded) {
            return $this->rootId;
        }
        if ($this->parentId) {
            // Take over the root id reported by the parent document.
            $parentDocument = self::getInstance($this->parentId, $this->pid);
            $this->rootId = $parentDocument->rootId;
        }
        $this->rootIdLoaded = true;
        return $this->rootId;
    }
1546
1547
    /**
1548
     * This returns the smLinks between logical and physical structMap (METS) and models the
1549
     * relation between IIIF Canvases and Manifests / Ranges in the same way
1550
     *
1551
     * @access protected
1552
     *
1553
     * @abstract
1554
     *
1555
     * @return array The links between logical and physical nodes / Range, Manifest and Canvas
1556
     */
1557
    abstract protected function _getSmLinks();
1558
1559
    /**
1560
     * This builds an array of the document's logical structure
1561
     *
1562
     * @access protected
1563
     *
1564
     * @return array Array of structure nodes' id, label, type and physical page indexes/mptr / Canvas link with original hierarchy preserved
1565
     */
1566
    protected function _getTableOfContents()
    {
        // Already built: return the stored structure.
        if ($this->tableOfContentsLoaded) {
            return $this->tableOfContents;
        }
        // Request all logical structures; the returned value is not used
        // here, the table of contents is read from the property afterwards.
        $this->getLogicalStructure('', true);
        $this->tableOfContentsLoaded = true;
        return $this->tableOfContents;
    }
1576
1577
    /**
1578
     * This returns the document's thumbnail location
1579
     *
1580
     * @access protected
1581
     *
1582
     * @abstract
1583
     *
1584
     * @param bool $forceReload: Force reloading the thumbnail instead of returning the cached value
1585
     *
1586
     * @return string The document's thumbnail location
1587
     */
1588
    abstract protected function _getThumbnail($forceReload = false);
1589
1590
    /**
1591
     * This returns the ID of the toplevel logical structure node
1592
     *
1593
     * @access protected
1594
     *
1595
     * @abstract
1596
     *
1597
     * @return string The logical structure node's ID
1598
     */
1599
    abstract protected function _getToplevelId();
1600
1601
    /**
     * Getter for $this->uid, reached through __get()
     *
     * @access protected
     *
     * @return mixed The UID or the URL of the document
     */
    protected function _getUid()
    {
        return $this->uid;
    }
1612
1613
    /**
     * Setter for $this->cPid, reached through __set()
     *
     * The value is converted to an integer and negative results are raised to 0.
     *
     * @access protected
     *
     * @param int $value: The new PID for the metadata definitions
     *
     * @return void
     */
    protected function _setCPid($value)
    {
        $newPid = (int) $value;
        // Never store a negative PID.
        $this->cPid = max($newPid, 0);
    }
1626
1627
    /**
     * Magic method invoked whenever the object is cloned
     *
     * @access protected
     *
     * @return void
     */
    protected function __clone()
    {
        // Intentionally empty: the method only exists to be protected, because
        // singleton objects should not be cloned.
    }
1638
1639
    /**
     * This is a singleton class, thus the constructor is not public
     * (Get an instance of this class by calling \Kitodo\Dlf\Common\Document::getInstance())
     *
     * A numeric $uid is looked up in "tx_dlf_documents" by its UID. Any other value is
     * handled as the location of a METS file / IIIF manifest: the document is taken from
     * $preloadedDocument or loaded from the URL, and the database is then searched for a
     * record with the same location or record identifier. Documents without a matching
     * database record use their location as UID.
     *
     * @access protected
     *
     * @param int|string $uid: The UID of the document to parse or URL to XML file
     * @param int $pid: If > 0, then only document with this PID gets loaded
     * @param \SimpleXMLElement|IiifResourceInterface|null $preloadedDocument: Either null or the \SimpleXMLElement
     * or IiifResourceInterface that has been loaded to determine the basic document format.
     *
     * @return void
     */
    protected function __construct($uid, $pid, $preloadedDocument)
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');
        $location = '';

        if (MathUtility::canBeInterpretedAsInteger($uid)) {
            // Numeric identifier: match the database record by its UID.
            $whereClause = $queryBuilder->expr()->andX(
                $queryBuilder->expr()->eq('tx_dlf_documents.uid', (int) $uid),
                Helper::whereExpression('tx_dlf_documents')
            );
        } else {
            // Use the preloaded document or try to load the METS file / IIIF manifest.
            $isLoaded = $this->setPreloadedDocument($preloadedDocument)
                || (GeneralUtility::isValidUrl($uid) && $this->load($uid));
            if (!$isLoaded) {
                // Loading failed.
                return;
            }
            // Initialize core METS object.
            $this->init();
            if ($this->getDocument() === null) {
                // No METS / IIIF part found.
                return;
            }
            // Cast to string for safety reasons.
            $location = (string) $uid;
            $this->establishRecordId($pid);

            if (empty($location) || empty($this->recordId)) {
                // Can't persistently identify document, don't try to match at all.
                $whereClause = '1=-1';
            } else {
                // Match by location or record identifier (both should be unique).
                $whereClause = $queryBuilder->expr()->andX(
                    $queryBuilder->expr()->orX(
                        $queryBuilder->expr()->eq('tx_dlf_documents.location', $queryBuilder->expr()->literal($location)),
                        $queryBuilder->expr()->eq('tx_dlf_documents.record_id', $queryBuilder->expr()->literal($this->recordId))
                    ),
                    Helper::whereExpression('tx_dlf_documents')
                );
            }
        }

        // Restrict the lookup to the given PID if one was passed.
        if ($pid) {
            $whereClause = $queryBuilder->expr()->andX(
                $whereClause,
                $queryBuilder->expr()->eq('tx_dlf_documents.pid', (int) $pid)
            );
        }

        // Fetch at most one matching document record from the database.
        $record = $queryBuilder
            ->select(
                'tx_dlf_documents.uid AS uid',
                'tx_dlf_documents.pid AS pid',
                'tx_dlf_documents.record_id AS record_id',
                'tx_dlf_documents.partof AS partof',
                'tx_dlf_documents.thumbnail AS thumbnail',
                'tx_dlf_documents.location AS location'
            )
            ->from('tx_dlf_documents')
            ->where($whereClause)
            ->setMaxResults(1)
            ->execute()
            ->fetch();

        if ($record) {
            // Take over the stored values of the database record.
            $this->uid = $record['uid'];
            $this->pid = $record['pid'];
            $this->recordId = $record['record_id'];
            $this->parentId = $record['partof'];
            $this->thumbnail = $record['thumbnail'];
            $this->location = $record['location'];
            $this->thumbnailLoaded = true;
            // Nothing loaded so far? Then load the file from the stored location
            // and set some basic properties.
            if ($this->getDocument() === null && $this->load($this->location)) {
                $this->init();
            }
            // Do we have a METS / IIIF object now?
            if ($this->getDocument() !== null) {
                // The location passed by the caller takes precedence over the stored one.
                if (!empty($location)) {
                    $this->location = $location;
                }
                // Document ready!
                $this->ready = true;
            }
            return;
        }

        if ($this->getDocument() === null) {
            $this->logger->error('No document with UID ' . $uid . ' found or document not accessible');
            return;
        }

        // Document is loaded but not in the database: use its location as UID.
        $this->uid = $location;
        $this->location = $location;
        // Document ready!
        $this->ready = true;
    }
1755
1756
    /**
     * Magic method called whenever an inaccessible property is read from the object
     *
     * The read is forwarded to the method "_get" . ucfirst($var). If the property or
     * that method does not exist, a warning is logged and nothing is returned.
     *
     * @access public
     *
     * @param string $var: Name of variable to get
     *
     * @return mixed Value of $this->$var
     */
    public function __get($var)
    {
        $getter = '_get' . ucfirst($var);
        // Only delegate if both the property and its getter are defined.
        if (property_exists($this, $var) && method_exists($this, $getter)) {
            return $this->$getter();
        }
        $this->logger->warning('There is no getter function for property "' . $var . '"');
        return;
    }
1778
1779
    /**
     * Magic method called whenever an inaccessible property is checked with isset() or empty()
     *
     * The value is obtained through __get(), so a check on a property without a getter
     * also triggers the warning logged there.
     *
     * @access public
     *
     * @param string $var: Name of variable to check
     *
     * @return bool true if variable is set and not empty, false otherwise
     */
    public function __isset($var)
    {
        $value = $this->__get($var);
        return !empty($value);
    }
1792
1793
    /**
     * Magic method called whenever an inaccessible property is written on the object
     *
     * The write is forwarded to the method "_set" . ucfirst($var). If the property or
     * that method does not exist, a warning is logged and the value is discarded.
     *
     * @access public
     *
     * @param string $var: Name of variable to set
     * @param mixed $value: New value of variable
     *
     * @return void
     */
    public function __set($var, $value)
    {
        $setter = '_set' . ucfirst($var);
        // Only delegate if both the property and its setter are defined.
        if (property_exists($this, $var) && method_exists($this, $setter)) {
            $this->$setter($value);
            return;
        }
        $this->logger->warning('There is no setter function for property "' . $var . '"');
    }
1815
}
1816