Passed
Pull Request — master (#81)
by
unknown
02:33
created

Document::_getRecordId()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 1
c 0
b 0
f 0
dl 0
loc 3
rs 10
cc 1
nc 1
nop 0
1
<?php
2
3
/**
4
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
5
 *
6
 * This file is part of the Kitodo and TYPO3 projects.
7
 *
8
 * @license GNU General Public License version 3 or later.
9
 * For the full copyright and license information, please read the
10
 * LICENSE.txt file that was distributed with this source code.
11
 */
12
13
namespace Kitodo\Dlf\Common;
14
15
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
16
use TYPO3\CMS\Core\Database\ConnectionPool;
17
use TYPO3\CMS\Core\Log\LogManager;
18
use TYPO3\CMS\Core\Utility\GeneralUtility;
19
use TYPO3\CMS\Core\Utility\MathUtility;
20
use Ubl\Iiif\Presentation\Common\Model\Resources\IiifResourceInterface;
21
use Ubl\Iiif\Tools\IiifHelper;
22
23
/**
24
 * Document class for the 'dlf' extension
25
 *
26
 * @author Sebastian Meyer <[email protected]>
27
 * @author Henrik Lochmann <[email protected]>
28
 * @package TYPO3
29
 * @subpackage dlf
30
 * @access public
31
 * @property int $cPid This holds the PID for the configuration
32
 * @property-read bool $hasFulltext Are there any fulltext files available?
33
 * @property-read string $location This holds the documents location
34
 * @property-read array $metadataArray This holds the documents' parsed metadata array
35
 * @property-read int $numPages This holds the total number of pages
36
 * @property-read int $parentId This holds the UID of the parent document or zero if not multi-volumed
37
 * @property-read array $physicalStructure This holds the physical structure
38
 * @property-read array $physicalStructureInfo This holds the physical structure metadata
39
 * @property-read int $pid This holds the PID of the document or zero if not in database
40
 * @property-read bool $ready Is the document instantiated successfully?
41
 * @property-read string $recordId The METS file's / IIIF manifest's record identifier
42
 * @property-read int $rootId This holds the UID of the root document or zero if not multi-volumed
43
 * @property-read array $smLinks This holds the smLinks between logical and physical structMap
44
 * @property-read array $tableOfContents This holds the logical structure
45
 * @property-read string $thumbnail This holds the document's thumbnail location
46
 * @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
47
 * @property-read mixed $uid This holds the UID or the URL of the document
48
 * @abstract
49
 */
50
abstract class Document
51
{
52
    /**
53
     * This holds the logger
54
     *
55
     * @var \TYPO3\CMS\Core\Log\Logger
56
     * @access protected
57
     */
58
    protected $logger;
59
60
    /**
61
     * This holds the PID for the configuration
62
     *
63
     * @var int
64
     * @access protected
65
     */
66
    protected $cPid = 0;
67
68
    /**
69
     * The extension key
70
     *
71
     * @var string
72
     * @access public
73
     */
74
    public static $extKey = 'dlf';
75
76
    /**
77
     * This holds the configuration for all supported metadata encodings
78
     * @see loadFormats()
79
     *
80
     * @var array
81
     * @access protected
82
     */
83
    protected $formats = [
84
        'OAI' => [
85
            'rootElement' => 'OAI-PMH',
86
            'namespaceURI' => 'http://www.openarchives.org/OAI/2.0/',
87
        ],
88
        'METS' => [
89
            'rootElement' => 'mets',
90
            'namespaceURI' => 'http://www.loc.gov/METS/',
91
        ],
92
        'XLINK' => [
93
            'rootElement' => 'xlink',
94
            'namespaceURI' => 'http://www.w3.org/1999/xlink',
95
        ]
96
    ];
97
98
    /**
99
     * Are the available metadata formats loaded?
100
     * @see $formats
101
     *
102
     * @var bool
103
     * @access protected
104
     */
105
    protected $formatsLoaded = false;
106
107
    /**
108
     * Are there any fulltext files available? This also includes IIIF text annotations
109
     * with motivation 'painting' if Kitodo.Presentation is configured to store text
110
     * annotations as fulltext.
111
     *
112
     * @var bool
113
     * @access protected
114
     */
115
    protected $hasFulltext = false;
116
117
    /**
118
     * Last searched logical and physical page
119
     *
120
     * @var array
121
     * @access protected
122
     */
123
    protected $lastSearchedPhysicalPage = ['logicalPage' => null, 'physicalPage' => null];
124
125
    /**
126
     * This holds the documents location
127
     *
128
     * @var string
129
     * @access protected
130
     */
131
    protected $location = '';
132
133
    /**
134
     * This holds the logical units
135
     *
136
     * @var array
137
     * @access protected
138
     */
139
    protected $logicalUnits = [];
140
141
    /**
142
     * This holds the documents' parsed metadata array with their corresponding
143
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
144
     *
145
     * @var array
146
     * @access protected
147
     */
148
    protected $metadataArray = [];
149
150
    /**
151
     * Is the metadata array loaded?
152
     * @see $metadataArray
153
     *
154
     * @var bool
155
     * @access protected
156
     */
157
    protected $metadataArrayLoaded = false;
158
159
    /**
160
     * This holds the total number of pages
161
     *
162
     * @var int
163
     * @access protected
164
     */
165
    protected $numPages = 0;
166
167
    /**
168
     * This holds the UID of the parent document or zero if not multi-volumed
169
     *
170
     * @var int
171
     * @access protected
172
     */
173
    protected $parentId = 0;
174
175
    /**
176
     * This holds the physical structure
177
     *
178
     * @var array
179
     * @access protected
180
     */
181
    protected $physicalStructure = [];
182
183
    /**
184
     * This holds the physical structure metadata
185
     *
186
     * @var array
187
     * @access protected
188
     */
189
    protected $physicalStructureInfo = [];
190
191
    /**
192
     * Is the physical structure loaded?
193
     * @see $physicalStructure
194
     *
195
     * @var bool
196
     * @access protected
197
     */
198
    protected $physicalStructureLoaded = false;
199
200
    /**
201
     * This holds the PID of the document or zero if not in database
202
     *
203
     * @var int
204
     * @access protected
205
     */
206
    protected $pid = 0;
207
208
    /**
209
     * This holds the documents' raw text pages with their corresponding
210
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
211
     *
212
     * @var array
213
     * @access protected
214
     */
215
    protected $rawTextArray = [];
216
217
    /**
218
     * Is the document instantiated successfully?
219
     *
220
     * @var bool
221
     * @access protected
222
     */
223
    protected $ready = false;
224
225
    /**
226
     * The METS file's / IIIF manifest's record identifier
227
     *
228
     * @var string
229
     * @access protected
230
     */
231
    protected $recordId;
232
233
    /**
234
     * This holds the singleton object of the document
235
     *
236
     * @var array (\Kitodo\Dlf\Common\Document)
237
     * @static
238
     * @access protected
239
     */
240
    protected static $registry = [];
241
242
    /**
243
     * This holds the UID of the root document or zero if not multi-volumed
244
     *
245
     * @var int
246
     * @access protected
247
     */
248
    protected $rootId = 0;
249
250
    /**
251
     * Is the root id loaded?
252
     * @see $rootId
253
     *
254
     * @var bool
255
     * @access protected
256
     */
257
    protected $rootIdLoaded = false;
258
259
    /**
260
     * This holds the smLinks between logical and physical structMap
261
     *
262
     * @var array
263
     * @access protected
264
     */
265
    protected $smLinks = ['l2p' => [], 'p2l' => []];
266
267
    /**
268
     * Are the smLinks loaded?
269
     * @see $smLinks
270
     *
271
     * @var bool
272
     * @access protected
273
     */
274
    protected $smLinksLoaded = false;
275
276
    /**
277
     * This holds the logical structure
278
     *
279
     * @var array
280
     * @access protected
281
     */
282
    protected $tableOfContents = [];
283
284
    /**
285
     * Is the table of contents loaded?
286
     * @see $tableOfContents
287
     *
288
     * @var bool
289
     * @access protected
290
     */
291
    protected $tableOfContentsLoaded = false;
292
293
    /**
294
     * This holds the document's thumbnail location
295
     *
296
     * @var string
297
     * @access protected
298
     */
299
    protected $thumbnail = '';
300
301
    /**
302
     * Is the document's thumbnail location loaded?
303
     * @see $thumbnail
304
     *
305
     * @var bool
306
     * @access protected
307
     */
308
    protected $thumbnailLoaded = false;
309
310
    /**
311
     * This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
312
     *
313
     * @var string
314
     * @access protected
315
     */
316
    protected $toplevelId = '';
317
318
    /**
319
     * This holds the UID or the URL of the document
320
     *
321
     * @var mixed
322
     * @access protected
323
     */
324
    protected $uid = 0;
325
326
    /**
327
     * This holds the whole XML file as \SimpleXMLElement object
328
     *
329
     * @var \SimpleXMLElement
330
     * @access protected
331
     */
332
    protected $xml;
333
334
    /**
335
     * This clears the static registry to prevent memory exhaustion
336
     *
337
     * @access public
338
     *
339
     * @static
340
     *
341
     * @return void
342
     */
343
    public static function clearRegistry()
    {
        // Forget every cached document instance by replacing the static
        // registry with a fresh, empty array.
        self::$registry = [];
    }
348
349
    /**
350
     * This ensures that the recordId, if existent, is retrieved from the document
351
     *
352
     * @access protected
353
     *
354
     * @abstract
355
     *
356
     * @param int $pid: ID of the configuration page with the recordId config
357
     *
358
     */
359
    abstract protected function establishRecordId($pid);
360
361
    /**
362
     * Source document PHP object which is represented by a Document instance
363
     *
364
     * @access protected
365
     *
366
     * @abstract
367
     *
368
     * @return \SimpleXMLElement|IiifResourceInterface A PHP object representation of
369
     * the current document. SimpleXMLElement for METS, IiifResourceInterface for IIIF
370
     */
371
    abstract protected function getDocument();
372
373
    /**
374
     * This gets the location of a downloadable file for a physical page or track
375
     *
376
     * @access public
377
     *
378
     * @abstract
379
     *
380
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
381
     *
382
     * @return string    The file's location as URL
383
     */
384
    abstract public function getDownloadLocation($id);
385
386
    /**
387
     * This gets the location of a file representing a physical page or track
388
     *
389
     * @access public
390
     *
391
     * @abstract
392
     *
393
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
394
     *
395
     * @return string The file's location as URL
396
     */
397
    abstract public function getFileLocation($id);
398
399
    /**
400
     * This gets the MIME type of a file representing a physical page or track
401
     *
402
     * @access public
403
     *
404
     * @abstract
405
     *
406
     * @param string $id: The @ID attribute of the file node
407
     *
408
     * @return string The file's MIME type
409
     */
410
    abstract public function getFileMimeType($id);
411
412
    /**
413
     * This is a singleton class, thus an instance must be created by this method
414
     *
415
     * @access public
416
     *
417
     * @static
418
     *
419
     * @param mixed $uid: The unique identifier of the document to parse, the URL of XML file or the IRI of the IIIF resource
420
     * @param int $pid: If > 0, then only document with this PID gets loaded
421
     * @param bool $forceReload: Force reloading the document instead of returning the cached instance
422
     *
423
     * @return \Kitodo\Dlf\Common\Document Instance of this class, either MetsDocument or IiifManifest
424
     */
425
    public static function &getInstance($uid, $pid = 0, $forceReload = false)
    {
        // Sanitize input. $pid is not modified again below, so this is done once.
        $pid = max(intval($pid), 0);
        if (!$forceReload) {
            $regObj = Helper::digest($uid);
            // isset() avoids "undefined index" notices when the registry has no
            // entry for this document yet; instanceof already implies is_object().
            if (
                isset(self::$registry[$regObj])
                && self::$registry[$regObj] instanceof self
            ) {
                // Check if instance has given PID.
                if (
                    !$pid
                    || !self::$registry[$regObj]->pid
                    || $pid == self::$registry[$regObj]->pid
                ) {
                    // Return singleton instance if available.
                    return self::$registry[$regObj];
                }
            } else {
                // Check the user's session...
                $sessionData = Helper::loadFromSession(get_called_class());
                // isset() is also safe when nothing (or a non-array) was stored in the session.
                if (
                    isset($sessionData[$regObj])
                    && $sessionData[$regObj] instanceof self
                ) {
                    // Check if instance has given PID.
                    if (
                        !$pid
                        || !$sessionData[$regObj]->pid
                        || $pid == $sessionData[$regObj]->pid
                    ) {
                        // ...and restore registry.
                        self::$registry[$regObj] = $sessionData[$regObj];
                        return self::$registry[$regObj];
                    }
                }
            }
        }
        // Create new instance depending on format (METS or IIIF) ...
        $instance = null;
        $documentFormat = null;
        $xml = null;
        $iiif = null;
        // Try to get document format from database
        if (MathUtility::canBeInterpretedAsInteger($uid)) {
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('tx_dlf_documents');

            $queryBuilder
                ->select(
                    'tx_dlf_documents.location AS location',
                    'tx_dlf_documents.document_format AS document_format'
                )
                ->from('tx_dlf_documents');

            // Get UID of document with given record identifier.
            if ($pid) {
                $queryBuilder
                    ->where(
                        $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($uid)),
                        $queryBuilder->expr()->eq('tx_dlf_documents.pid', intval($pid)),
                        Helper::whereExpression('tx_dlf_documents')
                    );
            } else {
                $queryBuilder
                    ->where(
                        $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($uid)),
                        Helper::whereExpression('tx_dlf_documents')
                    );
            }

            $result = $queryBuilder
                ->setMaxResults(1)
                ->execute();

            if ($resArray = $result->fetch()) {
                $documentFormat = $resArray['document_format'];
            }
        } else {
            // Get document format from content of remote document
            // Cast to string for safety reasons.
            $location = (string) $uid;
            // Try to load a file from the url
            if (GeneralUtility::isValidUrl($location)) {
                // Load extension configuration
                $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
                // Set user-agent to identify self when fetching XML data.
                // Best effort only: a failing ini_set() must not abort loading the document.
                if (!empty($extConf['useragent'])) {
                    @ini_set('user_agent', $extConf['useragent']);
                }
                $content = GeneralUtility::getUrl($location);
                if ($content !== false) {
                    // TODO use single place to load xml
                    // Turn off libxml's error logging.
                    $libxmlErrors = libxml_use_internal_errors(true);
                    // Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept
                    $previousValueOfEntityLoader = libxml_disable_entity_loader(true);
                    // Try to load XML from file.
                    $xml = simplexml_load_string($content);
                    // reset entity loader setting
                    libxml_disable_entity_loader($previousValueOfEntityLoader);
                    // Reset libxml's error logging.
                    libxml_use_internal_errors($libxmlErrors);
                    if ($xml !== false) {
                        /* @var $xml \SimpleXMLElement */
                        $xml->registerXPathNamespace('mets', 'http://www.loc.gov/METS/');
                        $xpathResult = $xml->xpath('//mets:mets');
                        $documentFormat = !empty($xpathResult) ? 'METS' : null;
                    } else {
                        // Try to load file as IIIF resource instead.
                        $contentAsJsonArray = json_decode($content, true);
                        if ($contentAsJsonArray !== null) {
                            // The extension configuration was already loaded above; reuse it.
                            IiifHelper::setUrlReader(IiifUrlReader::getInstance());
                            IiifHelper::setMaxThumbnailHeight($extConf['iiifThumbnailHeight']);
                            IiifHelper::setMaxThumbnailWidth($extConf['iiifThumbnailWidth']);
                            $iiif = IiifHelper::loadIiifResource($contentAsJsonArray);
                            if ($iiif instanceof IiifResourceInterface) {
                                $documentFormat = 'IIIF';
                            }
                        }
                    }
                }
            }
        }
        if ($documentFormat == 'METS') {
            $instance = new MetsDocument($uid, $pid, $xml);
        } elseif ($documentFormat == 'IIIF') {
            $instance = new IiifManifest($uid, $pid, $iiif);
        }
        // $instance stays null when the document format could not be determined
        // (unknown UID, unreachable URL, neither METS nor IIIF). Only touch it
        // when it really is a document object.
        if ($instance instanceof self) {
            // Save instance to registry.
            if ($instance->ready) {
                self::$registry[Helper::digest($instance->uid)] = $instance;
                if ($instance->uid != $instance->location) {
                    self::$registry[Helper::digest($instance->location)] = $instance;
                }
                // Load extension configuration
                $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
                // Save registry to session if caching is enabled.
                if (!empty($extConf['caching'])) {
                    Helper::saveToSession(self::$registry, get_class($instance));
                }
            }
            // The logger is attached after the registry has been written to the
            // session, in the same order as before.
            $instance->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($instance));
        }
        // Return new instance, or null if no document could be instantiated.
        return $instance;
    }
578
579
    /**
580
     * This gets details about a logical structure element
581
     *
582
     * @access public
583
     *
584
     * @abstract
585
     *
586
     * @param string $id: The @ID attribute of the logical structure node (METS) or
587
     * the @id property of the Manifest / Range (IIIF)
588
     * @param bool $recursive: Whether to include the child elements / resources
589
     *
590
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
591
     */
592
    abstract public function getLogicalStructure($id, $recursive = false);
593
594
    /**
595
     * This extracts all the metadata for a logical structure node
596
     *
597
     * @access public
598
     *
599
     * @abstract
600
     *
601
     * @param string $id: The @ID attribute of the logical structure node (METS) or the @id property
602
     * of the Manifest / Range (IIIF)
603
     * @param int $cPid: The PID for the metadata definitions
604
     *                       (defaults to $this->cPid or $this->pid)
605
     *
606
     * @return array The logical structure node's / the IIIF resource's parsed metadata array
607
     */
608
    abstract public function getMetadata($id, $cPid = 0);
609
610
    /**
611
     * This returns the first corresponding physical page number of a given logical page label
612
     *
613
     * @access public
614
     *
615
     * @param string $logicalPage: The label (or a part of the label) of the logical page
616
     *
617
     * @return int The physical page number
618
     */
619
    public function getPhysicalPage($logicalPage)
    {
        // Answer from the one-entry cache when the same label is requested again.
        $cachedLabel = $this->lastSearchedPhysicalPage['logicalPage'] ?? null;
        if (!empty($cachedLabel) && $cachedLabel == $logicalPage) {
            return $this->lastSearchedPhysicalPage['physicalPage'];
        }
        // Walk the physical structure in order; $position counts entries from zero.
        foreach (array_values($this->physicalStructureInfo) as $position => $pageInfo) {
            if (strpos($pageInfo['orderlabel'], $logicalPage) === false) {
                continue;
            }
            // Remember the hit for the next lookup of the same label.
            $this->lastSearchedPhysicalPage['logicalPage'] = $logicalPage;
            $this->lastSearchedPhysicalPage['physicalPage'] = $position;
            return $position;
        }
        // No order label contains the requested text: fall back to 1.
        return 1;
    }
639
640
    /**
641
     * This extracts the raw text for a physical structure node / IIIF Manifest / Canvas. Text might be
642
     * given as ALTO for METS or as annotations or ALTO for IIIF resources. If IIIF plain text annotations
643
     * with the motivation "painting" should be treated as full text representations, the extension has to be
644
     * configured accordingly.
645
     *
646
     * @access public
647
     *
648
     * @abstract
649
     *
650
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
651
     * of the Manifest / Range (IIIF)
652
     *
653
     * @return string The physical structure node's / IIIF resource's raw text
654
     */
655
    abstract public function getRawText($id);
656
657
    /**
658
     * This extracts the raw text for a physical structure node / IIIF Manifest / Canvas from an
659
     * XML fulltext representation (currently only ALTO). For IIIF manifests, ALTO documents have
660
     * to be given in the Canvas' / Manifest's "seeAlso" property.
661
     *
662
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
663
     * of the Manifest / Range (IIIF)
664
     *
665
     * @return string The physical structure node's / IIIF resource's raw text from XML
666
     */
667
    protected function getRawTextFromXml($id)
    {
        $rawText = '';
        // Initialise both values so the format check further down never reads
        // undefined variables when no fulltext file group matches.
        $rawTextXml = null;
        $textFormat = '';
        // Load available text formats, ...
        $this->loadFormats();
        // ... physical structure ...
        $this->_getPhysicalStructure();
        // ... and extension configuration.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        $fileGrpsFulltext = GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext']);
        if (empty($this->physicalStructureInfo[$id])) {
            $this->logger->warning('Invalid structure node @ID "' . $id . '"');
            return $rawText;
        }
        // Use the first configured file group that has a file for this node.
        // foreach (instead of while/array_shift) does not stop early on a
        // falsy group name such as "0".
        foreach ($fileGrpsFulltext as $fileGrpFulltext) {
            if (empty($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext])) {
                continue;
            }
            // Get fulltext file.
            $file = GeneralUtility::getUrl($this->getFileLocation($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext]));
            if ($file === false) {
                $this->logger->warning('Couldn\'t load fulltext file for structure node @ID "' . $id . '"');
                return $rawText;
            }
            // Turn off libxml's error logging.
            $libxmlErrors = libxml_use_internal_errors(true);
            // Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept.
            $previousValueOfEntityLoader = libxml_disable_entity_loader(true);
            // Load XML from file.
            $rawTextXml = simplexml_load_string($file);
            // Reset entity loader setting.
            libxml_disable_entity_loader($previousValueOfEntityLoader);
            // Reset libxml's error logging.
            libxml_use_internal_errors($libxmlErrors);
            // simplexml_load_string() returns false for malformed XML; calling
            // getName() on that would be a fatal error.
            if ($rawTextXml === false) {
                $this->logger->warning('Couldn\'t parse fulltext file for structure node @ID "' . $id . '"');
                return $rawText;
            }
            // Get the root element's name as text format.
            $textFormat = strtoupper($rawTextXml->getName());
            break;
        }
        // Is this text format supported?
        if (
            !empty($rawTextXml)
            && !empty($this->formats[$textFormat])
        ) {
            if (!empty($this->formats[$textFormat]['class'])) {
                $class = $this->formats[$textFormat]['class'];
                // Get the raw text from class.
                if (
                    class_exists($class)
                    && ($obj = GeneralUtility::makeInstance($class)) instanceof FulltextInterface
                ) {
                    $rawText = $obj->getRawText($rawTextXml);
                    // Store the extracted text under the node's ID.
                    $this->rawTextArray[$id] = $rawText;
                } else {
                    $this->logger->warning('Invalid class/method "' . $class . '->getRawText()" for text format "' . $textFormat . '"');
                }
            }
        } else {
            $this->logger->warning('Unsupported text format "' . $textFormat . '" in physical node with @ID "' . $id . '"');
        }
        return $rawText;
    }
729
730
    /**
731
     * This determines a title for the given document
732
     *
733
     * @access public
734
     *
735
     * @static
736
     *
737
     * @param int $uid: The UID of the document
738
     * @param bool $recursive: Search superior documents for a title, too?
739
     *
740
     * @return string The title of the document itself or a parent document
741
     */
742
    public static function getTitle($uid, $recursive = false)
    {
        $log = GeneralUtility::makeInstance(LogManager::class)->getLogger(__CLASS__);

        // Sanitize input: anything that is not a positive integer becomes 0.
        $uid = max(intval($uid), 0);
        if (!$uid) {
            $log->error('Invalid UID ' . $uid . ' for document');
            return '';
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');

        $statement = $queryBuilder
            ->select(
                'tx_dlf_documents.title',
                'tx_dlf_documents.partof'
            )
            ->from('tx_dlf_documents')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_documents.uid', $uid),
                Helper::whereExpression('tx_dlf_documents')
            )
            ->setMaxResults(1)
            ->execute();

        $row = $statement->fetch();
        if (!$row) {
            $log->warning('No document with UID ' . $uid . ' found or document not accessible');
            return '';
        }

        // Get title information.
        $title = $row['title'];
        $parentUid = $row['partof'];
        // Climb to the parent only if asked to, this document has no title of
        // its own, and the parent reference is a non-zero UID other than this one.
        $climbToParent = $recursive
            && empty($title)
            && intval($parentUid)
            && $parentUid != $uid;
        if ($climbToParent) {
            $title = self::getTitle($parentUid, true);
        }
        return $title;
    }
787
788
    /**
     * Collects the metadata of the toplevel logical structure node / resource
     * and makes sure the document's own record identifier is part of it.
     *
     * @access public
     *
     * @param int $cPid: The PID for the metadata definitions
     *
     * @return array The logical structure node's / resource's parsed metadata array
     */
    public function getTitledata($cPid = 0)
    {
        $titledata = $this->getMetadata($this->_getToplevelId(), $cPid);

        // METS documents additionally contribute data from their structural map.
        if ($this instanceof MetsDocument) {
            $this->addMetadataFromMets($titledata, $this->_getToplevelId());
        }

        // Nothing to add unless there is a "record_id" list and an own identifier.
        $hasRecordIdList = is_array($titledata) && array_key_exists('record_id', $titledata);
        if (!$hasRecordIdList || empty($this->recordId)) {
            return $titledata;
        }

        // Put the record identifier of the METS file / IIIF manifest first if it is missing.
        if (!in_array($this->recordId, $titledata['record_id'])) {
            array_unshift($titledata['record_id'], $this->recordId);
        }

        return $titledata;
    }
818
819
    /**
     * Searches a logical (sub-)structure tree depth-first for the structure with the
     * requested logical id and returns its depth.
     *
     * @access protected
     *
     * @param array $structure: logical structure array
     * @param int $depth: tree depth of the elements in $structure
     * @param string $logId: ID of the logical structure whose depth is requested
     *
     * @return int|bool false if structure with $logId is not contained in this substructure,
     * or the actual depth.
     */
    protected function getTreeDepth($structure, $depth, $logId)
    {
        foreach ($structure as $node) {
            if ($node['id'] == $logId) {
                return $depth;
            }
            // A node without a "children" key cannot contain the requested element.
            if (!array_key_exists('children', $node)) {
                continue;
            }
            $childDepth = $this->getTreeDepth($node['children'], $depth + 1, $logId);
            if ($childDepth !== false) {
                return $childDepth;
            }
        }

        return false;
    }
845
846
    /**
     * Determines how deep a logical structure element is nested within the table of contents.
     * Elements on the first level of the table of contents have depth 1.
     *
     * @access public
     *
     * @param string $logId: The id of the logical structure element whose depth is requested
     *
     * @return int|bool tree depth as integer or false if no element with $logId exists within the TOC.
     */
    public function getStructureDepth($logId)
    {
        $tableOfContents = $this->_getTableOfContents();

        return $this->getTreeDepth($tableOfContents, 1, $logId);
    }
858
859
    /**
     * Sets some basic class properties; to be implemented by the concrete document class.
     *
     * @access protected
     *
     * @abstract
     *
     * @return void
     */
    abstract protected function init();
869
870
    /**
     * Tries to reuse a document object that has already been loaded, e.g. while
     * determining whether the document is METS or IIIF.
     *
     * @access protected
     *
     * @abstract
     *
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: any instance that has already been loaded
     *
     * @return bool true if $preloadedDocument can actually be reused, false if it has to be loaded again
     */
    abstract protected function setPreloadedDocument($preloadedDocument);
882
883
    /**
     * Format specific (METS / IIIF) part of loading a location; called by load().
     *
     * @access protected
     *
     * @abstract
     *
     * @param string $location: The URL of the file to load
     *
     * @return bool true on success or false on failure
     */
    abstract protected function loadLocation($location);
895
896
    /**
     * Load XML file / IIIF resource from URL
     *
     * Validates the location, optionally announces the configured user agent and
     * then delegates the actual loading to the format specific loadLocation().
     *
     * @access protected
     *
     * @param string $location: The URL of the file to load
     *
     * @return bool true on success or false on failure
     */
    protected function load($location)
    {
        // Refuse anything that is not a valid URL.
        if (!GeneralUtility::isValidUrl($location)) {
            $this->logger->error('Invalid file location "' . $location . '" for document loading');
            return false;
        }
        // Load extension configuration
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        // Set user-agent to identify self when fetching XML / JSON-LD data.
        // ini_set() returns false on failure; this is reported instead of being
        // silently ignored, but loading continues as setting it is best effort.
        if (
            !empty($extConf['useragent'])
            && @ini_set('user_agent', $extConf['useragent']) === false
        ) {
            $this->logger->warning('Could not set user agent "' . $extConf['useragent'] . '" for document loading');
        }
        // the actual loading is format specific
        return $this->loadLocation($location);
    }
922
923
    /**
     * Analyzes the document to find out whether it contains any fulltext that needs to be indexed.
     * Called by _getHasFulltext() before $this->hasFulltext is returned.
     *
     * @access protected
     *
     * @abstract
     */
    abstract protected function ensureHasFulltextIsSet();
931
932
    /**
     * Fills the format registry ($this->formats) from table "tx_dlf_formats".
     * The query is only executed once per instance.
     *
     * @access protected
     *
     * @return void
     */
    protected function loadFormats()
    {
        // Registry has already been built.
        if ($this->formatsLoaded) {
            return;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_formats');

        // Read the data formats stored on the root page (pid 0).
        $statement = $queryBuilder
            ->select(
                'tx_dlf_formats.type AS type',
                'tx_dlf_formats.root AS root',
                'tx_dlf_formats.namespace AS namespace',
                'tx_dlf_formats.class AS class'
            )
            ->from('tx_dlf_formats')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_formats.pid', 0)
            )
            ->execute();

        // One registry entry per format type.
        while ($format = $statement->fetch()) {
            $this->formats[$format['type']] = [
                'rootElement' => $format['root'],
                'namespaceURI' => $format['namespace'],
                'class' => $format['class']
            ];
        }

        $this->formatsLoaded = true;
    }
970
971
    /**
     * Registers the namespaces of all known data formats on the given object,
     * using the lowercased format type as prefix.
     *
     * @access public
     *
     * @param \SimpleXMLElement|\DOMXPath &$obj: \SimpleXMLElement or \DOMXPath object
     *
     * @return void
     */
    public function registerNamespaces(&$obj)
    {
        // TODO Check usage. XML specific method does not seem to be used anywhere outside this class within the project, but it is public and may be used by extensions.
        $this->loadFormats();

        // Only \SimpleXMLElement and \DOMXPath objects are supported.
        $isSimpleXml = $obj instanceof \SimpleXMLElement;
        if (!$isSimpleXml && !($obj instanceof \DOMXPath)) {
            $this->logger->error('Given object is neither a SimpleXMLElement nor a DOMXPath instance');
            return;
        }

        // Both classes offer the same feature under a different method name.
        $method = $isSimpleXml ? 'registerXPathNamespace' : 'registerNamespace';

        foreach ($this->formats as $prefix => $format) {
            $obj->$method(strtolower($prefix), $format['namespaceURI']);
        }
    }
998
999
    /**
     * This saves the document to the database and index
     *
     * Steps: resolve the target PID, read the title metadata, resolve structure
     * type, collections and owning library to UIDs (creating missing collection
     * and library records on the fly), determine the parent document, write the
     * "tx_dlf_documents" record and finally hand the document to the indexer.
     *
     * @access public
     *
     * @param int $pid: The PID of the saved record
     * @param int $core: The UID of the Solr core for indexing
     * @param int|string $owner: UID or index_name of owner to set while indexing
     *
     * @return bool true on success or false on failure
     */
    public function save($pid = 0, $core = 0, $owner = null)
    {
        if (\TYPO3_MODE !== 'BE') {
            $this->logger->error('Saving a document is only allowed in the backend');
            return false;
        }
        // Make sure $pid is a non-negative integer.
        $pid = max(intval($pid), 0);
        // Make sure $core is a non-negative integer.
        $core = max(intval($core), 0);
        // If $pid is not given, try to get it elsewhere.
        if (
            !$pid
            && $this->pid
        ) {
            // Retain current PID.
            $pid = $this->pid;
        } elseif (!$pid) {
            $this->logger->error('Invalid PID ' . $pid . ' for document saving');
            return false;
        }
        // Set PID for metadata definitions.
        $this->cPid = $pid;
        // Set UID placeholder if not updating existing record.
        if ($pid != $this->pid) {
            $this->uid = uniqid('NEW');
        }
        // Get metadata array.
        $metadata = $this->getTitledata($pid);
        // Check for record identifier.
        if (empty($metadata['record_id'][0])) {
            $this->logger->error('No record identifier found to avoid duplication');
            return false;
        }
        // Load plugin configuration.
        $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_structures');

        // Get UID for structure type.
        $result = $queryBuilder
            ->select('tx_dlf_structures.uid AS uid')
            ->from('tx_dlf_structures')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($pid)),
                $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($metadata['type'][0])),
                Helper::whereExpression('tx_dlf_structures')
            )
            ->setMaxResults(1)
            ->execute();

        if ($resArray = $result->fetch()) {
            $structure = $resArray['uid'];
        } else {
            $this->logger->error('Could not identify document/structure type "' . $queryBuilder->expr()->literal($metadata['type'][0]) . '"');
            return false;
        }
        $metadata['type'][0] = $structure;

        // Remove appended "valueURI" from authors' names for storing in database.
        // The URI is separated from the name by the unit separator (ASCII 31).
        foreach ($metadata['author'] as $i => $author) {
            $splitName = explode(chr(31), $author);
            $metadata['author'][$i] = $splitName[0];
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_collections');

        // Get UIDs for collections.
        $result = $queryBuilder
            ->select(
                'tx_dlf_collections.index_name AS index_name',
                'tx_dlf_collections.uid AS uid'
            )
            ->from('tx_dlf_collections')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_collections.pid', intval($pid)),
                $queryBuilder->expr()->in('tx_dlf_collections.sys_language_uid', [-1, 0]),
                Helper::whereExpression('tx_dlf_collections')
            )
            ->execute();

        $collUid = [];
        while ($resArray = $result->fetch()) {
            $collUid[$resArray['index_name']] = $resArray['uid'];
        }
        $collections = [];
        foreach ($metadata['collection'] as $collection) {
            if (!empty($collUid[$collection])) {
                // Add existing collection's UID.
                $collections[] = $collUid[$collection];
            } else {
                // Insert new collection.
                $collNewUid = uniqid('NEW');
                // A fresh array per iteration prevents double insertion.
                $collData = [];
                $collData['tx_dlf_collections'][$collNewUid] = [
                    'pid' => $pid,
                    'label' => $collection,
                    'index_name' => $collection,
                    'oai_name' => (!empty($conf['publishNewCollections']) ? Helper::getCleanString($collection) : ''),
                    'description' => '',
                    'documents' => 0,
                    'owner' => 0,
                    'status' => 0,
                ];
                $substUid = Helper::processDBasAdmin($collData);
                // Add new collection's UID.
                $collections[] = $substUid[$collNewUid];
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newCollection'), $collection, $substUid[$collNewUid])),
                        Helper::getMessage('flash.attention', true),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        true
                    );
                }
            }
        }
        $metadata['collection'] = $collections;

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_libraries');

        // Get UID for owner.
        if (empty($owner)) {
            // No owner passed in: use the owner named in the metadata and fall
            // back to "default" only if the metadata does not name one.
            $owner = !empty($metadata['owner'][0]) ? $metadata['owner'][0] : 'default';
        }
        if (!MathUtility::canBeInterpretedAsInteger($owner)) {
            // $owner is an index_name and has to be resolved to a library UID.
            $result = $queryBuilder
                ->select('tx_dlf_libraries.uid AS uid')
                ->from('tx_dlf_libraries')
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_libraries.pid', intval($pid)),
                    $queryBuilder->expr()->eq('tx_dlf_libraries.index_name', $queryBuilder->expr()->literal($owner)),
                    Helper::whereExpression('tx_dlf_libraries')
                )
                ->setMaxResults(1)
                ->execute();

            if ($resArray = $result->fetch()) {
                $ownerUid = $resArray['uid'];
            } else {
                // Insert new library.
                $libNewUid = uniqid('NEW');
                $libData = [];
                $libData['tx_dlf_libraries'][$libNewUid] = [
                    'pid' => $pid,
                    'label' => $owner,
                    'index_name' => $owner,
                    'website' => '',
                    'contact' => '',
                    'image' => '',
                    'oai_label' => '',
                    'oai_base' => '',
                    'opac_label' => '',
                    'opac_base' => '',
                    'union_label' => '',
                    'union_base' => '',
                ];
                $substUid = Helper::processDBasAdmin($libData);
                // Add new library's UID.
                $ownerUid = $substUid[$libNewUid];
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newLibrary'), $owner, $ownerUid)),
                        Helper::getMessage('flash.attention', true),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        true
                    );
                }
            }
            $owner = $ownerUid;
        }
        $metadata['owner'][0] = $owner;
        // Get UID of parent document.
        $partof = $this->getParentDocumentUidForSaving($pid, $core, $owner);
        // Use the date of publication or title as alternative sorting metric for parts of multi-part works.
        if (!empty($partof)) {
            if (
                empty($metadata['volume'][0])
                && !empty($metadata['year'][0])
            ) {
                $metadata['volume'] = $metadata['year'];
            }
            if (empty($metadata['volume_sorting'][0])) {
                // If METS @ORDER is given it is preferred over year_sorting and year.
                if (!empty($metadata['mets_order'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['mets_order'][0];
                } elseif (!empty($metadata['year_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year_sorting'][0];
                } elseif (!empty($metadata['year'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year'][0];
                }
            }
            // If volume_sorting is still empty, try to use title_sorting or METS @ORDERLABEL finally (workaround for newspapers)
            if (empty($metadata['volume_sorting'][0])) {
                if (!empty($metadata['title_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['title_sorting'][0];
                } elseif (!empty($metadata['mets_orderlabel'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['mets_orderlabel'][0];
                }
            }
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');

        // Get metadata for lists and sorting.
        $result = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadata.is_listed AS is_listed',
                'tx_dlf_metadata.is_sortable AS is_sortable'
            )
            ->from('tx_dlf_metadata')
            ->where(
                $queryBuilder->expr()->orX(
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_listed', 1),
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_sortable', 1)
                ),
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($pid)),
                Helper::whereExpression('tx_dlf_metadata')
            )
            ->execute();

        $listed = [];
        $sortable = [];

        while ($resArray = $result->fetch()) {
            if (!empty($metadata[$resArray['index_name']])) {
                if ($resArray['is_listed']) {
                    $listed[$resArray['index_name']] = $metadata[$resArray['index_name']];
                }
                if ($resArray['is_sortable']) {
                    // Only the first value is used for sorting.
                    $sortable[$resArray['index_name']] = $metadata[$resArray['index_name']][0];
                }
            }
        }
        // Fill data array.
        $data = [];
        $data['tx_dlf_documents'][$this->uid] = [
            'pid' => $pid,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['starttime'] => 0,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['endtime'] => 0,
            'prod_id' => $metadata['prod_id'][0],
            'location' => $this->location,
            'record_id' => $metadata['record_id'][0],
            'opac_id' => $metadata['opac_id'][0],
            'union_id' => $metadata['union_id'][0],
            'urn' => $metadata['urn'][0],
            'purl' => $metadata['purl'][0],
            'title' => $metadata['title'][0],
            'title_sorting' => $metadata['title_sorting'][0],
            'author' => implode('; ', $metadata['author']),
            'year' => implode('; ', $metadata['year']),
            'place' => implode('; ', $metadata['place']),
            'thumbnail' => $this->_getThumbnail(true),
            'metadata' => serialize($listed),
            'metadata_sorting' => serialize($sortable),
            'structure' => $metadata['type'][0],
            'partof' => $partof,
            'volume' => $metadata['volume'][0],
            'volume_sorting' => $metadata['volume_sorting'][0],
            'license' => $metadata['license'][0],
            'terms' => $metadata['terms'][0],
            'restrictions' => $metadata['restrictions'][0],
            'out_of_print' => $metadata['out_of_print'][0],
            'rights_info' => $metadata['rights_info'][0],
            'collections' => $metadata['collection'],
            'mets_label' => $metadata['mets_label'][0],
            'mets_orderlabel' => $metadata['mets_orderlabel'][0],
            'mets_order' => $metadata['mets_order'][0],
            'owner' => $metadata['owner'][0],
            'solrcore' => $core,
            'status' => 0,
            'document_format' => $metadata['document_format'][0],
        ];
        // Unhide hidden documents.
        if (!empty($conf['unhideOnIndex'])) {
            $data['tx_dlf_documents'][$this->uid][$GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['disabled']] = 0;
        }
        // Process data.
        $newIds = Helper::processDBasAdmin($data);
        // Replace placeholder with actual UID.
        if (strpos($this->uid, 'NEW') === 0) {
            $this->uid = $newIds[$this->uid];
            $this->pid = $pid;
            $this->parentId = $partof;
        }
        if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
            Helper::addMessage(
                htmlspecialchars(sprintf(Helper::getMessage('flash.documentSaved'), $metadata['title'][0], $this->uid)),
                Helper::getMessage('flash.done', true),
                \TYPO3\CMS\Core\Messaging\FlashMessage::OK,
                true
            );
        }
        // Add document to index.
        if ($core) {
            Indexer::add($this, $core);
        } else {
            // The record has been saved nevertheless, so this is not a failure.
            $this->logger->notice('Invalid UID "' . $core . '" for Solr core');
        }
        return true;
    }
1315
1316
    /**
     * Returns the ID of the parent document if the current document has one. A parent
     * document is also saved to the database and the Solr index if its $pid and the
     * current $pid differ.
     * Currently only applies to METS documents.
     *
     * Called by save() with the resolved PID, the Solr core UID and the owner.
     *
     * @access protected
     *
     * @abstract
     *
     * @return int The parent document's id.
     */
    abstract protected function getParentDocumentUidForSaving($pid, $core, $owner);
1328
1329
    /**
     * Getter used by __get() for $this->cPid
     *
     * @access protected
     *
     * @return int The PID of the metadata definitions
     */
    protected function _getCPid()
    {
        return $this->cPid;
    }
1340
1341
    /**
     * Getter used by __get() for $this->hasFulltext
     *
     * @access protected
     *
     * @return bool Are there any fulltext files available?
     */
    protected function _getHasFulltext()
    {
        // Let the format specific implementation determine the flag first.
        $this->ensureHasFulltextIsSet();

        return $this->hasFulltext;
    }
1353
1354
    /**
     * Getter used by __get() for $this->location
     *
     * @access protected
     *
     * @return string The location of the document
     */
    protected function _getLocation()
    {
        return $this->location;
    }
1365
1366
    /**
     * Format specific part of building the document's metadata array;
     * called by _getMetadataArray() with the PID of the metadata definitions.
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $cPid
     */
    abstract protected function prepareMetadataArray($cPid);
1376
1377
    /**
     * Builds (or returns the cached) array of the document's metadata.
     * Index 0 of the array holds the PID the metadata was built for.
     *
     * @access protected
     *
     * @return array Array of metadata with their corresponding logical structure node ID as key
     */
    protected function _getMetadataArray()
    {
        // The configuration PID takes precedence over the document's own PID.
        $cPid = $this->cPid ?: $this->pid;
        if (!$cPid) {
            $this->logger->error('Invalid PID ' . $cPid . ' for metadata definitions');
            return [];
        }

        // The cached array is only valid if it was built for the same PID.
        $isCurrent = $this->metadataArrayLoaded && $this->metadataArray[0] == $cPid;
        if (!$isCurrent) {
            $this->prepareMetadataArray($cPid);
            $this->metadataArray[0] = $cPid;
            $this->metadataArrayLoaded = true;
        }

        return $this->metadataArray;
    }
1402
1403
    /**
     * Getter used by __get() for $this->numPages
     *
     * @access protected
     *
     * @return int The total number of pages and/or tracks
     */
    protected function _getNumPages()
    {
        // Request the physical structure before reading the page count.
        $this->_getPhysicalStructure();

        return $this->numPages;
    }
1415
1416
    /**
     * Getter used by __get() for $this->parentId
     *
     * @access protected
     *
     * @return int The UID of the parent document or zero if not applicable
     */
    protected function _getParentId()
    {
        return $this->parentId;
    }
1427
1428
    /**
     * Builds an array of the document's physical structure; to be implemented
     * by the concrete document class.
     *
     * @access protected
     *
     * @abstract
     *
     * @return array Array of physical elements' id, type, label and file representations ordered
     * by @ORDER attribute / IIIF Sequence's Canvases
     */
    abstract protected function _getPhysicalStructure();
1439
1440
    /**
     * Returns the metadata of the document's physical structure, requesting the
     * physical structure first if it has not been loaded yet.
     *
     * @access protected
     *
     * @return array Array of elements' type, label and file representations ordered by @ID attribute / Canvas order
     */
    protected function _getPhysicalStructureInfo()
    {
        if ($this->physicalStructureLoaded) {
            return $this->physicalStructureInfo;
        }

        // Not loaded yet: request the physical structure first.
        $this->_getPhysicalStructure();

        return $this->physicalStructureInfo;
    }
1456
1457
    /**
     * Getter used by __get() for $this->pid
     *
     * @access protected
     *
     * @return int The PID of the document or zero if not in database
     */
    protected function _getPid()
    {
        return $this->pid;
    }
1468
1469
    /**
     * Getter used by __get() for $this->ready
     *
     * @access protected
     *
     * @return bool Is the document instantiated successfully?
     */
    protected function _getReady()
    {
        return $this->ready;
    }
1480
1481
    /**
     * Getter used by __get() for $this->recordId
     *
     * @access protected
     *
     * @return mixed The METS file's / IIIF manifest's record identifier
     */
    protected function _getRecordId()
    {
        return $this->recordId;
    }
1492
1493
    /**
     * Getter used by __get() for $this->rootId. On first access the value is
     * taken over from the parent document, if there is one.
     *
     * @access protected
     *
     * @return int The UID of the root document or zero if not applicable
     */
    protected function _getRootId()
    {
        // The lookup is done only once per instance.
        if ($this->rootIdLoaded) {
            return $this->rootId;
        }

        if ($this->parentId) {
            // Reading the parent's rootId repeats this lookup one level up.
            $parent = self::getInstance($this->parentId, $this->pid);
            $this->rootId = $parent->rootId;
        }
        $this->rootIdLoaded = true;

        return $this->rootId;
    }
1511
1512
    /**
     * This provides the smLinks connecting the logical and the physical structMap (METS);
     * for IIIF the relation between Canvases and Manifests / Ranges is modelled the same way
     *
     * @access protected
     *
     * @abstract
     *
     * @return array The links between logical and physical nodes / Range, Manifest and Canvas
     */
    abstract protected function _getSmLinks();
1523
1524
    /**
     * This returns the document's logical structure, building it on first access
     *
     * @access protected
     *
     * @return array Array of structure nodes' id, label, type and physical page indexes/mptr / Canvas link with original hierarchy preserved
     */
    protected function _getTableOfContents()
    {
        // The logical structure array exists already, nothing to do.
        if ($this->tableOfContentsLoaded) {
            return $this->tableOfContents;
        }
        // Fetch all logical structures.
        // NOTE(review): presumably this call fills $this->tableOfContents - getLogicalStructure() is defined elsewhere.
        $this->getLogicalStructure('', true);
        $this->tableOfContentsLoaded = true;
        return $this->tableOfContents;
    }
1541
1542
    /**
     * This provides the location of the document's thumbnail
     *
     * @access protected
     *
     * @abstract
     *
     * @param bool $forceReload: Force reloading the thumbnail instead of returning the cached value
     *
     * @return string The document's thumbnail location
     */
    abstract protected function _getThumbnail($forceReload = false);
1554
1555
    /**
     * This provides the ID of the logical structure's toplevel node
     *
     * @access protected
     *
     * @abstract
     *
     * @return string The logical structure node's ID
     */
    abstract protected function _getToplevelId();
1565
1566
    /**
     * Getter for $this->uid, invoked through __get()
     *
     * @access protected
     *
     * @return mixed The UID or the URL of the document
     */
    protected function _getUid()
    {
        // Holds the database UID, or the location for documents which are not in the database.
        return $this->uid;
    }
1577
1578
    /**
     * Setter for $this->cPid, invoked through __set()
     *
     * The given value is cast to integer and negative numbers are clamped to zero.
     *
     * @access protected
     *
     * @param int $value: The new PID for the metadata definitions
     *
     * @return void
     */
    protected function _setCPid($value)
    {
        $newPid = intval($value);
        // A PID can never be negative.
        $this->cPid = $newPid > 0 ? $newPid : 0;
    }
1591
1592
    /**
     * This magic method is invoked whenever the object is about to be cloned
     *
     * @access protected
     *
     * @return void
     */
    protected function __clone()
    {
        // Intentionally empty: the method only exists to be protected, because singleton objects should not be cloned.
    }
1603
1604
    /**
     * This is a singleton class, thus the constructor should be private/protected
     * (Get an instance of this class by calling \Kitodo\Dlf\Common\Document::getInstance())
     *
     * For an integer $uid the record is looked up in "tx_dlf_documents" and the file is loaded
     * from the stored location. For anything else the preloaded document or the URL given as
     * $uid is loaded first, then a record matching its location or record identifier is looked
     * up. Documents without a database record use their location as UID. $this->ready is only
     * set if a METS / IIIF object is available in the end.
     *
     * @access protected
     *
     * @param int|string $uid: The UID of the document to parse or URL to XML file
     * @param int $pid: If > 0, then only document with this PID gets loaded
     * @param \SimpleXMLElement|IiifResourceInterface|null $preloadedDocument: Either null or the \SimpleXMLElement
     * or IiifResourceInterface that has been loaded to determine the basic document format.
     *
     * @return void
     */
    protected function __construct($uid, $pid, $preloadedDocument)
    {
        // The failure branch at the very end logs through $this->logger, but nothing in this
        // constructor assigns it. Make sure a logger exists before anything else can fail.
        if (!isset($this->logger)) {
            $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(__CLASS__);
        }
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');
        $location = '';
        // Prepare to check database for the requested document.
        if (MathUtility::canBeInterpretedAsInteger($uid)) {
            $whereClause = $queryBuilder->expr()->andX(
                $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($uid)),
                Helper::whereExpression('tx_dlf_documents')
            );
        } else {
            // Try to load METS file / IIIF manifest.
            if ($this->setPreloadedDocument($preloadedDocument) || (GeneralUtility::isValidUrl($uid)
                && $this->load($uid))) {
                // Initialize core METS object.
                $this->init();
                if ($this->getDocument() !== null) {
                    // Cast to string for safety reasons.
                    $location = (string) $uid;
                    $this->establishRecordId($pid);
                } else {
                    // No METS / IIIF part found.
                    return;
                }
            } else {
                // Loading failed.
                return;
            }
            if (
                !empty($location)
                && !empty($this->recordId)
            ) {
                // Try to match record identifier or location (both should be unique).
                $whereClause = $queryBuilder->expr()->andX(
                    $queryBuilder->expr()->orX(
                        $queryBuilder->expr()->eq('tx_dlf_documents.location', $queryBuilder->expr()->literal($location)),
                        $queryBuilder->expr()->eq('tx_dlf_documents.record_id', $queryBuilder->expr()->literal($this->recordId))
                    ),
                    Helper::whereExpression('tx_dlf_documents')
                );
            } else {
                // Can't persistently identify document, don't try to match at all.
                $whereClause = '1=-1';
            }
        }
        // Check for PID if needed.
        if ($pid) {
            $whereClause = $queryBuilder->expr()->andX(
                $whereClause,
                $queryBuilder->expr()->eq('tx_dlf_documents.pid', intval($pid))
            );
        }
        // Get document PID and location from database.
        $result = $queryBuilder
            ->select(
                'tx_dlf_documents.uid AS uid',
                'tx_dlf_documents.pid AS pid',
                'tx_dlf_documents.record_id AS record_id',
                'tx_dlf_documents.partof AS partof',
                'tx_dlf_documents.thumbnail AS thumbnail',
                'tx_dlf_documents.location AS location'
            )
            ->from('tx_dlf_documents')
            ->where($whereClause)
            ->setMaxResults(1)
            ->execute();

        if ($resArray = $result->fetch()) {
            $this->uid = $resArray['uid'];
            $this->pid = $resArray['pid'];
            $this->recordId = $resArray['record_id'];
            $this->parentId = $resArray['partof'];
            $this->thumbnail = $resArray['thumbnail'];
            $this->location = $resArray['location'];
            // The thumbnail comes from the database record, no need to determine it again.
            $this->thumbnailLoaded = true;
            // Load XML file if necessary...
            if (
                $this->getDocument() === null
                && $this->load($this->location)
            ) {
                // ...and set some basic properties.
                $this->init();
            }
            // Do we have a METS / IIIF object now?
            if ($this->getDocument() !== null) {
                // Set new location if necessary.
                if (!empty($location)) {
                    $this->location = $location;
                }
                // Document ready!
                $this->ready = true;
            }
        } elseif ($this->getDocument() !== null) {
            // Set location as UID for documents not in database.
            $this->uid = $location;
            $this->location = $location;
            // Document ready!
            $this->ready = true;
        } else {
            $this->logger->error('No document with UID ' . $uid . ' found or document not accessible');
        }
    }
1720
1721
    /**
     * This magic method is called each time an invisible property is read from the object
     *
     * The call is forwarded to the matching "_get<Property>()" method. If either the property
     * or its getter is missing, a warning is logged and null is returned.
     *
     * @access public
     *
     * @param string $var: Name of variable to get
     *
     * @return mixed Value of $this->$var
     */
    public function __get($var)
    {
        $getter = '_get' . ucfirst($var);
        // Only declared properties which come with a getter may be read.
        if (property_exists($this, $var) && method_exists($this, $getter)) {
            return $this->$getter();
        }
        $this->logger->warning('There is no getter function for property "' . $var . '"');
        return null;
    }
1743
1744
    /**
     * This magic method is called each time isset() or empty() is used on an invisible property
     *
     * The value is fetched through __get(), hence checking a property without
     * getter logs the same warning as reading it.
     *
     * @access public
     *
     * @param string $var: Name of variable to check
     *
     * @return bool true if variable is set and not empty, false otherwise
     */
    public function __isset($var)
    {
        $currentValue = $this->__get($var);
        return !empty($currentValue);
    }
1757
1758
    /**
     * This magic method is called each time an invisible property is written on the object
     *
     * The call is forwarded to the matching "_set<Property>()" method. If either the property
     * or its setter is missing, a warning is logged and the value is discarded.
     *
     * @access public
     *
     * @param string $var: Name of variable to set
     * @param mixed $value: New value of variable
     *
     * @return void
     */
    public function __set($var, $value)
    {
        $setter = '_set' . ucfirst($var);
        // Only declared properties which come with a setter may be written.
        if (property_exists($this, $var) && method_exists($this, $setter)) {
            $this->$setter($value);
            return;
        }
        $this->logger->warning('There is no setter function for property "' . $var . '"');
    }
1780
}
1781