Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Pull Request — master (#430)
by Alexander
03:51
created

Document::__get()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 11
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 8
c 0
b 0
f 0
dl 0
loc 11
rs 10
cc 3
nc 2
nop 1
1
<?php
2
3
namespace Kitodo\Dlf\Common;
4
5
/**
6
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
7
 *
8
 * This file is part of the Kitodo and TYPO3 projects.
9
 *
10
 * @license GNU General Public License version 3 or later.
11
 * For the full copyright and license information, please read the
12
 * LICENSE.txt file that was distributed with this source code.
13
 */
14
15
use TYPO3\CMS\Core\Database\ConnectionPool;
16
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
17
use TYPO3\CMS\Core\Utility\GeneralUtility;
18
use TYPO3\CMS\Core\Utility\MathUtility;
19
use Ubl\Iiif\Presentation\Common\Model\Resources\IiifResourceInterface;
20
use Ubl\Iiif\Tools\IiifHelper;
21
22
/**
23
 * Document class for the 'dlf' extension
24
 *
25
 * @author Sebastian Meyer <[email protected]>
26
 * @author Henrik Lochmann <[email protected]>
27
 * @package TYPO3
28
 * @subpackage dlf
29
 * @access public
30
 * @property-write integer $cPid This holds the PID for the configuration
31
 * @property-read boolean $hasFulltext Are there any fulltext files available?
32
 * @property-read string $location This holds the document's location
33
 * @property-read array $metadataArray This holds the documents' parsed metadata array
34
 * @property-read integer $numPages This holds the total number of pages
35
 * @property-read integer $parentId This holds the UID of the parent document or zero if not multi-volumed
36
 * @property-read array $physicalStructure This holds the physical structure
37
 * @property-read array $physicalStructureInfo This holds the physical structure metadata
38
 * @property-read integer $pid This holds the PID of the document or zero if not in database
39
 * @property-read boolean $ready Is the document instantiated successfully?
40
 * @property-read string $recordId The METS file's / IIIF manifest's record identifier
41
 * @property-read integer $rootId This holds the UID of the root document or zero if not multi-volumed
42
 * @property-read array $smLinks This holds the smLinks between logical and physical structMap
43
 * @property-read array $tableOfContents This holds the logical structure
44
 * @property-read string $thumbnail This holds the document's thumbnail location
45
 * @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
46
 * @property-read mixed $uid This holds the UID or the URL of the document
47
 * @abstract
48
 */
49
abstract class Document
50
{
51
    /**
52
     * This holds the PID for the configuration
53
     *
54
     * @var integer
55
     * @access protected
56
     */
57
    protected $cPid = 0;
58
59
    /**
60
     * The extension key
61
     *
62
     * @var string
63
     * @access public
64
     */
65
    public static $extKey = 'dlf';
66
67
    /**
68
     * This holds the configuration for all supported metadata encodings
69
     * @see loadFormats()
70
     *
71
     * @var array
72
     * @access protected
73
     */
74
    protected $formats = [
75
        'OAI' => [
76
            'rootElement' => 'OAI-PMH',
77
            'namespaceURI' => 'http://www.openarchives.org/OAI/2.0/',
78
        ],
79
        'METS' => [
80
            'rootElement' => 'mets',
81
            'namespaceURI' => 'http://www.loc.gov/METS/',
82
        ],
83
        'XLINK' => [
84
            'rootElement' => 'xlink',
85
            'namespaceURI' => 'http://www.w3.org/1999/xlink',
86
        ]
87
    ];
88
89
    /**
90
     * Are the available metadata formats loaded?
91
     * @see $formats
92
     *
93
     * @var boolean
94
     * @access protected
95
     */
96
    protected $formatsLoaded = FALSE;
97
98
    /**
99
     * Are there any fulltext files available? This also includes IIIF text annotations
100
     * with motivation 'painting' if Kitodo.Presentation is configured to store text
101
     * annotations as fulltext.
102
     *
103
     * @var boolean
104
     * @access protected
105
     */
106
    protected $hasFulltext = FALSE;
107
108
    /**
109
     * Last searched logical and physical page
110
     *
111
     * @var array
112
     * @access protected
113
     */
114
    protected $lastSearchedPhysicalPage = ['logicalPage' => NULL, 'physicalPage' => NULL];
115
116
    /**
117
     * This holds the document's location
118
     *
119
     * @var string
120
     * @access protected
121
     */
122
    protected $location = '';
123
124
    /**
125
     * This holds the logical units
126
     *
127
     * @var array
128
     * @access protected
129
     */
130
    protected $logicalUnits = [];
131
132
    /**
133
     * This holds the documents' parsed metadata array with their corresponding
134
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
135
     *
136
     * @var array
137
     * @access protected
138
     */
139
    protected $metadataArray = [];
140
141
    /**
142
     * Is the metadata array loaded?
143
     * @see $metadataArray
144
     *
145
     * @var boolean
146
     * @access protected
147
     */
148
    protected $metadataArrayLoaded = FALSE;
149
150
    /**
151
     * This holds the total number of pages
152
     *
153
     * @var integer
154
     * @access protected
155
     */
156
    protected $numPages = 0;
157
158
    /**
159
     * This holds the UID of the parent document or zero if not multi-volumed
160
     *
161
     * @var integer
162
     * @access protected
163
     */
164
    protected $parentId = 0;
165
166
    /**
167
     * This holds the physical structure
168
     *
169
     * @var array
170
     * @access protected
171
     */
172
    protected $physicalStructure = [];
173
174
    /**
175
     * This holds the physical structure metadata
176
     *
177
     * @var array
178
     * @access protected
179
     */
180
    protected $physicalStructureInfo = [];
181
182
    /**
183
     * Is the physical structure loaded?
184
     * @see $physicalStructure
185
     *
186
     * @var boolean
187
     * @access protected
188
     */
189
    protected $physicalStructureLoaded = FALSE;
190
191
    /**
192
     * This holds the PID of the document or zero if not in database
193
     *
194
     * @var integer
195
     * @access protected
196
     */
197
    protected $pid = 0;
198
199
    /**
200
     * This holds the documents' raw text pages with their corresponding
201
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
202
     *
203
     * @var array
204
     * @access protected
205
     */
206
    protected $rawTextArray = [];
207
208
    /**
209
     * Is the document instantiated successfully?
210
     *
211
     * @var boolean
212
     * @access protected
213
     */
214
    protected $ready = FALSE;
215
216
    /**
217
     * The METS file's / IIIF manifest's record identifier
218
     *
219
     * @var string
220
     * @access protected
221
     */
222
    protected $recordId;
223
224
    /**
225
     * This holds the singleton object of the document
226
     *
227
     * @var array (\Kitodo\Dlf\Common\Document)
228
     * @static
229
     * @access protected
230
     */
231
    protected static $registry = [];
232
233
    /**
234
     * This holds the UID of the root document or zero if not multi-volumed
235
     *
236
     * @var integer
237
     * @access protected
238
     */
239
    protected $rootId = 0;
240
241
    /**
242
     * Is the root id loaded?
243
     * @see $rootId
244
     *
245
     * @var boolean
246
     * @access protected
247
     */
248
    protected $rootIdLoaded = FALSE;
249
250
    /**
251
     * This holds the smLinks between logical and physical structMap
252
     *
253
     * @var array
254
     * @access protected
255
     */
256
    protected $smLinks = ['l2p' => [], 'p2l' => []];
257
258
    /**
259
     * Are the smLinks loaded?
260
     * @see $smLinks
261
     *
262
     * @var boolean
263
     * @access protected
264
     */
265
    protected $smLinksLoaded = FALSE;
266
267
    /**
268
     * This holds the logical structure
269
     *
270
     * @var array
271
     * @access protected
272
     */
273
    protected $tableOfContents = [];
274
275
    /**
276
     * Is the table of contents loaded?
277
     * @see $tableOfContents
278
     *
279
     * @var boolean
280
     * @access protected
281
     */
282
    protected $tableOfContentsLoaded = FALSE;
283
284
    /**
285
     * This holds the document's thumbnail location
286
     *
287
     * @var string
288
     * @access protected
289
     */
290
    protected $thumbnail = '';
291
292
    /**
293
     * Is the document's thumbnail location loaded?
294
     * @see $thumbnail
295
     *
296
     * @var boolean
297
     * @access protected
298
     */
299
    protected $thumbnailLoaded = FALSE;
300
301
    /**
302
     * This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
303
     *
304
     * @var string
305
     * @access protected
306
     */
307
    protected $toplevelId = '';
308
309
    /**
310
     * This holds the UID or the URL of the document
311
     *
312
     * @var mixed
313
     * @access protected
314
     */
315
    protected $uid = 0;
316
317
    /**
318
     * This holds the whole XML file as \SimpleXMLElement object
319
     *
320
     * @var \SimpleXMLElement
321
     * @access protected
322
     */
323
    protected $xml;
324
325
    /**
     * Resets the static registry of document instances to prevent memory exhaustion
     *
     * @access public
     *
     * @static
     *
     * @return void
     */
    public static function clearRegistry()
    {
        // Replace the registry with a fresh, empty array.
        self::$registry = [];
    }
339
340
    /**
     * Makes sure the recordId is retrieved from the document, provided it exists
     *
     * @access protected
     *
     * @abstract
     *
     * @param integer $pid: ID of the configuration page holding the recordId config
     *
     */
    abstract protected function establishRecordId($pid);
351
352
    /**
     * Returns the source document PHP object that this Document instance represents
     *
     * @access protected
     *
     * @abstract
     *
     * @return \SimpleXMLElement|IiifResourceInterface A PHP object representation of
     * the current document: SimpleXMLElement for METS, IiifResourceInterface for IIIF
     */
    abstract protected function getDocument();
363
364
    /**
     * Returns the location of a downloadable file for a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
     *
     * @return string The file's location as URL
     */
    abstract public function getDownloadLocation($id);
376
377
    /**
     * Returns the location of a file representing a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
     *
     * @return string The file's location as URL
     */
    abstract public function getFileLocation($id);
389
390
    /**
     * Returns the MIME type of a file representing a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node
     *
     * @return string The file's MIME type
     */
    abstract public function getFileMimeType($id);
402
403
    /**
     * This is a singleton class, thus an instance must be created by this method
     *
     * Unless a reload is forced, a cached instance is looked up first in the
     * static registry and then in the user's session. Otherwise the document
     * format is determined (from the database record for integer UIDs, from the
     * fetched content for URLs) and a matching MetsDocument or IiifManifest is
     * created and, if ready, stored in the registry.
     *
     * @access public
     *
     * @static
     *
     * @param mixed $uid: The unique identifier of the document to parse, the URL of XML file or the IRI of the IIIF resource
     * @param integer $pid: If > 0, then only document with this PID gets loaded
     * @param boolean $forceReload: Force reloading the document instead of returning the cached instance
     *
     * @return \Kitodo\Dlf\Common\Document|null Instance of this class, either MetsDocument or IiifManifest,
     * or NULL if the document format could not be determined
     */
    public static function &getInstance($uid, $pid = 0, $forceReload = FALSE)
    {
        // Sanitize input. $pid is not modified afterwards, so this is done only once.
        $pid = max(intval($pid), 0);
        if (!$forceReload) {
            $regObj = md5($uid);
            // isset() avoids an "undefined index" notice for documents not yet registered.
            if (
                isset(self::$registry[$regObj])
                && self::$registry[$regObj] instanceof self
            ) {
                // Check if instance has given PID.
                if (
                    !$pid
                    || !self::$registry[$regObj]->pid
                    || $pid == self::$registry[$regObj]->pid
                ) {
                    // Return singleton instance if available.
                    return self::$registry[$regObj];
                }
            } else {
                // Check the user's session...
                $sessionData = Helper::loadFromSession(get_called_class());
                if (
                    is_array($sessionData)
                    && isset($sessionData[$regObj])
                    && $sessionData[$regObj] instanceof self
                ) {
                    // Check if instance has given PID.
                    if (
                        !$pid
                        || !$sessionData[$regObj]->pid
                        || $pid == $sessionData[$regObj]->pid
                    ) {
                        // ...and restore registry.
                        self::$registry[$regObj] = $sessionData[$regObj];
                        return self::$registry[$regObj];
                    }
                }
            }
        }
        // Create new instance depending on format (METS or IIIF) ...
        $instance = NULL;
        $documentFormat = NULL;
        $xml = NULL;
        $iiif = NULL;
        // Try to get document format from database
        if (MathUtility::canBeInterpretedAsInteger($uid)) {
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('tx_dlf_documents');

            $queryBuilder
                ->select(
                    'tx_dlf_documents.location AS location',
                    'tx_dlf_documents.document_format AS document_format'
                )
                ->from('tx_dlf_documents');

            // Get UID of document with given record identifier.
            if ($pid) {
                $queryBuilder
                    ->where(
                        $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($uid)),
                        $queryBuilder->expr()->eq('tx_dlf_documents.pid', intval($pid)),
                        Helper::whereExpression('tx_dlf_documents')
                    );
            } else {
                $queryBuilder
                    ->where(
                        $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($uid)),
                        Helper::whereExpression('tx_dlf_documents')
                    );
            }

            $result = $queryBuilder
                ->setMaxResults(1)
                ->execute();

            if ($resArray = $result->fetch()) {
                $documentFormat = $resArray['document_format'];
            }
        } else {
            // Get document format from content of remote document
            // Cast to string for safety reasons.
            $location = (string) $uid;
            // Try to load a file from the url
            if (GeneralUtility::isValidUrl($location)) {
                // Load extension configuration
                $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['dlf']);
                // Set user-agent to identify self when fetching XML data.
                if (!empty($extConf['useragent'])) {
                    @ini_set('user_agent', $extConf['useragent']);
                }
                $content = GeneralUtility::getUrl($location);
                if ($content !== FALSE) {
                    // TODO use single place to load xml
                    // Turn off libxml's error logging.
                    $libxmlErrors = libxml_use_internal_errors(TRUE);
                    // Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept
                    $previousValueOfEntityLoader = libxml_disable_entity_loader(TRUE);
                    // Try to load XML from file.
                    $xml = simplexml_load_string($content);
                    // reset entity loader setting
                    libxml_disable_entity_loader($previousValueOfEntityLoader);
                    // Reset libxml's error logging.
                    libxml_use_internal_errors($libxmlErrors);
                    if ($xml !== FALSE) {
                        /* @var $xml \SimpleXMLElement */
                        $xml->registerXPathNamespace('mets', 'http://www.loc.gov/METS/');
                        $xpathResult = $xml->xpath('//mets:mets');
                        $documentFormat = !empty($xpathResult) ? 'METS' : NULL;
                    } else {
                        // Try to load file as IIIF resource instead.
                        $contentAsJsonArray = json_decode($content, TRUE);
                        if ($contentAsJsonArray !== NULL) {
                            // Load plugin configuration.
                            $conf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
                            IiifHelper::setUrlReader(IiifUrlReader::getInstance());
                            IiifHelper::setMaxThumbnailHeight($conf['iiifThumbnailHeight']);
                            IiifHelper::setMaxThumbnailWidth($conf['iiifThumbnailWidth']);
                            $iiif = IiifHelper::loadIiifResource($contentAsJsonArray);
                            if ($iiif instanceof IiifResourceInterface) {
                                $documentFormat = 'IIIF';
                            }
                        }
                    }
                }
            }
        }
        if ($documentFormat == 'METS') {
            $instance = new MetsDocument($uid, $pid, $xml);
        } elseif ($documentFormat == 'IIIF') {
            $instance = new IiifManifest($uid, $pid, $iiif);
        }
        // Save instance to registry.
        if (
            $instance instanceof self
            && $instance->ready
        ) {
            // Register under the UID and, if different, under the location as well,
            // so both identifiers resolve to the same instance.
            self::$registry[md5($instance->uid)] = $instance;
            if ($instance->uid != $instance->location) {
                self::$registry[md5($instance->location)] = $instance;
            }
            // Load extension configuration
            $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['dlf']);
            // Save registry to session if caching is enabled.
            if (!empty($extConf['caching'])) {
                Helper::saveToSession(self::$registry, get_class($instance));
            }
        }
        // Return new instance (NULL if no supported format was detected).
        return $instance;
    }
568
569
    /**
     * Returns details about a logical structure element
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the logical structure node (METS) or
     * the @id property of the Manifest / Range (IIIF)
     * @param boolean $recursive: Whether to include the child elements / resources
     *
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
     */
    abstract public function getLogicalStructure($id, $recursive = FALSE);
583
584
    /**
     * Extracts all the metadata for a logical structure node
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the logical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     * @param integer $cPid: The PID for the metadata definitions
     *                       (defaults to $this->cPid or $this->pid)
     *
     * @return array The logical structure node's / the IIIF resource's parsed metadata array
     */
    abstract public function getMetadata($id, $cPid = 0);
599
600
    /**
     * This returns the first corresponding physical page number of a given logical page label
     *
     * The last successful lookup is remembered in $this->lastSearchedPhysicalPage,
     * so repeating the same query does not scan the physical structure again.
     *
     * @access public
     *
     * @param string $logicalPage: The label (or a part of the label) of the logical page
     *
     * @return integer The physical page number, or 1 if no page label matches
     */
    public function getPhysicalPage($logicalPage)
    {
        // Before PHP 8, strpos() interprets a non-string needle as a character
        // code, so make sure the label really is a string.
        $logicalPage = (string) $logicalPage;
        // Strict comparison: the cache starts out as NULL and labels such as
        // "05" and "5" must not be treated as the same page.
        if ($this->lastSearchedPhysicalPage['logicalPage'] === $logicalPage) {
            return $this->lastSearchedPhysicalPage['physicalPage'];
        }
        // An empty needle triggers a warning (PHP 7) or matches every label
        // (PHP 8), so skip the search in that case and use the fallback.
        if ($logicalPage !== '') {
            $physicalPage = 0;
            foreach ($this->physicalStructureInfo as $page) {
                if (
                    isset($page['orderlabel'])
                    && strpos((string) $page['orderlabel'], $logicalPage) !== FALSE
                ) {
                    $this->lastSearchedPhysicalPage['logicalPage'] = $logicalPage;
                    $this->lastSearchedPhysicalPage['physicalPage'] = $physicalPage;
                    return $physicalPage;
                }
                $physicalPage++;
            }
        }
        // No matching label found: fall back to the first page.
        return 1;
    }
629
630
    /**
     * Extracts the raw text for a physical structure node / IIIF Manifest / Canvas. The text may be
     * provided as ALTO for METS, or as annotations or ALTO for IIIF resources. The extension has to be
     * configured accordingly if IIIF plain text annotations with the motivation "painting" should be
     * treated as full text representations.
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     *
     * @return string The physical structure node's / IIIF resource's raw text
     */
    abstract public function getRawText($id);
646
647
    /**
     * This extracts the raw text for a physical structure node / IIIF Manifest / Canvas from an
     * XML fulltext representation (currently only ALTO). For IIIF manifests, ALTO documents have
     * to be given in the Canvas' / Manifest's "seeAlso" property.
     *
     * An empty string is returned (and a warning is logged) if the node is unknown, the
     * fulltext file cannot be fetched or parsed, or its text format is not supported.
     *
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     *
     * @return string The physical structure node's / IIIF resource's raw text from XML
     */
    protected function getRawTextFromXml($id)
    {
        $rawText = '';
        // Load available text formats, ...
        $this->loadFormats();
        // ... physical structure ...
        $this->_getPhysicalStructure();
        // ... and extension configuration.
        $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        if (empty($this->physicalStructureInfo[$id])) {
            Helper::devLog('Invalid structure node @ID "' . $id . '"', DEVLOG_SEVERITY_WARNING);
            return $rawText;
        }
        // Get fulltext file.
        $file = GeneralUtility::getUrl($this->getFileLocation($this->physicalStructureInfo[$id]['files'][$extConf['fileGrpFulltext']]));
        if ($file === FALSE) {
            Helper::devLog('Couln\'t load fulltext file for structure node @ID "' . $id . '"', DEVLOG_SEVERITY_WARNING);
            return $rawText;
        }
        // Turn off libxml's error logging.
        $libxmlErrors = libxml_use_internal_errors(TRUE);
        // Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept.
        $previousValueOfEntityLoader = libxml_disable_entity_loader(TRUE);
        // Load XML from file.
        $rawTextXml = simplexml_load_string($file);
        // Reset entity loader setting.
        libxml_disable_entity_loader($previousValueOfEntityLoader);
        // Reset libxml's error logging.
        libxml_use_internal_errors($libxmlErrors);
        // simplexml_load_string() returns FALSE for malformed XML; calling
        // getName() on that would be a fatal error, so bail out gracefully.
        if ($rawTextXml === FALSE) {
            Helper::devLog('Couldn\'t parse fulltext file for structure node @ID "' . $id . '" as XML', DEVLOG_SEVERITY_WARNING);
            return $rawText;
        }
        // Get the root element's name as text format.
        $textFormat = strtoupper($rawTextXml->getName());
        // Is this text format supported?
        if (
            !empty($rawTextXml)
            && !empty($this->formats[$textFormat])
        ) {
            if (!empty($this->formats[$textFormat]['class'])) {
                $class = $this->formats[$textFormat]['class'];
                // Get the raw text from class.
                if (
                    class_exists($class)
                    && ($obj = GeneralUtility::makeInstance($class)) instanceof FulltextInterface
                ) {
                    $rawText = $obj->getRawText($rawTextXml);
                    // Remember the extracted text for this node.
                    $this->rawTextArray[$id] = $rawText;
                } else {
                    Helper::devLog('Invalid class/method "' . $class . '->getRawText()" for text format "' . $textFormat . '"', DEVLOG_SEVERITY_WARNING);
                }
            }
        } else {
            Helper::devLog('Unsupported text format "' . $textFormat . '" in physical node with @ID "' . $id . '"', DEVLOG_SEVERITY_WARNING);
        }
        return $rawText;
    }
713
714
    /**
     * Determines a title for the given document
     *
     * If the document itself has no title and $recursive is set, the title of
     * the document referenced by its "partof" field is looked up instead.
     *
     * @access public
     *
     * @static
     *
     * @param integer $uid: The UID of the document
     * @param boolean $recursive: Search superior documents for a title, too?
     *
     * @return string The title of the document itself or a parent document
     */
    public static function getTitle($uid, $recursive = FALSE)
    {
        // Sanitize input.
        $uid = max(intval($uid), 0);
        if (!$uid) {
            Helper::devLog('Invalid UID ' . $uid . ' for document', DEVLOG_SEVERITY_ERROR);
            return '';
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');

        $queryBuilder
            ->select(
                'tx_dlf_documents.title',
                'tx_dlf_documents.partof'
            )
            ->from('tx_dlf_documents')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_documents.uid', $uid),
                Helper::whereExpression('tx_dlf_documents')
            )
            ->setMaxResults(1);

        $row = $queryBuilder->execute()->fetch();
        if (!$row) {
            Helper::devLog('No document with UID ' . $uid . ' found or document not accessible', DEVLOG_SEVERITY_WARNING);
            return '';
        }

        // Get title information.
        $title = $row['title'];
        $parentUid = $row['partof'];
        // Fall back to the parent document's title if requested and necessary;
        // a record pointing to itself is not followed.
        $lookAtParent = $recursive
            && empty($title)
            && intval($parentUid)
            && $parentUid != $uid;
        if ($lookAtParent) {
            $title = self::getTitle($parentUid, TRUE);
        }
        return $title;
    }
769
770
    /**
     * Extracts all the metadata for the toplevel logical structure node / resource
     *
     * @access public
     *
     * @param integer $cPid: The PID for the metadata definitions
     *
     * @return array The logical structure node's / resource's parsed metadata array
     */
    public function getTitledata($cPid = 0)
    {
        $titledata = $this->getMetadata($this->_getToplevelId(), $cPid);
        // METS documents contribute extra information from their structural map.
        if ($this instanceof MetsDocument) {
            $this->addMetadataFromMets($titledata, $this->_getToplevelId());
        }
        // Put the record identifier of the METS file / IIIF manifest in front
        // of the list if it is known but not yet part of it.
        $recordIdMissing = is_array($titledata)
            && array_key_exists('record_id', $titledata)
            && !empty($this->recordId)
            && !in_array($this->recordId, $titledata['record_id']);
        if ($recordIdMissing) {
            array_unshift($titledata['record_id'], $this->recordId);
        }
        return $titledata;
    }
800
801
    /**
     * Walks a logical (sub-) structure tree depth-first, looking for the structure
     * with the requested logical id, and returns its depth.
     *
     * @access protected
     *
     * @param array $structure: logical structure array
     * @param integer $depth: current tree depth
     * @param string $logId: ID of the logical structure whose depth is requested
     *
     * @return integer|boolean: FALSE if structure with $logId is not a child of this substructure,
     * or the actual depth.
     */
    protected function getTreeDepth($structure, $depth, $logId)
    {
        foreach ($structure as $node) {
            // Direct hit on this level.
            if ($node['id'] == $logId) {
                return $depth;
            }
            // Leaf without children: nothing to descend into.
            if (!array_key_exists('children', $node)) {
                continue;
            }
            // Descend one level; keep scanning siblings if the subtree has no match.
            $depthInSubtree = $this->getTreeDepth($node['children'], $depth + 1, $logId);
            if ($depthInSubtree !== FALSE) {
                return $depthInSubtree;
            }
        }
        return FALSE;
    }
827
828
    /**
     * Determine how deep a logical structure element is nested within the table of contents
     *
     * @access public
     *
     * @param string $logId: The id of the logical structure element whose depth is requested
     *
     * @return integer|boolean Tree depth (toplevel elements have depth 1) or FALSE if no element
     * with $logId exists within the TOC.
     */
    public function getStructureDepth($logId)
    {
        // Start the search at the root of the table of contents, which counts as depth 1.
        $tableOfContents = $this->_getTableOfContents();
        $depth = $this->getTreeDepth($tableOfContents, 1, $logId);
        return $depth;
    }
840
841
    /**
     * Set up the basic class properties
     *
     * Has to be implemented by the format specific subclasses. The constructor calls it once
     * a document has been loaded or a preloaded document has been accepted.
     *
     * @access protected
     *
     * @abstract
     *
     * @return void
     */
    abstract protected function init();
851
852
    /**
     * Reuse a document object that may already have been loaded to determine whether the document is METS or IIIF
     *
     * @access protected
     *
     * @abstract
     *
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: any instance that has already been loaded
     *
     * @return boolean TRUE if $preloadedDocument can actually be reused, FALSE if it has to be loaded again
     */
    abstract protected function setPreloadedDocument($preloadedDocument);
864
865
    /**
     * Format specific (METS / IIIF) part of loading a location
     *
     * Called by load() after the location has been checked to be a valid URL.
     *
     * @access protected
     *
     * @abstract
     *
     * @param string $location: The URL of the file to load
     *
     * @return boolean TRUE on success or FALSE on failure
     */
    abstract protected function loadLocation($location);
877
878
    /**
     * Load an XML file / IIIF resource from a URL
     *
     * Validates the location, applies the configured user agent (if any) and hands over
     * to the format specific loadLocation().
     *
     * @access protected
     *
     * @param string $location: The URL of the file to load
     *
     * @return boolean TRUE on success or FALSE on failure
     */
    protected function load($location)
    {
        // Anything that is not a valid URL is rejected right away.
        if (!GeneralUtility::isValidUrl($location)) {
            Helper::devLog('Invalid file location "' . $location . '" for document loading', DEVLOG_SEVERITY_ERROR);
            return FALSE;
        }
        // Read the extension configuration.
        $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['dlf']);
        // Identify ourselves with the configured user-agent when fetching XML / JSON-LD data.
        if (!empty($extConf['useragent'])) {
            @ini_set('user_agent', $extConf['useragent']);
        }
        // Fetching and parsing is up to the METS / IIIF implementation.
        return $this->loadLocation($location);
    }
904
905
    /**
     * Analyze the document to find out whether it contains any fulltext that needs to be indexed.
     *
     * Called by _getHasFulltext() right before $this->hasFulltext is returned.
     *
     * @access protected
     *
     * @abstract
     */
    abstract protected function ensureHasFulltextIsSet();
913
914
    /**
     * Fill the registry of available data formats from the database
     *
     * The formats are read from "tx_dlf_formats" (records on PID 0) the first time this
     * method is called; subsequent calls return immediately.
     *
     * @access protected
     *
     * @return void
     */
    protected function loadFormats()
    {
        // Nothing to do if the registry has been filled before.
        if ($this->formatsLoaded) {
            return;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_formats');

        // Fetch all available data formats.
        $statement = $queryBuilder
            ->select(
                'tx_dlf_formats.type AS type',
                'tx_dlf_formats.root AS root',
                'tx_dlf_formats.namespace AS namespace',
                'tx_dlf_formats.class AS class'
            )
            ->from('tx_dlf_formats')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_formats.pid', 0),
                Helper::whereExpression('tx_dlf_formats')
            )
            ->execute();

        // Register each format under its type.
        while ($format = $statement->fetch()) {
            $this->formats[$format['type']] = [
                'rootElement' => $format['root'],
                'namespaceURI' => $format['namespace'],
                'class' => $format['class']
            ];
        }

        $this->formatsLoaded = TRUE;
    }
953
954
    /**
     * Register the namespaces of all known data formats on a \SimpleXMLElement or \DOMXPath object
     *
     * Each format type (lowercased) is used as prefix for the format's namespace URI.
     *
     * @access public
     *
     * @param \SimpleXMLElement|\DOMXPath &$obj: \SimpleXMLElement or \DOMXPath object
     *
     * @return void
     */
    public function registerNamespaces(&$obj)
    {
        // TODO Check usage. This XML specific method does not seem to be used outside this class within the project, but it is public and may be used by extensions.
        $this->loadFormats();
        $isSimpleXml = $obj instanceof \SimpleXMLElement;
        // Objects of any other type cannot take namespace registrations.
        if (
            !$isSimpleXml
            && !($obj instanceof \DOMXPath)
        ) {
            Helper::devLog('Given object is neither a SimpleXMLElement nor a DOMXPath instance', DEVLOG_SEVERITY_ERROR);
            return;
        }
        // Both classes offer the same functionality under different method names.
        $method = $isSimpleXml ? 'registerXPathNamespace' : 'registerNamespace';
        // Register metadata format's namespaces.
        foreach ($this->formats as $type => $format) {
            $obj->{$method}(strtolower($type), $format['namespaceURI']);
        }
    }
981
982
    /**
     * This saves the document to the database and index
     *
     * The structure type must already exist on the target page, otherwise saving fails.
     * Collections and the owning library are created on the fly if they are missing.
     * After the "tx_dlf_documents" record has been written, the document is added to
     * the Solr index if a core is given.
     *
     * @access public
     *
     * @param integer $pid: The PID of the saved record
     * @param integer $core: The UID of the Solr core for indexing
     *
     * @return boolean TRUE on success or FALSE on failure
     */
    public function save($pid = 0, $core = 0)
    {
        if (\TYPO3_MODE !== 'BE') {
            Helper::devLog('Saving a document is only allowed in the backend', DEVLOG_SEVERITY_ERROR);
            return FALSE;
        }
        // Make sure $pid is a non-negative integer.
        $pid = max(intval($pid), 0);
        // Make sure $core is a non-negative integer.
        $core = max(intval($core), 0);
        // If $pid is not given, try to get it elsewhere.
        if (
            !$pid
            && $this->pid
        ) {
            // Retain current PID.
            $pid = $this->pid;
        } elseif (!$pid) {
            Helper::devLog('Invalid PID ' . $pid . ' for document saving', DEVLOG_SEVERITY_ERROR);
            return FALSE;
        }
        // Set PID for metadata definitions.
        $this->cPid = $pid;
        // Set UID placeholder if not updating existing record.
        if ($pid != $this->pid) {
            $this->uid = uniqid('NEW');
        }
        // Get metadata array.
        $metadata = $this->getTitledata($pid);
        // Check for record identifier.
        if (empty($metadata['record_id'][0])) {
            Helper::devLog('No record identifier found to avoid duplication', DEVLOG_SEVERITY_ERROR);
            return FALSE;
        }
        // Load plugin configuration.
        $conf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_structures');

        // The structure type is optional in the parsed metadata; a missing one simply matches nothing.
        $structureType = $metadata['type'][0] ?? '';

        // Get UID for structure type.
        $result = $queryBuilder
            ->select('tx_dlf_structures.uid AS uid')
            ->from('tx_dlf_structures')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($pid)),
                $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($structureType)),
                Helper::whereExpression('tx_dlf_structures')
            )
            ->setMaxResults(1)
            ->execute();

        if ($resArray = $result->fetch()) {
            $structure = $resArray['uid'];
        } else {
            Helper::devLog('Could not identify document/structure type "' . $queryBuilder->expr()->literal($structureType) . '"', DEVLOG_SEVERITY_ERROR);
            return FALSE;
        }
        $metadata['type'][0] = $structure;

        // Remove appended "valueURI" from authors' names for storing in database.
        // (Name and URI are separated by the ASCII unit separator, chr(31).)
        foreach ($metadata['author'] ?? [] as $i => $author) {
            $splitName = explode(chr(31), $author);
            $metadata['author'][$i] = $splitName[0];
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_collections');

        // Get UIDs for collections.
        $result = $queryBuilder
            ->select(
                'tx_dlf_collections.index_name AS index_name',
                'tx_dlf_collections.uid AS uid'
            )
            ->from('tx_dlf_collections')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_collections.pid', intval($pid)),
                $queryBuilder->expr()->in('tx_dlf_collections.sys_language_uid', [-1, 0]),
                Helper::whereExpression('tx_dlf_collections')
            )
            ->execute();

        $collUid = [];
        while ($resArray = $result->fetch()) {
            $collUid[$resArray['index_name']] = $resArray['uid'];
        }
        $collections = [];
        foreach ($metadata['collection'] ?? [] as $collection) {
            if (!empty($collUid[$collection])) {
                // Add existing collection's UID.
                $collections[] = $collUid[$collection];
            } else {
                // Insert new collection.
                $collNewUid = uniqid('NEW');
                $collData['tx_dlf_collections'][$collNewUid] = [
                    'pid' => $pid,
                    'label' => $collection,
                    'index_name' => $collection,
                    'oai_name' => (!empty($conf['publishNewCollections']) ? Helper::getCleanString($collection) : ''),
                    'description' => '',
                    'documents' => 0,
                    'owner' => 0,
                    'status' => 0,
                ];
                $substUid = Helper::processDBasAdmin($collData);
                // Prevent double insertion.
                unset($collData);
                // Add new collection's UID.
                $collections[] = $substUid[$collNewUid];
                if ((\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI) == FALSE) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newCollection'), $collection, $substUid[$collNewUid])),
                        Helper::getMessage('flash.attention', TRUE),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        TRUE
                    );
                }
            }
        }
        $metadata['collection'] = $collections;

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_libraries');

        // Get UID for owner.
        $owner = !empty($metadata['owner'][0]) ? $metadata['owner'][0] : 'default';

        $result = $queryBuilder
            ->select('tx_dlf_libraries.uid AS uid')
            ->from('tx_dlf_libraries')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_libraries.pid', intval($pid)),
                $queryBuilder->expr()->eq('tx_dlf_libraries.index_name', $queryBuilder->expr()->literal($owner)),
                Helper::whereExpression('tx_dlf_libraries')
            )
            ->setMaxResults(1)
            ->execute();

        if ($resArray = $result->fetch()) {
            $ownerUid = $resArray['uid'];
        } else {
            // Insert new library.
            $libNewUid = uniqid('NEW');
            $libData['tx_dlf_libraries'][$libNewUid] = [
                'pid' => $pid,
                'label' => $owner,
                'index_name' => $owner,
                'website' => '',
                'contact' => '',
                'image' => '',
                'oai_label' => '',
                'oai_base' => '',
                'opac_label' => '',
                'opac_base' => '',
                'union_label' => '',
                'union_base' => '',
            ];
            $substUid = Helper::processDBasAdmin($libData);
            // Add new library's UID.
            $ownerUid = $substUid[$libNewUid];
            if ((\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI) == FALSE) {
                Helper::addMessage(
                    htmlspecialchars(sprintf(Helper::getMessage('flash.newLibrary'), $owner, $ownerUid)),
                    Helper::getMessage('flash.attention', TRUE),
                    \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                    TRUE
                );
            }
        }
        $metadata['owner'][0] = $ownerUid;
        // Get UID of parent document.
        $partof = $this->getParentDocumentUidForSaving($pid, $core);
        // Use the date of publication or title as alternative sorting metric for parts of multi-part works.
        if (!empty($partof)) {
            if (
                empty($metadata['volume'][0])
                && !empty($metadata['year'][0])
            ) {
                $metadata['volume'] = $metadata['year'];
            }
            if (empty($metadata['volume_sorting'][0])) {
                if (!empty($metadata['year_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year_sorting'][0];
                } elseif (!empty($metadata['year'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year'][0];
                }
            }
            // If volume_sorting is still empty, try to use title_sorting finally (workaround for newspapers)
            if (empty($metadata['volume_sorting'][0])) {
                if (!empty($metadata['title_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['title_sorting'][0];
                }
            }
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');

        // Get metadata for lists and sorting.
        $result = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadata.is_listed AS is_listed',
                'tx_dlf_metadata.is_sortable AS is_sortable'
            )
            ->from('tx_dlf_metadata')
            ->where(
                $queryBuilder->expr()->orX(
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_listed', 1),
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_sortable', 1)
                ),
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($pid)),
                Helper::whereExpression('tx_dlf_metadata')
            )
            ->execute();

        $listed = [];
        $sortable = [];

        while ($resArray = $result->fetch()) {
            if (!empty($metadata[$resArray['index_name']])) {
                if ($resArray['is_listed']) {
                    $listed[$resArray['index_name']] = $metadata[$resArray['index_name']];
                }
                if ($resArray['is_sortable']) {
                    $sortable[$resArray['index_name']] = $metadata[$resArray['index_name']][0];
                }
            }
        }
        // Fill data array.
        // Most metadata fields are optional (see the empty() checks above), so their first
        // value is read with "?? NULL": absent fields are stored as NULL exactly as before,
        // but without "undefined offset" notices. The same applies to the implode() calls,
        // which would fail on a missing field.
        $data['tx_dlf_documents'][$this->uid] = [
            'pid' => $pid,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['starttime'] => 0,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['endtime'] => 0,
            'prod_id' => $metadata['prod_id'][0] ?? NULL,
            'location' => $this->location,
            'record_id' => $metadata['record_id'][0],
            'opac_id' => $metadata['opac_id'][0] ?? NULL,
            'union_id' => $metadata['union_id'][0] ?? NULL,
            'urn' => $metadata['urn'][0] ?? NULL,
            'purl' => $metadata['purl'][0] ?? NULL,
            'title' => $metadata['title'][0] ?? NULL,
            'title_sorting' => $metadata['title_sorting'][0] ?? NULL,
            'author' => implode('; ', $metadata['author'] ?? []),
            'year' => implode('; ', $metadata['year'] ?? []),
            'place' => implode('; ', $metadata['place'] ?? []),
            'thumbnail' => $this->_getThumbnail(TRUE),
            'metadata' => serialize($listed),
            'metadata_sorting' => serialize($sortable),
            'structure' => $metadata['type'][0],
            'partof' => $partof,
            'volume' => $metadata['volume'][0] ?? NULL,
            'volume_sorting' => $metadata['volume_sorting'][0] ?? NULL,
            'license' => $metadata['license'][0] ?? NULL,
            'terms' => $metadata['terms'][0] ?? NULL,
            'restrictions' => $metadata['restrictions'][0] ?? NULL,
            'out_of_print' => $metadata['out_of_print'][0] ?? NULL,
            'rights_info' => $metadata['rights_info'][0] ?? NULL,
            'collections' => $metadata['collection'],
            'mets_label' => $metadata['mets_label'][0] ?? NULL,
            'mets_orderlabel' => $metadata['mets_orderlabel'][0] ?? NULL,
            'owner' => $metadata['owner'][0],
            'solrcore' => $core,
            'status' => 0,
            'document_format' => $metadata['document_format'][0] ?? NULL,
        ];
        // Unhide hidden documents.
        if (!empty($conf['unhideOnIndex'])) {
            $data['tx_dlf_documents'][$this->uid][$GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['disabled']] = 0;
        }
        // Process data.
        $newIds = Helper::processDBasAdmin($data);
        // Replace placeholder with actual UID.
        if (strpos($this->uid, 'NEW') === 0) {
            $this->uid = $newIds[$this->uid];
            $this->pid = $pid;
            $this->parentId = $partof;
        }
        if ((\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI) == FALSE) {
            Helper::addMessage(
                htmlspecialchars(sprintf(Helper::getMessage('flash.documentSaved'), $metadata['title'][0] ?? '', $this->uid)),
                Helper::getMessage('flash.done', TRUE),
                \TYPO3\CMS\Core\Messaging\FlashMessage::OK,
                TRUE
            );
        }
        // Add document to index.
        if ($core) {
            Indexer::add($this, $core);
        } else {
            Helper::devLog('Invalid UID "' . $core . '" for Solr core', DEVLOG_SEVERITY_NOTICE);
        }
        return TRUE;
    }
1287
1288
    /**
     * Determine the ID of the current document's parent document, if it has one. A parent document
     * is also saved to the database and the Solr index if its $pid and the current $pid differ.
     * Currently only applies to METS documents.
     *
     * Called by save() with the PID and Solr core the current document is being saved with.
     *
     * @access protected
     *
     * @abstract
     *
     * @param integer $pid: The PID of the saved record
     * @param integer $core: The UID of the Solr core for indexing
     *
     * @return int The parent document's id.
     */
    abstract protected function getParentDocumentUidForSaving($pid, $core);
1300
1301
    /**
     * Getter for $this->hasFulltext, used by __get()
     *
     * @access protected
     *
     * @return boolean Are there any fulltext files available?
     */
    protected function _getHasFulltext()
    {
        // Let the format specific implementation determine the flag first.
        $this->ensureHasFulltextIsSet();

        return $this->hasFulltext;
    }
1313
1314
    /**
     * Getter for $this->location, used by __get()
     *
     * @access protected
     *
     * @return string The location of the document
     */
    protected function _getLocation()
    {
        // Plain accessor, no lazy loading involved.
        return $this->location;
    }
1325
1326
    /**
     * Format specific (METS / IIIF) part of building the document's metadata array
     *
     * Called by _getMetadataArray() whenever the metadata array has not been built yet
     * or has been built for a different PID.
     *
     * @access protected
     *
     * @abstract
     *
     * @param integer $cPid: The PID for the metadata definitions
     */
    abstract protected function prepareMetadataArray($cPid);
1336
1337
    /**
     * Build (if necessary) and return the array of the document's metadata
     *
     * The array is rebuilt whenever it has not been loaded yet or was built for a different
     * PID; the PID it was built for is kept at index 0.
     *
     * @access protected
     *
     * @return array Array of metadata with their corresponding logical structure node ID as key
     * (empty if no PID for the metadata definitions is available)
     */
    protected function _getMetadataArray()
    {
        // The metadata definitions' PID: an explicitly set cPid wins over the document's PID.
        $cPid = $this->cPid ?: $this->pid;
        if (!$cPid) {
            Helper::devLog('Invalid PID ' . $cPid . ' for metadata definitions', DEVLOG_SEVERITY_ERROR);
            return [];
        }
        // Is the cached array missing or does it belong to another PID?
        $needsRebuild = !$this->metadataArrayLoaded
            || $this->metadataArray[0] != $cPid;
        if ($needsRebuild) {
            $this->prepareMetadataArray($cPid);
            // Remember which PID the array has been built for.
            $this->metadataArray[0] = $cPid;
            $this->metadataArrayLoaded = TRUE;
        }
        return $this->metadataArray;
    }
1362
1363
    /**
     * Getter for $this->numPages, used by __get()
     *
     * @access protected
     *
     * @return integer The total number of pages and/or tracks
     */
    protected function _getNumPages()
    {
        // The physical structure is requested first; its return value is not needed here.
        $this->_getPhysicalStructure();

        return $this->numPages;
    }
1375
1376
    /**
     * Getter for $this->parentId, used by __get()
     *
     * @access protected
     *
     * @return integer The UID of the parent document or zero if not applicable
     */
    protected function _getParentId()
    {
        // Plain accessor, no lazy loading involved.
        return $this->parentId;
    }
1387
1388
    /**
     * Build the array of the document's physical structure
     *
     * Has to be implemented by the format specific (METS / IIIF) subclasses.
     *
     * @access protected
     *
     * @abstract
     *
     * @return array Array of physical elements' id, type, label and file representations ordered
     * by @ORDER attribute / IIIF Sequence's Canvases
     */
    abstract protected function _getPhysicalStructure();
1399
1400
    /**
     * Return the array of the document's physical structure metadata
     *
     * @access protected
     *
     * @return array Array of elements' type, label and file representations ordered by @ID attribute / Canvas order
     */
    protected function _getPhysicalStructureInfo()
    {
        // Already built? Then hand out the cached information right away.
        if ($this->physicalStructureLoaded) {
            return $this->physicalStructureInfo;
        }
        // Otherwise build the physical structure array first.
        $this->_getPhysicalStructure();
        return $this->physicalStructureInfo;
    }
1416
1417
    /**
     * Getter for $this->pid, used by __get()
     *
     * @access protected
     *
     * @return integer The PID of the document or zero if not in database
     */
    protected function _getPid()
    {
        // Plain accessor, no lazy loading involved.
        return $this->pid;
    }
1428
1429
    /**
     * Getter for $this->ready, used by __get()
     *
     * @access protected
     *
     * @return boolean Is the document instantiated successfully?
     */
    protected function _getReady()
    {
        // Plain accessor, no lazy loading involved.
        return $this->ready;
    }
1440
1441
    /**
     * Getter for $this->recordId, used by __get()
     *
     * @access protected
     *
     * @return mixed The METS file's / IIIF manifest's record identifier
     */
    protected function _getRecordId()
    {
        // Plain accessor, no lazy loading involved.
        return $this->recordId;
    }
1452
1453
    /**
     * Getter for $this->rootId, used by __get()
     *
     * The root ID is taken over from the parent document on first access and cached afterwards.
     *
     * @access protected
     *
     * @return integer The UID of the root document or zero if not applicable
     */
    protected function _getRootId()
    {
        // Resolved before? Then the cached value is authoritative.
        if ($this->rootIdLoaded) {
            return $this->rootId;
        }
        if ($this->parentId) {
            // NOTE(review): this copies the parent's rootId as is; if the parent has no parent
            // itself, that is whatever value the parent currently holds - confirm this is intended.
            $parentDocument = self::getInstance($this->parentId, $this->pid);
            $this->rootId = $parentDocument->rootId;
        }
        $this->rootIdLoaded = TRUE;
        return $this->rootId;
    }
1471
1472
    /**
     * Return the smLinks between logical and physical structMap (METS). For IIIF, the relation
     * between Canvases and Manifests / Ranges is modelled in the same way.
     *
     * @access protected
     *
     * @abstract
     *
     * @return array The links between logical and physical nodes / Range, Manifest and Canvas
     */
    abstract protected function _getSmLinks();
1483
1484
    /**
     * Build (if necessary) and return the array of the document's logical structure
     *
     * @access protected
     *
     * @return array Array of structure nodes' id, label, type and physical page indexes/mptr / Canvas link with original hierarchy preserved
     */
    protected function _getTableOfContents()
    {
        // Already built? Then hand out the cached array right away.
        if ($this->tableOfContentsLoaded) {
            return $this->tableOfContents;
        }
        // Otherwise get all logical structures first.
        $this->getLogicalStructure('', TRUE);
        $this->tableOfContentsLoaded = TRUE;
        return $this->tableOfContents;
    }
1501
1502
    /**
     * Return the location of the document's thumbnail
     *
     * @access protected
     *
     * @abstract
     *
     * @param boolean $forceReload: Force reloading the thumbnail instead of returning the cached value
     *
     * @return string The document's thumbnail location
     */
    abstract protected function _getThumbnail($forceReload = FALSE);
1514
1515
    /**
     * Return the ID of the toplevel logical structure node
     *
     * @access protected
     *
     * @abstract
     *
     * @return string The logical structure node's ID
     */
    abstract protected function _getToplevelId();
1525
1526
    /**
     * Getter for $this->uid, used by __get()
     *
     * @access protected
     *
     * @return mixed The UID or the URL of the document
     */
    protected function _getUid()
    {
        // Plain accessor, no lazy loading involved.
        return $this->uid;
    }
1537
1538
    /**
     * Setter for $this->cPid, used by __set()
     *
     * @access protected
     *
     * @param integer $value: The new PID for the metadata definitions
     *
     * @return void
     */
    protected function _setCPid($value)
    {
        // Store the value as integer; negative values are raised to zero.
        $this->cPid = max((int) $value, 0);
    }
1551
1552
    /**
     * Magic method that PHP invokes whenever the object is cloned
     *
     * @access protected
     *
     * @return void
     */
    protected function __clone()
    {
        // Intentionally empty. The method is protected because singleton objects should not be cloned.
    }
1563
1564
    /**
1565
     * This is a singleton class, thus the constructor should be private/protected
1566
     * (Get an instance of this class by calling \Kitodo\Dlf\Common\Document::getInstance())
1567
     *
1568
     * @access protected
1569
     *
1570
     * @param integer $uid: The UID of the document to parse or URL to XML file
1571
     * @param integer $pid: If > 0, then only document with this PID gets loaded
1572
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: Either NULL or the \SimpleXMLElement
1573
     * or IiifResourceInterface that has been loaded to determine the basic document format.
1574
     *
1575
     * @return void
1576
     */
1577
    protected function __construct($uid, $pid, $preloadedDocument)
1578
    {
1579
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
1580
            ->getQueryBuilderForTable('tx_dlf_documents');
1581
        $location = '';
1582
        // Prepare to check database for the requested document.
1583
        if (MathUtility::canBeInterpretedAsInteger($uid)) {
1584
            $whereClause = $queryBuilder->expr()->andX(
1585
                $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($uid)),
1586
                Helper::whereExpression('tx_dlf_documents')
1587
            );
1588
        } else {
1589
            // Try to load METS file / IIIF manifest.
1590
            if ($this->setPreloadedDocument($preloadedDocument) || (GeneralUtility::isValidUrl($uid)
1591
                && $this->load($uid))) {
1592
                // Initialize core METS object.
1593
                $this->init();
1594
                if ($this->getDocument() !== NULL) {
1595
                    // Cast to string for safety reasons.
1596
                    $location = (string) $uid;
1597
                    $this->establishRecordId($pid);
1598
                } else {
1599
                    // No METS / IIIF part found.
1600
                    return;
1601
                }
1602
            } else {
1603
                // Loading failed.
1604
                return;
1605
            }
1606
            if (
1607
                !empty($location)
1608
                && !empty($this->recordId)
1609
            ) {
1610
                // Try to match record identifier or location (both should be unique).
1611
                $whereClause = $queryBuilder->expr()->andX(
1612
                    $queryBuilder->expr()->orX(
1613
                        $queryBuilder->expr()->eq('tx_dlf_documents.location', $queryBuilder->expr()->literal($location)),
1614
                        $queryBuilder->expr()->eq('tx_dlf_documents.record_id', $queryBuilder->expr()->literal($this->recordId))
1615
                    ),
1616
                    Helper::whereExpression('tx_dlf_documents')
1617
                );
1618
            } else {
1619
                // Can't persistently identify document, don't try to match at all.
1620
                $whereClause = '1=-1';
1621
            }
1622
        }
1623
        // Check for PID if needed.
1624
        if ($pid) {
1625
            $whereClause = $queryBuilder->expr()->andX(
1626
                $whereClause,
1627
                $queryBuilder->expr()->eq('tx_dlf_documents.pid', intval($pid))
1628
            );
1629
        }
1630
        // Get document PID and location from database.
1631
        $result = $queryBuilder
1632
            ->select(
1633
                'tx_dlf_documents.uid AS uid',
1634
                'tx_dlf_documents.pid AS pid',
1635
                'tx_dlf_documents.record_id AS record_id',
1636
                'tx_dlf_documents.partof AS partof',
1637
                'tx_dlf_documents.thumbnail AS thumbnail',
1638
                'tx_dlf_documents.location AS location'
1639
            )
1640
            ->from('tx_dlf_documents')
1641
            ->where($whereClause)
1642
            ->setMaxResults(1)
1643
            ->execute();
1644
1645
        if ($resArray = $result->fetch()) {
1646
            $this->uid = $resArray['uid'];
1647
            $this->pid = $resArray['pid'];
1648
            $this->recordId = $resArray['record_id'];
1649
            $this->parentId = $resArray['partof'];
1650
            $this->thumbnail = $resArray['thumbnail'];
1651
            $this->location = $resArray['location'];
1652
            $this->thumbnailLoaded = TRUE;
1653
            // Load XML file if necessary...
1654
            if (
1655
                $this->getDocument() === NULL
1656
                && $this->load($this->location)
1657
            ) {
1658
                // ...and set some basic properties.
1659
                $this->init();
1660
            }
1661
            // Do we have a METS / IIIF object now?
1662
            if ($this->getDocument() !== NULL) {
1663
                // Set new location if necessary.
1664
                if (!empty($location)) {
1665
                    $this->location = $location;
1666
                }
1667
                // Document ready!
1668
                $this->ready = TRUE;
1669
            }
1670
        } elseif ($this->getDocument() !== NULL) {
1671
            // Set location as UID for documents not in database.
1672
            $this->uid = $location;
1673
            $this->location = $location;
1674
            // Document ready!
1675
            $this->ready = TRUE;
1676
        } else {
1677
            Helper::devLog('No document with UID ' . $uid . ' found or document not accessible', DEVLOG_SEVERITY_ERROR);
1678
        }
1679
    }
1680
1681
    /**
     * Magic getter: resolves read access to an inaccessible property by
     * delegating to the matching "_get<Property>" method of this object.
     *
     * If the property is not declared on the object or no such getter method
     * exists, a warning is written to the devlog and NULL is returned.
     *
     * @access public
     *
     * @param string $var: Name of variable to get
     *
     * @return mixed Result of the getter method, NULL if there is no getter
     */
    public function __get($var)
    {
        // Getter methods follow the naming scheme "_get" + ucfirst(property name).
        $getter = '_get' . ucfirst($var);
        if (property_exists($this, $var) && method_exists($this, $getter)) {
            return $this->$getter();
        }
        Helper::devLog('There is no getter function for property "' . $var . '"', DEVLOG_SEVERITY_WARNING);
        return NULL;
    }
1703
1704
    /**
     * Magic method backing isset() and empty() on inaccessible properties.
     *
     * The value is obtained through __get(), i.e. via the property's getter
     * method, and then tested with empty().
     *
     * @access public
     *
     * @param string $var: Name of variable to check
     *
     * @return boolean TRUE if variable is set and not empty, FALSE otherwise
     */
    public function __isset($var)
    {
        $value = $this->__get($var);
        return !empty($value);
    }
1716
1717
    /**
     * Magic setter: resolves write access to an inaccessible property by
     * delegating to the matching "_set<Property>" method of this object.
     *
     * If the property is not declared on the object or no such setter method
     * exists, a warning is written to the devlog and the value is discarded.
     *
     * @access public
     *
     * @param string $var: Name of variable to set
     * @param mixed $value: New value of variable
     *
     * @return void
     */
    public function __set($var, $value)
    {
        // Setter methods follow the naming scheme "_set" + ucfirst(property name).
        $setter = '_set' . ucfirst($var);
        if (property_exists($this, $var) && method_exists($this, $setter)) {
            $this->$setter($value);
            return;
        }
        Helper::devLog('There is no setter function for property "' . $var . '"', DEVLOG_SEVERITY_WARNING);
    }
1739
}
1740