Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Pull Request — dev-extbase-fluid (#746)
by Alexander
03:49
created

Doc::getPhysicalPage()   A

Complexity

Conditions 5
Paths 4

Size

Total Lines 19
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 13
nc 4
nop 1
dl 0
loc 19
rs 9.5222
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
5
 *
6
 * This file is part of the Kitodo and TYPO3 projects.
7
 *
8
 * @license GNU General Public License version 3 or later.
9
 * For the full copyright and license information, please read the
10
 * LICENSE.txt file that was distributed with this source code.
11
 */
12
13
namespace Kitodo\Dlf\Common;
14
15
use Kitodo\Dlf\Domain\Repository\DocumentRepository;
16
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
17
use TYPO3\CMS\Core\Database\ConnectionPool;
18
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
19
use TYPO3\CMS\Core\Log\LogManager;
20
use TYPO3\CMS\Core\Utility\GeneralUtility;
21
use TYPO3\CMS\Core\Utility\MathUtility;
22
use TYPO3\CMS\Extbase\Configuration\ConfigurationManager;
23
use TYPO3\CMS\Extbase\Object\ObjectManager;
24
use Ubl\Iiif\Presentation\Common\Model\Resources\IiifResourceInterface;
25
use Ubl\Iiif\Tools\IiifHelper;
26
27
/**
28
 * Document class for the 'dlf' extension
29
 *
30
 * @author Sebastian Meyer <[email protected]>
31
 * @author Henrik Lochmann <[email protected]>
32
 * @package TYPO3
33
 * @subpackage dlf
34
 * @access public
35
 * @property int $cPid This holds the PID for the configuration
36
 * @property-read bool $hasFulltext Are there any fulltext files available?
37
 * @property-read string $location This holds the documents location
38
 * @property-read array $metadataArray This holds the documents' parsed metadata array
39
 * @property-read int $numPages This holds the total number of pages
40
 * @property-read int $parentId This holds the UID of the parent document or zero if not multi-volumed
41
 * @property-read array $physicalStructure This holds the physical structure
42
 * @property-read array $physicalStructureInfo This holds the physical structure metadata
43
 * @property-read int $pid This holds the PID of the document or zero if not in database
44
 * @property-read bool $ready Is the document instantiated successfully?
45
 * @property-read string $recordId The METS file's / IIIF manifest's record identifier
46
 * @property-read int $rootId This holds the UID of the root document or zero if not multi-volumed
47
 * @property-read array $smLinks This holds the smLinks between logical and physical structMap
48
 * @property-read array $tableOfContents This holds the logical structure
49
 * @property-read string $thumbnail This holds the document's thumbnail location
50
 * @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
51
 * @property-read mixed $uid This holds the UID or the URL of the document
52
 * @abstract
53
 */
54
abstract class Doc
55
{
56
    /**
57
     * This holds the logger
58
     *
59
     * @var LogManager
60
     * @access protected
61
     */
62
    protected $logger;
63
64
    /**
65
     * This holds the PID for the configuration
66
     *
67
     * @var int
68
     * @access protected
69
     */
70
    protected $cPid = 0;
71
72
    /**
73
     * The extension key
74
     *
75
     * @var string
76
     * @access public
77
     */
78
    public static $extKey = 'dlf';
79
80
    /**
81
     * This holds the configuration for all supported metadata encodings
82
     * @see loadFormats()
83
     *
84
     * @var array
85
     * @access protected
86
     */
87
    protected $formats = [
88
        'OAI' => [
89
            'rootElement' => 'OAI-PMH',
90
            'namespaceURI' => 'http://www.openarchives.org/OAI/2.0/',
91
        ],
92
        'METS' => [
93
            'rootElement' => 'mets',
94
            'namespaceURI' => 'http://www.loc.gov/METS/',
95
        ],
96
        'XLINK' => [
97
            'rootElement' => 'xlink',
98
            'namespaceURI' => 'http://www.w3.org/1999/xlink',
99
        ]
100
    ];
101
102
    /**
103
     * Are the available metadata formats loaded?
104
     * @see $formats
105
     *
106
     * @var bool
107
     * @access protected
108
     */
109
    protected $formatsLoaded = false;
110
111
    /**
112
     * Are there any fulltext files available? This also includes IIIF text annotations
113
     * with motivation 'painting' if Kitodo.Presentation is configured to store text
114
     * annotations as fulltext.
115
     *
116
     * @var bool
117
     * @access protected
118
     */
119
    protected $hasFulltext = false;
120
121
    /**
122
     * Last searched logical and physical page
123
     *
124
     * @var array
125
     * @access protected
126
     */
127
    protected $lastSearchedPhysicalPage = ['logicalPage' => null, 'physicalPage' => null];
128
129
    /**
130
     * This holds the documents location
131
     *
132
     * @var string
133
     * @access protected
134
     */
135
    protected $location = '';
136
137
    /**
138
     * This holds the logical units
139
     *
140
     * @var array
141
     * @access protected
142
     */
143
    protected $logicalUnits = [];
144
145
    /**
146
     * This holds the documents' parsed metadata array with their corresponding
147
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
148
     *
149
     * @var array
150
     * @access protected
151
     */
152
    protected $metadataArray = [];
153
154
    /**
155
     * Is the metadata array loaded?
156
     * @see $metadataArray
157
     *
158
     * @var bool
159
     * @access protected
160
     */
161
    protected $metadataArrayLoaded = false;
162
163
    /**
164
     * This holds the total number of pages
165
     *
166
     * @var int
167
     * @access protected
168
     */
169
    protected $numPages = 0;
170
171
    /**
172
     * This holds the UID of the parent document or zero if not multi-volumed
173
     *
174
     * @var int
175
     * @access protected
176
     */
177
    protected $parentId = 0;
178
179
    /**
180
     * This holds the physical structure
181
     *
182
     * @var array
183
     * @access protected
184
     */
185
    protected $physicalStructure = [];
186
187
    /**
188
     * This holds the physical structure metadata
189
     *
190
     * @var array
191
     * @access protected
192
     */
193
    protected $physicalStructureInfo = [];
194
195
    /**
196
     * Is the physical structure loaded?
197
     * @see $physicalStructure
198
     *
199
     * @var bool
200
     * @access protected
201
     */
202
    protected $physicalStructureLoaded = false;
203
204
    /**
205
     * This holds the PID of the document or zero if not in database
206
     *
207
     * @var int
208
     * @access protected
209
     */
210
    protected $pid = 0;
211
212
    /**
213
     * This holds the documents' raw text pages with their corresponding
214
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
215
     *
216
     * @var array
217
     * @access protected
218
     */
219
    protected $rawTextArray = [];
220
221
    /**
222
     * Is the document instantiated successfully?
223
     *
224
     * @var bool
225
     * @access protected
226
     */
227
    protected $ready = false;
228
229
    /**
230
     * The METS file's / IIIF manifest's record identifier
231
     *
232
     * @var string
233
     * @access protected
234
     */
235
    protected $recordId;
236
237
    /**
238
     * This holds the singleton object of the document
239
     *
240
     * @var array (\Kitodo\Dlf\Common\Doc)
241
     * @static
242
     * @access protected
243
     */
244
    protected static $registry = [];
245
246
    /**
247
     * This holds the UID of the root document or zero if not multi-volumed
248
     *
249
     * @var int
250
     * @access protected
251
     */
252
    protected $rootId = 0;
253
254
    /**
255
     * Is the root id loaded?
256
     * @see $rootId
257
     *
258
     * @var bool
259
     * @access protected
260
     */
261
    protected $rootIdLoaded = false;
262
263
    /**
264
     * This holds the smLinks between logical and physical structMap
265
     *
266
     * @var array
267
     * @access protected
268
     */
269
    protected $smLinks = ['l2p' => [], 'p2l' => []];
270
271
    /**
272
     * Are the smLinks loaded?
273
     * @see $smLinks
274
     *
275
     * @var bool
276
     * @access protected
277
     */
278
    protected $smLinksLoaded = false;
279
280
    /**
281
     * This holds the logical structure
282
     *
283
     * @var array
284
     * @access protected
285
     */
286
    protected $tableOfContents = [];
287
288
    /**
289
     * Is the table of contents loaded?
290
     * @see $tableOfContents
291
     *
292
     * @var bool
293
     * @access protected
294
     */
295
    protected $tableOfContentsLoaded = false;
296
297
    /**
298
     * This holds the document's thumbnail location
299
     *
300
     * @var string
301
     * @access protected
302
     */
303
    protected $thumbnail = '';
304
305
    /**
306
     * Is the document's thumbnail location loaded?
307
     * @see $thumbnail
308
     *
309
     * @var bool
310
     * @access protected
311
     */
312
    protected $thumbnailLoaded = false;
313
314
    /**
315
     * This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
316
     *
317
     * @var string
318
     * @access protected
319
     */
320
    protected $toplevelId = '';
321
322
    /**
323
     * This holds the UID or the URL of the document
324
     *
325
     * @var mixed
326
     * @access protected
327
     */
328
    protected $uid = 0;
329
330
    /**
331
     * This holds the whole XML file as \SimpleXMLElement object
332
     *
333
     * @var \SimpleXMLElement
334
     * @access protected
335
     */
336
    protected $xml;
337
338
    /**
     * Resets the static registry to an empty array to prevent memory exhaustion
     *
     * @access public
     *
     * @static
     *
     * @return void
     */
    public static function clearRegistry()
    {
        // Replace the registry with a fresh, empty array.
        self::$registry = [];
    }
352
353
    /**
354
     * This ensures that the recordId, if existent, is retrieved from the document
355
     *
356
     * @access protected
357
     *
358
     * @abstract
359
     *
360
     * @param int $pid: ID of the configuration page with the recordId config
361
     *
362
     */
363
    protected abstract function establishRecordId($pid);
364
365
    /**
366
     * Source document PHP object which is represented by a Document instance
367
     *
368
     * @access protected
369
     *
370
     * @abstract
371
     *
372
     * @return \SimpleXMLElement|IiifResourceInterface An PHP object representation of
373
     * the current document. SimpleXMLElement for METS, IiifResourceInterface for IIIF
374
     */
375
    protected abstract function getDocument();
376
377
    /**
378
     * This gets the location of a downloadable file for a physical page or track
379
     *
380
     * @access public
381
     *
382
     * @abstract
383
     *
384
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
385
     *
386
     * @return string    The file's location as URL
387
     */
388
    public abstract function getDownloadLocation($id);
389
390
    /**
391
     * This gets the location of a file representing a physical page or track
392
     *
393
     * @access public
394
     *
395
     * @abstract
396
     *
397
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
398
     *
399
     * @return string The file's location as URL
400
     */
401
    public abstract function getFileLocation($id);
402
403
    /**
404
     * This gets the MIME type of a file representing a physical page or track
405
     *
406
     * @access public
407
     *
408
     * @abstract
409
     *
410
     * @param string $id: The @ID attribute of the file node
411
     *
412
     * @return string The file's MIME type
413
     */
414
    public abstract function getFileMimeType($id);
415
416
    /**
     * This is a singleton class, thus an instance must be created by this method
     *
     * The resource at $location is downloaded and probed: if it parses as XML it
     * is accepted only when it contains a mets:mets element; if it does not parse
     * as XML it is decoded as JSON and handed to the IIIF library.
     *
     * @access public
     *
     * @static
     *
     * @param string $location: The URL of XML file or the IRI of the IIIF resource
     * @param array $settings: Settings array; only the 'storagePid' key is read here
     * @param bool $forceReload: Force reloading the document instead of returning the cached instance
     *                           (not evaluated by this method at the moment)
     *
     * @return \Kitodo\Dlf\Common\Doc|null Instance of this class, either MetsDocument or IiifManifest,
     * or null if the location is invalid, cannot be fetched or holds neither METS nor IIIF
     */
    public static function &getInstance($location, $settings = [], $forceReload = false)
    {
        // Create new instance depending on format (METS or IIIF) ...
        $instance = null;
        $documentFormat = null;
        $xml = null;
        $iiif = null;

        // Try to load a file from the url
        if (GeneralUtility::isValidUrl($location)) {
            // Load extension configuration (used for the user agent and the IIIF thumbnail limits).
            $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
            // Set user-agent to identify self when fetching XML data.
            if (!empty($extConf['useragent'])) {
                @ini_set('user_agent', $extConf['useragent']);
            }
            $content = GeneralUtility::getUrl($location);
            if ($content !== false) {
                $xml = Helper::getXmlFileAsString($content);
                if ($xml !== false) {
                    /* @var $xml \SimpleXMLElement */
                    $xml->registerXPathNamespace('mets', 'http://www.loc.gov/METS/');
                    $xpathResult = $xml->xpath('//mets:mets');
                    $documentFormat = !empty($xpathResult) ? 'METS' : null;
                } else {
                    // Try to load file as IIIF resource instead.
                    $contentAsJsonArray = json_decode($content, true);
                    if ($contentAsJsonArray !== null) {
                        IiifHelper::setUrlReader(IiifUrlReader::getInstance());
                        IiifHelper::setMaxThumbnailHeight($extConf['iiifThumbnailHeight']);
                        IiifHelper::setMaxThumbnailWidth($extConf['iiifThumbnailWidth']);
                        $iiif = IiifHelper::loadIiifResource($contentAsJsonArray);
                        if ($iiif instanceof IiifResourceInterface) {
                            $documentFormat = 'IIIF';
                        }
                    }
                }
            }
        }

        // Sanitize input. Previously $pid was read before being defined, which always
        // produced 0 plus a notice; an absent setting still yields 0.
        // NOTE(review): 'storagePid' is assumed to be the settings key that carries
        // the storage page ID - confirm against the callers of getInstance().
        $pid = isset($settings['storagePid']) ? max(intval($settings['storagePid']), 0) : 0;
        // The constructors used to receive an undefined $uid. The $uid property is
        // documented as "the UID or the URL of the document", so the location is passed.
        // NOTE(review): the constructor signatures are not visible here - confirm
        // that a URL is accepted as first argument.
        if ($documentFormat === 'METS') {
            $instance = new MetsDocument($location, $pid, $xml);
        } elseif ($documentFormat === 'IIIF') {
            $instance = new IiifManifest($location, $pid, $iiif);
        }

        return $instance;
    }
481
482
    /**
483
     * This gets details about a logical structure element
484
     *
485
     * @access public
486
     *
487
     * @abstract
488
     *
489
     * @param string $id: The @ID attribute of the logical structure node (METS) or
490
     * the @id property of the Manifest / Range (IIIF)
491
     * @param bool $recursive: Whether to include the child elements / resources
492
     *
493
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
494
     */
495
    public abstract function getLogicalStructure($id, $recursive = false);
496
497
    /**
498
     * This extracts all the metadata for a logical structure node
499
     *
500
     * @access public
501
     *
502
     * @abstract
503
     *
504
     * @param string $id: The @ID attribute of the logical structure node (METS) or the @id property
505
     * of the Manifest / Range (IIIF)
506
     * @param int $cPid: The PID for the metadata definitions
507
     *                       (defaults to $this->cPid or $this->pid)
508
     *
509
     * @return array The logical structure node's / the IIIF resource's parsed metadata array
510
     */
511
    public abstract function getMetadata($id, $cPid = 0);
512
513
    /**
     * This returns the first corresponding physical page number of a given logical page label
     *
     * The entries of $this->physicalStructureInfo are scanned in order and the
     * zero-based position of the first entry whose 'orderlabel' contains the given
     * label is returned. The last successful lookup is remembered, so asking for
     * the same label again returns the remembered position without scanning.
     *
     * @access public
     *
     * @param string $logicalPage: The label (or a part of the label) of the logical page
     *
     * @return int The physical page number, or 1 if the label is empty or not found
     */
    public function getPhysicalPage($logicalPage)
    {
        // Always search for a string: on PHP 7 a non-string needle would be
        // interpreted by strpos() as a character ordinal instead of a label.
        $logicalPage = (string) $logicalPage;
        // An empty needle raises a warning on PHP 7 and matches everything on
        // PHP 8; treat it uniformly as "not found".
        if ($logicalPage === '') {
            return 1;
        }
        // Strict string comparison: a loose "==" would treat numeric-like labels
        // such as "07" and "7" as equal and hand out the wrong remembered page.
        if (
            $this->lastSearchedPhysicalPage['logicalPage'] !== null
            && (string) $this->lastSearchedPhysicalPage['logicalPage'] === $logicalPage
        ) {
            return $this->lastSearchedPhysicalPage['physicalPage'];
        }
        $physicalPage = 0;
        foreach ($this->physicalStructureInfo as $page) {
            // Entries without an order label can never match; skip them quietly.
            if (
                isset($page['orderlabel'])
                && strpos((string) $page['orderlabel'], $logicalPage) !== false
            ) {
                $this->lastSearchedPhysicalPage['logicalPage'] = $logicalPage;
                $this->lastSearchedPhysicalPage['physicalPage'] = $physicalPage;
                return $physicalPage;
            }
            $physicalPage++;
        }
        // Nothing matched: fall back to page 1 (unsuccessful lookups are not remembered).
        return 1;
    }
542
543
    /**
544
     * This extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas. Text might be
545
     * given as ALTO for METS or as annotations or ALTO for IIIF resources.
546
     *
547
     * @access public
548
     *
549
     * @abstract
550
     *
551
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
552
     * of the Manifest / Range (IIIF)
553
     *
554
     * @return string The OCR full text
555
     */
556
    public abstract function getFullText($id);
557
558
    /**
     * This extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas from an
     * XML full text representation (currently only ALTO). For IIIF manifests, ALTO documents have
     * to be given in the Canvas' / Manifest's "seeAlso" property.
     *
     * The configured fulltext file groups are tried in order; the first group that has
     * a file for the node is fetched and converted by the class registered for its format.
     * On success the result is also stored in $this->rawTextArray[$id].
     *
     * @access protected
     *
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     *
     * @return string The OCR full text, or an empty string if the node is unknown, the file
     * cannot be loaded or the text format is not supported
     */
    protected function getFullTextFromXml($id)
    {
        $fullText = '';
        // Defined up front so the checks and log messages below never read an
        // undefined variable when no fulltext file group matches.
        $fileContent = '';
        $textFormat = '';
        // Load available text formats, ...
        $this->loadFormats();
        // ... physical structure ...
        $this->_getPhysicalStructure();
        // ... and extension configuration.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        $fileGrpsFulltext = GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext']);
        if (empty($this->physicalStructureInfo[$id])) {
            $this->logger->warning('Invalid structure node @ID "' . $id . '"');
            return $fullText;
        }
        // Iterate over all configured groups; a "while (array_shift())" loop would
        // stop at the first empty or "0" entry and ignore the groups after it.
        foreach ($fileGrpsFulltext as $fileGrpFulltext) {
            if (empty($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext])) {
                continue;
            }
            // Get full text file.
            $fileContent = GeneralUtility::getUrl($this->getFileLocation($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext]));
            if ($fileContent === false) {
                $this->logger->warning('Couldn\'t load full text file for structure node @ID "' . $id . '"');
                return $fullText;
            }
            $textFormat = $this->getTextFormat($fileContent);
            // Only the first group that provides a file is used.
            break;
        }
        // Is this text format supported?
        // This part actually differs from previous version of indexed OCR
        if (!empty($fileContent) && !empty($this->formats[$textFormat])) {
            $textMiniOcr = '';
            if (!empty($this->formats[$textFormat]['class'])) {
                $class = $this->formats[$textFormat]['class'];
                // Get the raw text from class.
                if (
                    class_exists($class)
                    && ($obj = GeneralUtility::makeInstance($class)) instanceof FulltextInterface
                ) {
                    // Load XML from file.
                    $ocrTextXml = Helper::getXmlFileAsString($fileContent);
                    $textMiniOcr = $obj->getTextAsMiniOcr($ocrTextXml);
                    $this->rawTextArray[$id] = $textMiniOcr;
                } else {
                    // The message names the method that is actually invoked above.
                    $this->logger->warning('Invalid class/method "' . $class . '->getTextAsMiniOcr()" for text format "' . $textFormat . '"');
                }
            }
            $fullText = $textMiniOcr;
        } else {
            $this->logger->warning('Unsupported text format "' . $textFormat . '" in physical node with @ID "' . $id . '"');
        }
        return $fullText;
    }
621
622
    /**
     * Get format of the OCR full text
     *
     * The format is the upper-cased name of the XML root element.
     *
     * @access private
     *
     * @param string $fileContent: content of the XML file
     *
     * @return string The format of the OCR full text, or an empty string if the
     * content cannot be parsed as XML
     */
    private function getTextFormat($fileContent)
    {
        $xml = Helper::getXmlFileAsString($fileContent);
        // The helper does not always return an XML object (getInstance() checks its
        // result against false); calling getName() on such a value would be fatal.
        if (!($xml instanceof \SimpleXMLElement)) {
            return '';
        }
        // Get the root element's name as text format.
        return strtoupper($xml->getName());
    }
636
637
    /**
     * This determines a title for the given document
     *
     * The title is read from the document's record in table "tx_dlf_documents".
     * If it is empty and $recursive is set, the document referenced by the
     * record's "partof" field is asked for its title instead.
     *
     * @access public
     *
     * @static
     *
     * @param int $uid: The UID of the document
     * @param bool $recursive: Search superior documents for a title, too?
     *
     * @return string The title of the document itself or a parent document
     */
    public static function getTitle($uid, $recursive = false)
    {
        // Sanitize input.
        $uid = max(intval($uid), 0);
        if (!$uid) {
            Helper::log('Invalid UID ' . $uid . ' for document', LOG_SEVERITY_ERROR);
            return '';
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');

        $queryBuilder
            ->select(
                'tx_dlf_documents.title',
                'tx_dlf_documents.partof'
            )
            ->from('tx_dlf_documents')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_documents.uid', $uid),
                Helper::whereExpression('tx_dlf_documents')
            )
            ->setMaxResults(1);

        $row = $queryBuilder->execute()->fetch();
        if (!$row) {
            Helper::log('No document with UID ' . $uid . ' found or document not accessible', LOG_SEVERITY_WARNING);
            return '';
        }

        // Get title information.
        $title = $row['title'];
        $partof = $row['partof'];
        // Fall back to the superior document only if asked to, if there is no own
        // title and if the record points to a document other than itself.
        $askParent = $recursive
            && empty($title)
            && intval($partof)
            && $partof != $uid;
        if ($askParent) {
            $title = self::getTitle($partof, true);
        }
        return $title;
    }
692
693
    /**
     * This extracts all the metadata for the toplevel logical structure node / resource
     *
     * For METS documents the metadata is additionally passed to addMetadataFromMets().
     * If the result has a 'record_id' entry that does not yet contain this document's
     * record identifier, the identifier is prepended to it.
     *
     * @access public
     *
     * @param int $cPid: The PID for the metadata definitions
     *
     * @return array The logical structure node's / resource's parsed metadata array
     */
    public function getTitledata($cPid = 0)
    {
        $titledata = $this->getMetadata($this->_getToplevelId(), $cPid);

        // Add information from METS structural map to titledata array.
        if ($this instanceof MetsDocument) {
            $this->addMetadataFromMets($titledata, $this->_getToplevelId());
        }

        // Set record identifier for METS file / IIIF manifest if not present.
        $recordIdMissing = is_array($titledata)
            && array_key_exists('record_id', $titledata)
            && !empty($this->recordId)
            && !in_array($this->recordId, $titledata['record_id']);
        if ($recordIdMissing) {
            array_unshift($titledata['record_id'], $this->recordId);
        }

        return $titledata;
    }
723
724
    /**
     * Traverse a logical (sub-) structure tree to find the structure with the requested logical id and return its depth.
     *
     * Elements are visited in order; the children of an element are searched
     * before its following siblings.
     *
     * @access protected
     *
     * @param array $structure: logical structure array
     * @param int $depth: current tree depth
     * @param string $logId: ID of the logical structure whose depth is requested
     *
     * @return int|bool: false if structure with $logId is not a child of this substructure,
     * or the actual depth.
     */
    protected function getTreeDepth($structure, $depth, $logId)
    {
        foreach ($structure as $node) {
            // Direct hit on this level.
            if ($node['id'] == $logId) {
                return $depth;
            }
            // Leaf without a 'children' key: nothing to descend into.
            if (!array_key_exists('children', $node)) {
                continue;
            }
            $depthBelow = $this->getTreeDepth($node['children'], $depth + 1, $logId);
            if ($depthBelow !== false) {
                return $depthBelow;
            }
        }
        return false;
    }
750
751
    /**
     * Get the tree depth of a logical structure element within the table of content
     *
     * Elements on the first level of the table of contents have depth 1.
     *
     * @access public
     *
     * @param string $logId: The id of the logical structure element whose depth is requested
     *
     * @return int|bool tree depth as integer or false if no element with $logId exists within the TOC.
     */
    public function getStructureDepth($logId)
    {
        $tableOfContents = $this->_getTableOfContents();
        // Start counting at 1 for the top level of the table of contents.
        return $this->getTreeDepth($tableOfContents, 1, $logId);
    }
763
764
    /**
765
     * This sets some basic class properties
766
     *
767
     * @access protected
768
     *
769
     * @abstract
770
     *
771
     * @return void
772
     */
773
    protected abstract function init();
774
775
    /**
776
     * Reuse any document object that might have been already loaded to determine whether document is METS or IIIF
777
     *
778
     * @access protected
779
     *
780
     * @abstract
781
     *
782
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: any instance that has already been loaded
783
     *
784
     * @return bool true if $preloadedDocument can actually be reused, false if it has to be loaded again
785
     */
786
    protected abstract function setPreloadedDocument($preloadedDocument);
787
788
    /**
789
     * METS/IIIF specific part of loading a location
790
     *
791
     * @access protected
792
     *
793
     * @abstract
794
     *
795
     * @param string $location: The URL of the file to load
796
     *
797
     * @return bool true on success or false on failure
798
     */
799
    protected abstract function loadLocation($location);
800
801
    /**
     * Load XML file / IIIF resource from URL
     *
     * Validates the location, applies the configured user agent (if any) and
     * delegates the actual loading to the format specific loadLocation().
     *
     * @access protected
     *
     * @param string $location: The URL of the file to load
     *
     * @return bool true on success or false on failure
     */
    protected function load($location)
    {
        // Refuse anything that is not a valid URL.
        if (!GeneralUtility::isValidUrl($location)) {
            $this->logger->error('Invalid file location "' . $location . '" for document loading');
            return false;
        }
        // Load extension configuration
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        // Set user-agent to identify self when fetching XML / JSON-LD data.
        if (!empty($extConf['useragent'])) {
            @ini_set('user_agent', $extConf['useragent']);
        }
        // Loading the XML / JSON-LD file itself is format specific.
        return $this->loadLocation($location);
    }
827
828
    /**
829
     * Analyze the document if it contains any fulltext that needs to be indexed.
830
     *
831
     * @access protected
832
     *
833
     * @abstract
834
     */
835
    protected abstract function ensureHasFulltextIsSet();
836
837
    /**
     * Register all available data formats
     *
     * Reads the records with pid 0 from table "tx_dlf_formats" and adds them to
     * $this->formats, overwriting entries of the same type. The query is only
     * run on the first call; later calls return immediately.
     *
     * @access protected
     *
     * @return void
     */
    protected function loadFormats()
    {
        // Nothing to do if the formats have been loaded before.
        if ($this->formatsLoaded) {
            return;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_formats');

        // Get available data formats from database.
        $queryBuilder
            ->select(
                'tx_dlf_formats.type AS type',
                'tx_dlf_formats.root AS root',
                'tx_dlf_formats.namespace AS namespace',
                'tx_dlf_formats.class AS class'
            )
            ->from('tx_dlf_formats')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_formats.pid', 0)
            );
        $statement = $queryBuilder->execute();

        // Update format registry row by row.
        while ($format = $statement->fetch()) {
            $this->formats[$format['type']] = [
                'rootElement' => $format['root'],
                'namespaceURI' => $format['namespace'],
                'class' => $format['class']
            ];
        }

        $this->formatsLoaded = true;
    }
875
876
    /**
     * Register the namespaces of all known data formats on an XML query object
     *
     * The lower-cased format type is used as namespace prefix. If $obj is neither a
     * \SimpleXMLElement nor a \DOMXPath, an error is logged and nothing is registered.
     *
     * @access public
     *
     * @param \SimpleXMLElement|\DOMXPath &$obj: \SimpleXMLElement or \DOMXPath object
     *
     * @return void
     */
    public function registerNamespaces(&$obj)
    {
        // TODO Check usage. XML specific method does not seem to be used anywhere outside this class within the project, but it is public and may be used by extensions.
        $this->loadFormats();

        $isSimpleXml = $obj instanceof \SimpleXMLElement;
        if (!$isSimpleXml && !($obj instanceof \DOMXPath)) {
            $this->logger->error('Given object is neither a SimpleXMLElement nor a DOMXPath instance');
            return;
        }

        // Register every metadata format's namespace with the matching API call.
        foreach ($this->formats as $formatType => $formatConf) {
            $prefix = strtolower($formatType);
            if ($isSimpleXml) {
                $obj->registerXPathNamespace($prefix, $formatConf['namespaceURI']);
            } else {
                $obj->registerNamespace($prefix, $formatConf['namespaceURI']);
            }
        }
    }
903
904
    /**
     * This saves the document to the database and index
     *
     * Steps performed, in order:
     *  - resolve the target PID (falls back to the document's current PID),
     *  - read the title metadata and require a record identifier,
     *  - resolve the structure type to its UID in "tx_dlf_structures",
     *  - resolve collections and the owning library to UIDs, inserting
     *    missing "tx_dlf_collections" / "tx_dlf_libraries" records on the fly,
     *  - determine the parent document and derive volume sorting values,
     *  - write the "tx_dlf_documents" record and finally add it to the Solr index.
     *
     * Note that the method returns false when no Solr core is given, even though
     * the database record has been written at that point.
     *
     * @access public
     *
     * @param int $pid: The PID of the saved record
     * @param int $core: The UID of the Solr core for indexing
     * @param int|string $owner: UID or index_name of owner to set while indexing;
     * if empty, the document's own "owner" metadata is used and 'default' otherwise
     *
     * @return bool true on success or false on failure
     */
    public function save($pid = 0, $core = 0, $owner = null)
    {
        if (\TYPO3_MODE !== 'BE') {
            $this->logger->error('Saving a document is only allowed in the backend');
            return false;
        }
        // Make sure $pid is a non-negative integer.
        $pid = max(intval($pid), 0);
        // Make sure $core is a non-negative integer.
        $core = max(intval($core), 0);
        // If $pid is not given, try to get it elsewhere.
        if (
            !$pid
            && $this->pid
        ) {
            // Retain current PID.
            $pid = $this->pid;
        } elseif (!$pid) {
            $this->logger->error('Invalid PID ' . $pid . ' for document saving');
            return false;
        }
        // Set PID for metadata definitions.
        $this->cPid = $pid;
        // Set UID placeholder if not updating existing record.
        if ($pid != $this->pid) {
            $this->uid = uniqid('NEW');
        }
        // Get metadata array.
        $metadata = $this->getTitledata($pid);
        // Check for record identifier.
        if (empty($metadata['record_id'][0])) {
            $this->logger->error('No record identifier found to avoid duplication');
            return false;
        }
        // Load plugin configuration.
        $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_structures');

        // Get UID for structure type.
        $result = $queryBuilder
            ->select('tx_dlf_structures.uid AS uid')
            ->from('tx_dlf_structures')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($pid)),
                $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($metadata['type'][0])),
                Helper::whereExpression('tx_dlf_structures')
            )
            ->setMaxResults(1)
            ->execute();

        if ($resArray = $result->fetch()) {
            $structure = $resArray['uid'];
        } else {
            $this->logger->error('Could not identify document/structure type "' . $queryBuilder->expr()->literal($metadata['type'][0]) . '"');
            return false;
        }
        $metadata['type'][0] = $structure;

        // Remove appended "valueURI" from authors' names for storing in database.
        // (Name and URI are separated by the ASCII unit separator, chr(31).)
        foreach ($metadata['author'] as $i => $author) {
            $splitName = explode(chr(31), $author);
            $metadata['author'][$i] = $splitName[0];
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_collections');
        // Get hidden records, too.
        $queryBuilder
            ->getRestrictions()
            ->removeByType(HiddenRestriction::class);

        // Get UIDs for collections.
        $result = $queryBuilder
            ->select(
                'tx_dlf_collections.index_name AS index_name',
                'tx_dlf_collections.uid AS uid'
            )
            ->from('tx_dlf_collections')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_collections.pid', intval($pid)),
                $queryBuilder->expr()->in('tx_dlf_collections.sys_language_uid', [-1, 0])
            )
            ->execute();

        $collUid = [];
        while ($resArray = $result->fetch()) {
            $collUid[$resArray['index_name']] = $resArray['uid'];
        }
        $collections = [];
        foreach ($metadata['collection'] as $collection) {
            if (!empty($collUid[$collection])) {
                // Add existing collection's UID.
                $collections[] = $collUid[$collection];
            } else {
                // Insert new collection.
                $collNewUid = uniqid('NEW');
                $collData['tx_dlf_collections'][$collNewUid] = [
                    'pid' => $pid,
                    'label' => $collection,
                    'index_name' => $collection,
                    'oai_name' => (!empty($conf['publishNewCollections']) ? Helper::getCleanString($collection) : ''),
                    'description' => '',
                    'documents' => 0,
                    'owner' => 0,
                    'status' => 0,
                ];
                $substUid = Helper::processDBasAdmin($collData);
                // Prevent double insertion.
                unset($collData);
                // Add new collection's UID.
                $collections[] = $substUid[$collNewUid];
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newCollection'), $collection, $substUid[$collNewUid])),
                        Helper::getMessage('flash.attention', true),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        true
                    );
                }
            }
        }
        $metadata['collection'] = $collections;

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_libraries');

        // Get UID for owner.
        if (empty($owner)) {
            // Prefer the owner stated in the document's metadata and only fall back
            // to 'default' if there is none. (The condition used to be inverted,
            // which discarded a given owner and kept the empty value otherwise.)
            $owner = !empty($metadata['owner'][0]) ? $metadata['owner'][0] : 'default';
        }
        if (!MathUtility::canBeInterpretedAsInteger($owner)) {
            // $owner is an index_name: look up the library's UID.
            $result = $queryBuilder
                ->select('tx_dlf_libraries.uid AS uid')
                ->from('tx_dlf_libraries')
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_libraries.pid', intval($pid)),
                    $queryBuilder->expr()->eq('tx_dlf_libraries.index_name', $queryBuilder->expr()->literal($owner)),
                    Helper::whereExpression('tx_dlf_libraries')
                )
                ->setMaxResults(1)
                ->execute();

            if ($resArray = $result->fetch()) {
                $ownerUid = $resArray['uid'];
            } else {
                // Insert new library.
                $libNewUid = uniqid('NEW');
                $libData['tx_dlf_libraries'][$libNewUid] = [
                    'pid' => $pid,
                    'label' => $owner,
                    'index_name' => $owner,
                    'website' => '',
                    'contact' => '',
                    'image' => '',
                    'oai_label' => '',
                    'oai_base' => '',
                    'opac_label' => '',
                    'opac_base' => '',
                    'union_label' => '',
                    'union_base' => '',
                ];
                $substUid = Helper::processDBasAdmin($libData);
                // Add new library's UID.
                $ownerUid = $substUid[$libNewUid];
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newLibrary'), $owner, $ownerUid)),
                        Helper::getMessage('flash.attention', true),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        true
                    );
                }
            }
            $owner = $ownerUid;
        }
        $metadata['owner'][0] = $owner;
        // Get UID of parent document.
        $partof = $this->getParentDocumentUidForSaving($pid, $core, $owner);
        // Use the date of publication or title as alternative sorting metric for parts of multi-part works.
        if (!empty($partof)) {
            if (
                empty($metadata['volume'][0])
                && !empty($metadata['year'][0])
            ) {
                $metadata['volume'] = $metadata['year'];
            }
            if (empty($metadata['volume_sorting'][0])) {
                // If METS @ORDER is given it is preferred over year_sorting and year.
                if (!empty($metadata['mets_order'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['mets_order'][0];
                } elseif (!empty($metadata['year_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year_sorting'][0];
                } elseif (!empty($metadata['year'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year'][0];
                }
            }
            // If volume_sorting is still empty, try to use title_sorting or METS @ORDERLABEL finally (workaround for newspapers)
            if (empty($metadata['volume_sorting'][0])) {
                if (!empty($metadata['title_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['title_sorting'][0];
                } elseif (!empty($metadata['mets_orderlabel'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['mets_orderlabel'][0];
                }
            }
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');

        // Get metadata for lists and sorting.
        $result = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadata.is_listed AS is_listed',
                'tx_dlf_metadata.is_sortable AS is_sortable'
            )
            ->from('tx_dlf_metadata')
            ->where(
                $queryBuilder->expr()->orX(
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_listed', 1),
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_sortable', 1)
                ),
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($pid)),
                Helper::whereExpression('tx_dlf_metadata')
            )
            ->execute();

        $listed = [];
        $sortable = [];

        while ($resArray = $result->fetch()) {
            if (!empty($metadata[$resArray['index_name']])) {
                if ($resArray['is_listed']) {
                    $listed[$resArray['index_name']] = $metadata[$resArray['index_name']];
                }
                if ($resArray['is_sortable']) {
                    // Only the first value is used for sorting.
                    $sortable[$resArray['index_name']] = $metadata[$resArray['index_name']][0];
                }
            }
        }
        // Fill data array.
        $data['tx_dlf_documents'][$this->uid] = [
            'pid' => $pid,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['starttime'] => 0,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['endtime'] => 0,
            'prod_id' => $metadata['prod_id'][0],
            'location' => $this->location,
            'record_id' => $metadata['record_id'][0],
            'opac_id' => $metadata['opac_id'][0],
            'union_id' => $metadata['union_id'][0],
            'urn' => $metadata['urn'][0],
            'purl' => $metadata['purl'][0],
            'title' => $metadata['title'][0],
            'title_sorting' => $metadata['title_sorting'][0],
            'author' => implode('; ', $metadata['author']),
            'year' => implode('; ', $metadata['year']),
            'place' => implode('; ', $metadata['place']),
            'thumbnail' => $this->_getThumbnail(true),
            'metadata' => serialize($listed),
            'metadata_sorting' => serialize($sortable),
            'structure' => $metadata['type'][0],
            'partof' => $partof,
            'volume' => $metadata['volume'][0],
            'volume_sorting' => $metadata['volume_sorting'][0],
            'license' => $metadata['license'][0],
            'terms' => $metadata['terms'][0],
            'restrictions' => $metadata['restrictions'][0],
            'out_of_print' => $metadata['out_of_print'][0],
            'rights_info' => $metadata['rights_info'][0],
            'collections' => $metadata['collection'],
            'mets_label' => $metadata['mets_label'][0],
            'mets_orderlabel' => $metadata['mets_orderlabel'][0],
            'mets_order' => $metadata['mets_order'][0],
            'owner' => $metadata['owner'][0],
            'solrcore' => $core,
            'status' => 0,
            'document_format' => $metadata['document_format'][0],
        ];
        // Unhide hidden documents.
        if (!empty($conf['unhideOnIndex'])) {
            $data['tx_dlf_documents'][$this->uid][$GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['disabled']] = 0;
        }
        // Process data.
        $newIds = Helper::processDBasAdmin($data);
        // Replace placeholder with actual UID.
        if (strpos($this->uid, 'NEW') === 0) {
            $this->uid = $newIds[$this->uid];
            $this->pid = $pid;
            $this->parentId = $partof;
        }
        if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
            Helper::addMessage(
                htmlspecialchars(sprintf(Helper::getMessage('flash.documentSaved'), $metadata['title'][0], $this->uid)),
                Helper::getMessage('flash.done', true),
                \TYPO3\CMS\Core\Messaging\FlashMessage::OK,
                true
            );
        }
        // Add document to index.
        if ($core) {
            return Indexer::add($this, $core);
        } else {
            $this->logger->notice('Invalid UID "' . $core . '" for Solr core');
            return false;
        }
    }
1223
1224
    /**
     * Determine the UID of the parent document, if the current document has one.
     *
     * Implementations may also save the parent document to the database and the Solr
     * index if its $pid differs from the given $pid. Currently this only applies to
     * METS documents. save() stores the returned value in the "partof" field.
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $pid: The PID save() is writing the document to
     * @param int $core: The UID of the Solr core as passed on by save()
     * @param int|string $owner: The owner as resolved by save()
     *
     * @return int The parent document's id.
     */
    abstract protected function getParentDocumentUidForSaving($pid, $core, $owner);
1236
1237
    /**
     * Getter for $this->cPid (accessed via __get())
     *
     * @access protected
     *
     * @return int The PID of the metadata definitions
     */
    protected function _getCPid()
    {
        return $this->cPid;
    }
1248
1249
    /**
     * Getter for $this->hasFulltext (accessed via __get())
     *
     * ensureHasFulltextIsSet() is called before the value is returned.
     *
     * @access protected
     *
     * @return bool Are there any fulltext files available?
     */
    protected function _getHasFulltext()
    {
        // Let the format specific implementation analyze the document first.
        $this->ensureHasFulltextIsSet();

        return $this->hasFulltext;
    }
1261
1262
    /**
     * Getter for $this->location (accessed via __get())
     *
     * @access protected
     *
     * @return string The location of the document
     */
    protected function _getLocation()
    {
        return $this->location;
    }
1273
1274
    /**
     * Format specific part of building the document's metadata array.
     *
     * Called by _getMetadataArray() whenever the metadata array has not been
     * loaded yet or was built for a different PID.
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $cPid: The PID of the metadata definitions to use
     */
    abstract protected function prepareMetadataArray($cPid);
1284
1285
    /**
     * Build (or return the cached) array of the document's metadata
     *
     * The PID of the metadata definitions is $this->cPid or, if that is not set,
     * $this->pid. It is stored at index 0 of the array, so a change of the PID
     * triggers a rebuild on the next call.
     *
     * @access protected
     *
     * @return array Array of metadata with their corresponding logical structure node ID as key
     */
    protected function _getMetadataArray()
    {
        // Determine the PID of the metadata definitions.
        $cPid = $this->cPid ?: $this->pid;
        if (!$cPid) {
            $this->logger->error('Invalid PID ' . $cPid . ' for metadata definitions');
            return [];
        }

        // A rebuild is needed on first access and after the PID has changed.
        $needsRebuild = !$this->metadataArrayLoaded || $this->metadataArray[0] != $cPid;
        if ($needsRebuild) {
            $this->prepareMetadataArray($cPid);
            $this->metadataArray[0] = $cPid;
            $this->metadataArrayLoaded = true;
        }

        return $this->metadataArray;
    }
1310
1311
    /**
     * Getter for $this->numPages (accessed via __get())
     *
     * _getPhysicalStructure() is called before the value is returned.
     *
     * @access protected
     *
     * @return int The total number of pages and/or tracks
     */
    protected function _getNumPages()
    {
        // Called for its side effect only; the returned array is not needed here.
        $this->_getPhysicalStructure();

        return $this->numPages;
    }
1323
1324
    /**
     * Getter for $this->parentId (accessed via __get())
     *
     * @access protected
     *
     * @return int The UID of the parent document or zero if not applicable
     */
    protected function _getParentId()
    {
        return $this->parentId;
    }
1335
1336
    /**
     * Build the array of the document's physical structure.
     *
     * @access protected
     *
     * @abstract
     *
     * @return array Array of physical elements' id, type, label and file representations ordered
     * by @ORDER attribute / IIIF Sequence's Canvases
     */
    abstract protected function _getPhysicalStructure();
1347
1348
    /**
     * Return the metadata of the document's physical structure
     *
     * If the physical structure has not been loaded yet, _getPhysicalStructure()
     * is called first.
     *
     * @access protected
     *
     * @return array Array of elements' type, label and file representations ordered by @ID attribute / Canvas order
     */
    protected function _getPhysicalStructureInfo()
    {
        $alreadyLoaded = $this->physicalStructureLoaded;
        if (!$alreadyLoaded) {
            // Physical structure array is missing: build it now.
            $this->_getPhysicalStructure();
        }

        return $this->physicalStructureInfo;
    }
1364
1365
    /**
     * Getter for $this->pid (accessed via __get())
     *
     * @access protected
     *
     * @return int The PID of the document or zero if not in database
     */
    protected function _getPid()
    {
        return $this->pid;
    }
1376
1377
    /**
     * Getter for $this->ready (accessed via __get())
     *
     * @access protected
     *
     * @return bool Is the document instantiated successfully?
     */
    protected function _getReady()
    {
        return $this->ready;
    }
1388
1389
    /**
     * Getter for $this->recordId (accessed via __get())
     *
     * @access protected
     *
     * @return mixed The METS file's / IIIF manifest's record identifier
     */
    protected function _getRecordId()
    {
        return $this->recordId;
    }
1400
1401
    /**
     * Getter for $this->rootId (accessed via __get())
     *
     * On first access the value is taken over from the parent document's rootId,
     * provided the document has a parent; the result is cached afterwards.
     *
     * @access protected
     *
     * @return int The UID of the root document or zero if not applicable
     */
    protected function _getRootId()
    {
        // Resolved before: return the cached value.
        if ($this->rootIdLoaded) {
            return $this->rootId;
        }

        if ($this->parentId) {
            // Ask the parent document, looked up in the same storage PID.
            $parentDocument = self::getInstance($this->parentId, ['storagePid' => $this->pid]);
            $this->rootId = $parentDocument->rootId;
        }
        $this->rootIdLoaded = true;

        return $this->rootId;
    }
1419
1420
    /**
     * Return the smLinks between logical and physical structMap (METS); for IIIF the
     * relation between Canvases and Manifests / Ranges is modelled in the same way.
     *
     * @access protected
     *
     * @abstract
     *
     * @return array The links between logical and physical nodes / Range, Manifest and Canvas
     */
    abstract protected function _getSmLinks();
1431
1432
    /**
     * Build (or return the cached) array of the document's logical structure
     *
     * On first access getLogicalStructure('', true) is called before
     * $this->tableOfContents is returned.
     *
     * @access protected
     *
     * @return array Array of structure nodes' id, label, type and physical page indexes/mptr / Canvas link with original hierarchy preserved
     */
    protected function _getTableOfContents()
    {
        // Logical structure array is available already.
        if ($this->tableOfContentsLoaded) {
            return $this->tableOfContents;
        }

        // Fetch all logical structures.
        $this->getLogicalStructure('', true);
        $this->tableOfContentsLoaded = true;

        return $this->tableOfContents;
    }
1449
1450
    /**
     * Return the location of the document's thumbnail.
     *
     * @access protected
     *
     * @abstract
     *
     * @param bool $forceReload: Force reloading the thumbnail instead of returning the cached value
     *
     * @return string The document's thumbnail location
     */
    abstract protected function _getThumbnail($forceReload = false);
1462
1463
    /**
     * Return the ID of the toplevel logical structure node.
     *
     * @access protected
     *
     * @abstract
     *
     * @return string The logical structure node's ID
     */
    abstract protected function _getToplevelId();
1473
1474
    /**
     * Getter for $this->uid (accessed via __get())
     *
     * @access protected
     *
     * @return mixed The UID or the URL of the document
     */
    protected function _getUid()
    {
        return $this->uid;
    }
1485
1486
    /**
     * Setter for $this->cPid (accessed via __set())
     *
     * The value is converted to an integer; negative values are stored as zero.
     *
     * @access protected
     *
     * @param int $value: The new PID for the metadata definitions
     *
     * @return void
     */
    protected function _setCPid($value)
    {
        $newPid = intval($value);
        $this->cPid = max($newPid, 0);
    }
1499
1500
    /**
     * This is a singleton class, thus the constructor should be private/protected
     * (Get an instance of this class by calling \Kitodo\Dlf\Common\Doc::getInstance())
     *
     * Creates the logger, registers the preloaded document, runs init() and
     * establishes the record identifier. The lookup in "tx_dlf_documents" that
     * used to follow these calls sat behind an unconditional return and was never
     * executed; it has been removed. Consequently $this->uid, $this->location and
     * $this->ready are not set by the constructor.
     *
     * @access protected
     *
     * @param int|string $uid: The UID of the document to parse or URL to XML file
     * (currently not evaluated here, kept so that existing callers keep working)
     * @param int $pid: PID that is handed on to establishRecordId()
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: Either null or the \SimpleXMLElement
     * or IiifResourceInterface that has been loaded to determine the basic document format.
     *
     * @return void
     */
    protected function __construct($uid, $pid, $preloadedDocument)
    {
        // Create the logger first: __get() and __set() of this class write to
        // $this->logger, so it has to exist before any other method runs.
        $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger();
        $this->setPreloadedDocument($preloadedDocument);
        $this->init();
        $this->establishRecordId($pid);
    }
1622
1623
    /**
     * Magic getter: reads of inaccessible properties are routed to the
     * matching "_get<Property>()" method
     *
     * @access public
     *
     * @param string $var: Name of variable to get
     *
     * @return mixed Result of the getter method, or null (after a warning has
     * been logged) if the property or its getter method does not exist
     */
    public function __get($var)
    {
        $getter = '_get' . ucfirst($var);
        // Only delegate when both the property and its getter are defined.
        if (property_exists($this, $var) && method_exists($this, $getter)) {
            return $this->$getter();
        }
        $this->logger->warning('There is no getter function for property "' . $var . '"');
        return null;
    }
1645
1646
    /**
     * Magic method backing isset() and empty() on inaccessible properties
     *
     * The value is obtained through __get(), so a missing property or getter
     * also triggers the warning logged there.
     *
     * @access public
     *
     * @param string $var: Name of variable to check
     *
     * @return bool true if variable is set and not empty, false otherwise
     */
    public function __isset($var)
    {
        $current = $this->__get($var);
        return !empty($current);
    }
1659
1660
    /**
     * Magic setter: writes to inaccessible properties are routed to the
     * matching "_set<Property>()" method
     *
     * If the property or its setter method does not exist, a warning is logged
     * and nothing is changed.
     *
     * @access public
     *
     * @param string $var: Name of variable to set
     * @param mixed $value: New value of variable
     *
     * @return void
     */
    public function __set($var, $value)
    {
        $setter = '_set' . ucfirst($var);
        // Only delegate when both the property and its setter are defined.
        if (property_exists($this, $var) && method_exists($this, $setter)) {
            $this->$setter($value);
            return;
        }
        $this->logger->warning('There is no setter function for property "' . $var . '"');
    }
1682
}
1683