Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Pull Request — dev-extbase-fluid (#746)
by Alexander
03:16 queued 12s
created

Doc::_getHasFulltext()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 0
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
5
 *
6
 * This file is part of the Kitodo and TYPO3 projects.
7
 *
8
 * @license GNU General Public License version 3 or later.
9
 * For the full copyright and license information, please read the
10
 * LICENSE.txt file that was distributed with this source code.
11
 */
12
13
namespace Kitodo\Dlf\Common;
14
15
use Kitodo\Dlf\Domain\Repository\DocumentRepository;
16
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
17
use TYPO3\CMS\Core\Database\ConnectionPool;
18
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
19
use TYPO3\CMS\Core\Log\LogManager;
20
use TYPO3\CMS\Core\Utility\GeneralUtility;
21
use TYPO3\CMS\Core\Utility\MathUtility;
22
use TYPO3\CMS\Extbase\Configuration\ConfigurationManager;
23
use Ubl\Iiif\Presentation\Common\Model\Resources\IiifResourceInterface;
24
use Ubl\Iiif\Tools\IiifHelper;
25
26
/**
27
 * Document class for the 'dlf' extension
28
 *
29
 * @author Sebastian Meyer <[email protected]>
30
 * @author Henrik Lochmann <[email protected]>
31
 * @package TYPO3
32
 * @subpackage dlf
33
 * @access public
34
 * @property int $cPid This holds the PID for the configuration
35
 * @property-read bool $hasFulltext Are there any fulltext files available?
36
 * @property-read string $location This holds the documents location
37
 * @property-read array $metadataArray This holds the documents' parsed metadata array
38
 * @property-read int $numPages This holds the total number of pages
39
 * @property-read int $parentId This holds the UID of the parent document or zero if not multi-volumed
40
 * @property-read array $physicalStructure This holds the physical structure
41
 * @property-read array $physicalStructureInfo This holds the physical structure metadata
42
 * @property-read int $pid This holds the PID of the document or zero if not in database
43
 * @property-read bool $ready Is the document instantiated successfully?
44
 * @property-read string $recordId The METS file's / IIIF manifest's record identifier
45
 * @property-read int $rootId This holds the UID of the root document or zero if not multi-volumed
46
 * @property-read array $smLinks This holds the smLinks between logical and physical structMap
47
 * @property-read array $tableOfContents This holds the logical structure
48
 * @property-read string $thumbnail This holds the document's thumbnail location
49
 * @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
50
 * @property-read mixed $uid This holds the UID or the URL of the document
51
 * @abstract
52
 */
53
abstract class Doc
54
{
55
    /**
56
     * This holds the logger
57
     *
58
     * @var LogManager
59
     * @access protected
60
     */
61
    protected $logger;
62
63
    /**
64
     * This holds the PID for the configuration
65
     *
66
     * @var int
67
     * @access protected
68
     */
69
    protected $cPid = 0;
70
71
    /**
72
     * The extension key
73
     *
74
     * @var string
75
     * @access public
76
     */
77
    public static $extKey = 'dlf';
78
79
    /**
80
     * This holds the configuration for all supported metadata encodings
81
     * @see loadFormats()
82
     *
83
     * @var array
84
     * @access protected
85
     */
86
    protected $formats = [
87
        'OAI' => [
88
            'rootElement' => 'OAI-PMH',
89
            'namespaceURI' => 'http://www.openarchives.org/OAI/2.0/',
90
        ],
91
        'METS' => [
92
            'rootElement' => 'mets',
93
            'namespaceURI' => 'http://www.loc.gov/METS/',
94
        ],
95
        'XLINK' => [
96
            'rootElement' => 'xlink',
97
            'namespaceURI' => 'http://www.w3.org/1999/xlink',
98
        ]
99
    ];
100
101
    /**
102
     * Are the available metadata formats loaded?
103
     * @see $formats
104
     *
105
     * @var bool
106
     * @access protected
107
     */
108
    protected $formatsLoaded = false;
109
110
    /**
111
     * Are there any fulltext files available? This also includes IIIF text annotations
112
     * with motivation 'painting' if Kitodo.Presentation is configured to store text
113
     * annotations as fulltext.
114
     *
115
     * @var bool
116
     * @access protected
117
     */
118
    protected $hasFulltext = false;
119
120
    /**
121
     * Last searched logical and physical page
122
     *
123
     * @var array
124
     * @access protected
125
     */
126
    protected $lastSearchedPhysicalPage = ['logicalPage' => null, 'physicalPage' => null];
127
128
    /**
129
     * This holds the documents location
130
     *
131
     * @var string
132
     * @access protected
133
     */
134
    protected $location = '';
135
136
    /**
137
     * This holds the logical units
138
     *
139
     * @var array
140
     * @access protected
141
     */
142
    protected $logicalUnits = [];
143
144
    /**
145
     * This holds the documents' parsed metadata array with their corresponding
146
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
147
     *
148
     * @var array
149
     * @access protected
150
     */
151
    protected $metadataArray = [];
152
153
    /**
154
     * Is the metadata array loaded?
155
     * @see $metadataArray
156
     *
157
     * @var bool
158
     * @access protected
159
     */
160
    protected $metadataArrayLoaded = false;
161
162
    /**
163
     * This holds the total number of pages
164
     *
165
     * @var int
166
     * @access protected
167
     */
168
    protected $numPages = 0;
169
170
    /**
171
     * This holds the UID of the parent document or zero if not multi-volumed
172
     *
173
     * @var int
174
     * @access protected
175
     */
176
    protected $parentId = 0;
177
178
    /**
179
     * This holds the physical structure
180
     *
181
     * @var array
182
     * @access protected
183
     */
184
    protected $physicalStructure = [];
185
186
    /**
187
     * This holds the physical structure metadata
188
     *
189
     * @var array
190
     * @access protected
191
     */
192
    protected $physicalStructureInfo = [];
193
194
    /**
195
     * Is the physical structure loaded?
196
     * @see $physicalStructure
197
     *
198
     * @var bool
199
     * @access protected
200
     */
201
    protected $physicalStructureLoaded = false;
202
203
    /**
204
     * This holds the PID of the document or zero if not in database
205
     *
206
     * @var int
207
     * @access protected
208
     */
209
    protected $pid = 0;
210
211
    /**
212
     * This holds the documents' raw text pages with their corresponding
213
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
214
     *
215
     * @var array
216
     * @access protected
217
     */
218
    protected $rawTextArray = [];
219
220
    /**
221
     * Is the document instantiated successfully?
222
     *
223
     * @var bool
224
     * @access protected
225
     */
226
    protected $ready = false;
227
228
    /**
229
     * The METS file's / IIIF manifest's record identifier
230
     *
231
     * @var string
232
     * @access protected
233
     */
234
    protected $recordId;
235
236
    /**
237
     * This holds the singleton object of the document
238
     *
239
     * @var array (\Kitodo\Dlf\Common\Doc)
240
     * @static
241
     * @access protected
242
     */
243
    protected static $registry = [];
244
245
    /**
246
     * This holds the UID of the root document or zero if not multi-volumed
247
     *
248
     * @var int
249
     * @access protected
250
     */
251
    protected $rootId = 0;
252
253
    /**
254
     * Is the root id loaded?
255
     * @see $rootId
256
     *
257
     * @var bool
258
     * @access protected
259
     */
260
    protected $rootIdLoaded = false;
261
262
    /**
263
     * This holds the smLinks between logical and physical structMap
264
     *
265
     * @var array
266
     * @access protected
267
     */
268
    protected $smLinks = ['l2p' => [], 'p2l' => []];
269
270
    /**
271
     * Are the smLinks loaded?
272
     * @see $smLinks
273
     *
274
     * @var bool
275
     * @access protected
276
     */
277
    protected $smLinksLoaded = false;
278
279
    /**
280
     * This holds the logical structure
281
     *
282
     * @var array
283
     * @access protected
284
     */
285
    protected $tableOfContents = [];
286
287
    /**
288
     * Is the table of contents loaded?
289
     * @see $tableOfContents
290
     *
291
     * @var bool
292
     * @access protected
293
     */
294
    protected $tableOfContentsLoaded = false;
295
296
    /**
297
     * This holds the document's thumbnail location
298
     *
299
     * @var string
300
     * @access protected
301
     */
302
    protected $thumbnail = '';
303
304
    /**
305
     * Is the document's thumbnail location loaded?
306
     * @see $thumbnail
307
     *
308
     * @var bool
309
     * @access protected
310
     */
311
    protected $thumbnailLoaded = false;
312
313
    /**
314
     * This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
315
     *
316
     * @var string
317
     * @access protected
318
     */
319
    protected $toplevelId = '';
320
321
    /**
322
     * This holds the UID or the URL of the document
323
     *
324
     * @var mixed
325
     * @access protected
326
     */
327
    protected $uid = 0;
328
329
    /**
330
     * This holds the whole XML file as \SimpleXMLElement object
331
     *
332
     * @var \SimpleXMLElement
333
     * @access protected
334
     */
335
    protected $xml;
336
337
    /**
     * Empties the static document registry so that cached instances do not
     * accumulate and exhaust memory
     *
     * @access public
     *
     * @static
     *
     * @return void
     */
    public static function clearRegistry()
    {
        // Drop every registered entry by replacing the registry with an empty array.
        self::$registry = [];
    }
351
352
    /**
     * Makes sure the record identifier is read from the document, provided it has one
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $pid: ID of the configuration page that holds the recordId configuration
     *
     */
    abstract protected function establishRecordId($pid);
363
364
    /**
     * Returns the PHP object of the source document that this Document instance represents
     *
     * @access protected
     *
     * @abstract
     *
     * @return \SimpleXMLElement|IiifResourceInterface A PHP object representation of
     * the current document: SimpleXMLElement for METS, IiifResourceInterface for IIIF
     */
    abstract protected function getDocument();
375
376
    /**
     * Returns the location of a downloadable file for a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
     *
     * @return string The file's location as URL
     */
    abstract public function getDownloadLocation($id);
388
389
    /**
     * Returns the location of a file that represents a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
     *
     * @return string The file's location as URL
     */
    abstract public function getFileLocation($id);
401
402
    /**
     * Returns the MIME type of a file that represents a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node
     *
     * @return string The file's MIME type
     */
    abstract public function getFileMimeType($id);
414
415
    /**
     * Factory method: fetches the resource at $location, detects its format and
     * wraps it in the matching document class
     *
     * The content is fetched once. It is first parsed as XML and checked for a
     * mets:mets element; if it cannot be parsed as XML at all it is decoded as
     * JSON and handed to the IIIF library.
     *
     * @access public
     *
     * @static
     *
     * @param string $location: The URL of XML file or the IRI of the IIIF resource
     * @param array $settings: Settings array; only the 'storagePid' key is read here
     * @param bool $forceReload: Force reloading the document instead of returning the cached instance
     *                           (not evaluated inside this method body)
     *
     * @return \Kitodo\Dlf\Common\Doc|null Either a MetsDocument or an IiifManifest, or null if the
     * location is not a valid URL, could not be fetched, or is neither METS nor a IIIF resource
     */
    public static function &getInstance($location, $settings = [], $forceReload = false)
    {
        $instance = null;
        $documentFormat = null;
        $xml = null;
        $iiif = null;

        // Try to load a file from the url
        if (GeneralUtility::isValidUrl($location)) {
            // Load extension configuration once; it is needed for the user agent and the IIIF thumbnails.
            $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
            // Set user-agent to identify self when fetching XML data.
            if (!empty($extConf['useragent'])) {
                @ini_set('user_agent', $extConf['useragent']);
            }
            $content = GeneralUtility::getUrl($location);
            if ($content !== false) {
                $xml = Helper::getXmlFileAsString($content);
                if ($xml !== false) {
                    /* @var $xml \SimpleXMLElement */
                    $xml->registerXPathNamespace('mets', 'http://www.loc.gov/METS/');
                    $xpathResult = $xml->xpath('//mets:mets');
                    $documentFormat = !empty($xpathResult) ? 'METS' : null;
                } else {
                    // Try to load file as IIIF resource instead.
                    $contentAsJsonArray = json_decode($content, true);
                    if ($contentAsJsonArray !== null) {
                        IiifHelper::setUrlReader(IiifUrlReader::getInstance());
                        IiifHelper::setMaxThumbnailHeight($extConf['iiifThumbnailHeight']);
                        IiifHelper::setMaxThumbnailWidth($extConf['iiifThumbnailWidth']);
                        $iiif = IiifHelper::loadIiifResource($contentAsJsonArray);
                        if ($iiif instanceof IiifResourceInterface) {
                            $documentFormat = 'IIIF';
                        }
                    }
                }
            }
        }

        // Sanitize input; $settings defaults to [] so the key may be absent.
        $pid = max(intval($settings['storagePid'] ?? 0), 0);
        // The former code passed an undefined $uid here. The $uid property is documented as
        // "the UID or the URL of the document", so the location is handed over as identifier.
        // NOTE(review): confirm against the MetsDocument / IiifManifest constructor signature.
        if ($documentFormat == 'METS') {
            $instance = new MetsDocument($location, $pid, $xml);
        } elseif ($documentFormat == 'IIIF') {
            $instance = new IiifManifest($location, $pid, $iiif);
        }

        return $instance;
    }
480
481
    /**
     * Returns the details of a logical structure element
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the logical structure node (METS) or
     * the @id property of the Manifest / Range (IIIF)
     * @param bool $recursive: Whether to include the child elements / resources
     *
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
     */
    abstract public function getLogicalStructure($id, $recursive = false);
495
496
    /**
     * Extracts all the metadata of a logical structure node
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the logical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     * @param int $cPid: The PID for the metadata definitions
     *                       (defaults to $this->cPid or $this->pid)
     *
     * @return array The logical structure node's / the IIIF resource's parsed metadata array
     */
    abstract public function getMetadata($id, $cPid = 0);
511
512
    /**
     * This returns the first corresponding physical page number of a given logical page label
     *
     * The last successful lookup is remembered in $this->lastSearchedPhysicalPage and
     * answered from there when the same label is requested again. The page number is the
     * zero-based position of the first entry in $this->physicalStructureInfo whose
     * 'orderlabel' contains $logicalPage.
     *
     * @access public
     *
     * @param string $logicalPage: The label (or a part of the label) of the logical page
     *
     * @return int The physical page number, or 1 if no entry matches the label
     */
    public function getPhysicalPage($logicalPage)
    {
        $cachedLabel = $this->lastSearchedPhysicalPage['logicalPage'];
        // Compare as strings: a loose "==" treats numeric strings such as "1" and "01"
        // as equal and would answer a different label from the cache.
        if (
            !empty($cachedLabel)
            && (string) $cachedLabel === (string) $logicalPage
        ) {
            return $this->lastSearchedPhysicalPage['physicalPage'];
        }
        $physicalPage = 0;
        foreach ($this->physicalStructureInfo as $page) {
            if (strpos($page['orderlabel'], $logicalPage) !== false) {
                // Remember the hit for the next call with the same label.
                $this->lastSearchedPhysicalPage['logicalPage'] = $logicalPage;
                $this->lastSearchedPhysicalPage['physicalPage'] = $physicalPage;
                return $physicalPage;
            }
            $physicalPage++;
        }
        // Nothing matched; the result is not cached in this case.
        return 1;
    }
541
542
    /**
     * Extracts the OCR full text of a physical structure node / IIIF Manifest / Canvas. The text
     * may be given as ALTO for METS, or as annotations or ALTO for IIIF resources.
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     *
     * @return string The OCR full text
     */
    abstract public function getFullText($id);
556
557
    /**
     * This extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas from an
     * XML full text representation (currently only ALTO). For IIIF manifests, ALTO documents have
     * to be given in the Canvas' / Manifest's "seeAlso" property.
     *
     * The configured fulltext file groups ('fileGrpFulltext') are tried in order and the first
     * one that has a file for the node is used. The converted text is also stored in
     * $this->rawTextArray under the node's ID.
     *
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     *
     * @return string The OCR full text as returned by the format class' getTextAsMiniOcr(),
     * or an empty string if the node is unknown, the file cannot be loaded or the format is unsupported
     */
    protected function getFullTextFromXml($id)
    {
        $fullText = '';
        // Initialise both so they are defined even when no fulltext file group matches.
        $fileContent = '';
        $textFormat = '';
        // Load available text formats, ...
        $this->loadFormats();
        // ... physical structure ...
        $this->_getPhysicalStructure();
        // ... and extension configuration.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        $fileGrpsFulltext = GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext']);
        if (empty($this->physicalStructureInfo[$id])) {
            $this->logger->warning('Invalid structure node @ID "' . $id . '"');
            return $fullText;
        }
        // Iterate instead of shifting in a while-condition, so an empty or "0" entry
        // in the configured list no longer ends the search prematurely.
        foreach ($fileGrpsFulltext as $fileGrpFulltext) {
            if (empty($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext])) {
                continue;
            }
            // Get full text file.
            $fileContent = GeneralUtility::getUrl($this->getFileLocation($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext]));
            if ($fileContent === false) {
                $this->logger->warning('Couldn\'t load full text file for structure node @ID "' . $id . '"');
                return $fullText;
            }
            $textFormat = $this->getTextFormat($fileContent);
            // Only the first file group that has a file is considered.
            break;
        }
        // Is this text format supported?
        // This part actually differs from previous version of indexed OCR
        if (!empty($fileContent) && !empty($this->formats[$textFormat])) {
            $textMiniOcr = '';
            if (!empty($this->formats[$textFormat]['class'])) {
                $class = $this->formats[$textFormat]['class'];
                // Get the raw text from class.
                if (
                    class_exists($class)
                    && ($obj = GeneralUtility::makeInstance($class)) instanceof FulltextInterface
                ) {
                    // Load XML from file.
                    $ocrTextXml = Helper::getXmlFileAsString($fileContent);
                    $textMiniOcr = $obj->getTextAsMiniOcr($ocrTextXml);
                    $this->rawTextArray[$id] = $textMiniOcr;
                } else {
                    // The message names the method that is actually invoked above.
                    $this->logger->warning('Invalid class/method "' . $class . '->getTextAsMiniOcr()" for text format "' . $textFormat . '"');
                }
            }
            $fullText = $textMiniOcr;
        } else {
            $this->logger->warning('Unsupported text format "' . $textFormat . '" in physical node with @ID "' . $id . '"');
        }
        return $fullText;
    }
620
621
    /**
     * Get format of the OCR full text
     *
     * The format is the upper-cased name of the XML root element.
     *
     * @access private
     *
     * @param string $fileContent: content of the XML file
     *
     * @return string The format of the OCR full text, or an empty string if the
     * content could not be parsed as XML
     */
    private function getTextFormat($fileContent)
    {
        $xml = Helper::getXmlFileAsString($fileContent);
        // The helper may return false for unparsable content (see its use in getInstance());
        // calling getName() on that would be a fatal error.
        if (!$xml instanceof \SimpleXMLElement) {
            return '';
        }
        // Get the root element's name as text format.
        return strtoupper($xml->getName());
    }
635
636
    /**
     * Looks up the title of a document record in tx_dlf_documents
     *
     * If the record has no title and $recursive is set, the record referenced by
     * its 'partof' field is queried instead (unless it points to the record itself).
     *
     * @access public
     *
     * @static
     *
     * @param int $uid: The UID of the document
     * @param bool $recursive: Search superior documents for a title, too?
     *
     * @return string The title of the document itself or a parent document
     */
    public static function getTitle($uid, $recursive = false)
    {
        // Sanitize input.
        $uid = max(intval($uid), 0);
        if (!$uid) {
            Helper::log('Invalid UID ' . $uid . ' for document', LOG_SEVERITY_ERROR);
            return '';
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');

        $statement = $queryBuilder
            ->select(
                'tx_dlf_documents.title',
                'tx_dlf_documents.partof'
            )
            ->from('tx_dlf_documents')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_documents.uid', $uid),
                Helper::whereExpression('tx_dlf_documents')
            )
            ->setMaxResults(1)
            ->execute();

        $row = $statement->fetch();
        if (!$row) {
            Helper::log('No document with UID ' . $uid . ' found or document not accessible', LOG_SEVERITY_WARNING);
            return '';
        }

        // Get title information.
        $title = $row['title'];
        $parentUid = $row['partof'];
        // Fall back to the parent document only when asked to and when this record has no title.
        $askParent = $recursive
            && empty($title)
            && intval($parentUid)
            && $parentUid != $uid;
        if ($askParent) {
            $title = self::getTitle($parentUid, true);
        }
        return $title;
    }
691
692
    /**
     * Collects the metadata of the toplevel logical structure node / resource
     *
     * @access public
     *
     * @param int $cPid: The PID for the metadata definitions
     *
     * @return array The logical structure node's / resource's parsed metadata array
     */
    public function getTitledata($cPid = 0)
    {
        $titledata = $this->getMetadata($this->_getToplevelId(), $cPid);
        // METS documents additionally contribute information from the structural map.
        if ($this instanceof MetsDocument) {
            $this->addMetadataFromMets($titledata, $this->_getToplevelId());
        }
        // Prepend the record identifier of the METS file / IIIF manifest if it is not listed yet.
        $recordIdMissing = is_array($titledata)
            && array_key_exists('record_id', $titledata)
            && !empty($this->recordId)
            && !in_array($this->recordId, $titledata['record_id']);
        if ($recordIdMissing) {
            array_unshift($titledata['record_id'], $this->recordId);
        }
        return $titledata;
    }
722
723
    /**
     * Walks a logical (sub-) structure tree depth-first, looking for the structure with the
     * requested logical id, and reports its depth.
     *
     * @access protected
     *
     * @param array $structure: logical structure array
     * @param int $depth: current tree depth
     * @param string $logId: ID of the logical structure whose depth is requested
     *
     * @return int|bool: false if structure with $logId is not a child of this substructure,
     * or the actual depth.
     */
    protected function getTreeDepth($structure, $depth, $logId)
    {
        foreach ($structure as $node) {
            if ($node['id'] == $logId) {
                return $depth;
            }
            // Leaves (no 'children' key) cannot contain the requested id.
            if (!array_key_exists('children', $node)) {
                continue;
            }
            $depthInSubtree = $this->getTreeDepth($node['children'], $depth + 1, $logId);
            if ($depthInSubtree !== false) {
                return $depthInSubtree;
            }
        }
        return false;
    }
749
750
    /**
     * Determines how deep a logical structure element sits within the table of contents
     *
     * @access public
     *
     * @param string $logId: The id of the logical structure element whose depth is requested
     * @return int|bool tree depth as integer or false if no element with $logId exists within the TOC.
     */
    public function getStructureDepth($logId)
    {
        $tableOfContents = $this->_getTableOfContents();
        // Top-level entries of the table of contents count as depth 1.
        return $this->getTreeDepth($tableOfContents, 1, $logId);
    }
762
763
    /**
     * Sets some basic class properties
     *
     * @access protected
     *
     * @abstract
     *
     * @return void
     */
    abstract protected function init();
773
774
    /**
     * Reuses a document object that may already have been loaded while determining
     * whether the document is METS or IIIF
     *
     * @access protected
     *
     * @abstract
     *
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: any instance that has already been loaded
     *
     * @return bool true if $preloadedDocument can actually be reused, false if it has to be loaded again
     */
    abstract protected function setPreloadedDocument($preloadedDocument);
786
787
    /**
     * The METS/IIIF specific part of loading a location
     *
     * @access protected
     *
     * @abstract
     *
     * @param string $location: The URL of the file to load
     *
     * @return bool true on success or false on failure
     */
    abstract protected function loadLocation($location);
799
800
    /**
     * Loads the XML file / IIIF resource found at the given URL
     *
     * Validates the URL, applies the configured user agent and then delegates the
     * format specific work to loadLocation().
     *
     * @access protected
     *
     * @param string $location: The URL of the file to load
     *
     * @return bool true on success or false on failure
     */
    protected function load($location)
    {
        // Refuse anything that is not a valid URL.
        if (!GeneralUtility::isValidUrl($location)) {
            $this->logger->error('Invalid file location "' . $location . '" for document loading');
            return false;
        }
        // Load extension configuration
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        // Identify ourselves with the configured user agent when fetching XML / JSON-LD data.
        if (!empty($extConf['useragent'])) {
            @ini_set('user_agent', $extConf['useragent']);
        }
        // The actual loading is format specific.
        return $this->loadLocation($location);
    }
826
827
    /**
     * Analyzes whether the document contains any fulltext that needs to be indexed
     *
     * @access protected
     *
     * @abstract
     */
    abstract protected function ensureHasFulltextIsSet();
835
836
    /**
     * Registers all data formats stored in tx_dlf_formats (pid 0) in $this->formats
     *
     * Runs the query only once per instance; later calls return immediately.
     *
     * @access protected
     *
     * @return void
     */
    protected function loadFormats()
    {
        if ($this->formatsLoaded) {
            return;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_formats');

        // Fetch the available data formats from the database.
        $statement = $queryBuilder
            ->select(
                'tx_dlf_formats.type AS type',
                'tx_dlf_formats.root AS root',
                'tx_dlf_formats.namespace AS namespace',
                'tx_dlf_formats.class AS class'
            )
            ->from('tx_dlf_formats')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_formats.pid', 0)
            )
            ->execute();

        while ($row = $statement->fetch()) {
            // Add the format to the registry, replacing any entry of the same type.
            $this->formats[$row['type']] = [
                'rootElement' => $row['root'],
                'namespaceURI' => $row['namespace'],
                'class' => $row['class']
            ];
        }
        $this->formatsLoaded = true;
    }
874
875
    /**
     * Register the namespaces of all known data formats on an XML query object
     *
     * Each format type (lowercased) is registered as prefix for the format's
     * namespace URI. Objects of any other type are rejected with an error
     * log entry and left untouched.
     *
     * @access public
     *
     * @param \SimpleXMLElement|\DOMXPath &$obj: \SimpleXMLElement or \DOMXPath object
     *
     * @return void
     */
    public function registerNamespaces(&$obj)
    {
        // TODO Check usage. XML specific method does not seem to be used anywhere outside this class within the project, but it is public and may be used by extensions.
        $this->loadFormats();

        // Only \SimpleXMLElement and \DOMXPath are supported.
        $isSimpleXml = $obj instanceof \SimpleXMLElement;
        if (!$isSimpleXml && !($obj instanceof \DOMXPath)) {
            $this->logger->error('Given object is neither a SimpleXMLElement nor a DOMXPath instance');
            return;
        }

        // Both classes offer the same functionality under different method names.
        $registerMethod = $isSimpleXml ? 'registerXPathNamespace' : 'registerNamespace';

        // Register metadata format's namespaces.
        foreach ($this->formats as $type => $format) {
            $obj->{$registerMethod}(strtolower($type), $format['namespaceURI']);
        }
    }
902
903
    /**
     * This saves the document to the database and index
     *
     * Only available in the backend. The method resolves the structure type,
     * the collections and the owning library to their UIDs (creating missing
     * collections and libraries on the fly), writes the "tx_dlf_documents"
     * record and finally hands the document over to the indexer.
     *
     * @access public
     *
     * @param int $pid: The PID of the saved record
     * @param int $core: The UID of the Solr core for indexing
     * @param int|string $owner: UID or index_name of owner to set while indexing
     *
     * @return bool The result of Indexer::add() if a Solr core is given.
     *              false if saving is not possible or if no core is given
     *              (in the latter case the database record has already been written).
     */
    public function save($pid = 0, $core = 0, $owner = null)
    {
        if (\TYPO3_MODE !== 'BE') {
            $this->logger->error('Saving a document is only allowed in the backend');
            return false;
        }
        // Make sure $pid is a non-negative integer.
        $pid = max(intval($pid), 0);
        // Make sure $core is a non-negative integer.
        $core = max(intval($core), 0);
        // If $pid is not given, try to get it elsewhere.
        if (
            !$pid
            && $this->pid
        ) {
            // Retain current PID.
            $pid = $this->pid;
        } elseif (!$pid) {
            $this->logger->error('Invalid PID ' . $pid . ' for document saving');
            return false;
        }
        // Set PID for metadata definitions.
        $this->cPid = $pid;
        // Set UID placeholder if not updating existing record.
        if ($pid != $this->pid) {
            // NOTE(review): static analysis reports $uid as declared read-only in this class.
            $this->uid = uniqid('NEW');
        }
        // Get metadata array.
        $metadata = $this->getTitledata($pid);
        // Check for record identifier.
        if (empty($metadata['record_id'][0])) {
            $this->logger->error('No record identifier found to avoid duplication');
            return false;
        }
        // Load plugin configuration.
        $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_structures');

        // Get UID for structure type.
        $result = $queryBuilder
            ->select('tx_dlf_structures.uid AS uid')
            ->from('tx_dlf_structures')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($pid)),
                $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($metadata['type'][0])),
                Helper::whereExpression('tx_dlf_structures')
            )
            ->setMaxResults(1)
            ->execute();

        if ($resArray = $result->fetch()) {
            $structure = $resArray['uid'];
        } else {
            $this->logger->error('Could not identify document/structure type "' . $queryBuilder->expr()->literal($metadata['type'][0]) . '"');
            return false;
        }
        $metadata['type'][0] = $structure;

        // Remove appended "valueURI" from authors' names for storing in database.
        // The URI is separated from the name by the ASCII unit separator (31).
        foreach ($metadata['author'] as $i => $author) {
            $splitName = explode(chr(31), $author);
            $metadata['author'][$i] = $splitName[0];
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_collections');
        // Get hidden records, too.
        $queryBuilder
            ->getRestrictions()
            ->removeByType(HiddenRestriction::class);

        // Get UIDs for collections.
        $result = $queryBuilder
            ->select(
                'tx_dlf_collections.index_name AS index_name',
                'tx_dlf_collections.uid AS uid'
            )
            ->from('tx_dlf_collections')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_collections.pid', intval($pid)),
                $queryBuilder->expr()->in('tx_dlf_collections.sys_language_uid', [-1, 0])
            )
            ->execute();

        $collUid = [];
        while ($resArray = $result->fetch()) {
            $collUid[$resArray['index_name']] = $resArray['uid'];
        }
        $collections = [];
        foreach ($metadata['collection'] as $collection) {
            if (!empty($collUid[$collection])) {
                // Add existing collection's UID.
                $collections[] = $collUid[$collection];
            } else {
                // Insert new collection.
                $collNewUid = uniqid('NEW');
                $collData['tx_dlf_collections'][$collNewUid] = [
                    'pid' => $pid,
                    'label' => $collection,
                    'index_name' => $collection,
                    'oai_name' => (!empty($conf['publishNewCollections']) ? Helper::getCleanString($collection) : ''),
                    'description' => '',
                    'documents' => 0,
                    'owner' => 0,
                    'status' => 0,
                ];
                $substUid = Helper::processDBasAdmin($collData);
                // Prevent double insertion.
                unset($collData);
                // Add new collection's UID.
                $collections[] = $substUid[$collNewUid];
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newCollection'), $collection, $substUid[$collNewUid])),
                        Helper::getMessage('flash.attention', true),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        true
                    );
                }
            }
        }
        $metadata['collection'] = $collections;

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_libraries');

        // Get UID for owner.
        if (empty($owner)) {
            // Prefer the owner given in the metadata and only fall back to
            // "default" if the metadata does not provide one.
            $owner = !empty($metadata['owner'][0]) ? $metadata['owner'][0] : 'default';
        }
        if (!MathUtility::canBeInterpretedAsInteger($owner)) {
            $result = $queryBuilder
                ->select('tx_dlf_libraries.uid AS uid')
                ->from('tx_dlf_libraries')
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_libraries.pid', intval($pid)),
                    $queryBuilder->expr()->eq('tx_dlf_libraries.index_name', $queryBuilder->expr()->literal($owner)),
                    Helper::whereExpression('tx_dlf_libraries')
                )
                ->setMaxResults(1)
                ->execute();

            if ($resArray = $result->fetch()) {
                $ownerUid = $resArray['uid'];
            } else {
                // Insert new library.
                $libNewUid = uniqid('NEW');
                $libData['tx_dlf_libraries'][$libNewUid] = [
                    'pid' => $pid,
                    'label' => $owner,
                    'index_name' => $owner,
                    'website' => '',
                    'contact' => '',
                    'image' => '',
                    'oai_label' => '',
                    'oai_base' => '',
                    'opac_label' => '',
                    'opac_base' => '',
                    'union_label' => '',
                    'union_base' => '',
                ];
                $substUid = Helper::processDBasAdmin($libData);
                // Add new library's UID.
                $ownerUid = $substUid[$libNewUid];
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newLibrary'), $owner, $ownerUid)),
                        Helper::getMessage('flash.attention', true),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        true
                    );
                }
            }
            $owner = $ownerUid;
        }
        $metadata['owner'][0] = $owner;
        // Get UID of parent document.
        $partof = $this->getParentDocumentUidForSaving($pid, $core, $owner);
        // Use the date of publication or title as alternative sorting metric for parts of multi-part works.
        if (!empty($partof)) {
            if (
                empty($metadata['volume'][0])
                && !empty($metadata['year'][0])
            ) {
                $metadata['volume'] = $metadata['year'];
            }
            if (empty($metadata['volume_sorting'][0])) {
                // If METS @ORDER is given it is preferred over year_sorting and year.
                if (!empty($metadata['mets_order'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['mets_order'][0];
                } elseif (!empty($metadata['year_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year_sorting'][0];
                } elseif (!empty($metadata['year'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year'][0];
                }
            }
            // If volume_sorting is still empty, try to use title_sorting or METS @ORDERLABEL finally (workaround for newspapers)
            if (empty($metadata['volume_sorting'][0])) {
                if (!empty($metadata['title_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['title_sorting'][0];
                } elseif (!empty($metadata['mets_orderlabel'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['mets_orderlabel'][0];
                }
            }
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');

        // Get metadata for lists and sorting.
        $result = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadata.is_listed AS is_listed',
                'tx_dlf_metadata.is_sortable AS is_sortable'
            )
            ->from('tx_dlf_metadata')
            ->where(
                $queryBuilder->expr()->orX(
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_listed', 1),
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_sortable', 1)
                ),
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($pid)),
                Helper::whereExpression('tx_dlf_metadata')
            )
            ->execute();

        $listed = [];
        $sortable = [];

        while ($resArray = $result->fetch()) {
            if (!empty($metadata[$resArray['index_name']])) {
                if ($resArray['is_listed']) {
                    $listed[$resArray['index_name']] = $metadata[$resArray['index_name']];
                }
                if ($resArray['is_sortable']) {
                    // Only the first value is used for sorting.
                    $sortable[$resArray['index_name']] = $metadata[$resArray['index_name']][0];
                }
            }
        }
        // Fill data array.
        $data['tx_dlf_documents'][$this->uid] = [
            'pid' => $pid,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['starttime'] => 0,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['endtime'] => 0,
            'prod_id' => $metadata['prod_id'][0],
            'location' => $this->location,
            'record_id' => $metadata['record_id'][0],
            'opac_id' => $metadata['opac_id'][0],
            'union_id' => $metadata['union_id'][0],
            'urn' => $metadata['urn'][0],
            'purl' => $metadata['purl'][0],
            'title' => $metadata['title'][0],
            'title_sorting' => $metadata['title_sorting'][0],
            'author' => implode('; ', $metadata['author']),
            'year' => implode('; ', $metadata['year']),
            'place' => implode('; ', $metadata['place']),
            'thumbnail' => $this->_getThumbnail(true),
            'metadata' => serialize($listed),
            'metadata_sorting' => serialize($sortable),
            'structure' => $metadata['type'][0],
            'partof' => $partof,
            'volume' => $metadata['volume'][0],
            'volume_sorting' => $metadata['volume_sorting'][0],
            'license' => $metadata['license'][0],
            'terms' => $metadata['terms'][0],
            'restrictions' => $metadata['restrictions'][0],
            'out_of_print' => $metadata['out_of_print'][0],
            'rights_info' => $metadata['rights_info'][0],
            'collections' => $metadata['collection'],
            'mets_label' => $metadata['mets_label'][0],
            'mets_orderlabel' => $metadata['mets_orderlabel'][0],
            'mets_order' => $metadata['mets_order'][0],
            'owner' => $metadata['owner'][0],
            'solrcore' => $core,
            'status' => 0,
            'document_format' => $metadata['document_format'][0],
        ];
        // Unhide hidden documents.
        if (!empty($conf['unhideOnIndex'])) {
            $data['tx_dlf_documents'][$this->uid][$GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['disabled']] = 0;
        }
        // Process data.
        $newIds = Helper::processDBasAdmin($data);
        // Replace placeholder with actual UID.
        if (strpos($this->uid, 'NEW') === 0) {
            // NOTE(review): static analysis reports $pid and $parentId as declared read-only in this class.
            $this->uid = $newIds[$this->uid];
            $this->pid = $pid;
            $this->parentId = $partof;
        }
        if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
            Helper::addMessage(
                htmlspecialchars(sprintf(Helper::getMessage('flash.documentSaved'), $metadata['title'][0], $this->uid)),
                Helper::getMessage('flash.done', true),
                \TYPO3\CMS\Core\Messaging\FlashMessage::OK,
                true
            );
        }
        // Add document to index.
        if ($core) {
            // NOTE(review): static analysis reports that Indexer::add() expects a
            // \Kitodo\Dlf\Domain\Model\Document as first argument, not a Doc - verify.
            return Indexer::add($this, $core);
        } else {
            $this->logger->notice('Invalid UID "' . $core . '" for Solr core');
            return false;
        }
    }
1222
1223
    /**
     * Get the ID of the parent document if the current document has one. Also save a parent document
     * to the database and the Solr index if their $pid and the current $pid differ.
     * Currently only applies to METS documents.
     *
     * save() calls this with its own (already normalized) arguments and stores
     * the result in the "partof" field of the document record.
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $pid: The PID the document is saved to
     * @param int $core: The UID of the Solr core for indexing
     * @param int|string $owner: The owner as resolved by save()
     *
     * @return int The parent document's id.
     */
    abstract protected function getParentDocumentUidForSaving($pid, $core, $owner);
1235
1236
    /**
     * Getter for $this->cPid, used via __get()
     *
     * @access protected
     *
     * @return int The PID of the metadata definitions
     */
    protected function _getCPid()
    {
        return $this->cPid;
    }
1247
1248
    /**
     * Getter for $this->hasFulltext, used via __get()
     *
     * Calls ensureHasFulltextIsSet() first, so the format specific fulltext
     * analysis runs before the flag is read.
     *
     * @access protected
     *
     * @return bool Are there any fulltext files available?
     */
    protected function _getHasFulltext()
    {
        $this->ensureHasFulltextIsSet();
        return $this->hasFulltext;
    }
1260
1261
    /**
     * Getter for $this->location, used via __get()
     *
     * @access protected
     *
     * @return string The location of the document
     */
    protected function _getLocation()
    {
        return $this->location;
    }
1272
1273
    /**
     * Format specific part of building the document's metadata array
     *
     * Invoked by _getMetadataArray() whenever the metadata array has to be
     * (re)built for the given PID.
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $cPid: The PID of the metadata definitions
     */
    abstract protected function prepareMetadataArray($cPid);
1283
1284
    /**
     * This builds an array of the document's metadata
     *
     * The array is rebuilt if it has not been loaded yet or if it was built
     * for another PID; the PID it was built for is kept at index 0.
     *
     * @access protected
     *
     * @return array Array of metadata with their corresponding logical structure node ID as key
     *               (empty if no valid PID for the metadata definitions is available)
     */
    protected function _getMetadataArray()
    {
        // Use the PID of the metadata definitions, fall back to the document's PID.
        $cPid = $this->cPid ?: $this->pid;
        if (!$cPid) {
            $this->logger->error('Invalid PID ' . $cPid . ' for metadata definitions');
            return [];
        }

        // The cached array is only valid for the PID it was built for.
        $isUpToDate = $this->metadataArrayLoaded && $this->metadataArray[0] == $cPid;
        if (!$isUpToDate) {
            $this->prepareMetadataArray($cPid);
            // NOTE(review): static analysis reports $metadataArray as declared read-only in this class.
            $this->metadataArray[0] = $cPid;
            $this->metadataArrayLoaded = true;
        }

        return $this->metadataArray;
    }
1309
1310
    /**
     * Getter for $this->numPages, used via __get()
     *
     * @access protected
     *
     * @return int The total number of pages and/or tracks
     */
    protected function _getNumPages()
    {
        // Presumably the page count is determined while building the physical structure.
        $this->_getPhysicalStructure();
        return $this->numPages;
    }
1322
1323
    /**
     * Getter for $this->parentId, used via __get()
     *
     * @access protected
     *
     * @return int The UID of the parent document or zero if not applicable
     */
    protected function _getParentId()
    {
        return $this->parentId;
    }
1334
1335
    /**
     * This builds an array of the document's physical structure
     *
     * Format specific; _getNumPages() and _getPhysicalStructureInfo() call
     * this method before reading their respective properties.
     *
     * @access protected
     *
     * @abstract
     *
     * @return array Array of physical elements' id, type, label and file representations ordered
     * by @ORDER attribute / IIIF Sequence's Canvases
     */
    abstract protected function _getPhysicalStructure();
1346
1347
    /**
     * This gives an array of the document's physical structure metadata
     *
     * The physical structure is built on demand if it has not been loaded yet.
     *
     * @access protected
     *
     * @return array Array of elements' type, label and file representations ordered by @ID attribute / Canvas order
     */
    protected function _getPhysicalStructureInfo()
    {
        if ($this->physicalStructureLoaded) {
            // Already built, reuse the stored information.
            return $this->physicalStructureInfo;
        }

        // Build physical structure array.
        $this->_getPhysicalStructure();

        return $this->physicalStructureInfo;
    }
1363
1364
    /**
     * Getter for $this->pid, used via __get()
     *
     * @access protected
     *
     * @return int The PID of the document or zero if not in database
     */
    protected function _getPid()
    {
        return $this->pid;
    }
1375
1376
    /**
     * Getter for $this->ready, used via __get()
     *
     * @access protected
     *
     * @return bool Is the document instantiated successfully?
     */
    protected function _getReady()
    {
        return $this->ready;
    }
1387
1388
    /**
     * Getter for $this->recordId, used via __get()
     *
     * @access protected
     *
     * @return mixed The METS file's / IIIF manifest's record identifier
     */
    protected function _getRecordId()
    {
        return $this->recordId;
    }
1399
1400
    /**
     * Getter for $this->rootId, used via __get()
     *
     * On first access the root ID is taken over from the parent document (if
     * there is one); afterwards the stored value is returned.
     *
     * @access protected
     *
     * @return int The UID of the root document or zero if not applicable
     */
    protected function _getRootId()
    {
        // The root ID has been determined before.
        if ($this->rootIdLoaded) {
            return $this->rootId;
        }

        if ($this->parentId) {
            // NOTE(review): assumes getInstance() always yields a document here - verify.
            $parentDocument = self::getInstance($this->parentId, ['storagePid' => $this->pid]);
            // NOTE(review): static analysis reports $rootId as declared read-only in this class.
            $this->rootId = $parentDocument->rootId;
        }
        $this->rootIdLoaded = true;

        return $this->rootId;
    }
1418
1419
    /**
     * This returns the smLinks between logical and physical structMap (METS) and models the
     * relation between IIIF Canvases and Manifests / Ranges in the same way
     *
     * @access protected
     *
     * @abstract
     *
     * @return array The links between logical and physical nodes / Range, Manifest and Canvas
     */
    abstract protected function _getSmLinks();
1430
1431
    /**
     * This builds an array of the document's logical structure
     *
     * The logical structure is loaded on first access only.
     *
     * @access protected
     *
     * @return array Array of structure nodes' id, label, type and physical page indexes/mptr / Canvas link with original hierarchy preserved
     */
    protected function _getTableOfContents()
    {
        if ($this->tableOfContentsLoaded) {
            // Already built, reuse the stored structure.
            return $this->tableOfContents;
        }

        // Get all logical structures.
        $this->getLogicalStructure('', true);
        $this->tableOfContentsLoaded = true;

        return $this->tableOfContents;
    }
1448
1449
    /**
     * This returns the document's thumbnail location
     *
     * Format specific; save() calls this with $forceReload = true to fill the
     * "thumbnail" field of the document record.
     *
     * @access protected
     *
     * @abstract
     *
     * @param bool $forceReload: Force reloading the thumbnail instead of returning the cached value
     *
     * @return string The document's thumbnail location
     */
    abstract protected function _getThumbnail($forceReload = false);
1461
1462
    /**
     * This returns the ID of the toplevel logical structure node
     *
     * @access protected
     *
     * @abstract
     *
     * @return string The logical structure node's ID
     */
    abstract protected function _getToplevelId();
1472
1473
    /**
     * Getter for $this->uid, used via __get()
     *
     * @access protected
     *
     * @return mixed The UID or the URL of the document
     */
    protected function _getUid()
    {
        return $this->uid;
    }
1484
1485
    /**
     * Setter for $this->cPid, used via __set()
     *
     * The value is cast to integer; negative values are stored as zero.
     *
     * @access protected
     *
     * @param int $value: The new PID for the metadata definitions
     *
     * @return void
     */
    protected function _setCPid($value)
    {
        $newPid = intval($value);
        // A PID must never be negative.
        $this->cPid = $newPid > 0 ? $newPid : 0;
    }
1498
1499
    /**
1500
     * This is a singleton class, thus the constructor should be private/protected
1501
     * (Get an instance of this class by calling \Kitodo\Dlf\Common\Doc::getInstance())
1502
     *
1503
     * @access protected
1504
     *
1505
     * @param int $uid: The UID of the document to parse or URL to XML file
1506
     * @param int $pid: If > 0, then only document with this PID gets loaded
1507
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: Either null or the \SimpleXMLElement
1508
     * or IiifResourceInterface that has been loaded to determine the basic document format.
1509
     *
1510
     * @return void
1511
     */
1512
    protected function __construct($uid, $pid, $preloadedDocument)
    {
        // Create the logger before anything else. The magic __get() / __set()
        // accessors of this class log through $this->logger, and the helper
        // methods called below are defined elsewhere and may log as well, so
        // the logger must not still be null while they run.
        $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger();

        // Take over the document that the caller has already loaded.
        $this->setPreloadedDocument($preloadedDocument);

        // Set up the object's basic state from the loaded document
        // (NOTE(review): init() is implemented outside this section; confirm
        // it has no further preconditions).
        $this->init();

        // Determine the record identifier; $pid is handed through unchanged.
        $this->establishRecordId($pid);

        // $uid is deliberately not evaluated here. It is kept in the
        // signature so that existing callers remain compatible. The former
        // lookup of the document in "tx_dlf_documents" sat behind an
        // unconditional return, was never executed, and has been removed.
    }
1621
1622
    /**
1623
     * This magic method is called each time an invisible property is referenced from the object
1624
     *
1625
     * @access public
1626
     *
1627
     * @param string $var: Name of variable to get
1628
     *
1629
     * @return mixed Value of $this->$var
1630
     */
1631
    public function __get($var)
    {
        // Getter names follow the "_get" + UpperCamelCase property convention.
        $getter = '_get' . ucfirst($var);

        // Only delegate when the property is declared AND a getter exists.
        if (property_exists($this, $var) && method_exists($this, $getter)) {
            return $this->$getter();
        }

        // Unknown property or missing getter: log it and yield null.
        $this->logger->warning('There is no getter function for property "' . $var . '"');

        return null;
    }
1644
1645
    /**
1646
     * This magic method is called each time an invisible property is checked for isset() or empty()
1647
     *
1648
     * @access public
1649
     *
1650
     * @param string $var: Name of variable to check
1651
     *
1652
     * @return bool true if variable is set and not empty, false otherwise
1653
     */
1654
    public function __isset($var)
    {
        // Resolve the value through __get() so that the property's getter is
        // used. An unknown property yields null there (and logs a warning),
        // so it is reported as "not set".
        $current = $this->__get($var);

        return !empty($current);
    }
1658
1659
    /**
1660
     * This magic method is called each time a value is assigned to an invisible property of the object
1661
     *
1662
     * @access public
1663
     *
1664
     * @param string $var: Name of variable to set
1665
     * @param mixed $value: New value of variable
1666
     *
1667
     * @return void
1668
     */
1669
    public function __set($var, $value)
    {
        // Setter names follow the "_set" + UpperCamelCase property convention.
        $setter = '_set' . ucfirst($var);

        // Only delegate when the property is declared AND a setter exists.
        if (property_exists($this, $var) && method_exists($this, $setter)) {
            $this->$setter($value);

            return;
        }

        // Unknown property or missing setter: log it and leave state untouched.
        $this->logger->warning('There is no setter function for property "' . $var . '"');
    }
1681
}
1682