Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Pull Request — master (#715)
by Alexander
03:15
created

Doc::__isset()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
5
 *
6
 * This file is part of the Kitodo and TYPO3 projects.
7
 *
8
 * @license GNU General Public License version 3 or later.
9
 * For the full copyright and license information, please read the
10
 * LICENSE.txt file that was distributed with this source code.
11
 */
12
13
namespace Kitodo\Dlf\Common;
14
15
use TYPO3\CMS\Core\Cache\CacheManager;
16
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
17
use TYPO3\CMS\Core\Database\ConnectionPool;
18
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
19
use TYPO3\CMS\Core\Log\LogManager;
20
use TYPO3\CMS\Core\Utility\GeneralUtility;
21
use TYPO3\CMS\Core\Utility\MathUtility;
22
use TYPO3\CMS\Extbase\Configuration\ConfigurationManager;
23
use Ubl\Iiif\Presentation\Common\Model\Resources\IiifResourceInterface;
24
use Ubl\Iiif\Tools\IiifHelper;
25
26
/**
27
 * Document class for the 'dlf' extension
28
 *
29
 * @author Sebastian Meyer <[email protected]>
30
 * @author Henrik Lochmann <[email protected]>
31
 * @package TYPO3
32
 * @subpackage dlf
33
 * @access public
34
 * @property int $cPid This holds the PID for the configuration
35
 * @property-read bool $hasFulltext Are there any fulltext files available?
36
 * @property-read string $location This holds the document's location
37
 * @property-read array $metadataArray This holds the documents' parsed metadata array
38
 * @property-read int $numPages This holds the total number of pages
39
 * @property-read int $parentId This holds the UID of the parent document or zero if not multi-volumed
40
 * @property-read array $physicalStructure This holds the physical structure
41
 * @property-read array $physicalStructureInfo This holds the physical structure metadata
42
 * @property-read int $pid This holds the PID of the document or zero if not in database
43
 * @property-read bool $ready Is the document instantiated successfully?
44
 * @property-read string $recordId The METS file's / IIIF manifest's record identifier
45
 * @property-read int $rootId This holds the UID of the root document or zero if not multi-volumed
46
 * @property-read array $smLinks This holds the smLinks between logical and physical structMap
47
 * @property-read array $tableOfContents This holds the logical structure
48
 * @property-read string $thumbnail This holds the document's thumbnail location
49
 * @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
50
 * @abstract
51
 */
52
abstract class Doc
53
{
54
    /**
55
     * This holds the logger
56
     *
57
     * @var LogManager
58
     * @access protected
59
     */
60
    protected $logger;
61
62
    /**
63
     * This holds the PID for the configuration
64
     *
65
     * @var int
66
     * @access protected
67
     */
68
    protected $cPid = 0;
69
70
    /**
71
     * The extension key
72
     *
73
     * @var string
74
     * @access public
75
     */
76
    public static $extKey = 'dlf';
77
78
    /**
79
     * This holds the configuration for all supported metadata encodings
80
     * @see loadFormats()
81
     *
82
     * @var array
83
     * @access protected
84
     */
85
    protected $formats = [
86
        'OAI' => [
87
            'rootElement' => 'OAI-PMH',
88
            'namespaceURI' => 'http://www.openarchives.org/OAI/2.0/',
89
        ],
90
        'METS' => [
91
            'rootElement' => 'mets',
92
            'namespaceURI' => 'http://www.loc.gov/METS/',
93
        ],
94
        'XLINK' => [
95
            'rootElement' => 'xlink',
96
            'namespaceURI' => 'http://www.w3.org/1999/xlink',
97
        ]
98
    ];
99
100
    /**
101
     * Are the available metadata formats loaded?
102
     * @see $formats
103
     *
104
     * @var bool
105
     * @access protected
106
     */
107
    protected $formatsLoaded = false;
108
109
    /**
110
     * Are there any fulltext files available? This also includes IIIF text annotations
111
     * with motivation 'painting' if Kitodo.Presentation is configured to store text
112
     * annotations as fulltext.
113
     *
114
     * @var bool
115
     * @access protected
116
     */
117
    protected $hasFulltext = false;
118
119
    /**
120
     * Last searched logical and physical page
121
     *
122
     * @var array
123
     * @access protected
124
     */
125
    protected $lastSearchedPhysicalPage = ['logicalPage' => null, 'physicalPage' => null];
126
127
    /**
128
     * This holds the logical units
129
     *
130
     * @var array
131
     * @access protected
132
     */
133
    protected $logicalUnits = [];
134
135
    /**
136
     * This holds the documents' parsed metadata array with their corresponding
137
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
138
     *
139
     * @var array
140
     * @access protected
141
     */
142
    protected $metadataArray = [];
143
144
    /**
145
     * Is the metadata array loaded?
146
     * @see $metadataArray
147
     *
148
     * @var bool
149
     * @access protected
150
     */
151
    protected $metadataArrayLoaded = false;
152
153
    /**
154
     * This holds the total number of pages
155
     *
156
     * @var int
157
     * @access protected
158
     */
159
    protected $numPages = 0;
160
161
    /**
162
     * This holds the UID of the parent document or zero if not multi-volumed
163
     *
164
     * @var int
165
     * @access protected
166
     */
167
    protected $parentId = 0;
168
169
    /**
170
     * This holds the physical structure
171
     *
172
     * @var array
173
     * @access protected
174
     */
175
    protected $physicalStructure = [];
176
177
    /**
178
     * This holds the physical structure metadata
179
     *
180
     * @var array
181
     * @access protected
182
     */
183
    protected $physicalStructureInfo = [];
184
185
    /**
186
     * Is the physical structure loaded?
187
     * @see $physicalStructure
188
     *
189
     * @var bool
190
     * @access protected
191
     */
192
    protected $physicalStructureLoaded = false;
193
194
    /**
195
     * This holds the PID of the document or zero if not in database
196
     *
197
     * @var int
198
     * @access protected
199
     */
200
    protected $pid = 0;
201
202
    /**
203
     * This holds the documents' raw text pages with their corresponding
204
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
205
     *
206
     * @var array
207
     * @access protected
208
     */
209
    protected $rawTextArray = [];
210
211
    /**
212
     * Is the document instantiated successfully?
213
     *
214
     * @var bool
215
     * @access protected
216
     */
217
    protected $ready = false;
218
219
    /**
220
     * The METS file's / IIIF manifest's record identifier
221
     *
222
     * @var string
223
     * @access protected
224
     */
225
    protected $recordId;
226
227
    /**
228
     * This holds the singleton object of the document
229
     *
230
     * @var array (\Kitodo\Dlf\Common\Doc)
231
     * @static
232
     * @access protected
233
     */
234
    protected static $registry = [];
235
236
    /**
237
     * This holds the UID of the root document or zero if not multi-volumed
238
     *
239
     * @var int
240
     * @access protected
241
     */
242
    protected $rootId = 0;
243
244
    /**
245
     * Is the root id loaded?
246
     * @see $rootId
247
     *
248
     * @var bool
249
     * @access protected
250
     */
251
    protected $rootIdLoaded = false;
252
253
    /**
254
     * This holds the smLinks between logical and physical structMap
255
     *
256
     * @var array
257
     * @access protected
258
     */
259
    protected $smLinks = ['l2p' => [], 'p2l' => []];
260
261
    /**
262
     * Are the smLinks loaded?
263
     * @see $smLinks
264
     *
265
     * @var bool
266
     * @access protected
267
     */
268
    protected $smLinksLoaded = false;
269
270
    /**
271
     * This holds the logical structure
272
     *
273
     * @var array
274
     * @access protected
275
     */
276
    protected $tableOfContents = [];
277
278
    /**
279
     * Is the table of contents loaded?
280
     * @see $tableOfContents
281
     *
282
     * @var bool
283
     * @access protected
284
     */
285
    protected $tableOfContentsLoaded = false;
286
287
    /**
288
     * This holds the document's thumbnail location
289
     *
290
     * @var string
291
     * @access protected
292
     */
293
    protected $thumbnail = '';
294
295
    /**
296
     * Is the document's thumbnail location loaded?
297
     * @see $thumbnail
298
     *
299
     * @var bool
300
     * @access protected
301
     */
302
    protected $thumbnailLoaded = false;
303
304
    /**
305
     * This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
306
     *
307
     * @var string
308
     * @access protected
309
     */
310
    protected $toplevelId = '';
311
312
    /**
313
     * This holds the whole XML file as \SimpleXMLElement object
314
     *
315
     * @var \SimpleXMLElement
316
     * @access protected
317
     */
318
    protected $xml;
319
320
    /**
     * Empties the static document registry to prevent memory exhaustion
     *
     * @access public
     *
     * @static
     *
     * @return void
     */
    public static function clearRegistry()
    {
        // Replace the registry with a fresh, empty array so no cached entry stays referenced from it.
        self::$registry = [];
    }
334
335
    /**
     * Makes sure the record identifier, if one exists, is read from the document
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $pid: ID of the configuration page with the recordId config
     *
     */
    abstract protected function establishRecordId($pid);
346
347
    /**
     * Returns the source document PHP object that this Document instance represents
     *
     * @access protected
     *
     * @abstract
     *
     * @return \SimpleXMLElement|IiifResourceInterface A PHP object representation of
     * the current document. SimpleXMLElement for METS, IiifResourceInterface for IIIF
     */
    abstract protected function getDocument();
358
359
    /**
     * Returns the location of a downloadable file for a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
     *
     * @return string The file's location as URL
     */
    abstract public function getDownloadLocation($id);
371
372
    /**
     * Returns the location of a file representing a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
     *
     * @return string The file's location as URL
     */
    abstract public function getFileLocation($id);
384
385
    /**
     * Returns the MIME type of a file representing a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node
     *
     * @return string The file's MIME type
     */
    abstract public function getFileMimeType($id);
397
398
    /**
     * This is a singleton class, thus an instance must be created by this method.
     *
     * Returns the cached instance for $location when there is one (unless a reload is forced),
     * otherwise fetches the resource, detects whether it is METS or IIIF and creates the
     * matching document object, which is then stored in the document cache.
     *
     * @access public
     *
     * @static
     *
     * @param string $location: The URL of XML file or the IRI of the IIIF resource
     * @param array $settings: Settings array; only 'storagePid' is read here (treated as 0 if missing)
     * @param bool $forceReload: Force reloading the document instead of returning the cached instance
     *
     * @return \Kitodo\Dlf\Common\Doc|null Instance of this class, either MetsDocument or IiifManifest,
     * or null if the location could not be fetched or its format was not recognized
     */
    public static function &getInstance($location, $settings = [], $forceReload = false)
    {
        // Only consult the cache when the caller did not ask for a forced reload.
        if (!$forceReload) {
            $instance = self::getDocCache($location);
            if ($instance) {
                return $instance;
            }
        }
        $instance = null;

        // Create new instance depending on format (METS or IIIF) ...
        $documentFormat = null;
        $xml = null;
        $iiif = null;

        // Try to load a file from the url
        if (GeneralUtility::isValidUrl($location)) {
            // Load extension configuration
            $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
            // Set user-agent to identify self when fetching XML data.
            if (!empty($extConf['useragent'])) {
                @ini_set('user_agent', $extConf['useragent']);
            }
            $content = GeneralUtility::getUrl($location);
            if ($content !== false) {
                $xml = Helper::getXmlFileAsString($content);
                if ($xml !== false) {
                    /* @var $xml \SimpleXMLElement */
                    $xml->registerXPathNamespace('mets', 'http://www.loc.gov/METS/');
                    $xpathResult = $xml->xpath('//mets:mets');
                    $documentFormat = !empty($xpathResult) ? 'METS' : null;
                } else {
                    // Try to load file as IIIF resource instead.
                    $contentAsJsonArray = json_decode($content, true);
                    if ($contentAsJsonArray !== null) {
                        IiifHelper::setUrlReader(IiifUrlReader::getInstance());
                        IiifHelper::setMaxThumbnailHeight($extConf['iiifThumbnailHeight']);
                        IiifHelper::setMaxThumbnailWidth($extConf['iiifThumbnailWidth']);
                        $iiif = IiifHelper::loadIiifResource($contentAsJsonArray);
                        if ($iiif instanceof IiifResourceInterface) {
                            $documentFormat = 'IIIF';
                        }
                    }
                }
            }
        }

        // Sanitize input. $settings defaults to an empty array, so 'storagePid' may be absent.
        $pid = max((int) ($settings['storagePid'] ?? 0), 0);
        if ($documentFormat === 'METS') {
            $instance = new MetsDocument($location, $pid, $xml);
        } elseif ($documentFormat === 'IIIF') {
            $instance = new IiifManifest($location, $pid, $iiif);
        }

        // Remember the freshly created instance for subsequent calls.
        if ($instance) {
            self::setDocCache($location, $instance);
        }

        return $instance;
    }
470
471
    /**
     * Returns details about a logical structure element
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the logical structure node (METS) or
     * the @id property of the Manifest / Range (IIIF)
     * @param bool $recursive: Whether to include the child elements / resources
     *
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
     */
    abstract public function getLogicalStructure($id, $recursive = false);
485
486
    /**
     * Extracts all the metadata for a logical structure node
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the logical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     * @param int $cPid: The PID for the metadata definitions
     *                       (defaults to $this->cPid or $this->pid)
     *
     * @return array The logical structure node's / the IIIF resource's parsed metadata array
     */
    abstract public function getMetadata($id, $cPid = 0);
501
502
    /**
     * Looks up the first physical page whose order label contains the given logical page label
     *
     * The most recent successful lookup is remembered in $this->lastSearchedPhysicalPage
     * and answered from there when the same label is requested again.
     *
     * @access public
     *
     * @param string $logicalPage: The label (or a part of the label) of the logical page
     *
     * @return int The physical page number (1 if no page matches)
     */
    public function getPhysicalPage($logicalPage)
    {
        // Same label as last time? Then reuse the remembered result.
        if (
            !empty($this->lastSearchedPhysicalPage['logicalPage'])
            && $this->lastSearchedPhysicalPage['logicalPage'] == $logicalPage
        ) {
            return $this->lastSearchedPhysicalPage['physicalPage'];
        }

        // Walk the physical structure in order, counting entries from zero.
        $position = 0;
        foreach ($this->physicalStructureInfo as $pageInfo) {
            $isMatch = strpos($pageInfo['orderlabel'], $logicalPage) !== false;
            if ($isMatch) {
                $this->lastSearchedPhysicalPage['logicalPage'] = $logicalPage;
                $this->lastSearchedPhysicalPage['physicalPage'] = $position;
                return $position;
            }
            $position++;
        }

        // Nothing matched: fall back to page 1.
        return 1;
    }
531
532
    /**
     * Extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas. Text might be
     * given as ALTO for METS or as annotations or ALTO for IIIF resources.
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     *
     * @return string The OCR full text
     */
    abstract public function getFullText($id);
546
547
    /**
     * This extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas from an
     * XML full text representation (currently only ALTO). For IIIF manifests, ALTO documents have
     * to be given in the Canvas' / Manifest's "seeAlso" property.
     *
     * The first configured full text file group that has a file for the node is used. On success
     * the converted text is also stored in $this->rawTextArray[$id].
     *
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     *
     * @return string The OCR full text, or an empty string if it could not be loaded or converted
     */
    protected function getFullTextFromXml($id)
    {
        $fullText = '';
        // Defaults for the case that none of the configured file groups has a file for this node;
        // without them both variables would be undefined further down.
        $fileContent = false;
        $textFormat = '';
        // Load available text formats, ...
        $this->loadFormats();
        // ... physical structure ...
        $this->_getPhysicalStructure();
        // ... and extension configuration.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        $fileGrpsFulltext = GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext']);
        if (empty($this->physicalStructureInfo[$id])) {
            $this->logger->warning('Invalid structure node @ID "' . $id . '"');
            return $fullText;
        }
        while ($fileGrpFulltext = array_shift($fileGrpsFulltext)) {
            if (!empty($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext])) {
                // Get full text file.
                $fileContent = GeneralUtility::getUrl($this->getFileLocation($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext]));
                if ($fileContent === false) {
                    $this->logger->warning('Couldn\'t load full text file for structure node @ID "' . $id . '"');
                    return $fullText;
                }
                $textFormat = $this->getTextFormat($fileContent);
                // Only the first file group with a file is considered.
                break;
            }
        }
        // Is this text format supported?
        // This part actually differs from previous version of indexed OCR
        if (!empty($fileContent) && !empty($this->formats[$textFormat])) {
            $textMiniOcr = '';
            if (!empty($this->formats[$textFormat]['class'])) {
                $class = $this->formats[$textFormat]['class'];
                // Get the raw text from class.
                if (
                    class_exists($class)
                    && ($obj = GeneralUtility::makeInstance($class)) instanceof FulltextInterface
                ) {
                    // Load XML from file.
                    $ocrTextXml = Helper::getXmlFileAsString($fileContent);
                    $textMiniOcr = $obj->getTextAsMiniOcr($ocrTextXml);
                    $this->rawTextArray[$id] = $textMiniOcr;
                } else {
                    // The message names the method that is actually invoked above.
                    $this->logger->warning('Invalid class/method "' . $class . '->getTextAsMiniOcr()" for text format "' . $textFormat . '"');
                }
            }
            $fullText = $textMiniOcr;
        } else {
            $this->logger->warning('Unsupported text format "' . $textFormat . '" in physical node with @ID "' . $id . '"');
        }
        return $fullText;
    }
610
611
    /**
     * Determines the format of the OCR full text from the XML root element
     *
     * @access private
     *
     * @param string $fileContent: content of the XML file
     *
     * @return string The upper-cased root element name, or an empty string if the content is not parseable XML
     */
    private function getTextFormat($fileContent)
    {
        $parsedXml = Helper::getXmlFileAsString($fileContent);

        // The root element's name serves as the text format identifier.
        return $parsedXml !== false ? strtoupper($parsedXml->getName()) : '';
    }
631
632
    /**
     * Determines a title for the given document
     *
     * @access public
     *
     * @static
     *
     * @param int $uid: The UID of the document
     * @param bool $recursive: Search superior documents for a title, too?
     *
     * @return string The title of the document itself or a parent document
     */
    public static function getTitle($uid, $recursive = false)
    {
        $title = '';
        // Sanitize input.
        $uid = max(intval($uid), 0);
        if (!$uid) {
            Helper::log('Invalid UID ' . $uid . ' for document', LOG_SEVERITY_ERROR);
            return $title;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');

        $statement = $queryBuilder
            ->select(
                'tx_dlf_documents.title',
                'tx_dlf_documents.partof'
            )
            ->from('tx_dlf_documents')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_documents.uid', $uid),
                Helper::whereExpression('tx_dlf_documents')
            )
            ->setMaxResults(1)
            ->execute();

        $row = $statement->fetch();
        if (!$row) {
            Helper::log('No document with UID ' . $uid . ' found or document not accessible', LOG_SEVERITY_WARNING);
            return $title;
        }

        // Get title information.
        $title = $row['title'];
        $parentUid = $row['partof'];
        // Search parent documents recursively for a title? Skip self-references to avoid endless recursion.
        $shouldAskParent = $recursive
            && empty($title)
            && intval($parentUid)
            && $parentUid != $uid;
        if ($shouldAskParent) {
            $title = self::getTitle($parentUid, true);
        }
        return $title;
    }
687
688
    /**
     * Extracts all the metadata for the toplevel logical structure node / resource
     *
     * @access public
     *
     * @param int $cPid: The PID for the metadata definitions
     *
     * @return array The logical structure node's / resource's parsed metadata array
     */
    public function getTitledata($cPid = 0)
    {
        $titledata = $this->getMetadata($this->_getToplevelId(), $cPid);
        // METS documents additionally contribute information from their structural map.
        if ($this instanceof MetsDocument) {
            $this->addMetadataFromMets($titledata, $this->_getToplevelId());
        }
        // Put the record identifier of the METS file / IIIF manifest first if it is not listed yet.
        $recordIdIsMissing = is_array($titledata)
            && array_key_exists('record_id', $titledata)
            && !empty($this->recordId)
            && !in_array($this->recordId, $titledata['record_id']);
        if ($recordIdIsMissing) {
            array_unshift($titledata['record_id'], $this->recordId);
        }
        return $titledata;
    }
718
719
    /**
     * Traverse a logical (sub-) structure tree to find the structure with the requested logical id and return its depth.
     *
     * @access protected
     *
     * @param array $structure: logical structure array
     * @param int $depth: current tree depth
     * @param string $logId: ID of the logical structure whose depth is requested
     *
     * @return int|bool: false if structure with $logId is not a child of this substructure,
     * or the actual depth.
     */
    protected function getTreeDepth($structure, $depth, $logId)
    {
        foreach ($structure as $node) {
            // Found on the current level.
            if ($node['id'] == $logId) {
                return $depth;
            }
            // Leaf nodes have nothing further to search.
            if (!array_key_exists('children', $node)) {
                continue;
            }
            // Descend one level; stop at the first subtree that contains the id.
            $depthInSubtree = $this->getTreeDepth($node['children'], $depth + 1, $logId);
            if ($depthInSubtree !== false) {
                return $depthInSubtree;
            }
        }
        return false;
    }
745
746
    /**
     * Get the tree depth of a logical structure element within the table of contents
     *
     * @access public
     *
     * @param string $logId: The id of the logical structure element whose depth is requested
     * @return int|bool tree depth as integer or false if no element with $logId exists within the TOC.
     */
    public function getStructureDepth($logId)
    {
        // Top-level entries of the table of contents count as depth 1.
        $tableOfContents = $this->_getTableOfContents();
        return $this->getTreeDepth($tableOfContents, 1, $logId);
    }
758
759
    /**
     * Sets some basic class properties
     *
     * @access protected
     *
     * @abstract
     *
     * @param string $location: The location URL of the XML file to parse
     *
     * @return void
     */
    abstract protected function init($location);
771
772
    /**
     * Reuse any document object that might have been already loaded to determine whether document is METS or IIIF
     *
     * @access protected
     *
     * @abstract
     *
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: any instance that has already been loaded
     *
     * @return bool true if $preloadedDocument can actually be reused, false if it has to be loaded again
     */
    abstract protected function setPreloadedDocument($preloadedDocument);
784
785
    /**
     * METS/IIIF specific part of loading a location
     *
     * @access protected
     *
     * @abstract
     *
     * @param string $location: The URL of the file to load
     *
     * @return bool true on success or false on failure
     */
    abstract protected function loadLocation($location);
797
798
    /**
     * Load XML file / IIIF resource from URL
     *
     * Validates the location, applies the configured user agent and then hands over
     * to the format specific loadLocation().
     *
     * @access protected
     *
     * @param string $location: The URL of the file to load
     *
     * @return bool true on success or false on failure
     */
    protected function load($location)
    {
        // Refuse anything that is not a valid URL.
        if (!GeneralUtility::isValidUrl($location)) {
            $this->logger->error('Invalid file location "' . $location . '" for document loading');
            return false;
        }
        // Load extension configuration
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        // Set user-agent to identify self when fetching XML / JSON-LD data.
        if (!empty($extConf['useragent'])) {
            @ini_set('user_agent', $extConf['useragent']);
        }
        // The actual loading of the XML / JSON-LD file is format specific.
        return $this->loadLocation($location);
    }
824
825
    /**
     * Analyze the document if it contains any fulltext that needs to be indexed.
     *
     * @access protected
     *
     * @abstract
     */
    abstract protected function ensureHasFulltextIsSet();
833
834
    /**
     * Register all available data formats
     *
     * Reads the format records with pid 0 from table tx_dlf_formats once and merges
     * them into $this->formats, keyed by their type.
     *
     * @access protected
     *
     * @return void
     */
    protected function loadFormats()
    {
        // Nothing to do if the formats were already fetched.
        if ($this->formatsLoaded) {
            return;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_formats');

        // Get available data formats from database.
        $statement = $queryBuilder
            ->select(
                'tx_dlf_formats.type AS type',
                'tx_dlf_formats.root AS root',
                'tx_dlf_formats.namespace AS namespace',
                'tx_dlf_formats.class AS class'
            )
            ->from('tx_dlf_formats')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_formats.pid', 0)
            )
            ->execute();

        // Update format registry; database entries replace built-in ones of the same type.
        while ($row = $statement->fetch()) {
            $formatType = $row['type'];
            $this->formats[$formatType] = [
                'rootElement' => $row['root'],
                'namespaceURI' => $row['namespace'],
                'class' => $row['class']
            ];
        }
        $this->formatsLoaded = true;
    }
872
873
    /**
     * Register all available namespaces for a \SimpleXMLElement object
     *
     * Each key of $this->formats is registered in lower case as namespace prefix
     * for the format's namespace URI.
     *
     * @access public
     *
     * @param \SimpleXMLElement|\DOMXPath &$obj: \SimpleXMLElement or \DOMXPath object
     *
     * @return void
     */
    public function registerNamespaces(&$obj)
    {
        // TODO Check usage. XML specific method does not seem to be used anywhere outside this class within the project, but it is public and may be used by extensions.
        $this->loadFormats();
        // The registration method's name depends on the kind of object we were given.
        $registerMethod = null;
        if ($obj instanceof \SimpleXMLElement) {
            $registerMethod = 'registerXPathNamespace';
        } elseif ($obj instanceof \DOMXPath) {
            $registerMethod = 'registerNamespace';
        }
        if ($registerMethod === null) {
            $this->logger->error('Given object is neither a SimpleXMLElement nor a DOMXPath instance');
            return;
        }
        // Register metadata format's namespaces.
        foreach ($this->formats as $encoding => $formatConfig) {
            $prefix = strtolower($encoding);
            $obj->$registerMethod($prefix, $formatConfig['namespaceURI']);
        }
    }
900
901
    /**
     * Getter for $this->cPid, dispatched by __get()
     *
     * @access protected
     *
     * @return int The PID of the metadata definitions
     */
    protected function _getCPid()
    {
        return $this->cPid;
    }
912
913
    /**
     * Getter for $this->hasFulltext, dispatched by __get()
     *
     * ensureHasFulltextIsSet() is called first, so the format specific
     * check has run before the flag is returned.
     *
     * @access protected
     *
     * @return bool Are there any fulltext files available?
     */
    protected function _getHasFulltext()
    {
        // Let the concrete document class determine the flag before it is read.
        $this->ensureHasFulltextIsSet();

        return $this->hasFulltext;
    }
925
926
    /**
     * Getter for $this->location, dispatched by __get()
     *
     * @access protected
     *
     * @return string The location of the document
     */
    protected function _getLocation()
    {
        return $this->location;
    }
937
938
    /**
     * Format specific part of building the document's metadata array
     *
     * Called by _getMetadataArray() whenever the metadata array has to be
     * (re-)built for the given PID.
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $cPid: The PID of the metadata definitions
     */
    abstract protected function prepareMetadataArray($cPid);
948
949
    /**
     * Getter for the document's metadata array, dispatched by __get()
     *
     * The array is built by prepareMetadataArray() on first access and rebuilt
     * whenever the PID of the metadata definitions differs from the one stored
     * at index 0 of the array.
     *
     * @access protected
     *
     * @return array Array of metadata with their corresponding logical structure node ID as key
     */
    protected function _getMetadataArray()
    {
        // Use the configured PID for the metadata definitions, falling back to the document's PID.
        $cPid = $this->cPid ?: $this->pid;
        if (!$cPid) {
            $this->logger->error('Invalid PID ' . $cPid . ' for metadata definitions');
            return [];
        }

        // The cached array is still valid if it was built for the very same PID.
        if (
            $this->metadataArrayLoaded
            && $this->metadataArray[0] == $cPid
        ) {
            return $this->metadataArray;
        }

        $this->prepareMetadataArray($cPid);
        // Remember the PID the array was built for.
        $this->metadataArray[0] = $cPid;
        $this->metadataArrayLoaded = true;

        return $this->metadataArray;
    }
974
975
    /**
     * Getter for $this->numPages, dispatched by __get()
     *
     * _getPhysicalStructure() is called before the counter is read.
     *
     * @access protected
     *
     * @return int The total number of pages and/or tracks
     */
    protected function _getNumPages()
    {
        // Called before reading the counter; its return value is not needed here.
        $this->_getPhysicalStructure();

        return $this->numPages;
    }
987
988
    /**
     * Getter for $this->parentId, dispatched by __get()
     *
     * @access protected
     *
     * @return int The UID of the parent document or zero if not applicable
     */
    protected function _getParentId()
    {
        return $this->parentId;
    }
999
1000
    /**
     * Getter for the document's physical structure, dispatched by __get()
     *
     * Implemented by the format specific document classes.
     *
     * @access protected
     *
     * @abstract
     *
     * @return array Array of physical elements' id, type, label and file representations ordered
     * by @ORDER attribute / IIIF Sequence's Canvases
     */
    abstract protected function _getPhysicalStructure();
1011
1012
    /**
     * Getter for the document's physical structure metadata, dispatched by __get()
     *
     * Calls _getPhysicalStructure() first if the physical structure has not been
     * loaded yet.
     *
     * @access protected
     *
     * @return array Array of elements' type, label and file representations ordered by @ID attribute / Canvas order
     */
    protected function _getPhysicalStructureInfo()
    {
        // Already loaded, so return the stored information right away.
        if ($this->physicalStructureLoaded) {
            return $this->physicalStructureInfo;
        }

        // Build the physical structure array.
        $this->_getPhysicalStructure();

        return $this->physicalStructureInfo;
    }
1028
1029
    /**
     * Getter for $this->pid, dispatched by __get()
     *
     * @access protected
     *
     * @return int The PID of the document or zero if not in database
     */
    protected function _getPid()
    {
        return $this->pid;
    }
1040
1041
    /**
     * Getter for $this->ready, dispatched by __get()
     *
     * @access protected
     *
     * @return bool Is the document instantiated successfully?
     */
    protected function _getReady()
    {
        return $this->ready;
    }
1052
1053
    /**
     * Getter for $this->recordId, dispatched by __get()
     *
     * @access protected
     *
     * @return mixed The METS file's / IIIF manifest's record identifier
     */
    protected function _getRecordId()
    {
        return $this->recordId;
    }
1064
1065
    /**
     * Getter for $this->rootId, dispatched by __get()
     *
     * On first access the root ID is taken over from the parent document, if the
     * document has a parent. The lookup is done only once per instance.
     *
     * @access protected
     *
     * @return int The UID of the root document or zero if not applicable
     */
    protected function _getRootId()
    {
        // The root ID has been resolved before.
        if ($this->rootIdLoaded) {
            return $this->rootId;
        }

        if ($this->parentId) {
            // Take over the root ID of the parent document.
            $parentDocument = self::getInstance($this->parentId, ['storagePid' => $this->pid]);
            $this->rootId = $parentDocument->rootId;
        }
        $this->rootIdLoaded = true;

        return $this->rootId;
    }
1083
1084
    /**
     * Getter for the smLinks between logical and physical structMap (METS), dispatched by __get()
     *
     * For IIIF the relation between Canvases and Manifests / Ranges is modelled in the same way.
     *
     * @access protected
     *
     * @abstract
     *
     * @return array The links between logical and physical nodes / Range, Manifest and Canvas
     */
    abstract protected function _getSmLinks();
1095
1096
    /**
     * Getter for the document's logical structure, dispatched by __get()
     *
     * On first access getLogicalStructure('', true) is called before
     * $this->tableOfContents is returned.
     *
     * @access protected
     *
     * @return array Array of structure nodes' id, label, type and physical page indexes/mptr / Canvas link with original hierarchy preserved
     */
    protected function _getTableOfContents()
    {
        // The logical structure has been loaded before.
        if ($this->tableOfContentsLoaded) {
            return $this->tableOfContents;
        }

        // Get all logical structures.
        $this->getLogicalStructure('', true);
        $this->tableOfContentsLoaded = true;

        return $this->tableOfContents;
    }
1113
1114
    /**
     * Getter for the document's thumbnail location, dispatched by __get()
     *
     * Implemented by the format specific document classes.
     *
     * @access protected
     *
     * @abstract
     *
     * @param bool $forceReload: Force reloading the thumbnail instead of returning the cached value
     *
     * @return string The document's thumbnail location
     */
    abstract protected function _getThumbnail($forceReload = false);
1126
1127
    /**
     * Getter for the ID of the toplevel logical structure node, dispatched by __get()
     *
     * Implemented by the format specific document classes.
     *
     * @access protected
     *
     * @abstract
     *
     * @return string The logical structure node's ID
     */
    abstract protected function _getToplevelId();
1137
1138
    /**
     * Getter for $this->uid, dispatched by __get()
     *
     * @access protected
     *
     * @return mixed The UID or the URL of the document
     */
    protected function _getUid()
    {
        return $this->uid;
    }
1149
1150
    /**
     * Setter for $this->cPid, dispatched by __set()
     *
     * The value is converted to an integer; negative values are stored as zero.
     *
     * @access protected
     *
     * @param int $value: The new PID for the metadata definitions
     *
     * @return void
     */
    protected function _setCPid($value)
    {
        $pid = (int) $value;
        // A PID is never negative.
        $this->cPid = $pid > 0 ? $pid : 0;
    }
1163
1164
    /**
     * The constructor is protected because instances are meant to be obtained
     * by calling \Kitodo\Dlf\Common\Doc::getInstance()
     *
     * It hands over the preloaded document, initializes the object with the given
     * location and establishes the record identifier, in that order.
     *
     * @access protected
     *
     * @param string $location: The location URL of the XML file to parse
     * @param int $pid: If > 0, then only document with this PID gets loaded
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: Either null or the \SimpleXMLElement
     * or IiifResourceInterface that has been loaded to determine the basic document format.
     *
     * @return void
     */
    protected function __construct($location, $pid, $preloadedDocument)
    {
        // The order matters: the preloaded document is set before init() runs.
        $this->setPreloadedDocument($preloadedDocument);
        $this->init($location);
        $this->establishRecordId($pid);
    }
1184
1185
    /**
     * Magic method which is called each time an inaccessible property is read
     *
     * The read access is forwarded to the method "_get" followed by the property
     * name with an upper-case first letter. If the property is not declared or no
     * such method exists, a warning is logged and null is returned.
     *
     * @access public
     *
     * @param string $var: Name of variable to get
     *
     * @return mixed Value of $this->$var
     */
    public function __get($var)
    {
        $getter = '_get' . ucfirst($var);

        if (
            property_exists($this, $var)
            && method_exists($this, $getter)
        ) {
            return $this->$getter();
        }

        $this->logger->warning('There is no getter function for property "' . $var . '"');

        return null;
    }
1207
1208
    /**
     * Magic method which is called each time an inaccessible property is checked with isset() or empty()
     *
     * The getter is resolved here instead of delegating to __get(), because __get()
     * logs a "There is no getter function" warning for unknown properties. Merely
     * probing a property must not write to the log. The result is the same as before:
     * a property without getter is reported as not set.
     *
     * @access public
     *
     * @param string $var: Name of variable to check
     *
     * @return bool true if variable is set and not empty, false otherwise
     */
    public function __isset($var)
    {
        $getter = '_get' . ucfirst($var);

        // Same lookup rule as in __get(), but without logging a warning.
        if (
            !property_exists($this, $var)
            || !method_exists($this, $getter)
        ) {
            return false;
        }

        return !empty($this->$getter());
    }
1221
1222
    /**
     * Magic method which is called each time an inaccessible property is written
     *
     * The write access is forwarded to the method "_set" followed by the property
     * name with an upper-case first letter. If the property is not declared or no
     * such method exists, a warning is logged and the value is discarded.
     *
     * @access public
     *
     * @param string $var: Name of variable to set
     * @param mixed $value: New value of variable
     *
     * @return void
     */
    public function __set($var, $value)
    {
        $setter = '_set' . ucfirst($var);

        if (
            property_exists($this, $var)
            && method_exists($this, $setter)
        ) {
            $this->$setter($value);
            return;
        }

        $this->logger->warning('There is no setter function for property "' . $var . '"');
    }
1244
1245
    /**
     * Look up a document in the "tx_dlf_doc" cache
     *
     * The MD5 hash of the location serves as cache identifier.
     *
     * @param string $location
     * @return Doc|false
     */
    private static function getDocCache(string $location)
    {
        $cache = GeneralUtility::makeInstance(CacheManager::class)->getCache('tx_dlf_doc');

        return $cache->get(md5($location));
    }
1259
1260
    /**
     * Store a document in the "tx_dlf_doc" cache
     *
     * The MD5 hash of the location serves as cache identifier.
     *
     * @param string $location
     * @param Doc $doc
     * @return void
     */
    private static function setDocCache(string $location, Doc $doc)
    {
        $cache = GeneralUtility::makeInstance(CacheManager::class)->getCache('tx_dlf_doc');

        // Save the document under the hashed location.
        $cache->set(md5($location), $doc);
    }
1275
1276
}
1277