Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Pull Request — dev-extbase-fluid (#746)
by Alexander
02:30
created

Doc::getTitle()   B

Complexity

Conditions 7
Paths 4

Size

Total Lines 42
Code Lines 29

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
eloc 29
nc 4
nop 2
dl 0
loc 42
rs 8.5226
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
5
 *
6
 * This file is part of the Kitodo and TYPO3 projects.
7
 *
8
 * @license GNU General Public License version 3 or later.
9
 * For the full copyright and license information, please read the
10
 * LICENSE.txt file that was distributed with this source code.
11
 */
12
13
namespace Kitodo\Dlf\Common;
14
15
use TYPO3\CMS\Core\Cache\CacheManager;
16
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
17
use TYPO3\CMS\Core\Database\ConnectionPool;
18
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
19
use TYPO3\CMS\Core\Log\LogManager;
20
use TYPO3\CMS\Core\Utility\GeneralUtility;
21
use TYPO3\CMS\Core\Utility\MathUtility;
22
use TYPO3\CMS\Extbase\Configuration\ConfigurationManager;
23
use Ubl\Iiif\Presentation\Common\Model\Resources\IiifResourceInterface;
24
use Ubl\Iiif\Tools\IiifHelper;
25
26
/**
27
 * Document class for the 'dlf' extension
28
 *
29
 * @author Sebastian Meyer <[email protected]>
30
 * @author Henrik Lochmann <[email protected]>
31
 * @package TYPO3
32
 * @subpackage dlf
33
 * @access public
34
 * @property int $cPid This holds the PID for the configuration
35
 * @property-read bool $hasFulltext Are there any fulltext files available?
36
 * @property-read string $location This holds the documents location
37
 * @property-read array $metadataArray This holds the documents' parsed metadata array
38
 * @property-read int $numPages This holds the total number of pages
39
 * @property-read int $parentId This holds the UID of the parent document or zero if not multi-volumed
40
 * @property-read array $physicalStructure This holds the physical structure
41
 * @property-read array $physicalStructureInfo This holds the physical structure metadata
42
 * @property-read int $pid This holds the PID of the document or zero if not in database
43
 * @property-read bool $ready Is the document instantiated successfully?
44
 * @property-read string $recordId The METS file's / IIIF manifest's record identifier
45
 * @property-read int $rootId This holds the UID of the root document or zero if not multi-volumed
46
 * @property-read array $smLinks This holds the smLinks between logical and physical structMap
47
 * @property-read array $tableOfContents This holds the logical structure
48
 * @property-read string $thumbnail This holds the document's thumbnail location
49
 * @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
50
 * @property-read mixed $uid This holds the UID or the URL of the document
51
 * @abstract
52
 */
53
abstract class Doc
54
{
55
    /**
56
     * This holds the logger
57
     *
58
     * @var LogManager
59
     * @access protected
60
     */
61
    protected $logger;
62
63
    /**
64
     * This holds the PID for the configuration
65
     *
66
     * @var int
67
     * @access protected
68
     */
69
    protected $cPid = 0;
70
71
    /**
72
     * The extension key
73
     *
74
     * @var string
75
     * @access public
76
     */
77
    public static $extKey = 'dlf';
78
79
    /**
80
     * This holds the configuration for all supported metadata encodings
81
     * @see loadFormats()
82
     *
83
     * @var array
84
     * @access protected
85
     */
86
    protected $formats = [
87
        'OAI' => [
88
            'rootElement' => 'OAI-PMH',
89
            'namespaceURI' => 'http://www.openarchives.org/OAI/2.0/',
90
        ],
91
        'METS' => [
92
            'rootElement' => 'mets',
93
            'namespaceURI' => 'http://www.loc.gov/METS/',
94
        ],
95
        'XLINK' => [
96
            'rootElement' => 'xlink',
97
            'namespaceURI' => 'http://www.w3.org/1999/xlink',
98
        ]
99
    ];
100
101
    /**
102
     * Are the available metadata formats loaded?
103
     * @see $formats
104
     *
105
     * @var bool
106
     * @access protected
107
     */
108
    protected $formatsLoaded = false;
109
110
    /**
111
     * Are there any fulltext files available? This also includes IIIF text annotations
112
     * with motivation 'painting' if Kitodo.Presentation is configured to store text
113
     * annotations as fulltext.
114
     *
115
     * @var bool
116
     * @access protected
117
     */
118
    protected $hasFulltext = false;
119
120
    /**
121
     * Last searched logical and physical page
122
     *
123
     * @var array
124
     * @access protected
125
     */
126
    protected $lastSearchedPhysicalPage = ['logicalPage' => null, 'physicalPage' => null];
127
128
    /**
129
     * This holds the documents location
130
     *
131
     * @var string
132
     * @access protected
133
     */
134
    protected $location = '';
135
136
    /**
137
     * This holds the logical units
138
     *
139
     * @var array
140
     * @access protected
141
     */
142
    protected $logicalUnits = [];
143
144
    /**
145
     * This holds the documents' parsed metadata array with their corresponding
146
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
147
     *
148
     * @var array
149
     * @access protected
150
     */
151
    protected $metadataArray = [];
152
153
    /**
154
     * Is the metadata array loaded?
155
     * @see $metadataArray
156
     *
157
     * @var bool
158
     * @access protected
159
     */
160
    protected $metadataArrayLoaded = false;
161
162
    /**
163
     * This holds the total number of pages
164
     *
165
     * @var int
166
     * @access protected
167
     */
168
    protected $numPages = 0;
169
170
    /**
171
     * This holds the UID of the parent document or zero if not multi-volumed
172
     *
173
     * @var int
174
     * @access protected
175
     */
176
    protected $parentId = 0;
177
178
    /**
179
     * This holds the physical structure
180
     *
181
     * @var array
182
     * @access protected
183
     */
184
    protected $physicalStructure = [];
185
186
    /**
187
     * This holds the physical structure metadata
188
     *
189
     * @var array
190
     * @access protected
191
     */
192
    protected $physicalStructureInfo = [];
193
194
    /**
195
     * Is the physical structure loaded?
196
     * @see $physicalStructure
197
     *
198
     * @var bool
199
     * @access protected
200
     */
201
    protected $physicalStructureLoaded = false;
202
203
    /**
204
     * This holds the PID of the document or zero if not in database
205
     *
206
     * @var int
207
     * @access protected
208
     */
209
    protected $pid = 0;
210
211
    /**
212
     * This holds the documents' raw text pages with their corresponding
213
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
214
     *
215
     * @var array
216
     * @access protected
217
     */
218
    protected $rawTextArray = [];
219
220
    /**
221
     * Is the document instantiated successfully?
222
     *
223
     * @var bool
224
     * @access protected
225
     */
226
    protected $ready = false;
227
228
    /**
229
     * The METS file's / IIIF manifest's record identifier
230
     *
231
     * @var string
232
     * @access protected
233
     */
234
    protected $recordId;
235
236
    /**
237
     * This holds the singleton object of the document
238
     *
239
     * @var array (\Kitodo\Dlf\Common\Doc)
240
     * @static
241
     * @access protected
242
     */
243
    protected static $registry = [];
244
245
    /**
246
     * This holds the UID of the root document or zero if not multi-volumed
247
     *
248
     * @var int
249
     * @access protected
250
     */
251
    protected $rootId = 0;
252
253
    /**
254
     * Is the root id loaded?
255
     * @see $rootId
256
     *
257
     * @var bool
258
     * @access protected
259
     */
260
    protected $rootIdLoaded = false;
261
262
    /**
263
     * This holds the smLinks between logical and physical structMap
264
     *
265
     * @var array
266
     * @access protected
267
     */
268
    protected $smLinks = ['l2p' => [], 'p2l' => []];
269
270
    /**
271
     * Are the smLinks loaded?
272
     * @see $smLinks
273
     *
274
     * @var bool
275
     * @access protected
276
     */
277
    protected $smLinksLoaded = false;
278
279
    /**
280
     * This holds the logical structure
281
     *
282
     * @var array
283
     * @access protected
284
     */
285
    protected $tableOfContents = [];
286
287
    /**
288
     * Is the table of contents loaded?
289
     * @see $tableOfContents
290
     *
291
     * @var bool
292
     * @access protected
293
     */
294
    protected $tableOfContentsLoaded = false;
295
296
    /**
297
     * This holds the document's thumbnail location
298
     *
299
     * @var string
300
     * @access protected
301
     */
302
    protected $thumbnail = '';
303
304
    /**
305
     * Is the document's thumbnail location loaded?
306
     * @see $thumbnail
307
     *
308
     * @var bool
309
     * @access protected
310
     */
311
    protected $thumbnailLoaded = false;
312
313
    /**
314
     * This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
315
     *
316
     * @var string
317
     * @access protected
318
     */
319
    protected $toplevelId = '';
320
321
    /**
322
     * This holds the UID or the URL of the document
323
     *
324
     * @var mixed
325
     * @access protected
326
     */
327
    protected $uid = 0;
328
329
    /**
330
     * This holds the whole XML file as \SimpleXMLElement object
331
     *
332
     * @var \SimpleXMLElement
333
     * @access protected
334
     */
335
    protected $xml;
336
337
    /**
338
     * This clears the static registry to prevent memory exhaustion
339
     *
340
     * @access public
341
     *
342
     * @static
343
     *
344
     * @return void
345
     */
346
    public static function clearRegistry()
    {
        // Replace the static registry with an empty array so previously
        // registered entries are released.
        self::$registry = [];
    }
351
352
    /**
353
     * This ensures that the recordId, if existent, is retrieved from the document
354
     *
355
     * @access protected
356
     *
357
     * @abstract
358
     *
359
     * @param int $pid: ID of the configuration page with the recordId config
360
     *
361
     */
362
    abstract protected function establishRecordId($pid);
363
364
    /**
365
     * Source document PHP object which is represented by a Document instance
366
     *
367
     * @access protected
368
     *
369
     * @abstract
370
     *
371
     * @return \SimpleXMLElement|IiifResourceInterface A PHP object representation of
372
     * the current document. SimpleXMLElement for METS, IiifResourceInterface for IIIF
373
     */
374
    abstract protected function getDocument();
375
376
    /**
377
     * This gets the location of a downloadable file for a physical page or track
378
     *
379
     * @access public
380
     *
381
     * @abstract
382
     *
383
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
384
     *
385
     * @return string    The file's location as URL
386
     */
387
    abstract public function getDownloadLocation($id);
388
389
    /**
390
     * This gets the location of a file representing a physical page or track
391
     *
392
     * @access public
393
     *
394
     * @abstract
395
     *
396
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
397
     *
398
     * @return string The file's location as URL
399
     */
400
    abstract public function getFileLocation($id);
401
402
    /**
403
     * This gets the MIME type of a file representing a physical page or track
404
     *
405
     * @access public
406
     *
407
     * @abstract
408
     *
409
     * @param string $id: The @ID attribute of the file node
410
     *
411
     * @return string The file's MIME type
412
     */
413
    abstract public function getFileMimeType($id);
414
415
    /**
416
     * This is a singleton class, thus an instance must be created by this method
417
     *
418
     * @access public
419
     *
420
     * @static
421
     *
422
     * @param string $location: The URL of XML file or the IRI of the IIIF resource
423
     * @param array $settings
424
     * @param bool $forceReload: Force reloading the document instead of returning the cached instance
425
     *
426
     * @return \Kitodo\Dlf\Common\Doc Instance of this class, either MetsDocument or IiifManifest
427
     */
428
    public static function &getInstance($location, $settings = [], $forceReload = false)
    {
        // Hand out the cached instance unless the caller explicitly asked for a reload.
        if (!$forceReload) {
            $cachedInstance = self::getDocCache($location);
            if ($cachedInstance) {
                return $cachedInstance;
            }
        }

        // Create new instance depending on format (METS or IIIF) ...
        // NOTE(review): on failure this now yields null instead of the falsy cache
        // lookup result; confirm no caller compares the result against false.
        $instance = null;
        $documentFormat = null;
        $xml = null;
        $iiif = null;

        // Try to load a file from the url
        if (GeneralUtility::isValidUrl($location)) {
            // Load extension configuration once; it is needed for the user agent
            // and for the IIIF thumbnail limits below.
            $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
            // Set user-agent to identify self when fetching XML data.
            if (!empty($extConf['useragent'])) {
                @ini_set('user_agent', $extConf['useragent']);
            }
            $content = GeneralUtility::getUrl($location);
            if ($content !== false) {
                $xml = Helper::getXmlFileAsString($content);
                if ($xml !== false) {
                    /* @var $xml \SimpleXMLElement */
                    $xml->registerXPathNamespace('mets', 'http://www.loc.gov/METS/');
                    $xpathResult = $xml->xpath('//mets:mets');
                    $documentFormat = !empty($xpathResult) ? 'METS' : null;
                } else {
                    // Try to load file as IIIF resource instead.
                    $contentAsJsonArray = json_decode($content, true);
                    if ($contentAsJsonArray !== null) {
                        IiifHelper::setUrlReader(IiifUrlReader::getInstance());
                        IiifHelper::setMaxThumbnailHeight($extConf['iiifThumbnailHeight']);
                        IiifHelper::setMaxThumbnailWidth($extConf['iiifThumbnailWidth']);
                        $iiif = IiifHelper::loadIiifResource($contentAsJsonArray);
                        if ($iiif instanceof IiifResourceInterface) {
                            $documentFormat = 'IIIF';
                        }
                    }
                }
            }
        }

        // Sanitize input. $settings defaults to an empty array, so the key may be absent.
        $pid = max(intval($settings['storagePid'] ?? 0), 0);
        if ($documentFormat === 'METS') {
            $instance = new MetsDocument($location, $pid, $xml);
        } elseif ($documentFormat === 'IIIF') {
            $instance = new IiifManifest($location, $pid, $iiif);
        }

        // Only cache real document instances; a failed detection must not be stored.
        if ($instance !== null) {
            self::setDocCache($location, $instance);
        }
        return $instance;
    }
485
486
    /**
487
     * This gets details about a logical structure element
488
     *
489
     * @access public
490
     *
491
     * @abstract
492
     *
493
     * @param string $id: The @ID attribute of the logical structure node (METS) or
494
     * the @id property of the Manifest / Range (IIIF)
495
     * @param bool $recursive: Whether to include the child elements / resources
496
     *
497
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
498
     */
499
    abstract public function getLogicalStructure($id, $recursive = false);
500
501
    /**
502
     * This extracts all the metadata for a logical structure node
503
     *
504
     * @access public
505
     *
506
     * @abstract
507
     *
508
     * @param string $id: The @ID attribute of the logical structure node (METS) or the @id property
509
     * of the Manifest / Range (IIIF)
510
     * @param int $cPid: The PID for the metadata definitions
511
     *                       (defaults to $this->cPid or $this->pid)
512
     *
513
     * @return array The logical structure node's / the IIIF resource's parsed metadata array
514
     */
515
    abstract public function getMetadata($id, $cPid = 0);
516
517
    /**
518
     * This returns the first corresponding physical page number of a given logical page label
519
     *
520
     * @access public
521
     *
522
     * @param string $logicalPage: The label (or a part of the label) of the logical page
523
     *
524
     * @return int The physical page number
525
     */
526
    public function getPhysicalPage($logicalPage)
    {
        // Answer from the one-entry memo when the same label was looked up last time.
        $memoizedLabel = $this->lastSearchedPhysicalPage['logicalPage'];
        if (!empty($memoizedLabel) && $memoizedLabel == $logicalPage) {
            return $this->lastSearchedPhysicalPage['physicalPage'];
        }

        // Walk the physical structure in order; the zero-based position of the
        // first entry whose order label contains the requested label is the result.
        foreach (array_values($this->physicalStructureInfo) as $position => $page) {
            if (strpos($page['orderlabel'], $logicalPage) === false) {
                continue;
            }
            $this->lastSearchedPhysicalPage['logicalPage'] = $logicalPage;
            $this->lastSearchedPhysicalPage['physicalPage'] = $position;
            return $position;
        }

        // No match: fall back to 1.
        return 1;
    }
546
547
    /**
548
     * This extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas. Text might be
549
     * given as ALTO for METS or as annotations or ALTO for IIIF resources.
550
     *
551
     * @access public
552
     *
553
     * @abstract
554
     *
555
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
556
     * of the Manifest / Range (IIIF)
557
     *
558
     * @return string The OCR full text
559
     */
560
    abstract public function getFullText($id);
561
562
    /**
563
     * This extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas from an
564
     * XML full text representation (currently only ALTO). For IIIF manifests, ALTO documents have
565
     * to be given in the Canvas' / Manifest's "seeAlso" property.
566
     *
567
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
568
     * of the Manifest / Range (IIIF)
569
     *
570
     * @return string The OCR full text
571
     */
572
    protected function getFullTextFromXml($id)
    {
        $fullText = '';
        // Defined up front: when none of the configured file groups has a file for
        // this node, the loop below never assigns them, yet they are read afterwards.
        $fileContent = '';
        $textFormat = '';
        // Load available text formats, ...
        $this->loadFormats();
        // ... physical structure ...
        $this->_getPhysicalStructure();
        // ... and extension configuration.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        $fileGrpsFulltext = GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext']);
        if (empty($this->physicalStructureInfo[$id])) {
            $this->logger->warning('Invalid structure node @ID "' . $id . '"');
            return $fullText;
        }
        // Use the first configured file group that actually has a file for this node.
        while ($fileGrpFulltext = array_shift($fileGrpsFulltext)) {
            if (!empty($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext])) {
                // Get full text file.
                $fileContent = GeneralUtility::getUrl($this->getFileLocation($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext]));
                if ($fileContent !== false) {
                    $textFormat = $this->getTextFormat($fileContent);
                } else {
                    $this->logger->warning('Couldn\'t load full text file for structure node @ID "' . $id . '"');
                    return $fullText;
                }
                break;
            }
        }
        // Is this text format supported?
        // This part actually differs from previous version of indexed OCR
        if (!empty($fileContent) && !empty($this->formats[$textFormat])) {
            $textMiniOcr = '';
            if (!empty($this->formats[$textFormat]['class'])) {
                $class = $this->formats[$textFormat]['class'];
                // Get the text from the format's handler class.
                if (
                    class_exists($class)
                    && ($obj = GeneralUtility::makeInstance($class)) instanceof FulltextInterface
                ) {
                    // Load XML from file.
                    $ocrTextXml = Helper::getXmlFileAsString($fileContent);
                    $textMiniOcr = $obj->getTextAsMiniOcr($ocrTextXml);
                    $this->rawTextArray[$id] = $textMiniOcr;
                } else {
                    // The message names the method that is really invoked above.
                    $this->logger->warning('Invalid class/method "' . $class . '->getTextAsMiniOcr()" for text format "' . $textFormat . '"');
                }
            }
            $fullText = $textMiniOcr;
        } else {
            $this->logger->warning('Unsupported text format "' . $textFormat . '" in physical node with @ID "' . $id . '"');
        }
        return $fullText;
    }
625
626
    /**
627
     * Get format of the OCR full text
628
     *
629
     * @access private
630
     *
631
     * @param string $fileContent: content of the XML file
632
     *
633
     * @return string The format of the OCR full text
634
     */
635
    private function getTextFormat($fileContent)
    {
        $xml = Helper::getXmlFileAsString($fileContent);
        // The helper does not always yield a SimpleXMLElement (other code in this
        // class checks its result against false); calling getName() on such a
        // result would be a fatal error. An empty format name lets the caller
        // treat the file as unsupported instead.
        if (!($xml instanceof \SimpleXMLElement)) {
            return '';
        }
        // Get the root element's name as text format.
        return strtoupper($xml->getName());
    }
640
641
    /**
642
     * This determines a title for the given document
643
     *
644
     * @access public
645
     *
646
     * @static
647
     *
648
     * @param int $uid: The UID of the document
649
     * @param bool $recursive: Search superior documents for a title, too?
650
     *
651
     * @return string The title of the document itself or a parent document
652
     */
653
    public static function getTitle($uid, $recursive = false)
    {
        // Sanitize input: anything that is not a positive integer collapses to 0.
        $uid = max(intval($uid), 0);
        if (!$uid) {
            Helper::log('Invalid UID ' . $uid . ' for document', LOG_SEVERITY_ERROR);
            return '';
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');

        // Fetch title and parent reference of the requested document.
        $statement = $queryBuilder
            ->select('tx_dlf_documents.title', 'tx_dlf_documents.partof')
            ->from('tx_dlf_documents')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_documents.uid', $uid),
                Helper::whereExpression('tx_dlf_documents')
            )
            ->setMaxResults(1)
            ->execute();

        $row = $statement->fetch();
        if (!$row) {
            Helper::log('No document with UID ' . $uid . ' found or document not accessible', LOG_SEVERITY_WARNING);
            return '';
        }

        $title = $row['title'];
        $partof = $row['partof'];

        // Fall back to the parent's title only when asked to, when this document
        // has none, and when the parent reference points to a different record.
        $askParent = $recursive
            && empty($title)
            && intval($partof)
            && $partof != $uid;
        if ($askParent) {
            $title = self::getTitle($partof, true);
        }

        return $title;
    }
696
697
    /**
698
     * This extracts all the metadata for the toplevel logical structure node / resource
699
     *
700
     * @access public
701
     *
702
     * @param int $cPid: The PID for the metadata definitions
703
     *
704
     * @return array The logical structure node's / resource's parsed metadata array
705
     */
706
    public function getTitledata($cPid = 0)
    {
        // Metadata of the toplevel logical structure node / resource.
        $titledata = $this->getMetadata($this->_getToplevelId(), $cPid);

        // METS documents contribute extra information from the structural map.
        if ($this instanceof MetsDocument) {
            $this->addMetadataFromMets($titledata, $this->_getToplevelId());
        }

        // Put the record identifier first in the list if it is known but not listed yet.
        $recordIdMissing = is_array($titledata)
            && array_key_exists('record_id', $titledata)
            && !empty($this->recordId)
            && !in_array($this->recordId, $titledata['record_id']);
        if ($recordIdMissing) {
            array_unshift($titledata['record_id'], $this->recordId);
        }

        return $titledata;
    }
727
728
    /**
729
     * Traverse a logical (sub-) structure tree to find the structure with the requested logical id and return its depth.
730
     *
731
     * @access protected
732
     *
733
     * @param array $structure: logical structure array
734
     * @param int $depth: current tree depth
735
     * @param string $logId: ID of the logical structure whose depth is requested
736
     *
737
     * @return int|bool: false if structure with $logId is not a child of this substructure,
738
     * or the actual depth.
739
     */
740
    protected function getTreeDepth($structure, $depth, $logId)
    {
        foreach ($structure as $node) {
            // Direct hit on this level.
            if ($node['id'] == $logId) {
                return $depth;
            }
            // Leaf nodes cannot contain the requested element.
            if (!array_key_exists('children', $node)) {
                continue;
            }
            // Descend one level; the first subtree that contains the element wins.
            $depthInSubtree = $this->getTreeDepth($node['children'], $depth + 1, $logId);
            if ($depthInSubtree !== false) {
                return $depthInSubtree;
            }
        }
        // Not found anywhere in this (sub-)structure.
        return false;
    }
754
755
    /**
756
     * Get the tree depth of a logical structure element within the table of content
757
     *
758
     * @access public
759
     *
760
     * @param string $logId: The id of the logical structure element whose depth is requested
761
     * @return int|bool tree depth as integer or false if no element with $logId exists within the TOC.
762
     */
763
    public function getStructureDepth($logId)
    {
        // Entries on the first level of the table of contents count as depth 1.
        $tableOfContents = $this->_getTableOfContents();
        return $this->getTreeDepth($tableOfContents, 1, $logId);
    }
767
768
    /**
769
     * This sets some basic class properties
770
     *
771
     * @access protected
772
     *
773
     * @abstract
774
     *
775
     * @return void
776
     */
777
    abstract protected function init();
778
779
    /**
780
     * Reuse any document object that might have been already loaded to determine whether document is METS or IIIF
781
     *
782
     * @access protected
783
     *
784
     * @abstract
785
     *
786
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: any instance that has already been loaded
787
     *
788
     * @return bool true if $preloadedDocument can actually be reused, false if it has to be loaded again
789
     */
790
    abstract protected function setPreloadedDocument($preloadedDocument);
791
792
    /**
793
     * METS/IIIF specific part of loading a location
794
     *
795
     * @access protected
796
     *
797
     * @abstract
798
     *
799
     * @param string $location: The URL of the file to load
800
     *
801
     * @return bool true on success or false on failure
802
     */
803
    abstract protected function loadLocation($location);
804
805
    /**
806
     * Load XML file / IIIF resource from URL
807
     *
808
     * @access protected
809
     *
810
     * @param string $location: The URL of the file to load
811
     *
812
     * @return bool true on success or false on failure
813
     */
814
    protected function load($location)
    {
        // Refuse anything that is not a valid URL.
        if (!GeneralUtility::isValidUrl($location)) {
            $this->logger->error('Invalid file location "' . $location . '" for document loading');
            return false;
        }

        // Set user-agent to identify self when fetching XML / JSON-LD data,
        // if one is configured for the extension.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        if (!empty($extConf['useragent'])) {
            @ini_set('user_agent', $extConf['useragent']);
        }

        // The actual loading is format specific and delegated to the subclass.
        return $this->loadLocation($location);
    }
831
832
    /**
833
     * Analyze the document if it contains any fulltext that needs to be indexed.
834
     *
835
     * @access protected
836
     *
837
     * @abstract
838
     */
839
    abstract protected function ensureHasFulltextIsSet();
840
841
    /**
842
     * Register all available data formats
843
     *
844
     * @access protected
845
     *
846
     * @return void
847
     */
848
    protected function loadFormats()
    {
        // Formats are read from the database only once per instance.
        if ($this->formatsLoaded) {
            return;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_formats');

        // Get available data formats from database (records stored on pid 0).
        $statement = $queryBuilder
            ->select(
                'tx_dlf_formats.type AS type',
                'tx_dlf_formats.root AS root',
                'tx_dlf_formats.namespace AS namespace',
                'tx_dlf_formats.class AS class'
            )
            ->from('tx_dlf_formats')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_formats.pid', 0)
            )
            ->execute();

        // Add each record to the format registry; an entry with the same type
        // key as a built-in default replaces that default.
        while ($format = $statement->fetch()) {
            $this->formats[$format['type']] = [
                'rootElement' => $format['root'],
                'namespaceURI' => $format['namespace'],
                'class' => $format['class']
            ];
        }

        $this->formatsLoaded = true;
    }
879
880
    /**
     * Register all available namespaces for a \SimpleXMLElement object
     *
     * Each registered data format contributes its namespace URI, using the
     * lower-cased format type as prefix. Objects of any other class are
     * rejected with an error log entry.
     *
     * @access public
     *
     * @param \SimpleXMLElement|\DOMXPath &$obj: \SimpleXMLElement or \DOMXPath object
     *
     * @return void
     */
    public function registerNamespaces(&$obj)
    {
        // TODO Check usage. XML specific method does not seem to be used anywhere outside this class within the project, but it is public and may be used by extensions.
        $this->loadFormats();
        // Do we have a \SimpleXMLElement or \DOMXPath object?
        $isSimpleXml = $obj instanceof \SimpleXMLElement;
        if (!$isSimpleXml && !($obj instanceof \DOMXPath)) {
            $this->logger->error('Given object is neither a SimpleXMLElement nor a DOMXPath instance');
            return;
        }
        // Both classes offer the same functionality under different method names.
        $registrar = $isSimpleXml ? 'registerXPathNamespace' : 'registerNamespace';
        // Register metadata format's namespaces.
        foreach ($this->formats as $prefix => $format) {
            $obj->$registrar(strtolower($prefix), $format['namespaceURI']);
        }
    }
907
908
    /**
     * Getter for $this->cPid, invoked through __get()
     *
     * @access protected
     *
     * @return int The PID of the metadata definitions
     */
    protected function _getCPid()
    {
        return $this->cPid;
    }
919
920
    /**
     * Getter for $this->hasFulltext, invoked through __get()
     *
     * ensureHasFulltextIsSet() is called first so that the format specific
     * implementation can determine the value before it is returned.
     *
     * @access protected
     *
     * @return bool Are there any fulltext files available?
     */
    protected function _getHasFulltext()
    {
        // Let the concrete document class determine the flag first.
        $this->ensureHasFulltextIsSet();

        return $this->hasFulltext;
    }
932
933
    /**
     * Getter for $this->location, invoked through __get()
     *
     * @access protected
     *
     * @return string The location of the document
     */
    protected function _getLocation()
    {
        return $this->location;
    }
944
945
    /**
     * Format specific part of building the document's metadata array
     *
     * Called by _getMetadataArray() whenever the metadata array has not been
     * loaded yet or was built for a different PID.
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $cPid: The PID of the metadata definitions
     */
    abstract protected function prepareMetadataArray($cPid);
955
956
    /**
     * This builds an array of the document's metadata
     *
     * The PID of the metadata definitions ($this->cPid, falling back to
     * $this->pid) is remembered in element 0 of the array, so the array is
     * rebuilt as soon as that PID changes.
     *
     * @access protected
     *
     * @return array Array of metadata with their corresponding logical structure node ID as key
     */
    protected function _getMetadataArray()
    {
        // Set metadata definitions' PID.
        $cPid = $this->cPid ?: $this->pid;
        if (!$cPid) {
            $this->logger->error('Invalid PID ' . $cPid . ' for metadata definitions');
            return [];
        }
        // The cached array is only usable if it was built for the very same PID.
        $isCurrent = $this->metadataArrayLoaded && $this->metadataArray[0] == $cPid;
        if (!$isCurrent) {
            $this->prepareMetadataArray($cPid);
            $this->metadataArray[0] = $cPid;
            $this->metadataArrayLoaded = true;
        }
        return $this->metadataArray;
    }
981
982
    /**
     * Getter for $this->numPages, invoked through __get()
     *
     * _getPhysicalStructure() is called first; its return value is not used
     * here, only $this->numPages is read afterwards.
     *
     * @access protected
     *
     * @return int The total number of pages and/or tracks
     */
    protected function _getNumPages()
    {
        // Run the physical structure getter before reading the page count.
        $this->_getPhysicalStructure();

        return $this->numPages;
    }
994
995
    /**
     * Getter for $this->parentId, invoked through __get()
     *
     * @access protected
     *
     * @return int The UID of the parent document or zero if not applicable
     */
    protected function _getParentId()
    {
        return $this->parentId;
    }
1006
1007
    /**
     * This builds an array of the document's physical structure
     *
     * Also called by _getNumPages() and _getPhysicalStructureInfo() before
     * they return their respective properties.
     *
     * @access protected
     *
     * @abstract
     *
     * @return array Array of physical elements' id, type, label and file representations ordered
     * by @ORDER attribute / IIIF Sequence's Canvases
     */
    abstract protected function _getPhysicalStructure();
1018
1019
    /**
     * This gives an array of the document's physical structure metadata
     *
     * The physical structure is built on demand if that has not happened yet.
     *
     * @access protected
     *
     * @return array Array of elements' type, label and file representations ordered by @ID attribute / Canvas order
     */
    protected function _getPhysicalStructureInfo()
    {
        // The physical structure is available already.
        if ($this->physicalStructureLoaded) {
            return $this->physicalStructureInfo;
        }
        // Build physical structure array first.
        $this->_getPhysicalStructure();

        return $this->physicalStructureInfo;
    }
1035
1036
    /**
     * Getter for $this->pid, invoked through __get()
     *
     * @access protected
     *
     * @return int The PID of the document or zero if not in database
     */
    protected function _getPid()
    {
        return $this->pid;
    }
1047
1048
    /**
     * Getter for $this->ready, invoked through __get()
     *
     * @access protected
     *
     * @return bool Is the document instantiated successfully?
     */
    protected function _getReady()
    {
        return $this->ready;
    }
1059
1060
    /**
     * Getter for $this->recordId, invoked through __get()
     *
     * @access protected
     *
     * @return mixed The METS file's / IIIF manifest's record identifier
     */
    protected function _getRecordId()
    {
        return $this->recordId;
    }
1071
1072
    /**
     * Getter for $this->rootId, invoked through __get()
     *
     * On the first call of a document that has a parent, the parent document
     * is instantiated and its rootId value is taken over. The result is
     * remembered for subsequent calls.
     *
     * @access protected
     *
     * @return int The UID of the root document or zero if not applicable
     */
    protected function _getRootId()
    {
        // The root ID has been determined before.
        if ($this->rootIdLoaded) {
            return $this->rootId;
        }
        if ($this->parentId) {
            // Take over the root ID known to the parent document.
            $parentDoc = self::getInstance($this->parentId, ['storagePid' => $this->pid]);
            $this->rootId = $parentDoc->rootId;
        }
        $this->rootIdLoaded = true;

        return $this->rootId;
    }
1090
1091
    /**
     * This returns the smLinks between logical and physical structMap (METS) and models the
     * relation between IIIF Canvases and Manifests / Ranges in the same way
     *
     * Following the getter naming used by __get(), this serves the "smLinks" property.
     *
     * @access protected
     *
     * @abstract
     *
     * @return array The links between logical and physical nodes / Range, Manifest and Canvas
     */
    abstract protected function _getSmLinks();
1102
1103
    /**
     * This builds an array of the document's logical structure
     *
     * getLogicalStructure('', true) is invoked once per instance; afterwards
     * $this->tableOfContents is returned as is.
     *
     * @access protected
     *
     * @return array Array of structure nodes' id, label, type and physical page indexes/mptr / Canvas link with original hierarchy preserved
     */
    protected function _getTableOfContents()
    {
        // The logical structure array has been built before.
        if ($this->tableOfContentsLoaded) {
            return $this->tableOfContents;
        }
        // Get all logical structures.
        $this->getLogicalStructure('', true);
        $this->tableOfContentsLoaded = true;

        return $this->tableOfContents;
    }
1120
1121
    /**
     * This returns the document's thumbnail location
     *
     * Following the getter naming used by __get(), this serves the "thumbnail"
     * property; __get() calls it without arguments, so $forceReload keeps its default there.
     *
     * @access protected
     *
     * @abstract
     *
     * @param bool $forceReload: Force reloading the thumbnail instead of returning the cached value
     *
     * @return string The document's thumbnail location
     */
    abstract protected function _getThumbnail($forceReload = false);
1133
1134
    /**
     * This returns the ID of the toplevel logical structure node
     *
     * Following the getter naming used by __get(), this serves the "toplevelId" property.
     *
     * @access protected
     *
     * @abstract
     *
     * @return string The logical structure node's ID
     */
    abstract protected function _getToplevelId();
1144
1145
    /**
     * Getter for $this->uid, invoked through __get()
     *
     * @access protected
     *
     * @return mixed The UID or the URL of the document
     */
    protected function _getUid()
    {
        return $this->uid;
    }
1156
1157
    /**
     * Setter for $this->cPid, invoked through __set()
     *
     * The value is converted to an integer; negative results are stored as 0.
     *
     * @access protected
     *
     * @param int $value: The new PID for the metadata definitions
     *
     * @return void
     */
    protected function _setCPid($value)
    {
        $pid = (int) $value;
        // A PID can never be negative.
        $this->cPid = $pid > 0 ? $pid : 0;
    }
1170
1171
    /**
     * This is a singleton class, thus the constructor should be private/protected
     * (Get an instance of this class by calling \Kitodo\Dlf\Common\Doc::getInstance())
     *
     * The logger is set up before anything else so that the initialization
     * steps below (and the magic __get() / __set() methods they may trigger)
     * can safely write log entries.
     *
     * @access protected
     *
     * @param string $location: The location URL of the XML file to parse (not used by this constructor itself)
     * @param int $pid: If > 0, then only document with this PID gets loaded
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: Either null or the \SimpleXMLElement
     * or IiifResourceInterface that has been loaded to determine the basic document format.
     *
     * @return void
     */
    protected function __construct($location, $pid, $preloadedDocument)
    {
        // Must come first: logging during the following steps would otherwise hit a null logger.
        $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger();
        $this->setPreloadedDocument($preloadedDocument);
        $this->init();
        $this->establishRecordId($pid);
    }
1193
1194
    /**
     * This magic method is called each time an invisible property is referenced from the object
     *
     * The request is forwarded to the matching "_get<Property>" method. If the
     * property or its getter does not exist, a warning is logged and null is returned.
     *
     * @access public
     *
     * @param string $var: Name of variable to get
     *
     * @return mixed Value of $this->$var
     */
    public function __get($var)
    {
        $getter = '_get' . ucfirst($var);
        // Both the property itself and a dedicated getter have to exist.
        if (property_exists($this, $var) && method_exists($this, $getter)) {
            return $this->$getter();
        }
        $this->logger->warning('There is no getter function for property "' . $var . '"');
        return null;
    }
1216
1217
    /**
     * This magic method is called each time an invisible property is checked for isset() or empty()
     *
     * The value is obtained through __get(), i.e. the property's getter is executed
     * (and a warning is logged if there is none).
     *
     * @access public
     *
     * @param string $var: Name of variable to check
     *
     * @return bool true if variable is set and not empty, false otherwise
     */
    public function __isset($var)
    {
        $value = $this->__get($var);

        return !empty($value);
    }
1230
1231
    /**
     * This magic method is called each time an invisible property is referenced from the object
     *
     * The value is forwarded to the matching "_set<Property>" method. If the
     * property or its setter does not exist, a warning is logged and nothing is changed.
     *
     * @access public
     *
     * @param string $var: Name of variable to set
     * @param mixed $value: New value of variable
     *
     * @return void
     */
    public function __set($var, $value)
    {
        $setter = '_set' . ucfirst($var);
        // Both the property itself and a dedicated setter have to exist.
        if (property_exists($this, $var) && method_exists($this, $setter)) {
            $this->$setter($value);
            return;
        }
        $this->logger->warning('There is no setter function for property "' . $var . '"');
    }
1253
1254
    /**
     * Look up a document in the "tx_dlf_doc" cache
     *
     * The MD5 hash of the location serves as cache identifier.
     *
     * @param string $location
     * @return Doc|false
     */
    private static function getDocCache(string $location)
    {
        return GeneralUtility::makeInstance(CacheManager::class)
            ->getCache('tx_dlf_doc')
            ->get(md5($location));
    }
1268
1269
    /**
     * Store a document in the "tx_dlf_doc" cache
     *
     * The MD5 hash of the location serves as cache identifier.
     *
     * @param string $location
     * @param Doc $doc
     * @return void
     */
    private static function setDocCache(string $location, Doc $doc)
    {
        // Save value in cache
        GeneralUtility::makeInstance(CacheManager::class)
            ->getCache('tx_dlf_doc')
            ->set(md5($location), $doc);
    }
1284
1285
}
1286