Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( 982517...802e26 )
by Sebastian
21s queued 11s
created

Document::_getDmdSec()   B

Complexity

Conditions 8
Paths 2

Size

Total Lines 27
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
eloc 18
nc 2
nop 0
dl 0
loc 27
rs 8.4444
c 0
b 0
f 0

1 Method

Rating   Name   Duplication   Size   Complexity  
A Document::_getTableOfContents() 0 8 2
1
<?php
2
namespace Kitodo\Dlf\Common;
3
4
/**
5
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
6
 *
7
 * This file is part of the Kitodo and TYPO3 projects.
8
 *
9
 * @license GNU General Public License version 3 or later.
10
 * For the full copyright and license information, please read the
11
 * LICENSE.txt file that was distributed with this source code.
12
 */
13
14
use Ubl\Iiif\Presentation\Common\Model\Resources\IiifResourceInterface;
15
use Ubl\Iiif\Tools\IiifHelper;
16
17
/**
18
 * Document class for the 'dlf' extension
19
 *
20
 * @author Sebastian Meyer <[email protected]>
21
 * @author Henrik Lochmann <[email protected]>
22
 * @package TYPO3
23
 * @subpackage dlf
24
 * @access public
25
 * @abstract
26
 */
27
abstract class Document {
28
    /**
29
     * This holds the PID for the configuration
30
     *
31
     * @var integer
32
     * @access protected
33
     */
34
    protected $cPid = 0;
35
36
    /**
37
     * The extension key
38
     *
39
     * @var string
40
     * @access public
41
     */
42
    public static $extKey = 'dlf';
43
44
    /**
45
     * This holds the configuration for all supported metadata encodings
46
     * @see loadFormats()
47
     *
48
     * @var array
49
     * @access protected
50
     */
51
    protected $formats = [
52
        'OAI' => [
53
            'rootElement' => 'OAI-PMH',
54
            'namespaceURI' => 'http://www.openarchives.org/OAI/2.0/',
55
        ],
56
        'METS' => [
57
            'rootElement' => 'mets',
58
            'namespaceURI' => 'http://www.loc.gov/METS/',
59
        ],
60
        'XLINK' => [
61
            'rootElement' => 'xlink',
62
            'namespaceURI' => 'http://www.w3.org/1999/xlink',
63
        ]
64
    ];
65
66
    /**
67
     * Are there any fulltext files available? This also includes IIIF text annotations
68
     * with motivation 'painting' if Kitodo.Presentation is configured to store text
69
     * annotations as fulltext.
70
     *
71
     * @var boolean
72
     * @access protected
73
     */
74
    protected $hasFulltext = FALSE;
75
76
    /**
77
     * Last searched logical and physical page
78
     *
79
     * @var array
80
     * @access protected
81
     */
82
    protected $lastSearchedPhysicalPage = ['logicalPage' => NULL, 'physicalPage' => NULL];
83
84
    /**
85
     * This holds the document's location
86
     *
87
     * @var string
88
     * @access protected
89
     */
90
    protected $location = '';
91
92
    /**
93
     * This holds the logical units
94
     *
95
     * @var array
96
     * @access protected
97
     */
98
    protected $logicalUnits = [];
99
100
    /**
101
     * This holds the documents' parsed metadata array with their corresponding
102
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
103
     *
104
     * @var array
105
     * @access protected
106
     */
107
    protected $metadataArray = [];
108
109
    /**
110
     * Is the metadata array loaded?
111
     * @see $metadataArray
112
     *
113
     * @var boolean
114
     * @access protected
115
     */
116
    protected $metadataArrayLoaded = FALSE;
117
118
    /**
119
     * This holds the total number of pages
120
     *
121
     * @var integer
122
     * @access protected
123
     */
124
    protected $numPages = 0;
125
126
    /**
127
     * This holds the UID of the parent document or zero if not multi-volumed
128
     *
129
     * @var integer
130
     * @access protected
131
     */
132
    protected $parentId = 0;
133
134
    /**
135
     * This holds the physical structure
136
     *
137
     * @var array
138
     * @access protected
139
     */
140
    protected $physicalStructure = [];
141
142
    /**
143
     * This holds the physical structure metadata
144
     *
145
     * @var array
146
     * @access protected
147
     */
148
    protected $physicalStructureInfo = [];
149
150
    /**
151
     * Is the physical structure loaded?
152
     * @see $physicalStructure
153
     *
154
     * @var boolean
155
     * @access protected
156
     */
157
    protected $physicalStructureLoaded = FALSE;
158
159
    /**
160
     * This holds the PID of the document or zero if not in database
161
     *
162
     * @var integer
163
     * @access protected
164
     */
165
    protected $pid = 0;
166
167
    /**
168
     * This holds the documents' raw text pages with their corresponding
169
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
170
     *
171
     * @var array
172
     * @access protected
173
     */
174
    protected $rawTextArray = [];
175
176
    /**
177
     * Is the document instantiated successfully?
178
     *
179
     * @var boolean
180
     * @access protected
181
     */
182
    protected $ready = FALSE;
183
184
    /**
185
     * The METS file's / IIIF manifest's record identifier
186
     *
187
     * @var string
188
     * @access protected
189
     */
190
    protected $recordId;
191
192
    /**
193
     * This holds the singleton object of the document
194
     *
195
     * @var array (\Kitodo\Dlf\Common\Document)
196
     * @static
197
     * @access protected
198
     */
199
    protected static $registry = [];
200
201
    /**
202
     * This holds the UID of the root document or zero if not multi-volumed
203
     *
204
     * @var integer
205
     * @access protected
206
     */
207
    protected $rootId = 0;
208
209
    /**
210
     * Is the root id loaded?
211
     * @see $rootId
212
     *
213
     * @var boolean
214
     * @access protected
215
     */
216
    protected $rootIdLoaded = FALSE;
217
218
    /**
219
     * This holds the smLinks between logical and physical structMap
220
     *
221
     * @var array
222
     * @access protected
223
     */
224
    protected $smLinks = ['l2p' => [], 'p2l' => []];
225
226
    /**
227
     * Are the smLinks loaded?
228
     * @see $smLinks
229
     *
230
     * @var boolean
231
     * @access protected
232
     */
233
    protected $smLinksLoaded = FALSE;
234
235
    /**
236
     * This holds the logical structure
237
     *
238
     * @var array
239
     * @access protected
240
     */
241
    protected $tableOfContents = [];
242
243
    /**
244
     * Is the table of contents loaded?
245
     * @see $tableOfContents
246
     *
247
     * @var boolean
248
     * @access protected
249
     */
250
    protected $tableOfContentsLoaded = FALSE;
251
252
    /**
253
     * This holds the document's thumbnail location.
254
     *
255
     * @var string
256
     * @access protected
257
     */
258
    protected $thumbnail = '';
259
260
    /**
261
     * Is the document's thumbnail location loaded?
262
     * @see $thumbnail
263
     *
264
     * @var boolean
265
     * @access protected
266
     */
267
    protected $thumbnailLoaded = FALSE;
268
269
    /**
270
     * This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF).
271
     *
272
     * @var string
273
     * @access protected
274
     */
275
    protected $toplevelId = '';
276
277
    /**
278
     * This holds the UID or the URL of the document
279
     *
280
     * @var mixed
281
     * @access protected
282
     */
283
    protected $uid = 0;
284
285
    /**
286
     * This holds the whole XML file as \SimpleXMLElement object
287
     *
288
     * @var \SimpleXMLElement
289
     * @access protected
290
     */
291
    protected $xml;
292
293
    /**
294
     * This clears the static registry to prevent memory exhaustion
295
     *
296
     * @access public
297
     *
298
     * @static
299
     *
300
     * @return void
301
     */
302
    public static function clearRegistry() {
        // Forget every cached document instance by replacing the static
        // registry with an empty array (done to prevent memory exhaustion).
        self::$registry = [];
    }
306
307
    /**
308
     * This ensures that the recordId, if existent, is retrieved from the document.
309
     *
310
     * @access protected
311
     *
312
     * @abstract
313
     *
314
     * @param integer $pid: ID of the configuration page with the recordId config
315
     *
316
     */
317
    protected abstract function establishRecordId($pid);
318
319
    /**
320
     * Source document PHP object which is represented by a Document instance
321
     *
322
     * @access protected
323
     *
324
     * @abstract
325
     *
326
     * @return \SimpleXMLElement|IiifResourceInterface A PHP object representation of
327
     * the current document. SimpleXMLElement for METS, IiifResourceInterface for IIIF
328
     */
329
    protected abstract function getDocument();
330
331
    /**
332
     * This gets the location of a downloadable file for a physical page or track
333
     *
334
     * @access public
335
     *
336
     * @abstract
337
     *
338
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
339
     *
340
     * @return string    The file's location as URL
341
     */
342
    public abstract function getDownloadLocation($id);
343
344
    /**
345
     * This gets the location of a file representing a physical page or track
346
     *
347
     * @access public
348
     *
349
     * @abstract
350
     *
351
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
352
     *
353
     * @return string The file's location as URL
354
     */
355
    public abstract function getFileLocation($id);
356
357
    /**
358
     * This gets the MIME type of a file representing a physical page or track
359
     *
360
     * @access public
361
     *
362
     * @abstract
363
     *
364
     * @param string $id: The @ID attribute of the file node
365
     *
366
     * @return string The file's MIME type
367
     */
368
    public abstract function getFileMimeType($id);
369
370
    /**
371
     * This is a singleton class, thus an instance must be created by this method
372
     *
373
     * @access public
374
     *
375
     * @static
376
     *
377
     * @param mixed $uid: The unique identifier of the document to parse, the URL of XML file or the IRI of the IIIF resource
378
     * @param integer $pid: If > 0, then only document with this PID gets loaded
379
     * @param boolean $forceReload: Force reloading the document instead of returning the cached instance
380
     *
381
     * @return \Kitodo\Dlf\Common\Document Instance of this class, either MetsDocument or IiifManifest
382
     */
383
    public static function &getInstance($uid, $pid = 0, $forceReload = FALSE) {
        // Factory for document instances: returns a cached instance (static registry
        // or user session) unless $forceReload is set, otherwise detects the document
        // format (METS or IIIF) and builds a MetsDocument or IiifManifest.
        // NOTE: the result is NULL if the document format could not be determined.

        // Sanitize input.
        $pid = max(intval($pid), 0);
        // Must be defined on every path because it is returned by reference.
        $instance = null;
        if (!$forceReload) {
            $regObj = md5($uid);
            if (isset(self::$registry[$regObj])
                && self::$registry[$regObj] instanceof self) {
                // Check if instance has given PID.
                if (!$pid
                    || !self::$registry[$regObj]->pid
                    || $pid == self::$registry[$regObj]->pid) {
                    // Return singleton instance if available.
                    return self::$registry[$regObj];
                }
            } else {
                // Check the user's session...
                $sessionData = Helper::loadFromSession(get_called_class());
                if (isset($sessionData[$regObj])
                    && $sessionData[$regObj] instanceof self) {
                    // Check if instance has given PID.
                    if (!$pid
                        || !$sessionData[$regObj]->pid
                        || $pid == $sessionData[$regObj]->pid) {
                        // ...and restore registry.
                        self::$registry[$regObj] = $sessionData[$regObj];
                        return self::$registry[$regObj];
                    }
                }
            }
        }
        // Create new instance depending on format (METS or IIIF) ...
        $documentFormat = null;
        $xml = null;
        $iiif = null;
        if (\TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($uid)) {
            // Try to get document format from database.
            $whereClause = 'tx_dlf_documents.uid='.intval($uid).Helper::whereClause('tx_dlf_documents');
            if ($pid) {
                $whereClause .= ' AND tx_dlf_documents.pid='.intval($pid);
            }
            $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
                'tx_dlf_documents.location AS location,tx_dlf_documents.document_format AS document_format',
                'tx_dlf_documents',
                $whereClause,
                '',
                '',
                '1'
            );
            // The query is limited to a single row, so one fetch is sufficient.
            if ($GLOBALS['TYPO3_DB']->sql_num_rows($result) > 0) {
                $resArray = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($result);
                $documentFormat = $resArray['document_format'];
            }
        } else {
            // Get document format from content of remote document.
            // Cast to string for safety reasons.
            $location = (string) $uid;
            // Try to load a file from the url.
            if (\TYPO3\CMS\Core\Utility\GeneralUtility::isValidUrl($location)) {
                // Load extension configuration.
                $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['dlf']);
                // Set user-agent to identify self when fetching XML data (best effort).
                if (!empty($extConf['useragent'])) {
                    @ini_set('user_agent', $extConf['useragent']);
                }
                $content = \TYPO3\CMS\Core\Utility\GeneralUtility::getUrl($location);
                // Only try to parse if the download actually yielded content;
                // a failed request must not be passed on to the parsers.
                if (is_string($content) && $content !== '') {
                    // TODO use single place to load xml
                    // Turn off libxml's error logging.
                    $libxmlErrors = libxml_use_internal_errors(TRUE);
                    // Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept
                    $previousValueOfEntityLoader = libxml_disable_entity_loader(TRUE);
                    // Try to load XML from file.
                    $xml = simplexml_load_string($content);
                    // Reset entity loader setting.
                    libxml_disable_entity_loader($previousValueOfEntityLoader);
                    // Reset libxml's error logging.
                    libxml_use_internal_errors($libxmlErrors);
                    if ($xml !== false) {
                        /* @var $xml \SimpleXMLElement */
                        $xml->registerXPathNamespace('mets', 'http://www.loc.gov/METS/');
                        $xpathResult = $xml->xpath('//mets:mets');
                        $documentFormat = ($xpathResult !== false && count($xpathResult) > 0) ? 'METS' : null;
                    } else {
                        // Try to load file as IIIF resource instead.
                        $contentAsJsonArray = json_decode($content, true);
                        if ($contentAsJsonArray !== null) {
                            // Load plugin configuration.
                            $conf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
                            IiifHelper::setUrlReader(IiifUrlReader::getInstance());
                            IiifHelper::setMaxThumbnailHeight($conf['iiifThumbnailHeight']);
                            IiifHelper::setMaxThumbnailWidth($conf['iiifThumbnailWidth']);
                            $iiif = IiifHelper::loadIiifResource($contentAsJsonArray);
                            if ($iiif instanceof IiifResourceInterface) {
                                $documentFormat = 'IIIF';
                            }
                        }
                    }
                }
            }
        }
        if ($documentFormat == 'METS') {
            $instance = new MetsDocument($uid, $pid, $xml);
        } elseif ($documentFormat == 'IIIF') {
            $instance = new IiifManifest($uid, $pid, $iiif);
        }
        // Save instance to registry (only if one was created and it loaded successfully).
        if ($instance !== null && $instance->ready) {
            self::$registry[md5($instance->uid)] = $instance;
            if ($instance->uid != $instance->location) {
                self::$registry[md5($instance->location)] = $instance;
            }
            // Load extension configuration
            $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['dlf']);
            // Save registry to session if caching is enabled.
            if (!empty($extConf['caching'])) {
                Helper::saveToSession(self::$registry, get_class($instance));
            }
        }
        // Return new instance (NULL if no supported format was detected).
        return $instance;
    }
508
509
    /**
510
     * This gets details about a logical structure element
511
     *
512
     * @access public
513
     *
514
     * @abstract
515
     *
516
     * @param string $id: The @ID attribute of the logical structure node (METS) or
517
     * the @id property of the Manifest / Range (IIIF)
518
     * @param boolean $recursive: Whether to include the child elements / resources
519
     *
520
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
521
     */
522
    public abstract function getLogicalStructure($id, $recursive = FALSE);
523
524
    /**
525
     * This extracts all the metadata for a logical structure node
526
     *
527
     * @access public
528
     *
529
     * @abstract
530
     *
531
     * @param string $id: The @ID attribute of the logical structure node (METS) or the @id property
532
     * of the Manifest / Range (IIIF)
533
     * @param integer $cPid: The PID for the metadata definitions
534
     *                       (defaults to $this->cPid or $this->pid)
535
     *
536
     * @return array The logical structure node's / the IIIF resource's parsed metadata array
537
     */
538
    public abstract function getMetadata($id, $cPid = 0);
539
540
    /**
541
     * This returns the first corresponding physical page number of a given logical page label
542
     *
543
     * @access public
544
     *
545
     * @param string $logicalPage: The label (or a part of the label) of the logical page
546
     *
547
     * @return integer The physical page number
548
     */
549
    public function getPhysicalPage($logicalPage) {
        // Answer a repeated lookup of the same label from the one-entry cache.
        if (!empty($this->lastSearchedPhysicalPage['logicalPage'])
            && $this->lastSearchedPhysicalPage['logicalPage'] == $logicalPage) {
            return $this->lastSearchedPhysicalPage['physicalPage'];
        }
        // Walk the physical structure info in order; the zero-based position
        // of the first entry whose order label contains the given label wins.
        $position = -1;
        foreach ($this->physicalStructureInfo as $pageInfo) {
            $position++;
            if (strpos($pageInfo['orderlabel'], $logicalPage) === FALSE) {
                continue;
            }
            // Remember the hit for the next call.
            $this->lastSearchedPhysicalPage['logicalPage'] = $logicalPage;
            $this->lastSearchedPhysicalPage['physicalPage'] = $position;
            return $position;
        }
        // Nothing matched: fall back to page 1.
        return 1;
    }
566
567
    /**
568
     * This extracts the raw text for a physical structure node / IIIF Manifest / Canvas. Text might be
569
     * given as ALTO for METS or as annotations or ALTO for IIIF resources. If IIIF plain text annotations
570
     * with the motivation "painting" should be treated as full text representations, the extension has to be
571
     * configured accordingly.
572
     *
573
     * @access public
574
     *
575
     * @abstract
576
     *
577
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
578
     * of the Manifest / Range (IIIF)
579
     *
580
     * @return string The physical structure node's / IIIF resource's raw text
581
     */
582
    public abstract function getRawText($id);
583
584
    /**
585
     * This extracts the raw text for a physical structure node / IIIF Manifest / Canvas from an
586
     * XML fulltext representation (currently only ALTO). For IIIF manifests, ALTO documents have
587
     * to be given in the Canvas' / Manifest's "seeAlso" property.
588
     *
589
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
590
     * of the Manifest / Range (IIIF)
591
     *
592
     * @return string The physical structure node's / IIIF resource's raw text from XML
593
     */
594
    protected function getRawTextFromXml($id) {
        // Fetches the fulltext file of the given physical structure node, parses it
        // as XML and hands it to the format class registered for its root element.
        // Returns an empty string whenever any of these steps fails.
        $rawText = '';
        // Load available text formats, ...
        $this->loadFormats();
        // ... physical structure ...
        $this->_getPhysicalStructure();
        // ... and extension configuration.
        $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        if (empty($this->physicalStructureInfo[$id])) {
            Helper::devLog('Invalid structure node @ID "'.$id.'"', DEVLOG_SEVERITY_WARNING);
            return $rawText;
        }
        // Get fulltext file.
        $file = $this->getFileLocation($this->physicalStructureInfo[$id]['files'][$extConf['fileGrpFulltext']]);
        $content = \TYPO3\CMS\Core\Utility\GeneralUtility::getUrl($file);
        $rawTextXml = FALSE;
        // Only parse if the download succeeded; a failed request yields no string.
        if (is_string($content) && $content !== '') {
            // Turn off libxml's error logging.
            $libxmlErrors = libxml_use_internal_errors(TRUE);
            // Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept.
            $previousValueOfEntityLoader = libxml_disable_entity_loader(TRUE);
            // Load XML from file.
            $rawTextXml = simplexml_load_string($content);
            // Reset entity loader setting.
            libxml_disable_entity_loader($previousValueOfEntityLoader);
            // Reset libxml's error logging.
            libxml_use_internal_errors($libxmlErrors);
        }
        // simplexml_load_string() returns FALSE on malformed XML; calling
        // getName() on that would be a fatal error, so bail out gracefully.
        if ($rawTextXml === FALSE) {
            Helper::devLog('Could not load fulltext file "'.$file.'" for structure node @ID "'.$id.'"', DEVLOG_SEVERITY_WARNING);
            return $rawText;
        }
        // Get the root element's name as text format.
        $textFormat = strtoupper($rawTextXml->getName());
        // Is this text format supported?
        if (!empty($this->formats[$textFormat])) {
            if (!empty($this->formats[$textFormat]['class'])) {
                $class = $this->formats[$textFormat]['class'];
                // Get the raw text from class.
                if (class_exists($class)
                    && ($obj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance($class)) instanceof FulltextInterface) {
                    $rawText = $obj->getRawText($rawTextXml);
                    // Keep the extracted text for this node.
                    $this->rawTextArray[$id] = $rawText;
                } else {
                    Helper::devLog('Invalid class/method "'.$class.'->getRawText()" for text format "'.$textFormat.'"', DEVLOG_SEVERITY_WARNING);
                }
            }
        } else {
            Helper::devLog('Unsupported text format "'.$textFormat.'" in physical node with @ID "'.$id.'"', DEVLOG_SEVERITY_WARNING);
        }
        return $rawText;
    }
639
640
    /**
641
     * This determines a title for the given document
642
     *
643
     * @access public
644
     *
645
     * @static
646
     *
647
     * @param integer $uid: The UID of the document
648
     * @param boolean $recursive: Search superior documents for a title, too?
649
     *
650
     * @return string The title of the document itself or a parent document
651
     */
652
    public static function getTitle($uid, $recursive = FALSE) {
        $title = '';
        // Sanitize input: anything that is not a positive integer becomes 0.
        $uid = max(intval($uid), 0);
        if (!$uid) {
            Helper::devLog('Invalid UID '.$uid.' for document', DEVLOG_SEVERITY_ERROR);
            return $title;
        }
        // Fetch title and parent reference of the requested document.
        $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'tx_dlf_documents.title,tx_dlf_documents.partof',
            'tx_dlf_documents',
            'tx_dlf_documents.uid='.$uid
                .Helper::whereClause('tx_dlf_documents'),
            '',
            '',
            '1'
        );
        if (!$GLOBALS['TYPO3_DB']->sql_num_rows($result)) {
            Helper::devLog('No document with UID '.$uid.' found or document not accessible', DEVLOG_SEVERITY_WARNING);
            return $title;
        }
        // Get title information.
        list ($title, $partof) = $GLOBALS['TYPO3_DB']->sql_fetch_row($result);
        // Ask the parent document only if requested, if this document has no
        // title of its own and if it points to a parent other than itself.
        $askParent = $recursive
            && empty($title)
            && intval($partof)
            && $partof != $uid;
        if ($askParent) {
            $title = self::getTitle($partof, TRUE);
        }
        return $title;
    }
684
685
    /**
686
     * This extracts all the metadata for the toplevel logical structure node / resource
687
     *
688
     * @access public
689
     *
690
     * @param integer $cPid: The PID for the metadata definitions
691
     *
692
     * @return array The logical structure node's / resource's parsed metadata array
693
     */
694
    public function getTitledata($cPid = 0) {
        // Metadata of the toplevel logical structure node / resource.
        $titledata = $this->getMetadata($this->_getToplevelId(), $cPid);
        // Nothing to amend unless there is a 'record_id' entry to work on.
        if (!is_array($titledata)
            || !array_key_exists('record_id', $titledata)) {
            return $titledata;
        }
        // Without a known record identifier the metadata stays untouched.
        if (empty($this->recordId)) {
            return $titledata;
        }
        // Put the record identifier in front if it is not listed yet.
        if (!in_array($this->recordId, $titledata['record_id'])) {
            array_unshift($titledata['record_id'], $this->recordId);
        }
        return $titledata;
    }
706
707
    /**
708
     * Traverse a logical (sub-) structure tree to find the structure with the requested logical id and return its depth.
709
     *
710
     * @access protected
711
     *
712
     * @param array $structure: logical structure array
713
     * @param integer $depth: current tree depth
714
     * @param string $logId: ID of the logical structure whose depth is requested
715
     *
716
     * @return integer|boolean: false if structure with $logId is not a child of this substructure,
717
     * or the actual depth.
718
     */
719
    protected function getTreeDepth($structure, $depth, $logId) {
        // Depth-first search: check each node on this level, then descend
        // into its children (one level deeper) before moving to the next node.
        foreach ($structure as $node) {
            if ($node['id'] == $logId) {
                return $depth;
            }
            if (!array_key_exists('children', $node)) {
                continue;
            }
            $depthInSubtree = $this->getTreeDepth($node['children'], $depth + 1, $logId);
            if ($depthInSubtree !== false) {
                return $depthInSubtree;
            }
        }
        // The requested id is not part of this (sub-)structure.
        return false;
    }
732
733
    /**
734
     * Get the tree depth of a logical structure element within the table of content
735
     *
736
     * @access public
737
     *
738
     * @param string $logId: The id of the logical structure element whose depth is requested
739
     * @return number|boolean tree depth as integer or FALSE if no element with $logId exists within the TOC.
740
     */
741
    public function getStructureDepth($logId) {
        // Search the whole table of contents; its top-level entries count as depth 1.
        $tableOfContents = $this->_getTableOfContents();
        return $this->getTreeDepth($tableOfContents, 1, $logId);
    }
744
745
    /**
746
     * This sets some basic class properties
747
     *
748
     * @access protected
749
     *
750
     * @abstract
751
     *
752
     * @return void
753
     */
754
    protected abstract function init();
755
756
    /**
757
     * Reuse any document object that might have been already loaded to determine whether document is METS or IIIF
758
     *
759
     * @access protected
760
     *
761
     * @abstract
762
     *
763
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: any instance that has already been loaded
764
     *
765
     * @return boolean true if $preloadedDocument can actually be reused, false if it has to be loaded again
766
     */
767
    protected abstract function setPreloadedDocument($preloadedDocument);
768
769
    /**
770
     * METS/IIIF specific part of loading a location
771
     *
772
     * @access protected
773
     *
774
     * @abstract
775
     *
776
     * @param string $location: The URL of the file to load
777
     */
778
    protected abstract function loadLocation($location);
779
780
    /**
781
     * Load XML file / IIIF resource from URL
782
     *
783
     * @access protected
784
     *
785
     * @param string $location: The URL of the file to load
786
     *
787
     * @return boolean TRUE on success or FALSE on failure
788
     */
789
    protected function load($location) {
        // Refuse anything that is not a valid URL.
        if (!\TYPO3\CMS\Core\Utility\GeneralUtility::isValidUrl($location)) {
            Helper::devLog('Invalid file location "'.$location.'" for document loading', DEVLOG_SEVERITY_ERROR);
            return FALSE;
        }
        // Load extension configuration.
        $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['dlf']);
        // Set the configured user-agent (if any) before fetching XML / JSON-LD data.
        if (!empty($extConf['useragent'])) {
            @ini_set('user_agent', $extConf['useragent']);
        }
        // The actual loading is format specific.
        return $this->loadLocation($location);
    }
805
806
    /**
     * Analyze the document to find out whether it contains any fulltext that needs to be indexed.
     *
     * _getHasFulltext() calls this method right before it returns $this->hasFulltext.
     *
     * @access protected
     *
     * @abstract
     */
    abstract protected function ensureHasFulltextIsSet();
814
815
    /**
     * Register all available data formats
     *
     * Reads the format definitions stored on the root page (pid=0) of table
     * "tx_dlf_formats" once and caches them in $this->formats, keyed by
     * format type.
     *
     * @access protected
     *
     * @return void
     */
    protected function loadFormats() {
        // The registry only needs to be filled once per instance.
        if ($this->formatsLoaded) {
            return;
        }
        $db = $GLOBALS['TYPO3_DB'];
        // Get available data formats from database.
        $formatRows = $db->exec_SELECTquery(
            'tx_dlf_formats.type AS type,tx_dlf_formats.root AS root,tx_dlf_formats.namespace AS namespace,tx_dlf_formats.class AS class',
            'tx_dlf_formats',
            'tx_dlf_formats.pid=0'
                .Helper::whereClause('tx_dlf_formats'),
            '',
            '',
            ''
        );
        while ($format = $db->sql_fetch_assoc($formatRows)) {
            // Update format registry.
            $this->formats[$format['type']] = [
                'rootElement' => $format['root'],
                'namespaceURI' => $format['namespace'],
                'class' => $format['class']
            ];
        }
        $this->formatsLoaded = TRUE;
    }
845
846
    /**
     * Register the namespaces of all known data formats on an XML object
     *
     * The lowercased format type is used as namespace prefix.
     *
     * @access public
     *
     * @param \SimpleXMLElement|\DOMXPath &$obj: \SimpleXMLElement or \DOMXPath object
     *
     * @return void
     */
    public function registerNamespaces(&$obj) {
        // TODO Check usage. XML specific method does not seem to be used anywhere outside this class within the project, but it is public and may be used by extensions.
        $this->loadFormats();
        // Do we have a \SimpleXMLElement or \DOMXPath object?
        $isSimpleXml = $obj instanceof \SimpleXMLElement;
        if (!$isSimpleXml
            && !($obj instanceof \DOMXPath)) {
            Helper::devLog('Given object is neither a SimpleXMLElement nor a DOMXPath instance', DEVLOG_SEVERITY_ERROR);
            return;
        }
        // Both classes offer the same functionality under different method names.
        $registrar = $isSimpleXml ? 'registerXPathNamespace' : 'registerNamespace';
        // Register metadata format's namespaces.
        foreach ($this->formats as $type => $format) {
            $prefix = strtolower($type);
            $obj->$registrar($prefix, $format['namespaceURI']);
        }
    }
872
873
    /**
     * This saves the document to the database and index
     *
     * Resolves the document's structure type, collections and owner to
     * record UIDs (missing collections and a missing library are created on
     * the fly), writes the "tx_dlf_documents" record and finally adds the
     * document to the Solr index if a core is given.
     *
     * @access public
     *
     * @param integer $pid: The PID of the saved record
     * @param integer $core: The UID of the Solr core for indexing
     *
     * @return boolean TRUE on success or FALSE on failure
     */
    public function save($pid = 0, $core = 0) {
        if (TYPO3_MODE !== 'BE') {
            Helper::devLog('Saving a document is only allowed in the backend', DEVLOG_SEVERITY_ERROR);
            return FALSE;
        }
        // Make sure $pid is a non-negative integer.
        $pid = max(intval($pid), 0);
        // Make sure $core is a non-negative integer.
        $core = max(intval($core), 0);
        // If $pid is not given, try to get it elsewhere.
        if (!$pid
            && $this->pid) {
            // Retain current PID.
            $pid = $this->pid;
        } elseif (!$pid) {
            Helper::devLog('Invalid PID '.$pid.' for document saving', DEVLOG_SEVERITY_ERROR);
            return FALSE;
        }
        // Set PID for metadata definitions.
        $this->cPid = $pid;
        // Set UID placeholder if not updating existing record.
        if ($pid != $this->pid) {
            $this->uid = uniqid('NEW');
        }
        // Get metadata array.
        $metadata = $this->getTitledata($pid);
        // Check for record identifier.
        if (empty($metadata['record_id'][0])) {
            Helper::devLog('No record identifier found to avoid duplication', DEVLOG_SEVERITY_ERROR);
            return FALSE;
        }
        // Load plugin configuration.
        $conf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        // Get UID for structure type.
        $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'tx_dlf_structures.uid AS uid',
            'tx_dlf_structures',
            'tx_dlf_structures.pid='.intval($pid)
                .' AND tx_dlf_structures.index_name='.$GLOBALS['TYPO3_DB']->fullQuoteStr($metadata['type'][0], 'tx_dlf_structures')
                .Helper::whereClause('tx_dlf_structures'),
            '',
            '',
            '1'
        );
        if ($GLOBALS['TYPO3_DB']->sql_num_rows($result)) {
            list ($structure) = $GLOBALS['TYPO3_DB']->sql_fetch_row($result);
        } else {
            Helper::devLog('Could not identify document/structure type "'.$GLOBALS['TYPO3_DB']->fullQuoteStr($metadata['type'][0], 'tx_dlf_structures').'"', DEVLOG_SEVERITY_ERROR);
            return FALSE;
        }
        $metadata['type'][0] = $structure;
        // Get UIDs for collections.
        $collections = [];
        // Map of collection index name => UID; stays empty if the page has no collections yet.
        $collUid = [];
        $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'tx_dlf_collections.index_name AS index_name,tx_dlf_collections.uid AS uid',
            'tx_dlf_collections',
            'tx_dlf_collections.pid='.intval($pid)
                .' AND tx_dlf_collections.sys_language_uid IN (-1,0)'
                .Helper::whereClause('tx_dlf_collections'),
            '',
            '',
            ''
        );
        while ($resArray = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($result)) {
            $collUid[$resArray['index_name']] = $resArray['uid'];
        }
        // Documents without collection metadata simply belong to no collection.
        $collectionNames = (isset($metadata['collection']) && is_array($metadata['collection']))
            ? $metadata['collection']
            : [];
        foreach ($collectionNames as $collection) {
            if (!empty($collUid[$collection])) {
                // Add existing collection's UID.
                $collections[] = $collUid[$collection];
            } else {
                // Insert new collection.
                $collNewUid = uniqid('NEW');
                // A fresh array per iteration prevents double insertion of earlier collections.
                $collData = [
                    'tx_dlf_collections' => [
                        $collNewUid => [
                            'pid' => $pid,
                            'label' => $collection,
                            'index_name' => $collection,
                            'oai_name' => (!empty($conf['publishNewCollections']) ? Helper::getCleanString($collection) : ''),
                            'description' => '',
                            'documents' => 0,
                            'owner' => 0,
                            'status' => 0,
                        ],
                    ],
                ];
                $substUid = Helper::processDBasAdmin($collData);
                // Add new collection's UID.
                $collections[] = $substUid[$collNewUid];
                // Flash messages are only added outside of CLI requests.
                if ((TYPO3_REQUESTTYPE & TYPO3_REQUESTTYPE_CLI) === 0) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newCollection'), $collection, $substUid[$collNewUid])),
                        Helper::getMessage('flash.attention', TRUE),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        TRUE
                    );
                }
            }
        }
        $metadata['collection'] = $collections;
        // Get UID for owner.
        $owner = !empty($metadata['owner'][0]) ? $metadata['owner'][0] : 'default';
        $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'tx_dlf_libraries.uid AS uid',
            'tx_dlf_libraries',
            'tx_dlf_libraries.pid='.intval($pid)
                .' AND tx_dlf_libraries.index_name='.$GLOBALS['TYPO3_DB']->fullQuoteStr($owner, 'tx_dlf_libraries')
                .Helper::whereClause('tx_dlf_libraries'),
            '',
            '',
            '1'
        );
        if ($GLOBALS['TYPO3_DB']->sql_num_rows($result)) {
            list ($ownerUid) = $GLOBALS['TYPO3_DB']->sql_fetch_row($result);
        } else {
            // Insert new library.
            $libNewUid = uniqid('NEW');
            $libData = [
                'tx_dlf_libraries' => [
                    $libNewUid => [
                        'pid' => $pid,
                        'label' => $owner,
                        'index_name' => $owner,
                        'website' => '',
                        'contact' => '',
                        'image' => '',
                        'oai_label' => '',
                        'oai_base' => '',
                        'opac_label' => '',
                        'opac_base' => '',
                        'union_label' => '',
                        'union_base' => '',
                    ],
                ],
            ];
            $substUid = Helper::processDBasAdmin($libData);
            // Add new library's UID.
            $ownerUid = $substUid[$libNewUid];
            if ((TYPO3_REQUESTTYPE & TYPO3_REQUESTTYPE_CLI) === 0) {
                Helper::addMessage(
                    htmlspecialchars(sprintf(Helper::getMessage('flash.newLibrary'), $owner, $ownerUid)),
                    Helper::getMessage('flash.attention', TRUE),
                    \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                    TRUE
                );
            }
        }
        $metadata['owner'][0] = $ownerUid;
        // Get UID of parent document.
        $partof = $this->getParentDocumentUidForSaving($pid, $core);
        // Use the date of publication or title as alternative sorting metric for parts of multi-part works.
        if (!empty($partof)) {
            if (empty($metadata['volume'][0])
                && !empty($metadata['year'][0])) {
                $metadata['volume'] = $metadata['year'];
            }
            if (empty($metadata['volume_sorting'][0])) {
                if (!empty($metadata['year_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year_sorting'][0];
                } elseif (!empty($metadata['year'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year'][0];
                }
            }
            // If volume_sorting is still empty, try to use title_sorting finally (workaround for newspapers)
            if (empty($metadata['volume_sorting'][0])) {
                if (!empty($metadata['title_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['title_sorting'][0];
                }
            }
        }
        // Get metadata for lists and sorting.
        $listed = [];
        $sortable = [];
        $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'tx_dlf_metadata.index_name AS index_name,tx_dlf_metadata.is_listed AS is_listed,tx_dlf_metadata.is_sortable AS is_sortable',
            'tx_dlf_metadata',
            '(tx_dlf_metadata.is_listed=1 OR tx_dlf_metadata.is_sortable=1)'
                .' AND tx_dlf_metadata.pid='.intval($pid)
                .Helper::whereClause('tx_dlf_metadata'),
            '',
            '',
            ''
        );
        while ($resArray = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($result)) {
            if (!empty($metadata[$resArray['index_name']])) {
                if ($resArray['is_listed']) {
                    $listed[$resArray['index_name']] = $metadata[$resArray['index_name']];
                }
                if ($resArray['is_sortable']) {
                    $sortable[$resArray['index_name']] = $metadata[$resArray['index_name']][0];
                }
            }
        }
        // Fill data array.
        $data = [
            'tx_dlf_documents' => [
                $this->uid => [
                    'pid' => $pid,
                    $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['starttime'] => 0,
                    $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['endtime'] => 0,
                    'prod_id' => $metadata['prod_id'][0],
                    'location' => $this->location,
                    'record_id' => $metadata['record_id'][0],
                    'opac_id' => $metadata['opac_id'][0],
                    'union_id' => $metadata['union_id'][0],
                    'urn' => $metadata['urn'][0],
                    'purl' => $metadata['purl'][0],
                    'title' => $metadata['title'][0],
                    'title_sorting' => $metadata['title_sorting'][0],
                    'author' => implode('; ', $metadata['author']),
                    'year' => implode('; ', $metadata['year']),
                    'place' => implode('; ', $metadata['place']),
                    'thumbnail' => $this->_getThumbnail(TRUE),
                    'metadata' => serialize($listed),
                    'metadata_sorting' => serialize($sortable),
                    'structure' => $metadata['type'][0],
                    'partof' => $partof,
                    'volume' => $metadata['volume'][0],
                    'volume_sorting' => $metadata['volume_sorting'][0],
                    'collections' => $metadata['collection'],
                    'owner' => $metadata['owner'][0],
                    'solrcore' => $core,
                    'status' => 0,
                    'document_format' => $metadata['document_format'][0],
                ],
            ],
        ];
        // Unhide hidden documents.
        if (!empty($conf['unhideOnIndex'])) {
            $data['tx_dlf_documents'][$this->uid][$GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['disabled']] = 0;
        }
        // Process data.
        $newIds = Helper::processDBasAdmin($data);
        // Replace placeholder with actual UID.
        if (strpos($this->uid, 'NEW') === 0) {
            $this->uid = $newIds[$this->uid];
            $this->pid = $pid;
            $this->parentId = $partof;
        }
        if ((TYPO3_REQUESTTYPE & TYPO3_REQUESTTYPE_CLI) === 0) {
            Helper::addMessage(
                htmlspecialchars(sprintf(Helper::getMessage('flash.documentSaved'), $metadata['title'][0], $this->uid)),
                Helper::getMessage('flash.done', TRUE),
                \TYPO3\CMS\Core\Messaging\FlashMessage::OK,
                TRUE
            );
        }
        // Add document to index.
        if ($core) {
            Indexer::add($this, $core);
        } else {
            Helper::devLog('Invalid UID "'.$core.'" for Solr core', DEVLOG_SEVERITY_NOTICE);
        }
        return TRUE;
    }
1129
1130
    /**
     * Get the ID of the parent document if the current document has one. Also save a parent document
     * to the database and the Solr index if their $pid and the current $pid differ.
     * Currently only applies to METS documents.
     *
     * save() calls this with its own (already sanitized) $pid and $core and stores the result
     * in the "partof" field of the document record.
     *
     * @access protected
     *
     * @abstract
     *
     * @param integer $pid: The PID of the record being saved
     * @param integer $core: The UID of the Solr core for indexing
     *
     * @return int The parent document's id.
     */
    abstract protected function getParentDocumentUidForSaving($pid, $core);
1142
1143
    /**
     * Getter for $this->hasFulltext, invoked via __get()
     *
     * @access protected
     *
     * @return boolean Are there any fulltext files available?
     */
    protected function _getHasFulltext() {
        // Let the format specific implementation do its analysis before the flag is read.
        $this->ensureHasFulltextIsSet();
        return $this->hasFulltext;
    }
1154
1155
    /**
     * Getter for $this->location, invoked via __get()
     *
     * @access protected
     *
     * @return string The location of the document
     */
    protected function _getLocation() {
        return $this->location;
    }
1165
1166
    /**
     * Format specific part of building the document's metadata array
     *
     * Called by _getMetadataArray() whenever the metadata array has not been
     * built yet or was built for a different PID.
     *
     * @access protected
     *
     * @abstract
     *
     * @param integer $cPid: The PID of the metadata definitions, as resolved by _getMetadataArray()
     */
    abstract protected function prepareMetadataArray($cPid);
1176
1177
    /**
     * This builds an array of the document's metadata
     *
     * The PID the array was built for is remembered in element 0, so the array
     * is rebuilt when the PID of the metadata definitions changes.
     *
     * @access protected
     *
     * @return array Array of metadata with their corresponding logical structure node ID as key
     */
    protected function _getMetadataArray() {
        // Set metadata definitions' PID.
        $cPid = $this->cPid ?: $this->pid;
        if (!$cPid) {
            Helper::devLog('Invalid PID '.$cPid.' for metadata definitions', DEVLOG_SEVERITY_ERROR);
            return [];
        }
        // The cached array is only usable if it was built for the very same PID.
        $isUpToDate = $this->metadataArrayLoaded
            && $this->metadataArray[0] == $cPid;
        if (!$isUpToDate) {
            $this->prepareMetadataArray($cPid);
            $this->metadataArray[0] = $cPid;
            $this->metadataArrayLoaded = TRUE;
        }
        return $this->metadataArray;
    }
1199
1200
    /**
     * Getter for $this->numPages, invoked via __get()
     *
     * @access protected
     *
     * @return integer The total number of pages and/or tracks
     */
    protected function _getNumPages() {
        // Presumably the page count is determined while building the physical structure.
        $this->_getPhysicalStructure();
        return $this->numPages;
    }
1211
1212
    /**
     * Getter for $this->parentId, invoked via __get()
     *
     * @access protected
     *
     * @return integer The UID of the parent document or zero if not applicable
     */
    protected function _getParentId() {
        return $this->parentId;
    }
1222
1223
    /**
     * This builds an array of the document's physical structure
     *
     * Also called by _getNumPages() and _getPhysicalStructureInfo() before they
     * return their respective properties.
     *
     * @access protected
     *
     * @abstract
     *
     * @return array Array of physical elements' id, type, label and file representations ordered
     * by @ORDER attribute / IIIF Sequence's Canvases
     */
    abstract protected function _getPhysicalStructure();
1234
1235
    /**
     * This gives an array of the document's physical structure metadata
     *
     * @access protected
     *
     * @return array Array of elements' type, label and file representations ordered by @ID attribute / Canvas order
     */
    protected function _getPhysicalStructureInfo() {
        // Already built? Then hand out the cached array.
        if ($this->physicalStructureLoaded) {
            return $this->physicalStructureInfo;
        }
        // Build physical structure array.
        $this->_getPhysicalStructure();
        return $this->physicalStructureInfo;
    }
1250
1251
    /**
     * Getter for $this->pid, invoked via __get()
     *
     * @access protected
     *
     * @return integer The PID of the document or zero if not in database
     */
    protected function _getPid() {
        return $this->pid;
    }
1261
1262
    /**
     * Getter for $this->ready, invoked via __get()
     *
     * @access protected
     *
     * @return boolean Is the document instantiated successfully?
     */
    protected function _getReady() {
        return $this->ready;
    }
1272
1273
    /**
     * Getter for $this->recordId, invoked via __get()
     *
     * @access protected
     *
     * @return mixed The METS file's / IIIF manifest's record identifier
     */
    protected function _getRecordId() {
        return $this->recordId;
    }
1283
1284
    /**
     * Getter for $this->rootId, invoked via __get()
     *
     * If the document has a parent, the root ID is taken over from the parent
     * document on first access; afterwards the stored value is returned.
     *
     * @access protected
     *
     * @return integer The UID of the root document or zero if not applicable
     */
    protected function _getRootId() {
        // The lookup is only done once.
        if ($this->rootIdLoaded) {
            return $this->rootId;
        }
        if ($this->parentId) {
            $parentDocument = self::getInstance($this->parentId, $this->pid);
            $this->rootId = $parentDocument->rootId;
        }
        $this->rootIdLoaded = TRUE;
        return $this->rootId;
    }
1301
1302
    /**
     * This returns the smLinks between logical and physical structMap (METS). For IIIF the
     * relation between Canvases and Manifests / Ranges is modelled in the same way.
     *
     * @access protected
     *
     * @abstract
     *
     * @return array The links between logical and physical nodes / Range, Manifest and Canvas
     */
    abstract protected function _getSmLinks();
1313
1314
    /**
     * This builds an array of the document's logical structure
     *
     * @access protected
     *
     * @return array Array of structure nodes' id, label, type and physical page indexes/mptr / Canvas link with original hierarchy preserved
     */
    protected function _getTableOfContents() {
        // Already built? Then hand out the cached array.
        if ($this->tableOfContentsLoaded) {
            return $this->tableOfContents;
        }
        // Get all logical structures.
        $this->getLogicalStructure('', TRUE);
        $this->tableOfContentsLoaded = TRUE;
        return $this->tableOfContents;
    }
1330
1331
    /**
     * This returns the document's thumbnail location
     *
     * save() calls this with $forceReload = TRUE to fill the "thumbnail" field
     * of the document record.
     *
     * @access protected
     *
     * @abstract
     *
     * @param boolean $forceReload: Force reloading the thumbnail instead of returning the cached value
     *
     * @return string The document's thumbnail location
     */
    abstract protected function _getThumbnail($forceReload = FALSE);
1343
1344
    /**
     * This returns the ID of the toplevel logical structure node
     *
     * @access protected
     *
     * @abstract
     *
     * @return string The logical structure node's ID
     */
    abstract protected function _getToplevelId();
1354
1355
    /**
     * Getter for $this->uid, invoked via __get()
     *
     * @access protected
     *
     * @return mixed The UID or the URL of the document
     */
    protected function _getUid() {
        return $this->uid;
    }
1365
1366
    /**
     * Setter for $this->cPid, invoked via __set()
     *
     * The value is cast to integer; negative values are stored as zero.
     *
     * @access protected
     *
     * @param integer $value: The new PID for the metadata definitions
     *
     * @return void
     */
    protected function _setCPid($value) {
        $cPid = intval($value);
        $this->cPid = ($cPid > 0 ? $cPid : 0);
    }
1378
1379
    /**
     * This magic method is invoked each time the object is cloned
     * (It is declared protected because singleton objects should not be cloned from outside)
     *
     * @access protected
     *
     * @return void
     */
    protected function __clone() {
        // Intentionally empty.
    }
1388
1389
    /**
     * This is a singleton class, thus the constructor should be private/protected
     * (Get an instance of this class by calling \Kitodo\Dlf\Common\Document::getInstance())
     *
     * If $uid is an integer, the document is looked up in the database and loaded from its
     * stored location. Otherwise $uid is treated as a URL: the document is loaded first and
     * then matched against the database by location or record identifier. $this->ready is
     * only set to TRUE if a document could be obtained.
     *
     * @access protected
     *
     * @param integer|string $uid: The UID of the document to parse or URL to XML file
     * @param integer $pid: If > 0, then only document with this PID gets loaded
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: Either null or the \SimpleXMLElement
     * or IiifResourceInterface that has been loaded to determine the basic document format.
     *
     * @return void
     */
    protected function __construct($uid, $pid, $preloadedDocument) {
        // The location is only known up front if the document is addressed by URL.
        // Initialize it explicitly so later reads never hit an undefined variable.
        $location = NULL;
        // Prepare to check database for the requested document.
        if (\TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($uid)) {
            $whereClause = 'tx_dlf_documents.uid='.intval($uid).Helper::whereClause('tx_dlf_documents');
        } else {
            // Try to load METS file / IIIF manifest.
            $loaded = $this->setPreloadedDocument($preloadedDocument)
                || (\TYPO3\CMS\Core\Utility\GeneralUtility::isValidUrl($uid)
                && $this->load($uid));
            if (!$loaded) {
                // Loading failed.
                return;
            }
            // Initialize core METS object.
            $this->init();
            if ($this->getDocument() === NULL) {
                // No METS / IIIF part found.
                return;
            }
            // Cast to string for safety reasons.
            $location = (string) $uid;
            $this->establishRecordId($pid);
            if (!empty($location)
                && !empty($this->recordId)) {
                // Try to match record identifier or location (both should be unique).
                $whereClause = '(tx_dlf_documents.location='.$GLOBALS['TYPO3_DB']->fullQuoteStr($location, 'tx_dlf_documents').' OR tx_dlf_documents.record_id='.$GLOBALS['TYPO3_DB']->fullQuoteStr($this->recordId, 'tx_dlf_documents').')'.Helper::whereClause('tx_dlf_documents');
            } else {
                // Can't persistently identify document, don't try to match at all.
                $whereClause = '1=-1';
            }
        }
        // Check for PID if needed.
        if ($pid) {
            $whereClause .= ' AND tx_dlf_documents.pid='.intval($pid);
        }
        // Get document PID and location from database.
        $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'tx_dlf_documents.uid AS uid,tx_dlf_documents.pid AS pid,tx_dlf_documents.record_id AS record_id,tx_dlf_documents.partof AS partof,tx_dlf_documents.thumbnail AS thumbnail,tx_dlf_documents.location AS location',
            'tx_dlf_documents',
            $whereClause,
            '',
            '',
            '1'
        );
        if ($GLOBALS['TYPO3_DB']->sql_num_rows($result) > 0) {
            list ($this->uid, $this->pid, $this->recordId, $this->parentId, $this->thumbnail, $this->location) = $GLOBALS['TYPO3_DB']->sql_fetch_row($result);
            $this->thumbnailLoaded = TRUE;
            // Load XML file if necessary...
            if ($this->getDocument() === NULL
                && $this->load($this->location)) {
                // ...and set some basic properties.
                $this->init();
            }
            // Do we have a METS / IIIF object now?
            if ($this->getDocument() !== NULL) {
                // Set new location if necessary.
                if (!empty($location)) {
                    $this->location = $location;
                }
                // Document ready!
                $this->ready = TRUE;
            }
        } elseif ($this->getDocument() !== NULL) {
            // Set location as UID for documents not in database.
            $this->uid = $location;
            $this->location = $location;
            // Document ready!
            $this->ready = TRUE;
        } else {
            Helper::devLog('No document with UID '.$uid.' found or document not accessible', DEVLOG_SEVERITY_ERROR);
        }
    }
1475
1476
    /**
     * This magic method is called each time an invisible property is read from the object
     *
     * The read is forwarded to the method "_get<Property>"; if the property or
     * that method does not exist, a warning is logged and NULL is returned.
     *
     * @access public
     *
     * @param string $var: Name of variable to get
     *
     * @return mixed Value of $this->$var
     */
    public function __get($var) {
        $getter = '_get'.ucfirst($var);
        if (property_exists($this, $var)
            && method_exists($this, $getter)) {
            return $this->$getter();
        }
        Helper::devLog('There is no getter function for property "'.$var.'"', DEVLOG_SEVERITY_WARNING);
        return NULL;
    }
1495
1496
    /**
     * This magic method is called each time an invisible property is written on the object
     *
     * The write is forwarded to the method "_set<Property>"; if the property or
     * that method does not exist, a warning is logged and nothing is changed.
     *
     * @access public
     *
     * @param string $var: Name of variable to set
     * @param mixed $value: New value of variable
     *
     * @return void
     */
    public function __set($var, $value) {
        $setter = '_set'.ucfirst($var);
        if (property_exists($this, $var)
            && method_exists($this, $setter)) {
            $this->$setter($value);
            return;
        }
        Helper::devLog('There is no setter function for property "'.$var.'"', DEVLOG_SEVERITY_WARNING);
    }
1515
}
1516