Passed
Pull Request — master (#86)
by
unknown
02:39
created

Document::__isset()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 1
c 0
b 0
f 0
dl 0
loc 3
rs 10
cc 1
nc 1
nop 1
1
<?php
2
3
/**
4
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
5
 *
6
 * This file is part of the Kitodo and TYPO3 projects.
7
 *
8
 * @license GNU General Public License version 3 or later.
9
 * For the full copyright and license information, please read the
10
 * LICENSE.txt file that was distributed with this source code.
11
 */
12
13
namespace Kitodo\Dlf\Common;
14
15
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
16
use TYPO3\CMS\Core\Database\ConnectionPool;
17
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
18
use TYPO3\CMS\Core\Log\LogManager;
19
use TYPO3\CMS\Core\Utility\GeneralUtility;
20
use TYPO3\CMS\Core\Utility\MathUtility;
21
use Ubl\Iiif\Presentation\Common\Model\Resources\IiifResourceInterface;
22
use Ubl\Iiif\Tools\IiifHelper;
23
24
/**
25
 * Document class for the 'dlf' extension
26
 *
27
 * @author Sebastian Meyer <[email protected]>
28
 * @author Henrik Lochmann <[email protected]>
29
 * @package TYPO3
30
 * @subpackage dlf
31
 * @access public
32
 * @property int $cPid This holds the PID for the configuration
33
 * @property-read bool $hasFulltext Are there any fulltext files available?
34
 * @property-read string $location This holds the documents location
35
 * @property-read array $metadataArray This holds the documents' parsed metadata array
36
 * @property-read int $numPages This holds the total number of pages
37
 * @property-read int $parentId This holds the UID of the parent document or zero if not multi-volumed
38
 * @property-read array $physicalStructure This holds the physical structure
39
 * @property-read array $physicalStructureInfo This holds the physical structure metadata
40
 * @property-read int $pid This holds the PID of the document or zero if not in database
41
 * @property-read bool $ready Is the document instantiated successfully?
42
 * @property-read string $recordId The METS file's / IIIF manifest's record identifier
43
 * @property-read int $rootId This holds the UID of the root document or zero if not multi-volumed
44
 * @property-read array $smLinks This holds the smLinks between logical and physical structMap
45
 * @property-read array $tableOfContents This holds the logical structure
46
 * @property-read string $thumbnail This holds the document's thumbnail location
47
 * @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
48
 * @property-read mixed $uid This holds the UID or the URL of the document
49
 * @abstract
50
 */
51
abstract class Document
52
{
53
    /**
54
     * This holds the logger
55
     *
56
     * @var \Psr\Log\LoggerInterface
57
     * @access protected
58
     */
59
    protected $logger;
60
61
    /**
62
     * This holds the PID for the configuration
63
     *
64
     * @var int
65
     * @access protected
66
     */
67
    protected $cPid = 0;
68
69
    /**
70
     * The extension key
71
     *
72
     * @var string
73
     * @access public
74
     */
75
    public static $extKey = 'dlf';
76
77
    /**
78
     * This holds the configuration for all supported metadata encodings
79
     * @see loadFormats()
80
     *
81
     * @var array
82
     * @access protected
83
     */
84
    protected $formats = [
85
        'OAI' => [
86
            'rootElement' => 'OAI-PMH',
87
            'namespaceURI' => 'http://www.openarchives.org/OAI/2.0/',
88
        ],
89
        'METS' => [
90
            'rootElement' => 'mets',
91
            'namespaceURI' => 'http://www.loc.gov/METS/',
92
        ],
93
        'XLINK' => [
94
            'rootElement' => 'xlink',
95
            'namespaceURI' => 'http://www.w3.org/1999/xlink',
96
        ]
97
    ];
98
99
    /**
100
     * Are the available metadata formats loaded?
101
     * @see $formats
102
     *
103
     * @var bool
104
     * @access protected
105
     */
106
    protected $formatsLoaded = false;
107
108
    /**
109
     * Are there any fulltext files available? This also includes IIIF text annotations
110
     * with motivation 'painting' if Kitodo.Presentation is configured to store text
111
     * annotations as fulltext.
112
     *
113
     * @var bool
114
     * @access protected
115
     */
116
    protected $hasFulltext = false;
117
118
    /**
119
     * Last searched logical and physical page
120
     *
121
     * @var array
122
     * @access protected
123
     */
124
    protected $lastSearchedPhysicalPage = ['logicalPage' => null, 'physicalPage' => null];
125
126
    /**
127
     * This holds the documents location
128
     *
129
     * @var string
130
     * @access protected
131
     */
132
    protected $location = '';
133
134
    /**
135
     * This holds the logical units
136
     *
137
     * @var array
138
     * @access protected
139
     */
140
    protected $logicalUnits = [];
141
142
    /**
143
     * This holds the documents' parsed metadata array with their corresponding
144
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
145
     *
146
     * @var array
147
     * @access protected
148
     */
149
    protected $metadataArray = [];
150
151
    /**
152
     * Is the metadata array loaded?
153
     * @see $metadataArray
154
     *
155
     * @var bool
156
     * @access protected
157
     */
158
    protected $metadataArrayLoaded = false;
159
160
    /**
161
     * This holds the total number of pages
162
     *
163
     * @var int
164
     * @access protected
165
     */
166
    protected $numPages = 0;
167
168
    /**
169
     * This holds the UID of the parent document or zero if not multi-volumed
170
     *
171
     * @var int
172
     * @access protected
173
     */
174
    protected $parentId = 0;
175
176
    /**
177
     * This holds the physical structure
178
     *
179
     * @var array
180
     * @access protected
181
     */
182
    protected $physicalStructure = [];
183
184
    /**
185
     * This holds the physical structure metadata
186
     *
187
     * @var array
188
     * @access protected
189
     */
190
    protected $physicalStructureInfo = [];
191
192
    /**
193
     * Is the physical structure loaded?
194
     * @see $physicalStructure
195
     *
196
     * @var bool
197
     * @access protected
198
     */
199
    protected $physicalStructureLoaded = false;
200
201
    /**
202
     * This holds the PID of the document or zero if not in database
203
     *
204
     * @var int
205
     * @access protected
206
     */
207
    protected $pid = 0;
208
209
    /**
210
     * This holds the documents' raw text pages with their corresponding
211
     * structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
212
     *
213
     * @var array
214
     * @access protected
215
     */
216
    protected $rawTextArray = [];
217
218
    /**
219
     * Is the document instantiated successfully?
220
     *
221
     * @var bool
222
     * @access protected
223
     */
224
    protected $ready = false;
225
226
    /**
227
     * The METS file's / IIIF manifest's record identifier
228
     *
229
     * @var string
230
     * @access protected
231
     */
232
    protected $recordId;
233
234
    /**
235
     * This holds the singleton object of the document
236
     *
237
     * @var array (\Kitodo\Dlf\Common\Document)
238
     * @static
239
     * @access protected
240
     */
241
    protected static $registry = [];
242
243
    /**
244
     * This holds the UID of the root document or zero if not multi-volumed
245
     *
246
     * @var int
247
     * @access protected
248
     */
249
    protected $rootId = 0;
250
251
    /**
252
     * Is the root id loaded?
253
     * @see $rootId
254
     *
255
     * @var bool
256
     * @access protected
257
     */
258
    protected $rootIdLoaded = false;
259
260
    /**
261
     * This holds the smLinks between logical and physical structMap
262
     *
263
     * @var array
264
     * @access protected
265
     */
266
    protected $smLinks = ['l2p' => [], 'p2l' => []];
267
268
    /**
269
     * Are the smLinks loaded?
270
     * @see $smLinks
271
     *
272
     * @var bool
273
     * @access protected
274
     */
275
    protected $smLinksLoaded = false;
276
277
    /**
278
     * This holds the logical structure
279
     *
280
     * @var array
281
     * @access protected
282
     */
283
    protected $tableOfContents = [];
284
285
    /**
286
     * Is the table of contents loaded?
287
     * @see $tableOfContents
288
     *
289
     * @var bool
290
     * @access protected
291
     */
292
    protected $tableOfContentsLoaded = false;
293
294
    /**
295
     * This holds the document's thumbnail location
296
     *
297
     * @var string
298
     * @access protected
299
     */
300
    protected $thumbnail = '';
301
302
    /**
303
     * Is the document's thumbnail location loaded?
304
     * @see $thumbnail
305
     *
306
     * @var bool
307
     * @access protected
308
     */
309
    protected $thumbnailLoaded = false;
310
311
    /**
312
     * This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
313
     *
314
     * @var string
315
     * @access protected
316
     */
317
    protected $toplevelId = '';
318
319
    /**
320
     * This holds the UID or the URL of the document
321
     *
322
     * @var mixed
323
     * @access protected
324
     */
325
    protected $uid = 0;
326
327
    /**
328
     * This holds the whole XML file as \SimpleXMLElement object
329
     *
330
     * @var \SimpleXMLElement
331
     * @access protected
332
     */
333
    protected $xml;
334
335
    /**
     * Empties the static instance registry to prevent memory exhaustion
     *
     * @access public
     *
     * @static
     *
     * @return void
     */
    public static function clearRegistry()
    {
        // Drop every cached document instance by starting over with an empty list.
        self::$registry = [];
    }
349
350
    /**
     * Makes sure the recordId is retrieved from the document, provided it exists
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $pid: ID of the configuration page with the recordId config
     *
     */
    abstract protected function establishRecordId($pid);
361
362
    /**
     * Returns the source document PHP object that this Document instance represents
     *
     * @access protected
     *
     * @abstract
     *
     * @return \SimpleXMLElement|IiifResourceInterface A PHP object representation of
     * the current document: SimpleXMLElement for METS, IiifResourceInterface for IIIF
     */
    abstract protected function getDocument();
373
374
    /**
     * Returns the location of a downloadable file for a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
     *
     * @return string The file's location as URL
     */
    abstract public function getDownloadLocation($id);
386
387
    /**
     * Returns the location of a file representing a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node (METS) or the @id property of the IIIF resource
     *
     * @return string The file's location as URL
     */
    abstract public function getFileLocation($id);
399
400
    /**
     * Returns the MIME type of a file representing a physical page or track
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the file node
     *
     * @return string The file's MIME type
     */
    abstract public function getFileMimeType($id);
412
413
    /**
     * This is a singleton class, thus an instance must be created by this method
     *
     * Lookup order: the static registry, then the user's session, and finally a
     * freshly created instance whose concrete class (MetsDocument or IiifManifest)
     * depends on the detected document format.
     *
     * @access public
     *
     * @static
     *
     * @param mixed $uid: The unique identifier of the document to parse, the URL of XML file or the IRI of the IIIF resource
     * @param int $pid: If > 0, then only document with this PID gets loaded
     * @param bool $forceReload: Force reloading the document instead of returning the cached instance
     *
     * @return \Kitodo\Dlf\Common\Document|null Instance of this class, either MetsDocument or IiifManifest,
     * or null if the document format could not be determined
     */
    public static function &getInstance($uid, $pid = 0, $forceReload = false)
    {
        // Sanitize input.
        $pid = max(intval($pid), 0);
        if (!$forceReload) {
            $regObj = Helper::digest($uid);
            // isset() avoids an "undefined index" notice when nothing is cached yet.
            if (
                isset(self::$registry[$regObj])
                && self::$registry[$regObj] instanceof self
            ) {
                // Check if instance has given PID.
                if (
                    !$pid
                    || !self::$registry[$regObj]->pid
                    || $pid == self::$registry[$regObj]->pid
                ) {
                    // Return singleton instance if available.
                    return self::$registry[$regObj];
                }
            } else {
                // Check the user's session...
                // NOTE(review): the session is read with get_called_class() but written below
                // with get_class($instance) - confirm both resolve to the same session key.
                $sessionData = Helper::loadFromSession(get_called_class());
                if (
                    isset($sessionData[$regObj])
                    && $sessionData[$regObj] instanceof self
                ) {
                    // Check if instance has given PID.
                    if (
                        !$pid
                        || !$sessionData[$regObj]->pid
                        || $pid == $sessionData[$regObj]->pid
                    ) {
                        // ...and restore registry.
                        self::$registry[$regObj] = $sessionData[$regObj];
                        return self::$registry[$regObj];
                    }
                }
            }
        }
        // Create new instance depending on format (METS or IIIF) ...
        $instance = null;
        $documentFormat = null;
        $xml = null;
        $iiif = null;
        // Extension configuration is loaded lazily and at most once per call.
        $extConf = null;
        // Try to get document format from database
        if (MathUtility::canBeInterpretedAsInteger($uid)) {
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('tx_dlf_documents');

            $queryBuilder
                ->select(
                    'tx_dlf_documents.location AS location',
                    'tx_dlf_documents.document_format AS document_format'
                )
                ->from('tx_dlf_documents');

            // Get UID of document with given record identifier.
            if ($pid) {
                $queryBuilder
                    ->where(
                        $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($uid)),
                        $queryBuilder->expr()->eq('tx_dlf_documents.pid', intval($pid)),
                        Helper::whereExpression('tx_dlf_documents')
                    );
            } else {
                $queryBuilder
                    ->where(
                        $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($uid)),
                        Helper::whereExpression('tx_dlf_documents')
                    );
            }

            $result = $queryBuilder
                ->setMaxResults(1)
                ->execute();

            if ($resArray = $result->fetch()) {
                $documentFormat = $resArray['document_format'];
            }
        } else {
            // Get document format from content of remote document
            // Cast to string for safety reasons.
            $location = (string) $uid;
            // Try to load a file from the url
            if (GeneralUtility::isValidUrl($location)) {
                // Load extension configuration
                $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
                // Set user-agent to identify self when fetching XML data.
                if (!empty($extConf['useragent'])) {
                    // Deliberately best-effort: a failure to set the user agent must not abort loading.
                    @ini_set('user_agent', $extConf['useragent']);
                }
                $content = GeneralUtility::getUrl($location);
                if ($content !== false) {
                    // TODO use single place to load xml
                    // Turn off libxml's error logging.
                    $libxmlErrors = libxml_use_internal_errors(true);
                    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, where external
                    // entity loading is disabled by default; only toggle it on older versions.
                    $toggleEntityLoader = \PHP_VERSION_ID < 80000;
                    if ($toggleEntityLoader) {
                        $previousValueOfEntityLoader = libxml_disable_entity_loader(true);
                    }
                    // Try to load XML from file.
                    $xml = simplexml_load_string($content);
                    if ($toggleEntityLoader) {
                        // Reset entity loader setting.
                        libxml_disable_entity_loader($previousValueOfEntityLoader);
                    }
                    // Reset libxml's error logging.
                    libxml_use_internal_errors($libxmlErrors);
                    if ($xml !== false) {
                        /* @var $xml \SimpleXMLElement */
                        $xml->registerXPathNamespace('mets', 'http://www.loc.gov/METS/');
                        $xpathResult = $xml->xpath('//mets:mets');
                        $documentFormat = !empty($xpathResult) ? 'METS' : null;
                    } else {
                        // Try to load file as IIIF resource instead.
                        $contentAsJsonArray = json_decode($content, true);
                        if ($contentAsJsonArray !== null) {
                            // Reuse the extension configuration loaded above.
                            IiifHelper::setUrlReader(IiifUrlReader::getInstance());
                            IiifHelper::setMaxThumbnailHeight($extConf['iiifThumbnailHeight']);
                            IiifHelper::setMaxThumbnailWidth($extConf['iiifThumbnailWidth']);
                            $iiif = IiifHelper::loadIiifResource($contentAsJsonArray);
                            if ($iiif instanceof IiifResourceInterface) {
                                $documentFormat = 'IIIF';
                            }
                        }
                    }
                }
            }
        }
        // $pid has already been sanitized at the top of the method.
        if ($documentFormat === 'METS') {
            $instance = new MetsDocument($uid, $pid, $xml);
        } elseif ($documentFormat === 'IIIF') {
            $instance = new IiifManifest($uid, $pid, $iiif);
        }
        // Save instance to registry.
        if (
            $instance instanceof self
            && $instance->ready
        ) {
            self::$registry[Helper::digest($instance->uid)] = $instance;
            if ($instance->uid != $instance->location) {
                // Make the instance reachable by its location as well.
                self::$registry[Helper::digest($instance->location)] = $instance;
            }
            // Load extension configuration unless it has been loaded already.
            $extConf = $extConf ?? GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
            // Save registry to session if caching is enabled.
            if (!empty($extConf['caching'])) {
                Helper::saveToSession(self::$registry, get_class($instance));
            }
            $instance->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($instance));
        }
        // Return new instance (null if no supported document format was detected).
        return $instance;
    }
579
580
    /**
     * Returns details about a logical structure element
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the logical structure node (METS) or
     * the @id property of the Manifest / Range (IIIF)
     * @param bool $recursive: Whether to include the child elements / resources
     *
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
     */
    abstract public function getLogicalStructure($id, $recursive = false);
594
595
    /**
     * Extracts all the metadata for a logical structure node
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the logical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     * @param int $cPid: The PID for the metadata definitions
     *                       (defaults to $this->cPid or $this->pid)
     *
     * @return array The logical structure node's / the IIIF resource's parsed metadata array
     */
    abstract public function getMetadata($id, $cPid = 0);
610
611
    /**
     * This returns the first corresponding physical page number of a given logical page label
     *
     * The result of the last successful search is cached in $lastSearchedPhysicalPage
     * so that repeated lookups of the same label do not walk the structure again.
     *
     * @access public
     *
     * @param string $logicalPage: The label (or a part of the label) of the logical page
     *
     * @return int The physical page number; 1 if the label is empty or cannot be found
     */
    public function getPhysicalPage($logicalPage)
    {
        // Normalize the needle: a non-string needle would not be searched as text by strpos().
        $logicalPage = (string) $logicalPage;
        if ($logicalPage === '') {
            // An empty needle cannot be searched for consistently across PHP versions.
            return 1;
        }
        // Strict comparison: '01' must not be served from a cached search for '1'.
        if ($this->lastSearchedPhysicalPage['logicalPage'] === $logicalPage) {
            return $this->lastSearchedPhysicalPage['physicalPage'];
        }
        // NOTE(review): this reads $physicalStructureInfo as-is; whether it has to be
        // loaded beforehand is not visible here - confirm against the callers.
        $physicalPage = 0;
        foreach ($this->physicalStructureInfo as $page) {
            if (
                isset($page['orderlabel'])
                && strpos((string) $page['orderlabel'], $logicalPage) !== false
            ) {
                // Remember the hit for subsequent lookups of the same label.
                $this->lastSearchedPhysicalPage['logicalPage'] = $logicalPage;
                $this->lastSearchedPhysicalPage['physicalPage'] = $physicalPage;
                return $physicalPage;
            }
            $physicalPage++;
        }
        // No matching label found.
        return 1;
    }
640
641
    /**
     * Extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas.
     * The text might be given as ALTO for METS or as annotations or ALTO for IIIF resources.
     *
     * @access public
     *
     * @abstract
     *
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     *
     * @return string The OCR full text
     */
    abstract public function getFullText($id);
655
656
    /**
     * This extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas from an
     * XML full text representation (currently only ALTO). For IIIF manifests, ALTO documents have
     * to be given in the Canvas' / Manifest's "seeAlso" property.
     *
     * The first configured full text file group that has a file for the node is used.
     * The converted text is also stored in $rawTextArray under the node's ID.
     *
     * @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
     * of the Manifest / Range (IIIF)
     *
     * @return string The OCR full text; empty string if the node is invalid, the file
     * cannot be loaded or the text format is not supported
     */
    protected function getFullTextFromXml($id)
    {
        $fullText = '';
        // Defaults keep the checks below well-defined when no file group matches.
        $fileContent = '';
        $textFormat = '';
        // Load available text formats, ...
        $this->loadFormats();
        // ... physical structure ...
        $this->_getPhysicalStructure();
        // ... and extension configuration.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        // Empty list entries are dropped so that they cannot cut the search short.
        $fileGrpsFulltext = GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext'], true);
        if (empty($this->physicalStructureInfo[$id])) {
            $this->logger->warning('Invalid structure node @ID "' . $id . '"');
            return $fullText;
        }
        foreach ($fileGrpsFulltext as $fileGrpFulltext) {
            if (empty($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext])) {
                // This file group has no file for the node; try the next one.
                continue;
            }
            // Get full text file.
            $fileContent = GeneralUtility::getUrl($this->getFileLocation($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext]));
            if ($fileContent === false) {
                $this->logger->warning('Couldn\'t load full text file for structure node @ID "' . $id . '"');
                return $fullText;
            }
            $textFormat = $this->getTextFormat($fileContent);
            // Only the first matching file group is used.
            break;
        }
        // Is this text format supported?
        // This part actually differs from previous version of indexed OCR
        if (!empty($fileContent) && !empty($this->formats[$textFormat])) {
            $textMiniOcr = '';
            if (!empty($this->formats[$textFormat]['class'])) {
                $class = $this->formats[$textFormat]['class'];
                // Get the raw text from class.
                if (
                    class_exists($class)
                    && ($obj = GeneralUtility::makeInstance($class)) instanceof FulltextInterface
                ) {
                    // Load XML from file.
                    $ocrTextXml = $this->getXmlObject($fileContent);
                    $textMiniOcr = $obj->getTextAsMiniOcr($ocrTextXml);
                    $this->rawTextArray[$id] = $textMiniOcr;
                } else {
                    // The message names the method that is actually called above.
                    $this->logger->warning('Invalid class/method "' . $class . '->getTextAsMiniOcr()" for text format "' . $textFormat . '"');
                }
            }
            $fullText = $textMiniOcr;
        } else {
            $this->logger->warning('Unsupported text format "' . $textFormat . '" in physical node with @ID "' . $id . '"');
        }
        return $fullText;
    }
719
720
    /**
     * Get format of the OCR full text
     *
     * The format is the upper-cased name of the XML root element.
     *
     * @access private
     *
     * @param string $fileContent: content of the XML file
     *
     * @return string The format of the OCR full text; empty string if the content is not parseable XML
     */
    private function getTextFormat($fileContent)
    {
        $xml = $this->getXmlObject($fileContent);
        // Parsing yields false for invalid XML; calling getName() on it would be a fatal error.
        if (!$xml instanceof \SimpleXMLElement) {
            return '';
        }
        // Get the root element's name as text format.
        return strtoupper($xml->getName());
    }
734
735
    /**
     * Get the OCR full text as object
     *
     * libxml's error reporting is switched to internal errors while parsing and
     * restored to its previous setting afterwards.
     *
     * @access private
     *
     * @param string $fileContent: content of the XML file
     *
     * @return \SimpleXMLElement|false The OCR full text as object, or false if parsing failed
     */
    private function getXmlObject($fileContent)
    {
        // Turn off libxml's error logging.
        $libxmlErrors = libxml_use_internal_errors(true);
        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, where external
        // entity loading is disabled by default; only toggle it on older versions.
        $toggleEntityLoader = \PHP_VERSION_ID < 80000;
        if ($toggleEntityLoader) {
            // Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept.
            $previousValueOfEntityLoader = libxml_disable_entity_loader(true);
        }
        // Load XML from the given string.
        $ocrTextXml = simplexml_load_string($fileContent);
        if ($toggleEntityLoader) {
            // Reset entity loader setting.
            libxml_disable_entity_loader($previousValueOfEntityLoader);
        }
        // Reset libxml's error logging.
        libxml_use_internal_errors($libxmlErrors);
        // Hand back the parsed root element (false on parse errors).
        return $ocrTextXml;
    }
759
760
    /**
     * This determines a title for the given document
     *
     * With $recursive enabled, the chain of "partof" parents is followed until a
     * non-empty title is found. Every UID is looked up at most once, so a cyclic
     * parent chain terminates instead of recursing endlessly.
     *
     * @access public
     *
     * @static
     *
     * @param int $uid: The UID of the document
     * @param bool $recursive: Search superior documents for a title, too?
     *
     * @return string The title of the document itself or a parent document
     */
    public static function getTitle($uid, $recursive = false)
    {
        $logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(__CLASS__);

        $title = '';
        // Sanitize input.
        $uid = max(intval($uid), 0);
        if (!$uid) {
            $logger->error('Invalid UID ' . $uid . ' for document');
            return $title;
        }
        // UIDs that have been looked up already; guards against cyclic parent chains.
        $visited = [];
        while ($uid && !isset($visited[$uid])) {
            $visited[$uid] = true;

            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('tx_dlf_documents');

            $result = $queryBuilder
                ->select(
                    'tx_dlf_documents.title',
                    'tx_dlf_documents.partof'
                )
                ->from('tx_dlf_documents')
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_documents.uid', $uid),
                    Helper::whereExpression('tx_dlf_documents')
                )
                ->setMaxResults(1)
                ->execute();

            $resArray = $result->fetch();
            if (!$resArray) {
                $logger->warning('No document with UID ' . $uid . ' found or document not accessible');
                $title = '';
                break;
            }
            // Get title information.
            $title = $resArray['title'];
            $partof = max(intval($resArray['partof']), 0);
            // Search parent documents for a title?
            if (
                !$recursive
                || !empty($title)
                || !$partof
            ) {
                break;
            }
            // Continue with the parent document.
            $uid = $partof;
        }
        return $title;
    }
817
818
    /**
     * Collects the parsed metadata of the toplevel logical structure node / resource
     *
     * For METS documents, information from the structural map is added. If the result
     * has a 'record_id' entry and the document's own record identifier is not listed
     * there yet, it is prepended to that entry.
     *
     * @access public
     *
     * @param int $cPid: The PID for the metadata definitions
     *
     * @return array The parsed metadata array of the toplevel logical structure node / resource
     */
    public function getTitledata($cPid = 0)
    {
        $titledata = $this->getMetadata($this->_getToplevelId(), $cPid);
        // METS documents contribute additional information from their structural map.
        if ($this instanceof MetsDocument) {
            $this->addMetadataFromMets($titledata, $this->_getToplevelId());
        }
        // Does the document's own record identifier still have to be added?
        $recordIdMissing = is_array($titledata)
            && array_key_exists('record_id', $titledata)
            && !empty($this->recordId)
            && !in_array($this->recordId, $titledata['record_id']);
        if ($recordIdMissing) {
            // Put it in front of any identifiers taken from the metadata.
            array_unshift($titledata['record_id'], $this->recordId);
        }
        return $titledata;
    }
848
849
    /**
     * Searches a logical (sub-) structure tree depth-first for the element with the
     * requested logical id and reports the depth at which it was found.
     *
     * @access protected
     *
     * @param array $structure: logical structure array
     * @param int $depth: depth of the elements contained in $structure
     * @param string $logId: ID of the logical structure whose depth is requested
     *
     * @return int|bool: the depth of the matching element, or false if no element
     * with $logId exists in this substructure.
     */
    protected function getTreeDepth($structure, $depth, $logId)
    {
        foreach ($structure as $node) {
            // Direct hit on this level.
            if ($node['id'] == $logId) {
                return $depth;
            }
            // Leaves cannot contain the element.
            if (!array_key_exists('children', $node)) {
                continue;
            }
            // Descend one level; the first match wins.
            $depthInSubtree = $this->getTreeDepth($node['children'], $depth + 1, $logId);
            if ($depthInSubtree !== false) {
                return $depthInSubtree;
            }
        }
        return false;
    }
875
876
    /**
     * Determine how deep a logical structure element is nested within the table of contents
     *
     * Elements on the first level of the table of contents have depth 1.
     *
     * @access public
     *
     * @param string $logId: The id of the logical structure element whose depth is requested
     *
     * @return int|bool tree depth as integer or false if no element with $logId exists within the TOC.
     */
    public function getStructureDepth($logId)
    {
        $tableOfContents = $this->_getTableOfContents();
        // Counting starts at 1 for the top level of the table of contents.
        return $this->getTreeDepth($tableOfContents, 1, $logId);
    }
888
889
    /**
     * Sets some basic class properties; to be implemented by the format specific subclasses
     *
     * @access protected
     *
     * @abstract
     *
     * @return void
     */
    abstract protected function init();
899
900
    /**
     * Reuse any document object that might have been already loaded to determine whether the
     * document is METS or IIIF
     *
     * @access protected
     *
     * @abstract
     *
     * @param \SimpleXMLElement|IiifResourceInterface $preloadedDocument: any instance that has already been loaded
     *
     * @return bool true if $preloadedDocument can actually be reused, false if it has to be loaded again
     */
    abstract protected function setPreloadedDocument($preloadedDocument);
912
913
    /**
     * Format specific (METS / IIIF) part of loading a location
     *
     * Called by load() once the location has been validated as a URL.
     *
     * @access protected
     *
     * @abstract
     *
     * @param string $location: The URL of the file to load
     *
     * @return bool true on success or false on failure
     */
    abstract protected function loadLocation($location);
925
926
    /**
     * Load XML file / IIIF resource from URL
     *
     * The location is validated first. If the extension configuration defines a
     * user-agent, it is applied via ini_set() before the format specific
     * loadLocation() does the actual loading. A user-agent that cannot be applied
     * is logged as a warning and does not abort loading.
     *
     * @access protected
     *
     * @param string $location: The URL of the file to load
     *
     * @return bool true on success or false on failure
     */
    protected function load($location)
    {
        // Refuse anything that is not a valid URL.
        if (!GeneralUtility::isValidUrl($location)) {
            $this->logger->error('Invalid file location "' . $location . '" for document loading');
            return false;
        }
        // Load extension configuration
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        // Set user-agent to identify self when fetching XML / JSON-LD data.
        if (
            !empty($extConf['useragent'])
            && @ini_set('user_agent', $extConf['useragent']) === false
        ) {
            // ini_set() reports failure by returning false; make that visible instead of ignoring it.
            $this->logger->warning('Could not set user-agent "' . $extConf['useragent'] . '" for document loading');
        }
        // the actual loading is format specific
        return $this->loadLocation($location);
    }
952
953
    /**
     * Analyze the document if it contains any fulltext that needs to be indexed.
     *
     * Called by _getHasFulltext() before $this->hasFulltext is returned.
     *
     * @access protected
     *
     * @abstract
     */
    abstract protected function ensureHasFulltextIsSet();
961
962
    /**
     * Fill the registry of available data formats from the database (once per instance)
     *
     * Reads all records of table "tx_dlf_formats" with pid 0 and stores root element,
     * namespace URI and class of each format in $this->formats, keyed by format type.
     *
     * @access protected
     *
     * @return void
     */
    protected function loadFormats()
    {
        // The registry only has to be filled once.
        if ($this->formatsLoaded) {
            return;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_formats');

        // Get available data formats from database.
        $statement = $queryBuilder
            ->select(
                'tx_dlf_formats.type AS type',
                'tx_dlf_formats.root AS root',
                'tx_dlf_formats.namespace AS namespace',
                'tx_dlf_formats.class AS class'
            )
            ->from('tx_dlf_formats')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_formats.pid', 0)
            )
            ->execute();

        while ($format = $statement->fetch()) {
            // Update format registry.
            $type = $format['type'];
            $this->formats[$type] = [
                'rootElement' => $format['root'],
                'namespaceURI' => $format['namespace'],
                'class' => $format['class']
            ];
        }
        $this->formatsLoaded = true;
    }
1000
1001
    /**
     * Register the namespaces of all known data formats on an XML object
     *
     * The lower-cased format type is used as namespace prefix. Objects that are neither
     * a \SimpleXMLElement nor a \DOMXPath are rejected with an error log entry.
     *
     * @access public
     *
     * @param \SimpleXMLElement|\DOMXPath &$obj: \SimpleXMLElement or \DOMXPath object
     *
     * @return void
     */
    public function registerNamespaces(&$obj)
    {
        // TODO Check usage. XML specific method does not seem to be used anywhere outside this class within the project, but it is public and may be used by extensions.
        $this->loadFormats();
        // Pick the registration method that matches the class of the given object.
        $method = null;
        if ($obj instanceof \SimpleXMLElement) {
            $method = 'registerXPathNamespace';
        } elseif ($obj instanceof \DOMXPath) {
            $method = 'registerNamespace';
        }
        if ($method === null) {
            $this->logger->error('Given object is neither a SimpleXMLElement nor a DOMXPath instance');
            return;
        }
        // Register metadata format's namespaces.
        foreach ($this->formats as $enc => $conf) {
            $prefix = strtolower($enc);
            $obj->$method($prefix, $conf['namespaceURI']);
        }
    }
1028
1029
    /**
     * This saves the document to the database and index
     *
     * Saving is only allowed in the backend. Collections and the owning library named in
     * the metadata are created on the fly if they do not exist on the target page yet.
     * Note that false is also returned if no Solr core is given, although the database
     * record has already been processed at that point.
     *
     * @access public
     *
     * @param int $pid: The PID of the saved record
     * @param int $core: The UID of the Solr core for indexing
     * @param int|string $owner: UID or index_name of owner to set while indexing. If empty,
     * the first owner found in the metadata is used, or 'default' if there is none.
     *
     * @return bool true on success or false on failure
     */
    public function save($pid = 0, $core = 0, $owner = null)
    {
        if (\TYPO3_MODE !== 'BE') {
            $this->logger->error('Saving a document is only allowed in the backend');
            return false;
        }
        // Make sure $pid is a non-negative integer.
        $pid = max(intval($pid), 0);
        // Make sure $core is a non-negative integer.
        $core = max(intval($core), 0);
        // If $pid is not given, try to get it elsewhere.
        if (
            !$pid
            && $this->pid
        ) {
            // Retain current PID.
            $pid = $this->pid;
        } elseif (!$pid) {
            $this->logger->error('Invalid PID ' . $pid . ' for document saving');
            return false;
        }
        // Set PID for metadata definitions.
        $this->cPid = $pid;
        // Set UID placeholder if not updating existing record.
        if ($pid != $this->pid) {
            $this->uid = uniqid('NEW');
        }
        // Get metadata array.
        $metadata = $this->getTitledata($pid);
        // Check for record identifier.
        if (empty($metadata['record_id'][0])) {
            $this->logger->error('No record identifier found to avoid duplication');
            return false;
        }
        // Load plugin configuration.
        $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_structures');

        // Get UID for structure type.
        $result = $queryBuilder
            ->select('tx_dlf_structures.uid AS uid')
            ->from('tx_dlf_structures')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($pid)),
                $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($metadata['type'][0])),
                Helper::whereExpression('tx_dlf_structures')
            )
            ->setMaxResults(1)
            ->execute();

        if ($resArray = $result->fetch()) {
            $structure = $resArray['uid'];
        } else {
            $this->logger->error('Could not identify document/structure type "' . $queryBuilder->expr()->literal($metadata['type'][0]) . '"');
            return false;
        }
        $metadata['type'][0] = $structure;

        // Remove appended "valueURI" from authors' names for storing in database.
        foreach ($metadata['author'] as $i => $author) {
            // Name and appended URI are separated by the ASCII unit separator (chr(31)).
            $splitName = explode(chr(31), $author);
            $metadata['author'][$i] = $splitName[0];
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_collections');
        // Get hidden records, too.
        $queryBuilder
            ->getRestrictions()
            ->removeByType(HiddenRestriction::class);

        // Get UIDs for collections.
        $result = $queryBuilder
            ->select(
                'tx_dlf_collections.index_name AS index_name',
                'tx_dlf_collections.uid AS uid'
            )
            ->from('tx_dlf_collections')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_collections.pid', intval($pid)),
                $queryBuilder->expr()->in('tx_dlf_collections.sys_language_uid', [-1, 0])
            )
            ->execute();

        $collUid = [];
        while ($resArray = $result->fetch()) {
            $collUid[$resArray['index_name']] = $resArray['uid'];
        }
        $collections = [];
        foreach ($metadata['collection'] as $collection) {
            if (!empty($collUid[$collection])) {
                // Add existing collection's UID.
                $collections[] = $collUid[$collection];
            } else {
                // Insert new collection.
                $collNewUid = uniqid('NEW');
                // A fresh data array per collection prevents double insertion.
                $collData = [
                    'tx_dlf_collections' => [
                        $collNewUid => [
                            'pid' => $pid,
                            'label' => $collection,
                            'index_name' => $collection,
                            'oai_name' => (!empty($conf['publishNewCollections']) ? Helper::getCleanString($collection) : ''),
                            'description' => '',
                            'documents' => 0,
                            'owner' => 0,
                            'status' => 0,
                        ],
                    ],
                ];
                $substUid = Helper::processDBasAdmin($collData);
                // Add new collection's UID.
                $collections[] = $substUid[$collNewUid];
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newCollection'), $collection, $substUid[$collNewUid])),
                        Helper::getMessage('flash.attention', true),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        true
                    );
                }
            }
        }
        $metadata['collection'] = $collections;

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_libraries');

        // Get UID for owner.
        if (empty($owner)) {
            // Prefer the owner given in the metadata and fall back to 'default' only if there is none.
            $owner = !empty($metadata['owner'][0]) ? $metadata['owner'][0] : 'default';
        }
        if (!MathUtility::canBeInterpretedAsInteger($owner)) {
            // $owner is an index_name: look up the library or create it.
            $result = $queryBuilder
                ->select('tx_dlf_libraries.uid AS uid')
                ->from('tx_dlf_libraries')
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_libraries.pid', intval($pid)),
                    $queryBuilder->expr()->eq('tx_dlf_libraries.index_name', $queryBuilder->expr()->literal($owner)),
                    Helper::whereExpression('tx_dlf_libraries')
                )
                ->setMaxResults(1)
                ->execute();

            if ($resArray = $result->fetch()) {
                $ownerUid = $resArray['uid'];
            } else {
                // Insert new library.
                $libNewUid = uniqid('NEW');
                $libData = [];
                $libData['tx_dlf_libraries'][$libNewUid] = [
                    'pid' => $pid,
                    'label' => $owner,
                    'index_name' => $owner,
                    'website' => '',
                    'contact' => '',
                    'image' => '',
                    'oai_label' => '',
                    'oai_base' => '',
                    'opac_label' => '',
                    'opac_base' => '',
                    'union_label' => '',
                    'union_base' => '',
                ];
                $substUid = Helper::processDBasAdmin($libData);
                // Add new library's UID.
                $ownerUid = $substUid[$libNewUid];
                if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
                    Helper::addMessage(
                        htmlspecialchars(sprintf(Helper::getMessage('flash.newLibrary'), $owner, $ownerUid)),
                        Helper::getMessage('flash.attention', true),
                        \TYPO3\CMS\Core\Messaging\FlashMessage::INFO,
                        true
                    );
                }
            }
            $owner = $ownerUid;
        }
        $metadata['owner'][0] = $owner;
        // Get UID of parent document.
        $partof = $this->getParentDocumentUidForSaving($pid, $core, $owner);
        // Use the date of publication or title as alternative sorting metric for parts of multi-part works.
        if (!empty($partof)) {
            if (
                empty($metadata['volume'][0])
                && !empty($metadata['year'][0])
            ) {
                $metadata['volume'] = $metadata['year'];
            }
            if (empty($metadata['volume_sorting'][0])) {
                // If METS @ORDER is given it is preferred over year_sorting and year.
                if (!empty($metadata['mets_order'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['mets_order'][0];
                } elseif (!empty($metadata['year_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year_sorting'][0];
                } elseif (!empty($metadata['year'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['year'][0];
                }
            }
            // If volume_sorting is still empty, try to use title_sorting or METS @ORDERLABEL finally (workaround for newspapers)
            if (empty($metadata['volume_sorting'][0])) {
                if (!empty($metadata['title_sorting'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['title_sorting'][0];
                } elseif (!empty($metadata['mets_orderlabel'][0])) {
                    $metadata['volume_sorting'][0] = $metadata['mets_orderlabel'][0];
                }
            }
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');

        // Get metadata for lists and sorting.
        $result = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadata.is_listed AS is_listed',
                'tx_dlf_metadata.is_sortable AS is_sortable'
            )
            ->from('tx_dlf_metadata')
            ->where(
                $queryBuilder->expr()->orX(
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_listed', 1),
                    $queryBuilder->expr()->eq('tx_dlf_metadata.is_sortable', 1)
                ),
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($pid)),
                Helper::whereExpression('tx_dlf_metadata')
            )
            ->execute();

        $listed = [];
        $sortable = [];

        while ($resArray = $result->fetch()) {
            if (!empty($metadata[$resArray['index_name']])) {
                if ($resArray['is_listed']) {
                    $listed[$resArray['index_name']] = $metadata[$resArray['index_name']];
                }
                if ($resArray['is_sortable']) {
                    // Only the first value is used for sorting.
                    $sortable[$resArray['index_name']] = $metadata[$resArray['index_name']][0];
                }
            }
        }
        // Fill data array.
        $data = [];
        $data['tx_dlf_documents'][$this->uid] = [
            'pid' => $pid,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['starttime'] => 0,
            $GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['endtime'] => 0,
            'prod_id' => $metadata['prod_id'][0],
            'location' => $this->location,
            'record_id' => $metadata['record_id'][0],
            'opac_id' => $metadata['opac_id'][0],
            'union_id' => $metadata['union_id'][0],
            'urn' => $metadata['urn'][0],
            'purl' => $metadata['purl'][0],
            'title' => $metadata['title'][0],
            'title_sorting' => $metadata['title_sorting'][0],
            'author' => implode('; ', $metadata['author']),
            'year' => implode('; ', $metadata['year']),
            'place' => implode('; ', $metadata['place']),
            'thumbnail' => $this->_getThumbnail(true),
            'metadata' => serialize($listed),
            'metadata_sorting' => serialize($sortable),
            'structure' => $metadata['type'][0],
            'partof' => $partof,
            'volume' => $metadata['volume'][0],
            'volume_sorting' => $metadata['volume_sorting'][0],
            'license' => $metadata['license'][0],
            'terms' => $metadata['terms'][0],
            'restrictions' => $metadata['restrictions'][0],
            'out_of_print' => $metadata['out_of_print'][0],
            'rights_info' => $metadata['rights_info'][0],
            'collections' => $metadata['collection'],
            'mets_label' => $metadata['mets_label'][0],
            'mets_orderlabel' => $metadata['mets_orderlabel'][0],
            'mets_order' => $metadata['mets_order'][0],
            'owner' => $metadata['owner'][0],
            'solrcore' => $core,
            'status' => 0,
            'document_format' => $metadata['document_format'][0],
        ];
        // Unhide hidden documents.
        if (!empty($conf['unhideOnIndex'])) {
            $data['tx_dlf_documents'][$this->uid][$GLOBALS['TCA']['tx_dlf_documents']['ctrl']['enablecolumns']['disabled']] = 0;
        }
        // Process data.
        $newIds = Helper::processDBasAdmin($data);
        // Replace placeholder with actual UID.
        if (strpos($this->uid, 'NEW') === 0) {
            $this->uid = $newIds[$this->uid];
            $this->pid = $pid;
            $this->parentId = $partof;
        }
        if (!(\TYPO3_REQUESTTYPE & \TYPO3_REQUESTTYPE_CLI)) {
            Helper::addMessage(
                htmlspecialchars(sprintf(Helper::getMessage('flash.documentSaved'), $metadata['title'][0], $this->uid)),
                Helper::getMessage('flash.done', true),
                \TYPO3\CMS\Core\Messaging\FlashMessage::OK,
                true
            );
        }
        // Add document to index.
        if ($core) {
            return Indexer::add($this, $core);
        } else {
            $this->logger->notice('Invalid UID "' . $core . '" for Solr core');
            return false;
        }
    }
1348
1349
    /**
     * Get the ID of the parent document if the current document has one. Also save a parent document
     * to the database and the Solr index if their $pid and the current $pid differ.
     * Currently only applies to METS documents.
     *
     * save() passes its already resolved $pid, $core and $owner values to this method.
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $pid: The PID the document is saved to
     * @param int $core: The UID of the Solr core for indexing
     * @param int|string $owner: The owner as resolved by save() (presumably a library UID - TODO confirm)
     *
     * @return int The parent document's id.
     */
    abstract protected function getParentDocumentUidForSaving($pid, $core, $owner);
1361
1362
    /**
     * Accessor used by __get() to expose $this->cPid
     *
     * @access protected
     *
     * @return int The PID of the metadata definitions
     */
    protected function _getCPid()
    {
        return $this->cPid;
    }
1373
1374
    /**
     * Accessor used by __get() to expose $this->hasFulltext
     *
     * @access protected
     *
     * @return bool Are there any fulltext files available?
     */
    protected function _getHasFulltext()
    {
        // Let the format specific implementation analyze the document first.
        $this->ensureHasFulltextIsSet();
        return $this->hasFulltext;
    }
1386
1387
    /**
     * Accessor used by __get() to expose $this->location
     *
     * @access protected
     *
     * @return string The location of the document
     */
    protected function _getLocation()
    {
        return $this->location;
    }
1398
1399
    /**
     * Format specific part of building the document's metadata array
     *
     * Called by _getMetadataArray() whenever the metadata array has to be (re-)built.
     *
     * @access protected
     *
     * @abstract
     *
     * @param int $cPid: The PID for the metadata definitions
     */
    abstract protected function prepareMetadataArray($cPid);
1409
1410
    /**
     * This builds an array of the document's metadata
     *
     * The array is rebuilt whenever it has not been loaded yet or was built for a
     * different PID; index 0 of the array holds the PID it was built for.
     *
     * @access protected
     *
     * @return array Array of metadata with their corresponding logical structure node ID as key,
     * or an empty array if no PID for the metadata definitions is available
     */
    protected function _getMetadataArray()
    {
        // Set metadata definitions' PID, falling back to the document's own PID.
        $cPid = $this->cPid ?: $this->pid;
        if (!$cPid) {
            $this->logger->error('Invalid PID ' . $cPid . ' for metadata definitions');
            return [];
        }
        // The cached array can be reused only if it was built for the same PID.
        $isUpToDate = $this->metadataArrayLoaded
            && $this->metadataArray[0] == $cPid;
        if (!$isUpToDate) {
            $this->prepareMetadataArray($cPid);
            $this->metadataArray[0] = $cPid;
            $this->metadataArrayLoaded = true;
        }
        return $this->metadataArray;
    }
1435
1436
    /**
     * Accessor used by __get() to expose $this->numPages
     *
     * @access protected
     *
     * @return int The total number of pages and/or tracks
     */
    protected function _getNumPages()
    {
        // Invoked for its side effects only; the returned structure is not needed here.
        $this->_getPhysicalStructure();
        return $this->numPages;
    }
1448
1449
    /**
     * Accessor used by __get() to expose $this->parentId
     *
     * @access protected
     *
     * @return int The UID of the parent document or zero if not applicable
     */
    protected function _getParentId()
    {
        return $this->parentId;
    }
1460
1461
    /**
     * Builds an array of the document's physical structure; format specific
     *
     * @access protected
     *
     * @abstract
     *
     * @return array Array of physical elements' id, type, label and file representations ordered
     * by @ORDER attribute / IIIF Sequence's Canvases
     */
    abstract protected function _getPhysicalStructure();
1472
1473
    /**
     * This gives an array of the document's physical structure metadata
     *
     * The physical structure is built first if that has not happened yet.
     *
     * @access protected
     *
     * @return array Array of elements' type, label and file representations ordered by @ID attribute / Canvas order
     */
    protected function _getPhysicalStructureInfo()
    {
        $alreadyBuilt = $this->physicalStructureLoaded;
        if (!$alreadyBuilt) {
            // Build physical structure array.
            $this->_getPhysicalStructure();
        }
        return $this->physicalStructureInfo;
    }
1489
1490
    /**
     * Accessor used by __get() to expose $this->pid
     *
     * @access protected
     *
     * @return int The PID of the document or zero if not in database
     */
    protected function _getPid()
    {
        return $this->pid;
    }
1501
1502
    /**
     * Accessor used by __get() to expose $this->ready
     *
     * @access protected
     *
     * @return bool Is the document instantiated successfully?
     */
    protected function _getReady()
    {
        return $this->ready;
    }
1513
1514
    /**
     * Accessor used by __get() to expose $this->recordId
     *
     * @access protected
     *
     * @return mixed The METS file's / IIIF manifest's record identifier
     */
    protected function _getRecordId()
    {
        return $this->recordId;
    }
1525
1526
    /**
     * Accessor used by __get() to expose $this->rootId
     *
     * On first access the root id is taken over from the parent document, if there is one.
     *
     * @access protected
     *
     * @return int The UID of the root document or zero if not applicable
     */
    protected function _getRootId()
    {
        // The root id is determined only once.
        if ($this->rootIdLoaded) {
            return $this->rootId;
        }
        if ($this->parentId) {
            // Ask the parent document, which in turn resolves its own root id.
            $this->rootId = self::getInstance($this->parentId, $this->pid)->rootId;
        }
        $this->rootIdLoaded = true;
        return $this->rootId;
    }
1544
1545
    /**
     * This returns the links between the logical and the physical structure.
     * For METS these are the smLinks between both structMaps, for IIIF the
     * relation between Canvases and Manifests / Ranges is modelled the same way
     *
     * @access protected
     *
     * @abstract
     *
     * @return array The links between logical and physical nodes / Range, Manifest and Canvas
     */
    abstract protected function _getSmLinks();
1556
1557
    /**
     * This returns the document's logical structure (table of contents) and
     * triggers loading all logical structures on first access
     *
     * @access protected
     *
     * @return array Array of structure nodes' id, label, type and physical page indexes/mptr / Canvas link with original hierarchy preserved
     */
    protected function _getTableOfContents()
    {
        // Already loaded? Then hand out the cached structure.
        if ($this->tableOfContentsLoaded) {
            return $this->tableOfContents;
        }
        // Request all logical structures (empty ID, recursive); the return value is not used here.
        $this->getLogicalStructure('', true);
        $this->tableOfContentsLoaded = true;
        return $this->tableOfContents;
    }
1574
1575
    /**
     * This returns the location of the document's thumbnail
     *
     * @access protected
     *
     * @abstract
     *
     * @param bool $forceReload: Reload the thumbnail instead of returning the cached value
     *
     * @return string The document's thumbnail location
     */
    abstract protected function _getThumbnail($forceReload = false);
1587
1588
    /**
     * This returns the ID of the logical structure's toplevel node
     *
     * @access protected
     *
     * @abstract
     *
     * @return string The logical structure node's ID
     */
    abstract protected function _getToplevelId();
1598
1599
    /**
     * Getter behind the magic property "uid" (see __get())
     *
     * @access protected
     *
     * @return mixed The UID or the URL of the document
     */
    protected function _getUid()
    {
        return $this->uid;
    }
1610
1611
    /**
     * Setter behind the magic property "cPid" (see __set())
     *
     * @access protected
     *
     * @param int $value: The new PID for the metadata definitions
     *
     * @return void
     */
    protected function _setCPid($value)
    {
        // Negative values are clamped to zero.
        $newPid = intval($value);
        $this->cPid = $newPid > 0 ? $newPid : 0;
    }
1624
1625
    /**
     * This magic method is invoked whenever the object is cloned
     *
     * @access protected
     *
     * @return void
     */
    protected function __clone()
    {
        // Intentionally empty and protected: singleton objects should not be cloned.
    }
1636
1637
    /**
     * This is a singleton class, thus the constructor should be private/protected
     * (Get an instance of this class by calling \Kitodo\Dlf\Common\Document::getInstance())
     *
     * A numeric $uid is looked up in table "tx_dlf_documents" directly. Any other
     * value is treated as location: the document is loaded first (preloaded object
     * or URL) and then matched against the table by location / record identifier.
     * On success $this->ready is set to true.
     *
     * @access protected
     *
     * @param int|string $uid: The UID of the document to parse or URL to XML file
     * @param int $pid: If > 0, then only document with this PID gets loaded
     * @param \SimpleXMLElement|IiifResourceInterface|null $preloadedDocument: Either null or the \SimpleXMLElement
     * or IiifResourceInterface that has been loaded to determine the basic document format.
     *
     * @return void
     */
    protected function __construct($uid, $pid, $preloadedDocument)
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');
        $location = '';
        // Prepare to check database for the requested document.
        if (MathUtility::canBeInterpretedAsInteger($uid)) {
            $whereClause = $queryBuilder->expr()->andX(
                $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($uid)),
                Helper::whereExpression('tx_dlf_documents')
            );
        } else {
            // Try to use the preloaded document or to load the METS file / IIIF manifest.
            $isLoaded = $this->setPreloadedDocument($preloadedDocument)
                || (GeneralUtility::isValidUrl($uid) && $this->load($uid));
            if (!$isLoaded) {
                // Loading failed.
                return;
            }
            // Initialize core METS / IIIF object.
            $this->init();
            if ($this->getDocument() === null) {
                // No METS / IIIF part found.
                return;
            }
            // Cast to string for safety reasons.
            $location = (string) $uid;
            // Presumably fills $this->recordId, which is checked right below.
            $this->establishRecordId($pid);
            if (
                !empty($location)
                && !empty($this->recordId)
            ) {
                // Try to match record identifier or location (both should be unique).
                // Both values originate from outside (URL / document content), so they
                // are bound as named parameters instead of being spliced into the SQL.
                $whereClause = $queryBuilder->expr()->andX(
                    $queryBuilder->expr()->orX(
                        $queryBuilder->expr()->eq(
                            'tx_dlf_documents.location',
                            $queryBuilder->createNamedParameter($location)
                        ),
                        $queryBuilder->expr()->eq(
                            'tx_dlf_documents.record_id',
                            $queryBuilder->createNamedParameter($this->recordId)
                        )
                    ),
                    Helper::whereExpression('tx_dlf_documents')
                );
            } else {
                // Can't persistently identify document, don't try to match at all.
                $whereClause = '1=-1';
            }
        }
        // Check for PID if needed.
        if ($pid) {
            $whereClause = $queryBuilder->expr()->andX(
                $whereClause,
                $queryBuilder->expr()->eq('tx_dlf_documents.pid', intval($pid))
            );
        }
        // Get document PID and location from database.
        $result = $queryBuilder
            ->select(
                'tx_dlf_documents.uid AS uid',
                'tx_dlf_documents.pid AS pid',
                'tx_dlf_documents.record_id AS record_id',
                'tx_dlf_documents.partof AS partof',
                'tx_dlf_documents.thumbnail AS thumbnail',
                'tx_dlf_documents.location AS location'
            )
            ->from('tx_dlf_documents')
            ->where($whereClause)
            ->setMaxResults(1)
            ->execute();

        $resArray = $result->fetch();
        if ($resArray) {
            // Document is known to the database: take over the stored values.
            $this->uid = $resArray['uid'];
            $this->pid = $resArray['pid'];
            $this->recordId = $resArray['record_id'];
            $this->parentId = $resArray['partof'];
            $this->thumbnail = $resArray['thumbnail'];
            $this->location = $resArray['location'];
            $this->thumbnailLoaded = true;
            // Load XML file if necessary...
            if (
                $this->getDocument() === null
                && $this->load($this->location)
            ) {
                // ...and set some basic properties.
                $this->init();
            }
            // Do we have a METS / IIIF object now?
            if ($this->getDocument() !== null) {
                // Set new location if necessary.
                if (!empty($location)) {
                    $this->location = $location;
                }
                // Document ready!
                $this->ready = true;
            }
        } elseif ($this->getDocument() !== null) {
            // Set location as UID for documents not in database.
            $this->uid = $location;
            $this->location = $location;
            // Document ready!
            $this->ready = true;
        } else {
            $this->logger->error('No document with UID ' . $uid . ' found or document not accessible');
        }
    }
1753
1754
    /**
     * This magic method is called each time an invisible property is read from the object.
     * It delegates to the matching "_get<Property>()" method and logs a warning
     * if the property or its getter does not exist.
     *
     * @access public
     *
     * @param string $var: Name of variable to get
     *
     * @return mixed Value of $this->$var or null if there is no getter
     */
    public function __get($var)
    {
        $getter = '_get' . ucfirst($var);
        // Only declared properties with a dedicated getter are exposed.
        if (property_exists($this, $var) && method_exists($this, $getter)) {
            return $this->$getter();
        }
        $this->logger->warning('There is no getter function for property "' . $var . '"');
        return null;
    }
1776
1777
    /**
     * This magic method is called each time an invisible property is checked for isset() or empty().
     * The check goes through __get(), so the property's getter is executed.
     *
     * @access public
     *
     * @param string $var: Name of variable to check
     *
     * @return bool true if variable is set and not empty, false otherwise
     */
    public function __isset($var)
    {
        $value = $this->__get($var);
        return !empty($value);
    }
1790
1791
    /**
     * This magic method is called each time an invisible property is written on the object.
     * It delegates to the matching "_set<Property>()" method and logs a warning
     * if the property or its setter does not exist.
     *
     * @access public
     *
     * @param string $var: Name of variable to set
     * @param mixed $value: New value of variable
     *
     * @return void
     */
    public function __set($var, $value)
    {
        $setter = '_set' . ucfirst($var);
        // Only declared properties with a dedicated setter may be written.
        if (property_exists($this, $var) && method_exists($this, $setter)) {
            $this->$setter($value);
            return;
        }
        $this->logger->warning('There is no setter function for property "' . $var . '"');
    }
1813
}
1814