Passed
Pull Request — master (#123)
by
unknown
08:11 queued 03:22
created

MetsDocument::getStructureDepth()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 5
c 0
b 0
f 0
dl 0
loc 7
rs 10
cc 2
nc 2
nop 1
1
<?php
2
3
/**
4
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
5
 *
6
 * This file is part of the Kitodo and TYPO3 projects.
7
 *
8
 * @license GNU General Public License version 3 or later.
9
 * For the full copyright and license information, please read the
10
 * LICENSE.txt file that was distributed with this source code.
11
 */
12
13
namespace Kitodo\Dlf\Common;
14
15
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
16
use TYPO3\CMS\Core\Database\ConnectionPool;
17
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
18
use TYPO3\CMS\Core\Log\LogManager;
19
use TYPO3\CMS\Core\Utility\GeneralUtility;
20
use Ubl\Iiif\Tools\IiifHelper;
21
use Ubl\Iiif\Services\AbstractImageService;
22
23
/**
24
 * MetsDocument class for the 'dlf' extension.
25
 *
26
 * @package TYPO3
27
 * @subpackage dlf
28
 *
29
 * @access public
30
 *
31
 * @property int $cPid this holds the PID for the configuration
32
 * @property-read array $formats this holds the configuration for all supported metadata encodings
33
 * @property bool $formatsLoaded flag with information if the available metadata formats are loaded
34
 * @property-read bool $hasFulltext flag with information if there are any fulltext files available
35
 * @property array $lastSearchedPhysicalPage the last searched logical and physical page
36
 * @property array $logicalUnits this holds the logical units
37
 * @property-read array $metadataArray this holds the documents' parsed metadata array
38
 * @property bool $metadataArrayLoaded flag with information if the metadata array is loaded
39
 * @property-read int $numPages this holds the total number of pages
40
 * @property-read int $parentId this holds the UID of the parent document or zero if not multi-volumed
41
 * @property-read array $physicalStructure this holds the physical structure
42
 * @property-read array $physicalStructureInfo this holds the physical structure metadata
43
 * @property bool $physicalStructureLoaded flag with information if the physical structure is loaded
44
 * @property-read int $pid this holds the PID of the document or zero if not in database
45
 * @property array $rawTextArray this holds the documents' raw text pages with their corresponding structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key
46
 * @property-read bool $ready Is the document instantiated successfully?
47
 * @property-read string $recordId the METS file's / IIIF manifest's record identifier
48
 * @property-read int $rootId this holds the UID of the root document or zero if not multi-volumed
49
 * @property-read array $smLinks this holds the smLinks between logical and physical structMap
50
 * @property bool $smLinksLoaded flag with information if the smLinks are loaded
51
 * @property-read array $tableOfContents this holds the logical structure
52
 * @property bool $tableOfContentsLoaded flag with information if the table of contents is loaded
53
 * @property-read string $thumbnail this holds the document's thumbnail location
54
 * @property bool $thumbnailLoaded flag with information if the thumbnail is loaded
55
 * @property-read string $toplevelId this holds the toplevel structure's "@ID" (METS) or the manifest's "@id" (IIIF)
56
 * @property \SimpleXMLElement $xml this holds the whole XML file as \SimpleXMLElement object
57
 * @property-read array $mdSec associative array of METS metadata sections indexed by their IDs.
58
 * @property bool $mdSecLoaded flag with information if the array of METS metadata sections is loaded
59
 * @property-read array $dmdSec subset of `$mdSec` storing only the dmdSec entries; kept for compatibility.
60
 * @property-read array $fileGrps this holds the file ID -> USE concordance
61
 * @property bool $fileGrpsLoaded flag with information if file groups array is loaded
62
 * @property-read array $fileInfos additional information about files (e.g., ADMID), indexed by ID.
63
 * @property-read \SimpleXMLElement $mets this holds the XML file's METS part as \SimpleXMLElement object
64
 * @property-read string $parentHref URL of the parent document (determined via mptr element), or empty string if none is available
65
 */
66
final class MetsDocument extends AbstractDocument
67
{
68
    /**
69
     * @access protected
70
     * @var string[] Subsections / tags that may occur within `<mets:amdSec>`
71
     *
72
     * @link https://www.loc.gov/standards/mets/docs/mets.v1-9.html#amdSec
73
     * @link https://www.loc.gov/standards/mets/docs/mets.v1-9.html#mdSecType
74
     */
75
    protected const ALLOWED_AMD_SEC = ['techMD', 'rightsMD', 'sourceMD', 'digiprovMD'];
76
77
    /**
78
     * @access protected
79
     * @var string This holds the whole XML file as string for serialization purposes
80
     *
81
     * @see __sleep() / __wakeup()
82
     */
83
    protected string $asXML = '';
84
85
    /**
86
     * @access protected
87
     * @var array This maps the ID of each amdSec to the IDs of its children (techMD etc.). When an ADMID references an amdSec instead of techMD etc., this is used to iterate the child elements.
88
     */
89
    protected array $amdSecChildIds = [];
90
91
    /**
92
     * @access protected
93
     * @var array Associative array of METS metadata sections indexed by their IDs.
94
     */
95
    protected array $mdSec = [];
96
97
    /**
98
     * @access protected
99
     * @var bool Are the METS file's metadata sections loaded?
100
     *
101
     * @see MetsDocument::$mdSec
102
     */
103
    protected bool $mdSecLoaded = false;
104
105
    /**
106
     * @access protected
107
     * @var array Subset of $mdSec storing only the dmdSec entries; kept for compatibility.
108
     */
109
    protected array $dmdSec = [];
110
111
    /**
112
     * @access protected
113
     * @var array This holds the file ID -> USE concordance
114
     *
115
     * @see magicGetFileGrps()
116
     */
117
    protected array $fileGrps = [];
118
119
    /**
120
     * @access protected
121
     * @var bool Are the image file groups loaded?
122
     *
123
     * @see $fileGrps
124
     */
125
    protected bool $fileGrpsLoaded = false;
126
127
    /**
128
     * @access protected
129
     * @var \SimpleXMLElement This holds the XML file's METS part as \SimpleXMLElement object
130
     */
131
    protected \SimpleXMLElement $mets;
132
133
    /**
134
     * @access protected
135
     * @var string URL of the parent document (determined via mptr element), or empty string if none is available
136
     */
137
    protected string $parentHref = '';
138
139
    /**
140
     * @access protected
141
     * @var array the extension settings
142
     */
143
    protected array $settings = [];
144
145
    /**
     * Copies the "order", "label" and "orderlabel" details of a logical
     * structure node into the metadata array as "mets_order", "mets_label"
     * and "mets_orderlabel".
     *
     * The metadata array is left untouched when no details are returned for
     * the given ID.
     *
     * @access public
     *
     * @param array &$metadata The metadata array to extend
     * @param string $id The "@ID" attribute of the logical structure node
     *
     * @return void
     */
    public function addMetadataFromMets(array &$metadata, string $id): void
    {
        $structure = $this->getLogicalStructure($id);
        if (empty($structure)) {
            return;
        }

        // Each detail is stored as first element of a "mets_"-prefixed entry.
        foreach (['order', 'label', 'orderlabel'] as $key) {
            $metadata['mets_' . $key][0] = $structure[$key];
        }
    }
164
165
    /**
     * Sets the record identifier from the METS element's "OBJID" attribute
     * (if not empty) and then hands it to all registered hook objects that
     * implement postProcessRecordId().
     *
     * @see AbstractDocument::establishRecordId()
     */
    protected function establishRecordId(int $pid): void
    {
        // Check for METS object @ID.
        if (!empty($this->mets['OBJID'])) {
            $this->recordId = (string) $this->mets['OBJID'];
        }

        // Give every hook object that supports it a chance to post-process the identifier.
        foreach (Helper::getHookObjects('Classes/Common/MetsDocument.php') as $hook) {
            if (!method_exists($hook, 'postProcessRecordId')) {
                continue;
            }
            $hook->postProcessRecordId($this->xml, $this->recordId);
        }
    }
183
184
    /**
     * Resolves the download location of a file.
     *
     * - For MIME type "application/vnd.kitodo.iiif" the location is completed
     *   to an "info.json" URL, loaded as IIIF resource and, if that resource
     *   is an image service, the image URL of the service is returned.
     * - For MIME type "application/vnd.netfpx" a "CVT=jpeg" parameter is
     *   appended to the location.
     * - In all other cases the (possibly completed) location is returned.
     *
     * @see AbstractDocument::getDownloadLocation()
     *
     * @param string $id The "@ID" attribute of the file node
     *
     * @return string The download location or an empty string if the file is unknown
     */
    public function getDownloadLocation(string $id): string
    {
        $file = $this->getFileInfo($id);
        if ($file === null) {
            // getFileInfo() knows nothing about this ID, so there is no location to return.
            return '';
        }

        $location = (string) ($file['location'] ?? '');
        $mimeType = $file['mimeType'] ?? '';

        if ($mimeType === 'application/vnd.kitodo.iiif') {
            // Complete the base URL to ".../info.json" without doubling the slash.
            if (substr($location, -9) !== 'info.json') {
                $location .= (substr($location, -1) === '/') ? 'info.json' : '/info.json';
            }
            $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey, 'iiif');
            IiifHelper::setUrlReader(IiifUrlReader::getInstance());
            IiifHelper::setMaxThumbnailHeight($conf['thumbnailHeight']);
            IiifHelper::setMaxThumbnailWidth($conf['thumbnailWidth']);
            $service = IiifHelper::loadIiifResource($location);
            if ($service instanceof AbstractImageService) {
                return $service->getImageUrl();
            }
        } elseif ($mimeType === 'application/vnd.netfpx') {
            $baseURL = $location . (strpos($location, '?') === false ? '?' : '');
            // TODO CVT is an optional IIP server capability; in theory, capabilities should be determined in the object request with '&obj=IIP-server'
            return $baseURL . '&CVT=jpeg';
        }

        return $location;
    }
207
208
    /**
     * Returns the stored information about a file, filling in a missing
     * "location" and "mimeType" on first access.
     *
     * {@inheritDoc}
     * @see AbstractDocument::getFileInfo()
     */
    public function getFileInfo($id): ?array
    {
        // Trigger loading of the file groups before the file infos are read.
        $this->magicGetFileGrps();

        if (!isset($this->fileInfos[$id])) {
            return null;
        }

        // Complete the entry with whatever is still missing.
        if (empty($this->fileInfos[$id]['location'])) {
            $this->fileInfos[$id]['location'] = $this->getFileLocation($id);
        }
        if (empty($this->fileInfos[$id]['mimeType'])) {
            $this->fileInfos[$id]['mimeType'] = $this->getFileMimeType($id);
        }

        return $this->fileInfos[$id];
    }
226
227
    /**
     * Returns the xlink:href of the first URL-typed FLocat of the file node
     * with the given ID. Logs a warning and returns an empty string when the
     * ID is empty or no such node is found.
     *
     * @see AbstractDocument::getFileLocation()
     */
    public function getFileLocation(string $id): string
    {
        $query = './mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/mets:FLocat[@LOCTYPE="URL"]';
        $nodes = $this->mets->xpath($query);

        if (empty($id) || empty($nodes)) {
            $this->logger->warning('There is no file node with @ID "' . $id . '"');
            return '';
        }

        return (string) $nodes[0]->attributes('http://www.w3.org/1999/xlink')->href;
    }
243
244
    /**
     * Returns the MIMETYPE attribute of the file node with the given ID.
     * Logs a warning and returns an empty string when the ID is empty or the
     * attribute cannot be found.
     *
     * @see AbstractDocument::getFileMimeType()
     */
    public function getFileMimeType(string $id): string
    {
        $query = './mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/@MIMETYPE';
        $matches = $this->mets->xpath($query);

        if (empty($id) || empty($matches)) {
            $this->logger->warning('There is no file node with @ID "' . $id . '" or no MIME type specified');
            return '';
        }

        return (string) $matches[0];
    }
260
261
    /**
     * Returns the details of a logical structure element.
     *
     * In non-recursive mode the details of the first matching element are
     * returned (from the cache of logical units if available). In recursive
     * mode every matching element is appended to the table of contents and
     * an empty array is returned.
     *
     * @see AbstractDocument::getLogicalStructure()
     */
    public function getLogicalStructure(string $id, bool $recursive = false): array
    {
        // Non-recursive requests are answered from the already loaded units.
        if (!$recursive && !empty($this->logicalUnits[$id])) {
            return $this->logicalUnits[$id];
        }

        // Without an ID all logical units at top level are selected.
        $query = empty($id)
            ? './mets:structMap[@TYPE="LOGICAL"]/mets:div'
            : './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]';
        $divs = $this->mets->xpath($query);

        if (empty($divs)) {
            return [];
        }

        if (!$recursive) {
            // Only the first xpath hit is of interest.
            return $this->getLogicalStructureInfo($divs[0]);
        }

        // Walk the logical structure recursively and fill the whole table of contents.
        foreach ($divs as $div) {
            $this->tableOfContents[] = $this->getLogicalStructureInfo($div, $recursive);
        }

        return [];
    }
294
295
    /**
     * This gets details about a logical structure element.
     *
     * Besides the element's own attributes, the result contains volume/year
     * (when neither label nor orderlabel is set), a description for elements
     * of type "object", the target the element points at ("points"), an
     * optional "thumbnailId", the referenced files indexed by their @USE and,
     * in recursive mode, the details of all child divs ("children").
     *
     * The details (without "children") are also stored in $logicalUnits.
     *
     * @access protected
     *
     * @param \SimpleXMLElement $structure The logical structure node
     * @param bool $recursive Whether to include the child elements
     *
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
     */
    protected function getLogicalStructureInfo(\SimpleXMLElement $structure, bool $recursive = false): array
    {
        $attributes = $structure->attributes();

        // Extract identity information.
        $details = [
            'id' => (string) $attributes['ID'],
            'dmdId' => isset($attributes['DMDID']) ? (string) $attributes['DMDID'] : '',
            'admId' => isset($attributes['ADMID']) ? (string) $attributes['ADMID'] : '',
            'order' => isset($attributes['ORDER']) ? (string) $attributes['ORDER'] : '',
            'label' => isset($attributes['LABEL']) ? (string) $attributes['LABEL'] : '',
            'orderlabel' => isset($attributes['ORDERLABEL']) ? (string) $attributes['ORDERLABEL'] : '',
            'contentIds' => isset($attributes['CONTENTIDS']) ? (string) $attributes['CONTENTIDS'] : '',
            'volume' => '',
            'year' => '',
            'pagination' => '',
            'type' => isset($attributes['TYPE']) ? (string) $attributes['TYPE'] : '',
            'description' => '',
            'thumbnailId' => null,
            'files' => [],
        ];

        // Volume and year are only looked up if neither label nor orderlabel is set;
        // the description is only looked up for 3D objects. Fetch the metadata once for both.
        $needsVolumeAndYear = empty($details['label']) && empty($details['orderlabel']);
        $needsDescription = $details['type'] === 'object';
        if ($needsVolumeAndYear || $needsDescription) {
            $metadata = $this->getMetadata($details['id']);
            if ($needsVolumeAndYear) {
                $details['volume'] = $metadata['volume'][0] ?? '';
                $details['year'] = $metadata['year'][0] ?? '';
            }
            if ($needsDescription) {
                $details['description'] = $metadata['description'][0] ?? '';
            }
        }

        // Load smLinks.
        $this->magicGetSmLinks();
        // Load physical structure.
        $this->magicGetPhysicalStructure();

        $metsChildren = $structure->children('http://www.loc.gov/METS/');

        // Get the physical page or external file this structure element is pointing at.
        if (count($metsChildren->mptr)) {
            // There is a mptr node: use its file reference.
            $details['points'] = (string) $metsChildren->mptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
        } elseif (
            !empty($this->physicalStructure)
            // The "l2p" map may be missing altogether; treat that like "no link for this element".
            && array_key_exists($details['id'], $this->smLinks['l2p'] ?? [])
        ) {
            $firstPhysicalId = $this->smLinks['l2p'][$details['id']][0] ?? null;
            // Link logical structure to the first corresponding physical page/track (at least 1).
            $details['points'] = max((int) array_search($firstPhysicalId, $this->physicalStructure, true), 1);
            $details['thumbnailId'] = $this->getThumbnail();
            // Get page/track number of the first page/track related to this structure element.
            $details['pagination'] = $this->physicalStructureInfo[$firstPhysicalId]['orderlabel'] ?? '';
        } elseif ($details['id'] === (string) $this->magicGetToplevelId()) {
            // Point to self if this is the toplevel structure.
            $details['points'] = 1;
            $details['thumbnailId'] = $this->getThumbnail();
        }
        if ($details['thumbnailId'] === null) {
            unset($details['thumbnailId']);
        }

        // Get the files this structure element is pointing at.
        $fileUse = $this->magicGetFileGrps();
        foreach ($metsChildren->fptr as $fptr) {
            $fileId = (string) $fptr->attributes()->FILEID;
            // Only files with a known @USE are recorded.
            if (!empty($fileUse[$fileId])) {
                $details['files'][$fileUse[$fileId]] = $fileId;
            }
        }

        // Keep for later usage (stored before children are attached).
        $this->logicalUnits[$details['id']] = $details;

        // Walk the structure recursively? And are there any children of the current element?
        if ($recursive && count($metsChildren->div)) {
            $details['children'] = [];
            foreach ($metsChildren->div as $child) {
                // Repeat for all children.
                $details['children'][] = $this->getLogicalStructureInfo($child, true);
            }
        }

        return $details;
    }
390
391
    /**
     * Get thumbnail for logical structure info.
     *
     * The configured thumbnail file groups ("fileGrpThumbs") are tried in
     * order; the first one for which the physical element has a non-empty
     * file entry wins.
     *
     * @access private
     *
     * @param string $id empty if top level document, else passed the id of parent document
     *
     * @return ?string thumbnail or null if not found
     */
    private function getThumbnail(string $id = ''): ?string
    {
        // Load plugin configuration.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey, 'files');
        $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);

        // The physical element does not depend on the file group, so resolve it once:
        // element 1 of the physical structure without an ID, else the first smLink target of the ID.
        $physicalId = empty($id)
            ? ($this->physicalStructure[1] ?? null)
            : ($this->smLinks['l2p'][$id][0] ?? null);
        if ($physicalId === null) {
            return null;
        }

        $files = $this->physicalStructureInfo[$physicalId]['files'] ?? [];

        // Iterate with foreach so that a falsy group name (e.g. "0" or an empty entry)
        // does not end the search prematurely.
        foreach ($fileGrpsThumb as $fileGrpThumb) {
            if (!empty($files[$fileGrpThumb])) {
                return (string) $files[$fileGrpThumb];
            }
        }

        return null;
    }
422
423
    /**
     * Collects the metadata of the structure element with the given ID.
     *
     * Returns an empty array when no valid PID can be determined, when no
     * initial metadata array is available, or when processing the metadata
     * sections yields nothing. Otherwise default title and date values are
     * applied before the result is returned.
     *
     * @see AbstractDocument::getMetadata()
     */
    public function getMetadata(string $id, int $cPid = 0): array
    {
        $cPid = $this->ensureValidPid($cPid);
        if ($cPid === 0) {
            $this->logger->warning('Invalid PID for metadata definitions');
            return [];
        }

        $initial = $this->getMetadataFromArray($id, $cPid);
        if (empty($initial)) {
            return [];
        }

        $processed = $this->processMetadataSections($id, $cPid, $initial);

        return empty($processed) ? $processed : $this->setDefaultTitleAndDate($processed);
    }
449
450
    /**
     * Determine the PID to use for the metadata configuration.
     *
     * A positive PID is returned unchanged. Otherwise the document's $cPid
     * is used and, if that is not set, the document's $pid. Zero is returned
     * when none of them is available.
     *
     * @access private
     *
     * @param integer $cPid
     *
     * @return integer
     */
    private function ensureValidPid(int $cPid): int
    {
        if ($cPid > 0) {
            return $cPid;
        }

        // Nothing usable was passed in: retain the current PID.
        if ($this->cPid) {
            return $this->cPid;
        }
        if ($this->pid) {
            return $this->pid;
        }

        return 0;
    }
468
469
    /**
     * Get the starting metadata array for a structure element.
     *
     * The entry stored in $metadataArray for the ID is returned when it is
     * not empty and element 0 of $metadataArray equals the given PID;
     * otherwise the result of initializeMetadata('METS') is returned.
     *
     * @access private
     *
     * @param string $id
     * @param integer $cPid
     *
     * @return array
     */
    private function getMetadataFromArray(string $id, int $cPid): array
    {
        $isStored = !empty($this->metadataArray[$id]) && $this->metadataArray[0] == $cPid;

        return $isStored
            ? $this->metadataArray[$id]
            : $this->initializeMetadata('METS');
    }
486
487
    /**
     * Process metadata sections.
     *
     * Iterates over the metadata section IDs referenced by the given element,
     * extracts the metadata of each supported section into $metadata and
     * keeps track of the section types that were processed. Only the first
     * dmdSec is processed; further dmdSec references are skipped.
     *
     * @access private
     *
     * @param string $id
     * @param integer $cPid
     * @param array $metadata
     *
     * @return array The extended metadata, or an empty array if there is no
     *               metadata section or no supported descriptive metadata
     */
    private function processMetadataSections(string $id, int $cPid, array $metadata): array
    {
        $mdIds = $this->getMetadataIds($id);
        if (empty($mdIds)) {
            // There is no metadata section for this structure node.
            return [];
        }
        // Array used as set of available section types (dmdSec, techMD, ...)
        $metadataSections = [];
        // Load available metadata formats and metadata sections.
        $this->loadFormats();
        $this->magicGetMdSec();

        $metadata['type'] = $this->getLogicalUnitType($id);

        foreach ($mdIds as $dmdId) {
            if (!isset($this->mdSec[$dmdId]['section'])) {
                // Referenced section is not available; passing null on as section type would raise a TypeError.
                $this->logger->warning('Metadata section with @ID "' . $dmdId . '" is referenced but not available');
                continue;
            }
            $mdSectionType = $this->mdSec[$dmdId]['section'];

            if ($this->hasMetadataSection($metadataSections, $mdSectionType, 'dmdSec')) {
                continue;
            }

            if ($this->extractAndProcessMetadata($dmdId, $mdSectionType, $metadata, $cPid, $metadataSections)) {
                $metadataSections[] = $mdSectionType;
            }
        }

        // Files are not expected to reference a dmdSec
        if (isset($this->fileInfos[$id]) || in_array('dmdSec', $metadataSections, true)) {
            return $metadata;
        }

        $this->logger->warning('No supported descriptive metadata found for logical structure with @ID "' . $id . '"');
        return [];
    }
535
536
    /**
     * Get logical unit type.
     *
     * The type is taken from the already loaded logical unit if there is
     * one, otherwise from the TYPE attribute of the matching div in the
     * logical structMap.
     *
     * @access private
     *
     * @param string $id
     *
     * @return array Single-element array holding the type, or an empty array if none is found
     */
    private function getLogicalUnitType(string $id): array
    {
        if (!empty($this->logicalUnits[$id])) {
            return [$this->logicalUnits[$id]['type']];
        }

        $types = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]/@TYPE');

        return empty($types) ? [] : [(string) $types[0]];
    }
557
558
    /**
     * Extract and process metadata.
     *
     * Returns true right away when a dmdSec has already been processed and
     * the current section is a dmdSec as well. Otherwise the section is
     * extracted (if its type is supported) and the additional metadata
     * definitions from the database are evaluated against the section's XML.
     *
     * @access private
     *
     * @param string $dmdId
     * @param string $mdSectionType
     * @param array &$metadata
     * @param integer $cPid
     * @param array $metadataSections
     *
     * @return boolean false if the section could not be extracted, true otherwise
     */
    private function extractAndProcessMetadata(string $dmdId, string $mdSectionType, array &$metadata, int $cPid, array $metadataSections): bool
    {
        if ($this->hasMetadataSection($metadataSections, $mdSectionType, 'dmdSec')) {
            return true;
        }

        if (!$this->extractMetadataIfTypeSupported($dmdId, $mdSectionType, $metadata)) {
            return false;
        }

        $definitions = $this->getAdditionalMetadataFromDatabase($cPid, $dmdId);

        // We need a \DOMDocument here, because SimpleXML doesn't support XPath functions properly.
        $element = dom_import_simplexml($this->mdSec[$dmdId]['xml']);
        $xpath = new \DOMXPath($element->ownerDocument);
        $this->registerNamespaces($xpath);

        $this->processAdditionalMetadata($definitions, $xpath, $element, $metadata);

        return true;
    }
593
594
    /**
     * Check whether the current section is of the searched type and a
     * section of that type is already stored in the array.
     *
     * @access private
     *
     * @param array $metadataSections
     * @param string $currentMetadataSection
     * @param string $searchedMetadataSection
     *
     * @return boolean
     */
    private function hasMetadataSection(array $metadataSections, string $currentMetadataSection, string $searchedMetadataSection): bool
    {
        if ($currentMetadataSection !== $searchedMetadataSection) {
            return false;
        }

        return in_array($searchedMetadataSection, $metadataSections);
    }
609
610
    /**
     * Process additional metadata.
     *
     * For every metadata definition the field values, the default value and
     * the sortable value are set, in that order.
     *
     * @access private
     *
     * @param array $additionalMetadata
     * @param \DOMXPath $domXPath
     * @param \DOMElement $domNode
     * @param array &$metadata
     *
     * @return void
     */
    private function processAdditionalMetadata(array $additionalMetadata, \DOMXPath $domXPath, \DOMElement $domNode, array &$metadata): void
    {
        foreach ($additionalMetadata as $definition) {
            $this->setMetadataFieldValues($definition, $domXPath, $domNode, $metadata);
            $this->setDefaultMetadataValue($definition, $metadata);
            $this->setSortableMetadataValue($definition, $domXPath, $domNode, $metadata);
        }
    }
630
631
    /**
     * Set metadata field values.
     *
     * Evaluates the definition's "xpath" against the given node. A non-empty
     * node list replaces the field with the trimmed node values; a scalar
     * result replaces it with that single trimmed value. An empty node list
     * leaves the field untouched.
     *
     * @access private
     *
     * @param array $resArray
     * @param \DOMXPath $domXPath
     * @param \DOMElement $domNode
     * @param array &$metadata
     *
     * @return void
     */
    private function setMetadataFieldValues(array $resArray, \DOMXPath $domXPath, \DOMElement $domNode, array &$metadata): void
    {
        // Nothing to do without a format and an XPath expression.
        if (!($resArray['format'] > 0) || empty($resArray['xpath'])) {
            return;
        }

        $result = $domXPath->evaluate($resArray['xpath'], $domNode);

        if (!($result instanceof \DOMNodeList)) {
            // The expression yielded a scalar instead of nodes.
            $metadata[$resArray['index_name']] = [trim((string) $result)];
            return;
        }

        if ($result->length > 0) {
            $field = $resArray['index_name'];
            $metadata[$field] = [];
            foreach ($result as $node) {
                $metadata[$field][] = trim((string) $node->nodeValue);
            }
        }
    }
657
658
    /**
     * Set default metadata value.
     *
     * The definition's "default_value" is applied when the field has no
     * (non-empty) first value and the default is not an empty string.
     *
     * @access private
     *
     * @param array $resArray
     * @param array &$metadata
     *
     * @return void
     */
    private function setDefaultMetadataValue(array $resArray, array &$metadata): void
    {
        $field = $resArray['index_name'];

        // An existing value always takes precedence over the default.
        if (!empty($metadata[$field][0])) {
            return;
        }

        if (strlen($resArray['default_value']) > 0) {
            $metadata[$field] = [$resArray['default_value']];
        }
    }
674
675
    /**
     * Set sortable metadata value.
     *
     * Only applies to non-empty fields whose definition is marked as
     * sortable. The "<index_name>_sorting" value is taken from the
     * definition's "xpath_sorting" if that yields something, and falls back
     * to the field's first value otherwise.
     *
     * @access private
     *
     * @param array $resArray
     * @param \DOMXPath $domXPath
     * @param \DOMElement $domNode
     * @param array &$metadata
     *
     * @return void
     */
    private function setSortableMetadataValue(array $resArray, \DOMXPath $domXPath, \DOMElement $domNode, array &$metadata): void
    {
        $field = $resArray['index_name'];
        if (empty($metadata[$field]) || !$resArray['is_sortable']) {
            return;
        }

        $sortingField = $field . '_sorting';

        if ($resArray['format'] > 0 && !empty($resArray['xpath_sorting'])) {
            $result = $domXPath->evaluate($resArray['xpath_sorting'], $domNode);
            if ($result instanceof \DOMNodeList) {
                // Only the first node is relevant for sorting.
                if ($result->length > 0) {
                    $metadata[$sortingField][0] = trim((string) $result->item(0)->nodeValue);
                }
            } else {
                $metadata[$sortingField][0] = trim((string) $result);
            }
        }

        // Fall back to the regular value if no dedicated sorting value was found.
        if (empty($metadata[$sortingField][0])) {
            $metadata[$sortingField][0] = $metadata[$field][0];
        }
    }
703
704
    /**
     * Set default title and date if these metadata are not set.
     *
     * An empty title results in an empty title and title_sorting; an empty
     * title_sorting defaults to the title; an empty date becomes an empty
     * string.
     *
     * @access private
     *
     * @param array $metadata
     *
     * @return array
     */
    private function setDefaultTitleAndDate(array $metadata): array
    {
        if (empty($metadata['title'][0])) {
            // No title at all: both title fields become empty strings.
            $metadata['title'][0] = '';
            $metadata['title_sorting'][0] = '';
        } elseif (empty($metadata['title_sorting'][0])) {
            // Title is present but has no sorting value yet: sort by the title itself.
            $metadata['title_sorting'][0] = $metadata['title'][0];
        }

        // Set date to empty string if not present.
        if (empty($metadata['date'][0])) {
            $metadata['date'][0] = '';
        }

        return $metadata;
    }
733
734
    /**
     * Extract metadata if metadata type is supported.
     *
     * Looks up the format of the metadata section $dmdId in the configured
     * formats and, if an extractor class is configured for it, lets that
     * class fill $metadata from the section's XML.
     *
     * @access private
     *
     * @param string $dmdId descriptive metadata id
     * @param string $mdSectionType metadata section type (only used in the log message)
     * @param array &$metadata metadata array, filled in place by the extractor
     *
     * @return bool true if extraction successful, false otherwise
     */
    private function extractMetadataIfTypeSupported(string $dmdId, string $mdSectionType, array &$metadata): bool
    {
        $type = $this->mdSec[$dmdId]['type'];

        // Is this metadata format supported?
        if (empty($this->formats[$type])) {
            $this->logger->notice('Unsupported metadata format "' . $type . '" in ' . $mdSectionType . ' with @ID "' . $dmdId . '"');
            return false;
        }

        // A known format without an extractor class is not an error, but nothing can be extracted.
        if (empty($this->formats[$type]['class'])) {
            return false;
        }

        $class = $this->formats[$type]['class'];
        $obj = class_exists($class) ? GeneralUtility::makeInstance($class) : null;

        // Report both a missing class and a class that does not implement the
        // extractor interface; the latter used to fail without any log entry.
        if (!($obj instanceof MetadataInterface)) {
            $this->logger->warning('Invalid class/method "' . $class . '->extractMetadata()" for metadata format "' . $type . '"');
            return false;
        }

        // Get the metadata from class.
        $obj->extractMetadata($this->mdSec[$dmdId]['xml'], $metadata, GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey, 'general')['useExternalApisForMetadata']);
        return true;
    }
767
768
    /**
     * Get additional data from database.
     *
     * Runs two queries against "tx_dlf_metadata" (hidden records included)
     * and returns their rows concatenated:
     *  1. metadata joined with a metadata format whose type equals the type
     *     of the metadata section $dmdId;
     *  2. metadata with format 0 and a non-empty default value.
     *
     * @access private
     *
     * @param int $cPid page id
     * @param string $dmdId descriptive metadata id
     *
     * @return array additional metadata data queried from database
     */
    private function getAdditionalMetadataFromDatabase(int $cPid, string $dmdId)
    {
        // Get all metadata with configured xpath and applicable format first.
        $formatQuery = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');
        // Get hidden records, too.
        $formatQuery->getRestrictions()->removeByType(HiddenRestriction::class);

        $formatColumns = [
            'tx_dlf_metadata.index_name AS index_name',
            'tx_dlf_metadataformat_joins.xpath AS xpath',
            'tx_dlf_metadataformat_joins.xpath_sorting AS xpath_sorting',
            'tx_dlf_metadata.is_sortable AS is_sortable',
            'tx_dlf_metadata.default_value AS default_value',
            'tx_dlf_metadata.format AS format',
        ];
        $formatQuery
            ->select(...$formatColumns)
            ->from('tx_dlf_metadata')
            ->innerJoin(
                'tx_dlf_metadata',
                'tx_dlf_metadataformat',
                'tx_dlf_metadataformat_joins',
                $formatQuery->expr()->eq('tx_dlf_metadataformat_joins.parent_id', 'tx_dlf_metadata.uid')
            )
            ->innerJoin(
                'tx_dlf_metadataformat_joins',
                'tx_dlf_formats',
                'tx_dlf_formats_joins',
                $formatQuery->expr()->eq('tx_dlf_formats_joins.uid', 'tx_dlf_metadataformat_joins.encoded')
            )
            ->where(
                $formatQuery->expr()->eq('tx_dlf_metadata.pid', $cPid),
                $formatQuery->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                $formatQuery->expr()->eq('tx_dlf_metadataformat_joins.pid', $cPid),
                $formatQuery->expr()->eq(
                    'tx_dlf_formats_joins.type',
                    $formatQuery->createNamedParameter($this->mdSec[$dmdId]['type'])
                )
            );
        $resultWithFormat = $formatQuery->execute();

        // Get all metadata without a format, but with a default value next.
        $defaultQuery = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');
        // Get hidden records, too.
        $defaultQuery->getRestrictions()->removeByType(HiddenRestriction::class);

        $defaultColumns = [
            'tx_dlf_metadata.index_name AS index_name',
            'tx_dlf_metadata.is_sortable AS is_sortable',
            'tx_dlf_metadata.default_value AS default_value',
            'tx_dlf_metadata.format AS format',
        ];
        $defaultQuery
            ->select(...$defaultColumns)
            ->from('tx_dlf_metadata')
            ->where(
                $defaultQuery->expr()->eq('tx_dlf_metadata.pid', $cPid),
                $defaultQuery->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                $defaultQuery->expr()->eq('tx_dlf_metadata.format', 0),
                $defaultQuery->expr()->neq('tx_dlf_metadata.default_value', $defaultQuery->createNamedParameter(''))
            );
        $resultWithoutFormat = $defaultQuery->execute();

        // Merge both result sets (rows with a format first).
        $rowsWithFormat = $resultWithFormat->fetchAllAssociative();
        $rowsWithoutFormat = $resultWithoutFormat->fetchAllAssociative();

        return array_merge($rowsWithFormat, $rowsWithoutFormat);
    }
847
848
    /**
     * Get IDs of (descriptive and administrative) metadata sections
     * referenced by node of given $id. The $id may refer to either
     * a logical structure node or to a file.
     *
     * DMDIDs are taken over as they are. Each ADMID is kept if it names a
     * known metadata section, or replaced by the child section IDs if it
     * names a <mets:amdSec>; other ADMIDs are dropped.
     *
     * @access protected
     *
     * @param string $id The "@ID" attribute of the logical structure node or of the file node
     *
     * @return array the non-empty metadata section IDs (array keys are not re-indexed)
     */
    protected function getMetadataIds(string $id): array
    {
        // Load amdSecChildIds concordance
        $this->magicGetMdSec();
        $fileInfo = $this->getFileInfo($id);

        $dmdIds = '';
        $admIds = '';

        // Get DMDID and ADMID of logical structure node
        if (!empty($this->logicalUnits[$id])) {
            $dmdIds = $this->logicalUnits[$id]['dmdId'] ?? '';
            $admIds = $this->logicalUnits[$id]['admId'] ?? '';
        } else {
            // Check the result before taking the first match: for file IDs
            // the query matches nothing, and dereferencing [0] on the empty
            // result would raise an "undefined array key" warning.
            $divs = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]');
            if (!empty($divs)) {
                $dmdIds = (string) $divs[0]->attributes()->DMDID;
                $admIds = (string) $divs[0]->attributes()->ADMID;
            } elseif (isset($fileInfo)) {
                $dmdIds = $fileInfo['dmdId'];
                $admIds = $fileInfo['admId'];
            }
        }

        // Handle multiple DMDIDs/ADMIDs
        $allMdIds = explode(' ', $dmdIds);

        foreach (explode(' ', $admIds) as $admId) {
            if (isset($this->mdSec[$admId])) {
                // $admId references an actual metadata section such as techMD
                $allMdIds[] = $admId;
            } elseif (isset($this->amdSecChildIds[$admId])) {
                // $admId references a <mets:amdSec> element. Resolve child elements.
                foreach ($this->amdSecChildIds[$admId] as $childId) {
                    $allMdIds[] = $childId;
                }
            }
        }

        // Drop empty entries (e.g. from an empty DMDID attribute).
        return array_filter(
            $allMdIds,
            static function ($element) {
                return !empty($element);
            }
        );
    }
905
906
    /**
     * Return the full text for $id, or an empty string if the document has
     * no full text files.
     *
     * @see AbstractDocument::getFullText()
     */
    public function getFullText(string $id): string
    {
        // Loading the fileGrps also determines whether full text files exist.
        $this->magicGetFileGrps();

        return $this->hasFulltext ? $this->getFullTextFromXml($id) : '';
    }
920
921
    /**
     * Count the ancestor elements of the logical structure node $logId.
     * Returns 0 when the ancestor query yields nothing.
     *
     * @see AbstractDocument::getStructureDepth()
     */
    public function getStructureDepth(string $logId)
    {
        $ancestorNodes = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $logId . '"]/ancestor::*');

        return empty($ancestorNodes) ? 0 : count($ancestorNodes);
    }
933
934
    /**
     * Set up logger and settings, then locate the METS node in the loaded
     * XML. If no METS node is found an error is logged and $this->mets is
     * left untouched.
     *
     * @see AbstractDocument::init()
     */
    protected function init(string $location, array $settings): void
    {
        $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($this));
        $this->settings = $settings;

        // Get METS node from XML file.
        $this->registerNamespaces($this->xml);
        $metsNodes = $this->xml->xpath('//mets:mets');

        if (!empty($metsNodes)) {
            $this->mets = $metsNodes[0];
            // Register namespaces.
            $this->registerNamespaces($this->mets);
            return;
        }

        // No METS part: identify the document as precisely as possible in the log.
        if (!empty($location)) {
            $message = 'No METS part found in document with location "' . $location . '".';
        } elseif (!empty($this->recordId)) {
            $message = 'No METS part found in document with recordId "' . $this->recordId . '".';
        } else {
            $message = 'No METS part found in current document.';
        }
        $this->logger->error($message);
    }
958
959
    /**
     * Fetch the resource at $location and store it in $this->xml.
     * Logs an error and returns false if fetching or XML loading fails.
     *
     * @see AbstractDocument::loadLocation()
     */
    protected function loadLocation(string $location): bool
    {
        $fileResource = Helper::getUrl($location);
        $xml = $fileResource !== false ? Helper::getXmlFileAsString($fileResource) : false;

        if ($xml === false) {
            $this->logger->error('Could not load XML file from "' . $location . '"');
            return false;
        }

        // Set some basic properties.
        $this->xml = $xml;
        return true;
    }
976
977
    /**
     * Make sure the fileGrps have been loaded; loading them is what sets
     * the hasFulltext flag.
     *
     * @see AbstractDocument::ensureHasFulltextIsSet()
     */
    protected function ensureHasFulltextIsSet(): void
    {
        // Nothing to do if the fileGrps are already loaded.
        if ($this->fileGrpsLoaded) {
            return;
        }
        $this->magicGetFileGrps();
    }
987
988
    /**
     * Accept $preloadedDocument as this document's XML if it is a
     * \SimpleXMLElement; anything else is rejected.
     *
     * @see AbstractDocument::setPreloadedDocument()
     */
    protected function setPreloadedDocument($preloadedDocument): bool
    {
        if (!($preloadedDocument instanceof \SimpleXMLElement)) {
            return false;
        }

        $this->xml = $preloadedDocument;
        return true;
    }
1000
1001
    /**
     * Return the METS node of this document.
     *
     * @see AbstractDocument::getDocument()
     */
    protected function getDocument(): \SimpleXMLElement
    {
        return $this->mets;
    }
1008
1009
    /**
     * This builds an array of the document's metadata sections
     *
     * On first call, every <mets:dmdSec> and every allowed child of each
     * <mets:amdSec> is processed; the results are stored in $this->mdSec
     * (dmdSecs additionally in $this->dmdSec), and for every amdSec with an
     * ID the IDs of its processed children are recorded in
     * $this->amdSecChildIds. Later calls return the cached array.
     *
     * @access protected
     *
     * @return array Array of metadata sections with their IDs as array key
     */
    protected function magicGetMdSec(): array
    {
        if ($this->mdSecLoaded) {
            return $this->mdSec;
        }

        $this->loadFormats();

        // Descriptive metadata sections.
        foreach ($this->mets->xpath('./mets:dmdSec') as $dmdSecNode) {
            $section = $this->processMdSec($dmdSecNode);
            if ($section === null) {
                continue;
            }
            $this->mdSec[$section['id']] = $section;
            $this->dmdSec[$section['id']] = $section;
        }

        // Administrative metadata sections, grouped by their amdSec.
        foreach ($this->mets->xpath('./mets:amdSec') as $amdSecNode) {
            $childIds = [];

            foreach ($amdSecNode->children('http://www.loc.gov/METS/') as $childNode) {
                if (in_array($childNode->getName(), self::ALLOWED_AMD_SEC)) {
                    // TODO: Should we check that the format may occur within this type (e.g., to ignore VIDEOMD within rightsMD)?
                    $section = $this->processMdSec($childNode);

                    if ($section !== null) {
                        $this->mdSec[$section['id']] = $section;
                        $childIds[] = $section['id'];
                    }
                }
            }

            $amdSecId = (string) $amdSecNode->attributes()->ID;
            if (!empty($amdSecId)) {
                $this->amdSecChildIds[$amdSecId] = $childIds;
            }
        }

        $this->mdSecLoaded = true;

        return $this->mdSec;
    }
1058
1059
    /**
     * Gets the document's descriptive metadata sections
     *
     * The sections are collected by magicGetMdSec(), which is triggered
     * here first.
     *
     * @access protected
     *
     * @return array Array of metadata sections with their IDs as array key
     */
    protected function magicGetDmdSec(): array
    {
        // Fills $this->dmdSec as a side effect.
        $this->magicGetMdSec();

        return $this->dmdSec;
    }
1071
1072
    /**
     * Processes an element of METS `mdSecType`.
     *
     * The section's type is taken from mdWrap/@MDTYPE, or from
     * mdWrap/@OTHERMDTYPE when MDTYPE is "OTHER", and must be one of the
     * configured formats. The section's payload is the format's root
     * element inside mets:xmlData.
     *
     * @access protected
     *
     * @param \SimpleXMLElement $element
     *
     * @return array|null The processed metadata section ("id", "section", "type", "xml"),
     *                    or null if the element has no ID, no configured format or no payload
     */
    protected function processMdSec(\SimpleXMLElement $element): ?array
    {
        $sectionId = (string) $element->attributes()->ID;
        if (empty($sectionId)) {
            return null;
        }

        $this->registerNamespaces($element);

        $standardTypes = $element->xpath('./mets:mdWrap[not(@MDTYPE="OTHER")]/@MDTYPE');
        $otherTypes = $element->xpath('./mets:mdWrap[@MDTYPE="OTHER"]/@OTHERMDTYPE');

        // Pick the XPath for the payload depending on where the type is declared.
        $type = '';
        $payloadQuery = null;
        if (!empty($standardTypes) && !empty($this->formats[(string) $standardTypes[0]])) {
            $type = (string) $standardTypes[0];
            $payloadQuery = './mets:mdWrap[@MDTYPE="' . $type . '"]/mets:xmlData/' . strtolower($type) . ':' . $this->formats[$type]['rootElement'];
        } elseif (!empty($otherTypes) && !empty($this->formats[(string) $otherTypes[0]])) {
            $type = (string) $otherTypes[0];
            $payloadQuery = './mets:mdWrap[@MDTYPE="OTHER"][@OTHERMDTYPE="' . $type . '"]/mets:xmlData/' . strtolower($type) . ':' . $this->formats[$type]['rootElement'];
        }

        $payload = $payloadQuery === null ? [] : $element->xpath($payloadQuery);
        if (empty($payload)) {
            return null;
        }

        $this->registerNamespaces($payload[0]);

        return [
            'id' => $sectionId,
            'section' => $element->getName(),
            'type' => $type,
            'xml' => $payload[0],
        ];
    }
1115
1116
    /**
     * This builds the file ID -> USE concordance
     *
     * Only file groups whose USE attribute is listed in the extension's
     * "files" configuration (images, thumbs, download, fulltext, audio) are
     * considered. Besides $this->fileGrps, this fills $this->fileInfos and
     * sets $this->hasFulltext when a file of a configured full text group
     * exists. The result is cached after the first call.
     *
     * @access protected
     *
     * @return array Array of file use groups with file IDs
     */
    protected function magicGetFileGrps(): array
    {
        if ($this->fileGrpsLoaded) {
            return $this->fileGrps;
        }

        // Get configured USE attributes.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey, 'files');
        $useGrps = GeneralUtility::trimExplode(',', $extConf['fileGrpImages']);
        foreach (['fileGrpThumbs', 'fileGrpDownload', 'fileGrpFulltext', 'fileGrpAudio'] as $optionalKey) {
            if (!empty($extConf[$optionalKey])) {
                $useGrps = array_merge($useGrps, GeneralUtility::trimExplode(',', $extConf[$optionalKey]));
            }
        }

        // Get all file groups.
        $fileGrps = $this->mets->xpath('./mets:fileSec/mets:fileGrp');
        if (!empty($fileGrps)) {
            // Build concordance for configured USE attributes.
            foreach ($fileGrps as $fileGrp) {
                $use = (string) $fileGrp['USE'];
                if (!in_array($use, $useGrps)) {
                    continue;
                }
                foreach ($fileGrp->children('http://www.loc.gov/METS/')->file as $file) {
                    $fileId = (string) $file->attributes()->ID;
                    $this->fileGrps[$fileId] = $use;
                    $this->fileInfos[$fileId] = [
                        'fileGrp' => $use,
                        'admId' => (string) $file->attributes()->ADMID,
                        'dmdId' => (string) $file->attributes()->DMDID,
                    ];
                }
            }
        }

        // Are there any fulltext files available?
        if (
            !empty($extConf['fileGrpFulltext'])
            && array_intersect(GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext']), $this->fileGrps) !== []
        ) {
            $this->hasFulltext = true;
        }

        $this->fileGrpsLoaded = true;

        return $this->fileGrps;
    }
1170
1171
    /**
     * Fill $this->metadataArray with the metadata of every logical
     * structure node that carries a DMDID, keyed by the node's ID.
     *
     * @see AbstractDocument::prepareMetadataArray()
     */
    protected function prepareMetadataArray(int $cPid): void
    {
        // Get all logical structure nodes with metadata.
        $idAttributes = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID]/@ID');
        if (empty($idAttributes)) {
            return;
        }

        foreach ($idAttributes as $idAttribute) {
            $logId = (string) $idAttribute;
            $this->metadataArray[$logId] = $this->getMetadata($logId, $cPid);
        }
    }
1185
1186
    /**
     * Accessor for the METS node, returning $this->mets via __get()
     *
     * @access protected
     *
     * @return \SimpleXMLElement The XML's METS part as \SimpleXMLElement object
     */
    protected function magicGetMets(): \SimpleXMLElement
    {
        return $this->mets;
    }
1197
1198
    /**
     * Build the physical structure from the "PHYSICAL" structMap.
     *
     * On first call, the physSequence node and each of its child divs are
     * recorded in $this->physicalStructureInfo (attributes plus the files
     * referenced by fptr elements whose file has a configured USE). The
     * returned list holds the physSequence's ID at index 0 followed by the
     * child IDs sorted by their ORDER attribute; $this->numPages is set to
     * the number of children. Later calls return the cached list.
     *
     * NOTE(review): children are keyed by (int) ORDER while collecting, so
     * children with equal or missing ORDER overwrite each other in the list.
     *
     * @see AbstractDocument::magicGetPhysicalStructure()
     */
    protected function magicGetPhysicalStructure(): array
    {
        // Is there no physical structure array yet?
        if (!$this->physicalStructureLoaded) {
            // Does the document have a structMap node of type "PHYSICAL"?
            $elementNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div');
            if (!empty($elementNodes)) {
                // Get file groups.
                $fileUse = $this->magicGetFileGrps();

                // Records attributes and file representations of one physical node.
                $registerNode = function (string $nodeId, \SimpleXMLElement $node) use ($fileUse): void {
                    $this->physicalStructureInfo[$nodeId]['id'] = $nodeId;
                    $attributeMap = [
                        'dmdId' => 'DMDID',
                        'admId' => 'ADMID',
                        'order' => 'ORDER',
                        'label' => 'LABEL',
                        'orderlabel' => 'ORDERLABEL',
                        'type' => 'TYPE',
                        'contentIds' => 'CONTENTIDS',
                    ];
                    foreach ($attributeMap as $infoKey => $attribute) {
                        $this->physicalStructureInfo[$nodeId][$infoKey] = isset($node[$attribute]) ? (string) $node[$attribute] : '';
                    }
                    // Get the file representations from fileSec node.
                    foreach ($node->children('http://www.loc.gov/METS/')->fptr as $fptr) {
                        $fileId = (string) $fptr->attributes()->FILEID;
                        // Check if file has valid @USE attribute.
                        if (!empty($fileUse[$fileId])) {
                            $this->physicalStructureInfo[$nodeId]['files'][$fileUse[$fileId]] = $fileId;
                        }
                    }
                };

                // Get the physical sequence's metadata.
                $physNode = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]');
                // Kept in its own variable: it is needed again after the loop
                // below and must not be overwritten by the child IDs.
                $physSeqId = (string) $physNode[0]['ID'];
                $registerNode($physSeqId, $physNode[0]);

                // Build the physical elements' array from the physical structMap node.
                $elements = [];
                foreach ($elementNodes as $elementNode) {
                    $elementId = (string) $elementNode['ID'];
                    $elements[(int) $elementNode['ORDER']] = $elementId;
                    $registerNode($elementId, $elementNode);
                }

                // Sort array by keys (= @ORDER).
                ksort($elements);
                // Set total number of pages/tracks.
                $this->numPages = count($elements);
                // Prepend the physical sequence itself; array_unshift() also
                // re-indexes the array to get numeric indexes.
                array_unshift($elements, $physSeqId);
                $this->physicalStructure = $elements;
            }
            $this->physicalStructureLoaded = true;
        }
        return $this->physicalStructure;
    }
1263
1264
    /**
     * Build the logical <-> physical concordance from mets:structLink.
     *
     * For every mets:smLink the xlink:to value is appended under
     * ['l2p'][xlink:from] and the xlink:from value under ['p2l'][xlink:to].
     * The result is cached after the first call.
     *
     * @see AbstractDocument::magicGetSmLinks()
     */
    protected function magicGetSmLinks(): array
    {
        if ($this->smLinksLoaded) {
            return $this->smLinks;
        }

        $linkNodes = $this->mets->xpath('./mets:structLink/mets:smLink');
        if (!empty($linkNodes)) {
            foreach ($linkNodes as $linkNode) {
                $from = (string) $linkNode->attributes('http://www.w3.org/1999/xlink')->from;
                $to = (string) $linkNode->attributes('http://www.w3.org/1999/xlink')->to;
                $this->smLinks['l2p'][$from][] = $to;
                $this->smLinks['p2l'][$to][] = $from;
            }
        }

        $this->smLinksLoaded = true;

        return $this->smLinks;
    }
1281
1282
    /**
     * Determine the location of the document's thumbnail file.
     *
     * The structure configured for the toplevel metadata type is looked up
     * in "tx_dlf_structures"; if it names another structure type for the
     * thumbnail and the document contains a node of that type, that node
     * is used instead of the toplevel node. For each configured thumbnail
     * fileGrp, the file of the first physical page linked to that node is
     * tried first, then the file of the entry at index 1 of the physical
     * structure. The result is cached unless $forceReload is set.
     *
     * @see AbstractDocument::magicGetThumbnail()
     */
    protected function magicGetThumbnail(bool $forceReload = false): string
    {
        if ($this->thumbnailLoaded && !$forceReload) {
            return $this->thumbnail;
        }

        // Retain current PID.
        $cPid = $this->cPid ?: $this->pid;
        if (!$cPid) {
            $this->logger->error('Invalid PID ' . $cPid . ' for structure definitions');
            $this->thumbnailLoaded = true;
            return $this->thumbnail;
        }

        // Load extension configuration.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey, 'files');
        if (empty($extConf['fileGrpThumbs'])) {
            $this->logger->warning('No fileGrp for thumbnails specified');
            $this->thumbnailLoaded = true;
            return $this->thumbnail;
        }

        $strctId = $this->magicGetToplevelId();
        $metadata = $this->getToplevelMetadata($cPid);

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_structures');

        // Get structure element to get thumbnail from.
        $structureRows = $queryBuilder
            ->select('tx_dlf_structures.thumbnail AS thumbnail')
            ->from('tx_dlf_structures')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_structures.pid', $cPid),
                $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($metadata['type'][0])),
                Helper::whereExpression('tx_dlf_structures')
            )
            ->setMaxResults(1)
            ->execute()
            ->fetchAllAssociative();

        if (count($structureRows) != 1) {
            $this->logger->error('No structure of type "' . $metadata['type'][0] . '" found in database');
            $this->thumbnailLoaded = true;
            return $this->thumbnail;
        }

        $structure = $structureRows[0];
        // Get desired thumbnail structure if not the toplevel structure itself.
        if (!empty($structure['thumbnail'])) {
            $strctType = Helper::getIndexNameFromUid($structure['thumbnail'], 'tx_dlf_structures', $cPid);
            // Check if this document has a structure element of the desired type.
            $strctIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@TYPE="' . $strctType . '"]/@ID');
            if (!empty($strctIds)) {
                $strctId = (string) $strctIds[0];
            }
        }

        // Load smLinks.
        $this->magicGetSmLinks();

        // Get thumbnail location: the first fileGrp that provides a file wins.
        $thumbGrps = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
        while ($thumbGrp = array_shift($thumbGrps)) {
            if (
                $this->magicGetPhysicalStructure()
                && !empty($this->smLinks['l2p'][$strctId])
                && !empty($this->physicalStructureInfo[$this->smLinks['l2p'][$strctId][0]]['files'][$thumbGrp])
            ) {
                // File of the first physical page linked to the structure.
                $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$this->smLinks['l2p'][$strctId][0]]['files'][$thumbGrp]);
                break;
            } elseif (!empty($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$thumbGrp])) {
                // Otherwise the file of the physical structure's entry at index 1.
                $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$thumbGrp]);
                break;
            }
        }

        $this->thumbnailLoaded = true;

        return $this->thumbnail;
    }
1360
1361
    /**
1362
     * @see AbstractDocument::magicGetToplevelId()
1363
     */
1364
    protected function magicGetToplevelId(): string
    {
        // The ID is resolved lazily: nothing to do when it is already known.
        if (!empty($this->toplevelId)) {
            return $this->toplevelId;
        }

        // Candidates are all logical structure nodes which carry metadata (@DMDID)
        // and do not merely point to another METS file (no mets:mptr child).
        $candidates = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID and not(./mets:mptr)]');
        if (empty($candidates)) {
            return $this->toplevelId;
        }

        // The logical-to-physical links are needed to rate the candidates.
        $this->magicGetSmLinks();

        foreach ($candidates as $candidate) {
            $candidateId = (string) $candidate['ID'];

            // A logical node which is linked to physical nodes is the best match: stop here.
            if (array_key_exists($candidateId, $this->smLinks['l2p'])) {
                $this->toplevelId = $candidateId;
                break;
            }

            // Otherwise remember the first candidate as fallback and keep looking.
            if (empty($this->toplevelId)) {
                $this->toplevelId = $candidateId;
            }
        }

        return $this->toplevelId;
    }
1388
1389
    /**
1390
     * Try to determine URL of parent document.
1391
     *
1392
     * @access public
1393
     *
1394
     * @return string
1395
     */
1396
    public function magicGetParentHref(): string
    {
        // The parent URL is resolved lazily and cached in $this->parentHref.
        if (empty($this->parentHref)) {
            // Go through the lazy getter instead of reading the property directly:
            // the toplevel ID may not have been determined yet, in which case the
            // XPath below would search for @ID="" and never find the parent.
            $toplevelId = $this->magicGetToplevelId();
            // Get the closest ancestor of the current document which has a MPTR child.
            $parentMptr = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $toplevelId . '"]/ancestor::mets:div[./mets:mptr][1]/mets:mptr');
            if (!empty($parentMptr)) {
                // The link target is stored in the xlink:href attribute of the METS pointer.
                $this->parentHref = (string) $parentMptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
            }
        }

        return $this->parentHref;
    }
1408
1409
    /**
1410
     * This magic method is executed prior to any serialization of the object
1411
     * @see __wakeup()
1412
     *
1413
     * @access public
1414
     *
1415
     * @return array Properties to be serialized
1416
     */
1417
    public function __sleep(): array
    {
        // \SimpleXMLElement objects can't be serialized, thus save the XML as string for serialization
        $serializedXml = $this->xml->asXML();
        // asXML() may return a bool instead of the XML string (e.g. false on failure).
        // Only ever store a string in the string property; an empty string marks
        // "no XML available" for the deserialization step.
        $this->asXML = is_string($serializedXml) ? $serializedXml : '';
        // Only these properties are written to the serialized representation.
        return ['pid', 'recordId', 'parentId', 'asXML'];
    }
1423
1424
    /**
1425
     * This magic method returns a string representation of the object
1426
     *
1427
     * @access public
1428
     *
1429
     * @return string String representing the METS object
1430
     */
1431
    public function __toString(): string
    {
        // Build a standalone DOM document around a deep copy of the METS node
        // so it can be dumped as pretty-printed XML.
        $dom = new \DOMDocument('1.0', 'utf-8');
        $metsNode = dom_import_simplexml($this->mets);
        $importedNode = $dom->importNode($metsNode, true);
        $dom->appendChild($importedNode);
        $dom->formatOutput = true;
        return $dom->saveXML();
    }
1438
1439
    /**
1440
     * This magic method is executed after the object is deserialized
1441
     * @see __sleep()
1442
     *
1443
     * @access public
1444
     *
1445
     * @return void
1446
     */
1447
    public function __wakeup(): void
    {
        // Turn the XML string stored during serialization back into an XML object.
        $restoredXml = Helper::getXmlFileAsString($this->asXML);

        if ($restoredXml === false) {
            // The XML could not be restored: create a logger on the spot and report the failure.
            $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(static::class);
            $this->logger->error('Could not load XML after deserialization');
            return;
        }

        // The string copy is no longer needed once the XML object is back.
        $this->asXML = '';
        $this->xml = $restoredXml;
        // Rebuild the unserializable properties.
        $this->init('', $this->settings);
    }
1460
}
1461