Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Pull Request — master (#861)
by Sebastian
03:23
created

MetsDocument::_getMdSec()   B

Complexity

Conditions 9
Paths 2

Size

Total Lines 41
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 9
eloc 21
nc 2
nop 0
dl 0
loc 41
rs 8.0555
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
5
 *
6
 * This file is part of the Kitodo and TYPO3 projects.
7
 *
8
 * @license GNU General Public License version 3 or later.
9
 * For the full copyright and license information, please read the
10
 * LICENSE.txt file that was distributed with this source code.
11
 */
12
13
namespace Kitodo\Dlf\Common;
14
15
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
16
use TYPO3\CMS\Core\Database\ConnectionPool;
17
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
18
use TYPO3\CMS\Core\Utility\GeneralUtility;
19
use Ubl\Iiif\Tools\IiifHelper;
20
use Ubl\Iiif\Services\AbstractImageService;
21
use TYPO3\CMS\Core\Log\LogManager;
22
23
/**
24
 * MetsDocument class for the 'dlf' extension.
25
 *
26
 * @author Sebastian Meyer <[email protected]>
27
 * @author Henrik Lochmann <[email protected]>
28
 * @package TYPO3
29
 * @subpackage dlf
30
 * @access public
31
 * @property int $cPid This holds the PID for the configuration
32
 * @property-read array $mdSec Associative array of METS metadata sections indexed by their IDs.
33
 * @property-read array $dmdSec Subset of `$mdSec` storing only the dmdSec entries; kept for compatibility.
34
 * @property-read array $fileGrps This holds the file ID -> USE concordance
35
 * @property-read array $fileInfos Additional information about files (e.g., ADMID), indexed by ID.
36
 * @property-read bool $hasFulltext Are there any fulltext files available?
37
 * @property-read array $metadataArray This holds the documents' parsed metadata array
38
 * @property-read \SimpleXMLElement $mets This holds the XML file's METS part as \SimpleXMLElement object
39
 * @property-read int $numPages This holds the total number of pages
40
 * @property-read int $parentId This holds the UID of the parent document or zero if not multi-volumed
41
 * @property-read array $physicalStructure This holds the physical structure
42
 * @property-read array $physicalStructureInfo This holds the physical structure metadata
43
 * @property-read int $pid This holds the PID of the document or zero if not in database
44
 * @property-read bool $ready Is the document instantiated successfully?
45
 * @property-read string $recordId The METS file's / IIIF manifest's record identifier
46
 * @property-read int $rootId This holds the UID of the root document or zero if not multi-volumed
47
 * @property-read array $smLinks This holds the smLinks between logical and physical structMap
48
 * @property-read array $tableOfContents This holds the logical structure
49
 * @property-read string $thumbnail This holds the document's thumbnail location
50
 * @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
51
 * @property-read string $parentHref URL of the parent document (determined via mptr element), or empty string if none is available
52
 */
53
final class MetsDocument extends Doc
54
{
55
    /**
56
     * Subsections / tags that may occur within `<mets:amdSec>`.
57
     *
58
     * @link https://www.loc.gov/standards/mets/docs/mets.v1-9.html#amdSec
59
     * @link https://www.loc.gov/standards/mets/docs/mets.v1-9.html#mdSecType
60
     *
61
     * @var string[]
62
     */
63
    protected const ALLOWED_AMD_SEC = ['techMD', 'rightsMD', 'sourceMD', 'digiprovMD'];
64
65
    /**
66
     * This holds the whole XML file as string for serialization purposes
67
     * @see __sleep() / __wakeup()
68
     *
69
     * @var string
70
     * @access protected
71
     */
72
    protected $asXML = '';
73
74
    /**
75
     * This maps the ID of each amdSec to the IDs of its children (techMD etc.).
76
     * When an ADMID references an amdSec instead of techMD etc., this is used to iterate the child elements.
77
     *
78
     * @var string[][]
79
     * @access protected
80
     */
81
    protected $amdSecChildIds = [];
82
83
    /**
84
     * Associative array of METS metadata sections indexed by their IDs.
85
     *
86
     * @var array
87
     * @access protected
88
     */
89
    protected $mdSec = [];
90
91
    /**
92
     * Are the METS file's metadata sections loaded?
93
     * @see MetsDocument::$mdSec
94
     *
95
     * @var bool
96
     * @access protected
97
     */
98
    protected $mdSecLoaded = false;
99
100
    /**
101
     * Subset of $mdSec storing only the dmdSec entries; kept for compatibility.
102
     *
103
     * @var array
104
     * @access protected
105
     */
106
    protected $dmdSec = [];
107
108
    /**
109
     * The extension key
110
     *
111
     * @var	string
112
     * @access public
113
     */
114
    public static $extKey = 'dlf';
115
116
    /**
117
     * This holds the file ID -> USE concordance
118
     * @see _getFileGrps()
119
     *
120
     * @var array
121
     * @access protected
122
     */
123
    protected $fileGrps = [];
124
125
    /**
126
     * Are the image file groups loaded?
127
     * @see $fileGrps
128
     *
129
     * @var bool
130
     * @access protected
131
     */
132
    protected $fileGrpsLoaded = false;
133
134
    /**
135
     * Additional information about files (e.g., ADMID), indexed by ID.
136
     * TODO: Consider using this for `getFileMimeType()` and `getFileLocation()`.
137
     * @see _getFileInfos()
138
     *
139
     * @var array
140
     * @access protected
141
     */
142
    protected $fileInfos = [];
143
144
    /**
145
     * This holds the XML file's METS part as \SimpleXMLElement object
146
     *
147
     * @var \SimpleXMLElement
148
     * @access protected
149
     */
150
    protected $mets;
151
152
    /**
153
     * This holds the whole XML file as \SimpleXMLElement object
154
     *
155
     * @var \SimpleXMLElement
156
     * @access protected
157
     */
158
    protected $xml;
159
160
    /**
161
     * URL of the parent document (determined via mptr element),
162
     * or empty string if none is available
163
     *
164
     * @var string|null
165
     * @access protected
166
     */
167
    protected $parentHref;
168
169
    /**
170
     * This adds metadata from METS structural map to metadata array.
171
     *
172
     * @access	public
173
     *
174
     * @param	array	&$metadata: The metadata array to extend
175
     * @param	string	$id: The "@ID" attribute of the logical structure node
176
     *
177
     * @return  void
178
     */
179
    public function addMetadataFromMets(&$metadata, $id)
    {
        // Look up the logical structure node; an empty result means there is nothing to add.
        $structure = $this->getLogicalStructure($id);
        if (empty($structure)) {
            return;
        }
        // Copy ORDER, LABEL and ORDERLABEL into their "mets_"-prefixed metadata fields.
        foreach (['order', 'label', 'orderlabel'] as $attribute) {
            $metadata['mets_' . $attribute][0] = $structure[$attribute];
        }
    }
188
189
    /**
190
     *
191
     * {@inheritDoc}
192
     * @see \Kitodo\Dlf\Common\Doc::establishRecordId()
193
     */
194
    protected function establishRecordId($pid)
    {
        // Use the METS root element's @OBJID as record identifier, if present.
        // ($pid is part of the inherited signature and is not used here.)
        if (!empty($this->mets['OBJID'])) {
            $this->recordId = (string) $this->mets['OBJID'];
        }
        // Give registered hook objects the chance to post-process the record identifier.
        foreach (Helper::getHookObjects('Classes/Common/MetsDocument.php') as $hook) {
            if (!method_exists($hook, 'construct_postProcessRecordId')) {
                continue;
            }
            $hook->construct_postProcessRecordId($this->xml, $this->recordId);
        }
    }
209
210
    /**
211
     *
212
     * {@inheritDoc}
213
     * @see \Kitodo\Dlf\Common\Doc::getDownloadLocation()
214
     */
215
    public function getDownloadLocation($id)
    {
        $fileMimeType = $this->getFileMimeType($id);
        $fileLocation = $this->getFileLocation($id);
        if ($fileMimeType === 'application/vnd.kitodo.iiif') {
            // Make sure the location points at the image service's "info.json".
            // The previous check compared the position of the last "/" against strlen()
            // instead of strlen() - 1, so it never matched and a location that already
            // ended in "/" was turned into ".../​/info.json" (double slash).
            if (substr($fileLocation, -9) !== 'info.json') {
                $separator = (substr($fileLocation, -1) === '/') ? '' : '/';
                $fileLocation .= $separator . 'info.json';
            }
            $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
            IiifHelper::setUrlReader(IiifUrlReader::getInstance());
            IiifHelper::setMaxThumbnailHeight($conf['iiifThumbnailHeight']);
            IiifHelper::setMaxThumbnailWidth($conf['iiifThumbnailWidth']);
            $service = IiifHelper::loadIiifResource($fileLocation);
            // Only an image service can provide a direct image URL; otherwise fall
            // through and return the (info.json) location itself.
            if ($service !== null && $service instanceof AbstractImageService) {
                return $service->getImageUrl();
            }
        } elseif ($fileMimeType === 'application/vnd.netfpx') {
            // Start a query string if the location does not have one yet.
            $baseURL = $fileLocation . (strpos($fileLocation, '?') === false ? '?' : '');
            // TODO CVT is an optional IIP server capability; in theory, capabilities should be determined in the object request with '&obj=IIP-server'
            return $baseURL . '&CVT=jpeg';
        }
        return $fileLocation;
    }
236
237
    /**
238
     * {@inheritDoc}
239
     * @see \Kitodo\Dlf\Common\Doc::getFileLocation()
240
     */
241
    public function getFileLocation($id)
    {
        // Find the URL-type FLocat element(s) of the file with the given @ID.
        $fLocats = $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/mets:FLocat[@LOCTYPE="URL"]');
        if (empty($id) || empty($fLocats)) {
            $this->logger->warning('There is no file node with @ID "' . $id . '"');
            return '';
        }
        // The location is stored in the xlink:href attribute of the first hit.
        return (string) $fLocats[0]->attributes('http://www.w3.org/1999/xlink')->href;
    }
254
255
    /**
256
     * {@inheritDoc}
257
     * @see \Kitodo\Dlf\Common\Doc::getFileMimeType()
258
     */
259
    public function getFileMimeType($id)
    {
        // Find the @MIMETYPE attribute of the file with the given @ID.
        $mimeTypes = $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/@MIMETYPE');
        if (empty($id) || empty($mimeTypes)) {
            $this->logger->warning('There is no file node with @ID "' . $id . '" or no MIME type specified');
            return '';
        }
        // Return the first hit as plain string.
        return (string) $mimeTypes[0];
    }
272
273
    /**
274
     * {@inheritDoc}
275
     * @see \Kitodo\Dlf\Common\Doc::getLogicalStructure()
276
     */
277
    public function getLogicalStructure($id, $recursive = false)
    {
        // A non-recursive request for an already loaded logical unit is served from the cache.
        if (!$recursive && !empty($this->logicalUnits[$id])) {
            return $this->logicalUnits[$id];
        }
        // With an ID, look up that specific logical unit; without one, take all toplevel units.
        $query = empty($id)
            ? './mets:structMap[@TYPE="LOGICAL"]/mets:div'
            : './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]';
        $nodes = $this->mets->xpath($query);
        if (empty($nodes)) {
            return [];
        }
        if ($recursive) {
            // Walk the logical structure recursively and fill the whole table of contents.
            // The recursive mode only populates $this->tableOfContents; it returns no details.
            foreach ($nodes as $node) {
                $this->tableOfContents[] = $this->getLogicalStructureInfo($node, $recursive);
            }
            return [];
        }
        // Non-recursive: return the details of the first xpath hit only.
        return $this->getLogicalStructureInfo($nodes[0]);
    }
307
308
    /**
309
     * This gets details about a logical structure element
310
     *
311
     * @access protected
312
     *
313
     * @param \SimpleXMLElement $structure: The logical structure node
314
     * @param bool $recursive: Whether to include the child elements
315
     *
316
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
317
     */
318
    protected function getLogicalStructureInfo(\SimpleXMLElement $structure, $recursive = false)
    {
        // Collect the node's attributes as plain strings. The array is initialized first so
        // that a node without any attributes does not leave $attributes undefined.
        $attributes = [];
        foreach ($structure->attributes() as $attribute => $value) {
            $attributes[$attribute] = (string) $value;
        }
        // Load plugin configuration.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        // Child elements in the METS namespace (used for mptr, fptr and div lookups below).
        $metsChildren = $structure->children('http://www.loc.gov/METS/');
        // Extract identity information; missing attributes default to an empty string.
        $details = [];
        $details['id'] = $attributes['ID'] ?? '';
        $details['dmdId'] = $attributes['DMDID'] ?? '';
        $details['admId'] = $attributes['ADMID'] ?? '';
        $details['order'] = $attributes['ORDER'] ?? '';
        $details['label'] = $attributes['LABEL'] ?? '';
        $details['orderlabel'] = $attributes['ORDERLABEL'] ?? '';
        $details['contentIds'] = $attributes['CONTENTIDS'] ?? '';
        $details['volume'] = '';
        // Set volume information only if no label is set and this is the toplevel structure element.
        if (
            empty($details['label'])
            && $details['id'] == $this->_getToplevelId()
        ) {
            $metadata = $this->getMetadata($details['id']);
            if (!empty($metadata['volume'][0])) {
                $details['volume'] = $metadata['volume'][0];
            }
        }
        $details['pagination'] = '';
        $details['type'] = $attributes['TYPE'] ?? '';
        // Add description for 3D objects.
        if ($details['type'] == 'object') {
            $metadata = $this->getMetadata($details['id']);
            $details['description'] = $metadata['description'][0] ?? '';
        }
        $details['thumbnailId'] = '';
        // Load smLinks.
        $this->_getSmLinks();
        // Load physical structure.
        $this->_getPhysicalStructure();
        // Get the physical page or external file this structure element is pointing at.
        $details['points'] = '';
        // Is there a mptr node?
        if (count($metsChildren->mptr)) {
            // Yes. Get the file reference.
            $details['points'] = (string) $metsChildren->mptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
        } elseif (
            !empty($this->physicalStructure)
            && array_key_exists($details['id'], $this->smLinks['l2p'])
        ) {
            // ID of the first physical page/track linked to this logical element.
            $firstPhysicalId = $this->smLinks['l2p'][$details['id']][0];
            // Link logical structure to the first corresponding physical page/track.
            // If the ID is not found, array_search() yields false -> 0, so fall back to 1.
            $details['points'] = max(intval(array_search($firstPhysicalId, $this->physicalStructure, true)), 1);
            // Use the first configured thumbnail file group that has a file for that page.
            $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
            while ($fileGrpThumb = array_shift($fileGrpsThumb)) {
                if (!empty($this->physicalStructureInfo[$firstPhysicalId]['files'][$fileGrpThumb])) {
                    $details['thumbnailId'] = $this->physicalStructureInfo[$firstPhysicalId]['files'][$fileGrpThumb];
                    break;
                }
            }
            // Get page/track number of the first page/track related to this structure element.
            $details['pagination'] = $this->physicalStructureInfo[$firstPhysicalId]['orderlabel'];
        } elseif ($details['id'] == $this->_getToplevelId()) {
            // Point to self if this is the toplevel structure.
            $details['points'] = 1;
            // Use the first configured thumbnail file group that has a file for the first page.
            $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
            while ($fileGrpThumb = array_shift($fileGrpsThumb)) {
                if (
                    !empty($this->physicalStructure)
                    && !empty($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb])
                ) {
                    $details['thumbnailId'] = $this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb];
                    break;
                }
            }
        }
        // Get the files this structure element is pointing at.
        $details['files'] = [];
        $fileUse = $this->_getFileGrps();
        // Get the file representations from fileSec node.
        foreach ($metsChildren->fptr as $fptr) {
            $fileId = (string) $fptr->attributes()->FILEID;
            // Check if file has valid @USE attribute.
            if (!empty($fileUse[$fileId])) {
                $details['files'][$fileUse[$fileId]] = $fileId;
            }
        }
        // Keep for later usage. Note that the cached entry never contains 'children'.
        $this->logicalUnits[$details['id']] = $details;
        // Walk the structure recursively? And are there any children of the current element?
        if (
            $recursive
            && count($metsChildren->div)
        ) {
            $details['children'] = [];
            foreach ($metsChildren->div as $child) {
                // Repeat for all children.
                $details['children'][] = $this->getLogicalStructureInfo($child, true);
            }
        }
        return $details;
    }
418
419
    /**
420
     * {@inheritDoc}
421
     * @see \Kitodo\Dlf\Common\Doc::getMetadata()
422
     */
423
    public function getMetadata($id, $cPid = 0)
    {
        // Make sure $cPid is a non-negative integer.
        $cPid = max(intval($cPid), 0);
        // If $cPid is not given, try to get it elsewhere.
        if (
            !$cPid
            && ($this->cPid || $this->pid)
        ) {
            // Retain current PID.
            $cPid = ($this->cPid ? $this->cPid : $this->pid);
        } elseif (!$cPid) {
            $this->logger->warning('Invalid PID ' . $cPid . ' for metadata definitions');
            return [];
        }
        // Get metadata from parsed metadata array if available.
        // Index 0 of the metadata array is compared against the PID the cache was built for.
        if (
            !empty($this->metadataArray[$id])
            && $this->metadataArray[0] == $cPid
        ) {
            return $this->metadataArray[$id];
        }
        // Initialize metadata array with empty values.
        $metadata = [
            'title' => [],
            'title_sorting' => [],
            'description' => [],
            'author' => [],
            'holder' => [],
            'place' => [],
            'year' => [],
            'prod_id' => [],
            'record_id' => [],
            'opac_id' => [],
            'union_id' => [],
            'urn' => [],
            'purl' => [],
            'type' => [],
            'volume' => [],
            'volume_sorting' => [],
            'date' => [],
            'license' => [],
            'terms' => [],
            'restrictions' => [],
            'out_of_print' => [],
            'rights_info' => [],
            'collection' => [],
            'owner' => [],
            'mets_label' => [],
            'mets_orderlabel' => [],
            'document_format' => ['METS'],
        ];
        $mdIds = $this->getMetadataIds($id);
        if (empty($mdIds)) {
            // There is no metadata section for this structure node.
            return [];
        }
        // Associative array used as set of available section types (dmdSec, techMD, ...)
        $hasMetadataSection = [];
        // Load available metadata formats and metadata sections.
        $this->loadFormats();
        $this->_getMdSec();
        // Get the structure's type.
        if (!empty($this->logicalUnits[$id])) {
            $metadata['type'] = [$this->logicalUnits[$id]['type']];
        } else {
            $struct = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]/@TYPE');
            if (!empty($struct)) {
                $metadata['type'] = [(string) $struct[0]];
            }
        }
        foreach ($mdIds as $dmdId) {
            $mdSectionType = $this->mdSec[$dmdId]['section'];

            // To preserve behavior of previous Kitodo versions, extract metadata only from first supported dmdSec
            // However, we want to extract, for example, all techMD sections (VIDEOMD, AUDIOMD)
            if ($mdSectionType === 'dmdSec' && isset($hasMetadataSection['dmdSec'])) {
                continue;
            }

            // Is this metadata format supported?
            if (!empty($this->formats[$this->mdSec[$dmdId]['type']])) {
                if (!empty($this->formats[$this->mdSec[$dmdId]['type']]['class'])) {
                    $class = $this->formats[$this->mdSec[$dmdId]['type']]['class'];
                    // Get the metadata from class.
                    if (
                        class_exists($class)
                        && ($obj = GeneralUtility::makeInstance($class)) instanceof MetadataInterface
                    ) {
                        $obj->extractMetadata($this->mdSec[$dmdId]['xml'], $metadata);
                    } else {
                        $this->logger->warning('Invalid class/method "' . $class . '->extractMetadata()" for metadata format "' . $this->mdSec[$dmdId]['type'] . '"');
                    }
                }
            } else {
                $this->logger->notice('Unsupported metadata format "' . $this->mdSec[$dmdId]['type'] . '" in ' . $mdSectionType . ' with @ID "' . $dmdId . '"');
                // Continue searching for supported metadata with next @DMDID.
                continue;
            }
            // Get the additional metadata from database.
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('tx_dlf_metadata');
            // Get hidden records, too.
            $queryBuilder
                ->getRestrictions()
                ->removeByType(HiddenRestriction::class);
            // Get all metadata with configured xpath and applicable format first.
            $resultWithFormat = $queryBuilder
                ->select(
                    'tx_dlf_metadata.index_name AS index_name',
                    'tx_dlf_metadataformat_joins.xpath AS xpath',
                    'tx_dlf_metadataformat_joins.xpath_sorting AS xpath_sorting',
                    'tx_dlf_metadata.is_sortable AS is_sortable',
                    'tx_dlf_metadata.default_value AS default_value',
                    'tx_dlf_metadata.format AS format'
                )
                ->from('tx_dlf_metadata')
                ->innerJoin(
                    'tx_dlf_metadata',
                    'tx_dlf_metadataformat',
                    'tx_dlf_metadataformat_joins',
                    $queryBuilder->expr()->eq(
                        'tx_dlf_metadataformat_joins.parent_id',
                        'tx_dlf_metadata.uid'
                    )
                )
                ->innerJoin(
                    'tx_dlf_metadataformat_joins',
                    'tx_dlf_formats',
                    'tx_dlf_formats_joins',
                    $queryBuilder->expr()->eq(
                        'tx_dlf_formats_joins.uid',
                        'tx_dlf_metadataformat_joins.encoded'
                    )
                )
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($cPid)),
                    $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                    $queryBuilder->expr()->eq('tx_dlf_metadataformat_joins.pid', intval($cPid)),
                    $queryBuilder->expr()->eq('tx_dlf_formats_joins.type', $queryBuilder->createNamedParameter($this->mdSec[$dmdId]['type']))
                )
                ->execute();
            // Get all metadata without a format, but with a default value next.
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('tx_dlf_metadata');
            // Get hidden records, too.
            $queryBuilder
                ->getRestrictions()
                ->removeByType(HiddenRestriction::class);
            $resultWithoutFormat = $queryBuilder
                ->select(
                    'tx_dlf_metadata.index_name AS index_name',
                    'tx_dlf_metadata.is_sortable AS is_sortable',
                    'tx_dlf_metadata.default_value AS default_value',
                    'tx_dlf_metadata.format AS format'
                )
                ->from('tx_dlf_metadata')
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($cPid)),
                    $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                    $queryBuilder->expr()->eq('tx_dlf_metadata.format', 0),
                    $queryBuilder->expr()->neq('tx_dlf_metadata.default_value', $queryBuilder->createNamedParameter(''))
                )
                ->execute();
            // Merge both result sets.
            $allResults = array_merge($resultWithFormat->fetchAll(), $resultWithoutFormat->fetchAll());
            // We need a \DOMDocument here, because SimpleXML doesn't support XPath functions properly.
            $domNode = dom_import_simplexml($this->mdSec[$dmdId]['xml']);
            $domXPath = new \DOMXPath($domNode->ownerDocument);
            $this->registerNamespaces($domXPath);
            // OK, now make the XPath queries.
            foreach ($allResults as $resArray) {
                // Set metadata field's value(s).
                if (
                    $resArray['format'] > 0
                    && !empty($resArray['xpath'])
                    && ($values = $domXPath->evaluate($resArray['xpath'], $domNode))
                ) {
                    if (
                        $values instanceof \DOMNodeList
                        && $values->length > 0
                    ) {
                        // A node list replaces any previously extracted values of this field.
                        $metadata[$resArray['index_name']] = [];
                        foreach ($values as $value) {
                            $metadata[$resArray['index_name']][] = trim((string) $value->nodeValue);
                        }
                    } elseif (!($values instanceof \DOMNodeList)) {
                        // The XPath expression evaluated to a scalar (e.g., via an XPath function).
                        $metadata[$resArray['index_name']] = [trim((string) $values)];
                    }
                }
                // Set default value if applicable.
                if (
                    empty($metadata[$resArray['index_name']][0])
                    && strlen($resArray['default_value']) > 0
                ) {
                    $metadata[$resArray['index_name']] = [$resArray['default_value']];
                }
                // Set sorting value if applicable.
                if (
                    !empty($metadata[$resArray['index_name']])
                    && $resArray['is_sortable']
                ) {
                    if (
                        $resArray['format'] > 0
                        && !empty($resArray['xpath_sorting'])
                        && ($values = $domXPath->evaluate($resArray['xpath_sorting'], $domNode))
                    ) {
                        if (
                            $values instanceof \DOMNodeList
                            && $values->length > 0
                        ) {
                            $metadata[$resArray['index_name'] . '_sorting'][0] = trim((string) $values->item(0)->nodeValue);
                        } elseif (!($values instanceof \DOMNodeList)) {
                            $metadata[$resArray['index_name'] . '_sorting'][0] = trim((string) $values);
                        }
                    }
                    // Fall back to the field's first value if no dedicated sorting value was found.
                    if (empty($metadata[$resArray['index_name'] . '_sorting'][0])) {
                        $metadata[$resArray['index_name'] . '_sorting'][0] = $metadata[$resArray['index_name']][0];
                    }
                }
            }

            // Remember that a section of this type has been processed successfully.
            $hasMetadataSection[$mdSectionType] = true;
        }
        // Set title to empty string if not present.
        if (empty($metadata['title'][0])) {
            $metadata['title'][0] = '';
            $metadata['title_sorting'][0] = '';
        }
        // Set title_sorting to title as default.
        // (The closing brace of this block was missing, which made the whole file unparsable.)
        if (empty($metadata['title_sorting'][0])) {
            $metadata['title_sorting'][0] = $metadata['title'][0];
        }
        // Set date to empty string if not present.
        if (empty($metadata['date'][0])) {
            $metadata['date'][0] = '';
        }
        // Files are not expected to reference a dmdSec
        if (isset($this->fileInfos[$id]) || isset($hasMetadataSection['dmdSec'])) {
            return $metadata;
        } else {
            $this->logger->warning('No supported descriptive metadata found for logical structure with @ID "' . $id . '"');
            return [];
        }
    }
667
668
    /**
669
     * Get IDs of (descriptive and administrative) metadata sections
670
     * referenced by node of given $id. The $id may refer to either
671
     * a logical structure node or to a file.
672
     *
673
     * @access protected
674
     * @param string $id: The "@ID" attribute of the logical structure node or file node
675
     * @return array IDs of the referenced metadata sections (empty entries removed)
676
     */
677
    protected function getMetadataIds($id)
    {
        // Load amdSecChildIds concordance
        $this->_getMdSec();
        $this->_getFileInfos();

        // Default to "no references" if $id matches neither a logical structure node nor a file.
        $dmdIds = '';
        $admIds = '';

        // Get DMDID and ADMID of logical structure node
        if (!empty($this->logicalUnits[$id])) {
            $dmdIds = $this->logicalUnits[$id]['dmdId'] ?? '';
            $admIds = $this->logicalUnits[$id]['admId'] ?? '';
        } else {
            // Check the xpath result before taking its first element; dereferencing [0]
            // on an empty result would raise an undefined offset error.
            $divs = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]');
            if (!empty($divs)) {
                $dmdIds = (string) $divs[0]->attributes()->DMDID;
                $admIds = (string) $divs[0]->attributes()->ADMID;
            } elseif (isset($this->fileInfos[$id])) {
                // Not a logical structure node: fall back to the file of that ID.
                $dmdIds = $this->fileInfos[$id]['dmdId'] ?? '';
                $admIds = $this->fileInfos[$id]['admId'] ?? '';
            }
        }

        // Handle multiple DMDIDs/ADMIDs (space-separated ID lists)
        $allMdIds = explode(' ', $dmdIds);

        foreach (explode(' ', $admIds) as $admId) {
            if (isset($this->mdSec[$admId])) {
                // $admId references an actual metadata section such as techMD
                $allMdIds[] = $admId;
            } elseif (isset($this->amdSecChildIds[$admId])) {
                // $admId references a <mets:amdSec> element. Resolve child elements.
                foreach ($this->amdSecChildIds[$admId] as $childId) {
                    $allMdIds[] = $childId;
                }
            }
        }

        // Drop empty entries (e.g., from exploding an empty ID list). Array keys are preserved.
        return array_filter($allMdIds, function ($element) {
            return !empty($element);
        });
    }
720
721
    /**
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Doc::getFullText()
     */
    public function getFullText($id)
    {
        // Loading the file groups is what sets $this->hasFulltext.
        $this->_getFileGrps();

        // Without full text files there is nothing to extract.
        return $this->hasFulltext ? $this->getFullTextFromXml($id) : '';
    }
736
737
    /**
     * {@inheritDoc}
     * @see Doc::getStructureDepth()
     */
    public function getStructureDepth($logId)
    {
        // Every ancestor element of the matching logical <mets:div> counts as one level.
        $ancestors = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $logId . '"]/ancestor::*');

        // No match (or a failed query) is reported as depth 0.
        return empty($ancestors) ? 0 : count($ancestors);
    }
750
751
    /**
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Doc::init()
     */
    protected function init($location)
    {
        $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($this));

        // Locate the METS root inside the loaded XML.
        $this->registerNamespaces($this->xml);
        $metsNodes = $this->xml->xpath('//mets:mets');

        if (!empty($metsNodes)) {
            $this->mets = $metsNodes[0];
            // The extracted node needs its own namespace registration.
            $this->registerNamespaces($this->mets);
            return;
        }

        // No METS part: report it with the most specific identifier available.
        if (!empty($location)) {
            $message = 'No METS part found in document with location "' . $location . '".';
        } elseif (!empty($this->recordId)) {
            $message = 'No METS part found in document with recordId "' . $this->recordId . '".';
        } else {
            $message = 'No METS part found in current document.';
        }
        $this->logger->error($message);
    }
775
776
    /**
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Doc::loadLocation()
     */
    protected function loadLocation($location)
    {
        $fileResource = Helper::getUrl($location);

        // Only try to parse when the resource could be fetched at all.
        $xml = ($fileResource !== false) ? Helper::getXmlFileAsString($fileResource) : false;

        if ($xml === false) {
            $this->logger->error('Could not load XML file from "' . $location . '"');
            return false;
        }

        // Set some basic properties.
        $this->xml = $xml;
        return true;
    }
794
795
    /**
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Doc::ensureHasFulltextIsSet()
     */
    protected function ensureHasFulltextIsSet()
    {
        // Nothing to do once the fileGrps (and with them hasFulltext) are loaded.
        if ($this->fileGrpsLoaded) {
            return;
        }

        $this->_getFileGrps();
    }
806
807
    /**
     * {@inheritDoc}
     * @see Doc::setPreloadedDocument()
     */
    protected function setPreloadedDocument($preloadedDocument)
    {
        // Only an already parsed \SimpleXMLElement is accepted as preloaded document.
        $isXmlElement = $preloadedDocument instanceof \SimpleXMLElement;

        if ($isXmlElement) {
            $this->xml = $preloadedDocument;
        }

        return $isXmlElement;
    }
820
821
    /**
     * {@inheritDoc}
     *
     * For METS documents this is the METS node stored in $this->mets.
     *
     * @see Doc::getDocument()
     */
    protected function getDocument()
    {
        return $this->mets;
    }
829
830
    /**
     * This builds an array of the document's metadata sections
     *
     * On first call it processes every <mets:dmdSec> and every allowed child
     * of every <mets:amdSec>, filling $this->mdSec, $this->dmdSec and the
     * amdSec ID -> child IDs concordance in $this->amdSecChildIds.
     *
     * @access protected
     *
     * @return array Array of metadata sections with their IDs as array key
     */
    protected function _getMdSec()
    {
        if ($this->mdSecLoaded) {
            return $this->mdSec;
        }

        $this->loadFormats();

        // Descriptive sections are registered in both $mdSec and $dmdSec.
        foreach ($this->mets->xpath('./mets:dmdSec') as $dmdSecTag) {
            $dmdSec = $this->processMdSec($dmdSecTag);
            if ($dmdSec === null) {
                continue;
            }

            $this->mdSec[$dmdSec['id']] = $dmdSec;
            $this->dmdSec[$dmdSec['id']] = $dmdSec;
        }

        // Administrative sections live one level deeper, inside <mets:amdSec>.
        foreach ($this->mets->xpath('./mets:amdSec') as $amdSecTag) {
            $childIds = [];

            foreach ($amdSecTag->children('http://www.loc.gov/METS/') as $mdSecTag) {
                if (!in_array($mdSecTag->getName(), self::ALLOWED_AMD_SEC)) {
                    continue;
                }

                // TODO: Should we check that the format may occur within this type (e.g., to ignore VIDEOMD within rightsMD)?
                $mdSec = $this->processMdSec($mdSecTag);
                if ($mdSec === null) {
                    continue;
                }

                $this->mdSec[$mdSec['id']] = $mdSec;
                $childIds[] = $mdSec['id'];
            }

            // Only an amdSec with an ID can be referenced via @ADMID.
            $amdSecId = (string) $amdSecTag->attributes()->ID;
            if (!empty($amdSecId)) {
                $this->amdSecChildIds[$amdSecId] = $childIds;
            }
        }

        $this->mdSecLoaded = true;

        return $this->mdSec;
    }
879
880
    /**
     * This returns the dmdSec entries after making sure that the
     * metadata sections have been loaded
     *
     * @access protected
     *
     * @return array The content of $this->dmdSec
     */
    protected function _getDmdSec()
    {
        // $this->dmdSec is filled as part of loading all metadata sections.
        $this->_getMdSec();

        return $this->dmdSec;
    }
885
886
    /**
     * Processes an element of METS `mdSecType`.
     *
     * The metadata type is taken from the mdWrap's @MDTYPE or, if only
     * `MDTYPE="OTHER"` wraps are present, from @OTHERMDTYPE. Sections without
     * an ID, with a type missing from $this->formats, or without the
     * configured root element inside <mets:xmlData> yield null.
     *
     * @access protected
     *
     * @param \SimpleXMLElement $element
     *
     * @return array|null The processed metadata section
     */
    protected function processMdSec($element)
    {
        $mdId = (string) $element->attributes()->ID;
        if (empty($mdId)) {
            return null;
        }

        $this->registerNamespaces($element);

        // Prefer a regular @MDTYPE; fall back to @OTHERMDTYPE only if there is none.
        $typeAttribute = $element->xpath('./mets:mdWrap[not(@MDTYPE="OTHER")]/@MDTYPE');
        if ($typeAttribute) {
            $mdWrapOpening = './mets:mdWrap[@MDTYPE="';
        } else {
            $typeAttribute = $element->xpath('./mets:mdWrap[@MDTYPE="OTHER"]/@OTHERMDTYPE');
            $mdWrapOpening = './mets:mdWrap[@MDTYPE="OTHER"][@OTHERMDTYPE="';
        }

        if (!$typeAttribute) {
            return null;
        }

        $type = (string) $typeAttribute[0];
        if (empty($this->formats[$type])) {
            // Unknown metadata format.
            return null;
        }

        // The lowercased type is used as namespace prefix of the root element.
        $xml = $element->xpath(
            $mdWrapOpening . $type . '"]/mets:xmlData/' . strtolower($type) . ':' . $this->formats[$type]['rootElement']
        );
        if (empty($xml)) {
            return null;
        }

        $this->registerNamespaces($xml[0]);

        return [
            'id' => $mdId,
            'section' => $element->getName(),
            'type' => $type,
            'xml' => $xml[0],
        ];
    }
928
929
    /**
     * This builds the file ID -> USE concordance
     *
     * Only files of file groups whose @USE is configured in the extension
     * configuration are registered. As a side effect $this->fileInfos and
     * $this->hasFulltext are populated.
     *
     * @access protected
     *
     * @return array Array of file use groups with file IDs
     */
    protected function _getFileGrps()
    {
        if ($this->fileGrpsLoaded) {
            return $this->fileGrps;
        }

        // Get configured USE attributes.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        $useGrps = GeneralUtility::trimExplode(',', $extConf['fileGrpImages']);
        foreach (['fileGrpThumbs', 'fileGrpDownload', 'fileGrpFulltext', 'fileGrpAudio'] as $optionalGrp) {
            if (!empty($extConf[$optionalGrp])) {
                $useGrps = array_merge($useGrps, GeneralUtility::trimExplode(',', $extConf[$optionalGrp]));
            }
        }

        // Get all file groups.
        $fileGrps = $this->mets->xpath('./mets:fileSec/mets:fileGrp');
        if (!empty($fileGrps)) {
            // Build concordance for configured USE attributes.
            foreach ($fileGrps as $fileGrp) {
                $use = (string) $fileGrp['USE'];
                if (!in_array($use, $useGrps)) {
                    continue;
                }

                foreach ($fileGrp->children('http://www.loc.gov/METS/')->file as $file) {
                    $fileAttributes = $file->attributes();
                    $fileId = (string) $fileAttributes->ID;

                    $this->fileGrps[$fileId] = $use;
                    $this->fileInfos[$fileId] = [
                        'fileGrp' => $use,
                        'admId' => (string) $fileAttributes->ADMID,
                        'dmdId' => (string) $fileAttributes->DMDID,
                    ];
                }
            }
        }

        // Are there any fulltext files available?
        if (!empty($extConf['fileGrpFulltext'])) {
            $fulltextGrps = GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext']);
            if (array_intersect($fulltextGrps, $this->fileGrps) !== []) {
                $this->hasFulltext = true;
            }
        }

        $this->fileGrpsLoaded = true;

        return $this->fileGrps;
    }
983
984
    /**
     * This returns the file infos after making sure that the file groups
     * have been loaded
     *
     * @access protected
     *
     * @return array The content of $this->fileInfos
     */
    protected function _getFileInfos()
    {
        // $this->fileInfos is filled while the file groups are loaded.
        $this->_getFileGrps();

        return $this->fileInfos;
    }
994
995
    /**
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Doc::prepareMetadataArray()
     */
    protected function prepareMetadataArray($cPid)
    {
        // Collect the @ID of every logical structure node that carries a @DMDID.
        $ids = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID]/@ID');
        if (empty($ids)) {
            return;
        }

        foreach ($ids as $id) {
            $logId = (string) $id;
            $this->metadataArray[$logId] = $this->getMetadata($logId, $cPid);
        }
    }
1010
1011
    /**
     * Getter for $this->mets, used by __get()
     *
     * @access protected
     *
     * @return \SimpleXMLElement The XML's METS part as \SimpleXMLElement object
     */
    protected function _getMets()
    {
        return $this->mets;
    }
1022
1023
    /**
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Doc::_getPhysicalStructure()
     */
    protected function _getPhysicalStructure()
    {
        // The physical structure is only built once.
        if ($this->physicalStructureLoaded) {
            return $this->physicalStructure;
        }

        // Does the document have a structMap node of type "PHYSICAL"?
        $elementNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div');
        if (!empty($elementNodes)) {
            // Get file groups.
            $fileUse = $this->_getFileGrps();

            // Stores the attributes and file pointers of one physical <mets:div>
            // in $this->physicalStructureInfo and returns the node's ID.
            $registerNode = function ($node) use ($fileUse) {
                $nodeId = (string) $node['ID'];

                $this->physicalStructureInfo[$nodeId]['id'] = $nodeId;
                $this->physicalStructureInfo[$nodeId]['dmdId'] = (isset($node['DMDID']) ? (string) $node['DMDID'] : '');
                $this->physicalStructureInfo[$nodeId]['admId'] = (isset($node['ADMID']) ? (string) $node['ADMID'] : '');
                $this->physicalStructureInfo[$nodeId]['order'] = (isset($node['ORDER']) ? (string) $node['ORDER'] : '');
                $this->physicalStructureInfo[$nodeId]['label'] = (isset($node['LABEL']) ? (string) $node['LABEL'] : '');
                $this->physicalStructureInfo[$nodeId]['orderlabel'] = (isset($node['ORDERLABEL']) ? (string) $node['ORDERLABEL'] : '');
                $this->physicalStructureInfo[$nodeId]['type'] = (string) $node['TYPE'];
                $this->physicalStructureInfo[$nodeId]['contentIds'] = (isset($node['CONTENTIDS']) ? (string) $node['CONTENTIDS'] : '');

                // Get the file representations from fileSec node.
                foreach ($node->children('http://www.loc.gov/METS/')->fptr as $fptr) {
                    $fileId = (string) $fptr->attributes()->FILEID;
                    // Check if file has valid @USE attribute.
                    if (!empty($fileUse[$fileId])) {
                        $this->physicalStructureInfo[$nodeId]['files'][$fileUse[$fileId]] = $fileId;
                    }
                }

                return $nodeId;
            };

            // Get the physical sequence's metadata.
            $physNode = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]');
            $physSeq[0] = $registerNode($physNode[0]);

            // Build the physical elements' array from the physical structMap node.
            foreach ($elementNodes as $elementNode) {
                $elements[(int) $elementNode['ORDER']] = $registerNode($elementNode);
            }

            // Sort array by keys (= @ORDER).
            if (ksort($elements)) {
                // Set total number of pages/tracks.
                $this->numPages = count($elements);
                // Merge and re-index the array to get nice numeric indexes.
                $this->physicalStructure = array_merge($physSeq, $elements);
            }
        }

        $this->physicalStructureLoaded = true;

        return $this->physicalStructure;
    }
1085
1086
    /**
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Doc::_getSmLinks()
     */
    protected function _getSmLinks()
    {
        if ($this->smLinksLoaded) {
            return $this->smLinks;
        }

        $smLinks = $this->mets->xpath('./mets:structLink/mets:smLink');
        if (!empty($smLinks)) {
            foreach ($smLinks as $smLink) {
                // @xlink:from / @xlink:to are stored in both lookup directions.
                $xlink = $smLink->attributes('http://www.w3.org/1999/xlink');
                $from = (string) $xlink->from;
                $to = (string) $xlink->to;

                $this->smLinks['l2p'][$from][] = $to;
                $this->smLinks['p2l'][$to][] = $from;
            }
        }

        $this->smLinksLoaded = true;

        return $this->smLinks;
    }
1104
1105
    /**
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Doc::_getThumbnail()
     */
    protected function _getThumbnail($forceReload = false)
    {
        // Reuse the cached value unless a reload is explicitly requested.
        if ($this->thumbnailLoaded && !$forceReload) {
            return $this->thumbnail;
        }

        // Retain current PID.
        $cPid = $this->cPid ?: $this->pid;
        if (!$cPid) {
            $this->logger->error('Invalid PID ' . $cPid . ' for structure definitions');
            $this->thumbnailLoaded = true;
            return $this->thumbnail;
        }

        // Load extension configuration.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        if (empty($extConf['fileGrpThumbs'])) {
            $this->logger->warning('No fileGrp for thumbnails specified');
            $this->thumbnailLoaded = true;
            return $this->thumbnail;
        }

        $strctId = $this->_getToplevelId();
        $metadata = $this->getTitledata($cPid);

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_structures');

        // Get structure element to get thumbnail from.
        $structures = $queryBuilder
            ->select('tx_dlf_structures.thumbnail AS thumbnail')
            ->from('tx_dlf_structures')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($metadata['type'][0])),
                Helper::whereExpression('tx_dlf_structures')
            )
            ->setMaxResults(1)
            ->execute()
            ->fetchAll();

        if (count($structures) !== 1) {
            $this->logger->error('No structure of type "' . $metadata['type'][0] . '" found in database');
            $this->thumbnailLoaded = true;
            return $this->thumbnail;
        }

        $structure = $structures[0];
        // Get desired thumbnail structure if not the toplevel structure itself.
        if (!empty($structure['thumbnail'])) {
            $strctType = Helper::getIndexNameFromUid($structure['thumbnail'], 'tx_dlf_structures', $cPid);
            // Check if this document has a structure element of the desired type.
            $strctIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@TYPE="' . $strctType . '"]/@ID');
            if (!empty($strctIds)) {
                $strctId = (string) $strctIds[0];
            }
        }

        // Load smLinks.
        $this->_getSmLinks();

        // Get thumbnail location: the first configured fileGrp with a matching file wins.
        $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
        while ($fileGrpThumb = array_shift($fileGrpsThumb)) {
            if (
                $this->_getPhysicalStructure()
                && !empty($this->smLinks['l2p'][$strctId])
                && !empty($this->physicalStructureInfo[$this->smLinks['l2p'][$strctId][0]]['files'][$fileGrpThumb])
            ) {
                // Use the first physical element linked to the chosen structure.
                $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$this->smLinks['l2p'][$strctId][0]]['files'][$fileGrpThumb]);
                break;
            } elseif (!empty($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb])) {
                // Otherwise fall back to the physical structure entry at index 1.
                $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb]);
                break;
            }
        }

        $this->thumbnailLoaded = true;

        return $this->thumbnail;
    }
1184
1185
    /**
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Doc::_getToplevelId()
     */
    protected function _getToplevelId()
    {
        if (!empty($this->toplevelId)) {
            return $this->toplevelId;
        }

        // Get all logical structure nodes with metadata, but without associated METS-Pointers.
        $divs = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID and not(./mets:mptr)]');
        if (empty($divs)) {
            return $this->toplevelId;
        }

        // Load smLinks.
        $this->_getSmLinks();

        foreach ($divs as $div) {
            $candidateId = (string) $div['ID'];
            // Are there physical structure nodes for this logical structure?
            $hasPhysicalNodes = array_key_exists($candidateId, $this->smLinks['l2p']);

            // A linked node always wins; an unlinked one is only remembered
            // as long as no candidate has been stored yet.
            if ($hasPhysicalNodes || empty($this->toplevelId)) {
                $this->toplevelId = $candidateId;
            }

            if ($hasPhysicalNodes) {
                break;
            }
        }

        return $this->toplevelId;
    }
1213
1214
    /**
     * Try to determine URL of parent document.
     *
     * The result is cached in $this->parentHref; an empty string is stored
     * when no ancestor with a <mets:mptr> child exists.
     *
     * @return string URL of the parent document, or an empty string if none was found
     */
    public function _getParentHref()
    {
        if ($this->parentHref === null) {
            $this->parentHref = '';

            // Resolve the toplevel ID through its lazy getter. Reading the
            // property directly would search for @ID="" (and cache an empty
            // result) whenever the toplevel ID has not been determined yet.
            $toplevelId = $this->_getToplevelId();

            // Get the closest ancestor of the current document which has a MPTR child.
            $parentMptr = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $toplevelId . '"]/ancestor::mets:div[./mets:mptr][1]/mets:mptr');
            if (!empty($parentMptr)) {
                $this->parentHref = (string) $parentMptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
            }
        }

        return $this->parentHref;
    }
1233
1234
    /**
     * This magic method is executed prior to any serialization of the object
     * @see __wakeup()
     *
     * @access public
     *
     * @return array Properties to be serialized
     */
    public function __sleep()
    {
        // \SimpleXMLElement objects can't be serialized, so the XML is kept
        // as a string in $this->asXML and serialized instead of $this->xml.
        $this->asXML = $this->xml->asXML();

        return ['uid', 'pid', 'recordId', 'parentId', 'asXML'];
    }
1248
1249
    /**
     * This magic method is used for setting a string value for the object
     *
     * @access public
     *
     * @return string String representing the METS object
     */
    public function __toString()
    {
        // Copy the METS node into a fresh DOM document to serialize it on its own.
        $dom = new \DOMDocument('1.0', 'utf-8');
        $metsNode = $dom->importNode(dom_import_simplexml($this->mets), true);
        $dom->appendChild($metsNode);
        $dom->formatOutput = true;

        return $dom->saveXML();
    }
1263
1264
    /**
     * This magic method is executed after the object is deserialized
     * @see __sleep()
     *
     * @access public
     *
     * @return void
     */
    public function __wakeup()
    {
        $xml = Helper::getXmlFileAsString($this->asXML);

        if ($xml === false) {
            // init() is not run on this path, so the logger has to be created here.
            $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(static::class);
            $this->logger->error('Could not load XML after deserialization');
            return;
        }

        $this->asXML = '';
        $this->xml = $xml;
        // Rebuild the unserializable properties.
        $this->init('');
    }
1285
}
1286