Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( 982517...802e26 )
by Sebastian
21s queued 11s
created

MetsDocument::getLogicalStructure()   B

Complexity

Conditions 7
Paths 7

Size

Total Lines 26
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 15
dl 0
loc 26
rs 8.8333
c 0
b 0
f 0
cc 7
nc 7
nop 2
1
<?php
2
namespace Kitodo\Dlf\Common;
3
4
use Ubl\Iiif\Tools\IiifHelper;
5
use Ubl\Iiif\Services\AbstractImageService;
6
7
/**
8
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
9
 *
10
 * This file is part of the Kitodo and TYPO3 projects.
11
 *
12
 * @license GNU General Public License version 3 or later.
13
 * For the full copyright and license information, please read the
14
 * LICENSE.txt file that was distributed with this source code.
15
 */
16
17
/**
18
 * MetsDocument class for the 'dlf' extension.
19
 *
20
 * @author	Sebastian Meyer <[email protected]>
21
 * @author	Henrik Lochmann <[email protected]>
22
 * @package	TYPO3
23
 * @subpackage	tx_dlf
24
 * @access	public
25
 */
26
final class MetsDocument extends Document
27
{
28
    /**
29
     * This holds the whole XML file as string for serialization purposes
30
     * @see __sleep() / __wakeup()
31
     *
32
     * @var string
33
     * @access protected
34
     */
35
    protected $asXML = '';
36
37
    /**
38
     * This holds the XML file's dmdSec parts with their IDs as array key
39
     *
40
     * @var array
41
     * @access protected
42
     */
43
    protected $dmdSec = [];
44
45
    /**
46
     * Are the METS file's dmdSecs loaded?
47
     * @see $dmdSec
48
     *
49
     * @var boolean
50
     * @access protected
51
     */
52
    protected $dmdSecLoaded = FALSE;
53
54
    /**
55
     * The extension key
56
     *
57
     * @var	string
58
     * @access public
59
     */
60
    public static $extKey = 'dlf';
61
62
    /**
63
     * This holds the file ID -> USE concordance
64
     * @see _getFileGrps()
65
     *
66
     * @var array
67
     * @access protected
68
     */
69
    protected $fileGrps = [];
70
71
    /**
72
     * Are the file groups loaded?
73
     * @see $fileGrps
74
     *
75
     * @var boolean
76
     * @access protected
77
     */
78
    protected $fileGrpsLoaded = FALSE;
79
80
    /**
81
     * Are the available metadata formats loaded?
82
     * @see $formats
83
     *
84
     * @var boolean
85
     * @access protected
86
     */
87
    protected $formatsLoaded = FALSE;
88
89
    /**
90
     * This holds the XML file's METS part as \SimpleXMLElement object
91
     *
92
     * @var \SimpleXMLElement
93
     * @access protected
94
     */
95
    protected $mets;
96
97
    /**
98
     * This holds the whole XML file as \SimpleXMLElement object
99
     *
100
     * @var \SimpleXMLElement
101
     * @access protected
102
     */
103
    protected $xml;
104
105
    /**
106
     *
107
     * {@inheritDoc}
108
     * @see \Kitodo\Dlf\Common\Document::establishRecordId()
109
     */
110
    protected function establishRecordId($pid)
    {
        // Determines the record identifier of this document. $pid is part of
        // the inherited signature and is not used by this implementation.

        // The METS root element's @OBJID, when present, becomes the record ID.
        if (!empty($this->mets['OBJID'])) {
            $this->recordId = (string) $this->mets['OBJID'];
        }
        // Let registered hook objects post-process the record ID.
        foreach (Helper::getHookObjects('Classes/Common/MetsDocument.php') as $hook) {
            if (!method_exists($hook, 'construct_postProcessRecordId')) {
                continue;
            }
            $hook->construct_postProcessRecordId($this->xml, $this->recordId);
        }
    }
124
125
    /**
126
     *
127
     * {@inheritDoc}
128
     * @see \Kitodo\Dlf\Common\Document::getDownloadLocation()
129
     */
130
    public function getDownloadLocation($id)
    {
        // Returns the URL a client should use to download the file with the
        // given METS file ID. Plain files are returned as-is; IIIF image
        // services and IIP (FlashPix) servers get a format-specific URL.
        $fileMimeType = $this->getFileMimeType($id);
        $fileLocation = $this->getFileLocation($id);
        if ($fileMimeType === 'application/vnd.kitodo.iiif') {
            // Normalize the location to the service's "info.json" document.
            // This was a nested, unparenthesised ternary before, which PHP < 8
            // evaluates left-to-right (appending "info.json" to URLs that
            // already ended with it) and PHP 8 rejects outright.
            if (substr($fileLocation, -9) !== 'info.json') {
                $fileLocation .= (substr($fileLocation, -1) === '/' ? 'info.json' : '/info.json');
            }
            $conf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
            IiifHelper::setUrlReader(IiifUrlReader::getInstance());
            IiifHelper::setMaxThumbnailHeight($conf['iiifThumbnailHeight']);
            IiifHelper::setMaxThumbnailWidth($conf['iiifThumbnailWidth']);
            $service = IiifHelper::loadIiifResource($fileLocation);
            if ($service instanceof AbstractImageService) {
                return $service->getImageUrl();
            }
            // No usable image service: fall through and return the info.json URL.
        } elseif ($fileMimeType === 'application/vnd.netfpx') {
            // Compare (not assign) the MIME type, so only IIP resources get the
            // conversion parameter appended.
            $baseURL = $fileLocation.(strpos($fileLocation, '?') === FALSE ? '?' : '');
            // TODO CVT is an optional IIP server capability; in theory, capabilities should be determined in the object request with '&obj=IIP-server'
            return $baseURL.'&CVT=jpeg';
        }
        return $fileLocation;
    }
151
152
    /**
153
     * {@inheritDoc}
154
     * @see \Kitodo\Dlf\Common\Document::getFileLocation()
155
     */
156
    public function getFileLocation($id)
    {
        // Looks up the xlink:href of the URL-type FLocat belonging to the
        // METS file node with the given @ID. Returns '' (and logs a warning)
        // when the ID is empty or no such node exists.
        $location = empty($id)
            ? FALSE
            : $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="'.$id.'"]/mets:FLocat[@LOCTYPE="URL"]');
        if (!$location) {
            Helper::devLog('There is no file node with @ID "'.$id.'"', DEVLOG_SEVERITY_WARNING);
            return '';
        }
        return (string) $location[0]->attributes('http://www.w3.org/1999/xlink')->href;
    }
165
166
    /**
167
     * {@inheritDoc}
168
     * @see \Kitodo\Dlf\Common\Document::getFileMimeType()
169
     */
170
    public function getFileMimeType($id)
    {
        // Reads the @MIMETYPE attribute of the METS file node with the given
        // @ID. Returns '' (and logs a warning) when the ID is empty, the node
        // is missing or it carries no MIME type.
        $mimetype = empty($id)
            ? FALSE
            : $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="'.$id.'"]/@MIMETYPE');
        if (!$mimetype) {
            Helper::devLog('There is no file node with @ID "'.$id.'" or no MIME type specified', DEVLOG_SEVERITY_WARNING);
            return '';
        }
        return (string) $mimetype[0];
    }
179
180
    /**
181
     * {@inheritDoc}
182
     * @see \Kitodo\Dlf\Common\Document::getLogicalStructure()
183
     */
184
    public function getLogicalStructure($id, $recursive = FALSE)
    {
        // Non-recursive mode: returns the details of one logical unit (served
        // from $this->logicalUnits when already known).
        // Recursive mode: appends the full tree below the matching node(s) to
        // $this->tableOfContents and returns an empty array.
        if (!$recursive && !empty($this->logicalUnits[$id])) {
            return $this->logicalUnits[$id];
        }
        // An empty ID selects all top-level logical units, otherwise the one
        // div carrying the requested @ID.
        $query = empty($id)
            ? './mets:structMap[@TYPE="LOGICAL"]/mets:div'
            : './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$id.'"]';
        $divs = $this->mets->xpath($query);
        if (empty($divs)) {
            return [];
        }
        if ($recursive) {
            // Walk every hit recursively and fill the table of contents.
            foreach ($divs as $div) {
                $this->tableOfContents[] = $this->getLogicalStructureInfo($div, TRUE);
            }
            return [];
        }
        // Only the first XPath hit is of interest.
        return $this->getLogicalStructureInfo($divs[0]);
    }
211
212
    /**
213
     * This gets details about a logical structure element
214
     *
215
     * @access protected
216
     *
217
     * @param \SimpleXMLElement $structure: The logical structure node
218
     * @param boolean $recursive: Whether to include the child elements
219
     *
220
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
221
     */
222
    protected function getLogicalStructureInfo(\SimpleXMLElement $structure, $recursive = FALSE)
    {
        // Collects id, label, type, linked physical page/mptr target, thumbnail
        // and file references of one logical structure node, stores the result
        // in $this->logicalUnits and, if requested, descends into child divs.

        // Get attributes. Start from an empty array so that a node without any
        // attributes does not leave $attributes undefined.
        $attributes = [];
        foreach ($structure->attributes() as $attribute => $value) {
            $attributes[$attribute] = (string) $value;
        }
        // Load plugin configuration.
        $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        // Extract identity information.
        $details = [];
        $details['id'] = (isset($attributes['ID']) ? $attributes['ID'] : '');
        $details['dmdId'] = (isset($attributes['DMDID']) ? $attributes['DMDID'] : '');
        $details['label'] = (isset($attributes['LABEL']) ? $attributes['LABEL'] : '');
        $details['orderlabel'] = (isset($attributes['ORDERLABEL']) ? $attributes['ORDERLABEL'] : '');
        $details['contentIds'] = (isset($attributes['CONTENTIDS']) ? $attributes['CONTENTIDS'] : '');
        $details['volume'] = '';
        // Set volume information only if no label is set and this is the toplevel structure element.
        if (empty($details['label'])
            && $details['id'] == $this->_getToplevelId()) {
            $metadata = $this->getMetadata($details['id']);
            if (!empty($metadata['volume'][0])) {
                $details['volume'] = $metadata['volume'][0];
            }
        }
        $details['pagination'] = '';
        $details['type'] = (isset($attributes['TYPE']) ? $attributes['TYPE'] : '');
        $details['thumbnailId'] = '';
        // Load smLinks.
        $this->_getSmLinks();
        // Load physical structure.
        $this->_getPhysicalStructure();
        // Get the physical page or external file this structure element is pointing at.
        $details['points'] = '';
        // Is there a mptr node?
        if (count($structure->children('http://www.loc.gov/METS/')->mptr)) {
            // Yes. Get the file reference.
            $details['points'] = (string) $structure->children('http://www.loc.gov/METS/')->mptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
        } elseif (!empty($this->physicalStructure)
            && array_key_exists($details['id'], $this->smLinks['l2p'])) {
            // There are physical elements and this logical unit is linked to at least one of them.
            $firstPhysicalId = $this->smLinks['l2p'][$details['id']][0];
            $details['points'] = max(intval(array_search($firstPhysicalId, $this->physicalStructure, TRUE)), 1);
            if (!empty($this->physicalStructureInfo[$firstPhysicalId]['files'][$extConf['fileGrpThumbs']])) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$firstPhysicalId]['files'][$extConf['fileGrpThumbs']];
            }
            // Get page/track number of the first page/track related to this structure element.
            $details['pagination'] = $this->physicalStructureInfo[$firstPhysicalId]['orderlabel'];
        } elseif ($details['id'] == $this->_getToplevelId()) {
            // This is the toplevel structure element: point to itself.
            $details['points'] = 1;
            if (!empty($this->physicalStructure)
                && !empty($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$extConf['fileGrpThumbs']])) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$extConf['fileGrpThumbs']];
            }
        }
        // Get the files this structure element is pointing at.
        $details['files'] = [];
        $fileUse = $this->_getFileGrps();
        // Get the file representations from fileSec node.
        foreach ($structure->children('http://www.loc.gov/METS/')->fptr as $fptr) {
            $fileId = (string) $fptr->attributes()->FILEID;
            // Check if file has valid @USE attribute.
            if (!empty($fileUse[$fileId])) {
                $details['files'][$fileUse[$fileId]] = $fileId;
            }
        }
        // Keep for later usage.
        $this->logicalUnits[$details['id']] = $details;
        // Walk the structure recursively? And are there any children of the current element?
        if ($recursive
            && count($structure->children('http://www.loc.gov/METS/')->div)) {
            $details['children'] = [];
            foreach ($structure->children('http://www.loc.gov/METS/')->div as $child) {
                // Repeat for all children.
                $details['children'][] = $this->getLogicalStructureInfo($child, TRUE);
            }
        }
        return $details;
    }
297
298
    /**
299
     * {@inheritDoc}
300
     * @see \Kitodo\Dlf\Common\Document::getMetadata()
301
     */
302
    public function getMetadata($id, $cPid = 0)
    {
        // Extracts the metadata of the logical structure node with the given
        // @ID: first via the format's extractor class, then via the XPath
        // definitions stored in the database for page $cPid. Returns an array
        // of index_name => list of values, or [] when no PID, no dmdSec or no
        // supported metadata format is available.

        // Make sure $cPid is a non-negative integer.
        $cPid = max(intval($cPid), 0);
        // If $cPid is not given, try to get it elsewhere.
        if (!$cPid
            && ($this->cPid || $this->pid)) {
            // Retain current PID.
            $cPid = ($this->cPid ? $this->cPid : $this->pid);
        } elseif (!$cPid) {
            Helper::devLog('Invalid PID '.$cPid.' for metadata definitions', DEVLOG_SEVERITY_WARNING);
            return [];
        }
        // Get metadata from parsed metadata array if available.
        if (!empty($this->metadataArray[$id])
            && $this->metadataArray[0] == $cPid) {
            return $this->metadataArray[$id];
        }
        // Initialize metadata array with empty values.
        $metadata = [
            'title' => [],
            'title_sorting' => [],
            'author' => [],
            'place' => [],
            'year' => [],
            'prod_id' => [],
            'record_id' => [],
            'opac_id' => [],
            'union_id' => [],
            'urn' => [],
            'purl' => [],
            'type' => [],
            'volume' => [],
            'volume_sorting' => [],
            'collection' => [],
            'owner' => [],
            'document_format' => [],
        ];
        $metadata['document_format'][] = 'METS';
        // Get the logical structure node's DMDID.
        if (!empty($this->logicalUnits[$id])) {
            $dmdId = $this->logicalUnits[$id]['dmdId'];
        } else {
            // The XPath may match nothing; do not index into an empty result.
            $dmdIdNodes = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$id.'"]/@DMDID');
            $dmdId = (!empty($dmdIdNodes) ? (string) $dmdIdNodes[0] : '');
        }
        if (empty($dmdId)) {
            // There is no dmdSec for this structure node.
            return [];
        }
        // Load available metadata formats and dmdSecs.
        $this->loadFormats();
        $this->_getDmdSec();
        // The referenced dmdSec may not have been loaded (missing or unsupported
        // format); treat that as an empty format name instead of reading an
        // undefined index.
        $dmdType = (isset($this->dmdSec[$dmdId]['type']) ? $this->dmdSec[$dmdId]['type'] : '');
        // Is this metadata format supported?
        if (!empty($this->formats[$dmdType])) {
            if (!empty($this->formats[$dmdType]['class'])) {
                $class = $this->formats[$dmdType]['class'];
                // Get the metadata from class.
                if (class_exists($class)
                    && ($obj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance($class)) instanceof MetadataInterface) {
                    $obj->extractMetadata($this->dmdSec[$dmdId]['xml'], $metadata);
                } else {
                    Helper::devLog('Invalid class/method "'.$class.'->extractMetadata()" for metadata format "'.$dmdType.'"', DEVLOG_SEVERITY_WARNING);
                }
            }
        } else {
            Helper::devLog('Unsupported metadata format "'.$dmdType.'" in dmdSec with @ID "'.$dmdId.'"', DEVLOG_SEVERITY_WARNING);
            return [];
        }
        // Get the structure's type.
        if (!empty($this->logicalUnits[$id])) {
            $metadata['type'] = [$this->logicalUnits[$id]['type']];
        } else {
            $struct = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$id.'"]/@TYPE');
            $metadata['type'] = [(!empty($struct) ? (string) $struct[0] : '')];
        }
        // Get the additional metadata from database. $cPid is cast explicitly
        // because it may have been taken from $this->cPid / $this->pid above.
        $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'tx_dlf_metadata.index_name AS index_name,tx_dlf_metadataformat.xpath AS xpath,tx_dlf_metadataformat.xpath_sorting AS xpath_sorting,tx_dlf_metadata.is_sortable AS is_sortable,tx_dlf_metadata.default_value AS default_value,tx_dlf_metadata.format AS format',
            'tx_dlf_metadata,tx_dlf_metadataformat,tx_dlf_formats',
            'tx_dlf_metadata.pid='.intval($cPid)
            .' AND tx_dlf_metadataformat.pid='.intval($cPid)
            .' AND ((tx_dlf_metadata.uid=tx_dlf_metadataformat.parent_id AND tx_dlf_metadataformat.encoded=tx_dlf_formats.uid AND tx_dlf_formats.type='.$GLOBALS['TYPO3_DB']->fullQuoteStr($dmdType, 'tx_dlf_formats').') OR tx_dlf_metadata.format=0)'
            .Helper::whereClause('tx_dlf_metadata', TRUE)
            .Helper::whereClause('tx_dlf_metadataformat')
            .Helper::whereClause('tx_dlf_formats'),
            '',
            '',
            ''
        );
        // We need a \DOMDocument here, because SimpleXML doesn't support XPath functions properly.
        $domNode = dom_import_simplexml($this->dmdSec[$dmdId]['xml']);
        $domXPath = new \DOMXPath($domNode->ownerDocument);
        $this->registerNamespaces($domXPath);
        // OK, now make the XPath queries.
        while ($resArray = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($result)) {
            $indexName = $resArray['index_name'];
            // Set metadata field's value(s).
            if ($resArray['format'] > 0
                && !empty($resArray['xpath'])
                && ($values = $domXPath->evaluate($resArray['xpath'], $domNode))) {
                if ($values instanceof \DOMNodeList
                    && $values->length > 0) {
                    $metadata[$indexName] = [];
                    foreach ($values as $value) {
                        $metadata[$indexName][] = trim((string) $value->nodeValue);
                    }
                } elseif (!($values instanceof \DOMNodeList)) {
                    // XPath functions yield a scalar rather than a node list.
                    $metadata[$indexName] = [trim((string) $values)];
                }
            }
            // Set default value if applicable.
            if (empty($metadata[$indexName][0])
                && strlen($resArray['default_value']) > 0) {
                $metadata[$indexName] = [$resArray['default_value']];
            }
            // Set sorting value if applicable.
            if (!empty($metadata[$indexName])
                && $resArray['is_sortable']) {
                if ($resArray['format'] > 0
                    && !empty($resArray['xpath_sorting'])
                    && ($values = $domXPath->evaluate($resArray['xpath_sorting'], $domNode))) {
                    if ($values instanceof \DOMNodeList
                        && $values->length > 0) {
                        $metadata[$indexName.'_sorting'][0] = trim((string) $values->item(0)->nodeValue);
                    } elseif (!($values instanceof \DOMNodeList)) {
                        $metadata[$indexName.'_sorting'][0] = trim((string) $values);
                    }
                }
                // Fall back to the plain value when no sorting value was found.
                if (empty($metadata[$indexName.'_sorting'][0])) {
                    $metadata[$indexName.'_sorting'][0] = $metadata[$indexName][0];
                }
            }
        }
        // Set title to empty string if not present.
        if (empty($metadata['title'][0])) {
            $metadata['title'][0] = '';
            $metadata['title_sorting'][0] = '';
        }
        // Add collections from database to toplevel element if document is already saved.
        if (\TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($this->uid)
            && $id == $this->_getToplevelId()) {
            $result = $GLOBALS['TYPO3_DB']->exec_SELECT_mm_query(
                'tx_dlf_collections.index_name AS index_name',
                'tx_dlf_documents',
                'tx_dlf_relations',
                'tx_dlf_collections',
                'AND tx_dlf_collections.pid='.intval($cPid)
                .' AND tx_dlf_documents.uid='.intval($this->uid)
                .' AND tx_dlf_relations.ident='.$GLOBALS['TYPO3_DB']->fullQuoteStr('docs_colls', 'tx_dlf_relations')
                .' AND tx_dlf_collections.sys_language_uid IN (-1,0)'
                .Helper::whereClause('tx_dlf_documents')
                .Helper::whereClause('tx_dlf_collections'),
                'tx_dlf_collections.index_name',
                '',
                ''
            );
            while ($resArray = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($result)) {
                if (!in_array($resArray['index_name'], $metadata['collection'])) {
                    $metadata['collection'][] = $resArray['index_name'];
                }
            }
        }
        return $metadata;
    }
466
467
    /**
468
     * {@inheritDoc}
469
     * @see \Kitodo\Dlf\Common\Document::getRawText()
470
     */
471
    public function getRawText($id)
    {
        // Returns the raw fulltext for the given ID, or '' when the document
        // has no fulltext files.

        // A non-empty cached text wins.
        if (!empty($this->rawTextArray[$id])) {
            return $this->rawTextArray[$id];
        }
        // Loading the file groups is what determines $this->hasFulltext.
        $this->_getFileGrps();
        return $this->hasFulltext ? $this->getRawTextFromXml($id) : '';
    }
484
485
    /**
486
     * {@inheritDoc}
487
     * @see Document::getStructureDepth()
488
     */
489
    public function getStructureDepth($logId)
    {
        // Counts the ancestor elements of the logical div with the given @ID
        // inside the logical structMap.
        $ancestors = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$logId.'"]/ancestor::*');
        // xpath() can yield a non-array on failure; report depth 0 in that
        // case instead of passing a non-countable value to count().
        return (is_array($ancestors) ? count($ancestors) : 0);
    }
493
494
    /**
495
     * {@inheritDoc}
496
     * @see \Kitodo\Dlf\Common\Document::init()
497
     */
498
    protected function init()
    {
        // Locates the METS root inside the loaded XML and stores it in
        // $this->mets; logs an error when the document contains none.
        $this->registerNamespaces($this->xml);
        $metsNodes = $this->xml->xpath('//mets:mets');
        if (!$metsNodes) {
            Helper::devLog('No METS part found in document with UID '.$this->uid, DEVLOG_SEVERITY_ERROR);
            return;
        }
        $this->mets = $metsNodes[0];
        // The extracted node needs its own namespace registration.
        $this->registerNamespaces($this->mets);
    }
510
511
    /**
512
     * {@inheritDoc}
513
     * @see \Kitodo\Dlf\Common\Document::loadLocation()
514
     */
515
    protected function loadLocation($location)
    {
        // Fetches the XML file at $location and parses it into $this->xml.
        // Returns TRUE on success and FALSE (after logging an error) when the
        // file cannot be fetched or is not well-formed XML.

        // Turn off libxml's error logging.
        $libxmlErrors = libxml_use_internal_errors(TRUE);
        // Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept
        $previousValueOfEntityLoader = libxml_disable_entity_loader(TRUE);
        // Load XML from file. getUrl() may return FALSE, which must not be
        // handed to simplexml_load_string().
        $content = \TYPO3\CMS\Core\Utility\GeneralUtility::getUrl($location);
        $xml = (is_string($content) && $content !== '' ? simplexml_load_string($content) : FALSE);
        // Reset entity loader setting.
        libxml_disable_entity_loader($previousValueOfEntityLoader);
        // Reset libxml's error logging.
        libxml_use_internal_errors($libxmlErrors);
        // Set some basic properties.
        if ($xml !== FALSE) {
            $this->xml = $xml;
            return TRUE;
        }
        Helper::devLog('Could not load XML file from "'.$location.'"', DEVLOG_SEVERITY_ERROR);
        // Make the failure result explicit instead of falling off the end.
        return FALSE;
    }
534
535
    /**
536
     * {@inheritDoc}
537
     * @see \Kitodo\Dlf\Common\Document::ensureHasFulltextIsSet()
538
     */
539
    protected function ensureHasFulltextIsSet()
    {
        // $this->hasFulltext is determined while the file groups are loaded,
        // so trigger that loading unless it has already happened.
        if ($this->fileGrpsLoaded) {
            return;
        }
        $this->_getFileGrps();
    }
545
546
    /**
547
     * {@inheritDoc}
548
     * @see Document::getParentDocumentUid()
549
     */
550
    protected function getParentDocumentUidForSaving($pid, $core)
    {
        // Resolves the UID of the parent document, saving the parent first
        // when it belongs to a different PID. Returns 0 when there is no
        // (loadable) parent.

        // Get the closest ancestor of the current document which has a MPTR child.
        $parentMptr = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$this->_getToplevelId().'"]/ancestor::mets:div[./mets:mptr][1]/mets:mptr');
        if (empty($parentMptr[0])) {
            return 0;
        }
        $parentLocation = (string) $parentMptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
        // A document pointing at itself has no parent.
        if ($parentLocation == $this->location) {
            return 0;
        }
        $parentDoc = self::getInstance($parentLocation, $pid);
        if (!$parentDoc->ready) {
            return 0;
        }
        if ($parentDoc->pid != $pid) {
            $parentDoc->save($pid, $core);
        }
        return $parentDoc->uid;
    }
569
570
    /**
571
     * {@inheritDoc}
572
     * @see Document::setPreloadedDocument()
573
     */
574
    protected function setPreloadedDocument($preloadedDocument)
    {
        // Adopts an already parsed document as $this->xml. Only
        // \SimpleXMLElement instances are accepted; the return value tells
        // whether the document was taken over.
        $accepted = $preloadedDocument instanceof \SimpleXMLElement;
        if ($accepted) {
            $this->xml = $preloadedDocument;
        }
        return $accepted;
    }
582
583
    /**
584
     * {@inheritDoc}
585
     * @see Document::getDocument()
586
     */
587
    protected function getDocument()
    {
        // For METS documents the underlying document is the METS node.
        return $this->mets;
    }
590
591
    /**
592
     * This returns $this->cPid via __get()
593
     *
594
     * @access protected
595
     *
596
     * @return integer The PID of the metadata definitions
597
     */
598
    protected function _getCPid()
    {
        // Plain accessor for the PID of the metadata definitions.
        return $this->cPid;
    }
601
602
    /**
603
     * This builds an array of the document's dmdSecs
604
     *
605
     * @access protected
606
     *
607
     * @return array Array of dmdSecs with their IDs as array key
608
     */
609
    protected function _getDmdSec()
    {
        // Builds (once) the map of dmdSec @ID => ['type' => format name,
        // 'xml' => metadata root node] for every dmdSec whose metadata format
        // is configured in $this->formats, and returns that map.
        if (!$this->dmdSecLoaded) {
            // Get available data formats.
            $this->loadFormats();
            // Get dmdSec nodes from METS.
            $dmdIds = $this->mets->xpath('./mets:dmdSec/@ID');
            if (!empty($dmdIds)) {
                foreach ($dmdIds as $dmdIdNode) {
                    $dmdId = (string) $dmdIdNode;
                    // Reset per section; otherwise a dmdSec with an unsupported
                    // format would inherit the XML of the previous section.
                    $type = NULL;
                    $xml = NULL;
                    if ($mdType = $this->mets->xpath('./mets:dmdSec[@ID="'.$dmdId.'"]/mets:mdWrap[not(@MDTYPE="OTHER")]/@MDTYPE')) {
                        // Standard metadata type declared in @MDTYPE.
                        if (!empty($this->formats[(string) $mdType[0]])) {
                            $type = (string) $mdType[0];
                            $xml = $this->mets->xpath('./mets:dmdSec[@ID="'.$dmdId.'"]/mets:mdWrap[@MDTYPE="'.$type.'"]/mets:xmlData/'.strtolower($type).':'.$this->formats[$type]['rootElement']);
                        }
                    } elseif ($otherType = $this->mets->xpath('./mets:dmdSec[@ID="'.$dmdId.'"]/mets:mdWrap[@MDTYPE="OTHER"]/@OTHERMDTYPE')) {
                        // Custom metadata type declared in @OTHERMDTYPE.
                        if (!empty($this->formats[(string) $otherType[0]])) {
                            $type = (string) $otherType[0];
                            $xml = $this->mets->xpath('./mets:dmdSec[@ID="'.$dmdId.'"]/mets:mdWrap[@MDTYPE="OTHER"][@OTHERMDTYPE="'.$type.'"]/mets:xmlData/'.strtolower($type).':'.$this->formats[$type]['rootElement']);
                        }
                    }
                    if (!empty($xml)) {
                        $this->dmdSec[$dmdId]['type'] = $type;
                        $this->dmdSec[$dmdId]['xml'] = $xml[0];
                        $this->registerNamespaces($this->dmdSec[$dmdId]['xml']);
                    }
                }
            }
            $this->dmdSecLoaded = TRUE;
        }
        return $this->dmdSec;
    }
637
638
    /**
639
     * This builds the file ID -> USE concordance
640
     *
641
     * @access protected
642
     *
643
     * @return array Array of file use groups with file IDs
644
     */
645
    protected function _getFileGrps()
    {
        // Builds (once) the concordance of file @ID => fileGrp @USE for all
        // file groups whose USE is configured in the extension settings, and
        // flags whether fulltext files exist. Returns the concordance.
        if ($this->fileGrpsLoaded) {
            return $this->fileGrps;
        }
        // Collect the configured USE attributes.
        $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        $useGrps = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $extConf['fileGrps']);
        foreach (['fileGrpThumbs', 'fileGrpDownload', 'fileGrpFulltext', 'fileGrpAudio'] as $confKey) {
            if (!empty($extConf[$confKey])) {
                $useGrps[] = $extConf[$confKey];
            }
        }
        // Map every file of a configured group to the group's USE.
        $fileGrps = $this->mets->xpath('./mets:fileSec/mets:fileGrp');
        foreach ($fileGrps as $fileGrp) {
            $use = (string) $fileGrp['USE'];
            if (!in_array($use, $useGrps)) {
                continue;
            }
            foreach ($fileGrp->children('http://www.loc.gov/METS/')->file as $file) {
                $this->fileGrps[(string) $file->attributes()->ID] = $use;
            }
        }
        // Fulltext is available when at least one file belongs to the fulltext group.
        if (!empty($extConf['fileGrpFulltext'])
            && in_array($extConf['fileGrpFulltext'], $this->fileGrps)) {
            $this->hasFulltext = TRUE;
        }
        $this->fileGrpsLoaded = TRUE;
        return $this->fileGrps;
    }
681
682
    /**
683
     * {@inheritDoc}
684
     * @see \Kitodo\Dlf\Common\Document::prepareMetadataArray()
685
     */
686
    protected function prepareMetadataArray($cPid) {
        // Look up the IDs of all logical structure nodes that reference metadata.
        $nodeIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID]/@ID');
        if (!$nodeIds) {
            // Nothing found (or the query failed): leave the metadata array untouched.
            return;
        }
        // Store the metadata of each node under its ID.
        foreach ($nodeIds as $nodeId) {
            $key = (string) $nodeId;
            $this->metadataArray[$key] = $this->getMetadata($key, $cPid);
        }
    }
695
696
    /**
697
     * This returns $this->mets via __get()
698
     *
699
     * @access protected
700
     *
701
     * @return \SimpleXMLElement The XML's METS part as \SimpleXMLElement object
702
     */
703
    protected function _getMets() {
        // Plain accessor: hand out the METS part held by this document.
        $metsPart = $this->mets;
        return $metsPart;
    }
706
707
    /**
708
     * {@inheritDoc}
709
     * @see \Kitodo\Dlf\Common\Document::_getPhysicalStructure()
710
     */
711
    protected function _getPhysicalStructure() {
        // Is there no physical structure array yet?
        if (!$this->physicalStructureLoaded) {
            // Does the document have a structMap node of type "PHYSICAL"?
            $elementNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div');
            if (!empty($elementNodes)) {
                // Get file groups.
                $fileUse = $this->_getFileGrps();
                // Register the physical sequence itself; it becomes the first entry.
                $physNode = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]');
                $physSeq = [$this->registerPhysicalNode($physNode[0], $fileUse)];
                // Register the physical elements, keyed by their @ORDER attribute.
                $elements = [];
                foreach ($elementNodes as $elementNode) {
                    $elements[(int) $elementNode['ORDER']] = $this->registerPhysicalNode($elementNode, $fileUse);
                }
                // Sort array by keys (= @ORDER).
                if (ksort($elements)) {
                    // Set total number of pages/tracks.
                    $this->numPages = count($elements);
                    // Merge and re-index the array to get nice numeric indexes.
                    $this->physicalStructure = array_merge($physSeq, $elements);
                }
            }
            $this->physicalStructureLoaded = TRUE;
        }
        return $this->physicalStructure;
    }

    /**
     * This stores the attributes and file representations of one physical
     * structure node in $this->physicalStructureInfo, keyed by the node's @ID
     *
     * @access private
     *
     * @param \SimpleXMLElement $node The mets:div node to register
     * @param array $fileUse The file ID -> USE concordance as returned by _getFileGrps()
     *
     * @return string The node's @ID (empty string if the attribute is missing)
     */
    private function registerPhysicalNode(\SimpleXMLElement $node, $fileUse) {
        $id = (string) $node['ID'];
        $this->physicalStructureInfo[$id]['id'] = $id;
        // Optional attributes default to an empty string.
        $this->physicalStructureInfo[$id]['dmdId'] = (isset($node['DMDID']) ? (string) $node['DMDID'] : '');
        $this->physicalStructureInfo[$id]['label'] = (isset($node['LABEL']) ? (string) $node['LABEL'] : '');
        $this->physicalStructureInfo[$id]['orderlabel'] = (isset($node['ORDERLABEL']) ? (string) $node['ORDERLABEL'] : '');
        $this->physicalStructureInfo[$id]['type'] = (string) $node['TYPE'];
        $this->physicalStructureInfo[$id]['contentIds'] = (isset($node['CONTENTIDS']) ? (string) $node['CONTENTIDS'] : '');
        // Get the file representations from fileSec node.
        foreach ($node->children('http://www.loc.gov/METS/')->fptr as $fptr) {
            $fileId = (string) $fptr->attributes()->FILEID;
            // Only files with a known @USE are recorded, indexed by that USE.
            if (!empty($fileUse[$fileId])) {
                $this->physicalStructureInfo[$id]['files'][$fileUse[$fileId]] = $fileId;
            }
        }
        return $id;
    }
764
765
    /**
766
     * {@inheritDoc}
767
     * @see \Kitodo\Dlf\Common\Document::_getSmLinks()
768
     */
769
    protected function _getSmLinks() {
        if (!$this->smLinksLoaded) {
            $smLinks = $this->mets->xpath('./mets:structLink/mets:smLink');
            // \SimpleXMLElement::xpath() does not return an array on failure,
            // so only iterate over a real result set.
            if (is_array($smLinks)) {
                foreach ($smLinks as $smLink) {
                    // Both link ends live in the XLink namespace; fetch them once.
                    $xlink = $smLink->attributes('http://www.w3.org/1999/xlink');
                    $from = (string) $xlink->from;
                    $to = (string) $xlink->to;
                    // Record the link in both directions ('l2p': from -> to, 'p2l': to -> from).
                    $this->smLinks['l2p'][$from][] = $to;
                    $this->smLinks['p2l'][$to][] = $from;
                }
            }
            $this->smLinksLoaded = TRUE;
        }
        return $this->smLinks;
    }
780
781
    /**
782
     * {@inheritDoc}
783
     * @see \Kitodo\Dlf\Common\Document::_getThumbnail()
784
     */
785
    protected function _getThumbnail($forceReload = FALSE) {
        // Serve the stored value unless it was never determined or a reload is forced.
        if ($this->thumbnailLoaded && !$forceReload) {
            return $this->thumbnail;
        }
        // Retain current PID.
        $cPid = ($this->cPid ?: $this->pid);
        if (!$cPid) {
            Helper::devLog('Invalid PID '.$cPid.' for structure definitions', DEVLOG_SEVERITY_ERROR);
            $this->thumbnailLoaded = TRUE;
            return $this->thumbnail;
        }
        // Load extension configuration.
        $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        if (empty($extConf['fileGrpThumbs'])) {
            Helper::devLog('No fileGrp for thumbnails specified', DEVLOG_SEVERITY_WARNING);
            $this->thumbnailLoaded = TRUE;
            return $this->thumbnail;
        }
        $thumbGrp = $extConf['fileGrpThumbs'];
        // Start from the toplevel structure; it may be replaced further down.
        $strctId = $this->_getToplevelId();
        $metadata = $this->getTitledata($cPid);
        // Get structure element to get thumbnail from.
        $db = $GLOBALS['TYPO3_DB'];
        $where = 'tx_dlf_structures.pid='.intval($cPid)
            .' AND tx_dlf_structures.index_name='.$db->fullQuoteStr($metadata['type'][0], 'tx_dlf_structures')
            .Helper::whereClause('tx_dlf_structures');
        $result = $db->exec_SELECTquery(
            'tx_dlf_structures.thumbnail AS thumbnail',
            'tx_dlf_structures',
            $where,
            '',
            '',
            '1'
        );
        if (!($db->sql_num_rows($result) > 0)) {
            Helper::devLog('No structure of type "'.$metadata['type'][0].'" found in database', DEVLOG_SEVERITY_ERROR);
            $this->thumbnailLoaded = TRUE;
            return $this->thumbnail;
        }
        $resArray = $db->sql_fetch_assoc($result);
        // Get desired thumbnail structure if not the toplevel structure itself.
        if (!empty($resArray['thumbnail'])) {
            $strctType = Helper::getIndexNameFromUid($resArray['thumbnail'], 'tx_dlf_structures', $cPid);
            // Check if this document has a structure element of the desired type.
            $strctIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@TYPE="'.$strctType.'"]/@ID');
            if (!empty($strctIds)) {
                $strctId = (string) $strctIds[0];
            }
        }
        // Load smLinks.
        $this->_getSmLinks();
        // Pick the physical node: the first one linked to the chosen structure,
        // otherwise the entry at index 1 of the physical structure.
        if ($this->_getPhysicalStructure()
            && !empty($this->smLinks['l2p'][$strctId])) {
            $physId = $this->smLinks['l2p'][$strctId][0];
        } else {
            $physId = $this->physicalStructure[1];
        }
        // Get thumbnail location.
        $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$physId]['files'][$thumbGrp]);
        $this->thumbnailLoaded = TRUE;
        return $this->thumbnail;
    }
842
843
    /**
844
     * {@inheritDoc}
845
     * @see \Kitodo\Dlf\Common\Document::_getToplevelId()
846
     */
847
    protected function _getToplevelId() {
        // An ID that was already determined is reused as is.
        if (!empty($this->toplevelId)) {
            return $this->toplevelId;
        }
        // Get all logical structure nodes with metadata, but without associated METS-Pointers.
        $candidates = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID and not(./mets:mptr)]');
        if (!$candidates) {
            return $this->toplevelId;
        }
        // Load smLinks.
        $this->_getSmLinks();
        foreach ($candidates as $candidate) {
            $candidateId = (string) $candidate['ID'];
            // A node that is linked to physical structure nodes is what we're looking for.
            if (array_key_exists($candidateId, $this->smLinks['l2p'])) {
                $this->toplevelId = $candidateId;
                break;
            }
            // Otherwise remember the first candidate as fallback and keep looking.
            if (empty($this->toplevelId)) {
                $this->toplevelId = $candidateId;
            }
        }
        return $this->toplevelId;
    }
869
870
    /**
871
     * This magic method is executed prior to any serialization of the object
872
     * @see __wakeup()
873
     *
874
     * @access public
875
     *
876
     * @return array Properties to be serialized
877
     */
878
    public function __sleep() {
        // Names of the properties that go into the serialized representation.
        $serializable = ['uid', 'pid', 'recordId', 'parentId', 'asXML'];
        // \SimpleXMLElement objects can't be serialized, so keep the XML as a string instead.
        $this->asXML = $this->xml->asXML();
        return $serializable;
    }
883
884
    /**
885
     * This magic method is used for setting a string value for the object
886
     *
887
     * @access public
888
     *
889
     * @return string String representing the METS object
890
     */
891
    public function __toString() {
        $dom = new \DOMDocument('1.0', 'utf-8');
        // Deep-copy the METS node into the new document.
        $metsNode = dom_import_simplexml($this->mets);
        $dom->appendChild($dom->importNode($metsNode, TRUE));
        // Pretty-print the serialized output.
        $dom->formatOutput = TRUE;
        return $dom->saveXML();
    }
897
898
    /**
899
     * This magic method is executed after the object is deserialized
900
     * @see __sleep()
901
     *
902
     * @access public
903
     *
904
     * @return void
905
     */
906
    public function __wakeup() {
        // Switch libxml to internal error handling while parsing and remember the previous setting.
        $previousSetting = libxml_use_internal_errors(TRUE);
        // Reload XML from string.
        $document = @simplexml_load_string($this->asXML);
        // Restore libxml's previous error handling.
        libxml_use_internal_errors($previousSetting);
        if ($document === FALSE) {
            Helper::devLog('Could not load XML after deserialization', DEVLOG_SEVERITY_ERROR);
            return;
        }
        // The string copy is no longer needed once the XML object is back.
        $this->asXML = '';
        $this->xml = $document;
        // Rebuild the unserializable properties.
        $this->init();
    }
922
}
923