Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Branch master (64c81c)
by Sebastian
04:54
created

MetsDocument::_getPhysicalStructure()   D

Complexity

Conditions 19
Paths 66

Size

Total Lines 54
Code Lines 33

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 33
c 1
b 0
f 0
dl 0
loc 54
rs 4.5166
cc 19
nc 66
nop 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
namespace Kitodo\Dlf\Common;
3
4
/**
5
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
6
 *
7
 * This file is part of the Kitodo and TYPO3 projects.
8
 *
9
 * @license GNU General Public License version 3 or later.
10
 * For the full copyright and license information, please read the
11
 * LICENSE.txt file that was distributed with this source code.
12
 */
13
14
use TYPO3\CMS\Core\Database\ConnectionPool;
15
use TYPO3\CMS\Core\Utility\GeneralUtility;
16
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
17
use Ubl\Iiif\Tools\IiifHelper;
18
use Ubl\Iiif\Services\AbstractImageService;
19
20
/**
21
 * MetsDocument class for the 'dlf' extension.
22
 *
23
 * @author	Sebastian Meyer <[email protected]>
24
 * @author	Henrik Lochmann <[email protected]>
25
 * @package	TYPO3
26
 * @subpackage	tx_dlf
27
 * @access	public
28
 */
29
final class MetsDocument extends Document
30
{
31
    /**
32
     * This holds the whole XML file as string for serialization purposes
33
     * @see __sleep() / __wakeup()
34
     *
35
     * @var string
36
     * @access protected
37
     */
38
    protected $asXML = '';
39
40
    /**
41
     * This holds the XML file's dmdSec parts with their IDs as array key
42
     *
43
     * @var array
44
     * @access protected
45
     */
46
    protected $dmdSec = [];
47
48
    /**
49
     * Are the METS file's dmdSecs loaded?
50
     * @see $dmdSec
51
     *
52
     * @var boolean
53
     * @access protected
54
     */
55
    protected $dmdSecLoaded = FALSE;
56
57
    /**
58
     * The extension key
59
     *
60
     * @var	string
61
     * @access public
62
     */
63
    public static $extKey = 'dlf';
64
65
    /**
66
     * This holds the file ID -> USE concordance
67
     * @see _getFileGrps()
68
     *
69
     * @var array
70
     * @access protected
71
     */
72
    protected $fileGrps = [];
73
74
    /**
75
     * Are the file groups loaded?
76
     * @see $fileGrps
77
     *
78
     * @var boolean
79
     * @access protected
80
     */
81
    protected $fileGrpsLoaded = FALSE;
82
83
    /**
84
     * Are the available metadata formats loaded?
85
     * @see $formats
86
     *
87
     * @var boolean
88
     * @access protected
89
     */
90
    protected $formatsLoaded = FALSE;
91
92
    /**
93
     * This holds the XML file's METS part as \SimpleXMLElement object
94
     *
95
     * @var \SimpleXMLElement
96
     * @access protected
97
     */
98
    protected $mets;
99
100
    /**
101
     * This holds the whole XML file as \SimpleXMLElement object
102
     *
103
     * @var \SimpleXMLElement
104
     * @access protected
105
     */
106
    protected $xml;
107
108
    /**
109
     * This adds metadata from METS structural map to metadata array.
110
     *
111
     * @access	public
112
     *
113
     * @param	array	&$metadata: The metadata array to extend
114
     * @param	string	$id: The @ID attribute of the logical structure node
115
     *
116
     * @return  void
117
     */
118
    public function addMetadataFromMets(&$metadata, $id) {
119
        $details = $this->getLogicalStructure($id);
120
        if (!empty($details)) {
121
            $metadata['mets_label'][0] = $details['label'];
122
            $metadata['mets_orderlabel'][0] = $details['orderlabel'];
123
        }
124
    }
125
126
    /**
     * Takes the record identifier from the METS root's @OBJID attribute (if
     * present) and then lets registered hook objects post-process it.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::establishRecordId()
     */
    protected function establishRecordId($pid) {
        // Use the METS object's @OBJID as record identifier if there is one.
        if (!empty($this->mets['OBJID'])) {
            $this->recordId = (string) $this->mets['OBJID'];
        }
        // Give every hook object which implements the hook a chance to adjust the identifier.
        foreach (Helper::getHookObjects('Classes/Common/MetsDocument.php') as $hook) {
            if (!method_exists($hook, 'construct_postProcessRecordId')) {
                continue;
            }
            $hook->construct_postProcessRecordId($this->xml, $this->recordId);
        }
    }
145
146
    /**
     * This gets the location a file can be downloaded from, depending on the
     * MIME type declared for the file.
     *
     * - "application/vnd.kitodo.iiif": the location is completed to point to
     *   "info.json", the IIIF resource is loaded and, if it is an image
     *   service, its image URL is returned.
     * - "application/vnd.netfpx": the location is returned with the
     *   parameter "CVT=jpeg" appended.
     * - anything else: the file location is returned as is.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::getDownloadLocation()
     */
    public function getDownloadLocation($id) {
        $fileMimeType = $this->getFileMimeType($id);
        $fileLocation = $this->getFileLocation($id);
        if ($fileMimeType === 'application/vnd.kitodo.iiif') {
            // Complete the location to ".../info.json". This is written as explicit
            // conditions because an unparenthesized nested ternary is evaluated
            // left-to-right in PHP 7 and is a fatal error in PHP 8.
            if (substr($fileLocation, -9) !== 'info.json') {
                $fileLocation .= (substr($fileLocation, -1) === '/' ? 'info.json' : '/info.json');
            }
            $conf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
            IiifHelper::setUrlReader(IiifUrlReader::getInstance());
            IiifHelper::setMaxThumbnailHeight($conf['iiifThumbnailHeight']);
            IiifHelper::setMaxThumbnailWidth($conf['iiifThumbnailWidth']);
            $service = IiifHelper::loadIiifResource($fileLocation);
            if ($service instanceof AbstractImageService) {
                return $service->getImageUrl();
            }
        } elseif ($fileMimeType === 'application/vnd.netfpx') {
            // This must be a comparison: an assignment here is always truthy and
            // would append the IIP parameter to every non-IIIF file location.
            $baseURL = $fileLocation.(strpos($fileLocation, '?') === false ? '?' : '');
            // TODO CVT is an optional IIP server capability; in theory, capabilities should be determined in the object request with '&obj=IIP-server'
            return $baseURL.'&CVT=jpeg';
        }
        return $fileLocation;
    }
172
173
    /**
     * Looks up the URL (xlink:href of the FLocat with LOCTYPE "URL") of the
     * file with the given @ID in the METS fileSec. Logs a warning and returns
     * an empty string if no such file node exists.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::getFileLocation()
     */
    public function getFileLocation($id) {
        if (empty($id)
            || !($location = $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="'.$id.'"]/mets:FLocat[@LOCTYPE="URL"]'))) {
            Helper::devLog('There is no file node with @ID "'.$id.'"', DEVLOG_SEVERITY_WARNING);
            return '';
        }
        $xlinkAttributes = $location[0]->attributes('http://www.w3.org/1999/xlink');
        return (string) $xlinkAttributes->href;
    }
186
187
    /**
     * Looks up the @MIMETYPE of the file with the given @ID in the METS
     * fileSec. Logs a warning and returns an empty string if the file node
     * or its MIME type is missing.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::getFileMimeType()
     */
    public function getFileMimeType($id) {
        if (empty($id)
            || !($mimetype = $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="'.$id.'"]/@MIMETYPE'))) {
            Helper::devLog('There is no file node with @ID "'.$id.'" or no MIME type specified', DEVLOG_SEVERITY_WARNING);
            return '';
        }
        return (string) $mimetype[0];
    }
200
201
    /**
     * Returns the details of a logical structure node. In recursive mode the
     * matching nodes are walked with all their children and appended to the
     * table of contents instead, and an empty array is returned.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::getLogicalStructure()
     */
    public function getLogicalStructure($id, $recursive = FALSE) {
        // Serve the requested logical unit from the cache if possible.
        if (!$recursive
            && !empty($this->logicalUnits[$id])) {
            return $this->logicalUnits[$id];
        }
        // Select either the specified logical unit or all units at top level.
        if (empty($id)) {
            $query = './mets:structMap[@TYPE="LOGICAL"]/mets:div';
        } else {
            $query = './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$id.'"]';
        }
        $divs = $this->mets->xpath($query);
        if (empty($divs)) {
            return [];
        }
        if (!$recursive) {
            // Only the first xpath hit is of interest.
            return $this->getLogicalStructureInfo($divs[0]);
        }
        // Walk the logical structure recursively and fill the whole table of contents.
        foreach ($divs as $div) {
            $this->tableOfContents[] = $this->getLogicalStructureInfo($div, TRUE);
        }
        return [];
    }
232
233
    /**
     * This gets details about a logical structure element
     *
     * The collected details are also stored in $this->logicalUnits (without
     * the "children" entry) for later usage.
     *
     * @access protected
     *
     * @param \SimpleXMLElement $structure: The logical structure node
     * @param boolean $recursive: Whether to include the child elements
     *
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
     */
    protected function getLogicalStructureInfo(\SimpleXMLElement $structure, $recursive = FALSE) {
        // Get attributes. Start with an empty array so that a node without any
        // attributes does not leave the variable undefined.
        $attributes = [];
        foreach ($structure->attributes() as $attribute => $value) {
            $attributes[$attribute] = (string) $value;
        }
        // Load plugin configuration.
        $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        // Extract identity information.
        $details = [];
        $details['id'] = (isset($attributes['ID']) ? $attributes['ID'] : '');
        $details['dmdId'] = (isset($attributes['DMDID']) ? $attributes['DMDID'] : '');
        $details['order'] = (isset($attributes['ORDER']) ? $attributes['ORDER'] : '');
        $details['label'] = (isset($attributes['LABEL']) ? $attributes['LABEL'] : '');
        $details['orderlabel'] = (isset($attributes['ORDERLABEL']) ? $attributes['ORDERLABEL'] : '');
        $details['contentIds'] = (isset($attributes['CONTENTIDS']) ? $attributes['CONTENTIDS'] : '');
        $details['volume'] = '';
        // Set volume information only if no label is set and this is the toplevel structure element.
        if (empty($details['label'])
            && $details['id'] == $this->_getToplevelId()) {
            $metadata = $this->getMetadata($details['id']);
            if (!empty($metadata['volume'][0])) {
                $details['volume'] = $metadata['volume'][0];
            }
        }
        $details['pagination'] = '';
        $details['type'] = (isset($attributes['TYPE']) ? $attributes['TYPE'] : '');
        $details['thumbnailId'] = '';
        // Load smLinks.
        $this->_getSmLinks();
        // Load physical structure.
        $this->_getPhysicalStructure();
        // Get the physical page or external file this structure element is pointing at.
        $details['points'] = '';
        // Is there a mptr node?
        if (count($structure->children('http://www.loc.gov/METS/')->mptr)) {
            // Yes. Get the file reference.
            $details['points'] = (string) $structure->children('http://www.loc.gov/METS/')->mptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
        } elseif (!empty($this->physicalStructure)
            && array_key_exists($details['id'], $this->smLinks['l2p'])) { // Are there any physical elements and is this logical unit linked to at least one of them?
            // The first physical element linked to this logical unit.
            $firstPhysicalId = $this->smLinks['l2p'][$details['id']][0];
            $details['points'] = max(intval(array_search($firstPhysicalId, $this->physicalStructure, TRUE)), 1);
            if (!empty($this->physicalStructureInfo[$firstPhysicalId]['files'][$extConf['fileGrpThumbs']])) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$firstPhysicalId]['files'][$extConf['fileGrpThumbs']];
            }
            // Get page/track number of the first page/track related to this structure element.
            $details['pagination'] = $this->physicalStructureInfo[$firstPhysicalId]['orderlabel'];
        } elseif ($details['id'] == $this->_getToplevelId()) { // Is this the toplevel structure element?
            // Yes. Point to itself.
            $details['points'] = 1;
            if (!empty($this->physicalStructure)
                && !empty($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$extConf['fileGrpThumbs']])) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$extConf['fileGrpThumbs']];
            }
        }
        // Get the files this structure element is pointing at.
        $details['files'] = [];
        $fileUse = $this->_getFileGrps();
        // Get the file representations from fileSec node.
        foreach ($structure->children('http://www.loc.gov/METS/')->fptr as $fptr) {
            $fileId = (string) $fptr->attributes()->FILEID;
            // Check if file has valid @USE attribute.
            if (!empty($fileUse[$fileId])) {
                $details['files'][$fileUse[$fileId]] = $fileId;
            }
        }
        // Keep for later usage.
        $this->logicalUnits[$details['id']] = $details;
        // Walk the structure recursively? And are there any children of the current element?
        if ($recursive
            && count($structure->children('http://www.loc.gov/METS/')->div)) {
            $details['children'] = [];
            foreach ($structure->children('http://www.loc.gov/METS/')->div as $child) {
                // Repeat for all children.
                $details['children'][] = $this->getLogicalStructureInfo($child, TRUE);
            }
        }
        return $details;
    }
319
320
    /**
     * Extracts the metadata of the logical structure node with the given @ID.
     *
     * The metadata is read from the dmdSec referenced by the node's @DMDID,
     * first by the format's extraction class (if configured) and then by the
     * XPath definitions stored in the database for the given page ID.
     * An empty array is returned if no valid page ID is available, if the
     * node has no dmdSec or if the dmdSec's metadata format is not supported.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::getMetadata()
     */
    public function getMetadata($id, $cPid = 0) {
        // Make sure $cPid is a non-negative integer.
        $cPid = max(intval($cPid), 0);
        // If $cPid is not given, try to get it elsewhere.
        if (!$cPid
            && ($this->cPid || $this->pid)) {
            // Retain current PID.
            $cPid = ($this->cPid ? $this->cPid : $this->pid);
        } elseif (!$cPid) {
            Helper::devLog('Invalid PID '.$cPid.' for metadata definitions', DEVLOG_SEVERITY_WARNING);
            return [];
        }
        // Get metadata from parsed metadata array if available.
        if (!empty($this->metadataArray[$id])
            && $this->metadataArray[0] == $cPid) {
            return $this->metadataArray[$id];
        }
        // Initialize metadata array with empty values.
        $metadata = [
            'title' => [],
            'title_sorting' => [],
            'author' => [],
            'place' => [],
            'year' => [],
            'prod_id' => [],
            'record_id' => [],
            'opac_id' => [],
            'union_id' => [],
            'urn' => [],
            'purl' => [],
            'type' => [],
            'volume' => [],
            'volume_sorting' => [],
            'license' => [],
            'terms' => [],
            'restrictions' => [],
            'out_of_print' => [],
            'rights_info' => [],
            'collection' => [],
            'owner' => [],
            'mets_label' => [],
            'mets_orderlabel' => [],
            'document_format' => [],
        ];
        $metadata['document_format'][] = 'METS';
        // Get the logical structure node's DMDID.
        if (!empty($this->logicalUnits[$id])) {
            $dmdId = $this->logicalUnits[$id]['dmdId'];
        } else {
            $dmdIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$id.'"]/@DMDID');
            // The xpath result is empty if the node does not exist or has no @DMDID.
            $dmdId = (!empty($dmdIds) ? (string) $dmdIds[0] : '');
        }
        if (empty($dmdId)) {
            // There is no dmdSec for this structure node.
            return [];
        }
        // Load available metadata formats and dmdSecs.
        $this->loadFormats();
        $this->_getDmdSec();
        // A dmdSec is only registered if it could be loaded; treat a missing one like an unknown format.
        $type = (isset($this->dmdSec[$dmdId]['type']) ? $this->dmdSec[$dmdId]['type'] : '');
        // Is this metadata format supported?
        if (empty($this->formats[$type])) {
            Helper::devLog('Unsupported metadata format "'.$type.'" in dmdSec with @ID "'.$dmdId.'"', DEVLOG_SEVERITY_WARNING);
            return [];
        }
        if (!empty($this->formats[$type]['class'])) {
            $class = $this->formats[$type]['class'];
            // Get the metadata from class.
            if (class_exists($class)
                && ($obj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance($class)) instanceof MetadataInterface) {
                $obj->extractMetadata($this->dmdSec[$dmdId]['xml'], $metadata);
            } else {
                Helper::devLog('Invalid class/method "'.$class.'->extractMetadata()" for metadata format "'.$type.'"', DEVLOG_SEVERITY_WARNING);
            }
        }
        // Get the structure's type.
        if (!empty($this->logicalUnits[$id])) {
            $metadata['type'] = [$this->logicalUnits[$id]['type']];
        } else {
            $struct = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$id.'"]/@TYPE');
            $metadata['type'] = [(!empty($struct) ? (string) $struct[0] : '')];
        }
        // Get the additional metadata from database.
        // The page ID is cast to integer before it is put into the query (as in the collections query below).
        $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'tx_dlf_metadata.index_name AS index_name,tx_dlf_metadataformat.xpath AS xpath,tx_dlf_metadataformat.xpath_sorting AS xpath_sorting,tx_dlf_metadata.is_sortable AS is_sortable,tx_dlf_metadata.default_value AS default_value,tx_dlf_metadata.format AS format',
            'tx_dlf_metadata,tx_dlf_metadataformat,tx_dlf_formats',
            'tx_dlf_metadata.pid='.intval($cPid)
                .' AND tx_dlf_metadataformat.pid='.intval($cPid)
                .' AND ((tx_dlf_metadata.uid=tx_dlf_metadataformat.parent_id AND tx_dlf_metadataformat.encoded=tx_dlf_formats.uid AND tx_dlf_formats.type='.$GLOBALS['TYPO3_DB']->fullQuoteStr($type, 'tx_dlf_formats').') OR tx_dlf_metadata.format=0)'
                .Helper::whereClause('tx_dlf_metadata', TRUE)
                .Helper::whereClause('tx_dlf_metadataformat')
                .Helper::whereClause('tx_dlf_formats')
        );
        // We need a \DOMDocument here, because SimpleXML doesn't support XPath functions properly.
        $domNode = dom_import_simplexml($this->dmdSec[$dmdId]['xml']);
        $domXPath = new \DOMXPath($domNode->ownerDocument);
        $this->registerNamespaces($domXPath);
        // OK, now make the XPath queries.
        while ($resArray = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($result)) {
            $indexName = $resArray['index_name'];
            // Set metadata field's value(s).
            if ($resArray['format'] > 0
                && !empty($resArray['xpath'])
                && ($values = $domXPath->evaluate($resArray['xpath'], $domNode))) {
                if ($values instanceof \DOMNodeList
                    && $values->length > 0) {
                    $metadata[$indexName] = [];
                    foreach ($values as $value) {
                        $metadata[$indexName][] = trim((string) $value->nodeValue);
                    }
                } elseif (!($values instanceof \DOMNodeList)) {
                    // The XPath expression evaluated to a scalar (e.g. the result of an XPath function).
                    $metadata[$indexName] = [trim((string) $values)];
                }
            }
            // Set default value if applicable.
            if (empty($metadata[$indexName][0])
                && strlen($resArray['default_value']) > 0) {
                $metadata[$indexName] = [$resArray['default_value']];
            }
            // Set sorting value if applicable.
            if (!empty($metadata[$indexName])
                && $resArray['is_sortable']) {
                if ($resArray['format'] > 0
                    && !empty($resArray['xpath_sorting'])
                    && ($values = $domXPath->evaluate($resArray['xpath_sorting'], $domNode))) {
                    if ($values instanceof \DOMNodeList
                        && $values->length > 0) {
                        $metadata[$indexName.'_sorting'][0] = trim((string) $values->item(0)->nodeValue);
                    } elseif (!($values instanceof \DOMNodeList)) {
                        $metadata[$indexName.'_sorting'][0] = trim((string) $values);
                    }
                }
                // Fall back to the field's first value if there is no dedicated sorting value.
                if (empty($metadata[$indexName.'_sorting'][0])) {
                    $metadata[$indexName.'_sorting'][0] = $metadata[$indexName][0];
                }
            }
        }
        // Set title to empty string if not present.
        if (empty($metadata['title'][0])) {
            $metadata['title'][0] = '';
            $metadata['title_sorting'][0] = '';
        }
        // Add collections from database to toplevel element if document is already saved.
        if (\TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($this->uid)
            && $id == $this->_getToplevelId()) {
            $result = $GLOBALS['TYPO3_DB']->exec_SELECT_mm_query(
                'tx_dlf_collections.index_name AS index_name',
                'tx_dlf_documents',
                'tx_dlf_relations',
                'tx_dlf_collections',
                'AND tx_dlf_collections.pid='.intval($cPid)
                    .' AND tx_dlf_documents.uid='.intval($this->uid)
                    .' AND tx_dlf_relations.ident='.$GLOBALS['TYPO3_DB']->fullQuoteStr('docs_colls', 'tx_dlf_relations')
                    .' AND tx_dlf_collections.sys_language_uid IN (-1,0)'
                    .Helper::whereClause('tx_dlf_documents')
                    .Helper::whereClause('tx_dlf_collections'),
                'tx_dlf_collections.index_name'
            );
            while ($resArray = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($result)) {
                if (!in_array($resArray['index_name'], $metadata['collection'], TRUE)) {
                    $metadata['collection'][] = $resArray['index_name'];
                }
            }
        }
        return $metadata;
    }
490
491
    /**
     * Returns the raw text of the given structure node, either from the raw
     * text array or, if the document has fulltext files, from the XML.
     * Returns an empty string if there is no fulltext.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::getRawText()
     */
    public function getRawText($id) {
        // Prefer text which is already available in the raw text array.
        if (!empty($this->rawTextArray[$id])) {
            return $this->rawTextArray[$id];
        }
        // Loading the fileGrps determines whether there are fulltext files.
        $this->_getFileGrps();
        if (!$this->hasFulltext) {
            return '';
        }
        return $this->getRawTextFromXml($id);
    }
508
509
    /**
     * Counts the ancestor elements of the logical structure node with the
     * given @ID.
     *
     * {@inheritDoc}
     * @see Document::getStructureDepth()
     */
    public function getStructureDepth($logId)
    {
        $ancestors = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$logId.'"]/ancestor::*');
        return count($ancestors);
    }
517
518
    /**
     * Locates the METS node inside the loaded XML and keeps it in
     * $this->mets. Logs an error if the XML contains no METS part.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::init()
     */
    protected function init() {
        // The namespaces have to be registered before the METS node can be queried.
        $this->registerNamespaces($this->xml);
        $metsNodes = $this->xml->xpath('//mets:mets');
        if (!$metsNodes) {
            Helper::devLog('No METS part found in document with UID '.$this->uid, DEVLOG_SEVERITY_ERROR);
            return;
        }
        // Keep the first METS node and register the namespaces for it, too.
        $this->mets = $metsNodes[0];
        $this->registerNamespaces($this->mets);
    }
534
535
    /**
     * Loads the XML file from the given location into $this->xml.
     *
     * While parsing, libxml's error output is collected internally and the
     * entity loader is disabled; both settings are restored afterwards.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::loadLocation()
     *
     * @return boolean TRUE if the XML could be loaded and parsed, FALSE otherwise
     */
    protected function loadLocation($location) {
        // Turn off libxml's error logging.
        $libxmlErrors = libxml_use_internal_errors(TRUE);
        // Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept
        $previousValueOfEntityLoader = libxml_disable_entity_loader(TRUE);
        // Load XML from file. Fetching the location may fail and yield FALSE
        // instead of a string, in which case there is nothing to parse.
        $content = \TYPO3\CMS\Core\Utility\GeneralUtility::getUrl($location);
        $xml = (is_string($content) ? simplexml_load_string($content) : FALSE);
        // reset entity loader setting
        libxml_disable_entity_loader($previousValueOfEntityLoader);
        // Reset libxml's error logging.
        libxml_use_internal_errors($libxmlErrors);
        // Set some basic properties.
        if ($xml !== FALSE) {
            $this->xml = $xml;
            return TRUE;
        }
        Helper::devLog('Could not load XML file from "'.$location.'"', DEVLOG_SEVERITY_ERROR);
        // Report the failure explicitly instead of implicitly returning NULL.
        return FALSE;
    }
558
559
    /**
     * Loads the file groups unless they are flagged as already loaded.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::ensureHasFulltextIsSet()
     */
    protected function ensureHasFulltextIsSet() {
        // Nothing to do if the fileGrps have been loaded before.
        if ($this->fileGrpsLoaded) {
            return;
        }
        $this->_getFileGrps();
    }
569
570
    /**
     * Determines the UID of the parent document via the closest ancestor of
     * the toplevel logical structure node which has a mptr child. The parent
     * document is saved first if its PID differs from the given one.
     * Returns 0 if there is no (ready) parent document.
     *
     * {@inheritDoc}
     * @see Document::getParentDocumentUid()
     */
    protected function getParentDocumentUidForSaving($pid, $core)
    {
        // Get the closest ancestor of the current document which has a MPTR child.
        $parentMptr = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$this->_getToplevelId().'"]/ancestor::mets:div[./mets:mptr][1]/mets:mptr');
        if (empty($parentMptr[0])) {
            return 0;
        }
        $parentLocation = (string) $parentMptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
        // A pointer to the document itself is no parent.
        if ($parentLocation == $this->location) {
            return 0;
        }
        $parentDoc = self::getInstance($parentLocation, $pid);
        if (!$parentDoc->ready) {
            return 0;
        }
        if ($parentDoc->pid != $pid) {
            $parentDoc->save($pid, $core);
        }
        return $parentDoc->uid;
    }
593
594
    /**
     * Uses the given preloaded document as XML source if it is a
     * \SimpleXMLElement and reports whether it was accepted.
     *
     * {@inheritDoc}
     * @see Document::setPreloadedDocument()
     */
    protected function setPreloadedDocument($preloadedDocument) {
        // Anything but a \SimpleXMLElement cannot be used by this class.
        if (!($preloadedDocument instanceof \SimpleXMLElement)) {
            return false;
        }
        $this->xml = $preloadedDocument;
        return true;
    }
606
607
    /**
     * Returns the METS part of the document as held in $this->mets.
     *
     * {@inheritDoc}
     * @see Document::getDocument()
     */
    protected function getDocument() {
        return $this->mets;
    }
614
615
    /**
     * Getter for $this->cPid, used by __get()
     *
     * @access protected
     *
     * @return integer The PID of the metadata definitions
     */
    protected function _getCPid() {
        return $this->cPid;
    }
625
626
    /**
     * This builds an array of the document's dmdSecs
     *
     * Only dmdSecs whose metadata format (mdWrap's @MDTYPE or, for type
     * "OTHER", @OTHERMDTYPE) is among the loaded formats and whose format
     * root element can be found are registered. The result is built once and
     * reused on subsequent calls.
     *
     * @access protected
     *
     * @return array Array of dmdSecs with their IDs as array key
     */
    protected function _getDmdSec() {
        if (!$this->dmdSecLoaded) {
            // Get available data formats.
            $this->loadFormats();
            // Get dmdSec nodes from METS.
            $dmdIds = $this->mets->xpath('./mets:dmdSec/@ID');
            if (!empty($dmdIds)) {
                foreach ($dmdIds as $dmdIdNode) {
                    $dmdId = (string) $dmdIdNode;
                    // Reset per dmdSec, so that a section with an unsupported format
                    // does not pick up the XML found for the previous section.
                    $type = '';
                    $xml = NULL;
                    $mdWrap = './mets:dmdSec[@ID="'.$dmdId.'"]/mets:mdWrap';
                    if ($types = $this->mets->xpath($mdWrap.'[not(@MDTYPE="OTHER")]/@MDTYPE')) {
                        $type = (string) $types[0];
                        if (!empty($this->formats[$type])) {
                            $xml = $this->mets->xpath($mdWrap.'[@MDTYPE="'.$type.'"]/mets:xmlData/'.strtolower($type).':'.$this->formats[$type]['rootElement']);
                        }
                    } elseif ($types = $this->mets->xpath($mdWrap.'[@MDTYPE="OTHER"]/@OTHERMDTYPE')) {
                        $type = (string) $types[0];
                        if (!empty($this->formats[$type])) {
                            $xml = $this->mets->xpath($mdWrap.'[@MDTYPE="OTHER"][@OTHERMDTYPE="'.$type.'"]/mets:xmlData/'.strtolower($type).':'.$this->formats[$type]['rootElement']);
                        }
                    }
                    // Register the dmdSec only if its metadata root element was found.
                    if (!empty($xml)) {
                        $this->dmdSec[$dmdId]['type'] = $type;
                        $this->dmdSec[$dmdId]['xml'] = $xml[0];
                        $this->registerNamespaces($this->dmdSec[$dmdId]['xml']);
                    }
                }
            }
            $this->dmdSecLoaded = TRUE;
        }
        return $this->dmdSec;
    }
661
662
    /**
663
     * This builds the file ID -> USE concordance
664
     *
665
     * @access protected
666
     *
667
     * @return array Array of file use groups with file IDs
668
     */
669
    protected function _getFileGrps() {
        // Serve the cached concordance once it has been built.
        if ($this->fileGrpsLoaded) {
            return $this->fileGrps;
        }
        // Read the configured USE attributes from the extension configuration.
        $settings = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        $wantedUses = GeneralUtility::trimExplode(',', $settings['fileGrps']);
        // Optional special-purpose groups are appended in this fixed order.
        foreach (['fileGrpThumbs', 'fileGrpDownload', 'fileGrpFulltext', 'fileGrpAudio'] as $option) {
            if (!empty($settings[$option])) {
                $wantedUses[] = $settings[$option];
            }
        }
        // Walk all file groups and map every file ID of a configured group to its USE value.
        foreach ($this->mets->xpath('./mets:fileSec/mets:fileGrp') as $group) {
            $use = (string) $group['USE'];
            if (!in_array($use, $wantedUses)) {
                continue;
            }
            foreach ($group->children('http://www.loc.gov/METS/')->file as $fileNode) {
                $this->fileGrps[(string) $fileNode->attributes()->ID] = $use;
            }
        }
        // Flag the document as having fulltext if any file belongs to the fulltext group.
        if (!empty($settings['fileGrpFulltext'])
            && in_array($settings['fileGrpFulltext'], $this->fileGrps)) {
            $this->hasFulltext = TRUE;
        }
        $this->fileGrpsLoaded = TRUE;
        return $this->fileGrps;
    }
705
706
    /**
707
     * {@inheritDoc}
708
     * @see \Kitodo\Dlf\Common\Document::prepareMetadataArray()
709
     */
710
    protected function prepareMetadataArray($cPid) {
        // Collect the IDs of all logical structure nodes that reference metadata (@DMDID).
        $nodeIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID]/@ID');
        if (!$nodeIds) {
            return;
        }
        // Store the metadata of each node under its ID.
        foreach ($nodeIds as $nodeId) {
            $key = (string) $nodeId;
            $this->metadataArray[$key] = $this->getMetadata($key, $cPid);
        }
        // NOTE(review): no PID is stored by this method; presumably the caller
        // records $cPid for the metadata definitions - confirm.
    }
719
720
    /**
721
     * This returns $this->mets via __get()
722
     *
723
     * @access protected
724
     *
725
     * @return \SimpleXMLElement The XML's METS part as \SimpleXMLElement object
726
     */
727
    protected function _getMets() {
        // Plain accessor: exposes the METS part of the loaded XML.
        return $this->mets;
    }
730
731
    /**
732
     * {@inheritDoc}
733
     * @see \Kitodo\Dlf\Common\Document::_getPhysicalStructure()
734
     */
735
    protected function _getPhysicalStructure() {
        // The physical structure is built once and served from the instance afterwards.
        if ($this->physicalStructureLoaded) {
            return $this->physicalStructure;
        }
        $physSeqPath = './mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]';
        // Does the document have a structMap node of type "PHYSICAL" with child elements?
        $elementNodes = $this->mets->xpath($physSeqPath.'/mets:div');
        if (!empty($elementNodes)) {
            // Get file groups (file ID -> USE concordance).
            $fileUse = $this->_getFileGrps();
            // Register the physical sequence itself; it becomes index 0 of the structure.
            $physNode = $this->mets->xpath($physSeqPath);
            $physSeq = [$this->collectPhysicalNodeInfo($physNode[0], $fileUse)];
            // Register the physical elements, keyed by their @ORDER attribute.
            // Elements sharing an @ORDER value (or lacking one) overwrite each other here.
            $elements = [];
            foreach ($elementNodes as $elementNode) {
                $elements[(int) $elementNode['ORDER']] = $this->collectPhysicalNodeInfo($elementNode, $fileUse);
            }
            // Sort array by keys (= @ORDER).
            if (ksort($elements)) {
                // Set total number of pages/tracks.
                $this->numPages = count($elements);
                // Merge and re-index the array to get nice numeric indexes.
                $this->physicalStructure = array_merge($physSeq, $elements);
            }
        }
        $this->physicalStructureLoaded = TRUE;
        return $this->physicalStructure;
    }

    /**
     * This stores the attributes and file pointers of one physical structure node
     * in $this->physicalStructureInfo and returns the node's ID.
     *
     * @access private
     *
     * @param \SimpleXMLElement $node The mets:div node of the physical structMap
     * @param array $fileUse The file ID -> USE concordance as returned by _getFileGrps()
     *
     * @return string The @ID of the given node
     */
    private function collectPhysicalNodeInfo(\SimpleXMLElement $node, $fileUse) {
        $id = (string) $node['ID'];
        // Optional attributes default to an empty string when absent.
        $this->physicalStructureInfo[$id]['id'] = $id;
        $this->physicalStructureInfo[$id]['dmdId'] = (isset($node['DMDID']) ? (string) $node['DMDID'] : '');
        $this->physicalStructureInfo[$id]['order'] = (isset($node['ORDER']) ? (string) $node['ORDER'] : '');
        $this->physicalStructureInfo[$id]['label'] = (isset($node['LABEL']) ? (string) $node['LABEL'] : '');
        $this->physicalStructureInfo[$id]['orderlabel'] = (isset($node['ORDERLABEL']) ? (string) $node['ORDERLABEL'] : '');
        $this->physicalStructureInfo[$id]['type'] = (string) $node['TYPE'];
        $this->physicalStructureInfo[$id]['contentIds'] = (isset($node['CONTENTIDS']) ? (string) $node['CONTENTIDS'] : '');
        // Get the file representations from fileSec node.
        foreach ($node->children('http://www.loc.gov/METS/')->fptr as $fptr) {
            $fileId = (string) $fptr->attributes()->FILEID;
            // Only keep files whose group has a configured @USE attribute.
            if (!empty($fileUse[$fileId])) {
                $this->physicalStructureInfo[$id]['files'][$fileUse[$fileId]] = $fileId;
            }
        }
        return $id;
    }
790
791
    /**
792
     * {@inheritDoc}
793
     * @see \Kitodo\Dlf\Common\Document::_getSmLinks()
794
     */
795
    protected function _getSmLinks() {
        // Serve the cached mapping once it has been built.
        if ($this->smLinksLoaded) {
            return $this->smLinks;
        }
        $xlinkNamespace = 'http://www.w3.org/1999/xlink';
        foreach ($this->mets->xpath('./mets:structLink/mets:smLink') as $link) {
            // Read both xlink attributes of the link once.
            $xlink = $link->attributes($xlinkNamespace);
            $from = (string) $xlink->from;
            $to = (string) $xlink->to;
            // Record the link in both directions.
            $this->smLinks['l2p'][$from][] = $to;
            $this->smLinks['p2l'][$to][] = $from;
        }
        $this->smLinksLoaded = TRUE;
        return $this->smLinks;
    }
806
807
    /**
808
     * {@inheritDoc}
809
     * @see \Kitodo\Dlf\Common\Document::_getThumbnail()
810
     */
811
    protected function _getThumbnail($forceReload = FALSE) {
        // Reuse the cached location unless a reload is explicitly requested.
        if ($this->thumbnailLoaded
            && !$forceReload) {
            return $this->thumbnail;
        }
        // Retain current PID.
        $cPid = ($this->cPid ? $this->cPid : $this->pid);
        if (!$cPid) {
            Helper::devLog('Invalid PID '.$cPid.' for structure definitions', DEVLOG_SEVERITY_ERROR);
            $this->thumbnailLoaded = TRUE;
            return $this->thumbnail;
        }
        // Load extension configuration.
        $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        if (empty($extConf['fileGrpThumbs'])) {
            Helper::devLog('No fileGrp for thumbnails specified', DEVLOG_SEVERITY_WARNING);
            $this->thumbnailLoaded = TRUE;
            return $this->thumbnail;
        }
        $thumbGrp = $extConf['fileGrpThumbs'];
        $strctId = $this->_getToplevelId();
        $metadata = $this->getTitledata($cPid);

        /** @var QueryBuilder $queryBuilder */
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_structures');

        // Get structure element to get thumbnail from.
        $result = $queryBuilder
            ->select('tx_dlf_structures.thumbnail AS thumbnail')
            ->from('tx_dlf_structures')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($metadata['type'][0])),
                Helper::whereExpression('tx_dlf_structures')
            )
            ->setMaxResults(1)
            ->execute();

        $allResults = $result->fetchAll();

        if (count($allResults) == 1) {
            $resArray = $allResults[0];
            // Get desired thumbnail structure if not the toplevel structure itself.
            if (!empty($resArray['thumbnail'])) {
                $strctType = Helper::getIndexNameFromUid($resArray['thumbnail'], 'tx_dlf_structures', $cPid);
                // Check if this document has a structure element of the desired type.
                $strctIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@TYPE="'.$strctType.'"]/@ID');
                if (!empty($strctIds)) {
                    $strctId = (string) $strctIds[0];
                }
            }
            // Load smLinks.
            $this->_getSmLinks();
            // Pick the physical node to take the thumbnail from: the first one linked
            // to the chosen logical structure, otherwise the first physical element.
            $physId = NULL;
            if ($this->_getPhysicalStructure()
                && !empty($this->smLinks['l2p'][$strctId])) {
                $physId = $this->smLinks['l2p'][$strctId][0];
            } elseif (isset($this->physicalStructure[1])) {
                $physId = $this->physicalStructure[1];
            }
            // Resolve the thumbnail file ID without touching undefined offsets. A missing
            // node or file yields NULL, which is what the unguarded lookup produced as well.
            $fileId = NULL;
            if ($physId !== NULL
                && isset($this->physicalStructureInfo[$physId]['files'][$thumbGrp])) {
                $fileId = $this->physicalStructureInfo[$physId]['files'][$thumbGrp];
            }
            // Get thumbnail location.
            $this->thumbnail = $this->getFileLocation($fileId);
        } else {
            Helper::devLog('No structure of type "'.$metadata['type'][0].'" found in database', DEVLOG_SEVERITY_ERROR);
        }
        $this->thumbnailLoaded = TRUE;
        return $this->thumbnail;
    }
876
877
    /**
878
     * {@inheritDoc}
879
     * @see \Kitodo\Dlf\Common\Document::_getToplevelId()
880
     */
881
    protected function _getToplevelId() {
        // A previously determined ID is reused as is.
        if (!empty($this->toplevelId)) {
            return $this->toplevelId;
        }
        // Get all logical structure nodes with metadata, but without associated METS-Pointers.
        $candidates = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID and not(./mets:mptr)]');
        if (!$candidates) {
            return $this->toplevelId;
        }
        // Load smLinks.
        $this->_getSmLinks();
        foreach ($candidates as $candidate) {
            $candidateId = (string) $candidate['ID'];
            // A node with linked physical structure nodes is the preferred match; stop there.
            if (array_key_exists($candidateId, $this->smLinks['l2p'])) {
                $this->toplevelId = $candidateId;
                break;
            }
            // Otherwise remember the first candidate as fallback and keep looking.
            if (empty($this->toplevelId)) {
                $this->toplevelId = $candidateId;
            }
        }
        return $this->toplevelId;
    }
903
904
    /**
905
     * This magic method is executed prior to any serialization of the object
906
     * @see __wakeup()
907
     *
908
     * @access public
909
     *
910
     * @return array Properties to be serialized
911
     */
912
    public function __sleep() {
        // \SimpleXMLElement objects can't be serialized, so keep a string copy of the XML instead.
        $this->asXML = $this->xml->asXML();
        // Names of the properties that are written to the serialized form.
        $serializable = ['uid', 'pid', 'recordId', 'parentId', 'asXML'];
        return $serializable;
    }
917
918
    /**
919
     * This magic method is used for setting a string value for the object
920
     *
921
     * @access public
922
     *
923
     * @return string String representing the METS object
924
     */
925
    public function __toString() {
        // Copy the METS node into a fresh DOM document so it can be pretty-printed.
        $document = new \DOMDocument('1.0', 'utf-8');
        $metsNode = $document->importNode(dom_import_simplexml($this->mets), TRUE);
        $document->appendChild($metsNode);
        $document->formatOutput = TRUE;
        return $document->saveXML();
    }
931
932
    /**
933
     * This magic method is executed after the object is deserialized
934
     * @see __sleep()
935
     *
936
     * @access public
937
     *
938
     * @return void
939
     */
940
    public function __wakeup() {
        // Turn off libxml's error logging while parsing and remember the previous setting.
        $previousSetting = libxml_use_internal_errors(TRUE);
        // Reload XML from the string stored by __sleep().
        $reloaded = @simplexml_load_string($this->asXML);
        // Reset libxml's error logging.
        libxml_use_internal_errors($previousSetting);
        if ($reloaded === FALSE) {
            Helper::devLog('Could not load XML after deserialization', DEVLOG_SEVERITY_ERROR);
            return;
        }
        // The string copy is no longer needed once the XML object is back.
        $this->asXML = '';
        $this->xml = $reloaded;
        // Rebuild the unserializable properties.
        $this->init();
    }
956
}
957