Scrutinizer GitHub App not installed

We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.

Install GitHub App

GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( 4b6957...7b04b6 )
by Sebastian
29s queued 11s
created

MetsDocument::ensureHasFulltextIsSet()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 2
c 1
b 0
f 0
dl 0
loc 5
rs 10
cc 2
nc 2
nop 0
1
<?php
2
3
namespace Kitodo\Dlf\Common;
4
5
/**
6
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
7
 *
8
 * This file is part of the Kitodo and TYPO3 projects.
9
 *
10
 * @license GNU General Public License version 3 or later.
11
 * For the full copyright and license information, please read the
12
 * LICENSE.txt file that was distributed with this source code.
13
 */
14
15
use TYPO3\CMS\Core\Database\ConnectionPool;
16
use TYPO3\CMS\Core\Utility\GeneralUtility;
17
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
18
use Ubl\Iiif\Tools\IiifHelper;
19
use Ubl\Iiif\Services\AbstractImageService;
20
21
/**
22
 * MetsDocument class for the 'dlf' extension.
23
 *
24
 * @author	Sebastian Meyer <[email protected]>
25
 * @author	Henrik Lochmann <[email protected]>
26
 * @package	TYPO3
27
 * @subpackage	tx_dlf
28
 * @access	public
29
 * @property-write integer $cPid This holds the PID for the configuration
30
 * @property-read array $dmdSec This holds the XML file's dmdSec parts with their IDs as array key
31
 * @property-read array $fileGrps This holds the file ID -> USE concordance
32
 * @property-read boolean $hasFulltext Are there any fulltext files available?
33
 * @property-read string $location This holds the documents location
34
 * @property-read array $metadataArray This holds the documents' parsed metadata array
35
 * @property-read \SimpleXMLElement $mets This holds the XML file's METS part as \SimpleXMLElement object
36
 * @property-read integer $numPages This holds the total number of pages
37
 * @property-read integer $parentId This holds the UID of the parent document or zero if not multi-volumed
38
 * @property-read array $physicalStructure This holds the physical structure
39
 * @property-read array $physicalStructureInfo This holds the physical structure metadata
40
 * @property-read integer $pid This holds the PID of the document or zero if not in database
41
 * @property-read boolean $ready Is the document instantiated successfully?
42
 * @property-read string $recordId The METS file's / IIIF manifest's record identifier
43
 * @property-read integer $rootId This holds the UID of the root document or zero if not multi-volumed
44
 * @property-read array $smLinks This holds the smLinks between logical and physical structMap
45
 * @property-read array $tableOfContents This holds the logical structure
46
 * @property-read string $thumbnail This holds the document's thumbnail location
47
 * @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
48
 * @property-read mixed $uid This holds the UID or the URL of the document
49
 */
50
final class MetsDocument extends Document
51
{
52
    /**
53
     * This holds the whole XML file as string for serialization purposes
54
     * @see __sleep() / __wakeup()
55
     *
56
     * @var string
57
     * @access protected
58
     */
59
    protected $asXML = '';
60
61
    /**
62
     * This holds the XML file's dmdSec parts with their IDs as array key
63
     *
64
     * @var array
65
     * @access protected
66
     */
67
    protected $dmdSec = [];
68
69
    /**
70
     * Are the METS file's dmdSecs loaded?
71
     * @see $dmdSec
72
     *
73
     * @var boolean
74
     * @access protected
75
     */
76
    protected $dmdSecLoaded = FALSE;
77
78
    /**
79
     * The extension key
80
     *
81
     * @var	string
82
     * @access public
83
     */
84
    public static $extKey = 'dlf';
85
86
    /**
87
     * This holds the file ID -> USE concordance
88
     * @see _getFileGrps()
89
     *
90
     * @var array
91
     * @access protected
92
     */
93
    protected $fileGrps = [];
94
95
    /**
96
     * Are the file groups loaded?
97
     * @see $fileGrps
98
     *
99
     * @var boolean
100
     * @access protected
101
     */
102
    protected $fileGrpsLoaded = FALSE;
103
104
    /**
105
     * This holds the XML file's METS part as \SimpleXMLElement object
106
     *
107
     * @var \SimpleXMLElement
108
     * @access protected
109
     */
110
    protected $mets;
111
112
    /**
113
     * This holds the whole XML file as \SimpleXMLElement object
114
     *
115
     * @var \SimpleXMLElement
116
     * @access protected
117
     */
118
    protected $xml;
119
120
    /**
121
     * This adds metadata from METS structural map to metadata array.
122
     *
123
     * @access	public
124
     *
125
     * @param	array	&$metadata: The metadata array to extend
126
     * @param	string	$id: The @ID attribute of the logical structure node
127
     *
128
     * @return  void
129
     */
130
    public function addMetadataFromMets(&$metadata, $id)
    {
        // Look up the logical structure node; an empty result means there is nothing to add.
        $structure = $this->getLogicalStructure($id);
        if (empty($structure)) {
            return;
        }
        // Copy the node's label and order label into the first slot of the respective fields.
        $metadata['mets_label'][0] = $structure['label'];
        $metadata['mets_orderlabel'][0] = $structure['orderlabel'];
    }
138
139
    /**
140
     *
141
     * {@inheritDoc}
142
     * @see \Kitodo\Dlf\Common\Document::establishRecordId()
143
     */
144
    protected function establishRecordId($pid)
    {
        // Take the record identifier from the METS node's @OBJID when it is present and non-empty.
        // ($pid is part of the inherited signature and is not used here.)
        if (!empty($this->mets['OBJID'])) {
            $this->recordId = (string) $this->mets['OBJID'];
        }
        // Let every registered hook object that implements the post-processing method
        // have a look at the XML and the record identifier.
        foreach (Helper::getHookObjects('Classes/Common/MetsDocument.php') as $hook) {
            if (!method_exists($hook, 'construct_postProcessRecordId')) {
                continue;
            }
            $hook->construct_postProcessRecordId($this->xml, $this->recordId);
        }
    }
159
160
    /**
161
     *
162
     * {@inheritDoc}
163
     * @see \Kitodo\Dlf\Common\Document::getDownloadLocation()
164
     */
165
    public function getDownloadLocation($id)
    {
        $fileMimeType = $this->getFileMimeType($id);
        $fileLocation = $this->getFileLocation($id);
        // Without a file location there is nothing to derive a download URL from.
        if (empty($fileLocation)) {
            return '';
        }
        if ($fileMimeType == 'application/vnd.kitodo.iiif') {
            // Make the location point at the image service's info.json:
            // keep it if it already ends in "info.json", otherwise append the file name
            // and insert a slash only if the location does not already end in one.
            // (This replaces an unparenthesized nested ternary which PHP < 8 evaluated
            // left-to-right - appending "info.json" twice - and which PHP 8 rejects.)
            if (substr($fileLocation, -9) !== 'info.json') {
                $fileLocation .= (substr($fileLocation, -1) === '/' ? '' : '/') . 'info.json';
            }
            $conf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
            IiifHelper::setUrlReader(IiifUrlReader::getInstance());
            IiifHelper::setMaxThumbnailHeight($conf['iiifThumbnailHeight']);
            IiifHelper::setMaxThumbnailWidth($conf['iiifThumbnailWidth']);
            $service = IiifHelper::loadIiifResource($fileLocation);
            // instanceof is FALSE for NULL, so no separate NULL check is needed.
            if ($service instanceof AbstractImageService) {
                return $service->getImageUrl();
            }
        } elseif ($fileMimeType == 'application/vnd.netfpx') {
            // Start a query string if the location does not have one yet.
            $baseURL = $fileLocation . (strpos($fileLocation, "?") === FALSE ? "?" : "");
            // TODO CVT is an optional IIP server capability; in theory, capabilities should be determined in the object request with '&obj=IIP-server'
            return $baseURL . "&CVT=jpeg";
        }
        // Any other MIME type (or an IIIF resource that is no image service): use the location as is.
        return $fileLocation;
    }
186
187
    /**
188
     * {@inheritDoc}
189
     * @see \Kitodo\Dlf\Common\Document::getFileLocation()
190
     */
191
    public function getFileLocation($id)
    {
        // Find the URL-typed FLocat of the file node with the given @ID.
        $query = './mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/mets:FLocat[@LOCTYPE="URL"]';
        $hits = $this->mets->xpath($query);
        if (empty($id) || empty($hits)) {
            Helper::devLog('There is no file node with @ID "' . $id . '"', DEVLOG_SEVERITY_WARNING);
            return '';
        }
        // The location is stored in the first hit's xlink:href attribute.
        return (string) $hits[0]->attributes('http://www.w3.org/1999/xlink')->href;
    }
204
205
    /**
206
     * {@inheritDoc}
207
     * @see \Kitodo\Dlf\Common\Document::getFileMimeType()
208
     */
209
    public function getFileMimeType($id)
    {
        // Read the @MIMETYPE attribute of the file node with the given @ID.
        $query = './mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/@MIMETYPE';
        $hits = $this->mets->xpath($query);
        if (empty($id) || empty($hits)) {
            Helper::devLog('There is no file node with @ID "' . $id . '" or no MIME type specified', DEVLOG_SEVERITY_WARNING);
            return '';
        }
        return (string) $hits[0];
    }
222
223
    /**
224
     * {@inheritDoc}
225
     * @see \Kitodo\Dlf\Common\Document::getLogicalStructure()
226
     */
227
    public function getLogicalStructure($id, $recursive = FALSE)
    {
        // Non-recursive requests are served from the already loaded logical units if possible.
        if (!$recursive && !empty($this->logicalUnits[$id])) {
            return $this->logicalUnits[$id];
        }
        // With an @ID select that div anywhere in the logical structMap,
        // without one select all divs at its top level.
        $query = !empty($id)
            ? './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]'
            : './mets:structMap[@TYPE="LOGICAL"]/mets:div';
        $nodes = $this->mets->xpath($query);
        if (empty($nodes)) {
            return [];
        }
        if ($recursive) {
            // Walk the logical structure recursively and fill the whole table of contents.
            // The details are stored in $this->tableOfContents; the return value stays empty.
            foreach ($nodes as $node) {
                $this->tableOfContents[] = $this->getLogicalStructureInfo($node, TRUE);
            }
            return [];
        }
        // Only the first xpath hit is of interest for a non-recursive request.
        return $this->getLogicalStructureInfo($nodes[0]);
    }
257
258
    /**
259
     * This gets details about a logical structure element
260
     *
261
     * @access protected
262
     *
263
     * @param \SimpleXMLElement $structure: The logical structure node
264
     * @param boolean $recursive: Whether to include the child elements
265
     *
266
     * @return array Array of the element's id, label, type and physical page indexes/mptr link
267
     */
268
    protected function getLogicalStructureInfo(\SimpleXMLElement $structure, $recursive = FALSE)
    {
        // Get attributes as plain strings. Start with an empty array so that a div
        // without any attributes does not leave the variable undefined.
        $attributes = [];
        foreach ($structure->attributes() as $attribute => $value) {
            $attributes[$attribute] = (string) $value;
        }
        // Load plugin configuration.
        $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        // Extract identity information. Every attribute falls back to an empty string
        // when it is missing (this now includes @ID and @TYPE, which were read unchecked).
        $details = [];
        $details['id'] = (isset($attributes['ID']) ? $attributes['ID'] : '');
        $details['dmdId'] = (isset($attributes['DMDID']) ? $attributes['DMDID'] : '');
        $details['order'] = (isset($attributes['ORDER']) ? $attributes['ORDER'] : '');
        $details['label'] = (isset($attributes['LABEL']) ? $attributes['LABEL'] : '');
        $details['orderlabel'] = (isset($attributes['ORDERLABEL']) ? $attributes['ORDERLABEL'] : '');
        $details['contentIds'] = (isset($attributes['CONTENTIDS']) ? $attributes['CONTENTIDS'] : '');
        $details['volume'] = '';
        // Set volume information only if no label is set and this is the toplevel structure element.
        if (
            empty($details['label'])
            && $details['id'] == $this->_getToplevelId()
        ) {
            $metadata = $this->getMetadata($details['id']);
            if (!empty($metadata['volume'][0])) {
                $details['volume'] = $metadata['volume'][0];
            }
        }
        $details['pagination'] = '';
        $details['type'] = (isset($attributes['TYPE']) ? $attributes['TYPE'] : '');
        $details['thumbnailId'] = '';
        // Load smLinks.
        $this->_getSmLinks();
        // Load physical structure.
        $this->_getPhysicalStructure();
        // Get the physical page or external file this structure element is pointing at.
        $details['points'] = '';
        $metsChildren = $structure->children('http://www.loc.gov/METS/');
        // Is there a mptr node?
        if (count($metsChildren->mptr)) {
            // Yes. Get the file reference.
            $details['points'] = (string) $metsChildren->mptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
        } elseif (
            !empty($this->physicalStructure)
            // Guard against a missing logical-to-physical map before looking up the key.
            && !empty($this->smLinks['l2p'])
            && array_key_exists($details['id'], $this->smLinks['l2p'])
        ) {
            // ID of the first physical page/track linked to this logical element.
            $firstPhysicalId = $this->smLinks['l2p'][$details['id']][0];
            // Link logical structure to the first corresponding physical page/track.
            // A failed search yields FALSE, which intval()/max() turn into page 1.
            $details['points'] = max(intval(array_search($firstPhysicalId, $this->physicalStructure, TRUE)), 1);
            if (!empty($this->physicalStructureInfo[$firstPhysicalId]['files'][$extConf['fileGrpThumbs']])) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$firstPhysicalId]['files'][$extConf['fileGrpThumbs']];
            }
            // Get page/track number of the first page/track related to this structure element.
            $details['pagination'] = $this->physicalStructureInfo[$firstPhysicalId]['orderlabel'];
        } elseif ($details['id'] == $this->_getToplevelId()) {
            // Point to self if this is the toplevel structure.
            $details['points'] = 1;
            if (
                !empty($this->physicalStructure)
                && !empty($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$extConf['fileGrpThumbs']])
            ) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$extConf['fileGrpThumbs']];
            }
        }
        // Get the files this structure element is pointing at.
        $details['files'] = [];
        $fileUse = $this->_getFileGrps();
        // Get the file representations from fileSec node.
        foreach ($metsChildren->fptr as $fptr) {
            $fileId = (string) $fptr->attributes()->FILEID;
            // Check if file has valid @USE attribute.
            if (!empty($fileUse[$fileId])) {
                $details['files'][$fileUse[$fileId]] = $fileId;
            }
        }
        // Keep for later usage (stored without the 'children' key, which is added below).
        $this->logicalUnits[$details['id']] = $details;
        // Walk the structure recursively? And are there any children of the current element?
        if (
            $recursive
            && count($metsChildren->div)
        ) {
            $details['children'] = [];
            foreach ($metsChildren->div as $child) {
                // Repeat for all children.
                $details['children'][] = $this->getLogicalStructureInfo($child, TRUE);
            }
        }
        return $details;
    }
354
355
    /**
356
     * {@inheritDoc}
357
     * @see \Kitodo\Dlf\Common\Document::getMetadata()
358
     */
359
    public function getMetadata($id, $cPid = 0)
    {
        // Make sure $cPid is a non-negative integer.
        $cPid = max(intval($cPid), 0);
        // If $cPid is not given, try to get it elsewhere.
        if (
            !$cPid
            && ($this->cPid || $this->pid)
        ) {
            // Retain current PID. Cast to integer because the value is concatenated into SQL below.
            $cPid = intval($this->cPid ? $this->cPid : $this->pid);
        } elseif (!$cPid) {
            Helper::devLog('Invalid PID ' . $cPid . ' for metadata definitions', DEVLOG_SEVERITY_WARNING);
            return [];
        }
        // Get metadata from parsed metadata array if available.
        // (Index 0 of the metadata array is compared against the PID it was parsed for.)
        if (
            !empty($this->metadataArray[$id])
            && $this->metadataArray[0] == $cPid
        ) {
            return $this->metadataArray[$id];
        }
        // Initialize metadata array with empty values.
        $metadata = [
            'title' => [],
            'title_sorting' => [],
            'author' => [],
            'place' => [],
            'year' => [],
            'prod_id' => [],
            'record_id' => [],
            'opac_id' => [],
            'union_id' => [],
            'urn' => [],
            'purl' => [],
            'type' => [],
            'volume' => [],
            'volume_sorting' => [],
            'license' => [],
            'terms' => [],
            'restrictions' => [],
            'out_of_print' => [],
            'rights_info' => [],
            'collection' => [],
            'owner' => [],
            'mets_label' => [],
            'mets_orderlabel' => [],
            'document_format' => [],
        ];
        $metadata['document_format'][] = 'METS';
        // Get the logical structure node's @DMDID.
        if (!empty($this->logicalUnits[$id])) {
            $dmdIds = $this->logicalUnits[$id]['dmdId'];
        } else {
            $dmdIdNodes = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]/@DMDID');
            // The xpath may not match anything; do not read index 0 of an empty result.
            $dmdIds = (!empty($dmdIdNodes) ? (string) $dmdIdNodes[0] : '');
        }
        if (empty($dmdIds)) {
            // There is no dmdSec for this structure node.
            return [];
        }
        // Handle multiple DMDIDs separately.
        $dmdIds = explode(' ', $dmdIds);
        $hasSupportedMetadata = FALSE;
        // Load available metadata formats and dmdSecs.
        $this->loadFormats();
        $this->_getDmdSec();
        foreach ($dmdIds as $dmdId) {
            // A referenced dmdSec may not have been loaded; treat it as an unknown (empty) format type.
            $dmdType = (isset($this->dmdSec[$dmdId]['type']) ? $this->dmdSec[$dmdId]['type'] : '');
            // Is this metadata format supported?
            if (!empty($this->formats[$dmdType])) {
                if (!empty($this->formats[$dmdType]['class'])) {
                    $class = $this->formats[$dmdType]['class'];
                    // Get the metadata from class.
                    if (
                        class_exists($class)
                        && ($obj = GeneralUtility::makeInstance($class)) instanceof MetadataInterface
                    ) {
                        $obj->extractMetadata($this->dmdSec[$dmdId]['xml'], $metadata);
                    } else {
                        Helper::devLog('Invalid class/method "' . $class . '->extractMetadata()" for metadata format "' . $dmdType . '"', DEVLOG_SEVERITY_WARNING);
                    }
                }
            } else {
                Helper::devLog('Unsupported metadata format "' . $dmdType . '" in dmdSec with @ID "' . $dmdId . '"', DEVLOG_SEVERITY_NOTICE);
                // Continue searching for supported metadata with next @DMDID.
                continue;
            }
            // Get the structure's type.
            if (!empty($this->logicalUnits[$id])) {
                $metadata['type'] = [$this->logicalUnits[$id]['type']];
            } else {
                $struct = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]/@TYPE');
                if (!empty($struct)) {
                    $metadata['type'] = [(string) $struct[0]];
                }
            }
            // Get the additional metadata from database.
            $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
                'tx_dlf_metadata.index_name AS index_name,tx_dlf_metadataformat.xpath AS xpath,tx_dlf_metadataformat.xpath_sorting AS xpath_sorting,tx_dlf_metadata.is_sortable AS is_sortable,tx_dlf_metadata.default_value AS default_value,tx_dlf_metadata.format AS format',
                'tx_dlf_metadata,tx_dlf_metadataformat,tx_dlf_formats',
                'tx_dlf_metadata.pid=' . $cPid
                    . ' AND tx_dlf_metadataformat.pid=' . $cPid
                    . ' AND ((tx_dlf_metadata.uid=tx_dlf_metadataformat.parent_id AND tx_dlf_metadataformat.encoded=tx_dlf_formats.uid AND tx_dlf_formats.type=' . $GLOBALS['TYPO3_DB']->fullQuoteStr($dmdType, 'tx_dlf_formats') . ') OR tx_dlf_metadata.format=0)'
                    . Helper::whereClause('tx_dlf_metadata', TRUE)
                    . Helper::whereClause('tx_dlf_metadataformat')
                    . Helper::whereClause('tx_dlf_formats')
            );
            // We need a \DOMDocument here, because SimpleXML doesn't support XPath functions properly.
            $domNode = dom_import_simplexml($this->dmdSec[$dmdId]['xml']);
            $domXPath = new \DOMXPath($domNode->ownerDocument);
            $this->registerNamespaces($domXPath);
            // OK, now make the XPath queries.
            while ($resArray = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($result)) {
                $indexName = $resArray['index_name'];
                // Set metadata field's value(s).
                if (
                    $resArray['format'] > 0
                    && !empty($resArray['xpath'])
                    && ($values = $domXPath->evaluate($resArray['xpath'], $domNode))
                ) {
                    if (
                        $values instanceof \DOMNodeList
                        && $values->length > 0
                    ) {
                        $metadata[$indexName] = [];
                        foreach ($values as $value) {
                            $metadata[$indexName][] = trim((string) $value->nodeValue);
                        }
                    } elseif (!($values instanceof \DOMNodeList)) {
                        // The XPath expression evaluated to a scalar instead of a node list.
                        $metadata[$indexName] = [trim((string) $values)];
                    }
                }
                // Set default value if applicable.
                if (
                    empty($metadata[$indexName][0])
                    && strlen($resArray['default_value']) > 0
                ) {
                    $metadata[$indexName] = [$resArray['default_value']];
                }
                // Set sorting value if applicable.
                if (
                    !empty($metadata[$indexName])
                    && $resArray['is_sortable']
                ) {
                    if (
                        $resArray['format'] > 0
                        && !empty($resArray['xpath_sorting'])
                        && ($values = $domXPath->evaluate($resArray['xpath_sorting'], $domNode))
                    ) {
                        if (
                            $values instanceof \DOMNodeList
                            && $values->length > 0
                        ) {
                            $metadata[$indexName . '_sorting'][0] = trim((string) $values->item(0)->nodeValue);
                        } elseif (!($values instanceof \DOMNodeList)) {
                            $metadata[$indexName . '_sorting'][0] = trim((string) $values);
                        }
                    }
                    // Fall back to the field's first value if no dedicated sorting value was found.
                    if (empty($metadata[$indexName . '_sorting'][0])) {
                        $metadata[$indexName . '_sorting'][0] = $metadata[$indexName][0];
                    }
                }
            }
            // Set title to empty string if not present.
            if (empty($metadata['title'][0])) {
                $metadata['title'][0] = '';
                $metadata['title_sorting'][0] = '';
            }
            // Add collections from database to toplevel element if document is already saved.
            if (
                \TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($this->uid)
                && $id == $this->_getToplevelId()
            ) {
                $result = $GLOBALS['TYPO3_DB']->exec_SELECT_mm_query(
                    'tx_dlf_collections.index_name AS index_name',
                    'tx_dlf_documents',
                    'tx_dlf_relations',
                    'tx_dlf_collections',
                    'AND tx_dlf_collections.pid=' . intval($cPid)
                        . ' AND tx_dlf_documents.uid=' . intval($this->uid)
                        . ' AND tx_dlf_relations.ident=' . $GLOBALS['TYPO3_DB']->fullQuoteStr('docs_colls', 'tx_dlf_relations')
                        . ' AND tx_dlf_collections.sys_language_uid IN (-1,0)'
                        . Helper::whereClause('tx_dlf_documents')
                        . Helper::whereClause('tx_dlf_collections'),
                    'tx_dlf_collections.index_name'
                );
                while ($resArray = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($result)) {
                    // Strict comparison so that numeric-looking names such as "01" and "1" stay distinct.
                    if (!in_array($resArray['index_name'], $metadata['collection'], TRUE)) {
                        $metadata['collection'][] = $resArray['index_name'];
                    }
                }
            }
            // Extract metadata only from first supported dmdSec.
            $hasSupportedMetadata = TRUE;
            break;
        }
        if ($hasSupportedMetadata) {
            return $metadata;
        } else {
            Helper::devLog('No supported metadata found for logical structure with @ID "' . $id . '"', DEVLOG_SEVERITY_WARNING);
            return [];
        }
    }
562
563
    /**
564
     * {@inheritDoc}
565
     * @see \Kitodo\Dlf\Common\Document::getRawText()
566
     */
567
    public function getRawText($id)
    {
        // Reuse the raw text if it is already present in the raw text array.
        if (!empty($this->rawTextArray[$id])) {
            return $this->rawTextArray[$id];
        }
        // Load fileGrps first, then extract the text only if fulltext files are available.
        $this->_getFileGrps();
        return $this->hasFulltext ? $this->getRawTextFromXml($id) : '';
    }
581
582
    /**
583
     * {@inheritDoc}
584
     * @see Document::getStructureDepth()
585
     */
586
    public function getStructureDepth($logId)
    {
        // The depth is the number of ancestor elements of the div with the given @ID
        // inside the logical structMap; no hit means depth 0.
        $query = './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $logId . '"]/ancestor::*';
        $ancestorNodes = $this->mets->xpath($query);
        return !empty($ancestorNodes) ? count($ancestorNodes) : 0;
    }
595
596
    /**
597
     * {@inheritDoc}
598
     * @see \Kitodo\Dlf\Common\Document::init()
599
     */
600
    protected function init()
    {
        // Namespaces have to be registered on the whole XML before the METS node can be queried.
        $this->registerNamespaces($this->xml);
        $metsNodes = $this->xml->xpath('//mets:mets');
        if (empty($metsNodes)) {
            Helper::devLog('No METS part found in document with UID ' . $this->uid, DEVLOG_SEVERITY_ERROR);
            return;
        }
        // Keep the first METS node and register the namespaces on it as well.
        $this->mets = $metsNodes[0];
        $this->registerNamespaces($this->mets);
    }
613
614
    /**
615
     * {@inheritDoc}
616
     * @see \Kitodo\Dlf\Common\Document::loadLocation()
617
     */
618
    protected function loadLocation($location)
    {
        $parsedXml = FALSE;
        $content = GeneralUtility::getUrl($location);
        if ($content !== FALSE) {
            // Silence libxml's error output and disable external entity loading while parsing;
            // remember the previous settings so they can be restored afterwards.
            $previousErrorSetting = libxml_use_internal_errors(TRUE);
            $previousEntityLoaderSetting = libxml_disable_entity_loader(TRUE);
            // Parse the fetched content.
            $parsedXml = simplexml_load_string($content);
            // Restore both settings in reverse order.
            libxml_disable_entity_loader($previousEntityLoaderSetting);
            libxml_use_internal_errors($previousErrorSetting);
        }
        // Fetching or parsing failed.
        if ($parsedXml === FALSE) {
            Helper::devLog('Could not load XML file from "' . $location . '"', DEVLOG_SEVERITY_ERROR);
            return FALSE;
        }
        $this->xml = $parsedXml;
        return TRUE;
    }
641
642
    /**
643
     * {@inheritDoc}
644
     * @see \Kitodo\Dlf\Common\Document::ensureHasFulltextIsSet()
645
     */
646
    protected function ensureHasFulltextIsSet()
    {
        // Nothing to do if the fileGrps have been loaded before.
        if ($this->fileGrpsLoaded) {
            return;
        }
        $this->_getFileGrps();
    }
653
654
    /**
655
     * {@inheritDoc}
656
     * @see Document::getParentDocumentUid()
657
     */
658
    protected function getParentDocumentUidForSaving($pid, $core)
    {
        // Get the mptr of the closest ancestor of the toplevel div which has a MPTR child.
        $query = './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $this->_getToplevelId() . '"]/ancestor::mets:div[./mets:mptr][1]/mets:mptr';
        $mptrNodes = $this->mets->xpath($query);
        if (empty($mptrNodes)) {
            return 0;
        }
        $parentLocation = (string) $mptrNodes[0]->attributes('http://www.w3.org/1999/xlink')->href;
        // A pointer to this very document is not a parent.
        if ($parentLocation == $this->location) {
            return 0;
        }
        $parentDoc = self::getInstance($parentLocation, $pid);
        if (!$parentDoc->ready) {
            return 0;
        }
        // Save the parent first if its PID differs from the requested one.
        if ($parentDoc->pid != $pid) {
            $parentDoc->save($pid, $core);
        }
        return $parentDoc->uid;
    }
677
678
    /**
679
     * {@inheritDoc}
680
     * @see Document::setPreloadedDocument()
681
     */
682
    protected function setPreloadedDocument($preloadedDocument)
    {
        // Only a \SimpleXMLElement is accepted as preloaded document.
        if (!($preloadedDocument instanceof \SimpleXMLElement)) {
            return FALSE;
        }
        $this->xml = $preloadedDocument;
        return TRUE;
    }
691
692
    /**
     * Hands out the METS part of the loaded XML ($this->mets).
     *
     * {@inheritDoc}
     * @see Document::getDocument()
     */
    protected function getDocument()
    {
        return $this->mets;
    }
700
701
    /**
     * Accessor for $this->cPid, made available via __get()
     *
     * @access protected
     *
     * @return integer The PID of the metadata definitions
     */
    protected function _getCPid()
    {
        return $this->cPid;
    }
712
713
    /**
     * This builds an array of the document's dmdSecs
     *
     * Every dmdSec is inspected on its own: its metadata format is taken from
     * mdWrap/@MDTYPE or, for MDTYPE="OTHER", from mdWrap/@OTHERMDTYPE. Only
     * sections whose format is present in $this->formats and whose root
     * element can be found are registered.
     *
     * @access protected
     *
     * @return array Array of dmdSecs with their IDs as array key
     */
    protected function _getDmdSec()
    {
        if (!$this->dmdSecLoaded) {
            // Get available data formats.
            $this->loadFormats();
            // Get dmdSec nodes from METS.
            $dmdIds = $this->mets->xpath('./mets:dmdSec/@ID');
            if (!empty($dmdIds)) {
                foreach ($dmdIds as $dmdId) {
                    $dmdId = (string) $dmdId;
                    // Reset per section so that a dmdSec with an unknown format
                    // can never inherit the XML node of the previous section.
                    $type = '';
                    $xml = [];
                    $wrapFilter = '';
                    if ($mdType = $this->mets->xpath('./mets:dmdSec[@ID="' . $dmdId . '"]/mets:mdWrap[not(@MDTYPE="OTHER")]/@MDTYPE')) {
                        // Regular metadata format given by @MDTYPE.
                        $type = (string) $mdType[0];
                        $wrapFilter = '[@MDTYPE="' . $type . '"]';
                    } elseif ($otherMdType = $this->mets->xpath('./mets:dmdSec[@ID="' . $dmdId . '"]/mets:mdWrap[@MDTYPE="OTHER"]/@OTHERMDTYPE')) {
                        // Custom metadata format given by @OTHERMDTYPE.
                        $type = (string) $otherMdType[0];
                        $wrapFilter = '[@MDTYPE="OTHER"][@OTHERMDTYPE="' . $type . '"]';
                    }
                    // Only look up the root element for configured formats.
                    if (
                        $wrapFilter !== ''
                        && !empty($this->formats[$type])
                    ) {
                        $xml = $this->mets->xpath('./mets:dmdSec[@ID="' . $dmdId . '"]/mets:mdWrap' . $wrapFilter . '/mets:xmlData/' . strtolower($type) . ':' . $this->formats[$type]['rootElement']);
                    }
                    if (!empty($xml)) {
                        $this->dmdSec[$dmdId]['type'] = $type;
                        $this->dmdSec[$dmdId]['xml'] = $xml[0];
                        $this->registerNamespaces($this->dmdSec[$dmdId]['xml']);
                    }
                }
            }
            $this->dmdSecLoaded = TRUE;
        }
        return $this->dmdSec;
    }
751
752
    /**
     * This builds the file ID -> USE concordance
     *
     * Only file groups whose USE attribute is listed in the extension
     * configuration are considered. As a side effect, $this->hasFulltext is
     * set to TRUE if a file of the configured fulltext group was found.
     *
     * @access protected
     *
     * @return array Array of file use groups with file IDs
     */
    protected function _getFileGrps()
    {
        if ($this->fileGrpsLoaded) {
            return $this->fileGrps;
        }
        // Collect the configured USE attributes.
        $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        $useGrps = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $extConf['fileGrps']);
        foreach (['fileGrpThumbs', 'fileGrpDownload', 'fileGrpFulltext', 'fileGrpAudio'] as $optionalGrp) {
            if (!empty($extConf[$optionalGrp])) {
                $useGrps[] = $extConf[$optionalGrp];
            }
        }
        // Walk through all file groups of the fileSec.
        $fileGrpNodes = $this->mets->xpath('./mets:fileSec/mets:fileGrp');
        if (!empty($fileGrpNodes)) {
            foreach ($fileGrpNodes as $fileGrpNode) {
                $use = (string) $fileGrpNode['USE'];
                // Skip groups which are not configured.
                if (!in_array($use, $useGrps)) {
                    continue;
                }
                foreach ($fileGrpNode->children('http://www.loc.gov/METS/')->file as $fileNode) {
                    $this->fileGrps[(string) $fileNode->attributes()->ID] = $use;
                }
            }
        }
        // Flag the document if at least one fulltext file was registered.
        $hasFulltextGrp = !empty($extConf['fileGrpFulltext'])
            && in_array($extConf['fileGrpFulltext'], $this->fileGrps);
        if ($hasFulltextGrp) {
            $this->hasFulltext = TRUE;
        }
        $this->fileGrpsLoaded = TRUE;
        return $this->fileGrps;
    }
800
801
    /**
     * Fills $this->metadataArray with the metadata of every logical
     * structure node that carries a DMDID attribute.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::prepareMetadataArray()
     */
    protected function prepareMetadataArray($cPid)
    {
        // Get all logical structure nodes with metadata.
        $structIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID]/@ID');
        if (empty($structIds)) {
            return;
        }
        foreach ($structIds as $structId) {
            $key = (string) $structId;
            $this->metadataArray[$key] = $this->getMetadata($key, $cPid);
        }
    }
816
817
    /**
     * Accessor for $this->mets, made available via __get()
     *
     * @access protected
     *
     * @return \SimpleXMLElement The XML's METS part as \SimpleXMLElement object
     */
    protected function _getMets()
    {
        return $this->mets;
    }
828
829
    /**
     * Builds the list of physical structure IDs (the physical sequence
     * first, followed by its child elements sorted by @ORDER) and records
     * the attributes and file references of each node in
     * $this->physicalStructureInfo.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::_getPhysicalStructure()
     */
    protected function _getPhysicalStructure()
    {
        // The structure is only built once.
        if ($this->physicalStructureLoaded) {
            return $this->physicalStructure;
        }
        // Does the document have a structMap node of type "PHYSICAL"?
        $pageNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div');
        if (!empty($pageNodes)) {
            // Get file groups.
            $fileUse = $this->_getFileGrps();
            // Attribute name in the info array => attribute name in METS.
            $attributeMap = [
                'dmdId' => 'DMDID',
                'order' => 'ORDER',
                'label' => 'LABEL',
                'orderlabel' => 'ORDERLABEL',
                'type' => 'TYPE',
                'contentIds' => 'CONTENTIDS'
            ];
            // Records one physical node and hands back its ID.
            $register = function ($node) use ($fileUse, $attributeMap) {
                $nodeId = (string) $node['ID'];
                $this->physicalStructureInfo[$nodeId]['id'] = $nodeId;
                foreach ($attributeMap as $infoKey => $attribute) {
                    // Missing attributes are stored as empty strings.
                    $this->physicalStructureInfo[$nodeId][$infoKey] = (isset($node[$attribute]) ? (string) $node[$attribute] : '');
                }
                // Get the file representations from fileSec node.
                foreach ($node->children('http://www.loc.gov/METS/')->fptr as $fptr) {
                    $fileId = (string) $fptr->attributes()->FILEID;
                    // Check if file has valid @USE attribute.
                    if (!empty($fileUse[$fileId])) {
                        $this->physicalStructureInfo[$nodeId]['files'][$fileUse[$fileId]] = $fileId;
                    }
                }
                return $nodeId;
            };
            // Get the physical sequence's metadata.
            $sequenceNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]');
            $sequence = [$register($sequenceNodes[0])];
            // Build the physical elements' array from the physical structMap node.
            $pagesByOrder = [];
            foreach ($pageNodes as $pageNode) {
                $pagesByOrder[(int) $pageNode['ORDER']] = $register($pageNode);
            }
            // Sort array by keys (= @ORDER).
            if (ksort($pagesByOrder)) {
                // Set total number of pages/tracks.
                $this->numPages = count($pagesByOrder);
                // Merge and re-index the array to get nice numeric indexes.
                $this->physicalStructure = array_merge($sequence, $pagesByOrder);
            }
        }
        $this->physicalStructureLoaded = TRUE;
        return $this->physicalStructure;
    }
889
890
    /**
     * Reads all structLink/smLink nodes and stores them in both
     * directions: logical to physical ('l2p') and physical to logical
     * ('p2l').
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::_getSmLinks()
     */
    protected function _getSmLinks()
    {
        if ($this->smLinksLoaded) {
            return $this->smLinks;
        }
        $linkNodes = $this->mets->xpath('./mets:structLink/mets:smLink');
        if (!empty($linkNodes)) {
            foreach ($linkNodes as $linkNode) {
                // The link's endpoints are stored as XLink attributes.
                $from = (string) $linkNode->attributes('http://www.w3.org/1999/xlink')->from;
                $to = (string) $linkNode->attributes('http://www.w3.org/1999/xlink')->to;
                $this->smLinks['l2p'][$from][] = $to;
                $this->smLinks['p2l'][$to][] = $from;
            }
        }
        $this->smLinksLoaded = TRUE;
        return $this->smLinks;
    }
908
909
    /**
     * Determines the location of the document's thumbnail file.
     *
     * The structure type of the toplevel element is looked up in
     * "tx_dlf_structures"; if that record names another structure type as
     * thumbnail source, the first logical element of that type is used
     * instead. The thumbnail is then taken from the first physical node
     * linked to that element or, if there is no such link, from the first
     * physical element of the document.
     *
     * If no file of the configured thumbnail file group exists for the
     * selected physical node, a warning is logged and $this->thumbnail is
     * left unchanged.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::_getThumbnail()
     */
    protected function _getThumbnail($forceReload = FALSE)
    {
        if (
            !$this->thumbnailLoaded
            || $forceReload
        ) {
            // Retain current PID.
            $cPid = ($this->cPid ? $this->cPid : $this->pid);
            if (!$cPid) {
                Helper::devLog('Invalid PID ' . $cPid . ' for structure definitions', DEVLOG_SEVERITY_ERROR);
                $this->thumbnailLoaded = TRUE;
                return $this->thumbnail;
            }
            // Load extension configuration.
            $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
            if (empty($extConf['fileGrpThumbs'])) {
                Helper::devLog('No fileGrp for thumbnails specified', DEVLOG_SEVERITY_WARNING);
                $this->thumbnailLoaded = TRUE;
                return $this->thumbnail;
            }
            $fileGrpThumbs = $extConf['fileGrpThumbs'];
            $strctId = $this->_getToplevelId();
            $metadata = $this->getTitledata($cPid);

            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('tx_dlf_structures');

            // Get structure element to get thumbnail from.
            $result = $queryBuilder
                ->select('tx_dlf_structures.thumbnail AS thumbnail')
                ->from('tx_dlf_structures')
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($cPid)),
                    $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($metadata['type'][0])),
                    Helper::whereExpression('tx_dlf_structures')
                )
                ->setMaxResults(1)
                ->execute();

            $allResults = $result->fetchAll();

            if (count($allResults) == 1) {
                $resArray = $allResults[0];
                // Get desired thumbnail structure if not the toplevel structure itself.
                if (!empty($resArray['thumbnail'])) {
                    $strctType = Helper::getIndexNameFromUid($resArray['thumbnail'], 'tx_dlf_structures', $cPid);
                    // Check if this document has a structure element of the desired type.
                    $strctIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@TYPE="' . $strctType . '"]/@ID');
                    if (!empty($strctIds)) {
                        $strctId = (string) $strctIds[0];
                    }
                }
                // Load smLinks.
                $this->_getSmLinks();
                // Select the physical node to take the thumbnail from.
                $physId = '';
                if (
                    $this->_getPhysicalStructure()
                    && !empty($this->smLinks['l2p'][$strctId])
                ) {
                    // First physical node linked to the logical structure.
                    $physId = $this->smLinks['l2p'][$strctId][0];
                } elseif (isset($this->physicalStructure[1])) {
                    // Fall back to the first physical element of the document.
                    $physId = $this->physicalStructure[1];
                }
                // Get thumbnail location, but only if the node really has a
                // file in the thumbnail file group. This avoids undefined
                // index notices and a lookup with an empty file ID.
                if (!empty($this->physicalStructureInfo[$physId]['files'][$fileGrpThumbs])) {
                    $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$physId]['files'][$fileGrpThumbs]);
                } else {
                    Helper::devLog('No thumbnail file found in fileGrp "' . $fileGrpThumbs . '"', DEVLOG_SEVERITY_WARNING);
                }
            } else {
                Helper::devLog('No structure of type "' . $metadata['type'][0] . '" found in database', DEVLOG_SEVERITY_ERROR);
            }
            $this->thumbnailLoaded = TRUE;
        }
        return $this->thumbnail;
    }
982
983
    /**
     * Determines the ID of the toplevel logical structure: the first
     * logical node with metadata and without METS pointer that is linked to
     * physical structures, or simply the first such node if none of them is
     * linked.
     *
     * {@inheritDoc}
     * @see \Kitodo\Dlf\Common\Document::_getToplevelId()
     */
    protected function _getToplevelId()
    {
        // Use the ID determined earlier, if any.
        if (!empty($this->toplevelId)) {
            return $this->toplevelId;
        }
        // Get all logical structure nodes with metadata, but without associated METS-Pointers.
        $candidates = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID and not(./mets:mptr)]');
        if (empty($candidates)) {
            return $this->toplevelId;
        }
        // Load smLinks.
        $this->_getSmLinks();
        foreach ($candidates as $candidate) {
            $candidateId = (string) $candidate['ID'];
            // Are there physical structure nodes for this logical structure?
            if (array_key_exists($candidateId, $this->smLinks['l2p'])) {
                // Yes. That's what we're looking for.
                $this->toplevelId = $candidateId;
                break;
            }
            // No. Remember the first one anyway, but keep looking for a better one.
            if (empty($this->toplevelId)) {
                $this->toplevelId = $candidateId;
            }
        }
        return $this->toplevelId;
    }
1011
1012
    /**
     * This magic method is executed prior to any serialization of the object
     * @see __wakeup()
     *
     * The XML is dumped into the string property "asXML" which is serialized
     * in place of the \SimpleXMLElement object.
     *
     * @access public
     *
     * @return array Properties to be serialized
     */
    public function __sleep()
    {
        // \SimpleXMLElement objects can't be serialized, thus save the XML as string for serialization
        $xmlString = $this->xml->asXML();
        $this->asXML = $xmlString;
        return [
            'uid',
            'pid',
            'recordId',
            'parentId',
            'asXML'
        ];
    }
1026
1027
    /**
     * This magic method is used for setting a string value for the object
     *
     * The METS node is copied into a fresh DOM document and written out
     * with output formatting enabled.
     *
     * @access public
     *
     * @return string String representing the METS object
     */
    public function __toString()
    {
        $document = new \DOMDocument('1.0', 'utf-8');
        // Deep-copy the METS node into the new document.
        $metsNode = $document->importNode(dom_import_simplexml($this->mets), TRUE);
        $document->appendChild($metsNode);
        $document->formatOutput = TRUE;
        return $document->saveXML();
    }
1041
1042
    /**
     * This magic method is executed after the object is deserialized
     * @see __sleep()
     *
     * The XML string stored by __sleep() is parsed again and, on success,
     * the object is re-initialized via init().
     *
     * @access public
     *
     * @return void
     */
    public function __wakeup()
    {
        // Turn off libxml's error logging while parsing and restore it afterwards.
        $previousSetting = libxml_use_internal_errors(TRUE);
        // Reload XML from string.
        $restoredXml = @simplexml_load_string($this->asXML);
        libxml_use_internal_errors($previousSetting);
        if ($restoredXml === FALSE) {
            Helper::devLog('Could not load XML after deserialization', DEVLOG_SEVERITY_ERROR);
            return;
        }
        // The string copy is no longer needed.
        $this->asXML = '';
        $this->xml = $restoredXml;
        // Rebuild the unserializable properties.
        $this->init();
    }
1067
}
1068