We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.
Checks if the types of the passed arguments in a function/method call are compatible.
1 | <?php |
||
2 | |||
3 | /** |
||
4 | * (c) Kitodo. Key to digital objects e.V. <[email protected]> |
||
5 | * |
||
6 | * This file is part of the Kitodo and TYPO3 projects. |
||
7 | * |
||
8 | * @license GNU General Public License version 3 or later. |
||
9 | * For the full copyright and license information, please read the |
||
10 | * LICENSE.txt file that was distributed with this source code. |
||
11 | */ |
||
12 | |||
13 | namespace Kitodo\Dlf\Common; |
||
14 | |||
15 | use \DOMDocument; |
||
16 | use \DOMElement; |
||
17 | use \DOMNode; |
||
18 | use \DOMNodeList; |
||
19 | use \DOMXPath; |
||
20 | use \SimpleXMLElement; |
||
21 | use TYPO3\CMS\Core\Configuration\ExtensionConfiguration; |
||
22 | use TYPO3\CMS\Core\Database\ConnectionPool; |
||
23 | use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction; |
||
24 | use TYPO3\CMS\Core\Log\LogManager; |
||
25 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||
26 | use Ubl\Iiif\Tools\IiifHelper; |
||
27 | use Ubl\Iiif\Services\AbstractImageService; |
||
28 | |||
29 | /** |
||
30 | * MetsDocument class for the 'dlf' extension. |
||
31 | * |
||
32 | * @package TYPO3 |
||
33 | * @subpackage dlf |
||
34 | * |
||
35 | * @access public |
||
36 | * |
||
37 | * @property int $cPid this holds the PID for the configuration |
||
38 | * @property-read array $formats this holds the configuration for all supported metadata encodings |
||
39 | * @property bool $formatsLoaded flag with information if the available metadata formats are loaded |
||
40 | * @property-read bool $hasFulltext flag with information if there are any fulltext files available |
||
41 | * @property array $lastSearchedPhysicalPage the last searched logical and physical page |
||
42 | * @property array $logicalUnits this holds the logical units |
||
43 | * @property-read array $metadataArray this holds the documents' parsed metadata array |
||
44 | * @property bool $metadataArrayLoaded flag with information if the metadata array is loaded |
||
45 | * @property-read int $numPages this holds the total number of pages |
||
46 | * @property-read int $numMeasures This holds the total number of measures |
||
47 | * @property-read int $parentId this holds the UID of the parent document or zero if not multi-volumed |
||
48 | * @property-read array $physicalStructure this holds the physical structure |
||
49 | * @property-read array $physicalStructureInfo this holds the physical structure metadata |
||
50 | * @property-read array $musicalStructure This holds the musical structure |
||
51 | * @property-read array $musicalStructureInfo This holds the musical structure metadata |
||
52 | * @property bool $physicalStructureLoaded flag with information if the physical structure is loaded |
||
53 | * @property-read int $pid this holds the PID of the document or zero if not in database |
||
54 | * @property array $rawTextArray this holds the documents' raw text pages with their corresponding structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key |
||
55 | * @property-read bool $ready Is the document instantiated successfully? |
||
56 | * @property-read string $recordId the METS file's / IIIF manifest's record identifier |
||
57 | * @property-read int $rootId this holds the UID of the root document or zero if not multi-volumed |
||
58 | * @property-read array $smLinks this holds the smLinks between logical and physical structMap |
||
59 | * @property bool $smLinksLoaded flag with information if the smLinks are loaded |
||
60 | * @property-read array $tableOfContents this holds the logical structure |
||
61 | * @property bool $tableOfContentsLoaded flag with information if the table of contents is loaded |
||
62 | * @property-read string $thumbnail this holds the document's thumbnail location |
||
63 | * @property bool $thumbnailLoaded flag with information if the thumbnail is loaded |
||
64 | * @property-read string $toplevelId this holds the toplevel structure's "@ID" (METS) or the manifest's "@id" (IIIF) |
||
65 | * @property SimpleXMLElement $xml this holds the whole XML file as SimpleXMLElement object |
||
66 | * @property-read array $mdSec associative array of METS metadata sections indexed by their IDs. |
||
67 | * @property bool $mdSecLoaded flag with information if the array of METS metadata sections is loaded |
||
68 | * @property-read array $dmdSec subset of `$mdSec` storing only the dmdSec entries; kept for compatibility. |
||
69 | * @property-read array $fileGrps this holds the file ID -> USE concordance |
||
70 | * @property bool $fileGrpsLoaded flag with information if file groups array is loaded |
||
71 | * @property-read array $fileInfos additional information about files (e.g., ADMID), indexed by ID. |
||
72 | * @property-read SimpleXMLElement $mets this holds the XML file's METS part as SimpleXMLElement object |
||
73 | * @property-read string $parentHref URL of the parent document (determined via mptr element), or empty string if none is available |
||
74 | */ |
||
75 | final class MetsDocument extends AbstractDocument |
||
76 | { |
||
77 | /** |
||
78 | * @access protected |
||
79 | * @var string[] Subsections / tags that may occur within `<mets:amdSec>` |
||
80 | * |
||
81 | * @link https://www.loc.gov/standards/mets/docs/mets.v1-9.html#amdSec |
||
82 | * @link https://www.loc.gov/standards/mets/docs/mets.v1-9.html#mdSecType |
||
83 | */ |
||
84 | protected const ALLOWED_AMD_SEC = ['techMD', 'rightsMD', 'sourceMD', 'digiprovMD']; |
||
85 | |||
86 | /** |
||
87 | * @access protected |
||
88 | * @var string This holds the whole XML file as string for serialization purposes |
||
89 | * |
||
90 | * @see __sleep() / __wakeup() |
||
91 | */ |
||
92 | protected string $asXML = ''; |
||
93 | |||
94 | /** |
||
95 | * @access protected |
||
96 | * @var array This maps the ID of each amdSec to the IDs of its children (techMD etc.). When an ADMID references an amdSec instead of techMD etc., this is used to iterate the child elements. |
||
97 | */ |
||
98 | protected array $amdSecChildIds = []; |
||
99 | |||
100 | /** |
||
101 | * @access protected |
||
102 | * @var array Associative array of METS metadata sections indexed by their IDs. |
||
103 | */ |
||
104 | protected array $mdSec = []; |
||
105 | |||
106 | /** |
||
107 | * @access protected |
||
108 | * @var bool Are the METS file's metadata sections loaded? |
||
109 | * |
||
110 | * @see MetsDocument::$mdSec |
||
111 | */ |
||
112 | protected bool $mdSecLoaded = false; |
||
113 | |||
114 | /** |
||
115 | * @access protected |
||
116 | * @var array Subset of $mdSec storing only the dmdSec entries; kept for compatibility. |
||
117 | */ |
||
118 | protected array $dmdSec = []; |
||
119 | |||
120 | /** |
||
121 | * @access protected |
||
122 | * @var array This holds the file ID -> USE concordance |
||
123 | * |
||
124 | * @see magicGetFileGrps() |
||
125 | */ |
||
126 | protected array $fileGrps = []; |
||
127 | |||
128 | /** |
||
129 | * @access protected |
||
130 | * @var bool Are the image file groups loaded? |
||
131 | * |
||
132 | * @see $fileGrps |
||
133 | */ |
||
134 | protected bool $fileGrpsLoaded = false; |
||
135 | |||
136 | /** |
||
137 | * @access protected |
||
138 | * @var SimpleXMLElement This holds the XML file's METS part as SimpleXMLElement object |
||
139 | */ |
||
140 | protected SimpleXMLElement $mets; |
||
141 | |||
142 | /** |
||
143 | * @access protected |
||
144 | * @var string URL of the parent document (determined via mptr element), or empty string if none is available |
||
145 | */ |
||
146 | protected string $parentHref = ''; |
||
147 | |||
148 | /** |
||
149 | * @access protected |
||
150 | * @var array the extension settings |
||
151 | */ |
||
152 | protected array $settings = []; |
||
153 | |||
154 | /** |
||
155 | * This holds the musical structure |
||
156 | * |
||
157 | * @var array |
||
158 | * @access protected |
||
159 | */ |
||
160 | protected array $musicalStructure = []; |
||
161 | |||
162 | /** |
||
163 | * This holds the musical structure metadata |
||
164 | * |
||
165 | * @var array |
||
166 | * @access protected |
||
167 | */ |
||
168 | protected array $musicalStructureInfo = []; |
||
169 | |||
170 | /** |
||
171 | * Is the musical structure loaded? |
||
172 | * @see $musicalStructure |
||
173 | * |
||
174 | * @var bool |
||
175 | * @access protected |
||
176 | */ |
||
177 | protected bool $musicalStructureLoaded = false; |
||
178 | |||
179 | /** |
||
180 | * This holds the total number of measures |
||
181 | * |
||
182 | * @var int |
||
183 | * @access protected |
||
184 | */ |
||
185 | protected int $numMeasures; |
||
186 | |||
/**
 * Copies ORDER, LABEL and ORDERLABEL of a logical structure node into the metadata array.
 *
 * The values are written to index 0 of the keys `mets_order`, `mets_label` and
 * `mets_orderlabel`. The metadata array is left untouched when no details are
 * found for the given ID.
 *
 * @access public
 *
 * @param array &$metadata The metadata array to extend
 * @param string $id The "@ID" attribute of the logical structure node
 *
 * @return void
 */
public function addMetadataFromMets(array &$metadata, string $id): void
{
    $structure = $this->getLogicalStructure($id);
    if (empty($structure)) {
        return;
    }
    foreach (['order', 'label', 'orderlabel'] as $field) {
        $metadata['mets_' . $field][0] = $structure[$field];
    }
}
||
206 | |||
/**
 * Takes the record identifier from the METS element's OBJID attribute (if set)
 * and then hands the XML and the record identifier to every registered hook
 * object that implements `postProcessRecordId()`.
 *
 * @see AbstractDocument::establishRecordId()
 */
protected function establishRecordId(int $pid): void
{
    // Use the METS object @ID as record identifier when it is present.
    if (!empty($this->mets['OBJID'])) {
        $this->recordId = (string) $this->mets['OBJID'];
    }
    // Run all hooks registered for this class.
    foreach (Helper::getHookObjects('Classes/Common/MetsDocument.php') as $hook) {
        if (!method_exists($hook, 'postProcessRecordId')) {
            continue;
        }
        $hook->postProcessRecordId($this->xml, $this->recordId);
    }
}
||
225 | |||
/**
 * Resolves the download location of the file with the given ID.
 *
 * - For MIME type `application/vnd.kitodo.iiif` the location is completed to
 *   point at `info.json`, the resource is loaded through IiifHelper and, if it
 *   is an image service, the service's image URL is returned.
 * - For MIME type `application/vnd.netfpx` a `CVT=jpeg` parameter is appended.
 * - Otherwise the (possibly completed) location is returned as is.
 *
 * @see AbstractDocument::getDownloadLocation()
 *
 * @param string $id The "@ID" attribute of the file node
 *
 * @return string The download location, or an empty string if the file is unknown
 */
public function getDownloadLocation(string $id): string
{
    $file = $this->getFileInfo($id);
    if ($file === null) {
        // getFileInfo() returns null for unknown IDs; dereferencing it would
        // end in a TypeError because of the string return type.
        return '';
    }

    $location = (string) ($file['location'] ?? '');
    $mimeType = $file['mimeType'] ?? '';

    if ($mimeType === 'application/vnd.kitodo.iiif') {
        if (substr($location, -9) !== 'info.json') {
            // Append "info.json" without doubling an already present trailing slash.
            $location .= (substr($location, -1) === '/') ? 'info.json' : '/info.json';
        }
        $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey, 'iiif');
        IiifHelper::setUrlReader(IiifUrlReader::getInstance());
        IiifHelper::setMaxThumbnailHeight($conf['thumbnailHeight']);
        IiifHelper::setMaxThumbnailWidth($conf['thumbnailWidth']);
        $service = IiifHelper::loadIiifResource($location);
        if ($service instanceof AbstractImageService) {
            return $service->getImageUrl();
        }
    } elseif ($mimeType === 'application/vnd.netfpx') {
        $baseURL = $location . (strpos($location, '?') === false ? '?' : '');
        // TODO CVT is an optional IIP server capability; in theory, capabilities should be determined in the object request with '&obj=IIP-server'
        return $baseURL . '&CVT=jpeg';
    }
    return $location;
}
||
249 | |||
/**
 * Returns the stored information about a file, filling in `location` and
 * `mimeType` on first access when they are still empty.
 *
 * {@inheritDoc}
 * @see AbstractDocument::getFileInfo()
 *
 * @param mixed $id The "@ID" attribute of the file node
 *
 * @return ?array The file information or null if the ID is unknown
 */
public function getFileInfo($id): ?array
{
    // Make sure the file groups (and with them the file infos) are loaded.
    $this->magicGetFileGrps();

    if (!isset($this->fileInfos[$id])) {
        return null;
    }

    if (empty($this->fileInfos[$id]['location'])) {
        $this->fileInfos[$id]['location'] = $this->getFileLocation($id);
    }

    if (empty($this->fileInfos[$id]['mimeType'])) {
        $this->fileInfos[$id]['mimeType'] = $this->getFileMimeType($id);
    }

    return $this->fileInfos[$id];
}
||
268 | |||
/**
 * Looks up the xlink:href of the URL-typed FLocat of the file with the given ID.
 *
 * @see AbstractDocument::getFileLocation()
 *
 * @param string $id The "@ID" attribute of the file node
 *
 * @return string The file's location, or an empty string (plus a logged warning) if none is found
 */
public function getFileLocation(string $id): string
{
    $query = './mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/mets:FLocat[@LOCTYPE="URL"]';
    $matches = $this->mets->xpath($query);

    if (empty($id) || empty($matches)) {
        $this->logger->warning('There is no file node with @ID "' . $id . '"');
        return '';
    }

    return (string) $matches[0]->attributes('http://www.w3.org/1999/xlink')->href;
}
||
285 | |||
/**
 * This gets the measure beginning of a page.
 *
 * Reads the BEGIN attribute of the area referenced by the given file pointer
 * inside the given div of the physical structMap.
 *
 * @param mixed $pageId The "@ID" attribute of the physical div
 * @param mixed $fileId The "@FILEID" attribute of the file pointer
 *
 * @return string The BEGIN value of the first match, or an empty string if there is none
 */
public function getPageBeginning($pageId, $fileId)
{
    $query = './mets:structMap[@TYPE="PHYSICAL"]'
        . '//mets:div[@ID="' . $pageId . '"]'
        . '/mets:fptr[@FILEID="' . $fileId . '"]'
        . '/mets:area/@BEGIN';
    $begins = $this->mets->xpath($query);

    if (empty($begins)) {
        return '';
    }
    return $begins[0]->__toString();
}
||
300 | |||
/**
 * Looks up the MIMETYPE attribute of the file with the given ID.
 *
 * {@inheritDoc}
 * @see AbstractDocument::getFileMimeType()
 *
 * @param string $id The "@ID" attribute of the file node
 *
 * @return string The MIME type, or an empty string (plus a logged warning) if none is found
 */
public function getFileMimeType(string $id): string
{
    $query = './mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/@MIMETYPE';
    $matches = $this->mets->xpath($query);

    if (empty($id) || empty($matches)) {
        $this->logger->warning('There is no file node with @ID "' . $id . '" or no MIME type specified');
        return '';
    }

    return (string) $matches[0];
}
||
318 | |||
/**
 * Returns the details of a logical structure element or, in recursive mode,
 * appends the complete structure below the matching elements to the table of
 * contents.
 *
 * @see AbstractDocument::getLogicalStructure()
 *
 * @param string $id The "@ID" of the logical div; empty to select all top-level divs
 * @param bool $recursive Whether to walk the structure recursively
 *
 * @return array Details of the first match in non-recursive mode; an empty array
 *               in recursive mode or when nothing matches
 */
public function getLogicalStructure(string $id, bool $recursive = false): array
{
    // Serve already loaded logical units directly (non-recursive requests only).
    if (!$recursive && !empty($this->logicalUnits[$id])) {
        return $this->logicalUnits[$id];
    }

    // Select either the specified logical unit or all units at top level.
    $query = empty($id)
        ? './mets:structMap[@TYPE="LOGICAL"]/mets:div'
        : './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]';
    $divs = $this->mets->xpath($query);

    if (empty($divs)) {
        return [];
    }

    if (!$recursive) {
        // Only the first xpath hit is of interest.
        return $this->getLogicalStructureInfo($divs[0]);
    }

    // Walk the logical structure recursively and fill the whole table of contents.
    foreach ($divs as $div) {
        $this->tableOfContents[] = $this->getLogicalStructureInfo($div, $recursive);
    }
    return [];
}
||
352 | |||
/**
 * This gets details about a logical structure element.
 *
 * The collected details are stored in `$this->logicalUnits` under the
 * element's ID before any child elements are processed.
 *
 * @access protected
 *
 * @param SimpleXMLElement $structure The logical structure node
 * @param bool $recursive Whether to include the child elements
 *
 * @return array Array of the element's id, label, type and physical page indexes/mptr link
 */
protected function getLogicalStructureInfo(SimpleXMLElement $structure, bool $recursive = false): array
{
    $attributes = $structure->attributes();
    // Reads an attribute as string; missing attributes yield an empty string.
    $attr = static function (string $name) use ($attributes): string {
        return isset($attributes[$name]) ? (string) $attributes[$name] : '';
    };

    // Extract identity information.
    $details = [
        'id' => $attr('ID'),
        'dmdId' => $attr('DMDID'),
        'admId' => $attr('ADMID'),
        'order' => $attr('ORDER'),
        'label' => $attr('LABEL'),
        'orderlabel' => $attr('ORDERLABEL'),
        'contentIds' => $attr('CONTENTIDS'),
        'volume' => '',
        'year' => '',
        'pagination' => '',
        'type' => $attr('TYPE'),
        'description' => '',
        'thumbnailId' => null,
        'files' => [],
    ];

    // Fall back to volume and year from the metadata when neither label nor orderlabel is set.
    $hasNoLabels = empty($details['label']) && empty($details['orderlabel']);
    if ($hasNoLabels) {
        $labelMetadata = $this->getMetadata($details['id']);
        $details['volume'] = $labelMetadata['volume'][0] ?? '';
        $details['year'] = $labelMetadata['year'][0] ?? '';
    }

    // Add description for 3D objects.
    if ($details['type'] == 'object') {
        $objectMetadata = $this->getMetadata($details['id']);
        $details['description'] = $objectMetadata['description'][0] ?? '';
    }

    // Load smLinks and physical structure before resolving pages and files.
    $this->magicGetSmLinks();
    $this->magicGetPhysicalStructure();

    $metsNamespace = 'http://www.loc.gov/METS/';
    $this->getPage($details, $structure->children($metsNamespace)->mptr);
    $this->getFiles($details, $structure->children($metsNamespace)->fptr);

    // Keep for later usage.
    $this->logicalUnits[$details['id']] = $details;

    if (!$recursive) {
        return $details;
    }

    // Repeat for all children, if there are any.
    $childDivs = $structure->children($metsNamespace)->div;
    if (count($childDivs)) {
        $details['children'] = [];
        foreach ($childDivs as $childDiv) {
            $details['children'][] = $this->getLogicalStructureInfo($childDiv, true);
        }
    }
    return $details;
}
||
421 | |||
/**
 * Get the files this structure element is pointing at.
 *
 * For every file pointer whose FILEID has a known USE (file group), the file
 * ID is stored in `$details['files']` under that USE.
 *
 * @access private
 *
 * @param array $details passed as reference
 * @param ?SimpleXMLElement $filePointers The fptr elements of the structure node, may be null
 *
 * @return void
 */
private function getFiles(array &$details, ?SimpleXMLElement $filePointers): void
{
    $fileUse = $this->magicGetFileGrps();
    if ($filePointers === null) {
        // Nothing to iterate; foreach over null would raise a warning.
        return;
    }
    // Get the file representations from fileSec node.
    foreach ($filePointers as $filePointer) {
        $fileId = (string) $filePointer->attributes()->FILEID;
        // Check if file has valid @USE attribute.
        if (!empty($fileUse[$fileId])) {
            $details['files'][$fileUse[$fileId]] = $fileId;
        }
    }
}
||
441 | |||
/**
 * Get the physical page or external file this structure element is pointing at.
 *
 * Sets `$details['points']` (and, where applicable, `thumbnailId` and
 * `pagination`). A `thumbnailId` that is still null afterwards is removed
 * from the details.
 *
 * @access private
 *
 * @param array $details passed as reference
 * @param ?SimpleXMLElement $metsPointers The mptr elements of the structure node, may be null
 *
 * @return void
 */
private function getPage(array &$details, ?SimpleXMLElement $metsPointers): void
{
    $logicalId = $details['id'];

    // count(null) raises a TypeError on PHP 8, so the nullable argument is checked first.
    if ($metsPointers !== null && count($metsPointers) > 0) {
        // Yes. Get the file reference.
        $details['points'] = (string) $metsPointers[0]->attributes('http://www.w3.org/1999/xlink')->href;
    } elseif (
        !empty($this->physicalStructure)
        && isset($this->smLinks['l2p'][$logicalId])
    ) {
        $firstPageId = $this->smLinks['l2p'][$logicalId][0] ?? null;
        // Link logical structure to the first corresponding physical page/track.
        $details['points'] = max((int) array_search($firstPageId, $this->physicalStructure, true), 1);
        // Pass the logical ID so the thumbnail is taken from the first page linked
        // to this element instead of always from the document's first page.
        $details['thumbnailId'] = $this->getThumbnail($logicalId);
        // Get page/track number of the first page/track related to this structure element.
        $details['pagination'] = ($firstPageId !== null)
            ? ($this->physicalStructureInfo[$firstPageId]['orderlabel'] ?? '')
            : '';
    } elseif ($logicalId == $this->magicGetToplevelId()) {
        // Point to self if this is the toplevel structure.
        $details['points'] = 1;
        $details['thumbnailId'] = $this->getThumbnail();
    }

    if (($details['thumbnailId'] ?? null) === null) {
        unset($details['thumbnailId']);
    }
}
||
475 | |||
/**
 * Get thumbnail for logical structure info.
 *
 * The thumbnail file groups are read from the extension configuration
 * (`files` / `fileGrpThumbs`) and tried in the configured order; the first
 * non-empty file of the relevant physical page wins.
 *
 * @access private
 *
 * @param string $id empty to use the first physical page, else the ID of the
 *                   logical element whose first linked physical page is used
 *
 * @return ?string thumbnail or null if not found
 */
private function getThumbnail(string $id = ''): ?string
{
    // Load plugin configuration.
    $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey, 'files');
    $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);

    // The physical page does not depend on the file group, so resolve it once.
    $pageId = empty($id)
        ? ($this->physicalStructure[1] ?? null)
        : ($this->smLinks['l2p'][$id][0] ?? null);
    if ($pageId === null) {
        return null;
    }

    $pageFiles = $this->physicalStructureInfo[$pageId]['files'] ?? [];

    foreach ($fileGrpsThumb as $fileGrpThumb) {
        // Skip empty configuration entries instead of aborting the search.
        if ($fileGrpThumb === '') {
            continue;
        }
        if (!empty($pageFiles[$fileGrpThumb])) {
            return (string) $pageFiles[$fileGrpThumb];
        }
    }
    return null;
}
||
507 | |||
/**
 * Collects the metadata of the structure element with the given ID.
 *
 * Returns an empty array when no valid PID can be determined, when the
 * initial metadata is empty or when processing the metadata sections yields
 * nothing.
 *
 * @see AbstractDocument::getMetadata()
 *
 * @param string $id The "@ID" attribute of the structure node
 * @param int $cPid The PID for the metadata definitions; 0 to fall back to the current PID
 *
 * @return array The metadata array, possibly empty
 */
public function getMetadata(string $id, int $cPid = 0): array
{
    $cPid = $this->ensureValidPid($cPid);
    if ($cPid == 0) {
        $this->logger->warning('Invalid PID for metadata definitions');
        return [];
    }

    $initial = $this->getMetadataFromArray($id, $cPid);
    if (empty($initial)) {
        return [];
    }

    $processed = $this->processMetadataSections($id, $cPid, $initial);
    if (empty($processed)) {
        return $processed;
    }

    return $this->setDefaultTitleAndDate($processed);
}
||
534 | |||
/**
 * Ensure that the PID is valid.
 *
 * A positive PID is returned unchanged. Otherwise the document's `cPid` is
 * used, then its `pid`; if neither is set the result is 0.
 *
 * @access private
 *
 * @param int $cPid
 *
 * @return int
 */
private function ensureValidPid(int $cPid): int
{
    if ($cPid > 0) {
        return $cPid;
    }
    if ($this->cPid || $this->pid) {
        // Retain current PID.
        return $this->cPid ?: $this->pid;
    }
    return 0;
}
||
553 | |||
/**
 * Get metadata from array.
 *
 * Returns the already stored metadata for the ID when it exists and the
 * value stored at index 0 of the metadata array equals the given PID;
 * otherwise a freshly initialized 'METS' metadata array.
 *
 * @access private
 *
 * @param string $id
 * @param int $cPid
 *
 * @return array
 */
private function getMetadataFromArray(string $id, int $cPid): array
{
    $isStored = !empty($this->metadataArray[$id]) && $this->metadataArray[0] == $cPid;

    return $isStored
        ? $this->metadataArray[$id]
        : $this->initializeMetadata('METS');
}
||
571 | |||
/**
 * Process metadata sections.
 *
 * Iterates the metadata section IDs referenced by the structure node,
 * extracts metadata from each supported section and records which section
 * types contributed. Only the first `dmdSec` is taken into account.
 *
 * @access private
 *
 * @param string $id The "@ID" attribute of the structure node (or file)
 * @param int $cPid The PID for the metadata definitions
 * @param array $metadata The metadata array to extend
 *
 * @return array The extended metadata, or an empty array if the node has no
 *               metadata section or no descriptive metadata was found
 */
private function processMetadataSections(string $id, int $cPid, array $metadata): array
{
    $mdIds = $this->getMetadataIds($id);
    if (empty($mdIds)) {
        // There is no metadata section for this structure node.
        return [];
    }
    // Array used as set of available section types (dmdSec, techMD, ...)
    $metadataSections = [];
    // Load available metadata formats and metadata sections.
    $this->loadFormats();
    $this->magicGetMdSec();

    $metadata['type'] = $this->getLogicalUnitType($id);

    foreach ($mdIds as $dmdId) {
        if (!isset($this->mdSec[$dmdId]['section'])) {
            // A reference to a section that does not exist would otherwise pass
            // null to the string-typed helpers below and raise a TypeError.
            $this->logger->warning('There is no metadata section with @ID "' . $dmdId . '"');
            continue;
        }
        $mdSectionType = $this->mdSec[$dmdId]['section'];

        if ($this->hasMetadataSection($metadataSections, $mdSectionType, 'dmdSec')) {
            continue;
        }

        if (!$this->extractAndProcessMetadata($dmdId, $mdSectionType, $metadata, $cPid, $metadataSections)) {
            continue;
        }

        $metadataSections[] = $mdSectionType;
    }

    // Files are not expected to reference a dmdSec
    if (isset($this->fileInfos[$id]) || in_array('dmdSec', $metadataSections, true)) {
        return $metadata;
    }

    $this->logger->warning('No supported descriptive metadata found for logical structure with @ID "' . $id . '"');
    return [];
}
||
620 | |||
/**
 * Collects the values of all subentries that belong to the given parent entry.
 *
 * Each matching subentry's XPath is evaluated relative to the parent node;
 * a configured default value is used when no value was found.
 *
 * @access private
 *
 * @param array $allSubentries All configured subentries
 * @param string $parentIndex The index name of the parent metadata entry
 * @param DOMNode $parentNode The node the subentry XPaths are evaluated against
 *
 * @return array|false The subentry values keyed by index name, or false if there are none
 */
private function getSubentries($allSubentries, string $parentIndex, DOMNode $parentNode)
{
    $domXPath = new DOMXPath($parentNode->ownerDocument);
    $this->registerNamespaces($domXPath);
    $theseSubentries = [];
    foreach ($allSubentries as $subentry) {
        if ($subentry['parent_index_name'] != $parentIndex) {
            continue;
        }
        // Only evaluate when there is an expression; an empty XPath is invalid.
        if (!empty($subentry['xpath'])) {
            $values = $domXPath->evaluate($subentry['xpath'], $parentNode);
            if ($values) {
                $theseSubentries = array_merge($theseSubentries, $this->getSubentryValue($values, $subentry));
            }
        }
        // Set default value if applicable.
        if (
            empty($theseSubentries[$subentry['index_name']][0])
            && strlen((string) ($subentry['default_value'] ?? '')) > 0
        ) {
            $theseSubentries[$subentry['index_name']] = [$subentry['default_value']];
        }
    }
    if (empty($theseSubentries)) {
        return false;
    }
    return $theseSubentries;
}
||
652 | |||
/**
 * Converts the result of an XPath evaluation into subentry values.
 *
 * - string results (e.g. from concat()) become a single trimmed value,
 * - node lists contribute the trimmed, non-empty value of every node,
 * - any other scalar result (number, boolean) is cast to string.
 *
 * @access private
 *
 * @param mixed $values The XPath evaluation result
 * @param array $subentry The subentry configuration; `index_name` is used as key
 *
 * @return array The values keyed by the subentry's index name; empty if a node list yields nothing
 */
private function getSubentryValue($values, $subentry): array
{
    $indexName = $subentry['index_name'];

    if (is_string($values)) {
        // if concat is used evaluate returns a string
        return [$indexName => [trim($values)]];
    }

    if ($values instanceof DOMNodeList) {
        $theseSubentries = [];
        foreach ($values as $value) {
            $text = trim((string) $value->nodeValue);
            if (!empty($text)) {
                $theseSubentries[$indexName][] = $text;
            }
        }
        return $theseSubentries;
    }

    // Numbers and booleans have no nodeValue; cast the scalar itself.
    $scalar = ($values instanceof DOMNode) ? $values->nodeValue : $values;
    return [$indexName => [trim((string) $scalar)]];
}
||
680 | |||
/**
 * Get logical unit type.
 *
 * Uses the already loaded logical unit when available, otherwise the TYPE
 * attribute of the matching div in the logical structMap.
 *
 * @access private
 *
 * @param string $id The "@ID" attribute of the logical structure node
 *
 * @return array Single-element array holding the type, or an empty array if none is found
 */
private function getLogicalUnitType(string $id): array
{
    if (!empty($this->logicalUnits[$id])) {
        return [$this->logicalUnits[$id]['type']];
    }

    $types = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]/@TYPE');

    return empty($types) ? [] : [(string) $types[0]];
}
||
702 | |||
/**
 * Extract and process metadata.
 *
 * Does nothing (and reports success) when the section is a `dmdSec` and a
 * `dmdSec` has already been processed. Otherwise the section's metadata is
 * extracted if its type is supported, followed by the additional metadata
 * configured in the database.
 *
 * @access private
 *
 * @param string $dmdId The ID of the metadata section
 * @param string $mdSectionType The type of the metadata section
 * @param array $metadata The metadata array to extend, passed as reference
 * @param int $cPid The PID for the metadata definitions
 * @param array $metadataSections The section types processed so far
 *
 * @return bool false if the section's type is not supported, true otherwise
 */
private function extractAndProcessMetadata(string $dmdId, string $mdSectionType, array &$metadata, int $cPid, array $metadataSections): bool
{
    if ($this->hasMetadataSection($metadataSections, $mdSectionType, 'dmdSec')) {
        return true;
    }

    if (!$this->extractMetadataIfTypeSupported($dmdId, $mdSectionType, $metadata)) {
        return false;
    }

    $configuredMetadata = $this->getAdditionalMetadataFromDatabase($cPid, $dmdId);

    // We need a DOMDocument here, because SimpleXML doesn't support XPath functions properly.
    $sectionNode = dom_import_simplexml($this->mdSec[$dmdId]['xml']);
    $sectionXPath = new DOMXPath($sectionNode->ownerDocument);
    $this->registerNamespaces($sectionXPath);

    $this->processAdditionalMetadata($configuredMetadata, $sectionXPath, $sectionNode, $metadata);

    return true;
}
||
738 | |||
/**
 * Check if searched metadata section is stored in the array.
 *
 * Only true when the current section is of the searched type AND a section
 * of that type is already contained in the array.
 *
 * @access private
 *
 * @param array $metadataSections The section types collected so far
 * @param string $currentMetadataSection The type of the section currently looked at
 * @param string $searchedMetadataSection The section type to search for
 *
 * @return bool
 */
private function hasMetadataSection(array $metadataSections, string $currentMetadataSection, string $searchedMetadataSection): bool
{
    return $currentMetadataSection === $searchedMetadataSection
        // Strict comparison avoids loose matches between section names and non-string entries.
        && in_array($searchedMetadataSection, $metadataSections, true);
}
||
754 | |||
/**
 * Process additional metadata.
 *
 * The `subentries` element is split off first and handed to the field value
 * extraction; every remaining element is treated as one metadata field
 * configuration.
 *
 * @access private
 *
 * @param array $additionalMetadata The metadata configuration, optionally with a `subentries` key
 * @param DOMXPath $domXPath The XPath object of the metadata section
 * @param DOMElement $domNode The metadata section's node
 * @param array $metadata The metadata array to extend, passed as reference
 *
 * @return void
 */
private function processAdditionalMetadata(array $additionalMetadata, DOMXPath $domXPath, DOMElement $domNode, array &$metadata): void
{
    $subentryConfig = [];
    if (isset($additionalMetadata['subentries'])) {
        $subentryConfig = $additionalMetadata['subentries'];
        unset($additionalMetadata['subentries']);
    }

    foreach ($additionalMetadata as $fieldConfig) {
        $this->setMetadataFieldValues($fieldConfig, $domXPath, $domNode, $metadata, $subentryConfig);
        $this->setDefaultMetadataValue($fieldConfig, $metadata);
        $this->setSortableMetadataValue($fieldConfig, $domXPath, $domNode, $metadata);
    }
}
||
780 | |||
/**
 * Set metadata field values.
 *
 * Evaluates the field's XPath against the metadata section. Node list
 * results are stored per node, either as subentry array or as trimmed node
 * value; any other result is stored as a single trimmed string. Fields
 * without a positive format or without XPath are skipped.
 *
 * @access private
 *
 * @param array $resArray The metadata field configuration
 * @param DOMXPath $domXPath The XPath object of the metadata section
 * @param DOMElement $domNode The metadata section's node
 * @param array $metadata The metadata array to extend, passed as reference
 * @param array $subentryResults The configured subentries
 *
 * @return void
 */
private function setMetadataFieldValues(array $resArray, DOMXPath $domXPath, DOMElement $domNode, array &$metadata, array $subentryResults): void
{
    if (!($resArray['format'] > 0) || empty($resArray['xpath'])) {
        return;
    }

    $result = $domXPath->evaluate($resArray['xpath'], $domNode);

    if (!($result instanceof DOMNodeList)) {
        $metadata[$resArray['index_name']] = [trim((string) $result)];
        return;
    }

    if ($result->length <= 0) {
        // An empty node list leaves the field untouched.
        return;
    }

    $metadata[$resArray['index_name']] = [];
    foreach ($result as $node) {
        $nested = $this->getSubentries($subentryResults, $resArray['index_name'], $node);
        $metadata[$resArray['index_name']][] = $nested ?: trim((string) $node->nodeValue);
    }
}
||
813 | |||
/**
 * Fall back to the configured default value if the field has no value yet.
 *
 * The default is applied when the first stored value of the field is
 * empty() and the configured default is a non-empty string.
 *
 * @access private
 *
 * @param array $resArray field definition ('index_name', 'default_value')
 * @param array $metadata metadata array, modified by reference
 *
 * @return void
 */
private function setDefaultMetadataValue(array $resArray, array &$metadata): void
{
    // The default may be NULL or missing in the definition; normalise it so
    // that no null is ever passed to a string function (deprecated in PHP 8.1).
    $defaultValue = (string) ($resArray['default_value'] ?? '');

    if (empty($metadata[$resArray['index_name']][0]) && $defaultValue !== '') {
        // Store the value as configured, not the normalised copy.
        $metadata[$resArray['index_name']] = [$resArray['default_value']];
    }
}
830 | |||
/**
 * Set the "<index_name>_sorting" value for sortable fields.
 *
 * Nothing happens unless the field already has a value and is flagged as
 * sortable. The sorting value is taken from the 'xpath_sorting' expression
 * if one is configured and yields a result; otherwise it falls back to the
 * field's first value (for array values: the comma-joined column 0).
 *
 * @access private
 *
 * @param array $resArray field definition ('index_name', 'is_sortable', 'format', 'xpath_sorting')
 * @param DOMXPath $domXPath XPath evaluator for the node's document
 * @param DOMElement $domNode context node for the XPath expression
 * @param array $metadata metadata array, modified by reference
 *
 * @return void
 */
private function setSortableMetadataValue(array $resArray, DOMXPath $domXPath, DOMElement $domNode, array &$metadata): void
{
    $indexName = $resArray['index_name'];

    // Fields without a value or without the sortable flag get no sorting value.
    if (empty($metadata[$indexName]) || !$resArray['is_sortable']) {
        return;
    }

    $sortingKey = $indexName . '_sorting';

    if ($resArray['format'] > 0 && !empty($resArray['xpath_sorting'])) {
        $values = $domXPath->evaluate($resArray['xpath_sorting'], $domNode);
        if ($values instanceof DOMNodeList && $values->length > 0) {
            $metadata[$sortingKey][0] = trim((string) $values->item(0)->nodeValue);
        } elseif (!($values instanceof DOMNodeList)) {
            $metadata[$sortingKey][0] = trim((string) $values);
        }
    }

    if (empty($metadata[$sortingKey][0])) {
        // Read the first value only after the guard above, so fields without
        // a value no longer trigger an "Undefined array key" warning.
        $currentMetadata = $metadata[$indexName][0] ?? null;
        if (is_array($currentMetadata)) {
            $metadata[$sortingKey][0] = implode(',', array_column($currentMetadata, 0));
        } else {
            $metadata[$sortingKey][0] = $currentMetadata;
        }
    }
}
867 | |||
/**
 * Guarantee that 'title', 'title_sorting' and 'date' each have a first value.
 *
 * An empty title is replaced by an empty string (and resets the sorting
 * title), an empty sorting title falls back to the title, and an empty
 * date becomes an empty string.
 *
 * @access private
 *
 * @param array $metadata metadata array to complete
 *
 * @return array the completed metadata array
 */
private function setDefaultTitleAndDate(array $metadata): array
{
    $titleMissing = empty($metadata['title'][0]);
    if ($titleMissing) {
        // No usable title: both the title and its sorting form become ''.
        $metadata['title'][0] = '';
        $metadata['title_sorting'][0] = '';
    }

    $sortingTitleMissing = empty($metadata['title_sorting'][0]);
    if ($sortingTitleMissing) {
        // Sort by the plain title unless a dedicated sorting title exists.
        $metadata['title_sorting'][0] = $metadata['title'][0];
    }

    $dateMissing = empty($metadata['date'][0]);
    if ($dateMissing) {
        $metadata['date'][0] = '';
    }

    return $metadata;
}
897 | |||
/**
 * Extract metadata if metadata type is supported.
 *
 * The type of the metadata section is looked up in the loaded formats.
 * If a class is configured for it, exists and implements
 * MetadataInterface, its extractMetadata() is called. Unsupported types
 * are logged as notice, unusable classes as warning.
 *
 * @access private
 *
 * @param string $dmdId descriptive metadata id
 * @param string $mdSectionType metadata section type (only used in the log message)
 * @param array &$metadata metadata array, filled by the extractor class
 *
 * @return bool true if extraction successful, false otherwise
 */
private function extractMetadataIfTypeSupported(string $dmdId, string $mdSectionType, array &$metadata): bool
{
    $type = $this->mdSec[$dmdId]['type'];

    // Is this metadata format supported?
    if (empty($this->formats[$type])) {
        $this->logger->notice('Unsupported metadata format "' . $type . '" in ' . $mdSectionType . ' with @ID "' . $dmdId . '"');
        return false;
    }

    // A supported format without a configured class has nothing to extract.
    if (empty($this->formats[$type]['class'])) {
        return false;
    }

    $class = $this->formats[$type]['class'];

    // Get the metadata from class.
    if (class_exists($class)) {
        $obj = GeneralUtility::makeInstance($class);
        if ($obj instanceof MetadataInterface) {
            $useExternalApis = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey, 'general')['useExternalApisForMetadata'];
            $obj->extractMetadata($this->mdSec[$dmdId]['xml'], $metadata, $useExternalApis);
            return true;
        }
    }

    // Reached for a missing class as well as for a class that does not
    // implement MetadataInterface; the latter used to fail without any log entry.
    $this->logger->warning('Invalid class/method "' . $class . '->extractMetadata()" for metadata format "' . $type . '"');

    return false;
}
931 | |||
/**
 * Get additional data from database.
 *
 * Runs three queries against tx_dlf_metadata (hidden records included):
 * fields with an XPath for the format of the given metadata section,
 * fields without format but with a default value, and subentry
 * definitions. The first two result sets are merged into a list; the
 * subentries are appended under the 'subentries' key.
 *
 * @access private
 *
 * @param int $cPid page id
 * @param string $dmdId descriptive metadata id
 *
 * @return array additional metadata data queried from database
 */
private function getAdditionalMetadataFromDatabase(int $cPid, string $dmdId): array
{
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('tx_dlf_metadata');
    // Get hidden records, too.
    $queryBuilder
        ->getRestrictions()
        ->removeByType(HiddenRestriction::class);
    // Get all metadata with configured xpath and applicable format first.
    // Subentries are fetched by a separate query further below.
    $resultWithFormat = $queryBuilder
        ->select(
            'tx_dlf_metadata.index_name AS index_name',
            'tx_dlf_metadataformat_joins.xpath AS xpath',
            'tx_dlf_metadataformat_joins.xpath_sorting AS xpath_sorting',
            'tx_dlf_metadata.is_sortable AS is_sortable',
            'tx_dlf_metadata.default_value AS default_value',
            'tx_dlf_metadata.format AS format'
        )
        ->from('tx_dlf_metadata')
        ->innerJoin(
            'tx_dlf_metadata',
            'tx_dlf_metadataformat',
            'tx_dlf_metadataformat_joins',
            $queryBuilder->expr()->eq(
                'tx_dlf_metadataformat_joins.parent_id',
                'tx_dlf_metadata.uid'
            )
        )
        ->innerJoin(
            'tx_dlf_metadataformat_joins',
            'tx_dlf_formats',
            'tx_dlf_formats_joins',
            $queryBuilder->expr()->eq(
                'tx_dlf_formats_joins.uid',
                'tx_dlf_metadataformat_joins.encoded'
            )
        )
        ->where(
            $queryBuilder->expr()->eq('tx_dlf_metadata.pid', $cPid),
            $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
            $queryBuilder->expr()->eq('tx_dlf_metadataformat_joins.pid', $cPid),
            $queryBuilder->expr()->eq('tx_dlf_formats_joins.type', $queryBuilder->createNamedParameter($this->mdSec[$dmdId]['type']))
        )
        ->execute();

    // Get all metadata without a format, but with a default value next.
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('tx_dlf_metadata');
    // Get hidden records, too.
    $queryBuilder
        ->getRestrictions()
        ->removeByType(HiddenRestriction::class);
    $resultWithoutFormat = $queryBuilder
        ->select(
            'tx_dlf_metadata.index_name AS index_name',
            'tx_dlf_metadata.is_sortable AS is_sortable',
            'tx_dlf_metadata.default_value AS default_value',
            'tx_dlf_metadata.format AS format'
        )
        ->from('tx_dlf_metadata')
        ->where(
            $queryBuilder->expr()->eq('tx_dlf_metadata.pid', $cPid),
            $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
            $queryBuilder->expr()->eq('tx_dlf_metadata.format', 0),
            $queryBuilder->expr()->neq('tx_dlf_metadata.default_value', $queryBuilder->createNamedParameter(''))
        )
        ->execute();

    // Merge both result sets.
    $allResults = array_merge($resultWithFormat->fetchAllAssociative(), $resultWithoutFormat->fetchAllAssociative());

    // Get subentries separately.
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('tx_dlf_metadata');
    // Get hidden records, too.
    $queryBuilder
        ->getRestrictions()
        ->removeByType(HiddenRestriction::class);
    $subentries = $queryBuilder
        ->select(
            'tx_dlf_subentries_joins.index_name AS index_name',
            'tx_dlf_metadata.index_name AS parent_index_name',
            'tx_dlf_subentries_joins.xpath AS xpath',
            'tx_dlf_subentries_joins.default_value AS default_value'
        )
        ->from('tx_dlf_metadata')
        ->innerJoin(
            'tx_dlf_metadata',
            'tx_dlf_metadataformat',
            'tx_dlf_metadataformat_joins',
            $queryBuilder->expr()->eq(
                'tx_dlf_metadataformat_joins.parent_id',
                'tx_dlf_metadata.uid'
            )
        )
        ->innerJoin(
            'tx_dlf_metadataformat_joins',
            'tx_dlf_metadatasubentries',
            'tx_dlf_subentries_joins',
            $queryBuilder->expr()->eq(
                'tx_dlf_subentries_joins.parent_id',
                'tx_dlf_metadataformat_joins.uid'
            )
        )
        ->where(
            $queryBuilder->expr()->eq('tx_dlf_metadata.pid', $cPid),
            $queryBuilder->expr()->gt('tx_dlf_metadataformat_joins.subentries', 0),
            $queryBuilder->expr()->eq('tx_dlf_subentries_joins.l18n_parent', 0),
            $queryBuilder->expr()->eq('tx_dlf_subentries_joins.pid', $cPid)
        )
        ->orderBy('tx_dlf_subentries_joins.sorting')
        ->execute();
    // Same fetch API as for the other two queries; fetchAll() is deprecated in Doctrine DBAL.
    $subentriesResult = $subentries->fetchAllAssociative();

    return array_merge($allResults, ['subentries' => $subentriesResult]);
}
1057 | |||
/**
 * Get IDs of (descriptive and administrative) metadata sections
 * referenced by node of given $id. The $id may refer to either
 * a logical structure node or to a file.
 *
 * The DMDID/ADMID values are taken from the cached logical unit if there
 * is one, otherwise from the matching logical mets:div, otherwise from
 * the file info. ADMIDs that point to an amdSec are resolved to the IDs
 * of its child sections; empty IDs are removed from the result.
 *
 * @access protected
 *
 * @param string $id The "@ID" attribute of the file node
 *
 * @return array list of metadata section IDs (keys are not re-indexed)
 */
protected function getMetadataIds(string $id): array
{
    // Load amdSecChildIds concordance
    $this->magicGetMdSec();
    $fileInfo = $this->getFileInfo($id);

    $dmdIds = '';
    $admIds = '';

    // Get DMDID and ADMID of logical structure node
    if (!empty($this->logicalUnits[$id])) {
        $dmdIds = $this->logicalUnits[$id]['dmdId'] ?? '';
        $admIds = $this->logicalUnits[$id]['admId'] ?? '';
    } else {
        // NOTE(review): $id is inserted into the XPath unescaped - confirm that callers never pass IDs containing '"'.
        $divs = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]');
        // xpath() may return an empty array or false; do not dereference index 0 blindly.
        $mdSec = $divs[0] ?? null;
        if ($mdSec) {
            $dmdIds = (string) $mdSec->attributes()->DMDID;
            $admIds = (string) $mdSec->attributes()->ADMID;
        } elseif (isset($fileInfo)) {
            $dmdIds = $fileInfo['dmdId'];
            $admIds = $fileInfo['admId'];
        }
    }

    // Handle multiple DMDIDs/ADMIDs
    $allMdIds = explode(' ', $dmdIds);

    foreach (explode(' ', $admIds) as $admId) {
        if (isset($this->mdSec[$admId])) {
            // $admId references an actual metadata section such as techMD
            $allMdIds[] = $admId;
        } elseif (isset($this->amdSecChildIds[$admId])) {
            // $admId references a <mets:amdSec> element. Resolve child elements.
            foreach ($this->amdSecChildIds[$admId] as $childId) {
                $allMdIds[] = $childId;
            }
        }
    }

    // Without a callback array_filter() drops exactly the empty() entries.
    return array_filter($allMdIds);
}
1115 | |||
/**
 * Returns the full text for the given ID, or an empty string if the
 * document has no fulltext files.
 *
 * @see AbstractDocument::getFullText()
 */
public function getFullText(string $id): string
{
    // Loading the fileGrps also determines whether fulltext files exist.
    $this->magicGetFileGrps();

    return $this->hasFulltext ? $this->getFullTextFromXml($id) : '';
}
1130 | |||
/**
 * Counts the ancestor elements of the logical mets:div with the given ID;
 * yields 0 if the XPath query has no result.
 *
 * @see AbstractDocument::getStructureDepth()
 */
public function getStructureDepth(string $logId)
{
    $ancestors = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $logId . '"]/ancestor::*');

    return empty($ancestors) ? 0 : count($ancestors);
}
1143 | |||
/**
 * Sets up logger and settings and locates the METS root element in the
 * loaded XML. An error is logged if no METS element is found.
 *
 * @see AbstractDocument::init()
 */
protected function init(string $location, array $settings): void
{
    $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(static::class);
    $this->settings = $settings;

    // Get METS node from XML file.
    $this->registerNamespaces($this->xml);
    $metsNodes = $this->xml->xpath('//mets:mets');

    if (!empty($metsNodes)) {
        $this->mets = $metsNodes[0];
        // Register namespaces.
        $this->registerNamespaces($this->mets);
        return;
    }

    // No METS part: report it with the best identification available.
    if (!empty($location)) {
        $message = 'No METS part found in document with location "' . $location . '".';
    } elseif (!empty($this->recordId)) {
        $message = 'No METS part found in document with recordId "' . $this->recordId . '".';
    } else {
        $message = 'No METS part found in current document.';
    }
    $this->logger->error($message);
}
1168 | |||
/**
 * Fetches the resource at $location, parses it as XML and stores the
 * result in $this->xml. Failure at either step is logged as error.
 *
 * @see AbstractDocument::loadLocation()
 */
protected function loadLocation(string $location): bool
{
    $fileResource = Helper::getUrl($location);
    // Parsing is only attempted if the resource could be fetched.
    $xml = $fileResource !== false ? Helper::getXmlFileAsString($fileResource) : false;

    if ($xml === false) {
        $this->logger->error('Could not load XML file from "' . $location . '"');
        return false;
    }

    // Set some basic properties.
    $this->xml = $xml;

    return true;
}
1186 | |||
/**
 * Loads the fileGrps unless that has already happened; loading them
 * sets the fulltext flag as a side effect.
 *
 * @see AbstractDocument::ensureHasFulltextIsSet()
 */
protected function ensureHasFulltextIsSet(): void
{
    // Are the fileGrps already loaded?
    if ($this->fileGrpsLoaded) {
        return;
    }

    $this->magicGetFileGrps();
}
1197 | |||
/**
 * Accepts a preloaded document only if it is a SimpleXMLElement and, in
 * that case, stores it as the document's XML.
 *
 * @see AbstractDocument::setPreloadedDocument()
 */
protected function setPreloadedDocument($preloadedDocument): bool
{
    $isUsable = $preloadedDocument instanceof SimpleXMLElement;

    if ($isUsable) {
        $this->xml = $preloadedDocument;
    }

    return $isUsable;
}
1210 | |||
/**
 * Returns the METS element held in $this->mets.
 *
 * @see AbstractDocument::getDocument()
 */
protected function getDocument(): SimpleXMLElement
{
    return $this->mets;
}
1218 | |||
/**
 * Build (once) and return the array of the document's metadata sections.
 *
 * All mets:dmdSec elements and all allowed children of mets:amdSec
 * elements are processed. For every amdSec with an ID, the IDs of its
 * processed children are recorded in $this->amdSecChildIds.
 *
 * @access protected
 *
 * @return array Array of metadata sections with their IDs as array key
 */
protected function magicGetMdSec(): array
{
    if ($this->mdSecLoaded) {
        return $this->mdSec;
    }

    $this->loadFormats();

    // Descriptive metadata sections are registered in both lookup arrays.
    foreach ($this->mets->xpath('./mets:dmdSec') as $dmdSecTag) {
        $dmdSec = $this->processMdSec($dmdSecTag);
        if ($dmdSec === null) {
            continue;
        }
        $this->mdSec[$dmdSec['id']] = $dmdSec;
        $this->dmdSec[$dmdSec['id']] = $dmdSec;
    }

    foreach ($this->mets->xpath('./mets:amdSec') as $amdSecTag) {
        $childIds = [];

        foreach ($amdSecTag->children('http://www.loc.gov/METS/') as $mdSecTag) {
            if (!in_array($mdSecTag->getName(), self::ALLOWED_AMD_SEC)) {
                continue;
            }

            // TODO: Should we check that the format may occur within this type (e.g., to ignore VIDEOMD within rightsMD)?
            $mdSec = $this->processMdSec($mdSecTag);
            if ($mdSec === null) {
                continue;
            }

            $this->mdSec[$mdSec['id']] = $mdSec;
            $childIds[] = $mdSec['id'];
        }

        // Only an amdSec that carries an ID can be referenced via ADMID.
        $amdSecId = (string) $amdSecTag->attributes()->ID;
        if (!empty($amdSecId)) {
            $this->amdSecChildIds[$amdSecId] = $childIds;
        }
    }

    $this->mdSecLoaded = true;

    return $this->mdSec;
}
1268 | |||
/**
 * Return the document's descriptive metadata sections.
 *
 * The sections are collected by magicGetMdSec(), which is therefore
 * called first.
 *
 * @access protected
 *
 * @return array Array of metadata sections with their IDs as array key
 */
protected function magicGetDmdSec(): array
{
    // Fills $this->dmdSec on first use.
    $this->magicGetMdSec();

    return $this->dmdSec;
}
1281 | |||
/**
 * Processes an element of METS `mdSecType`.
 *
 * The element must have an ID and wrap metadata of a type that is present
 * in $this->formats, given either as @MDTYPE or, for MDTYPE="OTHER", as
 * @OTHERMDTYPE. The type's configured root element is then looked up
 * inside mets:xmlData.
 *
 * @access protected
 *
 * @param SimpleXMLElement $element the metadata section element
 *
 * @return array|null The processed metadata section ('id', 'section', 'type', 'xml')
 *                    or null if ID, known type or root element is missing
 */
protected function processMdSec(SimpleXMLElement $element): ?array
{
    $mdId = (string) $element->attributes()->ID;
    if (empty($mdId)) {
        return null;
    }

    $this->registerNamespaces($element);

    $mdType = $element->xpath('./mets:mdWrap[not(@MDTYPE="OTHER")]/@MDTYPE');
    $otherMdType = $element->xpath('./mets:mdWrap[@MDTYPE="OTHER"]/@OTHERMDTYPE');

    $type = '';
    $xml = null;

    if (!empty($mdType) && !empty($this->formats[(string) $mdType[0]])) {
        // Standard metadata type declared directly in @MDTYPE.
        $type = (string) $mdType[0];
        $xml = $element->xpath('./mets:mdWrap[@MDTYPE="' . $type . '"]/mets:xmlData/' . strtolower($type) . ':' . $this->formats[$type]['rootElement']);
    } elseif (!empty($otherMdType) && !empty($this->formats[(string) $otherMdType[0]])) {
        // Custom metadata type declared in @OTHERMDTYPE.
        $type = (string) $otherMdType[0];
        $xml = $element->xpath('./mets:mdWrap[@MDTYPE="OTHER"][@OTHERMDTYPE="' . $type . '"]/mets:xmlData/' . strtolower($type) . ':' . $this->formats[$type]['rootElement']);
    }

    if (empty($xml)) {
        return null;
    }

    $rootNode = $xml[0];
    $this->registerNamespaces($rootNode);

    return [
        'id' => $mdId,
        'section' => $element->getName(),
        'type' => $type,
        'xml' => $rootNode,
    ];
}
1325 | |||
/**
 * Build (once) and return the file ID -> USE concordance.
 *
 * Only file groups whose @USE is listed in the extension's 'files'
 * configuration are considered. For every file, its group and its
 * ADMID/DMDID are also stored in $this->fileInfos. The fulltext flag is
 * set if a file of a configured fulltext group was found.
 *
 * @access protected
 *
 * @return array Array of file use groups with file IDs
 */
protected function magicGetFileGrps(): array
{
    if ($this->fileGrpsLoaded) {
        return $this->fileGrps;
    }

    // Get configured USE attributes.
    $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey, 'files');
    $useGrps = GeneralUtility::trimExplode(',', $extConf['fileGrpImages']);
    // The remaining groups are optional and only added when configured.
    foreach (['fileGrpThumbs', 'fileGrpDownload', 'fileGrpFulltext', 'fileGrpAudio', 'fileGrpScore'] as $optionalKey) {
        if (!empty($extConf[$optionalKey])) {
            $useGrps = array_merge($useGrps, GeneralUtility::trimExplode(',', $extConf[$optionalKey]));
        }
    }

    // Get all file groups.
    $fileGrps = $this->mets->xpath('./mets:fileSec/mets:fileGrp');
    if (!empty($fileGrps)) {
        // Build concordance for configured USE attributes.
        foreach ($fileGrps as $fileGrp) {
            $use = (string) $fileGrp['USE'];
            if (!in_array($use, $useGrps)) {
                continue;
            }
            foreach ($fileGrp->children('http://www.loc.gov/METS/')->file as $file) {
                $attributes = $file->attributes();
                $fileId = (string) $attributes->ID;
                $this->fileGrps[$fileId] = $use;
                $this->fileInfos[$fileId] = [
                    'fileGrp' => $use,
                    'admId' => (string) $attributes->ADMID,
                    'dmdId' => (string) $attributes->DMDID,
                ];
            }
        }
    }

    // Are there any fulltext files available?
    if (
        !empty($extConf['fileGrpFulltext'])
        && array_intersect(GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext']), $this->fileGrps) !== []
    ) {
        $this->hasFulltext = true;
    }
    $this->fileGrpsLoaded = true;

    return $this->fileGrps;
}
1385 | |||
/**
 * Fills $this->metadataArray with the metadata of every logical
 * structure node that has a DMDID, keyed by the node's ID.
 *
 * @see AbstractDocument::prepareMetadataArray()
 */
protected function prepareMetadataArray(int $cPid): void
{
    // Get all logical structure nodes with metadata.
    $ids = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID]/@ID');
    if (empty($ids)) {
        return;
    }

    foreach ($ids as $id) {
        $logId = (string) $id;
        $this->metadataArray[$logId] = $this->getMetadata($logId, $cPid);
    }
}
1400 | |||
/**
 * Accessor for the METS element; per the original documentation it is
 * reached through __get().
 *
 * @access protected
 *
 * @return SimpleXMLElement The XML's METS part as SimpleXMLElement object
 */
protected function magicGetMets(): SimpleXMLElement
{
    return $this->mets;
}
1412 | |||
/**
 * Builds (once) the physical structure from the structMap of type
 * "PHYSICAL": records the attributes and files of the physical sequence
 * itself and delegates its child divs to getPhysicalElements().
 *
 * @see AbstractDocument::magicGetPhysicalStructure()
 */
protected function magicGetPhysicalStructure(): array
{
    // Is the physical structure array already built?
    if ($this->physicalStructureLoaded) {
        return $this->physicalStructure;
    }

    // Does the document have a structMap node of type "PHYSICAL"?
    $elementNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div');
    if (!empty($elementNodes)) {
        // Get file groups.
        $fileUse = $this->magicGetFileGrps();
        // Get the physical sequence's metadata.
        $physNode = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]');
        $firstNode = $physNode[0];
        $id = (string) $firstNode['ID'];

        $this->physicalStructureInfo[$id]['id'] = $id;
        // Optional attributes default to an empty string when absent.
        $optionalAttributes = [
            'dmdId' => 'DMDID',
            'admId' => 'ADMID',
            'order' => 'ORDER',
            'label' => 'LABEL',
            'orderlabel' => 'ORDERLABEL',
        ];
        foreach ($optionalAttributes as $infoKey => $attributeName) {
            $this->physicalStructureInfo[$id][$infoKey] = isset($firstNode[$attributeName]) ? (string) $firstNode[$attributeName] : '';
        }
        $this->physicalStructureInfo[$id]['type'] = (string) $firstNode['TYPE'];
        $this->physicalStructureInfo[$id]['contentIds'] = isset($firstNode['CONTENTIDS']) ? (string) $firstNode['CONTENTIDS'] : '';

        $this->getFileRepresentation($id, $firstNode);

        $this->physicalStructure = $this->getPhysicalElements($elementNodes, $fileUse);
    }
    $this->physicalStructureLoaded = true;

    return $this->physicalStructure;
}
1448 | |||
/**
 * Record the files referenced by a physical node's mets:fptr children.
 *
 * For each fptr the FILEID is read from its mets:area child if there is
 * one, otherwise from the fptr itself. Files found in the file ID -> USE
 * concordance are stored under
 * $this->physicalStructureInfo[$id]['files'][<USE>].
 *
 * @access private
 *
 * @param string $id key of the physical structure info entry to fill
 * @param SimpleXMLElement $physicalNode node whose fptr children are read
 *
 * @return void
 */
private function getFileRepresentation(string $id, SimpleXMLElement $physicalNode): void
{
    // Get file groups.
    $fileUse = $this->magicGetFileGrps();
    $filePointers = $physicalNode->children('http://www.loc.gov/METS/')->fptr;

    foreach ($filePointers as $fptr) {
        $fileNode = isset($fptr->area) ? $fptr->area : $fptr;
        $fileId = (string) $fileNode->attributes()->FILEID;

        // Skip files without valid @USE attribute.
        if (empty($fileUse[$fileId])) {
            continue;
        }

        $this->physicalStructureInfo[$id]['files'][$fileUse[$fileId]] = $fileId;
    }
}
1474 | |||
/**
 * Build the physical elements' array from the physical structMap node.
 *
 * Every element node is registered in $this->physicalStructureInfo with
 * its attributes, its files and, for nodes of type "track", its time
 * based track areas. The returned list is ordered by @ORDER and has the
 * ID of the enclosing physical sequence at index 0. $this->numPages is
 * set to the number of elements (index 0 not counted).
 *
 * Elements sharing the same @ORDER value overwrite each other in the
 * returned list.
 *
 * @access private
 *
 * @param array $elementNodes child divs (SimpleXMLElement) of the physical sequence
 * @param array $fileUse file ID -> USE concordance
 *
 * @return array physical sequence ID followed by the element IDs in @ORDER order
 */
private function getPhysicalElements(array $elementNodes, array $fileUse): array
{
    $elements = [];

    foreach ($elementNodes as $elementNode) {
        $id = (string) $elementNode['ID'];
        $order = (int) $elementNode['ORDER'];
        $elements[$order] = $id;
        $this->physicalStructureInfo[$id]['id'] = $id;
        $this->physicalStructureInfo[$id]['dmdId'] = isset($elementNode['DMDID']) ? (string) $elementNode['DMDID'] : '';
        $this->physicalStructureInfo[$id]['admId'] = isset($elementNode['ADMID']) ? (string) $elementNode['ADMID'] : '';
        $this->physicalStructureInfo[$id]['order'] = isset($elementNode['ORDER']) ? (string) $elementNode['ORDER'] : '';
        $this->physicalStructureInfo[$id]['label'] = isset($elementNode['LABEL']) ? (string) $elementNode['LABEL'] : '';
        $this->physicalStructureInfo[$id]['orderlabel'] = isset($elementNode['ORDERLABEL']) ? (string) $elementNode['ORDERLABEL'] : '';
        $this->physicalStructureInfo[$id]['type'] = (string) $elementNode['TYPE'];
        $this->physicalStructureInfo[$id]['contentIds'] = isset($elementNode['CONTENTIDS']) ? (string) $elementNode['CONTENTIDS'] : '';

        $filePointers = $elementNode->children('http://www.loc.gov/METS/')->fptr;

        // Get the file representations from fileSec node.
        foreach ($filePointers as $fptr) {
            $fileNode = $fptr->area ?? $fptr;
            $fileId = (string) $fileNode->attributes()->FILEID;

            // Check if file has valid @USE attribute.
            if (!empty($fileUse[$fileId])) {
                $this->physicalStructureInfo[$id]['files'][$fileUse[$fileId]] = $fileId;
            }
        }

        // Get track info with begin and extent time for later assignment with musical
        if ((string) $elementNode['TYPE'] === 'track') {
            foreach ($filePointers as $fptr) {
                if (!isset($fptr->area)) {
                    continue;
                }
                $area = $fptr->area->attributes();
                $areaFileId = (string) $area->FILEID;
                // Only time based areas of files with valid @USE attribute are tracks.
                if ((string) $area->BETYPE === 'TIME' && !empty($fileUse[$areaFileId])) {
                    $this->physicalStructureInfo[$id]['tracks'][$fileUse[$areaFileId]] = [
                        'fileid' => $areaFileId,
                        'begin' => (string) $area->BEGIN,
                        'betype' => (string) $area->BETYPE,
                        'extent' => (string) $area->EXTENT,
                        'exttype' => (string) $area->EXTTYPE,
                    ];
                }
            }
        }
    }

    // Sort array by keys (= @ORDER).
    ksort($elements);
    // Set total number of pages/tracks.
    $this->numPages = count($elements);

    // Index 0 belongs to the physical sequence itself, i.e. the parent of the
    // element nodes. Previously the ID of the last iterated element ended up
    // here because the loop variable was reused.
    $physSeqId = '';
    $firstNode = reset($elementNodes);
    if ($firstNode instanceof SimpleXMLElement) {
        $parents = $firstNode->xpath('parent::*');
        if (!empty($parents)) {
            $physSeqId = (string) $parents[0]['ID'];
        }
    }

    // Prepend and re-index the array to get numeric indexes.
    array_unshift($elements, $physSeqId);

    return $elements;
}
1541 | |||
/**
 * Builds (once) both directions of the mets:smLink concordance:
 * 'l2p' maps each xlink:from to its xlink:to values, 'p2l' the reverse.
 *
 * @see AbstractDocument::magicGetSmLinks()
 */
protected function magicGetSmLinks(): array
{
    if ($this->smLinksLoaded) {
        return $this->smLinks;
    }

    $smLinks = $this->mets->xpath('./mets:structLink/mets:smLink');
    if (!empty($smLinks)) {
        foreach ($smLinks as $smLink) {
            $xlink = $smLink->attributes('http://www.w3.org/1999/xlink');
            $from = (string) $xlink->from;
            $to = (string) $xlink->to;

            $this->smLinks['l2p'][$from][] = $to;
            $this->smLinks['p2l'][$to][] = $from;
        }
    }
    $this->smLinksLoaded = true;

    return $this->smLinks;
}
1559 | |||
1560 | /** |
||
1561 | * @see AbstractDocument::magicGetThumbnail() |
||
1562 | */ |
||
1563 | protected function magicGetThumbnail(bool $forceReload = false): string |
||
1564 | { |
||
1565 | if ( |
||
1566 | !$this->thumbnailLoaded |
||
1567 | || $forceReload |
||
1568 | ) { |
||
1569 | // Retain current PID. |
||
1570 | $cPid = $this->cPid ?: $this->pid; |
||
1571 | if (!$cPid) { |
||
1572 | $this->logger->error('Invalid PID ' . $cPid . ' for structure definitions'); |
||
1573 | $this->thumbnailLoaded = true; |
||
1574 | return $this->thumbnail; |
||
1575 | } |
||
1576 | // Load extension configuration. |
||
1577 | $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey, 'files'); |
||
1578 | if (empty($extConf['fileGrpThumbs'])) { |
||
1579 | $this->logger->warning('No fileGrp for thumbnails specified'); |
||
1580 | $this->thumbnailLoaded = true; |
||
1581 | return $this->thumbnail; |
||
1582 | } |
||
1583 | $strctId = $this->magicGetToplevelId(); |
||
1584 | $metadata = $this->getToplevelMetadata($cPid); |
||
1585 | |||
1586 | $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class) |
||
1587 | ->getQueryBuilderForTable('tx_dlf_structures'); |
||
1588 | |||
1589 | // Get structure element to get thumbnail from. |
||
1590 | $result = $queryBuilder |
||
1591 | ->select('tx_dlf_structures.thumbnail AS thumbnail') |
||
1592 | ->from('tx_dlf_structures') |
||
1593 | ->where( |
||
1594 | $queryBuilder->expr()->eq('tx_dlf_structures.pid', $cPid), |
||
1595 | $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($metadata['type'][0])), |
||
1596 | Helper::whereExpression('tx_dlf_structures') |
||
1597 | ) |
||
1598 | ->setMaxResults(1) |
||
1599 | ->execute(); |
||
1600 | |||
1601 | $allResults = $result->fetchAllAssociative(); |
||
1602 | |||
1603 | if (count($allResults) == 1) { |
||
1604 | $resArray = $allResults[0]; |
||
1605 | // Get desired thumbnail structure if not the toplevel structure itself. |
||
1606 | if (!empty($resArray['thumbnail'])) { |
||
1607 | $strctType = Helper::getIndexNameFromUid($resArray['thumbnail'], 'tx_dlf_structures', $cPid); |
||
1608 | // Check if this document has a structure element of the desired type. |
||
1609 | $strctIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@TYPE="' . $strctType . '"]/@ID'); |
||
1610 | if (!empty($strctIds)) { |
||
1611 | $strctId = (string) $strctIds[0]; |
||
1612 | } |
||
1613 | } |
||
1614 | // Load smLinks. |
||
1615 | $this->magicGetSmLinks(); |
||
1616 | // Get thumbnail location. |
||
1617 | $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']); |
||
1618 | while ($fileGrpThumb = array_shift($fileGrpsThumb)) { |
||
1619 | if ( |
||
1620 | $this->magicGetPhysicalStructure() |
||
1621 | && !empty($this->smLinks['l2p'][$strctId]) |
||
1622 | && !empty($this->physicalStructureInfo[$this->smLinks['l2p'][$strctId][0]]['files'][$fileGrpThumb]) |
||
1623 | ) { |
||
1624 | $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$this->smLinks['l2p'][$strctId][0]]['files'][$fileGrpThumb]); |
||
1625 | break; |
||
1626 | } elseif (!empty($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb])) { |
||
1627 | $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb]); |
||
1628 | break; |
||
1629 | } |
||
1630 | } |
||
1631 | } else { |
||
1632 | $this->logger->error('No structure of type "' . $metadata['type'][0] . '" found in database'); |
||
1633 | } |
||
1634 | $this->thumbnailLoaded = true; |
||
1635 | } |
||
1636 | return $this->thumbnail; |
||
1637 | } |
||
1638 | |||
/**
 * Lazily determines the ID of the toplevel logical structure: the first
 * logical div with metadata (and without METS pointer) that is linked to
 * physical elements, or - if none is linked - the first such div at all.
 *
 * @see AbstractDocument::magicGetToplevelId()
 */
protected function magicGetToplevelId(): string
{
    // Already resolved earlier.
    if (!empty($this->toplevelId)) {
        return $this->toplevelId;
    }

    // Get all logical structure nodes with metadata, but without associated METS-Pointers.
    $candidates = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID and not(./mets:mptr)]');
    if (empty($candidates)) {
        return $this->toplevelId;
    }

    // The smLinks tell us which logical structures have physical counterparts.
    $this->magicGetSmLinks();
    foreach ($candidates as $candidate) {
        $candidateId = (string) $candidate['ID'];
        if (array_key_exists($candidateId, $this->smLinks['l2p'])) {
            // Linked to physical structure nodes: this is the one we want.
            $this->toplevelId = $candidateId;
            break;
        }
        if (empty($this->toplevelId)) {
            // Not linked. Keep it as a fallback while looking for a better match.
            $this->toplevelId = $candidateId;
        }
    }

    return $this->toplevelId;
}
||
1666 | |||
/**
 * Try to determine URL of parent document.
 *
 * The URL is taken from the xlink:href of the METS pointer belonging to the
 * closest ancestor (with a mets:mptr child) of the toplevel logical structure.
 *
 * @access public
 *
 * @return string The parent's URL as held in $this->parentHref
 */
public function magicGetParentHref(): string
{
    if (empty($this->parentHref)) {
        // Resolve the toplevel ID through its lazy getter: reading the raw
        // property would search for @ID="" if nothing has loaded it yet.
        $toplevelId = $this->magicGetToplevelId();
        // Get the closest ancestor of the current document which has a MPTR child.
        $parentMptr = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $toplevelId . '"]/ancestor::mets:div[./mets:mptr][1]/mets:mptr');
        if (!empty($parentMptr)) {
            $this->parentHref = (string) $parentMptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
        }
    }

    return $this->parentHref;
}
||
1686 | |||
/**
 * Runs before the object is serialized and names the properties to keep.
 * @see __wakeup()
 *
 * @access public
 *
 * @return array Properties to be serialized
 */
public function __sleep(): array
{
    // A SimpleXMLElement cannot be serialized, so its XML string
    // representation is stored in a plain property instead.
    $xmlAsString = $this->xml->asXML();
    $this->asXML = $xmlAsString;

    return ['pid', 'recordId', 'parentId', 'asXML'];
}
||
1701 | |||
/**
 * Renders the METS node as a formatted, standalone XML document string.
 *
 * @access public
 *
 * @return string String representing the METS object
 */
public function __toString(): string
{
    $document = new DOMDocument('1.0', 'utf-8');
    // Copy the METS node (including its subtree) into the fresh document.
    $metsNode = dom_import_simplexml($this->mets);
    $importedNode = $document->importNode($metsNode, true);
    $document->appendChild($importedNode);
    $document->formatOutput = true;

    return $document->saveXML();
}
||
1716 | |||
/**
 * Runs after the object is deserialized and restores the XML from the
 * string that was stored during serialization.
 * @see __sleep()
 *
 * @access public
 *
 * @return void
 */
public function __wakeup(): void
{
    $restoredXml = Helper::getXmlFileAsString($this->asXML);

    if ($restoredXml === false) {
        // init() is not run on this path, so a logger is created here to report the failure.
        $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(static::class);
        $this->logger->error('Could not load XML after deserialization');
        return;
    }

    // The string copy is no longer needed once the XML object is back.
    $this->asXML = '';
    $this->xml = $restoredXml;
    // Rebuild the unserializable properties.
    $this->init('', $this->settings);
}
||
1738 | |||
/**
 * This builds an array of the document's musical structure
 *
 * The measures are read from the structMap of type "MUSICAL". Their details
 * are stored in $this->musicalStructureInfo (keyed by "@ID"). Every measure
 * whose "DEFAULT" file matches the "DEFAULT" file of a physical element is
 * then added to $this->musicalStructure (keyed by the measure's "@ORDER").
 *
 * @access protected
 *
 * @return array Array of measures with the keys "measureid", "physicalid" and "page",
 *               keyed by the measure's "@ORDER" attribute
 */
protected function magicGetMusicalStructure(): array
{
    // The structure is built only once.
    if ($this->musicalStructureLoaded) {
        return $this->musicalStructure;
    }

    $this->numMeasures = 0;
    $metsNamespace = 'http://www.loc.gov/METS/';
    $containerXPath = './mets:structMap[@TYPE="MUSICAL"]/mets:div[@TYPE="measures"]';

    // Does the document have a structMap node of type "MUSICAL"?
    $elementNodes = $this->mets->xpath($containerXPath . '/mets:div');
    if (!empty($elementNodes)) {
        // Get file groups.
        $fileUse = $this->magicGetFileGrps();

        // Reads the descriptive attributes of a structMap div.
        $describeDiv = static function (SimpleXMLElement $div): array {
            return [
                'id' => (string) $div['ID'],
                'dmdId' => isset($div['DMDID']) ? (string) $div['DMDID'] : '',
                'order' => isset($div['ORDER']) ? (string) $div['ORDER'] : '',
                'label' => isset($div['LABEL']) ? (string) $div['LABEL'] : '',
                'orderlabel' => isset($div['ORDERLABEL']) ? (string) $div['ORDERLABEL'] : '',
                'type' => (string) $div['TYPE'],
                'contentIds' => isset($div['CONTENTIDS']) ? (string) $div['CONTENTIDS'] : '',
            ];
        };

        // Reads the attributes of a file pointer's area; yields empty strings
        // when the pointer has no area instead of dereferencing a missing node.
        $describeArea = static function (SimpleXMLElement $fptr): array {
            $attributeNames = [
                'fileid' => 'FILEID',
                'begin' => 'BEGIN',
                'end' => 'END',
                'type' => 'BETYPE',
                'shape' => 'SHAPE',
                'coords' => 'COORDS',
            ];
            $attributes = isset($fptr->area) ? $fptr->area->attributes() : null;
            $area = [];
            foreach ($attributeNames as $key => $attributeName) {
                $area[$key] = ($attributes === null) ? '' : (string) $attributes->{$attributeName};
            }
            return $area;
        };

        // Get the musical sequence's metadata.
        $musicalNode = $this->mets->xpath($containerXPath);
        $containerId = (string) $musicalNode[0]['ID'];
        $this->musicalStructureInfo[$containerId] = array_replace(
            $this->musicalStructureInfo[$containerId] ?? [],
            $describeDiv($musicalNode[0])
        );
        // Get the file representations from fileSec node.
        // TODO: Do we need this for the measurement container element? Can it have any files?
        foreach ($musicalNode[0]->children($metsNamespace)->fptr as $fptr) {
            $area = $describeArea($fptr);
            // Check if file has valid @USE attribute.
            // NOTE(review): the container looks the file up by the fptr's own @FILEID,
            // while the measures below use the area's @FILEID - confirm which is intended.
            $use = $fileUse[(string) $fptr->attributes()->FILEID] ?? null;
            if (!empty($use)) {
                $this->musicalStructureInfo[$containerId]['files'][$use] = $area;
            }

            if ($area['type'] === 'TIME') {
                $this->musicalStructureInfo[$containerId]['begin'] = $area['begin'];
                $this->musicalStructureInfo[$containerId]['end'] = $area['end'];
            }
        }

        // Build the measures' array from the musical structMap node.
        $elements = [];
        foreach ($elementNodes as $elementNode) {
            $elementId = (string) $elementNode['ID'];
            $elements[(int) $elementNode['ORDER']] = $elementId;
            $this->musicalStructureInfo[$elementId] = array_replace(
                $this->musicalStructureInfo[$elementId] ?? [],
                $describeDiv($elementNode)
            );

            // Get the file representations from fileSec node.
            foreach ($elementNode->children($metsNamespace)->fptr as $fptr) {
                $area = $describeArea($fptr);
                // Check if file has valid @USE attribute.
                $use = $fileUse[$area['fileid']] ?? null;
                if (!empty($use)) {
                    $this->musicalStructureInfo[$elementId]['files'][$use] = $area;
                }

                if ($area['type'] === 'TIME') {
                    $this->musicalStructureInfo[$elementId]['begin'] = $area['begin'];
                    $this->musicalStructureInfo[$elementId]['end'] = $area['end'];
                }
            }
        }

        // Set total number of measures (one per distinct @ORDER value).
        $this->numMeasures = count($elements);

        // Collect the "DEFAULT" file of every physical element that has one.
        $this->musicalStructure = [];
        $measurePages = [];
        foreach ($this->magicGetPhysicalStructureInfo() as $physicalId => $page) {
            if (!empty($page['files']['DEFAULT'])) {
                $measurePages[$physicalId] = $page['files']['DEFAULT'];
            }
        }

        // Build final musicalStructure: assign pages to measures.
        foreach ($this->musicalStructureInfo as $measureId => $measureInfo) {
            $measureFileId = $measureInfo['files']['DEFAULT']['fileid'] ?? null;
            if ($measureFileId === null) {
                // Without a "DEFAULT" file the measure cannot be matched to any page.
                continue;
            }
            foreach ($measurePages as $physicalId => $file) {
                if ($measureFileId === $file) {
                    $this->musicalStructure[$measureInfo['order']] = [
                        'measureid' => $measureId,
                        'physicalid' => $physicalId,
                        // Array keys may have been cast to int, hence the string cast for the strict search.
                        'page' => array_search((string) $physicalId, $this->physicalStructure, true),
                    ];
                }
            }
        }
    }
    $this->musicalStructureLoaded = true;

    return $this->musicalStructure;
}
||
1856 | |||
/**
 * This gives an array of the document's musical structure metadata,
 * building the musical structure first if that has not happened yet.
 *
 * @access protected
 *
 * @return array Array of elements' type, label and file representations ordered by "@ID" attribute
 */
protected function magicGetMusicalStructureInfo(): array
{
    // Nothing to do if the musical structure was built before.
    if ($this->musicalStructureLoaded) {
        return $this->musicalStructureInfo;
    }

    // Building the structure fills the info array as well.
    $this->magicGetMusicalStructure();

    return $this->musicalStructureInfo;
}
||
1873 | |||
/**
 * This returns $this->numMeasures via __get()
 *
 * @access protected
 *
 * @return int The total number of measures
 */
protected function magicGetNumMeasures(): int
{
    // Building the musical structure is what sets the number of measures.
    $this->magicGetMusicalStructure();

    return $this->numMeasures;
}
||
1886 | } |
||
1887 |