We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.
| Total Complexity | 209 |
| Total Lines | 1161 |
| Duplicated Lines | 0 % |
| Changes | 10 | ||
| Bugs | 0 | Features | 0 |
Complex classes like MetsDocument often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use MetsDocument, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 67 | final class MetsDocument extends AbstractDocument |
||
| 68 | { |
||
/**
 * @access protected
 * @var string[] Subsections / tags that may occur within `<mets:amdSec>`.
 * Children of an amdSec whose tag name is not in this list are skipped
 * when the metadata sections are collected.
 *
 * @link https://www.loc.gov/standards/mets/docs/mets.v1-9.html#amdSec
 * @link https://www.loc.gov/standards/mets/docs/mets.v1-9.html#mdSecType
 */
protected const ALLOWED_AMD_SEC = ['techMD', 'rightsMD', 'sourceMD', 'digiprovMD'];

/**
 * @access protected
 * @var string This holds the whole XML file as string for serialization purposes
 *
 * @see __sleep() / __wakeup()
 */
protected string $asXML = '';

/**
 * @access protected
 * @var array This maps the ID of each amdSec to the IDs of its children (techMD etc.).
 * When an ADMID references an amdSec instead of techMD etc., this is used to iterate
 * the child elements. Only amdSec elements that carry a non-empty ID get an entry.
 */
protected array $amdSecChildIds = [];

/**
 * @access protected
 * @var array Associative array of METS metadata sections indexed by their IDs.
 * Contains the dmdSec entries as well as the allowed children of each amdSec.
 */
protected array $mdSec = [];

/**
 * @access protected
 * @var bool Are the METS file's metadata sections loaded?
 * Set to true once the sections have been collected, so that the document
 * is only scanned once.
 *
 * @see MetsDocument::$mdSec
 */
protected bool $mdSecLoaded = false;

/**
 * @access protected
 * @var array Subset of $mdSec storing only the dmdSec entries; kept for compatibility.
 */
protected array $dmdSec = [];

/**
 * @access protected
 * @var array This holds the file ID -> USE concordance
 *
 * @see _getFileGrps()
 */
protected array $fileGrps = [];

/**
 * @access protected
 * @var bool Are the image file groups loaded?
 *
 * @see $fileGrps
 */
protected bool $fileGrpsLoaded = false;

/**
 * @access protected
 * @var \SimpleXMLElement This holds the XML file's METS part as \SimpleXMLElement object.
 * It is assigned in init() from the first `mets:mets` node found in the document and
 * stays uninitialized if no such node exists.
 */
protected \SimpleXMLElement $mets;

/**
 * @access protected
 * @var string URL of the parent document (determined via mptr element), or empty string if none is available
 */
protected string $parentHref = '';
||
| 139 | |||
/**
 * Copies the ORDER, LABEL and ORDERLABEL details of a logical structure
 * node into the given metadata array.
 *
 * The values are written to the keys "mets_order", "mets_label" and
 * "mets_orderlabel" (each at index 0). Nothing is written when no details
 * are available for the node.
 *
 * @access public
 *
 * @param array &$metadata The metadata array to extend
 * @param string $id The "@ID" attribute of the logical structure node
 *
 * @return void
 */
public function addMetadataFromMets(array &$metadata, string $id): void
{
    $structure = $this->getLogicalStructure($id);
    if (empty($structure)) {
        // Unknown structure node: leave the metadata array untouched.
        return;
    }
    foreach (['order', 'label', 'orderlabel'] as $field) {
        $metadata['mets_' . $field][0] = $structure[$field];
    }
}
||
| 159 | |||
/**
 * Determines the record identifier of the document.
 *
 * Uses the OBJID attribute of the METS root element when it is not empty,
 * then offers every registered hook object that implements
 * construct_postProcessRecordId() the chance to post-process the value.
 *
 * @see AbstractDocument::establishRecordId()
 *
 * @param int $pid Not used by this implementation
 *
 * @return void
 */
protected function establishRecordId(int $pid): void
{
    // Take the identifier from the METS root's @OBJID, if present.
    if (!empty($this->mets['OBJID'])) {
        $this->recordId = (string) $this->mets['OBJID'];
    }
    // Let hook objects post-process the record identifier.
    foreach (Helper::getHookObjects('Classes/Common/MetsDocument.php') as $hook) {
        if (!method_exists($hook, 'construct_postProcessRecordId')) {
            continue;
        }
        $hook->construct_postProcessRecordId($this->xml, $this->recordId);
    }
}
||
| 178 | |||
/**
 * Resolves the URL from which the file with the given ID can be downloaded.
 *
 * - IIIF image services ("application/vnd.kitodo.iiif"): the service's
 *   info.json is loaded and the image URL reported by the service is returned.
 * - IIP servers ("application/vnd.netfpx"): a JPEG conversion request
 *   ("&CVT=jpeg") is appended to the file location.
 * - Everything else (and IIIF resources that do not resolve to an image
 *   service): the plain file location is returned.
 *
 * @see AbstractDocument::getDownloadLocation()
 *
 * @param string $id The "@ID" attribute of the file node
 *
 * @return string The download URL
 */
public function getDownloadLocation(string $id): string
{
    $file = $this->getFileInfo($id);
    if ($file['mimeType'] === 'application/vnd.kitodo.iiif') {
        $location = $file['location'];
        if (substr($location, -9) !== 'info.json') {
            // Append "info.json"; add a path separator only if the base URL
            // does not already end with one (avoids ".../" becoming "...//info.json").
            $location .= (substr($location, -1) === '/' ? '' : '/') . 'info.json';
        }
        $file['location'] = $location;
        $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        IiifHelper::setUrlReader(IiifUrlReader::getInstance());
        IiifHelper::setMaxThumbnailHeight($conf['iiifThumbnailHeight']);
        IiifHelper::setMaxThumbnailWidth($conf['iiifThumbnailWidth']);
        $service = IiifHelper::loadIiifResource($file['location']);
        if ($service instanceof AbstractImageService) {
            return $service->getImageUrl();
        }
        // Not an image service: fall through and return the info.json location.
    } elseif ($file['mimeType'] === 'application/vnd.netfpx') {
        // Make sure the URL has a query part before appending the conversion parameter.
        $baseURL = $file['location'] . (strpos($file['location'], '?') === false ? '?' : '');
        // TODO CVT is an optional IIP server capability; in theory, capabilities should be determined in the object request with '&obj=IIP-server'
        return $baseURL . '&CVT=jpeg';
    }
    return $file['location'];
}
||
| 202 | |||
/**
 * Returns the information record of the file with the given ID.
 *
 * The file groups are loaded first. For a known file, a missing "location"
 * or "mimeType" entry is resolved from the METS fileSec on demand and
 * stored in the record, so later calls reuse the value.
 *
 * {@inheritDoc}
 * @see AbstractDocument::getFileInfo()
 *
 * @param string $id The "@ID" attribute of the file node
 *
 * @return array|null The file's information record, or null if no file
 *                    with this ID is known
 */
public function getFileInfo($id)
{
    $this->_getFileGrps();

    if (isset($this->fileInfos[$id])) {
        // Lazily fill in the pieces that have not been resolved yet.
        if (empty($this->fileInfos[$id]['location'])) {
            $this->fileInfos[$id]['location'] = $this->getFileLocation($id);
        }
        if (empty($this->fileInfos[$id]['mimeType'])) {
            $this->fileInfos[$id]['mimeType'] = $this->getFileMimeType($id);
        }
    }

    // Unknown IDs yield null explicitly instead of an "undefined index" warning.
    return $this->fileInfos[$id] ?? null;
}
||
| 221 | |||
/**
 * Looks up the URL of a file in the METS fileSec.
 *
 * The URL is read from the xlink:href attribute of the file's first
 * `mets:FLocat` element with LOCTYPE "URL". A warning is logged when the
 * ID is empty or no such element exists.
 *
 * @see AbstractDocument::getFileLocation()
 *
 * @param string $id The "@ID" attribute of the file node
 *
 * @return string The file's location, or an empty string if it cannot be determined
 */
public function getFileLocation(string $id): string
{
    $query = './mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/mets:FLocat[@LOCTYPE="URL"]';
    $fLocatNodes = $this->mets->xpath($query);
    if (empty($id) || empty($fLocatNodes)) {
        $this->logger->warning('There is no file node with @ID "' . $id . '"');
        return '';
    }
    $xlinkAttributes = $fLocatNodes[0]->attributes('http://www.w3.org/1999/xlink');
    return (string) $xlinkAttributes->href;
}
||
| 238 | |||
/**
 * Looks up the MIME type of a file in the METS fileSec.
 *
 * The value is read from the MIMETYPE attribute of the `mets:file` element.
 * A warning is logged when the ID is empty, the file does not exist or it
 * carries no MIMETYPE attribute.
 *
 * @see AbstractDocument::getFileMimeType()
 *
 * @param string $id The "@ID" attribute of the file node
 *
 * @return string The file's MIME type, or an empty string if it cannot be determined
 */
public function getFileMimeType(string $id): string
{
    $query = './mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/@MIMETYPE';
    $mimeTypeNodes = $this->mets->xpath($query);
    if (empty($id) || empty($mimeTypeNodes)) {
        $this->logger->warning('There is no file node with @ID "' . $id . '" or no MIME type specified');
        return '';
    }
    return (string) $mimeTypeNodes[0];
}
||
| 255 | |||
/**
 * Returns the details of a logical structure node, or fills the table of contents.
 *
 * Non-recursive mode: returns the details of the node with the given ID
 * (taken from the already loaded logical units when available), or of the
 * first top-level node when $id is empty.
 *
 * Recursive mode: the details of every matching node, including its
 * descendants, are appended to $this->tableOfContents; the return value is
 * an empty array in this case.
 *
 * @see AbstractDocument::getLogicalStructure()
 *
 * @param string $id The "@ID" attribute of the logical structure node, or an
 *                   empty string to address the top-level node(s)
 * @param bool $recursive Whether to walk the children and fill the table of contents
 *
 * @return array The node's details; empty if nothing was found or $recursive is set
 */
public function getLogicalStructure(string $id, bool $recursive = false): array
{
    // Serve a single unit from the already loaded units when possible.
    if (!$recursive && !empty($this->logicalUnits[$id])) {
        return $this->logicalUnits[$id];
    }

    // Address either the requested unit or all units at top level.
    $query = empty($id)
        ? './mets:structMap[@TYPE="LOGICAL"]/mets:div'
        : './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]';
    $nodes = $this->mets->xpath($query);
    if (empty($nodes)) {
        return [];
    }

    if ($recursive) {
        // Walk the logical structure recursively and fill the whole table of contents.
        foreach ($nodes as $node) {
            $this->tableOfContents[] = $this->getLogicalStructureInfo($node, true);
        }
        return [];
    }

    // Only the first hit is of interest.
    return $this->getLogicalStructureInfo($nodes[0]);
}
||
| 289 | |||
/**
 * This gets details about a logical structure element
 *
 * Collects the element's attributes, the physical page (or external file)
 * it points at, a thumbnail file ID, the pagination label and the files
 * referenced via `mets:fptr`. The result is also stored in
 * $this->logicalUnits under the element's ID.
 *
 * Keys that are always present in the result: id, dmdId, admId, order, label,
 * orderlabel, contentIds, volume, pagination, type, thumbnailId, points, files.
 * Keys that are only present under certain conditions: year, description, children.
 *
 * @access protected
 *
 * @param \SimpleXMLElement $structure The logical structure node
 * @param bool $recursive Whether to include the child elements
 *
 * @return array Array of the element's id, label, type and physical page indexes/mptr link
 */
protected function getLogicalStructureInfo(\SimpleXMLElement $structure, bool $recursive = false): array
{
    $attributes = [];
    // Get attributes.
    foreach ($structure->attributes() as $attribute => $value) {
        $attributes[$attribute] = (string) $value;
    }
    // Load plugin configuration.
    $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
    // Extract identity information.
    $details = [];
    // NOTE(review): @ID is read without an isset() check, unlike the optional attributes below.
    $details['id'] = $attributes['ID'];
    $details['dmdId'] = (isset($attributes['DMDID']) ? $attributes['DMDID'] : '');
    $details['admId'] = (isset($attributes['ADMID']) ? $attributes['ADMID'] : '');
    $details['order'] = (isset($attributes['ORDER']) ? $attributes['ORDER'] : '');
    $details['label'] = (isset($attributes['LABEL']) ? $attributes['LABEL'] : '');
    $details['orderlabel'] = (isset($attributes['ORDERLABEL']) ? $attributes['ORDERLABEL'] : '');
    $details['contentIds'] = (isset($attributes['CONTENTIDS']) ? $attributes['CONTENTIDS'] : '');
    $details['volume'] = '';
    // Set volume and year information only if no label is set and this is the toplevel structure element.
    // NOTE(review): the condition below only checks label/orderlabel; it does not test for the toplevel element.
    // Unlike 'volume', the 'year' key is only created when a year is found.
    if (
        empty($details['label'])
        && empty($details['orderlabel'])
    ) {
        $metadata = $this->getMetadata($details['id']);
        if (!empty($metadata['volume'][0])) {
            $details['volume'] = $metadata['volume'][0];
        }
        if (!empty($metadata['year'][0])) {
            $details['year'] = $metadata['year'][0];
        }
    }
    $details['pagination'] = '';
    // NOTE(review): @TYPE is read without an isset() check.
    $details['type'] = $attributes['TYPE'];
    // add description for 3D objects
    if ($details['type'] == 'object') {
        $metadata = $this->getMetadata($details['id']);
        $details['description'] = $metadata['description'][0] ?? '';
    }
    $details['thumbnailId'] = '';
    // Load smLinks.
    $this->_getSmLinks();
    // Load physical structure.
    $this->_getPhysicalStructure();
    // Get the physical page or external file this structure element is pointing at.
    $details['points'] = '';
    // Is there a mptr node?
    if (count($structure->children('http://www.loc.gov/METS/')->mptr)) {
        // Yes. Get the file reference.
        $details['points'] = (string) $structure->children('http://www.loc.gov/METS/')->mptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
    } elseif (
        !empty($this->physicalStructure)
        && array_key_exists($details['id'], $this->smLinks['l2p'])
    ) {
        // Link logical structure to the first corresponding physical page/track.
        // If the linked physical ID is not found, array_search() yields false (-> 0) and max() falls back to 1.
        $details['points'] = max(intval(array_search($this->smLinks['l2p'][$details['id']][0], $this->physicalStructure, true)), 1);
        // Use the first configured thumbnail file group for which the linked page has a file.
        $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
        while ($fileGrpThumb = array_shift($fileGrpsThumb)) {
            if (!empty($this->physicalStructureInfo[$this->smLinks['l2p'][$details['id']][0]]['files'][$fileGrpThumb])) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$this->smLinks['l2p'][$details['id']][0]]['files'][$fileGrpThumb];
                break;
            }
        }
        // Get page/track number of the first page/track related to this structure element.
        $details['pagination'] = $this->physicalStructureInfo[$this->smLinks['l2p'][$details['id']][0]]['orderlabel'];
    } elseif ($details['id'] == $this->_getToplevelId()) {
        // Point to self if this is the toplevel structure.
        $details['points'] = 1;
        // Take the thumbnail from the physical structure entry at index 1, if there is one.
        $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
        while ($fileGrpThumb = array_shift($fileGrpsThumb)) {
            if (
                !empty($this->physicalStructure)
                && !empty($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb])
            ) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb];
                break;
            }
        }
    }
    // Get the files this structure element is pointing at.
    $details['files'] = [];
    $fileUse = $this->_getFileGrps();
    // Get the file representations from fileSec node.
    foreach ($structure->children('http://www.loc.gov/METS/')->fptr as $fptr) {
        // Check if file has valid @USE attribute.
        // The files are indexed by the value looked up in $fileUse; a later fptr with the same value overwrites an earlier one.
        if (!empty($fileUse[(string) $fptr->attributes()->FILEID])) {
            $details['files'][$fileUse[(string) $fptr->attributes()->FILEID]] = (string) $fptr->attributes()->FILEID;
        }
    }
    // Keep for later usage.
    // The stored copy is taken before children are attached, so it never contains a 'children' key.
    $this->logicalUnits[$details['id']] = $details;
    // Walk the structure recursively? And are there any children of the current element?
    if (
        $recursive
        && count($structure->children('http://www.loc.gov/METS/')->div)
    ) {
        $details['children'] = [];
        foreach ($structure->children('http://www.loc.gov/METS/')->div as $child) {
            // Repeat for all children.
            $details['children'][] = $this->getLogicalStructureInfo($child, true);
        }
    }
    return $details;
}
||
| 404 | |||
/**
 * Extracts the metadata of a logical structure node or file.
 *
 * Processing steps:
 * 1. Determine the PID holding the metadata definitions ($cPid, falling back
 *    to $this->cPid, then $this->pid).
 * 2. Return the entry of $this->metadataArray if one exists for $id and the
 *    array's element 0 equals the PID (this method only reads that array).
 * 3. Resolve the IDs of all metadata sections referenced by the node.
 * 4. For each section with a supported format: let the format's class
 *    extract metadata, then evaluate the XPath expressions configured in the
 *    database and apply default and sorting values.
 * 5. Guarantee that title, title_sorting and date exist.
 *
 * An empty array is returned if no PID can be determined, if the node does
 * not reference any metadata section, or if no supported dmdSec was processed
 * and $id is not a known file.
 *
 * @see AbstractDocument::getMetadata()
 *
 * @param string $id The "@ID" attribute of the logical structure node or file
 * @param int $cPid The PID for the metadata definitions (0 = determine automatically)
 *
 * @return array The extracted metadata, or an empty array (see above)
 */
public function getMetadata(string $id, int $cPid = 0): array
{
    // Make sure $cPid is a non-negative integer.
    $cPid = max(intval($cPid), 0);
    // If $cPid is not given, try to get it elsewhere.
    if (
        !$cPid
        && ($this->cPid || $this->pid)
    ) {
        // Retain current PID.
        $cPid = ($this->cPid ? $this->cPid : $this->pid);
    } elseif (!$cPid) {
        $this->logger->warning('Invalid PID ' . $cPid . ' for metadata definitions');
        return [];
    }
    // Get metadata from parsed metadata array if available.
    // Element 0 of the array is compared with the PID — presumably the PID the cached entries were built for (TODO confirm).
    if (
        !empty($this->metadataArray[$id])
        && $this->metadataArray[0] == $cPid
    ) {
        return $this->metadataArray[$id];
    }

    $metadata = $this->initializeMetadata('METS');

    $mdIds = $this->getMetadataIds($id);
    if (empty($mdIds)) {
        // There is no metadata section for this structure node.
        return [];
    }
    // Associative array used as set of available section types (dmdSec, techMD, ...)
    $hasMetadataSection = [];
    // Load available metadata formats and metadata sections.
    $this->loadFormats();
    $this->_getMdSec();
    // Get the structure's type.
    if (!empty($this->logicalUnits[$id])) {
        $metadata['type'] = [$this->logicalUnits[$id]['type']];
    } else {
        $struct = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]/@TYPE');
        if (!empty($struct)) {
            $metadata['type'] = [(string) $struct[0]];
        }
    }
    // NOTE: despite its name, $dmdId iterates over all referenced section IDs (descriptive and administrative).
    foreach ($mdIds as $dmdId) {
        $mdSectionType = $this->mdSec[$dmdId]['section'];

        // To preserve behavior of previous Kitodo versions, extract metadata only from first supported dmdSec
        // However, we want to extract, for example, all techMD sections (VIDEOMD, AUDIOMD)
        if ($mdSectionType === 'dmdSec' && isset($hasMetadataSection['dmdSec'])) {
            continue;
        }

        // Is this metadata format supported?
        if (!empty($this->formats[$this->mdSec[$dmdId]['type']])) {
            if (!empty($this->formats[$this->mdSec[$dmdId]['type']]['class'])) {
                $class = $this->formats[$this->mdSec[$dmdId]['type']]['class'];
                // Get the metadata from class.
                if (
                    class_exists($class)
                    && ($obj = GeneralUtility::makeInstance($class)) instanceof MetadataInterface
                ) {
                    $obj->extractMetadata($this->mdSec[$dmdId]['xml'], $metadata);
                } else {
                    $this->logger->warning('Invalid class/method "' . $class . '->extractMetadata()" for metadata format "' . $this->mdSec[$dmdId]['type'] . '"');
                }
            }
        } else {
            $this->logger->notice('Unsupported metadata format "' . $this->mdSec[$dmdId]['type'] . '" in ' . $mdSectionType . ' with @ID "' . $dmdId . '"');
            // Continue searching for supported metadata with next @DMDID.
            continue;
        }
        // Get the additional metadata from database.
        // Both queries below are executed once per processed metadata section.
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');
        // Get hidden records, too.
        $queryBuilder
            ->getRestrictions()
            ->removeByType(HiddenRestriction::class);
        // Get all metadata with configured xpath and applicable format first.
        $resultWithFormat = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadataformat_joins.xpath AS xpath',
                'tx_dlf_metadataformat_joins.xpath_sorting AS xpath_sorting',
                'tx_dlf_metadata.is_sortable AS is_sortable',
                'tx_dlf_metadata.default_value AS default_value',
                'tx_dlf_metadata.format AS format'
            )
            ->from('tx_dlf_metadata')
            ->innerJoin(
                'tx_dlf_metadata',
                'tx_dlf_metadataformat',
                'tx_dlf_metadataformat_joins',
                $queryBuilder->expr()->eq(
                    'tx_dlf_metadataformat_joins.parent_id',
                    'tx_dlf_metadata.uid'
                )
            )
            ->innerJoin(
                'tx_dlf_metadataformat_joins',
                'tx_dlf_formats',
                'tx_dlf_formats_joins',
                $queryBuilder->expr()->eq(
                    'tx_dlf_formats_joins.uid',
                    'tx_dlf_metadataformat_joins.encoded'
                )
            )
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                $queryBuilder->expr()->eq('tx_dlf_metadataformat_joins.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_formats_joins.type', $queryBuilder->createNamedParameter($this->mdSec[$dmdId]['type']))
            )
            ->execute();
        // Get all metadata without a format, but with a default value next.
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');
        // Get hidden records, too.
        $queryBuilder
            ->getRestrictions()
            ->removeByType(HiddenRestriction::class);
        $resultWithoutFormat = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadata.is_sortable AS is_sortable',
                'tx_dlf_metadata.default_value AS default_value',
                'tx_dlf_metadata.format AS format'
            )
            ->from('tx_dlf_metadata')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                $queryBuilder->expr()->eq('tx_dlf_metadata.format', 0),
                $queryBuilder->expr()->neq('tx_dlf_metadata.default_value', $queryBuilder->createNamedParameter(''))
            )
            ->execute();
        // Merge both result sets.
        $allResults = array_merge($resultWithFormat->fetchAll(), $resultWithoutFormat->fetchAll());
        // We need a \DOMDocument here, because SimpleXML doesn't support XPath functions properly.
        $domNode = dom_import_simplexml($this->mdSec[$dmdId]['xml']);
        $domXPath = new \DOMXPath($domNode->ownerDocument);
        $this->registerNamespaces($domXPath);
        // OK, now make the XPath queries.
        foreach ($allResults as $resArray) {
            // Set metadata field's value(s).
            // Rows from the second query carry no 'xpath' column and have format 0, so they skip this branch.
            if (
                $resArray['format'] > 0
                && !empty($resArray['xpath'])
                && ($values = $domXPath->evaluate($resArray['xpath'], $domNode))
            ) {
                if (
                    $values instanceof \DOMNodeList
                    && $values->length > 0
                ) {
                    // Node list result: replace any previous value with the trimmed text of all matched nodes.
                    $metadata[$resArray['index_name']] = [];
                    foreach ($values as $value) {
                        $metadata[$resArray['index_name']][] = trim((string) $value->nodeValue);
                    }
                } elseif (!($values instanceof \DOMNodeList)) {
                    // Scalar result (e.g. of an XPath function): store it as a single value.
                    $metadata[$resArray['index_name']] = [trim((string) $values)];
                }
            }
            // Set default value if applicable.
            if (
                empty($metadata[$resArray['index_name']][0])
                && strlen($resArray['default_value']) > 0
            ) {
                $metadata[$resArray['index_name']] = [$resArray['default_value']];
            }
            // Set sorting value if applicable.
            if (
                !empty($metadata[$resArray['index_name']])
                && $resArray['is_sortable']
            ) {
                if (
                    $resArray['format'] > 0
                    && !empty($resArray['xpath_sorting'])
                    && ($values = $domXPath->evaluate($resArray['xpath_sorting'], $domNode))
                ) {
                    if (
                        $values instanceof \DOMNodeList
                        && $values->length > 0
                    ) {
                        $metadata[$resArray['index_name'] . '_sorting'][0] = trim((string) $values->item(0)->nodeValue);
                    } elseif (!($values instanceof \DOMNodeList)) {
                        $metadata[$resArray['index_name'] . '_sorting'][0] = trim((string) $values);
                    }
                }
                // Fall back to the field's first value if no dedicated sorting value was found.
                if (empty($metadata[$resArray['index_name'] . '_sorting'][0])) {
                    $metadata[$resArray['index_name'] . '_sorting'][0] = $metadata[$resArray['index_name']][0];
                }
            }
        }

        // Remember that a section of this type was processed (reached only for supported formats).
        $hasMetadataSection[$mdSectionType] = true;
    }
    // Set title to empty string if not present.
    if (empty($metadata['title'][0])) {
        $metadata['title'][0] = '';
        $metadata['title_sorting'][0] = '';
    }
    // Set title_sorting to title as default.
    if (empty($metadata['title_sorting'][0])) {
        $metadata['title_sorting'][0] = $metadata['title'][0];
    }
    // Set date to empty string if not present.
    if (empty($metadata['date'][0])) {
        $metadata['date'][0] = '';
    }

    // Files are not expected to reference a dmdSec
    if (isset($this->fileInfos[$id]) || isset($hasMetadataSection['dmdSec'])) {
        return $metadata;
    } else {
        $this->logger->warning('No supported descriptive metadata found for logical structure with @ID "' . $id . '"');
        return [];
    }
}
||
| 627 | |||
/**
 * Get IDs of (descriptive and administrative) metadata sections
 * referenced by node of given $id. The $id may refer to either
 * a logical structure node or to a file.
 *
 * The DMDID/ADMID values are looked up in this order: the already loaded
 * logical units, the logical structMap of the METS document, the file
 * information record. An ADMID that references a whole `<mets:amdSec>` is
 * expanded to the IDs of that section's children; an ADMID that matches
 * neither a metadata section nor an amdSec is dropped.
 *
 * @access protected
 *
 * @param string $id The "@ID" attribute of the logical structure node or file node
 *
 * @return array The non-empty metadata section IDs (array keys are not
 *               re-indexed); empty if the node references no metadata section
 */
protected function getMetadataIds(string $id): array
{
    // Load amdSecChildIds concordance
    $this->_getMdSec();
    $fileInfo = $this->getFileInfo($id);

    $dmdIds = '';
    $admIds = '';

    // Get DMDID and ADMID of logical structure node
    if (!empty($this->logicalUnits[$id])) {
        $dmdIds = $this->logicalUnits[$id]['dmdId'] ?? '';
        $admIds = $this->logicalUnits[$id]['admId'] ?? '';
    } else {
        // xpath() may yield an empty array (no such node) or false (error),
        // so the first hit must not be accessed unconditionally.
        $divs = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]');
        if (!empty($divs)) {
            $dmdIds = (string) $divs[0]->attributes()->DMDID;
            $admIds = (string) $divs[0]->attributes()->ADMID;
        } elseif (is_array($fileInfo)) {
            // Not a logical structure node: fall back to the file's own references, if any.
            $dmdIds = $fileInfo['dmdId'] ?? '';
            $admIds = $fileInfo['admId'] ?? '';
        }
    }

    // Handle multiple DMDIDs/ADMIDs
    $allMdIds = explode(' ', (string) $dmdIds);

    foreach (explode(' ', (string) $admIds) as $admId) {
        if (isset($this->mdSec[$admId])) {
            // $admId references an actual metadata section such as techMD
            $allMdIds[] = $admId;
        } elseif (isset($this->amdSecChildIds[$admId])) {
            // $admId references a <mets:amdSec> element. Resolve child elements.
            foreach ($this->amdSecChildIds[$admId] as $childId) {
                $allMdIds[] = $childId;
            }
        }
    }

    // Drop empty entries (e.g. produced by exploding an empty string).
    return array_filter($allMdIds);
}
||
| 682 | |||
/**
 * Returns the full text of the given physical structure node.
 *
 * The file groups are loaded first; the full text is only extracted when
 * the document is flagged as having full text files.
 *
 * @see AbstractDocument::getFullText()
 *
 * @param string $id The "@ID" attribute of the physical structure node
 *
 * @return string The full text, or an empty string if the document has none
 */
public function getFullText(string $id): string
{
    // Load fileGrps and check for full text files.
    $this->_getFileGrps();

    return $this->hasFulltext ? $this->getFullTextFromXml($id) : '';
}
||
| 697 | |||
/**
 * Returns the nesting depth of a logical structure node.
 *
 * The depth is the number of ancestor elements of the matching `mets:div`
 * as reported by the XPath `ancestor::*` axis.
 *
 * @see AbstractDocument::getStructureDepth()
 *
 * @param string $logId The "@ID" attribute of the logical structure node
 *
 * @return int The number of ancestors, or 0 if the node was not found
 */
public function getStructureDepth(string $logId)
{
    $ancestors = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $logId . '"]/ancestor::*');

    return empty($ancestors) ? 0 : count($ancestors);
}
||
| 710 | |||
/**
 * Initializes the logger and locates the METS part of the loaded XML.
 *
 * The first `mets:mets` node found anywhere in the XML becomes $this->mets.
 * If there is none, an error is logged that names the document by its
 * location, by its record ID, or not at all, in that order of preference.
 *
 * @see AbstractDocument::init()
 *
 * @param string $location The location of the document (used for log messages only)
 *
 * @return void
 */
protected function init(string $location): void
{
    $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($this));
    // Get METS node from XML file.
    $this->registerNamespaces($this->xml);
    $metsNodes = $this->xml->xpath('//mets:mets');
    if (!empty($metsNodes)) {
        $this->mets = $metsNodes[0];
        // Register namespaces.
        $this->registerNamespaces($this->mets);
        return;
    }

    // No METS part: report it with the best identification available.
    if (!empty($location)) {
        $message = 'No METS part found in document with location "' . $location . '".';
    } elseif (!empty($this->recordId)) {
        $message = 'No METS part found in document with recordId "' . $this->recordId . '".';
    } else {
        $message = 'No METS part found in current document.';
    }
    $this->logger->error($message);
}
||
| 734 | |||
/**
 * Loads the XML document from the given location into $this->xml.
 *
 * An error is logged when either fetching the resource or reading it as
 * XML fails.
 *
 * @see AbstractDocument::loadLocation()
 *
 * @param string $location The URL of the file to load
 *
 * @return bool true if the XML was loaded, false otherwise
 */
protected function loadLocation(string $location): bool
{
    $xml = false;
    $fileResource = Helper::getUrl($location);
    if ($fileResource !== false) {
        $xml = Helper::getXmlFileAsString($fileResource);
    }

    if ($xml === false) {
        $this->logger->error('Could not load XML file from "' . $location . '"');
        return false;
    }

    // Set some basic properties.
    $this->xml = $xml;
    return true;
}
||
| 752 | |||
/**
 * Makes sure the file groups have been loaded.
 *
 * Presumably loading the file groups also determines whether the document
 * has full text files — TODO confirm against _getFileGrps().
 *
 * @see AbstractDocument::ensureHasFulltextIsSet()
 *
 * @return void
 */
protected function ensureHasFulltextIsSet(): void
{
    // Nothing to do if the fileGrps are already loaded.
    if ($this->fileGrpsLoaded) {
        return;
    }
    $this->_getFileGrps();
}
||
| 763 | |||
| 764 | /** |
||
| 765 | * @see AbstractDocument::setPreloadedDocument() |
||
| 766 | */ |
||
| 767 | protected function setPreloadedDocument($preloadedDocument): bool |
||
| 775 | } |
||
| 776 | |||
| 777 | /** |
||
| 778 | * @see AbstractDocument::getDocument() |
||
| 779 | */ |
||
| 780 | protected function getDocument(): \SimpleXMLElement |
||
| 783 | } |
||
| 784 | |||
/**
 * This builds an array of the document's metadata sections
 *
 * All `<mets:dmdSec>` elements and the allowed children of every `<mets:amdSec>`
 * (see `ALLOWED_AMD_SEC`) are processed once and cached. Descriptive sections
 * are additionally stored in `$this->dmdSec`, and the child IDs of every
 * `<mets:amdSec>` that carries an ID are recorded in `$this->amdSecChildIds`.
 *
 * @access protected
 *
 * @return array Array of metadata sections with their IDs as array key
 */
protected function _getMdSec(): array
{
    if ($this->mdSecLoaded) {
        return $this->mdSec;
    }

    $this->loadFormats();

    // SimpleXMLElement::xpath() may yield false/null on failure, so never iterate it blindly.
    $dmdSecTags = $this->mets->xpath('./mets:dmdSec') ?: [];
    foreach ($dmdSecTags as $dmdSecTag) {
        $dmdSec = $this->processMdSec($dmdSecTag);

        if ($dmdSec !== null) {
            $this->mdSec[$dmdSec['id']] = $dmdSec;
            $this->dmdSec[$dmdSec['id']] = $dmdSec;
        }
    }

    $amdSecTags = $this->mets->xpath('./mets:amdSec') ?: [];
    foreach ($amdSecTags as $amdSecTag) {
        $childIds = [];

        foreach ($amdSecTag->children('http://www.loc.gov/METS/') as $mdSecTag) {
            // Element names are strings, so compare strictly against the allow-list.
            if (!in_array($mdSecTag->getName(), self::ALLOWED_AMD_SEC, true)) {
                continue;
            }

            // TODO: Should we check that the format may occur within this type (e.g., to ignore VIDEOMD within rightsMD)?
            $mdSec = $this->processMdSec($mdSecTag);

            if ($mdSec !== null) {
                $this->mdSec[$mdSec['id']] = $mdSec;

                $childIds[] = $mdSec['id'];
            }
        }

        // Only an amdSec with an ID can be referenced, so only those get a child list.
        $amdSecId = (string) $amdSecTag->attributes()->ID;
        if (!empty($amdSecId)) {
            $this->amdSecChildIds[$amdSecId] = $childIds;
        }
    }

    $this->mdSecLoaded = true;

    return $this->mdSec;
}
||
| 834 | |||
/**
 * Gets the document's descriptive metadata sections
 *
 * @access protected
 *
 * @return array Array of descriptive metadata sections with their IDs as array key
 */
protected function _getDmdSec(): array
{
    // $this->dmdSec is filled while all metadata sections are being built.
    $this->_getMdSec();

    return $this->dmdSec;
}
||
| 847 | |||
/**
 * Processes an element of METS `mdSecType`.
 *
 * The metadata type is read from `@MDTYPE` of the contained `<mets:mdWrap>`,
 * or from `@OTHERMDTYPE` if `@MDTYPE` is "OTHER". The section is only accepted
 * if it has an ID, its type is a known format in `$this->formats` and the
 * format's root element is present inside `<mets:xmlData>`.
 *
 * @access protected
 *
 * @param \SimpleXMLElement $element
 *
 * @return array|null The processed metadata section (keys `id`, `section`, `type`, `xml`) or null if it cannot be used
 */
protected function processMdSec(\SimpleXMLElement $element): ?array
{
    $sectionId = (string) $element->attributes()->ID;
    if (empty($sectionId)) {
        return null;
    }

    $this->registerNamespaces($element);

    // Prefer a regular MDTYPE; fall back to OTHERMDTYPE only if there is none.
    $typeNodes = $element->xpath('./mets:mdWrap[not(@MDTYPE="OTHER")]/@MDTYPE');
    if ($typeNodes) {
        $wrapQueryStart = './mets:mdWrap[@MDTYPE="';
    } else {
        $typeNodes = $element->xpath('./mets:mdWrap[@MDTYPE="OTHER"]/@OTHERMDTYPE');
        if (!$typeNodes) {
            return null;
        }
        $wrapQueryStart = './mets:mdWrap[@MDTYPE="OTHER"][@OTHERMDTYPE="';
    }

    $type = (string) $typeNodes[0];
    if (empty($this->formats[$type])) {
        // Unknown metadata format.
        return null;
    }

    // The lower-cased type doubles as the namespace prefix of the format's root element.
    $rootQuery = $wrapQueryStart . $type . '"]/mets:xmlData/' . strtolower($type) . ':' . $this->formats[$type]['rootElement'];
    $roots = $element->xpath($rootQuery);
    if (empty($roots)) {
        return null;
    }

    $this->registerNamespaces($roots[0]);

    return [
        'id' => $sectionId,
        'section' => $element->getName(),
        'type' => $type,
        'xml' => $roots[0],
    ];
}
||
| 890 | |||
/**
 * This builds the file ID -> USE concordance
 *
 * Only file groups whose `@USE` is configured in the extension configuration
 * (`fileGrpImages`, `fileGrpThumbs`, `fileGrpDownload`, `fileGrpFulltext`,
 * `fileGrpAudio`) are considered. Besides the concordance itself, this fills
 * `$this->fileInfos` and sets `$this->hasFulltext` if any file belongs to a
 * configured fulltext group. The result is cached after the first call.
 *
 * @access protected
 *
 * @return array Array of file use groups with file IDs
 */
protected function _getFileGrps(): array
{
    if ($this->fileGrpsLoaded) {
        return $this->fileGrps;
    }

    // Get configured USE attributes.
    $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);

    // Image file groups are always taken into account, all other kinds are optional.
    $useGrps = GeneralUtility::trimExplode(',', $extConf['fileGrpImages']);
    $fulltextGrps = [];
    foreach (['fileGrpThumbs', 'fileGrpDownload', 'fileGrpFulltext', 'fileGrpAudio'] as $confKey) {
        if (empty($extConf[$confKey])) {
            continue;
        }
        $configuredGrps = GeneralUtility::trimExplode(',', $extConf[$confKey]);
        if ($confKey === 'fileGrpFulltext') {
            // Remember these, they are needed again for the fulltext check below.
            $fulltextGrps = $configuredGrps;
        }
        $useGrps = array_merge($useGrps, $configuredGrps);
    }

    // Get all file groups; xpath() may yield false/null on failure.
    $fileGrps = $this->mets->xpath('./mets:fileSec/mets:fileGrp') ?: [];

    // Build concordance for configured USE attributes.
    foreach ($fileGrps as $fileGrp) {
        $use = (string) $fileGrp['USE'];
        // Strict comparison: loose comparison would treat numeric-looking names such as "1" and "01" as equal.
        if (!in_array($use, $useGrps, true)) {
            continue;
        }
        foreach ($fileGrp->children('http://www.loc.gov/METS/')->file as $file) {
            $fileAttributes = $file->attributes();
            $fileId = (string) $fileAttributes->ID;
            $this->fileGrps[$fileId] = $use;
            $this->fileInfos[$fileId] = [
                'fileGrp' => $use,
                'admId' => (string) $fileAttributes->ADMID,
                'dmdId' => (string) $fileAttributes->DMDID,
            ];
        }
    }

    // Are there any fulltext files available?
    if (array_intersect($fulltextGrps, $this->fileGrps) !== []) {
        $this->hasFulltext = true;
    }

    $this->fileGrpsLoaded = true;

    return $this->fileGrps;
}
||
| 945 | |||
/**
 * Collects the metadata of every logical structure node that references
 * a descriptive metadata section (`@DMDID`) into `$this->metadataArray`,
 * keyed by the node's ID.
 *
 * @see AbstractDocument::prepareMetadataArray()
 *
 * @access protected
 *
 * @param int $cPid PID passed on to `getMetadata()`
 *
 * @return void
 */
protected function prepareMetadataArray(int $cPid): void
{
    // Get the IDs of all logical structure nodes with metadata.
    $nodeIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID]/@ID');
    if (empty($nodeIds)) {
        return;
    }

    foreach ($nodeIds as $nodeId) {
        $key = (string) $nodeId;
        $this->metadataArray[$key] = $this->getMetadata($key, $cPid);
    }
}
||
| 960 | |||
/**
 * Accessor for the METS part of the loaded XML (`$this->mets`),
 * intended to be reached via __get()
 *
 * @access protected
 *
 * @return \SimpleXMLElement The XML's METS part as \SimpleXMLElement object
 */
protected function _getMets(): \SimpleXMLElement
{
    return $this->mets;
}
||
| 972 | |||
/**
 * Builds the physical structure from the `structMap` of type "PHYSICAL".
 *
 * The result is a numerically indexed list of element IDs: index 0 is the
 * physical sequence itself, followed by its direct child divisions sorted by
 * `@ORDER`. Details of every element (attributes and the files referenced by
 * `<mets:fptr>`, keyed by file USE) are stored in `$this->physicalStructureInfo`
 * and the number of child divisions in `$this->numPages`. The structure is
 * built only once and cached.
 *
 * @see AbstractDocument::_getPhysicalStructure()
 *
 * @access protected
 *
 * @return array List of physical element IDs, empty if there is no physical sequence with children
 */
protected function _getPhysicalStructure(): array
{
    // Has the physical structure been built before?
    if ($this->physicalStructureLoaded) {
        return $this->physicalStructure;
    }

    // Does the document have a structMap node of type "PHYSICAL" with child divisions?
    $pageNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div');
    if (!empty($pageNodes)) {
        // Get file groups (file ID -> USE).
        $fileUse = $this->_getFileGrps();

        // Info key => METS attribute; a missing attribute results in an empty string.
        $attributeMap = [
            'id' => 'ID',
            'dmdId' => 'DMDID',
            'admId' => 'ADMID',
            'order' => 'ORDER',
            'label' => 'LABEL',
            'orderlabel' => 'ORDERLABEL',
            'type' => 'TYPE',
            'contentIds' => 'CONTENTIDS',
        ];

        // Stores the details of one physical division and returns its ID.
        $registerNode = function (\SimpleXMLElement $node) use ($fileUse, $attributeMap): string {
            $nodeId = (string) $node['ID'];
            foreach ($attributeMap as $infoKey => $attribute) {
                $this->physicalStructureInfo[$nodeId][$infoKey] = (string) $node[$attribute];
            }
            // Get the file representations from fileSec node.
            foreach ($node->children('http://www.loc.gov/METS/')->fptr as $fptr) {
                $fileId = (string) $fptr->attributes()->FILEID;
                // Only files belonging to a configured USE group are part of the concordance.
                if (!empty($fileUse[$fileId])) {
                    $this->physicalStructureInfo[$nodeId]['files'][$fileUse[$fileId]] = $fileId;
                }
            }
            return $nodeId;
        };

        // Get the physical sequence's metadata.
        $sequenceNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]');
        $physSeq = [$registerNode($sequenceNodes[0])];

        // Build the physical elements' array from the physical structMap node.
        $elements = [];
        foreach ($pageNodes as $pageNode) {
            $elements[(int) $pageNode['ORDER']] = $registerNode($pageNode);
        }

        // Sort array by keys (= @ORDER).
        ksort($elements);
        // Set total number of pages/tracks.
        $this->numPages = count($elements);
        // Merge and re-index the array to get numeric indexes.
        $this->physicalStructure = array_merge($physSeq, $elements);
    }

    $this->physicalStructureLoaded = true;

    return $this->physicalStructure;
}
||
| 1033 | |||
/**
 * Reads all `<mets:smLink>` elements of the `<mets:structLink>` section and
 * stores them in both directions: `l2p` maps every `xlink:from` value to the
 * list of its `xlink:to` values, `p2l` holds the reverse mapping.
 * The links are read only once and cached.
 *
 * @see AbstractDocument::_getSmLinks()
 *
 * @access protected
 *
 * @return array The links, grouped under the keys `l2p` and `p2l`
 */
protected function _getSmLinks(): array
{
    if ($this->smLinksLoaded) {
        return $this->smLinks;
    }

    $links = $this->mets->xpath('./mets:structLink/mets:smLink');
    if (!empty($links)) {
        foreach ($links as $link) {
            // Both ends of a link are XLink attributes.
            $xlink = $link->attributes('http://www.w3.org/1999/xlink');
            $fromId = (string) $xlink->from;
            $toId = (string) $xlink->to;

            $this->smLinks['l2p'][$fromId][] = $toId;
            $this->smLinks['p2l'][$toId][] = $fromId;
        }
    }

    $this->smLinksLoaded = true;

    return $this->smLinks;
}
||
| 1051 | |||
/**
 * Determines the location of the document's thumbnail.
 *
 * The structure type of the document (taken from its title data) is looked
 * up in `tx_dlf_structures`. If that record names another structure type to
 * take the thumbnail from and the document contains such a structure, that
 * one is used instead of the toplevel structure. For every configured
 * thumbnail file group (`fileGrpThumbs`), the first physical element linked
 * to the chosen structure is tried first, then the first physical element of
 * the document. The first file found wins.
 *
 * @see AbstractDocument::_getThumbnail()
 *
 * @access protected
 *
 * @param bool $forceReload Determine the thumbnail again even if this was done before
 *
 * @return string The thumbnail's location; unchanged `$this->thumbnail` if none could be determined
 */
protected function _getThumbnail(bool $forceReload = false): string
{
    if ($this->thumbnailLoaded && !$forceReload) {
        return $this->thumbnail;
    }

    // Retain current PID.
    $cPid = ($this->cPid ? $this->cPid : $this->pid);
    if (!$cPid) {
        $this->logger->error('Invalid PID ' . $cPid . ' for structure definitions');
        $this->thumbnailLoaded = true;
        return $this->thumbnail;
    }

    // Load extension configuration.
    $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
    if (empty($extConf['fileGrpThumbs'])) {
        $this->logger->warning('No fileGrp for thumbnails specified');
        $this->thumbnailLoaded = true;
        return $this->thumbnail;
    }

    $strctId = $this->_getToplevelId();
    $metadata = $this->getTitledata($cPid);
    // The title data may lack a type; avoid reading an undefined offset.
    $structureType = $metadata['type'][0] ?? '';

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('tx_dlf_structures');

    // Get structure element to get thumbnail from.
    $result = $queryBuilder
        ->select('tx_dlf_structures.thumbnail AS thumbnail')
        ->from('tx_dlf_structures')
        ->where(
            $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($cPid)),
            $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($structureType)),
            Helper::whereExpression('tx_dlf_structures')
        )
        ->setMaxResults(1)
        ->execute();

    $allResults = $result->fetchAll();

    if (count($allResults) === 1) {
        $resArray = $allResults[0];
        // Get desired thumbnail structure if not the toplevel structure itself.
        if (!empty($resArray['thumbnail'])) {
            $strctType = Helper::getIndexNameFromUid($resArray['thumbnail'], 'tx_dlf_structures', $cPid);
            // Check if this document has a structure element of the desired type.
            $strctIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@TYPE="' . $strctType . '"]/@ID');
            if (!empty($strctIds)) {
                $strctId = (string) $strctIds[0];
            }
        }

        // Load smLinks and the physical structure once; both are the same for every file group.
        $this->_getSmLinks();
        $physicalStructure = $this->_getPhysicalStructure();

        // Physical elements to look at, in order of preference.
        $candidateIds = [];
        if (!empty($physicalStructure) && !empty($this->smLinks['l2p'][$strctId][0])) {
            // First physical element linked to the chosen logical structure.
            $candidateIds[] = $this->smLinks['l2p'][$strctId][0];
        }
        if (isset($this->physicalStructure[1])) {
            // First physical element of the document (index 0 is the physical sequence itself).
            $candidateIds[] = $this->physicalStructure[1];
        }

        // Get thumbnail location. Empty entries in the configuration are skipped
        // instead of ending the search prematurely.
        $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs'], true);
        foreach ($fileGrpsThumb as $fileGrpThumb) {
            foreach ($candidateIds as $candidateId) {
                if (!empty($this->physicalStructureInfo[$candidateId]['files'][$fileGrpThumb])) {
                    $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$candidateId]['files'][$fileGrpThumb]);
                    break 2;
                }
            }
        }
    } else {
        $this->logger->error('No structure of type "' . $structureType . '" found in database');
    }

    $this->thumbnailLoaded = true;

    return $this->thumbnail;
}
||
| 1130 | |||
/**
 * Determines the ID of the document's toplevel logical structure.
 *
 * Candidates are all logical divisions that reference descriptive metadata
 * (`@DMDID`) and have no `<mets:mptr>` child. The first candidate that is
 * linked to physical structure nodes is chosen; if none is linked, the first
 * candidate is used. The result is cached in `$this->toplevelId`.
 *
 * @see AbstractDocument::_getToplevelId()
 *
 * @access protected
 *
 * @return string The toplevel structure's ID, empty if no candidate exists
 */
protected function _getToplevelId(): string
{
    if (!empty($this->toplevelId)) {
        return $this->toplevelId;
    }

    // Get all logical structure nodes with metadata, but without associated METS-Pointers.
    $divs = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID and not(./mets:mptr)]');
    if (!empty($divs)) {
        // Load smLinks.
        $this->_getSmLinks();
        // NOTE(review): the initial value of $this->smLinks is not visible here; without an
        // 'l2p' entry (e.g. no structLink section) array_key_exists() would raise a TypeError.
        $logicalToPhysical = $this->smLinks['l2p'] ?? [];

        foreach ($divs as $div) {
            $id = (string) $div['ID'];
            // Are there physical structure nodes for this logical structure?
            if (array_key_exists($id, $logicalToPhysical)) {
                // Yes. That's what we're looking for.
                $this->toplevelId = $id;
                break;
            }
            if (empty($this->toplevelId)) {
                // No. Remember this anyway, but keep looking for a better one.
                $this->toplevelId = $id;
            }
        }
    }

    return $this->toplevelId;
}
||
| 1158 | |||
/**
 * Try to determine URL of parent document.
 *
 * The URL is taken from the `xlink:href` of the `<mets:mptr>` belonging to
 * the closest ancestor of the toplevel logical structure that has such a
 * METS pointer. The result is cached in `$this->parentHref`.
 *
 * @access public
 *
 * @return string The parent document's URL, empty if none could be determined
 */
public function _getParentHref(): string
{
    if (empty($this->parentHref)) {
        // Resolve the toplevel ID through its getter: the raw property is still empty
        // if nothing has requested it before, and the query below would match nothing.
        $toplevelId = $this->_getToplevelId();
        // Get the closest ancestor of the current document which has a MPTR child.
        $parentMptr = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $toplevelId . '"]/ancestor::mets:div[./mets:mptr][1]/mets:mptr');
        if (!empty($parentMptr)) {
            $this->parentHref = (string) $parentMptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
        }
    }

    return $this->parentHref;
}
||
| 1178 | |||
| 1179 | /** |
||
| 1180 | * This magic method is executed prior to any serialization of the object |
||
| 1181 | * @see __wakeup() |
||
| 1182 | * |
||
| 1183 | * @access public |
||
| 1184 | * |
||
| 1185 | * @return array Properties to be serialized |
||
| 1186 | */ |
||
| 1187 | public function __sleep(): array |
||
| 1192 | } |
||
| 1193 | |||
| 1194 | /** |
||
| 1195 | * This magic method is used for setting a string value for the object |
||
| 1196 | * |
||
| 1197 | * @access public |
||
| 1198 | * |
||
| 1199 | * @return string String representing the METS object |
||
| 1200 | */ |
||
| 1201 | public function __toString(): string |
||
| 1202 | { |
||
| 1203 | $xml = new \DOMDocument('1.0', 'utf-8'); |
||
| 1204 | $xml->appendChild($xml->importNode(dom_import_simplexml($this->mets), true)); |
||
| 1205 | $xml->formatOutput = true; |
||
| 1207 | } |
||
| 1208 | |||
| 1209 | /** |
||
| 1210 | * This magic method is executed after the object is deserialized |
||
| 1211 | * @see __sleep() |
||
| 1212 | * |
||
| 1213 | * @access public |
||
| 1214 | * |
||
| 1215 | * @return void |
||
| 1216 | */ |
||
| 1217 | public function __wakeup(): void |
||
| 1228 | } |
||
| 1229 | } |
||
| 1231 |