We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.
Total Complexity | 173 |
Total Lines | 925 |
Duplicated Lines | 0 % |
Changes | 4 | ||
Bugs | 0 | Features | 0 |
Complex classes like MetsDocument often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use MetsDocument, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
29 | final class MetsDocument extends Document |
||
30 | { |
||
31 | /** |
||
32 | * This holds the whole XML file as string for serialization purposes |
||
33 | * @see __sleep() / __wakeup() |
||
34 | * |
||
35 | * @var string |
||
36 | * @access protected |
||
37 | */ |
||
38 | protected $asXML = ''; |
||
39 | |||
40 | /** |
||
41 | * This holds the XML file's dmdSec parts with their IDs as array key |
||
42 | * |
||
43 | * @var array |
||
44 | * @access protected |
||
45 | */ |
||
46 | protected $dmdSec = []; |
||
47 | |||
48 | /** |
||
49 | * Are the METS file's dmdSecs loaded? |
||
50 | * @see $dmdSec |
||
51 | * |
||
52 | * @var boolean |
||
53 | * @access protected |
||
54 | */ |
||
55 | protected $dmdSecLoaded = FALSE; |
||
56 | |||
57 | /** |
||
58 | * The extension key |
||
59 | * |
||
60 | * @var string |
||
61 | * @access public |
||
62 | */ |
||
63 | public static $extKey = 'dlf'; |
||
64 | |||
65 | /** |
||
66 | * This holds the file ID -> USE concordance |
||
67 | * @see _getFileGrps() |
||
68 | * |
||
69 | * @var array |
||
70 | * @access protected |
||
71 | */ |
||
72 | protected $fileGrps = []; |
||
73 | |||
74 | /** |
||
75 | * Are the file groups loaded? |
||
76 | * @see $fileGrps |
||
77 | * |
||
78 | * @var boolean |
||
79 | * @access protected |
||
80 | */ |
||
81 | protected $fileGrpsLoaded = FALSE; |
||
82 | |||
83 | /** |
||
84 | * Are the available metadata formats loaded? |
||
85 | * @see $formats |
||
86 | * |
||
87 | * @var boolean |
||
88 | * @access protected |
||
89 | */ |
||
90 | protected $formatsLoaded = FALSE; |
||
91 | |||
92 | /** |
||
93 | * This holds the XML file's METS part as \SimpleXMLElement object |
||
94 | * |
||
95 | * @var \SimpleXMLElement |
||
96 | * @access protected |
||
97 | */ |
||
98 | protected $mets; |
||
99 | |||
100 | /** |
||
101 | * This holds the whole XML file as \SimpleXMLElement object |
||
102 | * |
||
103 | * @var \SimpleXMLElement |
||
104 | * @access protected |
||
105 | */ |
||
106 | protected $xml; |
||
107 | |||
/**
 * Copies the label and order label of a logical structure node into the
 * given metadata array (as 'mets_label' and 'mets_orderlabel').
 *
 * @access public
 *
 * @param array &$metadata: The metadata array to extend (modified in place)
 * @param string $id: The @ID attribute of the logical structure node
 *
 * @return void
 */
public function addMetadataFromMets(&$metadata, $id) {
    $structure = $this->getLogicalStructure($id);
    if (empty($structure)) {
        // No details available for this node: leave $metadata untouched.
        return;
    }
    $metadata['mets_label'][0] = $structure['label'];
    $metadata['mets_orderlabel'][0] = $structure['orderlabel'];
}
||
125 | |||
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::establishRecordId()
 *
 * Takes the record identifier from the METS root's @OBJID (when set) and
 * then hands it to every registered hook object that implements
 * construct_postProcessRecordId(). The $pid argument is not used here.
 */
protected function establishRecordId($pid) {
    // Prefer the METS object's @OBJID as record identifier.
    if (!empty($this->mets['OBJID'])) {
        $this->recordId = (string) $this->mets['OBJID'];
    }
    // Give hook objects the chance to post-process the identifier.
    // NOTE(review): hooks presumably take the record ID by reference - confirm against the hook interface.
    foreach (Helper::getHookObjects('Classes/Common/MetsDocument.php') as $hook) {
        if (!method_exists($hook, 'construct_postProcessRecordId')) {
            continue;
        }
        $hook->construct_postProcessRecordId($this->xml, $this->recordId);
    }
}
||
145 | |||
146 | /** |
||
147 | * |
||
148 | * {@inheritDoc} |
||
149 | * @see \Kitodo\Dlf\Common\Document::getDownloadLocation() |
||
150 | */ |
||
151 | public function getDownloadLocation($id) { |
||
171 | } |
||
172 | |||
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::getFileLocation()
 *
 * Looks up the mets:FLocat[@LOCTYPE="URL"] of the file with the given @ID
 * and returns its xlink:href. Returns an empty string (and logs a warning)
 * when $id is empty or no matching node is found.
 */
public function getFileLocation($id) {
    // Only query the document when an ID was actually given.
    $hits = (!empty($id)
        ? $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="'.$id.'"]/mets:FLocat[@LOCTYPE="URL"]')
        : []);
    if (!$hits) {
        Helper::devLog('There is no file node with @ID "'.$id.'"', DEVLOG_SEVERITY_WARNING);
        return '';
    }
    // The first hit wins; the location lives in the xlink namespace.
    return (string) $hits[0]->attributes('http://www.w3.org/1999/xlink')->href;
}
||
186 | |||
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::getFileMimeType()
 *
 * Returns the @MIMETYPE of the mets:file with the given @ID, or an empty
 * string (after logging a warning) when $id is empty, the file node does
 * not exist or it carries no MIME type.
 */
public function getFileMimeType($id) {
    // Only query the document when an ID was actually given.
    $hits = (!empty($id)
        ? $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="'.$id.'"]/@MIMETYPE')
        : []);
    if (!$hits) {
        Helper::devLog('There is no file node with @ID "'.$id.'" or no MIME type specified', DEVLOG_SEVERITY_WARNING);
        return '';
    }
    return (string) $hits[0];
}
||
200 | |||
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::getLogicalStructure()
 *
 * Non-recursive mode returns the details of the logical unit with the given
 * @ID (served from $this->logicalUnits when already known), or of the first
 * top-level unit when $id is empty. Recursive mode appends the full subtree
 * of every matching unit to $this->tableOfContents and returns an empty array.
 */
public function getLogicalStructure($id, $recursive = FALSE) {
    // Serve already loaded units straight from the cache (non-recursive only).
    if (!$recursive
        && !empty($this->logicalUnits[$id])) {
        return $this->logicalUnits[$id];
    }
    // Either the one requested unit or all units at top level.
    $query = (!empty($id)
        ? './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$id.'"]'
        : './mets:structMap[@TYPE="LOGICAL"]/mets:div');
    $divs = $this->mets->xpath($query);
    if (empty($divs)) {
        return [];
    }
    if ($recursive) {
        // Walk the logical structure recursively and fill the whole table of contents.
        foreach ($divs as $div) {
            $this->tableOfContents[] = $this->getLogicalStructureInfo($div, TRUE);
        }
        return [];
    }
    // Only the first XPath hit is of interest.
    return $this->getLogicalStructureInfo($divs[0]);
}
||
232 | |||
/**
 * This gets details about a logical structure element
 *
 * The result is also stored in $this->logicalUnits under the element's ID
 * (without the 'children' key, which is only added afterwards).
 *
 * @access protected
 *
 * @param \SimpleXMLElement $structure: The logical structure node
 * @param boolean $recursive: Whether to include the child elements
 *
 * @return array Array with the keys id, dmdId, order, label, orderlabel, contentIds, volume,
 *               pagination, type, thumbnailId, points, files and (recursive only, if the
 *               node has child divs) children
 */
protected function getLogicalStructureInfo(\SimpleXMLElement $structure, $recursive = FALSE) {
    // Get attributes. The array is initialised first so that a node without
    // any attributes does not leave $attributes undefined below.
    $attributes = [];
    foreach ($structure->attributes() as $attribute => $value) {
        $attributes[$attribute] = (string) $value;
    }
    // Load plugin configuration.
    $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
    // All METS-namespaced children of this node (mptr, fptr, div).
    $metsChildren = $structure->children('http://www.loc.gov/METS/');
    // Extract identity information; every attribute falls back to an empty string.
    $details = [];
    $details['id'] = (isset($attributes['ID']) ? $attributes['ID'] : '');
    $details['dmdId'] = (isset($attributes['DMDID']) ? $attributes['DMDID'] : '');
    $details['order'] = (isset($attributes['ORDER']) ? $attributes['ORDER'] : '');
    $details['label'] = (isset($attributes['LABEL']) ? $attributes['LABEL'] : '');
    $details['orderlabel'] = (isset($attributes['ORDERLABEL']) ? $attributes['ORDERLABEL'] : '');
    $details['contentIds'] = (isset($attributes['CONTENTIDS']) ? $attributes['CONTENTIDS'] : '');
    $details['volume'] = '';
    // Set volume information only if no label is set and this is the toplevel structure element.
    if (empty($details['label'])
        && $details['id'] == $this->_getToplevelId()) {
        $metadata = $this->getMetadata($details['id']);
        if (!empty($metadata['volume'][0])) {
            $details['volume'] = $metadata['volume'][0];
        }
    }
    $details['pagination'] = '';
    $details['type'] = (isset($attributes['TYPE']) ? $attributes['TYPE'] : '');
    $details['thumbnailId'] = '';
    // Load smLinks.
    $this->_getSmLinks();
    // Load physical structure.
    $this->_getPhysicalStructure();
    // Get the physical page or external file this structure element is pointing at.
    $details['points'] = '';
    if (count($metsChildren->mptr)) {
        // There is a mptr node: point to the referenced external file.
        $details['points'] = (string) $metsChildren->mptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
    } elseif (!empty($this->physicalStructure)
        && !empty($this->smLinks['l2p'])
        && array_key_exists($details['id'], $this->smLinks['l2p'])) {
        // There are physical elements and this logical unit is linked to at least one of them.
        $firstPhysicalId = $this->smLinks['l2p'][$details['id']][0];
        // Position of the first linked physical element, but never below 1.
        $details['points'] = max(intval(array_search($firstPhysicalId, $this->physicalStructure, TRUE)), 1);
        if (!empty($this->physicalStructureInfo[$firstPhysicalId]['files'][$extConf['fileGrpThumbs']])) {
            $details['thumbnailId'] = $this->physicalStructureInfo[$firstPhysicalId]['files'][$extConf['fileGrpThumbs']];
        }
        // Get page/track number of the first page/track related to this structure element.
        $details['pagination'] = $this->physicalStructureInfo[$firstPhysicalId]['orderlabel'];
    } elseif ($details['id'] == $this->_getToplevelId()) {
        // This is the toplevel structure element: point to the first page.
        $details['points'] = 1;
        if (!empty($this->physicalStructure)
            && !empty($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$extConf['fileGrpThumbs']])) {
            $details['thumbnailId'] = $this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$extConf['fileGrpThumbs']];
        }
    }
    // Get the files this structure element is pointing at.
    $details['files'] = [];
    $fileUse = $this->_getFileGrps();
    // Get the file representations from fileSec node.
    foreach ($metsChildren->fptr as $fptr) {
        $fileId = (string) $fptr->attributes()->FILEID;
        // Only files belonging to a configured USE group are recorded.
        if (!empty($fileUse[$fileId])) {
            $details['files'][$fileUse[$fileId]] = $fileId;
        }
    }
    // Keep for later usage.
    $this->logicalUnits[$details['id']] = $details;
    // Walk the structure recursively? And are there any children of the current element?
    if ($recursive
        && count($metsChildren->div)) {
        $details['children'] = [];
        foreach ($metsChildren->div as $child) {
            // Repeat for all children.
            $details['children'][] = $this->getLogicalStructureInfo($child, TRUE);
        }
    }
    return $details;
}
||
319 | |||
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::getMetadata()
 *
 * Collects the metadata of the logical structure node with the given @ID:
 * first via the format's extractor class (if configured), then via the
 * XPath definitions stored in the database for the metadata PID, and finally
 * (for the toplevel element of a saved document) the assigned collections.
 *
 * Returns an empty array when no metadata PID can be determined, when the
 * node has no DMDID or when the dmdSec's format is not supported.
 */
public function getMetadata($id, $cPid = 0) {
    // Make sure $cPid is a non-negative integer.
    $cPid = max(intval($cPid), 0);
    // If $cPid is not given, try to get it elsewhere.
    if (!$cPid) {
        if ($this->cPid || $this->pid) {
            // Retain current PID. Cast to integer because it is concatenated into SQL below.
            $cPid = intval($this->cPid ? $this->cPid : $this->pid);
        } else {
            Helper::devLog('Invalid PID '.$cPid.' for metadata definitions', DEVLOG_SEVERITY_WARNING);
            return [];
        }
    }
    // Get metadata from parsed metadata array if available.
    // (Index 0 of the array holds the PID the cached entries were built for.)
    if (!empty($this->metadataArray[$id])
        && $this->metadataArray[0] == $cPid) {
        return $this->metadataArray[$id];
    }
    // Initialize metadata array with empty values.
    $metadata = [
        'title' => [],
        'title_sorting' => [],
        'author' => [],
        'place' => [],
        'year' => [],
        'prod_id' => [],
        'record_id' => [],
        'opac_id' => [],
        'union_id' => [],
        'urn' => [],
        'purl' => [],
        'type' => [],
        'volume' => [],
        'volume_sorting' => [],
        'license' => [],
        'terms' => [],
        'restrictions' => [],
        'out_of_print' => [],
        'rights_info' => [],
        'collection' => [],
        'owner' => [],
        'mets_label' => [],
        'mets_orderlabel' => [],
        'document_format' => [],
    ];
    $metadata['document_format'][] = 'METS';
    // Get the logical structure node's DMDID.
    if (!empty($this->logicalUnits[$id])) {
        $dmdId = $this->logicalUnits[$id]['dmdId'];
    } else {
        $dmdIdHits = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$id.'"]/@DMDID');
        // The XPath query yields nothing for unknown IDs; do not index into an empty result.
        $dmdId = (!empty($dmdIdHits) ? (string) $dmdIdHits[0] : '');
    }
    if (empty($dmdId)) {
        // There is no dmdSec for this structure node.
        return [];
    }
    // Load available metadata formats and dmdSecs.
    $this->loadFormats();
    $this->_getDmdSec();
    // The referenced dmdSec may be missing from $this->dmdSec (nothing was stored for it);
    // treat that like an unsupported format instead of reading undefined indexes.
    $type = '';
    if (isset($this->dmdSec[$dmdId]['type'])
        && is_string($this->dmdSec[$dmdId]['type'])) {
        $type = $this->dmdSec[$dmdId]['type'];
    }
    // Is this metadata format supported?
    if (empty($this->formats[$type])) {
        Helper::devLog('Unsupported metadata format "'.$type.'" in dmdSec with @ID "'.$dmdId.'"', DEVLOG_SEVERITY_WARNING);
        return [];
    }
    $dmdXml = $this->dmdSec[$dmdId]['xml'];
    if (!empty($this->formats[$type]['class'])) {
        $class = $this->formats[$type]['class'];
        // Get the metadata from class.
        if (class_exists($class)
            && ($obj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance($class)) instanceof MetadataInterface) {
            $obj->extractMetadata($dmdXml, $metadata);
        } else {
            Helper::devLog('Invalid class/method "'.$class.'->extractMetadata()" for metadata format "'.$type.'"', DEVLOG_SEVERITY_WARNING);
        }
    }
    // Get the structure's type.
    if (!empty($this->logicalUnits[$id])) {
        $metadata['type'] = [$this->logicalUnits[$id]['type']];
    } else {
        $struct = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$id.'"]/@TYPE');
        $metadata['type'] = [(!empty($struct) ? (string) $struct[0] : '')];
    }
    // Get the additional metadata from database.
    $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
        'tx_dlf_metadata.index_name AS index_name,tx_dlf_metadataformat.xpath AS xpath,tx_dlf_metadataformat.xpath_sorting AS xpath_sorting,tx_dlf_metadata.is_sortable AS is_sortable,tx_dlf_metadata.default_value AS default_value,tx_dlf_metadata.format AS format',
        'tx_dlf_metadata,tx_dlf_metadataformat,tx_dlf_formats',
        'tx_dlf_metadata.pid='.intval($cPid)
            .' AND tx_dlf_metadataformat.pid='.intval($cPid)
            .' AND ((tx_dlf_metadata.uid=tx_dlf_metadataformat.parent_id AND tx_dlf_metadataformat.encoded=tx_dlf_formats.uid AND tx_dlf_formats.type='.$GLOBALS['TYPO3_DB']->fullQuoteStr($type, 'tx_dlf_formats').') OR tx_dlf_metadata.format=0)'
            .Helper::whereClause('tx_dlf_metadata', TRUE)
            .Helper::whereClause('tx_dlf_metadataformat')
            .Helper::whereClause('tx_dlf_formats')
    );
    // We need a \DOMDocument here, because SimpleXML doesn't support XPath functions properly.
    $domNode = dom_import_simplexml($dmdXml);
    $domXPath = new \DOMXPath($domNode->ownerDocument);
    $this->registerNamespaces($domXPath);
    // OK, now make the XPath queries.
    while ($resArray = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($result)) {
        $indexName = $resArray['index_name'];
        $sortingName = $indexName.'_sorting';
        // Set metadata field's value(s).
        if ($resArray['format'] > 0
            && !empty($resArray['xpath'])
            && ($values = $domXPath->evaluate($resArray['xpath'], $domNode))) {
            if ($values instanceof \DOMNodeList
                && $values->length > 0) {
                $metadata[$indexName] = [];
                foreach ($values as $value) {
                    $metadata[$indexName][] = trim((string) $value->nodeValue);
                }
            } elseif (!($values instanceof \DOMNodeList)) {
                // The expression evaluated to a scalar (e.g. via an XPath function).
                $metadata[$indexName] = [trim((string) $values)];
            }
        }
        // Set default value if applicable.
        if (empty($metadata[$indexName][0])
            && strlen($resArray['default_value']) > 0) {
            $metadata[$indexName] = [$resArray['default_value']];
        }
        // Set sorting value if applicable.
        if (!empty($metadata[$indexName])
            && $resArray['is_sortable']) {
            if ($resArray['format'] > 0
                && !empty($resArray['xpath_sorting'])
                && ($values = $domXPath->evaluate($resArray['xpath_sorting'], $domNode))) {
                if ($values instanceof \DOMNodeList
                    && $values->length > 0) {
                    $metadata[$sortingName][0] = trim((string) $values->item(0)->nodeValue);
                } elseif (!($values instanceof \DOMNodeList)) {
                    $metadata[$sortingName][0] = trim((string) $values);
                }
            }
            // Fall back to the display value when no dedicated sorting value was found.
            if (empty($metadata[$sortingName][0])) {
                $metadata[$sortingName][0] = $metadata[$indexName][0];
            }
        }
    }
    // Set title to empty string if not present.
    if (empty($metadata['title'][0])) {
        $metadata['title'][0] = '';
        $metadata['title_sorting'][0] = '';
    }
    // Add collections from database to toplevel element if document is already saved.
    if (\TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($this->uid)
        && $id == $this->_getToplevelId()) {
        $result = $GLOBALS['TYPO3_DB']->exec_SELECT_mm_query(
            'tx_dlf_collections.index_name AS index_name',
            'tx_dlf_documents',
            'tx_dlf_relations',
            'tx_dlf_collections',
            'AND tx_dlf_collections.pid='.intval($cPid)
                .' AND tx_dlf_documents.uid='.intval($this->uid)
                .' AND tx_dlf_relations.ident='.$GLOBALS['TYPO3_DB']->fullQuoteStr('docs_colls', 'tx_dlf_relations')
                .' AND tx_dlf_collections.sys_language_uid IN (-1,0)'
                .Helper::whereClause('tx_dlf_documents')
                .Helper::whereClause('tx_dlf_collections'),
            'tx_dlf_collections.index_name'
        );
        while ($resArray = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($result)) {
            if (!in_array($resArray['index_name'], $metadata['collection'])) {
                $metadata['collection'][] = $resArray['index_name'];
            }
        }
    }
    return $metadata;
}
||
490 | |||
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::getRawText()
 *
 * Returns the cached raw text for the given ID when available; otherwise
 * extracts it from XML, provided the document has fulltext files. Yields an
 * empty string when there is no fulltext.
 */
public function getRawText($id) {
    // Get text from raw text array if available.
    if (!empty($this->rawTextArray[$id])) {
        return $this->rawTextArray[$id];
    }
    // Loading the file groups also determines whether fulltext files exist.
    $this->_getFileGrps();
    return ($this->hasFulltext ? $this->getRawTextFromXml($id) : '');
}
||
508 | |||
/**
 * {@inheritDoc}
 * @see Document::getStructureDepth()
 *
 * The depth is the number of ancestor elements of the logical mets:div
 * with the given @ID, as counted by the XPath ancestor axis.
 */
public function getStructureDepth($logId)
{
    $ancestors = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$logId.'"]/ancestor::*');
    return count($ancestors);
}
||
517 | |||
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::init()
 *
 * Locates the first mets:mets element in the loaded XML and stores it in
 * $this->mets. Logs an error and leaves $this->mets unset when the XML
 * contains no METS part.
 */
protected function init() {
    // Namespaces must be registered before the XPath query can use the mets: prefix.
    $this->registerNamespaces($this->xml);
    $metsNodes = $this->xml->xpath('//mets:mets');
    if (!$metsNodes) {
        Helper::devLog('No METS part found in document with UID '.$this->uid, DEVLOG_SEVERITY_ERROR);
        return;
    }
    $this->mets = $metsNodes[0];
    // Register namespaces on the METS node as well.
    $this->registerNamespaces($this->mets);
}
||
534 | |||
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::loadLocation()
 *
 * Fetches the XML from the given location and parses it into $this->xml.
 *
 * @return boolean TRUE when the XML was loaded and parsed, FALSE otherwise
 */
protected function loadLocation($location) {
    // Turn off libxml's error logging.
    $libxmlErrors = libxml_use_internal_errors(TRUE);
    // Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept
    $previousValueOfEntityLoader = libxml_disable_entity_loader(TRUE);
    // Load XML from file. Only hand real content to the parser; a failed
    // download (non-string or empty result) counts as a load failure.
    $content = \TYPO3\CMS\Core\Utility\GeneralUtility::getUrl($location);
    $xml = FALSE;
    if (is_string($content)
        && $content !== '') {
        $xml = simplexml_load_string($content);
    }
    // reset entity loader setting
    libxml_disable_entity_loader($previousValueOfEntityLoader);
    // Reset libxml's error logging.
    libxml_use_internal_errors($libxmlErrors);
    // Set some basic properties.
    if ($xml !== FALSE) {
        $this->xml = $xml;
        return TRUE;
    }
    Helper::devLog('Could not load XML file from "'.$location.'"', DEVLOG_SEVERITY_ERROR);
    // Report the failure explicitly instead of falling off the end (which yields NULL).
    return FALSE;
}
||
558 | |||
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::ensureHasFulltextIsSet()
 *
 * $this->hasFulltext is determined while the file groups are loaded, so
 * loading them (once) is all that is needed here.
 */
protected function ensureHasFulltextIsSet() {
    if ($this->fileGrpsLoaded) {
        // File groups were loaded before; nothing left to do.
        return;
    }
    $this->_getFileGrps();
}
||
569 | |||
/**
 * {@inheritDoc}
 * @see Document::getParentDocumentUid()
 *
 * Follows the mets:mptr of the closest ancestor div of the toplevel element
 * to the parent document. If that document is ready, it is saved to $pid
 * when it lives on another page, and its UID is returned. Returns 0 when
 * there is no parent pointer, when it points to this document itself or
 * when the parent document is not ready.
 */
protected function getParentDocumentUidForSaving($pid, $core)
{
    // Get the closest ancestor of the current document which has a MPTR child.
    $parentMptr = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$this->_getToplevelId().'"]/ancestor::mets:div[./mets:mptr][1]/mets:mptr');
    if (empty($parentMptr[0])) {
        return 0;
    }
    $parentLocation = (string) $parentMptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
    if ($parentLocation == $this->location) {
        // The pointer references this very document.
        return 0;
    }
    $parentDoc = self::getInstance($parentLocation, $pid);
    if (!$parentDoc->ready) {
        return 0;
    }
    if ($parentDoc->pid != $pid) {
        // The parent is not stored on the target page yet.
        $parentDoc->save($pid, $core);
    }
    return $parentDoc->uid;
}
||
593 | |||
/**
 * {@inheritDoc}
 * @see Document::setPreloadedDocument()
 *
 * Accepts the preloaded document only if it is a \SimpleXMLElement; in that
 * case it becomes $this->xml and true is returned, otherwise false.
 */
protected function setPreloadedDocument($preloadedDocument) {
    $isXml = ($preloadedDocument instanceof \SimpleXMLElement);
    if ($isXml) {
        $this->xml = $preloadedDocument;
    }
    return $isXml;
}
||
606 | |||
607 | /** |
||
608 | * {@inheritDoc} |
||
609 | * @see Document::getDocument() |
||
610 | */ |
||
611 | protected function getDocument() { |
||
613 | } |
||
614 | |||
/**
 * Accessor for $this->cPid (the docblock of the original states it is
 * reached via __get()).
 *
 * @access protected
 *
 * @return integer The PID of the metadata definitions
 */
protected function _getCPid() {
    return $this->cPid;
}
||
625 | |||
/**
 * This builds an array of the document's dmdSecs
 *
 * For every mets:dmdSec whose metadata type (@MDTYPE, or @OTHERMDTYPE when
 * @MDTYPE is "OTHER") is a known format, the type and the format's root
 * element inside mets:xmlData are stored. dmdSecs with unknown formats or
 * without a matching root element are skipped.
 *
 * @access protected
 *
 * @return array Array of dmdSecs with their IDs as array key
 */
protected function _getDmdSec() {
    if (!$this->dmdSecLoaded) {
        // Get available data formats.
        $this->loadFormats();
        // Get dmdSec nodes from METS.
        $dmdIds = $this->mets->xpath('./mets:dmdSec/@ID');
        if (!empty($dmdIds)) {
            foreach ($dmdIds as $dmdIdAttribute) {
                $dmdId = (string) $dmdIdAttribute;
                // Start from scratch for every dmdSec; otherwise a section with an
                // unsupported format would inherit the previous section's XML.
                $type = '';
                $xml = FALSE;
                $wrapSelector = '';
                if ($typeHits = $this->mets->xpath('./mets:dmdSec[@ID="'.$dmdId.'"]/mets:mdWrap[not(@MDTYPE="OTHER")]/@MDTYPE')) {
                    $type = (string) $typeHits[0];
                    $wrapSelector = 'mets:mdWrap[@MDTYPE="'.$type.'"]';
                } elseif ($typeHits = $this->mets->xpath('./mets:dmdSec[@ID="'.$dmdId.'"]/mets:mdWrap[@MDTYPE="OTHER"]/@OTHERMDTYPE')) {
                    $type = (string) $typeHits[0];
                    $wrapSelector = 'mets:mdWrap[@MDTYPE="OTHER"][@OTHERMDTYPE="'.$type.'"]';
                }
                // Only known formats tell us which root element to look for.
                if ($wrapSelector !== ''
                    && !empty($this->formats[$type])) {
                    // The namespace prefix is the lower-cased type name.
                    $xml = $this->mets->xpath('./mets:dmdSec[@ID="'.$dmdId.'"]/'.$wrapSelector.'/mets:xmlData/'.strtolower($type).':'.$this->formats[$type]['rootElement']);
                }
                if (!empty($xml)) {
                    $this->dmdSec[$dmdId]['type'] = $type;
                    $this->dmdSec[$dmdId]['xml'] = $xml[0];
                    $this->registerNamespaces($this->dmdSec[$dmdId]['xml']);
                }
            }
        }
        $this->dmdSecLoaded = TRUE;
    }
    return $this->dmdSec;
}
||
661 | |||
/**
 * This builds the file ID -> USE concordance
 *
 * Only file groups whose @USE is listed in the extension configuration
 * (fileGrps plus the thumbs, download, fulltext and audio groups) are taken
 * into account. As a side effect, $this->hasFulltext is set to TRUE when at
 * least one file belongs to the configured fulltext group.
 *
 * @access protected
 *
 * @return array Array of file use groups with file IDs
 */
protected function _getFileGrps() {
    if ($this->fileGrpsLoaded) {
        return $this->fileGrps;
    }
    // Get configured USE attributes.
    $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
    $useGrps = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $extConf['fileGrps']);
    // Append the special-purpose groups, as far as they are configured.
    foreach (['fileGrpThumbs', 'fileGrpDownload', 'fileGrpFulltext', 'fileGrpAudio'] as $confKey) {
        if (!empty($extConf[$confKey])) {
            $useGrps[] = $extConf[$confKey];
        }
    }
    // Build concordance for configured USE attributes from all file groups.
    foreach ($this->mets->xpath('./mets:fileSec/mets:fileGrp') as $fileGrp) {
        $use = (string) $fileGrp['USE'];
        if (!in_array($use, $useGrps)) {
            continue;
        }
        foreach ($fileGrp->children('http://www.loc.gov/METS/')->file as $file) {
            $this->fileGrps[(string) $file->attributes()->ID] = $use;
        }
    }
    // Are there any fulltext files available?
    if (!empty($extConf['fileGrpFulltext'])
        && in_array($extConf['fileGrpFulltext'], $this->fileGrps)) {
        $this->hasFulltext = TRUE;
    }
    $this->fileGrpsLoaded = TRUE;
    return $this->fileGrps;
}
||
705 | |||
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::prepareMetadataArray()
 *
 * Fills $this->metadataArray with the metadata of every logical structure
 * node that references a dmdSec (i.e. has a @DMDID), keyed by the node's @ID.
 */
protected function prepareMetadataArray($cPid) {
    // Get all logical structure nodes with metadata.
    $ids = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID]/@ID');
    if (!$ids) {
        return;
    }
    foreach ($ids as $idAttribute) {
        $id = (string) $idAttribute;
        $this->metadataArray[$id] = $this->getMetadata($id, $cPid);
    }
    // NOTE(review): the current PID for metadata definitions is not stored here;
    // presumably the caller does that - confirm.
}
||
719 | |||
/**
 * Accessor for $this->mets (the docblock of the original states it is
 * reached via __get()).
 *
 * @access protected
 *
 * @return \SimpleXMLElement The XML's METS part as \SimpleXMLElement object
 */
protected function _getMets() {
    return $this->mets;
}
||
730 | |||
731 | /** |
||
732 | * {@inheritDoc} |
||
733 | * @see \Kitodo\Dlf\Common\Document::_getPhysicalStructure() |
||
734 | */ |
||
735 | protected function _getPhysicalStructure() { |
||
736 | // Is there no physical structure array yet? |
||
737 | if (!$this->physicalStructureLoaded) { |
||
738 | // Does the document have a structMap node of type "PHYSICAL"? |
||
739 | $elementNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div'); |
||
740 | if ($elementNodes) { |
||
741 | // Get file groups. |
||
742 | $fileUse = $this->_getFileGrps(); |
||
743 | // Get the physical sequence's metadata. |
||
744 | $physNode = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]'); |
||
745 | $physSeq[0] = (string) $physNode[0]['ID']; |
||
1 ignored issue
–
show
|
|||
746 | $this->physicalStructureInfo[$physSeq[0]]['id'] = (string) $physNode[0]['ID']; |
||
747 | $this->physicalStructureInfo[$physSeq[0]]['dmdId'] = (isset($physNode[0]['DMDID']) ? (string) $physNode[0]['DMDID'] : ''); |
||
748 | $this->physicalStructureInfo[$physSeq[0]]['order'] = (isset($physNode[0]['ORDER']) ? (string) $physNode[0]['ORDER'] : ''); |
||
749 | $this->physicalStructureInfo[$physSeq[0]]['label'] = (isset($physNode[0]['LABEL']) ? (string) $physNode[0]['LABEL'] : ''); |
||
750 | $this->physicalStructureInfo[$physSeq[0]]['orderlabel'] = (isset($physNode[0]['ORDERLABEL']) ? (string) $physNode[0]['ORDERLABEL'] : ''); |
||
751 | $this->physicalStructureInfo[$physSeq[0]]['type'] = (string) $physNode[0]['TYPE']; |
||
752 | $this->physicalStructureInfo[$physSeq[0]]['contentIds'] = (isset($physNode[0]['CONTENTIDS']) ? (string) $physNode[0]['CONTENTIDS'] : ''); |
||
753 | // Get the file representations from fileSec node. |
||
754 | foreach ($physNode[0]->children('http://www.loc.gov/METS/')->fptr as $fptr) { |
||
755 | // Check if file has valid @USE attribute. |
||
756 | if (!empty($fileUse[(string) $fptr->attributes()->FILEID])) { |
||
757 | $this->physicalStructureInfo[$physSeq[0]]['files'][$fileUse[(string) $fptr->attributes()->FILEID]] = (string) $fptr->attributes()->FILEID; |
||
758 | } |
||
759 | } |
||
760 | // Build the physical elements' array from the physical structMap node. |
||
761 | foreach ($elementNodes as $elementNode) { |
||
762 | $elements[(int) $elementNode['ORDER']] = (string) $elementNode['ID']; |
||
763 | $this->physicalStructureInfo[$elements[(int) $elementNode['ORDER']]]['id'] = (string) $elementNode['ID']; |
||
1 ignored issue
–
show
|
|||
764 | $this->physicalStructureInfo[$elements[(int) $elementNode['ORDER']]]['dmdId'] = (isset($elementNode['DMDID']) ? (string) $elementNode['DMDID'] : ''); |
||
765 | $this->physicalStructureInfo[$elements[(int) $elementNode['ORDER']]]['order'] = (isset($elementNode['ORDER']) ? (string) $elementNode['ORDER'] : ''); |
||
766 | $this->physicalStructureInfo[$elements[(int) $elementNode['ORDER']]]['label'] = (isset($elementNode['LABEL']) ? (string) $elementNode['LABEL'] : ''); |
||
767 | $this->physicalStructureInfo[$elements[(int) $elementNode['ORDER']]]['orderlabel'] = (isset($elementNode['ORDERLABEL']) ? (string) $elementNode['ORDERLABEL'] : ''); |
||
768 | $this->physicalStructureInfo[$elements[(int) $elementNode['ORDER']]]['type'] = (string) $elementNode['TYPE']; |
||
769 | $this->physicalStructureInfo[$elements[(int) $elementNode['ORDER']]]['contentIds'] = (isset($elementNode['CONTENTIDS']) ? (string) $elementNode['CONTENTIDS'] : ''); |
||
770 | // Get the file representations from fileSec node. |
||
771 | foreach ($elementNode->children('http://www.loc.gov/METS/')->fptr as $fptr) { |
||
772 | // Check if file has valid @USE attribute. |
||
773 | if (!empty($fileUse[(string) $fptr->attributes()->FILEID])) { |
||
774 | $this->physicalStructureInfo[$elements[(int) $elementNode['ORDER']]]['files'][$fileUse[(string) $fptr->attributes()->FILEID]] = (string) $fptr->attributes()->FILEID; |
||
775 | } |
||
776 | } |
||
777 | } |
||
778 | // Sort array by keys (= @ORDER). |
||
779 | if (ksort($elements)) { |
||
780 | // Set total number of pages/tracks. |
||
781 | $this->numPages = count($elements); |
||
782 | // Merge and re-index the array to get nice numeric indexes. |
||
783 | $this->physicalStructure = array_merge($physSeq, $elements); |
||
784 | } |
||
785 | } |
||
786 | $this->physicalStructureLoaded = TRUE; |
||
787 | } |
||
788 | return $this->physicalStructure; |
||
789 | } |
||
790 | |||
/**
 * {@inheritDoc}
 *
 * Lazily builds the lookup between logical and physical structure IDs
 * from the METS structLink section and caches it in $this->smLinks:
 * 'l2p' maps every xlink:from ID to the list of its xlink:to IDs and
 * 'p2l' maps every xlink:to ID to the list of its xlink:from IDs.
 *
 * @see \Kitodo\Dlf\Common\Document::_getSmLinks()
 *
 * @return array The cached smLinks lookup
 */
protected function _getSmLinks() {
    if (!$this->smLinksLoaded) {
        $smLinks = $this->mets->xpath('./mets:structLink/mets:smLink');
        // xpath() does not return an array when the query fails, so only
        // iterate over an actual, non-empty result set.
        if (!empty($smLinks)) {
            foreach ($smLinks as $smLink) {
                // Resolve the XLink attributes once per link.
                $xlink = $smLink->attributes('http://www.w3.org/1999/xlink');
                $from = (string) $xlink->from;
                $to = (string) $xlink->to;
                $this->smLinks['l2p'][$from][] = $to;
                $this->smLinks['p2l'][$to][] = $from;
            }
        }
        $this->smLinksLoaded = TRUE;
    }
    return $this->smLinks;
}
||
806 | |||
/**
 * {@inheritDoc}
 *
 * Resolves the document's thumbnail once and caches it in $this->thumbnail.
 * The structure definition matching the document's type is looked up in
 * "tx_dlf_structures"; if it names a dedicated thumbnail structure and the
 * document contains a logical element of that type, that element is used
 * instead of the toplevel element. The thumbnail is the file of the
 * configured "fileGrpThumbs" group on the first physical element linked to
 * the chosen logical element, or on the first page if there is no such link.
 *
 * @see \Kitodo\Dlf\Common\Document::_getThumbnail()
 *
 * @param boolean $forceReload Resolve the thumbnail again even if it was already loaded
 *
 * @return string The cached thumbnail location as returned by getFileLocation();
 *                left at its previous value if no valid PID, no thumbnail
 *                fileGrp or no matching structure definition is available
 */
protected function _getThumbnail($forceReload = FALSE) {
    if (!$this->thumbnailLoaded
        || $forceReload) {
        // Retain current PID.
        $cPid = ($this->cPid ? $this->cPid : $this->pid);
        if (!$cPid) {
            Helper::devLog('Invalid PID '.$cPid.' for structure definitions', DEVLOG_SEVERITY_ERROR);
            $this->thumbnailLoaded = TRUE;
            return $this->thumbnail;
        }
        // Load extension configuration.
        // NOTE(review): unserialize() is applied to the installation's own extension configuration, not to external input.
        $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        if (empty($extConf['fileGrpThumbs'])) {
            Helper::devLog('No fileGrp for thumbnails specified', DEVLOG_SEVERITY_WARNING);
            $this->thumbnailLoaded = TRUE;
            return $this->thumbnail;
        }
        $fileGrpThumbs = $extConf['fileGrpThumbs'];
        $strctId = $this->_getToplevelId();
        $metadata = $this->getTitledata($cPid);
        // The document may carry no type metadata at all; fall back to an
        // empty type instead of reading an undefined index.
        $structureType = (isset($metadata['type'][0]) ? $metadata['type'][0] : '');

        /** @var QueryBuilder $queryBuilder */
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_structures');

        // Get structure element to get thumbnail from.
        $result = $queryBuilder
            ->select('tx_dlf_structures.thumbnail AS thumbnail')
            ->from('tx_dlf_structures')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($structureType)),
                Helper::whereExpression('tx_dlf_structures')
            )
            ->setMaxResults(1)
            ->execute();

        $allResults = $result->fetchAll();

        if (count($allResults) == 1) {
            $resArray = $allResults[0];
            // Get desired thumbnail structure if not the toplevel structure itself.
            if (!empty($resArray['thumbnail'])) {
                $strctType = Helper::getIndexNameFromUid($resArray['thumbnail'], 'tx_dlf_structures', $cPid);
                // Check if this document has a structure element of the desired type.
                $strctIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@TYPE="'.$strctType.'"]/@ID');
                if (!empty($strctIds)) {
                    $strctId = (string) $strctIds[0];
                }
            }
            // Load smLinks.
            $this->_getSmLinks();
            // Choose the physical element carrying the thumbnail: the first one
            // linked to the logical structure, else the first page (index 0 of
            // the physical structure is the physical sequence itself).
            if ($this->_getPhysicalStructure()
                && !empty($this->smLinks['l2p'][$strctId])) {
                $physId = $this->smLinks['l2p'][$strctId][0];
            } else {
                // Documents without any physical page have no index 1.
                $physId = (isset($this->physicalStructure[1]) ? $this->physicalStructure[1] : '');
            }
            // The element may have no file in the thumbnail fileGrp; pass an
            // empty ID on in that case and let getFileLocation() deal with it.
            $fileId = (isset($this->physicalStructureInfo[$physId]['files'][$fileGrpThumbs])
                ? $this->physicalStructureInfo[$physId]['files'][$fileGrpThumbs]
                : '');
            $this->thumbnail = $this->getFileLocation($fileId);
        } else {
            Helper::devLog('No structure of type "'.$structureType.'" found in database', DEVLOG_SEVERITY_ERROR);
        }
        $this->thumbnailLoaded = TRUE;
    }
    return $this->thumbnail;
}
||
876 | |||
/**
 * {@inheritDoc}
 *
 * Determines the toplevel logical structure element and caches its ID in
 * $this->toplevelId. Candidates are all logical structure nodes that have
 * a @DMDID and no mets:mptr child. The first candidate that is linked to
 * physical structure nodes wins; if none is linked, the first candidate
 * found is used.
 *
 * @see \Kitodo\Dlf\Common\Document::_getToplevelId()
 *
 * @return string The cached $this->toplevelId value
 */
protected function _getToplevelId() {
    if (empty($this->toplevelId)) {
        // Get all logical structure nodes with metadata, but without associated METS-Pointers.
        $divs = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID and not(./mets:mptr)]');
        if (!empty($divs)) {
            // Load smLinks.
            $this->_getSmLinks();
            foreach ($divs as $div) {
                $id = (string) $div['ID'];
                // Are there physical structure nodes for this logical structure?
                // isset() also copes with a document without any smLink, where
                // the 'l2p' table may never have been populated.
                if (isset($this->smLinks['l2p'][$id])) {
                    // Yes. That's what we're looking for.
                    $this->toplevelId = $id;
                    break;
                }
                if (empty($this->toplevelId)) {
                    // No. Remember this anyway, but keep looking for a better one.
                    $this->toplevelId = $id;
                }
            }
        }
    }
    return $this->toplevelId;
}
||
903 | |||
904 | /** |
||
905 | * This magic method is executed prior to any serialization of the object |
||
906 | * @see __wakeup() |
||
907 | * |
||
908 | * @access public |
||
909 | * |
||
910 | * @return array Properties to be serialized |
||
911 | */ |
||
912 | public function __sleep() { |
||
916 | } |
||
917 | |||
918 | /** |
||
919 | * This magic method returns a string representation of the object |
||
920 | * |
||
921 | * @access public |
||
922 | * |
||
923 | * @return string String representing the METS object |
||
924 | */ |
||
925 | public function __toString() { |
||
926 | $xml = new \DOMDocument('1.0', 'utf-8'); |
||
927 | $xml->appendChild($xml->importNode(dom_import_simplexml($this->mets), TRUE)); |
||
930 | } |
||
931 | |||
932 | /** |
||
933 | * This magic method is executed after the object is deserialized |
||
934 | * @see __sleep() |
||
935 | * |
||
936 | * @access public |
||
937 | * |
||
938 | * @return void |
||
939 | */ |
||
940 | public function __wakeup() { |
||
954 | } |
||
955 | } |
||
956 | } |
||
957 |