1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* (c) Kitodo. Key to digital objects e.V. <[email protected]> |
5
|
|
|
* |
6
|
|
|
* This file is part of the Kitodo and TYPO3 projects. |
7
|
|
|
* |
8
|
|
|
* @license GNU General Public License version 3 or later. |
9
|
|
|
* For the full copyright and license information, please read the |
10
|
|
|
* LICENSE.txt file that was distributed with this source code. |
11
|
|
|
*/ |
12
|
|
|
|
13
|
|
|
namespace Kitodo\Dlf\Common; |
14
|
|
|
|
15
|
|
|
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration; |
16
|
|
|
use TYPO3\CMS\Core\Database\ConnectionPool; |
17
|
|
|
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction; |
18
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
19
|
|
|
use Ubl\Iiif\Tools\IiifHelper; |
20
|
|
|
use Ubl\Iiif\Services\AbstractImageService; |
21
|
|
|
use TYPO3\CMS\Core\Log\LogManager; |
22
|
|
|
|
23
|
|
|
/** |
24
|
|
|
* MetsDocument class for the 'dlf' extension. |
25
|
|
|
* |
26
|
|
|
* @author Sebastian Meyer <[email protected]> |
27
|
|
|
* @author Henrik Lochmann <[email protected]> |
28
|
|
|
* @package TYPO3 |
29
|
|
|
* @subpackage dlf |
30
|
|
|
* @access public |
31
|
|
|
* @property int $cPid This holds the PID for the configuration |
32
|
|
|
* @property-read array $dmdSec This holds the XML file's dmdSec parts with their IDs as array key |
33
|
|
|
* @property-read array $fileGrps This holds the file ID -> USE concordance |
34
|
|
|
* @property-read bool $hasFulltext Are there any fulltext files available? |
35
|
|
|
* @property-read array $metadataArray This holds the documents' parsed metadata array |
36
|
|
|
* @property-read \SimpleXMLElement $mets This holds the XML file's METS part as \SimpleXMLElement object |
37
|
|
|
* @property-read int $numPages This holds the total number of pages |
38
|
|
|
* @property-read int $parentId This holds the UID of the parent document or zero if not multi-volumed |
39
|
|
|
* @property-read array $physicalStructure This holds the physical structure |
40
|
|
|
* @property-read array $physicalStructureInfo This holds the physical structure metadata |
41
|
|
|
* @property-read int $pid This holds the PID of the document or zero if not in database |
42
|
|
|
* @property-read bool $ready Is the document instantiated successfully? |
43
|
|
|
* @property-read string $recordId The METS file's / IIIF manifest's record identifier |
44
|
|
|
* @property-read int $rootId This holds the UID of the root document or zero if not multi-volumed |
45
|
|
|
* @property-read array $smLinks This holds the smLinks between logical and physical structMap |
46
|
|
|
* @property-read array $tableOfContents This holds the logical structure |
47
|
|
|
* @property-read string $thumbnail This holds the document's thumbnail location |
48
|
|
|
* @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF) |
49
|
|
|
*/ |
50
|
|
|
final class MetsDocument extends Doc |
51
|
|
|
{ |
52
|
|
|
/** |
53
|
|
|
* This holds the whole XML file as string for serialization purposes |
54
|
|
|
* @see __sleep() / __wakeup() |
55
|
|
|
* |
56
|
|
|
* @var string |
57
|
|
|
* @access protected |
58
|
|
|
*/ |
59
|
|
|
protected $asXML = ''; |
60
|
|
|
|
61
|
|
|
/** |
62
|
|
|
* This holds the XML file's dmdSec parts with their IDs as array key |
63
|
|
|
* |
64
|
|
|
* @var array |
65
|
|
|
* @access protected |
66
|
|
|
*/ |
67
|
|
|
protected $dmdSec = []; |
68
|
|
|
|
69
|
|
|
/** |
70
|
|
|
* Are the METS file's dmdSecs loaded? |
71
|
|
|
* @see $dmdSec |
72
|
|
|
* |
73
|
|
|
* @var bool |
74
|
|
|
* @access protected |
75
|
|
|
*/ |
76
|
|
|
protected $dmdSecLoaded = false; |
77
|
|
|
|
78
|
|
|
/** |
79
|
|
|
* The extension key |
80
|
|
|
* |
81
|
|
|
* @var string |
82
|
|
|
* @access public |
83
|
|
|
*/ |
84
|
|
|
public static $extKey = 'dlf'; |
85
|
|
|
|
86
|
|
|
/** |
87
|
|
|
* This holds the file ID -> USE concordance |
88
|
|
|
* @see _getFileGrps() |
89
|
|
|
* |
90
|
|
|
* @var array |
91
|
|
|
* @access protected |
92
|
|
|
*/ |
93
|
|
|
protected $fileGrps = []; |
94
|
|
|
|
95
|
|
|
/** |
96
|
|
|
* Are the image file groups loaded? |
97
|
|
|
* @see $fileGrps |
98
|
|
|
* |
99
|
|
|
* @var bool |
100
|
|
|
* @access protected |
101
|
|
|
*/ |
102
|
|
|
protected $fileGrpsLoaded = false; |
103
|
|
|
|
104
|
|
|
/** |
105
|
|
|
* This holds the XML file's METS part as \SimpleXMLElement object |
106
|
|
|
* |
107
|
|
|
* @var \SimpleXMLElement |
108
|
|
|
* @access protected |
109
|
|
|
*/ |
110
|
|
|
protected $mets; |
111
|
|
|
|
112
|
|
|
/** |
113
|
|
|
* This holds the whole XML file as \SimpleXMLElement object |
114
|
|
|
* |
115
|
|
|
* @var \SimpleXMLElement |
116
|
|
|
* @access protected |
117
|
|
|
*/ |
118
|
|
|
protected $xml; |
119
|
|
|
|
120
|
|
|
/**
 * Copies the ordering and labelling details of a logical structure node
 * into the given metadata array.
 *
 * The values are stored at index 0 of the "mets_order", "mets_label" and
 * "mets_orderlabel" entries. The array is left untouched if no details are
 * available for the given ID.
 *
 * @access public
 *
 * @param array &$metadata: The metadata array to extend
 * @param string $id: The "@ID" attribute of the logical structure node
 *
 * @return void
 */
public function addMetadataFromMets(&$metadata, $id)
{
    $structure = $this->getLogicalStructure($id);
    if (empty($structure)) {
        return;
    }
    // Metadata key => key in the logical structure details.
    $mapping = [
        'mets_order' => 'order',
        'mets_label' => 'label',
        'mets_orderlabel' => 'orderlabel',
    ];
    foreach ($mapping as $metadataKey => $detailKey) {
        $metadata[$metadataKey][0] = $structure[$detailKey];
    }
}
139
|
|
|
|
140
|
|
|
/**
 * Determines the record identifier from the METS element's "OBJID"
 * attribute and lets registered hook objects post-process it.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::establishRecordId()
 */
protected function establishRecordId($pid)
{
    // Use the METS object's @OBJID as record identifier, if there is one.
    if (!empty($this->mets['OBJID'])) {
        $this->recordId = (string) $this->mets['OBJID'];
    }
    // Give every hook object that implements the callback a chance to adjust the identifier.
    foreach (Helper::getHookObjects('Classes/Common/MetsDocument.php') as $hook) {
        if (!method_exists($hook, 'construct_postProcessRecordId')) {
            continue;
        }
        $hook->construct_postProcessRecordId($this->xml, $this->recordId);
    }
}
160
|
|
|
|
161
|
|
|
/**
 * Returns the location from which the file with the given ID can be
 * downloaded.
 *
 * For IIIF image services the "info.json" of the service is loaded and the
 * image URL reported by the service is returned; for IIP image servers a
 * JPEG conversion request is built. Any other file is returned with its
 * plain file location.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::getDownloadLocation()
 */
public function getDownloadLocation($id)
{
    $fileMimeType = $this->getFileMimeType($id);
    $fileLocation = $this->getFileLocation($id);
    if ($fileMimeType === 'application/vnd.kitodo.iiif') {
        // Make sure the location points at the service's "info.json" and
        // avoid a doubled slash if the location already ends with "/".
        if (substr($fileLocation, -9) !== 'info.json') {
            $fileLocation .= (substr($fileLocation, -1) === '/' ? '' : '/') . 'info.json';
        }
        $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        IiifHelper::setUrlReader(IiifUrlReader::getInstance());
        IiifHelper::setMaxThumbnailHeight($conf['iiifThumbnailHeight']);
        IiifHelper::setMaxThumbnailWidth($conf['iiifThumbnailWidth']);
        $service = IiifHelper::loadIiifResource($fileLocation);
        // instanceof is false for null, so no separate null check is needed.
        if ($service instanceof AbstractImageService) {
            return $service->getImageUrl();
        }
        // Otherwise fall through and return the "info.json" location itself.
    } elseif ($fileMimeType === 'application/vnd.netfpx') {
        // Start a query string unless the location already has one.
        $baseURL = $fileLocation . (strpos($fileLocation, '?') === false ? '?' : '');
        // TODO CVT is an optional IIP server capability; in theory, capabilities should be determined in the object request with '&obj=IIP-server'
        return $baseURL . '&CVT=jpeg';
    }
    return $fileLocation;
}
187
|
|
|
|
188
|
|
|
/**
 * Returns the xlink:href of the first URL-typed FLocat of the file with
 * the given ID.
 *
 * An empty string is returned (and a warning is logged) if the ID is empty
 * or no matching file node is found.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::getFileLocation()
 */
public function getFileLocation($id)
{
    // Only query the fileSec if there is an ID to look for.
    if (!empty($id)) {
        $location = $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/mets:FLocat[@LOCTYPE="URL"]');
        if (!empty($location)) {
            return (string) $location[0]->attributes('http://www.w3.org/1999/xlink')->href;
        }
    }
    $this->logger->warning('There is no file node with @ID "' . $id . '"');
    return '';
}
205
|
|
|
|
206
|
|
|
/**
 * Returns the "MIMETYPE" attribute of the file with the given ID.
 *
 * An empty string is returned (and a warning is logged) if the ID is empty,
 * no matching file node is found or the node has no MIME type.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::getFileMimeType()
 */
public function getFileMimeType($id)
{
    // Only query the fileSec if there is an ID to look for.
    if (!empty($id)) {
        $mimetype = $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/@MIMETYPE');
        if (!empty($mimetype)) {
            return (string) $mimetype[0];
        }
    }
    $this->logger->warning('There is no file node with @ID "' . $id . '" or no MIME type specified');
    return '';
}
223
|
|
|
|
224
|
|
|
/**
 * Returns the details of a logical structure element.
 *
 * In non-recursive mode the details of the requested element (or of the
 * first toplevel element if no ID is given) are returned. In recursive
 * mode the matching elements are appended to the table of contents
 * including their children, and an empty array is returned.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::getLogicalStructure()
 */
public function getLogicalStructure($id, $recursive = false)
{
    // Non-recursive requests can be answered from already loaded units.
    if (!$recursive && !empty($this->logicalUnits[$id])) {
        return $this->logicalUnits[$id];
    }
    // Look up either the specified logical unit or all units at top level.
    $query = !empty($id)
        ? './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]'
        : './mets:structMap[@TYPE="LOGICAL"]/mets:div';
    $divs = $this->mets->xpath($query);
    if (empty($divs)) {
        return [];
    }
    if (!$recursive) {
        // Only the first hit is of interest.
        return $this->getLogicalStructureInfo($divs[0]);
    }
    // Walk the whole logical structure and fill the table of contents.
    foreach ($divs as $div) {
        $this->tableOfContents[] = $this->getLogicalStructureInfo($div, $recursive);
    }
    return [];
}
258
|
|
|
|
259
|
|
|
/**
 * Collects the details of a logical structure element.
 *
 * Besides the basic attributes this determines where the element points
 * to (mptr link or physical page/track number), a thumbnail file ID, the
 * pagination label and the files referenced by the element. The result is
 * also stored in $this->logicalUnits.
 *
 * @access protected
 *
 * @param \SimpleXMLElement $structure: The logical structure node
 * @param bool $recursive: Whether to include the child elements
 *
 * @return array Array of the element's id, label, type and physical page indexes/mptr link
 */
protected function getLogicalStructureInfo(\SimpleXMLElement $structure, $recursive = false)
{
    $metsNamespace = 'http://www.loc.gov/METS/';
    // Load plugin configuration.
    $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
    // Basic identity information of the element.
    $details = $this->getLogicalStructureDetails($structure);
    $logId = $details['id'];
    // smLinks and physical structure are needed to resolve the linked page.
    $this->_getSmLinks();
    $this->_getPhysicalStructure();
    // Physical page or external file this element is pointing at.
    $details['points'] = '';
    if (count($structure->children($metsNamespace)->mptr)) {
        // There is a mptr node: use its file reference.
        $details['points'] = (string) $structure->children($metsNamespace)->mptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
    } elseif (
        !empty($this->physicalStructure)
        && array_key_exists($logId, $this->smLinks['l2p'])
    ) {
        // Link to the first corresponding physical page/track.
        $firstPhysId = $this->smLinks['l2p'][$logId][0];
        $details['points'] = max(intval(array_search($firstPhysId, $this->physicalStructure, true)), 1);
        foreach (GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']) as $fileGrpThumb) {
            if (!$fileGrpThumb) {
                // Stop at the first empty entry of the configured list.
                break;
            }
            if (!empty($this->physicalStructureInfo[$firstPhysId]['files'][$fileGrpThumb])) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$firstPhysId]['files'][$fileGrpThumb];
                break;
            }
        }
        // Page/track number of the first page/track related to this element.
        $details['pagination'] = $this->physicalStructureInfo[$firstPhysId]['orderlabel'];
    } elseif ($logId == $this->_getToplevelId()) {
        // The toplevel structure points to itself.
        $details['points'] = 1;
        foreach (GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']) as $fileGrpThumb) {
            if (!$fileGrpThumb) {
                // Stop at the first empty entry of the configured list.
                break;
            }
            if (
                !empty($this->physicalStructure)
                && !empty($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb])
            ) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb];
                break;
            }
        }
    }
    // Files this structure element is pointing at, keyed by their USE value.
    $details['files'] = [];
    $fileUse = $this->_getFileGrps();
    foreach ($structure->children($metsNamespace)->fptr as $fptr) {
        $fileId = (string) $fptr->attributes()->FILEID;
        // Only files with a known @USE attribute are taken into account.
        if (!empty($fileUse[$fileId])) {
            $details['files'][$fileUse[$fileId]] = $fileId;
        }
    }
    // Keep for later usage (stored without the children added below).
    $this->logicalUnits[$logId] = $details;
    // Descend into the child elements if requested and available.
    if (
        $recursive
        && count($structure->children($metsNamespace)->div)
    ) {
        $details['children'] = [];
        foreach ($structure->children($metsNamespace)->div as $child) {
            $details['children'][] = $this->getLogicalStructureInfo($child, true);
        }
    }
    return $details;
}
339
|
|
|
|
340
|
|
|
/**
 * This gets basic details about a logical structure element.
 *
 * All values are taken from the node's attributes; attributes that are
 * missing result in empty strings. "pagination" and "thumbnailId" are
 * only initialized here.
 *
 * @access private
 *
 * @param \SimpleXMLElement $structure: The logical structure node
 *
 * @return array Array with the keys id, dmdId, order, label, orderlabel, contentIds, volume, pagination, type and thumbnailId
 */
private function getLogicalStructureDetails(\SimpleXMLElement $structure)
{
    // Get attributes. Initialize first, the node may not have any.
    $attributes = [];
    foreach ($structure->attributes() as $attribute => $value) {
        $attributes[$attribute] = (string) $value;
    }

    // Extract identity information.
    $details = [
        'id' => $attributes['ID'] ?? '',
        'dmdId' => $attributes['DMDID'] ?? '',
        'order' => $attributes['ORDER'] ?? '',
        'label' => $attributes['LABEL'] ?? '',
        'orderlabel' => $attributes['ORDERLABEL'] ?? '',
        'contentIds' => $attributes['CONTENTIDS'] ?? '',
        'volume' => '',
    ];
    // Set volume information only if no label is set and this is the toplevel structure element.
    if (
        empty($details['label'])
        && $details['id'] == $this->_getToplevelId()
    ) {
        $metadata = $this->getMetadata($details['id']);
        if (!empty($metadata['volume'][0])) {
            $details['volume'] = $metadata['volume'][0];
        }
    }
    $details['pagination'] = '';
    $details['type'] = $attributes['TYPE'] ?? '';
    $details['thumbnailId'] = '';
    return $details;
}
379
|
|
|
|
380
|
|
|
/**
 * Extracts the metadata of a logical structure element.
 *
 * The element's DMDIDs are checked in order and the metadata is taken from
 * the first dmdSec with a supported format: first via the format's
 * extraction class (if configured), then via the XPath expressions and
 * default values configured in "tx_dlf_metadata" for the given PID.
 * An empty array is returned if the PID cannot be determined, the element
 * has no DMDID or none of its dmdSecs has a supported format.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::getMetadata()
 */
public function getMetadata($id, $cPid = 0)
{
    // Make sure $cPid is a non-negative integer.
    $cPid = max(intval($cPid), 0);
    // If $cPid is not given, try to get it elsewhere.
    if (
        !$cPid
        && ($this->cPid || $this->pid)
    ) {
        // Retain current PID.
        $cPid = ($this->cPid ? $this->cPid : $this->pid);
    } elseif (!$cPid) {
        $this->logger->warning('Invalid PID ' . $cPid . ' for metadata definitions');
        return [];
    }
    // Get metadata from parsed metadata array if available.
    if (
        !empty($this->metadataArray[$id])
        && $this->metadataArray[0] == $cPid
    ) {
        return $this->metadataArray[$id];
    }
    // Initialize metadata array with empty values.
    $metadata = [
        'title' => [],
        'title_sorting' => [],
        'author' => [],
        'place' => [],
        'year' => [],
        'prod_id' => [],
        'record_id' => [],
        'opac_id' => [],
        'union_id' => [],
        'urn' => [],
        'purl' => [],
        'type' => [],
        'volume' => [],
        'volume_sorting' => [],
        'license' => [],
        'terms' => [],
        'restrictions' => [],
        'out_of_print' => [],
        'rights_info' => [],
        'collection' => [],
        'owner' => [],
        'mets_label' => [],
        'mets_orderlabel' => [],
        'document_format' => ['METS'],
    ];
    // Get the logical structure node's @DMDID.
    if (!empty($this->logicalUnits[$id])) {
        $dmdIds = $this->logicalUnits[$id]['dmdId'];
    } else {
        $dmdIdNodes = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]/@DMDID');
        // The query yields nothing if the node does not exist or has no @DMDID.
        $dmdIds = !empty($dmdIdNodes) ? (string) $dmdIdNodes[0] : '';
    }
    if (empty($dmdIds)) {
        // There is no dmdSec for this structure node.
        return [];
    }
    // Handle multiple DMDIDs separately.
    $dmdIds = explode(' ', $dmdIds);
    $hasSupportedMetadata = false;
    // Load available metadata formats and dmdSecs.
    $this->loadFormats();
    $this->_getDmdSec();
    foreach ($dmdIds as $dmdId) {
        // A dmdSec may not have been loaded at all, so do not rely on its
        // entry (or a usable type) being present.
        $dmdType = $this->dmdSec[$dmdId]['type'] ?? '';
        if (!is_string($dmdType)) {
            $dmdType = '';
        }
        // Is this metadata format supported?
        if (empty($this->formats[$dmdType])) {
            $this->logger->notice('Unsupported metadata format "' . $dmdType . '" in dmdSec with @ID "' . $dmdId . '"');
            // Continue searching for supported metadata with next @DMDID.
            continue;
        }
        if (!empty($this->formats[$dmdType]['class'])) {
            $class = $this->formats[$dmdType]['class'];
            // Get the metadata from class.
            if (
                class_exists($class)
                && ($obj = GeneralUtility::makeInstance($class)) instanceof MetadataInterface
            ) {
                $obj->extractMetadata($this->dmdSec[$dmdId]['xml'], $metadata);
            } else {
                $this->logger->warning('Invalid class/method "' . $class . '->extractMetadata()" for metadata format "' . $dmdType . '"');
            }
        }
        // Get the structure's type.
        if (!empty($this->logicalUnits[$id])) {
            $metadata['type'] = [$this->logicalUnits[$id]['type']];
        } else {
            $struct = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]/@TYPE');
            if (!empty($struct)) {
                $metadata['type'] = [(string) $struct[0]];
            }
        }
        // Get the additional metadata from database.
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');
        // Get hidden records, too.
        $queryBuilder
            ->getRestrictions()
            ->removeByType(HiddenRestriction::class);
        // Get all metadata with configured xpath and applicable format first.
        $resultWithFormat = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadataformat_joins.xpath AS xpath',
                'tx_dlf_metadataformat_joins.xpath_sorting AS xpath_sorting',
                'tx_dlf_metadata.is_sortable AS is_sortable',
                'tx_dlf_metadata.default_value AS default_value',
                'tx_dlf_metadata.format AS format'
            )
            ->from('tx_dlf_metadata')
            ->innerJoin(
                'tx_dlf_metadata',
                'tx_dlf_metadataformat',
                'tx_dlf_metadataformat_joins',
                $queryBuilder->expr()->eq(
                    'tx_dlf_metadataformat_joins.parent_id',
                    'tx_dlf_metadata.uid'
                )
            )
            ->innerJoin(
                'tx_dlf_metadataformat_joins',
                'tx_dlf_formats',
                'tx_dlf_formats_joins',
                $queryBuilder->expr()->eq(
                    'tx_dlf_formats_joins.uid',
                    'tx_dlf_metadataformat_joins.encoded'
                )
            )
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                $queryBuilder->expr()->eq('tx_dlf_metadataformat_joins.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_formats_joins.type', $queryBuilder->createNamedParameter($dmdType))
            )
            ->execute();
        // Get all metadata without a format, but with a default value next.
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');
        // Get hidden records, too.
        $queryBuilder
            ->getRestrictions()
            ->removeByType(HiddenRestriction::class);
        $resultWithoutFormat = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadata.is_sortable AS is_sortable',
                'tx_dlf_metadata.default_value AS default_value',
                'tx_dlf_metadata.format AS format'
            )
            ->from('tx_dlf_metadata')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                $queryBuilder->expr()->eq('tx_dlf_metadata.format', 0),
                $queryBuilder->expr()->neq('tx_dlf_metadata.default_value', $queryBuilder->createNamedParameter(''))
            )
            ->execute();
        // Merge both result sets.
        $allResults = array_merge($resultWithFormat->fetchAll(), $resultWithoutFormat->fetchAll());
        // We need a \DOMDocument here, because SimpleXML doesn't support XPath functions properly.
        $domNode = dom_import_simplexml($this->dmdSec[$dmdId]['xml']);
        $domXPath = new \DOMXPath($domNode->ownerDocument);
        $this->registerNamespaces($domXPath);
        // OK, now make the XPath queries.
        foreach ($allResults as $resArray) {
            $indexName = $resArray['index_name'];
            $sortingName = $indexName . '_sorting';
            // Set metadata field's value(s).
            if (
                $resArray['format'] > 0
                && !empty($resArray['xpath'])
                && ($values = $domXPath->evaluate($resArray['xpath'], $domNode))
            ) {
                if (
                    $values instanceof \DOMNodeList
                    && $values->length > 0
                ) {
                    $metadata[$indexName] = [];
                    foreach ($values as $value) {
                        $metadata[$indexName][] = trim((string) $value->nodeValue);
                    }
                } elseif (!($values instanceof \DOMNodeList)) {
                    // The expression evaluated to a scalar instead of nodes.
                    $metadata[$indexName] = [trim((string) $values)];
                }
            }
            // Set default value if applicable.
            if (
                empty($metadata[$indexName][0])
                && strlen($resArray['default_value']) > 0
            ) {
                $metadata[$indexName] = [$resArray['default_value']];
            }
            // Set sorting value if applicable.
            if (
                !empty($metadata[$indexName])
                && $resArray['is_sortable']
            ) {
                if (
                    $resArray['format'] > 0
                    && !empty($resArray['xpath_sorting'])
                    && ($values = $domXPath->evaluate($resArray['xpath_sorting'], $domNode))
                ) {
                    if (
                        $values instanceof \DOMNodeList
                        && $values->length > 0
                    ) {
                        $metadata[$sortingName][0] = trim((string) $values->item(0)->nodeValue);
                    } elseif (!($values instanceof \DOMNodeList)) {
                        $metadata[$sortingName][0] = trim((string) $values);
                    }
                }
                // Fall back to the field's first value for sorting.
                if (empty($metadata[$sortingName][0])) {
                    $metadata[$sortingName][0] = $metadata[$indexName][0];
                }
            }
        }
        // Set title to empty string if not present.
        if (empty($metadata['title'][0])) {
            $metadata['title'][0] = '';
            $metadata['title_sorting'][0] = '';
        }
        // Extract metadata only from first supported dmdSec.
        $hasSupportedMetadata = true;
        break;
    }
    if ($hasSupportedMetadata) {
        return $metadata;
    } else {
        $this->logger->warning('No supported metadata found for logical structure with @ID "' . $id . '"');
        return [];
    }
}
618
|
|
|
|
619
|
|
|
/**
 * Returns the full text for the given ID, or an empty string if the
 * document has no full text files.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::getFullText()
 */
public function getFullText($id)
{
    // Load the file groups before checking for full text files.
    $this->_getFileGrps();
    if (!$this->hasFulltext) {
        return '';
    }
    return $this->getFullTextFromXml($id);
}
634
|
|
|
|
635
|
|
|
/**
 * Returns the number of ancestor elements of the logical structure node
 * with the given ID, or 0 if the node cannot be found.
 *
 * {@inheritDoc}
 * @see Doc::getStructureDepth()
 */
public function getStructureDepth($logId)
{
    $ancestors = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $logId . '"]/ancestor::*');
    return empty($ancestors) ? 0 : count($ancestors);
}
648
|
|
|
|
649
|
|
|
/**
 * Sets up the logger and locates the METS part of the loaded XML.
 *
 * An error is logged if the XML does not contain a METS element.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::init()
 */
protected function init($location)
{
    $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($this));
    // Get METS node from XML file.
    $this->registerNamespaces($this->xml);
    $metsNodes = $this->xml->xpath('//mets:mets');
    if (!empty($metsNodes)) {
        $this->mets = $metsNodes[0];
        // Register namespaces.
        $this->registerNamespaces($this->mets);
        return;
    }
    // No METS part: report it with the best identification available.
    if (!empty($location)) {
        $message = 'No METS part found in document with location "' . $location . '".';
    } elseif (!empty($this->recordId)) {
        $message = 'No METS part found in document with recordId "' . $this->recordId . '".';
    } else {
        $message = 'No METS part found in current document.';
    }
    $this->logger->error($message);
}
673
|
|
|
|
674
|
|
|
/**
 * Fetches the given location and stores the result of
 * Helper::getXmlFileAsString() in $this->xml.
 *
 * Returns true on success; otherwise an error is logged and false is
 * returned.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::loadLocation()
 */
protected function loadLocation($location)
{
    $fileResource = Helper::getUrl($location);
    // Only try to read the XML if fetching the location succeeded.
    $xml = ($fileResource !== false) ? Helper::getXmlFileAsString($fileResource) : false;
    if ($xml === false) {
        $this->logger->error('Could not load XML file from "' . $location . '"');
        return false;
    }
    // Set some basic properties.
    $this->xml = $xml;
    return true;
}
692
|
|
|
|
693
|
|
|
/**
 * Loads the file groups unless they have been loaded already.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::ensureHasFulltextIsSet()
 */
protected function ensureHasFulltextIsSet()
{
    // Nothing to do if the fileGrps are already loaded.
    if ($this->fileGrpsLoaded) {
        return;
    }
    $this->_getFileGrps();
}
704
|
|
|
|
705
|
|
|
/**
 * Uses the given document as XML source if it is a \SimpleXMLElement.
 *
 * Returns true if the document was accepted, false otherwise.
 *
 * {@inheritDoc}
 * @see Doc::setPreloadedDocument()
 */
protected function setPreloadedDocument($preloadedDocument)
{
    $isXml = $preloadedDocument instanceof \SimpleXMLElement;
    if ($isXml) {
        $this->xml = $preloadedDocument;
    }
    return $isXml;
}
718
|
|
|
|
719
|
|
|
/**
 * Returns the METS part of the document ($this->mets).
 *
 * {@inheritDoc}
 * @see Doc::getDocument()
 */
protected function getDocument()
{
    return $this->mets;
}
727
|
|
|
|
728
|
|
|
/**
 * This builds an array of the document's dmdSecs.
 *
 * Only dmdSecs whose metadata type (@MDTYPE, or @OTHERMDTYPE for the type
 * "OTHER") is a known format and whose root element can be found are
 * stored. The result is built once and reused on later calls.
 *
 * @access protected
 *
 * @return array Array of dmdSecs with their IDs as array key; each entry holds the "type" and the "xml" node
 */
protected function _getDmdSec()
{
    if (!$this->dmdSecLoaded) {
        // Get available data formats.
        $this->loadFormats();
        // Get dmdSec nodes from METS.
        $dmdIds = $this->mets->xpath('./mets:dmdSec/@ID');
        if (!empty($dmdIds)) {
            foreach ($dmdIds as $dmdIdNode) {
                $dmdId = (string) $dmdIdNode;
                // Start from scratch for every dmdSec, so that a section with an
                // unsupported format cannot inherit the previous section's data.
                $type = '';
                $xml = null;
                if ($mdTypes = $this->mets->xpath('./mets:dmdSec[@ID="' . $dmdId . '"]/mets:mdWrap[not(@MDTYPE="OTHER")]/@MDTYPE')) {
                    if (!empty($this->formats[(string) $mdTypes[0]])) {
                        $type = (string) $mdTypes[0];
                        $xml = $this->mets->xpath('./mets:dmdSec[@ID="' . $dmdId . '"]/mets:mdWrap[@MDTYPE="' . $type . '"]/mets:xmlData/' . strtolower($type) . ':' . $this->formats[$type]['rootElement']);
                    }
                } elseif ($otherMdTypes = $this->mets->xpath('./mets:dmdSec[@ID="' . $dmdId . '"]/mets:mdWrap[@MDTYPE="OTHER"]/@OTHERMDTYPE')) {
                    if (!empty($this->formats[(string) $otherMdTypes[0]])) {
                        $type = (string) $otherMdTypes[0];
                        $xml = $this->mets->xpath('./mets:dmdSec[@ID="' . $dmdId . '"]/mets:mdWrap[@MDTYPE="OTHER"][@OTHERMDTYPE="' . $type . '"]/mets:xmlData/' . strtolower($type) . ':' . $this->formats[$type]['rootElement']);
                    }
                }
                if (!empty($xml)) {
                    $this->dmdSec[$dmdId]['type'] = $type;
                    $this->dmdSec[$dmdId]['xml'] = $xml[0];
                    $this->registerNamespaces($this->dmdSec[$dmdId]['xml']);
                }
            }
        }
        $this->dmdSecLoaded = true;
    }
    return $this->dmdSec;
}
766
|
|
|
|
767
|
|
|
/**
 * This builds the file ID -> USE concordance
 *
 * The concordance is built on the first call only. Only file groups whose
 * USE attribute is listed in one of the extension settings fileGrpImages,
 * fileGrpThumbs, fileGrpDownload, fileGrpFulltext or fileGrpAudio are
 * considered. As a side effect $this->hasFulltext is set to true when at
 * least one file belongs to a configured fulltext file group.
 *
 * @access protected
 *
 * @return array Array of file use groups with file IDs
 */
protected function _getFileGrps()
{
    if (!$this->fileGrpsLoaded) {
        // Get configured USE attributes.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        // The image file groups are always evaluated, the remaining settings are optional.
        $useGrps = GeneralUtility::trimExplode(',', $extConf['fileGrpImages']);
        foreach (['fileGrpThumbs', 'fileGrpDownload', 'fileGrpFulltext', 'fileGrpAudio'] as $setting) {
            if (!empty($extConf[$setting])) {
                $useGrps = array_merge($useGrps, GeneralUtility::trimExplode(',', $extConf[$setting]));
            }
        }
        // Get all file groups.
        $fileGrps = $this->mets->xpath('./mets:fileSec/mets:fileGrp');
        if (!empty($fileGrps)) {
            // Build concordance for configured USE attributes.
            foreach ($fileGrps as $fileGrp) {
                $use = (string) $fileGrp['USE'];
                // Strict comparison: both sides are strings, and a loose check would
                // treat numeric-looking names such as "01" and "1" as the same group.
                if (in_array($use, $useGrps, true)) {
                    foreach ($fileGrp->children('http://www.loc.gov/METS/')->file as $file) {
                        $this->fileGrps[(string) $file->attributes()->ID] = $use;
                    }
                }
            }
        }
        // Are there any fulltext files available?
        if (
            !empty($extConf['fileGrpFulltext'])
            && array_intersect(GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext']), $this->fileGrps) !== []
        ) {
            $this->hasFulltext = true;
        }
        $this->fileGrpsLoaded = true;
    }
    return $this->fileGrps;
}
815
|
|
|
|
816
|
|
|
/**
 * Fills $this->metadataArray for all logical structure nodes with metadata.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::prepareMetadataArray()
 *
 * @param mixed $cPid Passed through unchanged to getMetadata()
 */
protected function prepareMetadataArray($cPid)
{
    // Collect the IDs of all logical structure nodes that reference a dmdSec.
    $structureIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID]/@ID');
    if (empty($structureIds)) {
        return;
    }
    foreach ($structureIds as $structureId) {
        $key = (string) $structureId;
        $this->metadataArray[$key] = $this->getMetadata($key, $cPid);
    }
}
831
|
|
|
|
832
|
|
|
/**
 * Getter behind the magic read access to the "mets" property
 *
 * @access protected
 *
 * @return \SimpleXMLElement The XML's METS part as \SimpleXMLElement object
 */
protected function _getMets()
{
    return $this->mets;
}
843
|
|
|
|
844
|
|
|
/**
 * Builds (once) and returns the list of physical structure element IDs.
 *
 * Index 0 holds the ID of the physical sequence itself, followed by the IDs
 * of its direct child divs sorted by their ORDER attribute. Details of every
 * node (id, dmdId, order, label, orderlabel, type, contentIds and the files
 * per file group) are stored in $this->physicalStructureInfo, and
 * $this->numPages is set to the number of child divs.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::_getPhysicalStructure()
 */
protected function _getPhysicalStructure()
{
    // The structure is only built once.
    if ($this->physicalStructureLoaded) {
        return $this->physicalStructure;
    }
    // Does the document have a physical sequence with child elements?
    $pageNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div');
    if (!empty($pageNodes)) {
        // File ID -> USE concordance, needed to sort file pointers into file groups.
        $fileUse = $this->_getFileGrps();
        $sequenceNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]');

        // Stores the details of one structure node and returns the node's ID.
        $registerNode = function ($node) use ($fileUse) {
            $nodeId = (string) $node['ID'];
            $this->physicalStructureInfo[$nodeId]['id'] = $nodeId;
            $attributeMap = [
                'dmdId' => 'DMDID',
                'order' => 'ORDER',
                'label' => 'LABEL',
                'orderlabel' => 'ORDERLABEL',
                'type' => 'TYPE',
                'contentIds' => 'CONTENTIDS',
            ];
            foreach ($attributeMap as $infoKey => $attributeName) {
                // Missing attributes are stored as empty strings.
                $this->physicalStructureInfo[$nodeId][$infoKey] = isset($node[$attributeName]) ? (string) $node[$attributeName] : '';
            }
            // Get the file representations from fileSec node.
            foreach ($node->children('http://www.loc.gov/METS/')->fptr as $filePointer) {
                $fileId = (string) $filePointer->attributes()->FILEID;
                // Only files of a configured file group are of interest.
                if (!empty($fileUse[$fileId])) {
                    $this->physicalStructureInfo[$nodeId]['files'][$fileUse[$fileId]] = $fileId;
                }
            }
            return $nodeId;
        };

        // The physical sequence itself comes first...
        $physSeq = [$registerNode($sequenceNodes[0])];
        // ...followed by its elements, keyed by their @ORDER.
        $elements = [];
        foreach ($pageNodes as $pageNode) {
            $elements[(int) $pageNode['ORDER']] = $registerNode($pageNode);
        }
        // Sort array by keys (= @ORDER).
        if (ksort($elements)) {
            // Set total number of pages/tracks.
            $this->numPages = count($elements);
            // Merge and re-index the array to get nice numeric indexes.
            $this->physicalStructure = array_merge($physSeq, $elements);
        }
    }
    $this->physicalStructureLoaded = true;
    return $this->physicalStructure;
}
904
|
|
|
|
905
|
|
|
/**
 * Builds (once) and returns the links between logical and physical structures.
 *
 * Every mets:smLink is recorded in both directions: under 'l2p' keyed by its
 * xlink:from value and under 'p2l' keyed by its xlink:to value.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::_getSmLinks()
 */
protected function _getSmLinks()
{
    if ($this->smLinksLoaded) {
        return $this->smLinks;
    }
    $linkNodes = $this->mets->xpath('./mets:structLink/mets:smLink');
    if (!empty($linkNodes)) {
        foreach ($linkNodes as $linkNode) {
            // Both ends of the link live in the XLink namespace.
            $xlink = $linkNode->attributes('http://www.w3.org/1999/xlink');
            $from = (string) $xlink->from;
            $to = (string) $xlink->to;
            $this->smLinks['l2p'][$from][] = $to;
            $this->smLinks['p2l'][$to][] = $from;
        }
    }
    $this->smLinksLoaded = true;
    return $this->smLinks;
}
923
|
|
|
|
924
|
|
|
/**
 * Determines the document's thumbnail location.
 *
 * The lookup runs once unless $forceReload is set. It looks up the database
 * record in "tx_dlf_structures" matching the document's type; that record
 * may name another structure type whose first occurrence in the logical
 * structMap is used instead of the toplevel structure. The thumbnail is then
 * taken from the first configured thumbnail file group that has a file for
 * the first physical element linked to that structure, or otherwise for the
 * element at index 1 of the physical structure.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::_getThumbnail()
 *
 * @param bool $forceReload Run the lookup again even if it already ran
 *
 * @return mixed The value of $this->thumbnail (set from getFileLocation() when a thumbnail file was found)
 */
protected function _getThumbnail($forceReload = false)
{
    if ($this->thumbnailLoaded && !$forceReload) {
        return $this->thumbnail;
    }
    // Retain current PID.
    $cPid = ($this->cPid ? $this->cPid : $this->pid);
    if (!$cPid) {
        $this->logger->error('Invalid PID ' . $cPid . ' for structure definitions');
        $this->thumbnailLoaded = true;
        return $this->thumbnail;
    }
    // Load extension configuration.
    $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
    if (empty($extConf['fileGrpThumbs'])) {
        $this->logger->warning('No fileGrp for thumbnails specified');
        $this->thumbnailLoaded = true;
        return $this->thumbnail;
    }
    $strctId = $this->_getToplevelId();
    $metadata = $this->getTitledata($cPid);
    // Fall back to an empty type instead of reading an undefined array key;
    // the lookup below then finds nothing and the error is logged as before.
    $structureType = $metadata['type'][0] ?? '';

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('tx_dlf_structures');

    // Get structure element to get thumbnail from.
    $result = $queryBuilder
        ->select('tx_dlf_structures.thumbnail AS thumbnail')
        ->from('tx_dlf_structures')
        ->where(
            $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($cPid)),
            $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($structureType)),
            Helper::whereExpression('tx_dlf_structures')
        )
        ->setMaxResults(1)
        ->execute();

    $allResults = $result->fetchAll();

    if (count($allResults) === 1) {
        $resArray = $allResults[0];
        // Get desired thumbnail structure if not the toplevel structure itself.
        if (!empty($resArray['thumbnail'])) {
            $strctType = Helper::getIndexNameFromUid($resArray['thumbnail'], 'tx_dlf_structures', $cPid);
            // Check if this document has a structure element of the desired type.
            $strctIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@TYPE="' . $strctType . '"]/@ID');
            if (!empty($strctIds)) {
                $strctId = (string) $strctIds[0];
            }
        }
        // Load smLinks.
        $this->_getSmLinks();
        // Get thumbnail location.
        $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
        while ($fileGrpThumb = array_shift($fileGrpsThumb)) {
            if (
                $this->_getPhysicalStructure()
                && !empty($this->smLinks['l2p'][$strctId])
                && !empty($this->physicalStructureInfo[$this->smLinks['l2p'][$strctId][0]]['files'][$fileGrpThumb])
            ) {
                // Preferred: the first physical element linked to the chosen structure.
                $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$this->smLinks['l2p'][$strctId][0]]['files'][$fileGrpThumb]);
                break;
            }
            // Fallback: the element right after the physical sequence, if there is one.
            $firstElementId = $this->physicalStructure[1] ?? null;
            if (
                $firstElementId !== null
                && !empty($this->physicalStructureInfo[$firstElementId]['files'][$fileGrpThumb])
            ) {
                $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$firstElementId]['files'][$fileGrpThumb]);
                break;
            }
        }
    } else {
        $this->logger->error('No structure of type "' . $structureType . '" found in database');
    }
    $this->thumbnailLoaded = true;
    return $this->thumbnail;
}
1003
|
|
|
|
1004
|
|
|
/**
 * Determines (once) and returns the ID of the toplevel logical structure.
 *
 * Candidates are the logical divs that have a DMDID and no mets:mptr child.
 * The first candidate linked to a physical structure wins; if none is
 * linked, the first candidate is used.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::_getToplevelId()
 */
protected function _getToplevelId()
{
    if (!empty($this->toplevelId)) {
        return $this->toplevelId;
    }
    // Get all logical structure nodes with metadata, but without associated METS-Pointers.
    $candidates = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID and not(./mets:mptr)]');
    if (empty($candidates)) {
        return $this->toplevelId;
    }
    // Load smLinks.
    $this->_getSmLinks();
    foreach ($candidates as $candidate) {
        $candidateId = (string) $candidate['ID'];
        // A candidate with physical structure nodes is exactly what we want.
        if (array_key_exists($candidateId, $this->smLinks['l2p'])) {
            $this->toplevelId = $candidateId;
            break;
        }
        // Otherwise remember the first candidate, but keep looking for a better one.
        if (empty($this->toplevelId)) {
            $this->toplevelId = $candidateId;
        }
    }
    return $this->toplevelId;
}
1032
|
|
|
|
1033
|
|
|
/**
 * Magic method called right before the object gets serialized
 * @see __wakeup()
 *
 * @access public
 *
 * @return array Names of the properties to be serialized
 */
public function __sleep()
{
    // The XML object itself can't be serialized, so its string form is stored in its place.
    $this->asXML = $this->xml->asXML();
    $serializedProperties = ['uid', 'pid', 'recordId', 'parentId', 'asXML'];
    return $serializedProperties;
}
1047
|
|
|
|
1048
|
|
|
/**
 * Magic method providing the string representation of the object
 *
 * @access public
 *
 * @return string The METS part as formatted XML
 */
public function __toString()
{
    $document = new \DOMDocument('1.0', 'utf-8');
    // Copy the METS node (including all descendants) into the new document.
    $metsNode = $document->importNode(dom_import_simplexml($this->mets), true);
    $document->appendChild($metsNode);
    $document->formatOutput = true;
    return $document->saveXML();
}
1062
|
|
|
|
1063
|
|
|
/**
 * Magic method called right after the object got deserialized
 * @see __sleep()
 *
 * @access public
 *
 * @return void
 */
public function __wakeup()
{
    // Turn the XML string stored by __sleep() back into an XML object.
    $restoredXml = Helper::getXmlFileAsString($this->asXML);
    if ($restoredXml === false) {
        // init() is not called on this path, so the logger is created here before use.
        $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(static::class);
        $this->logger->error('Could not load XML after deserialization');
        return;
    }
    $this->asXML = '';
    $this->xml = $restoredXml;
    // Rebuild the unserializable properties.
    $this->init('');
}
1084
|
|
|
} |
1085
|
|
|
|
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely a typographical error or the method has been renamed.