1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* (c) Kitodo. Key to digital objects e.V. <[email protected]> |
5
|
|
|
* |
6
|
|
|
* This file is part of the Kitodo and TYPO3 projects. |
7
|
|
|
* |
8
|
|
|
* @license GNU General Public License version 3 or later. |
9
|
|
|
* For the full copyright and license information, please read the |
10
|
|
|
* LICENSE.txt file that was distributed with this source code. |
11
|
|
|
*/ |
12
|
|
|
|
13
|
|
|
namespace Kitodo\Dlf\Common; |
14
|
|
|
|
15
|
|
|
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration; |
16
|
|
|
use TYPO3\CMS\Core\Database\ConnectionPool; |
17
|
|
|
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction; |
18
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
19
|
|
|
use Ubl\Iiif\Tools\IiifHelper; |
20
|
|
|
use Ubl\Iiif\Services\AbstractImageService; |
21
|
|
|
|
22
|
|
|
/** |
23
|
|
|
* MetsDocument class for the 'dlf' extension. |
24
|
|
|
* |
25
|
|
|
* @author Sebastian Meyer <[email protected]> |
26
|
|
|
* @author Henrik Lochmann <[email protected]> |
27
|
|
|
* @package TYPO3 |
28
|
|
|
* @subpackage dlf |
29
|
|
|
* @access public |
30
|
|
|
* @property int $cPid This holds the PID for the configuration |
31
|
|
|
* @property-read array $dmdSec This holds the XML file's dmdSec parts with their IDs as array key |
32
|
|
|
* @property-read array $fileGrps This holds the file ID -> USE concordance |
33
|
|
|
* @property-read bool $hasFulltext Are there any fulltext files available? |
34
|
|
|
* @property-read string $location This holds the documents location |
35
|
|
|
* @property-read array $metadataArray This holds the documents' parsed metadata array |
36
|
|
|
* @property-read \SimpleXMLElement $mets This holds the XML file's METS part as \SimpleXMLElement object |
37
|
|
|
* @property-read int $numPages This holds the total number of pages |
38
|
|
|
* @property-read int $parentId This holds the UID of the parent document or zero if not multi-volumed |
39
|
|
|
* @property-read array $physicalStructure This holds the physical structure |
40
|
|
|
* @property-read array $physicalStructureInfo This holds the physical structure metadata |
41
|
|
|
* @property-read int $pid This holds the PID of the document or zero if not in database |
42
|
|
|
* @property-read bool $ready Is the document instantiated successfully? |
43
|
|
|
* @property-read string $recordId The METS file's / IIIF manifest's record identifier |
44
|
|
|
* @property-read int $rootId This holds the UID of the root document or zero if not multi-volumed |
45
|
|
|
* @property-read array $smLinks This holds the smLinks between logical and physical structMap |
46
|
|
|
* @property-read array $tableOfContents This holds the logical structure |
47
|
|
|
* @property-read string $thumbnail This holds the document's thumbnail location |
48
|
|
|
* @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF) |
49
|
|
|
* @property-read mixed $uid This holds the UID or the URL of the document |
50
|
|
|
*/ |
51
|
|
|
final class MetsDocument extends Doc |
52
|
|
|
{ |
53
|
|
|
/** |
54
|
|
|
* This holds the whole XML file as string for serialization purposes |
55
|
|
|
* @see __sleep() / __wakeup() |
56
|
|
|
* |
57
|
|
|
* @var string |
58
|
|
|
* @access protected |
59
|
|
|
*/ |
60
|
|
|
protected $asXML = ''; |
61
|
|
|
|
62
|
|
|
/** |
63
|
|
|
* This holds the XML file's dmdSec parts with their IDs as array key |
64
|
|
|
* |
65
|
|
|
* @var array |
66
|
|
|
* @access protected |
67
|
|
|
*/ |
68
|
|
|
protected $dmdSec = []; |
69
|
|
|
|
70
|
|
|
/** |
71
|
|
|
* Are the METS file's dmdSecs loaded? |
72
|
|
|
* @see $dmdSec |
73
|
|
|
* |
74
|
|
|
* @var bool |
75
|
|
|
* @access protected |
76
|
|
|
*/ |
77
|
|
|
protected $dmdSecLoaded = false; |
78
|
|
|
|
79
|
|
|
/** |
80
|
|
|
* The extension key |
81
|
|
|
* |
82
|
|
|
* @var string |
83
|
|
|
* @access public |
84
|
|
|
*/ |
85
|
|
|
public static $extKey = 'dlf'; |
86
|
|
|
|
87
|
|
|
/** |
88
|
|
|
* This holds the file ID -> USE concordance |
89
|
|
|
* @see _getFileGrps() |
90
|
|
|
* |
91
|
|
|
* @var array |
92
|
|
|
* @access protected |
93
|
|
|
*/ |
94
|
|
|
protected $fileGrps = []; |
95
|
|
|
|
96
|
|
|
/** |
97
|
|
|
* Are the image file groups loaded? |
98
|
|
|
* @see $fileGrps |
99
|
|
|
* |
100
|
|
|
* @var bool |
101
|
|
|
* @access protected |
102
|
|
|
*/ |
103
|
|
|
protected $fileGrpsLoaded = false; |
104
|
|
|
|
105
|
|
|
/** |
106
|
|
|
* This holds the XML file's METS part as \SimpleXMLElement object |
107
|
|
|
* |
108
|
|
|
* @var \SimpleXMLElement |
109
|
|
|
* @access protected |
110
|
|
|
*/ |
111
|
|
|
protected $mets; |
112
|
|
|
|
113
|
|
|
/** |
114
|
|
|
* This holds the whole XML file as \SimpleXMLElement object |
115
|
|
|
* |
116
|
|
|
* @var \SimpleXMLElement |
117
|
|
|
* @access protected |
118
|
|
|
*/ |
119
|
|
|
protected $xml; |
120
|
|
|
|
121
|
|
|
/**
 * Copies the order, label and orderlabel details of a logical structure
 * node into the given metadata array (keys "mets_order", "mets_label" and
 * "mets_orderlabel", each at index 0).
 *
 * The metadata array is left untouched if getLogicalStructure() yields
 * no details for the given ID.
 *
 * @access public
 *
 * @param array &$metadata: The metadata array to extend
 * @param string $id: The @ID attribute of the logical structure node
 *
 * @return void
 */
public function addMetadataFromMets(&$metadata, $id)
{
    $structure = $this->getLogicalStructure($id);
    if (empty($structure)) {
        // Nothing known about this structure node.
        return;
    }
    foreach (['order', 'label', 'orderlabel'] as $key) {
        $metadata['mets_' . $key][0] = $structure[$key];
    }
}
140
|
|
|
|
141
|
|
|
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::establishRecordId()
 *
 * Takes the record identifier from the METS element's @OBJID (if present)
 * and then lets every registered hook object that implements
 * construct_postProcessRecordId() post-process it.
 *
 * Note: $pid is not used by this implementation.
 */
protected function establishRecordId($pid)
{
    // Use the METS object's @OBJID as record identifier if there is one.
    if (!empty($this->mets['OBJID'])) {
        $this->recordId = (string) $this->mets['OBJID'];
    }
    // Give hook objects the chance to adjust the record identifier.
    foreach (Helper::getHookObjects('Classes/Common/MetsDocument.php') as $hook) {
        if (!method_exists($hook, 'construct_postProcessRecordId')) {
            continue;
        }
        $hook->construct_postProcessRecordId($this->xml, $this->recordId);
    }
}
161
|
|
|
|
162
|
|
|
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::getDownloadLocation()
 *
 * Resolution depends on the file's MIME type:
 * - "application/vnd.kitodo.iiif": the location is normalised to point at
 *   the service's "info.json", the resource is loaded via IiifHelper and,
 *   if it is an AbstractImageService, its image URL is returned.
 * - "application/vnd.netfpx": "&CVT=jpeg" is appended to the location
 *   (preceded by "?" if the location has no query string yet).
 * - anything else (or an IIIF resource that is no image service): the
 *   (possibly normalised) file location is returned.
 */
public function getDownloadLocation($id)
{
    $fileMimeType = $this->getFileMimeType($id);
    $fileLocation = $this->getFileLocation($id);
    if ($fileMimeType === 'application/vnd.kitodo.iiif') {
        // Make sure the location ends in ".../info.json" without doubling
        // the slash when the location already ends in "/".
        if (substr($fileLocation, -9) !== 'info.json') {
            $fileLocation .= (substr($fileLocation, -1) === '/' ? 'info.json' : '/info.json');
        }
        $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        IiifHelper::setUrlReader(IiifUrlReader::getInstance());
        IiifHelper::setMaxThumbnailHeight($conf['iiifThumbnailHeight']);
        IiifHelper::setMaxThumbnailWidth($conf['iiifThumbnailWidth']);
        $service = IiifHelper::loadIiifResource($fileLocation);
        if ($service !== null && $service instanceof AbstractImageService) {
            return $service->getImageUrl();
        }
    } elseif ($fileMimeType === 'application/vnd.netfpx') {
        $baseURL = $fileLocation . (strpos($fileLocation, '?') === false ? '?' : '');
        // TODO CVT is an optional IIP server capability; in theory, capabilities should be determined in the object request with '&obj=IIP-server'
        return $baseURL . '&CVT=jpeg';
    }
    return $fileLocation;
}
188
|
|
|
|
189
|
|
|
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::getFileLocation()
 *
 * Looks up the mets:FLocat[@LOCTYPE="URL"] of the mets:file with the given
 * @ID and returns its xlink:href. Logs a warning and returns an empty
 * string if the ID is empty or no such node exists.
 */
public function getFileLocation($id)
{
    $fLocats = $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/mets:FLocat[@LOCTYPE="URL"]');
    if (empty($id) || empty($fLocats)) {
        $this->logger->warning('There is no file node with @ID "' . $id . '"');
        return '';
    }
    return (string) $fLocats[0]->attributes('http://www.w3.org/1999/xlink')->href;
}
206
|
|
|
|
207
|
|
|
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::getFileMimeType()
 *
 * Returns the @MIMETYPE attribute of the mets:file with the given @ID.
 * Logs a warning and returns an empty string if the ID is empty or the
 * attribute cannot be found.
 */
public function getFileMimeType($id)
{
    $mimeTypes = $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/@MIMETYPE');
    if (empty($id) || empty($mimeTypes)) {
        $this->logger->warning('There is no file node with @ID "' . $id . '" or no MIME type specified');
        return '';
    }
    return (string) $mimeTypes[0];
}
224
|
|
|
|
225
|
|
|
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::getLogicalStructure()
 *
 * Non-recursive mode returns the details of the first matching logical
 * mets:div (served from $this->logicalUnits when already loaded).
 * Recursive mode appends the details of every matching div (including
 * children) to $this->tableOfContents and returns an empty array.
 * An empty $id selects the top-level divs of the logical structMap.
 */
public function getLogicalStructure($id, $recursive = false)
{
    // Serve a single unit from the cache if it has been loaded before.
    if (!$recursive && !empty($this->logicalUnits[$id])) {
        return $this->logicalUnits[$id];
    }
    if (!empty($id)) {
        // Get specified logical unit.
        $query = './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]';
    } else {
        // Get all logical units at top level.
        $query = './mets:structMap[@TYPE="LOGICAL"]/mets:div';
    }
    $divs = $this->mets->xpath($query);
    if (empty($divs)) {
        return [];
    }
    if ($recursive) {
        // Walk the logical structure recursively and fill the whole table of contents.
        foreach ($divs as $div) {
            $this->tableOfContents[] = $this->getLogicalStructureInfo($div, $recursive);
        }
        return [];
    }
    // Get the details for the first xpath hit.
    return $this->getLogicalStructureInfo($divs[0]);
}
259
|
|
|
|
260
|
|
|
/**
 * This gets details about a logical structure element
 *
 * The result holds the keys "id", "dmdId", "order", "label", "orderlabel",
 * "contentIds", "volume", "pagination", "type", "thumbnailId", "points"
 * and "files", plus "children" when walking recursively and the element
 * has child divs. Attributes missing on the node yield empty strings.
 * The details (without children) are also stored in $this->logicalUnits.
 *
 * @access protected
 *
 * @param \SimpleXMLElement $structure: The logical structure node
 * @param bool $recursive: Whether to include the child elements
 *
 * @return array Array of the element's id, label, type and physical page indexes/mptr link
 */
protected function getLogicalStructureInfo(\SimpleXMLElement $structure, $recursive = false)
{
    // Get attributes. Start from an empty array so a node without any
    // attributes does not leave the variable undefined.
    $attributes = [];
    foreach ($structure->attributes() as $attribute => $value) {
        $attributes[$attribute] = (string) $value;
    }
    // Load plugin configuration.
    $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
    // Extract identity information.
    $details = [];
    $details['id'] = (isset($attributes['ID']) ? $attributes['ID'] : '');
    $details['dmdId'] = (isset($attributes['DMDID']) ? $attributes['DMDID'] : '');
    $details['order'] = (isset($attributes['ORDER']) ? $attributes['ORDER'] : '');
    $details['label'] = (isset($attributes['LABEL']) ? $attributes['LABEL'] : '');
    $details['orderlabel'] = (isset($attributes['ORDERLABEL']) ? $attributes['ORDERLABEL'] : '');
    $details['contentIds'] = (isset($attributes['CONTENTIDS']) ? $attributes['CONTENTIDS'] : '');
    $details['volume'] = '';
    // Set volume information only if no label is set and this is the toplevel structure element.
    if (
        empty($details['label'])
        && $details['id'] == $this->_getToplevelId()
    ) {
        $metadata = $this->getMetadata($details['id']);
        if (!empty($metadata['volume'][0])) {
            $details['volume'] = $metadata['volume'][0];
        }
    }
    $details['pagination'] = '';
    $details['type'] = (isset($attributes['TYPE']) ? $attributes['TYPE'] : '');
    $details['thumbnailId'] = '';
    // Load smLinks.
    $this->_getSmLinks();
    // Load physical structure.
    $this->_getPhysicalStructure();
    // Get the physical page or external file this structure element is pointing at.
    $details['points'] = '';
    // Is there a mptr node?
    if (count($structure->children('http://www.loc.gov/METS/')->mptr)) {
        // Yes. Get the file reference.
        $details['points'] = (string) $structure->children('http://www.loc.gov/METS/')->mptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
    } elseif (
        !empty($this->physicalStructure)
        && array_key_exists($details['id'], $this->smLinks['l2p'])
    ) {
        // ID of the first physical unit linked to this logical unit.
        $firstPhysicalId = $this->smLinks['l2p'][$details['id']][0];
        // Link logical structure to the first corresponding physical page/track.
        $details['points'] = max(intval(array_search($firstPhysicalId, $this->physicalStructure, true)), 1);
        // Use the first configured thumbnail file group that has a file for that unit.
        $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
        while ($fileGrpThumb = array_shift($fileGrpsThumb)) {
            if (!empty($this->physicalStructureInfo[$firstPhysicalId]['files'][$fileGrpThumb])) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$firstPhysicalId]['files'][$fileGrpThumb];
                break;
            }
        }
        // Get page/track number of the first page/track related to this structure element.
        $details['pagination'] = $this->physicalStructureInfo[$firstPhysicalId]['orderlabel'];
    } elseif ($details['id'] == $this->_getToplevelId()) {
        // Point to self if this is the toplevel structure.
        $details['points'] = 1;
        $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
        while ($fileGrpThumb = array_shift($fileGrpsThumb)) {
            if (
                !empty($this->physicalStructure)
                && !empty($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb])
            ) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb];
                break;
            }
        }
    }
    // Get the files this structure element is pointing at.
    $details['files'] = [];
    $fileUse = $this->_getFileGrps();
    // Get the file representations from fileSec node.
    foreach ($structure->children('http://www.loc.gov/METS/')->fptr as $fptr) {
        $fileId = (string) $fptr->attributes()->FILEID;
        // Check if file has valid @USE attribute.
        if (!empty($fileUse[$fileId])) {
            $details['files'][$fileUse[$fileId]] = $fileId;
        }
    }
    // Keep for later usage.
    $this->logicalUnits[$details['id']] = $details;
    // Walk the structure recursively? And are there any children of the current element?
    if (
        $recursive
        && count($structure->children('http://www.loc.gov/METS/')->div)
    ) {
        $details['children'] = [];
        foreach ($structure->children('http://www.loc.gov/METS/')->div as $child) {
            // Repeat for all children.
            $details['children'][] = $this->getLogicalStructureInfo($child, true);
        }
    }
    return $details;
}
364
|
|
|
|
365
|
|
|
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::getMetadata()
 *
 * Extracts the metadata of the logical structure node with the given @ID.
 * Only the first @DMDID whose dmdSec has a supported format is evaluated:
 * the format's class (if configured) extracts its metadata first, then the
 * XPath expressions and default values configured in tx_dlf_metadata for
 * the page $cPid are applied. For the toplevel node of a document with an
 * integer UID, collections and owner are added from the database.
 *
 * Returns an empty array if no usable PID is available, the node has no
 * @DMDID, or none of its dmdSecs has a supported format.
 */
public function getMetadata($id, $cPid = 0)
{
    // Make sure $cPid is a non-negative integer.
    $cPid = max(intval($cPid), 0);
    // If $cPid is not given, try to get it elsewhere.
    if (
        !$cPid
        && ($this->cPid || $this->pid)
    ) {
        // Retain current PID.
        $cPid = ($this->cPid ? $this->cPid : $this->pid);
    } elseif (!$cPid) {
        $this->logger->warning('Invalid PID ' . $cPid . ' for metadata definitions');
        return [];
    }
    // Get metadata from parsed metadata array if available.
    // NOTE(review): index 0 of $this->metadataArray is compared against the
    // PID here; presumably it stores the PID the cache was built for - confirm.
    if (
        !empty($this->metadataArray[$id])
        && $this->metadataArray[0] == $cPid
    ) {
        return $this->metadataArray[$id];
    }
    // Initialize metadata array with empty values.
    $metadata = [
        'title' => [],
        'title_sorting' => [],
        'author' => [],
        'place' => [],
        'year' => [],
        'prod_id' => [],
        'record_id' => [],
        'opac_id' => [],
        'union_id' => [],
        'urn' => [],
        'purl' => [],
        'type' => [],
        'volume' => [],
        'volume_sorting' => [],
        'license' => [],
        'terms' => [],
        'restrictions' => [],
        'out_of_print' => [],
        'rights_info' => [],
        'collection' => [],
        'owner' => [],
        'mets_label' => [],
        'mets_orderlabel' => [],
        'document_format' => ['METS'],
    ];
    // Get the logical structure node's @DMDID.
    if (!empty($this->logicalUnits[$id])) {
        $dmdIds = $this->logicalUnits[$id]['dmdId'];
    } else {
        $dmdIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]/@DMDID');
        // The XPath result is empty if the node does not exist or has no @DMDID.
        $dmdIds = (!empty($dmdIds) ? (string) $dmdIds[0] : '');
    }
    if (empty($dmdIds)) {
        // There is no dmdSec for this structure node.
        return [];
    }
    // Handle multiple DMDIDs separately.
    $dmdIds = explode(' ', $dmdIds);
    $hasSupportedMetadata = false;
    // Load available metadata formats and dmdSecs.
    $this->loadFormats();
    $this->_getDmdSec();
    foreach ($dmdIds as $dmdId) {
        // A referenced dmdSec may not have been loaded at all, so guard the lookup.
        $dmdType = (isset($this->dmdSec[$dmdId]['type']) ? $this->dmdSec[$dmdId]['type'] : '');
        // Is this metadata format supported?
        if (empty($this->formats[$dmdType])) {
            $this->logger->notice('Unsupported metadata format "' . $dmdType . '" in dmdSec with @ID "' . $dmdId . '"');
            // Continue searching for supported metadata with next @DMDID.
            continue;
        }
        if (!empty($this->formats[$dmdType]['class'])) {
            $class = $this->formats[$dmdType]['class'];
            // Get the metadata from class.
            if (
                class_exists($class)
                && ($obj = GeneralUtility::makeInstance($class)) instanceof MetadataInterface
            ) {
                $obj->extractMetadata($this->dmdSec[$dmdId]['xml'], $metadata);
            } else {
                $this->logger->warning('Invalid class/method "' . $class . '->extractMetadata()" for metadata format "' . $dmdType . '"');
            }
        }
        // Get the structure's type.
        if (!empty($this->logicalUnits[$id])) {
            $metadata['type'] = [$this->logicalUnits[$id]['type']];
        } else {
            $struct = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]/@TYPE');
            if (!empty($struct)) {
                $metadata['type'] = [(string) $struct[0]];
            }
        }
        // Get the additional metadata from database.
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');
        // Get hidden records, too.
        $queryBuilder
            ->getRestrictions()
            ->removeByType(HiddenRestriction::class);
        // Get all metadata with configured xpath and applicable format first.
        $resultWithFormat = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadataformat_joins.xpath AS xpath',
                'tx_dlf_metadataformat_joins.xpath_sorting AS xpath_sorting',
                'tx_dlf_metadata.is_sortable AS is_sortable',
                'tx_dlf_metadata.default_value AS default_value',
                'tx_dlf_metadata.format AS format'
            )
            ->from('tx_dlf_metadata')
            ->innerJoin(
                'tx_dlf_metadata',
                'tx_dlf_metadataformat',
                'tx_dlf_metadataformat_joins',
                $queryBuilder->expr()->eq(
                    'tx_dlf_metadataformat_joins.parent_id',
                    'tx_dlf_metadata.uid'
                )
            )
            ->innerJoin(
                'tx_dlf_metadataformat_joins',
                'tx_dlf_formats',
                'tx_dlf_formats_joins',
                $queryBuilder->expr()->eq(
                    'tx_dlf_formats_joins.uid',
                    'tx_dlf_metadataformat_joins.encoded'
                )
            )
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                $queryBuilder->expr()->eq('tx_dlf_metadataformat_joins.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_formats_joins.type', $queryBuilder->createNamedParameter($dmdType))
            )
            ->execute();
        // Get all metadata without a format, but with a default value next.
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');
        // Get hidden records, too.
        $queryBuilder
            ->getRestrictions()
            ->removeByType(HiddenRestriction::class);
        $resultWithoutFormat = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadata.is_sortable AS is_sortable',
                'tx_dlf_metadata.default_value AS default_value',
                'tx_dlf_metadata.format AS format'
            )
            ->from('tx_dlf_metadata')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                $queryBuilder->expr()->eq('tx_dlf_metadata.format', 0),
                $queryBuilder->expr()->neq('tx_dlf_metadata.default_value', $queryBuilder->createNamedParameter(''))
            )
            ->execute();
        // Merge both result sets.
        $allResults = array_merge($resultWithFormat->fetchAll(), $resultWithoutFormat->fetchAll());
        // We need a \DOMDocument here, because SimpleXML doesn't support XPath functions properly.
        $domNode = dom_import_simplexml($this->dmdSec[$dmdId]['xml']);
        $domXPath = new \DOMXPath($domNode->ownerDocument);
        $this->registerNamespaces($domXPath);
        // OK, now make the XPath queries.
        foreach ($allResults as $resArray) {
            $indexName = $resArray['index_name'];
            // Set metadata field's value(s).
            if (
                $resArray['format'] > 0
                && !empty($resArray['xpath'])
                && ($values = $domXPath->evaluate($resArray['xpath'], $domNode))
            ) {
                if (
                    $values instanceof \DOMNodeList
                    && $values->length > 0
                ) {
                    $metadata[$indexName] = [];
                    foreach ($values as $value) {
                        $metadata[$indexName][] = trim((string) $value->nodeValue);
                    }
                } elseif (!($values instanceof \DOMNodeList)) {
                    // The expression evaluated to a scalar (e.g. an XPath function result).
                    $metadata[$indexName] = [trim((string) $values)];
                }
            }
            // Set default value if applicable.
            if (
                empty($metadata[$indexName][0])
                && strlen($resArray['default_value']) > 0
            ) {
                $metadata[$indexName] = [$resArray['default_value']];
            }
            // Set sorting value if applicable.
            if (
                !empty($metadata[$indexName])
                && $resArray['is_sortable']
            ) {
                if (
                    $resArray['format'] > 0
                    && !empty($resArray['xpath_sorting'])
                    && ($values = $domXPath->evaluate($resArray['xpath_sorting'], $domNode))
                ) {
                    if (
                        $values instanceof \DOMNodeList
                        && $values->length > 0
                    ) {
                        $metadata[$indexName . '_sorting'][0] = trim((string) $values->item(0)->nodeValue);
                    } elseif (!($values instanceof \DOMNodeList)) {
                        $metadata[$indexName . '_sorting'][0] = trim((string) $values);
                    }
                }
                // Fall back to the first regular value for sorting.
                if (empty($metadata[$indexName . '_sorting'][0])) {
                    $metadata[$indexName . '_sorting'][0] = $metadata[$indexName][0];
                }
            }
        }
        // Set title to empty string if not present.
        if (empty($metadata['title'][0])) {
            $metadata['title'][0] = '';
            $metadata['title_sorting'][0] = '';
        }
        // Add collections and owner from database to toplevel element if document is already saved.
        if (
            \TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($this->uid)
            && $id == $this->_getToplevelId()
        ) {
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('tx_dlf_documents');

            $result = $queryBuilder
                ->select(
                    'tx_dlf_collections_join.index_name AS index_name'
                )
                ->from('tx_dlf_documents')
                ->innerJoin(
                    'tx_dlf_documents',
                    'tx_dlf_relations',
                    'tx_dlf_relations_joins',
                    $queryBuilder->expr()->eq(
                        'tx_dlf_relations_joins.uid_local',
                        'tx_dlf_documents.uid'
                    )
                )
                ->innerJoin(
                    'tx_dlf_relations_joins',
                    'tx_dlf_collections',
                    'tx_dlf_collections_join',
                    $queryBuilder->expr()->eq(
                        'tx_dlf_relations_joins.uid_foreign',
                        'tx_dlf_collections_join.uid'
                    )
                )
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_documents.pid', intval($cPid)),
                    $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($this->uid))
                )
                ->orderBy('tx_dlf_collections_join.index_name', 'ASC')
                ->execute();

            $allResults = $result->fetchAll();

            foreach ($allResults as $resArray) {
                // Strict comparison avoids accidental matches through type juggling.
                if (!in_array($resArray['index_name'], $metadata['collection'], true)) {
                    $metadata['collection'][] = $resArray['index_name'];
                }
            }

            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('tx_dlf_documents');

            $result = $queryBuilder
                ->select(
                    'tx_dlf_documents.owner AS owner'
                )
                ->from('tx_dlf_documents')
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_documents.pid', intval($cPid)),
                    $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($this->uid))
                )
                ->execute();

            $resArray = $result->fetch();

            // fetch() yields no row if the document is not stored on this
            // page; keep the array shape but do not index into a non-array.
            $metadata['owner'][0] = (is_array($resArray) ? $resArray['owner'] : null);
        }
        // Extract metadata only from first supported dmdSec.
        $hasSupportedMetadata = true;
        break;
    }
    if ($hasSupportedMetadata) {
        return $metadata;
    } else {
        $this->logger->warning('No supported metadata found for logical structure with @ID "' . $id . '"');
        return [];
    }
}
667
|
|
|
|
668
|
|
|
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::getFullText()
 *
 * Loads the file groups first and returns the result of
 * getFullTextFromXml() if the document has fulltext files, an empty
 * string otherwise.
 */
public function getFullText($id)
{
    // Load fileGrps and check for full text files.
    $this->_getFileGrps();
    if (!$this->hasFulltext) {
        return '';
    }
    return $this->getFullTextFromXml($id);
}
683
|
|
|
|
684
|
|
|
/**
 * {@inheritDoc}
 * @see Doc::getStructureDepth()
 *
 * Returns the number of ancestor elements of the logical mets:div with
 * the given @ID, or 0 if the XPath query yields nothing.
 */
public function getStructureDepth($logId)
{
    $ancestors = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $logId . '"]/ancestor::*');
    return empty($ancestors) ? 0 : count($ancestors);
}
697
|
|
|
|
698
|
|
|
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::init()
 *
 * Picks the first mets:mets node of the loaded XML as $this->mets and
 * registers the namespaces on it. Logs an error if there is no such node.
 */
protected function init()
{
    // Namespaces have to be registered before the XPath query can run.
    $this->registerNamespaces($this->xml);
    $metsNodes = $this->xml->xpath('//mets:mets');
    if (empty($metsNodes)) {
        $this->logger->error('No METS part found in document with UID ' . $this->uid);
        return;
    }
    // Get METS node from XML file.
    $this->mets = $metsNodes[0];
    // Register namespaces.
    $this->registerNamespaces($this->mets);
}
715
|
|
|
|
716
|
|
|
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::loadLocation()
 *
 * Fetches the given location and hands the content to
 * Helper::getXmlFileAsString(); the result is stored in $this->xml.
 * Returns true on success; logs an error and returns false if either
 * step fails.
 */
protected function loadLocation($location)
{
    $fileResource = GeneralUtility::getUrl($location);
    $xml = false;
    if ($fileResource !== false) {
        $xml = Helper::getXmlFileAsString($fileResource);
    }
    if ($xml === false) {
        $this->logger->error('Could not load XML file from "' . $location . '"');
        return false;
    }
    // Set some basic properties.
    $this->xml = $xml;
    return true;
}
734
|
|
|
|
735
|
|
|
/**
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::ensureHasFulltextIsSet()
 *
 * Triggers loading of the file groups unless they are already loaded.
 */
protected function ensureHasFulltextIsSet()
{
    // Are the fileGrps already loaded?
    if ($this->fileGrpsLoaded) {
        return;
    }
    $this->_getFileGrps();
}
746
|
|
|
|
747
|
|
|
/**
 * {@inheritDoc}
 * @see Doc::setPreloadedDocument()
 *
 * Accepts the preloaded document only if it is a \SimpleXMLElement, in
 * which case it is stored in $this->xml and true is returned. Anything
 * else is rejected with false.
 */
protected function setPreloadedDocument($preloadedDocument)
{
    if (!($preloadedDocument instanceof \SimpleXMLElement)) {
        return false;
    }
    $this->xml = $preloadedDocument;
    return true;
}
760
|
|
|
|
761
|
|
|
/**
 * {@inheritDoc}
 * @see Doc::getDocument()
 *
 * Returns the METS part of the document as held in $this->mets.
 */
protected function getDocument()
{
    return $this->mets;
}
769
|
|
|
|
770
|
|
|
/**
 * This builds an array of the document's dmdSecs
 *
 * For every mets:dmdSec the metadata type is read from mdWrap/@MDTYPE or,
 * if that is "OTHER", from mdWrap/@OTHERMDTYPE. Only sections whose type is
 * present in $this->formats are kept. The section's XML is the format's
 * root element, looked up with the lowercased type as namespace prefix.
 *
 * @access protected
 *
 * @return array Array of dmdSecs with their IDs as array key
 */
protected function _getDmdSec()
{
    if (!$this->dmdSecLoaded) {
        // Get available data formats.
        $this->loadFormats();
        // Get dmdSec nodes from METS.
        $dmdIds = $this->mets->xpath('./mets:dmdSec/@ID');
        if (!empty($dmdIds)) {
            foreach ($dmdIds as $dmdIdAttribute) {
                $dmdId = (string) $dmdIdAttribute;
                // Reset per section: without this, a section with an unknown
                // format would be stored with the previous section's XML.
                $type = '';
                $xml = [];
                $mdWrap = './mets:dmdSec[@ID="' . $dmdId . '"]/mets:mdWrap';
                if ($mdType = $this->mets->xpath($mdWrap . '[not(@MDTYPE="OTHER")]/@MDTYPE')) {
                    if (!empty($this->formats[(string) $mdType[0]])) {
                        $type = (string) $mdType[0];
                        $xml = $this->mets->xpath($mdWrap . '[@MDTYPE="' . $type . '"]/mets:xmlData/' . strtolower($type) . ':' . $this->formats[$type]['rootElement']);
                    }
                } elseif ($mdType = $this->mets->xpath($mdWrap . '[@MDTYPE="OTHER"]/@OTHERMDTYPE')) {
                    if (!empty($this->formats[(string) $mdType[0]])) {
                        $type = (string) $mdType[0];
                        $xml = $this->mets->xpath($mdWrap . '[@MDTYPE="OTHER"][@OTHERMDTYPE="' . $type . '"]/mets:xmlData/' . strtolower($type) . ':' . $this->formats[$type]['rootElement']);
                    }
                }
                if (!empty($xml)) {
                    $this->dmdSec[$dmdId]['type'] = $type;
                    $this->dmdSec[$dmdId]['xml'] = $xml[0];
                    $this->registerNamespaces($this->dmdSec[$dmdId]['xml']);
                }
            }
        }
        $this->dmdSecLoaded = true;
    }
    return $this->dmdSec;
}
808
|
|
|
|
809
|
|
|
/**
 * This builds the file ID -> USE concordance
 *
 * Only file groups whose USE attribute is configured in the extension
 * configuration (fileGrpImages, fileGrpThumbs, fileGrpDownload,
 * fileGrpFulltext, fileGrpAudio) are considered. As a side effect,
 * $this->hasFulltext is set to TRUE when at least one file belongs to a
 * configured fulltext group.
 *
 * @access protected
 *
 * @return array Array of file use groups with file IDs
 */
protected function _getFileGrps()
{
    if (!$this->fileGrpsLoaded) {
        // Get configured USE attributes.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        $useGrps = GeneralUtility::trimExplode(',', $extConf['fileGrpImages']);
        // The remaining groups are optional and appended in this fixed order.
        foreach (['fileGrpThumbs', 'fileGrpDownload', 'fileGrpFulltext', 'fileGrpAudio'] as $optionalKey) {
            if (!empty($extConf[$optionalKey])) {
                $useGrps = array_merge($useGrps, GeneralUtility::trimExplode(',', $extConf[$optionalKey]));
            }
        }
        // Get all file groups.
        $fileGrps = $this->mets->xpath('./mets:fileSec/mets:fileGrp');
        if (!empty($fileGrps)) {
            // Build concordance for configured USE attributes.
            foreach ($fileGrps as $fileGrp) {
                $use = (string) $fileGrp['USE'];
                // Strict comparison: both sides are strings, and loose comparison
                // would treat numeric-looking names such as "01" and "1" as equal.
                if (in_array($use, $useGrps, true)) {
                    foreach ($fileGrp->children('http://www.loc.gov/METS/')->file as $file) {
                        $this->fileGrps[(string) $file->attributes()->ID] = $use;
                    }
                }
            }
        }
        // Are there any fulltext files available?
        if (
            !empty($extConf['fileGrpFulltext'])
            && array_intersect(GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext']), $this->fileGrps) !== []
        ) {
            $this->hasFulltext = true;
        }
        $this->fileGrpsLoaded = true;
    }
    return $this->fileGrps;
}
857
|
|
|
|
858
|
|
|
/**
 * Fills $this->metadataArray with the metadata of every logical structure
 * node that has a DMDID attribute, keyed by the node's ID.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::prepareMetadataArray()
 *
 * @param int $cPid The PID passed through to getMetadata()
 *
 * @return void
 */
protected function prepareMetadataArray($cPid)
{
    // Get all logical structure nodes with metadata.
    $ids = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID]/@ID');
    if (empty($ids)) {
        return;
    }
    foreach ($ids as $idAttribute) {
        $structureId = (string) $idAttribute;
        $this->metadataArray[$structureId] = $this->getMetadata($structureId, $cPid);
    }
}
873
|
|
|
|
874
|
|
|
/**
 * Getter for $this->mets, intended to be reached via __get()
 *
 * @access protected
 *
 * @return \SimpleXMLElement The XML's METS part as \SimpleXMLElement object
 */
protected function _getMets()
{
    return $this->mets;
}
885
|
|
|
|
886
|
|
|
/**
 * Builds (once) the physical structure from the structMap of type "PHYSICAL".
 *
 * $this->physicalStructureInfo receives one entry per physical node (the
 * physSequence itself and each of its child divs), keyed by node ID.
 * $this->physicalStructure becomes the list of IDs: the physSequence first,
 * followed by the child divs sorted by their ORDER attribute.
 * $this->numPages is set to the number of child divs.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::_getPhysicalStructure()
 *
 * @return array The value of $this->physicalStructure
 */
protected function _getPhysicalStructure()
{
    // Nothing to do if the physical structure array was already built.
    if ($this->physicalStructureLoaded) {
        return $this->physicalStructure;
    }
    // Does the document have a structMap node of type "PHYSICAL"?
    $pageNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div');
    if (!empty($pageNodes)) {
        // Get file groups.
        $fileUse = $this->_getFileGrps();
        // Records one node's attributes and file pointers and returns its ID.
        $register = function ($node) use ($fileUse) {
            $nodeId = (string) $node['ID'];
            $this->physicalStructureInfo[$nodeId]['id'] = (string) $node['ID'];
            $this->physicalStructureInfo[$nodeId]['dmdId'] = (isset($node['DMDID']) ? (string) $node['DMDID'] : '');
            $this->physicalStructureInfo[$nodeId]['order'] = (isset($node['ORDER']) ? (string) $node['ORDER'] : '');
            $this->physicalStructureInfo[$nodeId]['label'] = (isset($node['LABEL']) ? (string) $node['LABEL'] : '');
            $this->physicalStructureInfo[$nodeId]['orderlabel'] = (isset($node['ORDERLABEL']) ? (string) $node['ORDERLABEL'] : '');
            $this->physicalStructureInfo[$nodeId]['type'] = (string) $node['TYPE'];
            $this->physicalStructureInfo[$nodeId]['contentIds'] = (isset($node['CONTENTIDS']) ? (string) $node['CONTENTIDS'] : '');
            // Get the file representations from fileSec node.
            foreach ($node->children('http://www.loc.gov/METS/')->fptr as $fptr) {
                $fileId = (string) $fptr->attributes()->FILEID;
                // Check if file has valid @USE attribute.
                if (!empty($fileUse[$fileId])) {
                    $this->physicalStructureInfo[$nodeId]['files'][$fileUse[$fileId]] = $fileId;
                }
            }
            return $nodeId;
        };
        // Get the physical sequence's metadata.
        $sequenceNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]');
        $physSeq[0] = $register($sequenceNodes[0]);
        // Build the physical elements' array from the physical structMap node.
        foreach ($pageNodes as $pageNode) {
            $elements[(int) $pageNode['ORDER']] = $register($pageNode);
        }
        // Sort array by keys (= @ORDER).
        if (ksort($elements)) {
            // Set total number of pages/tracks.
            $this->numPages = count($elements);
            // Merge and re-index the array to get nice numeric indexes.
            $this->physicalStructure = array_merge($physSeq, $elements);
        }
    }
    $this->physicalStructureLoaded = true;
    return $this->physicalStructure;
}
946
|
|
|
|
947
|
|
|
/**
 * Builds (once) the mapping between logical and physical structure IDs
 * from the mets:smLink elements.
 *
 * $this->smLinks['l2p'] maps each xlink:from value to the list of its
 * xlink:to values; $this->smLinks['p2l'] holds the reverse mapping.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::_getSmLinks()
 *
 * @return array The value of $this->smLinks
 */
protected function _getSmLinks()
{
    if ($this->smLinksLoaded) {
        return $this->smLinks;
    }
    $links = $this->mets->xpath('./mets:structLink/mets:smLink');
    if (!empty($links)) {
        foreach ($links as $link) {
            $xlink = $link->attributes('http://www.w3.org/1999/xlink');
            $from = (string) $xlink->from;
            $to = (string) $xlink->to;
            $this->smLinks['l2p'][$from][] = $to;
            $this->smLinks['p2l'][$to][] = $from;
        }
    }
    $this->smLinksLoaded = true;
    return $this->smLinks;
}
965
|
|
|
|
966
|
|
|
/**
 * Determines (once, unless a reload is forced) the location of the
 * document's thumbnail file.
 *
 * The structure type of the title data is looked up in tx_dlf_structures.
 * If that record names another structure type in its "thumbnail" field and
 * the document contains such a structure, the thumbnail is taken from
 * there, otherwise from the toplevel structure. For each configured
 * thumbnail fileGrp in turn, the first physical node linked to the chosen
 * logical structure is tried, then the first physical element of the
 * document.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::_getThumbnail()
 *
 * @param bool $forceReload Recompute the thumbnail even if already loaded
 *
 * @return string The value of $this->thumbnail
 */
protected function _getThumbnail($forceReload = false)
{
    if ($this->thumbnailLoaded && !$forceReload) {
        return $this->thumbnail;
    }
    // Retain current PID.
    $cPid = ($this->cPid ? $this->cPid : $this->pid);
    if (!$cPid) {
        $this->logger->error('Invalid PID ' . $cPid . ' for structure definitions');
        $this->thumbnailLoaded = true;
        return $this->thumbnail;
    }
    // Load extension configuration.
    $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
    if (empty($extConf['fileGrpThumbs'])) {
        $this->logger->warning('No fileGrp for thumbnails specified');
        $this->thumbnailLoaded = true;
        return $this->thumbnail;
    }
    $strctId = $this->_getToplevelId();
    $metadata = $this->getTitledata($cPid);

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('tx_dlf_structures');

    // Get structure element to get thumbnail from.
    $result = $queryBuilder
        ->select('tx_dlf_structures.thumbnail AS thumbnail')
        ->from('tx_dlf_structures')
        ->where(
            $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($cPid)),
            $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($metadata['type'][0])),
            Helper::whereExpression('tx_dlf_structures')
        )
        ->setMaxResults(1)
        ->execute();

    $allResults = $result->fetchAll();

    if (count($allResults) === 1) {
        $resArray = $allResults[0];
        // Get desired thumbnail structure if not the toplevel structure itself.
        if (!empty($resArray['thumbnail'])) {
            $strctType = Helper::getIndexNameFromUid($resArray['thumbnail'], 'tx_dlf_structures', $cPid);
            // Check if this document has a structure element of the desired type.
            $strctIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@TYPE="' . $strctType . '"]/@ID');
            if (!empty($strctIds)) {
                $strctId = (string) $strctIds[0];
            }
        }
        // Load smLinks.
        $this->_getSmLinks();
        // Get thumbnail location.
        $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
        while ($fileGrpThumb = array_shift($fileGrpsThumb)) {
            if (
                $this->_getPhysicalStructure()
                && !empty($this->smLinks['l2p'][$strctId])
                && !empty($this->physicalStructureInfo[$this->smLinks['l2p'][$strctId][0]]['files'][$fileGrpThumb])
            ) {
                $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$this->smLinks['l2p'][$strctId][0]]['files'][$fileGrpThumb]);
                break;
            } elseif (
                // Index 1 is the first physical element (index 0 is the sequence itself).
                // It does not exist for documents without a physical structure, so it
                // must be checked before being used as a lookup key.
                isset($this->physicalStructure[1])
                && !empty($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb])
            ) {
                $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb]);
                break;
            }
        }
    } else {
        $this->logger->error('No structure of type "' . $metadata['type'][0] . '" found in database');
    }
    $this->thumbnailLoaded = true;
    return $this->thumbnail;
}
1045
|
|
|
|
1046
|
|
|
/**
 * Determines (once) the ID of the toplevel logical structure.
 *
 * Candidates are the logical structure nodes that have a DMDID and no
 * mets:mptr child. The first candidate linked to a physical structure
 * wins; if none is linked, the first candidate is used.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Doc::_getToplevelId()
 *
 * @return string The value of $this->toplevelId
 */
protected function _getToplevelId()
{
    if (!empty($this->toplevelId)) {
        return $this->toplevelId;
    }
    // Get all logical structure nodes with metadata, but without associated METS-Pointers.
    $candidates = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID and not(./mets:mptr)]');
    if (!empty($candidates)) {
        // Load smLinks.
        $this->_getSmLinks();
        foreach ($candidates as $candidate) {
            $candidateId = (string) $candidate['ID'];
            // Are there physical structure nodes for this logical structure?
            if (array_key_exists($candidateId, $this->smLinks['l2p'])) {
                // Yes. That's what we're looking for.
                $this->toplevelId = $candidateId;
                break;
            }
            if (empty($this->toplevelId)) {
                // No. Remember this anyway, but keep looking for a better one.
                $this->toplevelId = $candidateId;
            }
        }
    }
    return $this->toplevelId;
}
1074
|
|
|
|
1075
|
|
|
/**
 * This magic method is executed prior to any serialization of the object
 *
 * It stores the XML as a string in $this->asXML so that it can be
 * serialized together with the identifying properties.
 * @see __wakeup()
 *
 * @access public
 *
 * @return array Properties to be serialized
 */
public function __sleep()
{
    // \SimpleXMLElement objects can't be serialized, thus keep a string copy of the XML instead
    $this->asXML = $this->xml->asXML();
    return ['uid', 'pid', 'recordId', 'parentId', 'asXML'];
}
1089
|
|
|
|
1090
|
|
|
/**
 * This magic method returns the string representation of the object:
 * the METS part serialized as formatted XML.
 *
 * @access public
 *
 * @return string String representing the METS object
 */
public function __toString()
{
    $document = new \DOMDocument('1.0', 'utf-8');
    // Deep-copy the METS node into the fresh document.
    $metsNode = $document->importNode(dom_import_simplexml($this->mets), true);
    $document->appendChild($metsNode);
    $document->formatOutput = true;
    return $document->saveXML();
}
1104
|
|
|
|
1105
|
|
|
/**
 * This magic method is executed after the object is deserialized
 *
 * It restores $this->xml from the string kept in $this->asXML and
 * re-initializes the object; on failure only an error is logged.
 * @see __sleep()
 *
 * @access public
 *
 * @return void
 */
public function __wakeup()
{
    $restored = Helper::getXmlFileAsString($this->asXML);
    if ($restored === false) {
        $this->logger->error('Could not load XML after deserialization');
        return;
    }
    $this->asXML = '';
    $this->xml = $restored;
    // Rebuild the unserializable properties.
    $this->init();
}
1125
|
|
|
} |
1126
|
|
|
|
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely a typographical error or the method has been renamed.