|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/** |
|
4
|
|
|
* (c) Kitodo. Key to digital objects e.V. <[email protected]> |
|
5
|
|
|
* |
|
6
|
|
|
* This file is part of the Kitodo and TYPO3 projects. |
|
7
|
|
|
* |
|
8
|
|
|
* @license GNU General Public License version 3 or later. |
|
9
|
|
|
* For the full copyright and license information, please read the |
|
10
|
|
|
* LICENSE.txt file that was distributed with this source code. |
|
11
|
|
|
*/ |
|
12
|
|
|
|
|
13
|
|
|
namespace Kitodo\Dlf\Common; |
|
14
|
|
|
|
|
15
|
|
|
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration; |
|
16
|
|
|
use TYPO3\CMS\Core\Database\ConnectionPool; |
|
17
|
|
|
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction; |
|
18
|
|
|
use TYPO3\CMS\Core\Log\LogManager; |
|
19
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
|
20
|
|
|
use Ubl\Iiif\Tools\IiifHelper; |
|
21
|
|
|
use Ubl\Iiif\Services\AbstractImageService; |
|
22
|
|
|
|
|
23
|
|
|
/** |
|
24
|
|
|
* MetsDocument class for the 'dlf' extension. |
|
25
|
|
|
* |
|
26
|
|
|
* @package TYPO3 |
|
27
|
|
|
* @subpackage dlf |
|
28
|
|
|
* |
|
29
|
|
|
* @access public |
|
30
|
|
|
* |
|
31
|
|
|
* @property int $cPid this holds the PID for the configuration |
|
32
|
|
|
* @property-read array $formats this holds the configuration for all supported metadata encodings |
|
33
|
|
|
* @property bool $formatsLoaded flag with information if the available metadata formats are loaded |
|
34
|
|
|
* @property-read bool $hasFulltext flag with information if there are any fulltext files available |
|
35
|
|
|
* @property array $lastSearchedPhysicalPage the last searched logical and physical page |
|
36
|
|
|
* @property array $logicalUnits this holds the logical units |
|
37
|
|
|
* @property-read array $metadataArray this holds the documents' parsed metadata array |
|
38
|
|
|
* @property bool $metadataArrayLoaded flag with information if the metadata array is loaded |
|
39
|
|
|
* @property-read int $numPages this holds the total number of pages |
|
40
|
|
|
* @property-read int $parentId this holds the UID of the parent document or zero if not multi-volumed |
|
41
|
|
|
* @property-read array $physicalStructure this holds the physical structure |
|
42
|
|
|
* @property-read array $physicalStructureInfo this holds the physical structure metadata |
|
43
|
|
|
* @property bool $physicalStructureLoaded flag with information if the physical structure is loaded |
|
44
|
|
|
* @property-read int $pid this holds the PID of the document or zero if not in database |
|
45
|
|
|
* @property array $rawTextArray this holds the documents' raw text pages with their corresponding structMap//div's ID (METS) or Range / Manifest / Sequence ID (IIIF) as array key |
|
46
|
|
|
* @property-read bool $ready Is the document instantiated successfully? |
|
47
|
|
|
* @property-read string $recordId the METS file's / IIIF manifest's record identifier |
|
48
|
|
|
* @property array $registry this holds the singleton object of the document |
|
49
|
|
|
* @property-read int $rootId this holds the UID of the root document or zero if not multi-volumed |
|
50
|
|
|
* @property-read array $smLinks this holds the smLinks between logical and physical structMap |
|
51
|
|
|
* @property bool $smLinksLoaded flag with information if the smLinks are loaded |
|
52
|
|
|
* @property-read array $tableOfContents this holds the logical structure |
|
53
|
|
|
* @property bool $tableOfContentsLoaded flag with information if the table of contents is loaded |
|
54
|
|
|
* @property-read string $thumbnail this holds the document's thumbnail location |
|
55
|
|
|
* @property bool $thumbnailLoaded flag with information if the thumbnail is loaded |
|
56
|
|
|
* @property-read string $toplevelId this holds the toplevel structure's "@ID" (METS) or the manifest's "@id" (IIIF) |
|
57
|
|
|
* @property \SimpleXMLElement $xml this holds the whole XML file as \SimpleXMLElement object |
|
58
|
|
|
* @property-read array $mdSec associative array of METS metadata sections indexed by their IDs. |
|
59
|
|
|
* @property bool $mdSecLoaded flag with information if the array of METS metadata sections is loaded |
|
60
|
|
|
* @property-read array $dmdSec subset of `$mdSec` storing only the dmdSec entries; kept for compatibility. |
|
61
|
|
|
* @property-read array $fileGrps this holds the file ID -> USE concordance |
|
62
|
|
|
* @property bool $fileGrpsLoaded flag with information if file groups array is loaded |
|
63
|
|
|
* @property-read array $fileInfos additional information about files (e.g., ADMID), indexed by ID. |
|
64
|
|
|
* @property-read \SimpleXMLElement $mets this holds the XML file's METS part as \SimpleXMLElement object |
|
65
|
|
|
* @property-read string $parentHref URL of the parent document (determined via mptr element), or empty string if none is available |
|
66
|
|
|
*/ |
|
67
|
|
|
final class MetsDocument extends AbstractDocument |
|
68
|
|
|
{ |
|
69
|
|
|
/** |
|
70
|
|
|
* @access protected |
|
71
|
|
|
* @var string[] Subsections / tags that may occur within `<mets:amdSec>` |
|
72
|
|
|
* |
|
73
|
|
|
* @link https://www.loc.gov/standards/mets/docs/mets.v1-9.html#amdSec |
|
74
|
|
|
* @link https://www.loc.gov/standards/mets/docs/mets.v1-9.html#mdSecType |
|
75
|
|
|
*/ |
|
76
|
|
|
protected const ALLOWED_AMD_SEC = ['techMD', 'rightsMD', 'sourceMD', 'digiprovMD']; |
|
77
|
|
|
|
|
78
|
|
|
/** |
|
79
|
|
|
* @access protected |
|
80
|
|
|
* @var string This holds the whole XML file as string for serialization purposes |
|
81
|
|
|
* |
|
82
|
|
|
* @see __sleep() / __wakeup() |
|
83
|
|
|
*/ |
|
84
|
|
|
protected string $asXML = ''; |
|
85
|
|
|
|
|
86
|
|
|
/** |
|
87
|
|
|
* @access protected |
|
88
|
|
|
* @var array This maps the ID of each amdSec to the IDs of its children (techMD etc.). When an ADMID references an amdSec instead of techMD etc., this is used to iterate the child elements. |
|
89
|
|
|
*/ |
|
90
|
|
|
protected array $amdSecChildIds = []; |
|
91
|
|
|
|
|
92
|
|
|
/** |
|
93
|
|
|
* @access protected |
|
94
|
|
|
* @var array Associative array of METS metadata sections indexed by their IDs. |
|
95
|
|
|
*/ |
|
96
|
|
|
protected array $mdSec = []; |
|
97
|
|
|
|
|
98
|
|
|
/** |
|
99
|
|
|
* @access protected |
|
100
|
|
|
* @var bool Are the METS file's metadata sections loaded? |
|
101
|
|
|
* |
|
102
|
|
|
* @see MetsDocument::$mdSec |
|
103
|
|
|
*/ |
|
104
|
|
|
protected bool $mdSecLoaded = false; |
|
105
|
|
|
|
|
106
|
|
|
/** |
|
107
|
|
|
* @access protected |
|
108
|
|
|
* @var array Subset of $mdSec storing only the dmdSec entries; kept for compatibility. |
|
109
|
|
|
*/ |
|
110
|
|
|
protected array $dmdSec = []; |
|
111
|
|
|
|
|
112
|
|
|
/** |
|
113
|
|
|
* @access protected |
|
114
|
|
|
* @var array This holds the file ID -> USE concordance |
|
115
|
|
|
* |
|
116
|
|
|
* @see _getFileGrps() |
|
117
|
|
|
*/ |
|
118
|
|
|
protected array $fileGrps = []; |
|
119
|
|
|
|
|
120
|
|
|
/** |
|
121
|
|
|
* @access protected |
|
122
|
|
|
* @var bool Are the image file groups loaded? |
|
123
|
|
|
* |
|
124
|
|
|
* @see $fileGrps |
|
125
|
|
|
*/ |
|
126
|
|
|
protected bool $fileGrpsLoaded = false; |
|
127
|
|
|
|
|
128
|
|
|
/** |
|
129
|
|
|
* @access protected |
|
130
|
|
|
* @var \SimpleXMLElement This holds the XML file's METS part as \SimpleXMLElement object |
|
131
|
|
|
*/ |
|
132
|
|
|
protected \SimpleXMLElement $mets; |
|
133
|
|
|
|
|
134
|
|
|
/** |
|
135
|
|
|
* @access protected |
|
136
|
|
|
* @var string URL of the parent document (determined via mptr element), or empty string if none is available |
|
137
|
|
|
*/ |
|
138
|
|
|
protected string $parentHref = ''; |
|
139
|
|
|
|
|
140
|
|
|
/** |
|
141
|
|
|
* This adds metadata from METS structural map to metadata array. |
|
142
|
|
|
* |
|
143
|
|
|
* @access public |
|
144
|
|
|
* |
|
145
|
|
|
* @param array &$metadata The metadata array to extend |
|
146
|
|
|
* @param string $id The "@ID" attribute of the logical structure node |
|
147
|
|
|
* |
|
148
|
|
|
* @return void |
|
149
|
|
|
*/ |
|
150
|
|
|
public function addMetadataFromMets(array &$metadata, string $id): void
{
    // Resolve the logical structure node; an empty result means there is nothing to add.
    $structure = $this->getLogicalStructure($id);
    if (empty($structure)) {
        return;
    }
    // Copy the node's 'order', 'label' and 'orderlabel' details into the matching "mets_*" fields.
    foreach (['order', 'label', 'orderlabel'] as $field) {
        $metadata['mets_' . $field][0] = $structure[$field];
    }
}
|
159
|
|
|
|
|
160
|
|
|
/** |
|
161
|
|
|
* @see AbstractDocument::establishRecordId() |
|
162
|
|
|
*/ |
|
163
|
|
|
protected function establishRecordId(int $pid): void
{
    // Use the METS root's @OBJID as record identifier when it is present and non-empty.
    if (!empty($this->mets['OBJID'])) {
        $this->recordId = (string) $this->mets['OBJID'];
    }
    // Give every registered hook that implements the callback a chance to post-process the record ID.
    foreach (Helper::getHookObjects('Classes/Common/MetsDocument.php') as $hook) {
        if (!method_exists($hook, 'construct_postProcessRecordId')) {
            continue;
        }
        $hook->construct_postProcessRecordId($this->xml, $this->recordId);
    }
}
|
178
|
|
|
|
|
179
|
|
|
/** |
|
180
|
|
|
* @see AbstractDocument::getDownloadLocation() |
|
181
|
|
|
*/ |
|
182
|
|
|
public function getDownloadLocation(string $id): string
{
    // Resolve the file's MIME type and location. getFileInfo() may yield no record for an
    // unknown ID; fall back to empty strings so that the declared string return type holds.
    $file = $this->getFileInfo($id);
    $mimeType = $file['mimeType'] ?? '';
    $location = $file['location'] ?? '';

    if ($mimeType === 'application/vnd.kitodo.iiif') {
        // Make sure the location ends with "info.json".
        if (substr($location, -9) !== 'info.json') {
            // Insert a separating slash only if the location does not already end with one.
            // (The former check compared strrpos() against strlen(), which can never match,
            // so locations with a trailing slash ended up as ".../" . "/info.json".)
            $location .= (substr($location, -1) === '/' ? '' : '/') . 'info.json';
        }
        $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
        IiifHelper::setUrlReader(IiifUrlReader::getInstance());
        IiifHelper::setMaxThumbnailHeight($conf['iiifThumbnailHeight']);
        IiifHelper::setMaxThumbnailWidth($conf['iiifThumbnailWidth']);
        $service = IiifHelper::loadIiifResource($location);
        if ($service instanceof AbstractImageService) {
            return $service->getImageUrl();
        }
        // Not an image service: fall through and return the "info.json" location itself.
    } elseif ($mimeType === 'application/vnd.netfpx') {
        // Start a query string unless the location already has one.
        $baseURL = $location . (strpos($location, '?') === false ? '?' : '');
        // TODO CVT is an optional IIP server capability; in theory, capabilities should be determined in the object request with '&obj=IIP-server'
        return $baseURL . '&CVT=jpeg';
    }
    return $location;
}
|
202
|
|
|
|
|
203
|
|
|
/** |
|
204
|
|
|
* {@inheritDoc} |
|
205
|
|
|
* @see AbstractDocument::getFileInfo() |
|
206
|
|
|
*/ |
|
207
|
|
|
public function getFileInfo($id)
{
    // Load the file groups first.
    // NOTE(review): presumably this is what fills $this->fileInfos - confirm in _getFileGrps().
    $this->_getFileGrps();

    if (!isset($this->fileInfos[$id])) {
        // Unknown file ID: return null explicitly instead of reading an undefined array key
        // (which yielded the same null, but with a PHP warning).
        return null;
    }

    // Resolve location and MIME type lazily and keep them for later calls.
    if (empty($this->fileInfos[$id]['location'])) {
        $this->fileInfos[$id]['location'] = $this->getFileLocation($id);
    }

    if (empty($this->fileInfos[$id]['mimeType'])) {
        $this->fileInfos[$id]['mimeType'] = $this->getFileMimeType($id);
    }

    return $this->fileInfos[$id];
}
|
221
|
|
|
|
|
222
|
|
|
/** |
|
223
|
|
|
* @see AbstractDocument::getFileLocation() |
|
224
|
|
|
*/ |
|
225
|
|
|
public function getFileLocation(string $id): string
{
    // Look up the URL-type FLocat element of the file node with the given @ID.
    $fLocats = $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/mets:FLocat[@LOCTYPE="URL"]');
    if (empty($id) || empty($fLocats)) {
        $this->logger->warning('There is no file node with @ID "' . $id . '"');
        return '';
    }
    // The location is the xlink:href attribute of the first match.
    return (string) $fLocats[0]->attributes('http://www.w3.org/1999/xlink')->href;
}
|
238
|
|
|
|
|
239
|
|
|
/** |
|
240
|
|
|
* @see AbstractDocument::getFileMimeType() |
|
241
|
|
|
*/ |
|
242
|
|
|
public function getFileMimeType(string $id): string
{
    // Look up the @MIMETYPE attribute of the file node with the given @ID.
    $mimeTypes = $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/@MIMETYPE');
    if (empty($id) || empty($mimeTypes)) {
        $this->logger->warning('There is no file node with @ID "' . $id . '" or no MIME type specified');
        return '';
    }
    // Use the first match.
    return (string) $mimeTypes[0];
}
|
255
|
|
|
|
|
256
|
|
|
/** |
|
257
|
|
|
* @see AbstractDocument::getLogicalStructure() |
|
258
|
|
|
*/ |
|
259
|
|
|
public function getLogicalStructure(string $id, bool $recursive = false): array
{
    // A single logical unit that was resolved before is served from the cache.
    if (!$recursive && !empty($this->logicalUnits[$id])) {
        return $this->logicalUnits[$id];
    }
    // With an ID, search that div anywhere in the logical structMap;
    // without one, take all divs at the top level.
    $query = !empty($id)
        ? './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]'
        : './mets:structMap[@TYPE="LOGICAL"]/mets:div';
    $nodes = $this->mets->xpath($query);
    if (empty($nodes)) {
        return [];
    }
    if (!$recursive) {
        // Only the first xpath hit is of interest.
        return $this->getLogicalStructureInfo($nodes[0]);
    }
    // Walk the logical structure recursively and fill the whole table of contents.
    // In this mode the result is stored in $this->tableOfContents and an empty array is returned.
    foreach ($nodes as $node) {
        $this->tableOfContents[] = $this->getLogicalStructureInfo($node, $recursive);
    }
    return [];
}
|
289
|
|
|
|
|
290
|
|
|
/** |
|
291
|
|
|
* This gets details about a logical structure element |
|
292
|
|
|
* |
|
293
|
|
|
* @access protected |
|
294
|
|
|
* |
|
295
|
|
|
* @param \SimpleXMLElement $structure The logical structure node |
|
296
|
|
|
* @param bool $recursive Whether to include the child elements |
|
297
|
|
|
* |
|
298
|
|
|
* @return array Array of the element's id, label, type and physical page indexes/mptr link |
|
299
|
|
|
*/ |
|
300
|
|
|
protected function getLogicalStructureInfo(\SimpleXMLElement $structure, bool $recursive = false): array
{
    // Collect the node's attributes as plain strings.
    $attributes = [];
    foreach ($structure->attributes() as $attribute => $value) {
        $attributes[$attribute] = (string) $value;
    }
    // Load plugin configuration.
    $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
    // Child elements in the METS namespace (mptr, fptr and nested div are read below).
    $metsChildren = $structure->children('http://www.loc.gov/METS/');

    // Extract identity information. Attributes missing on the node default to an empty string;
    // in particular a div without @ID no longer passes null to getMetadata(string $id).
    $details = [
        'id' => $attributes['ID'] ?? '',
        'dmdId' => $attributes['DMDID'] ?? '',
        'admId' => $attributes['ADMID'] ?? '',
        'order' => $attributes['ORDER'] ?? '',
        'label' => $attributes['LABEL'] ?? '',
        'orderlabel' => $attributes['ORDERLABEL'] ?? '',
        'contentIds' => $attributes['CONTENTIDS'] ?? '',
        'volume' => '',
    ];
    $type = $attributes['TYPE'] ?? '';

    // The parsed metadata is needed in two cases; fetch it at most once.
    $isUnlabeled = empty($details['label']) && empty($details['orderlabel']);
    $isObject = $type === 'object';
    $metadata = ($isUnlabeled || $isObject) ? $this->getMetadata($details['id']) : [];

    // Set volume and year information only if neither label nor order label is set.
    if ($isUnlabeled) {
        if (!empty($metadata['volume'][0])) {
            $details['volume'] = $metadata['volume'][0];
        }
        if (!empty($metadata['year'][0])) {
            $details['year'] = $metadata['year'][0];
        }
    }
    $details['pagination'] = '';
    $details['type'] = $type;
    // add description for 3D objects
    if ($isObject) {
        $details['description'] = $metadata['description'][0] ?? '';
    }
    $details['thumbnailId'] = '';

    // Load smLinks.
    $this->_getSmLinks();
    // Load physical structure.
    $this->_getPhysicalStructure();

    // Get the physical page or external file this structure element is pointing at.
    $details['points'] = '';
    // ID of the physical structure element whose files are searched for a thumbnail (if any).
    $physicalId = null;
    if (count($metsChildren->mptr)) {
        // There is a mptr node. Get the file reference.
        $details['points'] = (string) $metsChildren->mptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
    } elseif (
        !empty($this->physicalStructure)
        && array_key_exists($details['id'], $this->smLinks['l2p'])
    ) {
        // Link logical structure to the first corresponding physical page/track.
        $physicalId = $this->smLinks['l2p'][$details['id']][0] ?? null;
        // Use the element's key in the physical structure, but never less than 1.
        $details['points'] = max(intval(array_search($physicalId, $this->physicalStructure, true)), 1);
        // Get page/track number of the first page/track related to this structure element.
        $details['pagination'] = $this->physicalStructureInfo[$physicalId]['orderlabel'] ?? '';
    } elseif ($details['id'] == $this->_getToplevelId()) {
        // Point to self if this is the toplevel structure.
        $details['points'] = 1;
        // Take the thumbnail from the physical structure entry at key 1 (presumably the first page).
        $physicalId = $this->physicalStructure[1] ?? null;
    }

    if ($physicalId !== null) {
        // Use the first configured thumbnail file group that has a file for this physical element.
        $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
        // As before, the loop also ends at the first empty file group name.
        while ($fileGrpThumb = array_shift($fileGrpsThumb)) {
            if (!empty($this->physicalStructureInfo[$physicalId]['files'][$fileGrpThumb])) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$physicalId]['files'][$fileGrpThumb];
                break;
            }
        }
    }

    // Get the files this structure element is pointing at.
    $details['files'] = [];
    // File ID -> USE concordance.
    $fileUse = $this->_getFileGrps();
    // Get the file representations from fileSec node.
    foreach ($metsChildren->fptr as $fptr) {
        $fileId = (string) $fptr->attributes()->FILEID;
        // Check if file has valid @USE attribute.
        if (!empty($fileUse[$fileId])) {
            $details['files'][$fileUse[$fileId]] = $fileId;
        }
    }

    // Keep for later usage. Note that the cached entry never contains 'children'.
    $this->logicalUnits[$details['id']] = $details;

    // Walk the structure recursively? And are there any children of the current element?
    if ($recursive && count($metsChildren->div)) {
        $details['children'] = [];
        foreach ($metsChildren->div as $child) {
            // Repeat for all children.
            $details['children'][] = $this->getLogicalStructureInfo($child, true);
        }
    }
    return $details;
}
|
404
|
|
|
|
|
405
|
|
|
/** |
|
406
|
|
|
* @see AbstractDocument::getMetadata() |
|
407
|
|
|
*/ |
|
408
|
|
|
public function getMetadata(string $id, int $cPid = 0): array
{
    // Make sure $cPid is a non-negative integer.
    $cPid = max($cPid, 0);
    // If $cPid is not given, try to get it elsewhere.
    if (!$cPid) {
        if (!$this->cPid && !$this->pid) {
            $this->logger->warning('Invalid PID ' . $cPid . ' for metadata definitions');
            return [];
        }
        // Retain current PID.
        $cPid = ($this->cPid ? $this->cPid : $this->pid);
    }
    // Get metadata from parsed metadata array if available.
    if (
        !empty($this->metadataArray[$id])
        && $this->metadataArray[0] == $cPid
    ) {
        return $this->metadataArray[$id];
    }

    $metadata = $this->initializeMetadata('METS');

    $mdIds = $this->getMetadataIds($id);
    if (empty($mdIds)) {
        // There is no metadata section for this structure node.
        return [];
    }
    // Associative array used as set of available section types (dmdSec, techMD, ...)
    $hasMetadataSection = [];
    // Load available metadata formats and metadata sections.
    $this->loadFormats();
    $this->_getMdSec();
    // Get the structure's type.
    if (!empty($this->logicalUnits[$id])) {
        $metadata['type'] = [$this->logicalUnits[$id]['type']];
    } else {
        $struct = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]/@TYPE');
        if (!empty($struct)) {
            $metadata['type'] = [(string) $struct[0]];
        }
    }

    // Metadata fields without a format but with a default value. This query does not depend
    // on the metadata section, so it is run at most once (on first use) instead of per section.
    $defaultValueFields = null;

    foreach ($mdIds as $dmdId) {
        // A DMDID may reference a section that does not exist; skip it instead of
        // reading undefined array keys.
        if (!isset($this->mdSec[$dmdId])) {
            $this->logger->warning('Metadata section with @ID "' . $dmdId . '" not found');
            continue;
        }
        $section = $this->mdSec[$dmdId];
        $mdSectionType = $section['section'];
        $mdType = $section['type'];

        // To preserve behavior of previous Kitodo versions, extract metadata only from first supported dmdSec
        // However, we want to extract, for example, all techMD sections (VIDEOMD, AUDIOMD)
        if ($mdSectionType === 'dmdSec' && isset($hasMetadataSection['dmdSec'])) {
            continue;
        }

        // Is this metadata format supported?
        if (empty($this->formats[$mdType])) {
            $this->logger->notice('Unsupported metadata format "' . $mdType . '" in ' . $mdSectionType . ' with @ID "' . $dmdId . '"');
            // Continue searching for supported metadata with next @DMDID.
            continue;
        }
        if (!empty($this->formats[$mdType]['class'])) {
            $class = $this->formats[$mdType]['class'];
            // Get the metadata from class.
            if (
                class_exists($class)
                && ($obj = GeneralUtility::makeInstance($class)) instanceof MetadataInterface
            ) {
                $obj->extractMetadata($section['xml'], $metadata);
            } else {
                $this->logger->warning('Invalid class/method "' . $class . '->extractMetadata()" for metadata format "' . $mdType . '"');
            }
        }

        // Get the additional metadata from database.
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');
        // Get hidden records, too.
        $queryBuilder
            ->getRestrictions()
            ->removeByType(HiddenRestriction::class);
        // Get all metadata with configured xpath and applicable format first.
        $resultWithFormat = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadataformat_joins.xpath AS xpath',
                'tx_dlf_metadataformat_joins.xpath_sorting AS xpath_sorting',
                'tx_dlf_metadata.is_sortable AS is_sortable',
                'tx_dlf_metadata.default_value AS default_value',
                'tx_dlf_metadata.format AS format'
            )
            ->from('tx_dlf_metadata')
            ->innerJoin(
                'tx_dlf_metadata',
                'tx_dlf_metadataformat',
                'tx_dlf_metadataformat_joins',
                $queryBuilder->expr()->eq(
                    'tx_dlf_metadataformat_joins.parent_id',
                    'tx_dlf_metadata.uid'
                )
            )
            ->innerJoin(
                'tx_dlf_metadataformat_joins',
                'tx_dlf_formats',
                'tx_dlf_formats_joins',
                $queryBuilder->expr()->eq(
                    'tx_dlf_formats_joins.uid',
                    'tx_dlf_metadataformat_joins.encoded'
                )
            )
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                $queryBuilder->expr()->eq('tx_dlf_metadataformat_joins.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_formats_joins.type', $queryBuilder->createNamedParameter($mdType))
            )
            ->execute();

        // Get all metadata without a format, but with a default value next.
        if ($defaultValueFields === null) {
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('tx_dlf_metadata');
            // Get hidden records, too.
            $queryBuilder
                ->getRestrictions()
                ->removeByType(HiddenRestriction::class);
            $defaultValueFields = $queryBuilder
                ->select(
                    'tx_dlf_metadata.index_name AS index_name',
                    'tx_dlf_metadata.is_sortable AS is_sortable',
                    'tx_dlf_metadata.default_value AS default_value',
                    'tx_dlf_metadata.format AS format'
                )
                ->from('tx_dlf_metadata')
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($cPid)),
                    $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                    $queryBuilder->expr()->eq('tx_dlf_metadata.format', 0),
                    $queryBuilder->expr()->neq('tx_dlf_metadata.default_value', $queryBuilder->createNamedParameter(''))
                )
                ->execute()
                ->fetchAll();
        }

        // Merge both result sets.
        $allResults = array_merge($resultWithFormat->fetchAll(), $defaultValueFields);
        // We need a \DOMDocument here, because SimpleXML doesn't support XPath functions properly.
        $domNode = dom_import_simplexml($section['xml']);
        $domXPath = new \DOMXPath($domNode->ownerDocument);
        $this->registerNamespaces($domXPath);
        // OK, now make the XPath queries.
        foreach ($allResults as $resArray) {
            $indexName = $resArray['index_name'];
            $sortingName = $indexName . '_sorting';
            // Set metadata field's value(s).
            if (
                $resArray['format'] > 0
                && !empty($resArray['xpath'])
                && ($values = $domXPath->evaluate($resArray['xpath'], $domNode))
            ) {
                if ($values instanceof \DOMNodeList) {
                    // A node list only replaces the field if it contains at least one node.
                    if ($values->length > 0) {
                        $metadata[$indexName] = [];
                        foreach ($values as $value) {
                            $metadata[$indexName][] = trim((string) $value->nodeValue);
                        }
                    }
                } else {
                    // Scalar XPath result (string, number or boolean).
                    $metadata[$indexName] = [trim((string) $values)];
                }
            }
            // Set default value if applicable.
            if (
                empty($metadata[$indexName][0])
                && strlen((string) $resArray['default_value']) > 0
            ) {
                $metadata[$indexName] = [$resArray['default_value']];
            }
            // Set sorting value if applicable.
            if (
                !empty($metadata[$indexName])
                && $resArray['is_sortable']
            ) {
                if (
                    $resArray['format'] > 0
                    && !empty($resArray['xpath_sorting'])
                    && ($values = $domXPath->evaluate($resArray['xpath_sorting'], $domNode))
                ) {
                    if ($values instanceof \DOMNodeList) {
                        if ($values->length > 0) {
                            $metadata[$sortingName][0] = trim((string) $values->item(0)->nodeValue);
                        }
                    } else {
                        $metadata[$sortingName][0] = trim((string) $values);
                    }
                }
                // Fall back to the field's first value for sorting.
                if (empty($metadata[$sortingName][0])) {
                    $metadata[$sortingName][0] = $metadata[$indexName][0];
                }
            }
        }

        $hasMetadataSection[$mdSectionType] = true;
    }
    // Set title to empty string if not present.
    if (empty($metadata['title'][0])) {
        $metadata['title'][0] = '';
        $metadata['title_sorting'][0] = '';
    }
    // Set title_sorting to title as default.
    if (empty($metadata['title_sorting'][0])) {
        $metadata['title_sorting'][0] = $metadata['title'][0];
    }
    // Set date to empty string if not present.
    if (empty($metadata['date'][0])) {
        $metadata['date'][0] = '';
    }

    // Files are not expected to reference a dmdSec
    if (isset($this->fileInfos[$id]) || isset($hasMetadataSection['dmdSec'])) {
        return $metadata;
    }
    $this->logger->warning('No supported descriptive metadata found for logical structure with @ID "' . $id . '"');
    return [];
}
|
627
|
|
|
|
|
628
|
|
|
/** |
|
629
|
|
|
* Get IDs of (descriptive and administrative) metadata sections |
|
630
|
|
|
* referenced by node of given $id. The $id may refer to either |
|
631
|
|
|
* a logical structure node or to a file. |
|
632
|
|
|
* |
|
633
|
|
|
* @access protected |
|
634
|
|
|
* |
|
635
|
|
|
* @param string $id The "@ID" attribute of the file node |
|
636
|
|
|
* |
|
637
|
|
|
* @return array |
|
638
|
|
|
*/ |
|
639
|
|
|
protected function getMetadataIds(string $id): array
{
    // Load amdSecChildIds concordance
    $this->_getMdSec();
    $fileInfo = $this->getFileInfo($id);

    // Space-separated ID lists; they stay empty if $id matches neither a logical node nor a file.
    $dmdIds = '';
    $admIds = '';

    // Get DMDID and ADMID of logical structure node
    if (!empty($this->logicalUnits[$id])) {
        $dmdIds = $this->logicalUnits[$id]['dmdId'] ?? '';
        $admIds = $this->logicalUnits[$id]['admId'] ?? '';
    } else {
        // Do not index the xpath result before checking it: for file IDs there is no
        // matching div, and reading offset 0 of the empty result raised a warning.
        $divs = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]');
        if (!empty($divs)) {
            $dmdIds = (string) $divs[0]->attributes()->DMDID;
            $admIds = (string) $divs[0]->attributes()->ADMID;
        } elseif (isset($fileInfo)) {
            // $id refers to a file; its info may lack either key.
            $dmdIds = $fileInfo['dmdId'] ?? '';
            $admIds = $fileInfo['admId'] ?? '';
        }
    }

    // Handle multiple DMDIDs/ADMIDs
    $allMdIds = explode(' ', $dmdIds);

    foreach (explode(' ', $admIds) as $admId) {
        if (isset($this->mdSec[$admId])) {
            // $admId references an actual metadata section such as techMD
            $allMdIds[] = $admId;
        } elseif (isset($this->amdSecChildIds[$admId])) {
            // $admId references a <mets:amdSec> element. Resolve child elements.
            foreach ($this->amdSecChildIds[$admId] as $childId) {
                $allMdIds[] = $childId;
            }
        }
    }

    // Drop empty entries (e.g. from empty ID lists); array keys are preserved.
    return array_filter($allMdIds);
}
|
682
|
|
|
|
|
683
|
|
|
/**
 * Returns the full text belonging to the given ID, or an empty string
 * when the document has no full text files.
 *
 * @see AbstractDocument::getFullText()
 *
 * @access public
 *
 * @param string $id The ID handed on to getFullTextFromXml()
 *
 * @return string The full text or an empty string
 */
public function getFullText(string $id): string
{
    // Load fileGrps first; this is what sets $this->hasFulltext.
    $this->_getFileGrps();

    if (!$this->hasFulltext) {
        return '';
    }

    return $this->getFullTextFromXml($id);
}
|
697
|
|
|
|
|
698
|
|
|
/**
 * Counts the ancestor elements of the logical structure node with the
 * given ID. All ancestors selected by "ancestor::*" are counted.
 *
 * @see AbstractDocument::getStructureDepth()
 *
 * @access public
 *
 * @param string $logId The "@ID" attribute of the logical structure node
 *
 * @return int Number of ancestors, or 0 if the query yields nothing
 */
public function getStructureDepth(string $logId)
{
    $ancestorNodes = $this->mets->xpath(
        './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $logId . '"]/ancestor::*'
    );

    return empty($ancestorNodes) ? 0 : count($ancestorNodes);
}
|
710
|
|
|
|
|
711
|
|
|
/**
 * Sets up the logger and extracts the METS node from the loaded XML.
 * If no METS node is present, an error naming the best available
 * identifier (location, record ID or neither) is logged.
 *
 * @see AbstractDocument::init()
 *
 * @access protected
 *
 * @param string $location The location of the document; only used for the error message
 *
 * @return void
 */
protected function init(string $location): void
{
    $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($this));

    // Namespaces must be registered before the METS node can be queried.
    $this->registerNamespaces($this->xml);
    $metsNodes = $this->xml->xpath('//mets:mets');

    if (!empty($metsNodes)) {
        $this->mets = $metsNodes[0];
        // Register the namespaces on the extracted METS node as well.
        $this->registerNamespaces($this->mets);
        return;
    }

    if (!empty($location)) {
        $message = 'No METS part found in document with location "' . $location . '".';
    } elseif (!empty($this->recordId)) {
        $message = 'No METS part found in document with recordId "' . $this->recordId . '".';
    } else {
        $message = 'No METS part found in current document.';
    }

    $this->logger->error($message);
}
|
734
|
|
|
|
|
735
|
|
|
/**
 * Fetches the resource at the given location and stores the result of
 * Helper::getXmlFileAsString() in $this->xml.
 *
 * @see AbstractDocument::loadLocation()
 *
 * @access protected
 *
 * @param string $location The location of the XML file
 *
 * @return bool true on success, false if fetching or parsing failed (an error is logged)
 */
protected function loadLocation(string $location): bool
{
    $content = Helper::getUrl($location);

    // Only attempt to parse when the resource could actually be fetched.
    $parsed = ($content !== false) ? Helper::getXmlFileAsString($content) : false;

    if ($parsed === false) {
        $this->logger->error('Could not load XML file from "' . $location . '"');
        return false;
    }

    $this->xml = $parsed;

    return true;
}
|
752
|
|
|
|
|
753
|
|
|
/**
 * Makes sure the file groups have been loaded; loading them is what
 * determines $this->hasFulltext.
 *
 * @see AbstractDocument::ensureHasFulltextIsSet()
 *
 * @access protected
 *
 * @return void
 */
protected function ensureHasFulltextIsSet(): void
{
    if ($this->fileGrpsLoaded) {
        // Nothing to do, the file groups were evaluated before.
        return;
    }

    $this->_getFileGrps();
}
|
763
|
|
|
|
|
764
|
|
|
/**
 * Adopts an already parsed document as $this->xml, provided it is a
 * \SimpleXMLElement.
 *
 * @see AbstractDocument::setPreloadedDocument()
 *
 * @access protected
 *
 * @param mixed $preloadedDocument The candidate document
 *
 * @return bool true if the document was adopted, false for any other type
 */
protected function setPreloadedDocument($preloadedDocument): bool
{
    if (!($preloadedDocument instanceof \SimpleXMLElement)) {
        return false;
    }

    $this->xml = $preloadedDocument;

    return true;
}
|
776
|
|
|
|
|
777
|
|
|
/**
 * Returns the METS node held in $this->mets.
 *
 * @see AbstractDocument::getDocument()
 *
 * @access protected
 *
 * @return \SimpleXMLElement The METS node
 */
protected function getDocument(): \SimpleXMLElement
{
    return $this->mets;
}
|
784
|
|
|
|
|
785
|
|
|
/**
 * Builds (once) the concordances of the document's metadata sections:
 * $this->mdSec for all processed sections, $this->dmdSec for the
 * descriptive ones and $this->amdSecChildIds for the children of each
 * identified <mets:amdSec>.
 *
 * @access protected
 *
 * @return array Array of metadata sections with their IDs as array key
 */
protected function _getMdSec(): array
{
    if ($this->mdSecLoaded) {
        return $this->mdSec;
    }

    $this->loadFormats();

    // Descriptive metadata sections are registered in both concordances.
    foreach ($this->mets->xpath('./mets:dmdSec') as $dmdSecNode) {
        $section = $this->processMdSec($dmdSecNode);
        if ($section === null) {
            continue;
        }

        $sectionId = $section['id'];
        $this->mdSec[$sectionId] = $section;
        $this->dmdSec[$sectionId] = $section;
    }

    // Administrative metadata: collect the processed children per <mets:amdSec>.
    foreach ($this->mets->xpath('./mets:amdSec') as $amdSecNode) {
        $childIds = [];

        foreach ($amdSecNode->children('http://www.loc.gov/METS/') as $childNode) {
            if (in_array($childNode->getName(), self::ALLOWED_AMD_SEC)) {
                // TODO: Should we check that the format may occur within this type (e.g., to ignore VIDEOMD within rightsMD)?
                $section = $this->processMdSec($childNode);

                if ($section !== null) {
                    $this->mdSec[$section['id']] = $section;
                    $childIds[] = $section['id'];
                }
            }
        }

        // Sections without an ID cannot be referenced, so they are not recorded.
        $amdSecId = (string) $amdSecNode->attributes()->ID;
        if (!empty($amdSecId)) {
            $this->amdSecChildIds[$amdSecId] = $childIds;
        }
    }

    $this->mdSecLoaded = true;

    return $this->mdSec;
}
|
834
|
|
|
|
|
835
|
|
|
/**
 * Returns the descriptive metadata sections after making sure that the
 * metadata section concordances have been built.
 *
 * @access protected
 *
 * @return array Array of descriptive metadata sections with their IDs as array key
 */
protected function _getDmdSec(): array
{
    // $this->dmdSec is filled as part of loading all metadata sections.
    $this->_getMdSec();

    return $this->dmdSec;
}
|
847
|
|
|
|
|
848
|
|
|
/**
 * Processes an element of METS `mdSecType`.
 *
 * The metadata type is read from @MDTYPE, or from @OTHERMDTYPE when no
 * mdWrap with an @MDTYPE other than "OTHER" exists. Only types listed in
 * $this->formats are accepted.
 *
 * @access protected
 *
 * @param \SimpleXMLElement $element The metadata section element
 *
 * @return array|null The processed metadata section with keys "id", "section", "type" and "xml",
 *                    or null if the element has no ID, an unsupported type or no matching XML data
 */
protected function processMdSec(\SimpleXMLElement $element): ?array
{
    $mdId = (string) $element->attributes()->ID;
    if (empty($mdId)) {
        return null;
    }

    $this->registerNamespaces($element);

    // Prefer a regular MDTYPE; fall back to OTHERMDTYPE only if none is found.
    $isOtherType = false;
    $typeAttributes = $element->xpath('./mets:mdWrap[not(@MDTYPE="OTHER")]/@MDTYPE');
    if (!$typeAttributes) {
        $isOtherType = true;
        $typeAttributes = $element->xpath('./mets:mdWrap[@MDTYPE="OTHER"]/@OTHERMDTYPE');
    }
    if (!$typeAttributes) {
        return null;
    }

    $type = (string) $typeAttributes[0];
    if (empty($this->formats[$type])) {
        // Metadata format is not configured.
        return null;
    }

    if ($isOtherType) {
        $wrapPath = './mets:mdWrap[@MDTYPE="OTHER"][@OTHERMDTYPE="' . $type . '"]';
    } else {
        $wrapPath = './mets:mdWrap[@MDTYPE="' . $type . '"]';
    }

    // The namespace prefix of the root element is the lower-cased type name.
    $xml = $element->xpath(
        $wrapPath . '/mets:xmlData/' . strtolower($type) . ':' . $this->formats[$type]['rootElement']
    );
    if (empty($xml)) {
        return null;
    }

    $this->registerNamespaces($xml[0]);

    return [
        'id' => $mdId,
        'section' => $element->getName(),
        'type' => $type,
        'xml' => $xml[0],
    ];
}
|
890
|
|
|
|
|
891
|
|
|
/**
 * This builds the file ID -> USE concordance
 *
 * Only file groups whose USE attribute is listed in one of the extension
 * settings "fileGrpImages", "fileGrpThumbs", "fileGrpDownload",
 * "fileGrpFulltext" or "fileGrpAudio" are considered. Besides
 * $this->fileGrps, this also fills $this->fileInfos and sets
 * $this->hasFulltext when a file of a configured full text group exists.
 *
 * @access protected
 *
 * @return array Map of file ID to the USE attribute of its file group
 */
protected function _getFileGrps(): array
{
    if (!$this->fileGrpsLoaded) {
        // Get configured USE attributes.
        $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);

        $useGrps = [];
        $configKeys = ['fileGrpImages', 'fileGrpThumbs', 'fileGrpDownload', 'fileGrpFulltext', 'fileGrpAudio'];
        foreach ($configKeys as $configKey) {
            if (!empty($extConf[$configKey])) {
                // Empty list entries are removed: an empty USE value would otherwise
                // match every file group that has no USE attribute at all.
                $useGrps = array_merge($useGrps, GeneralUtility::trimExplode(',', $extConf[$configKey], true));
            }
        }

        // Get all file groups.
        $fileGrps = $this->mets->xpath('./mets:fileSec/mets:fileGrp');
        if (!empty($fileGrps)) {
            // Build concordance for configured USE attributes.
            foreach ($fileGrps as $fileGrp) {
                $use = (string) $fileGrp['USE'];
                // Strict comparison, so numeric-looking USE values are not mixed up.
                if (!in_array($use, $useGrps, true)) {
                    continue;
                }

                foreach ($fileGrp->children('http://www.loc.gov/METS/')->file as $file) {
                    $fileId = (string) $file->attributes()->ID;
                    $this->fileGrps[$fileId] = $use;
                    $this->fileInfos[$fileId] = [
                        'fileGrp' => $use,
                        'admId' => (string) $file->attributes()->ADMID,
                        'dmdId' => (string) $file->attributes()->DMDID,
                    ];
                }
            }
        }

        // Are there any fulltext files available?
        if (
            !empty($extConf['fileGrpFulltext'])
            && array_intersect(GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext'], true), $this->fileGrps) !== []
        ) {
            $this->hasFulltext = true;
        }

        $this->fileGrpsLoaded = true;
    }

    return $this->fileGrps;
}
|
945
|
|
|
|
|
946
|
|
|
/**
 * Fills $this->metadataArray with the metadata of every logical
 * structure node that carries a DMDID attribute.
 *
 * @see AbstractDocument::prepareMetadataArray()
 *
 * @access protected
 *
 * @param int $cPid The PID handed on to getMetadata()
 *
 * @return void
 */
protected function prepareMetadataArray(int $cPid): void
{
    // Get all logical structure nodes with metadata.
    $idAttributes = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID]/@ID');

    if (empty($idAttributes)) {
        return;
    }

    foreach ($idAttributes as $idAttribute) {
        $this->metadataArray[(string) $idAttribute] = $this->getMetadata((string) $idAttribute, $cPid);
    }
}
|
960
|
|
|
|
|
961
|
|
|
/**
 * Accessor for the METS node ($this->mets), intended to be reached
 * via __get().
 *
 * @access protected
 *
 * @return \SimpleXMLElement The XML's METS part as \SimpleXMLElement object
 */
protected function _getMets(): \SimpleXMLElement
{
    return $this->mets;
}
|
972
|
|
|
|
|
973
|
|
|
/**
 * Builds (once) the physical structure of the document from the
 * structMap of type "PHYSICAL".
 *
 * The result lists the ID of the physical sequence first, followed by the
 * IDs of its child divs ordered by their ORDER attribute. Details of each
 * node are stored in $this->physicalStructureInfo and the number of child
 * divs in $this->numPages.
 *
 * @see AbstractDocument::_getPhysicalStructure()
 *
 * @access protected
 *
 * @return array The physical structure (empty if the document has no physical sequence with child divs)
 */
protected function _getPhysicalStructure(): array
{
    // Is the physical structure array already there?
    if ($this->physicalStructureLoaded) {
        return $this->physicalStructure;
    }

    // Does the document have a structMap node of type "PHYSICAL"?
    $pageNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div');

    if (!empty($pageNodes)) {
        // Get file groups.
        $fileUse = $this->_getFileGrps();

        // Info key => METS attribute; missing attributes end up as empty strings.
        $attributeMap = [
            'id' => 'ID',
            'dmdId' => 'DMDID',
            'admId' => 'ADMID',
            'order' => 'ORDER',
            'label' => 'LABEL',
            'orderlabel' => 'ORDERLABEL',
            'type' => 'TYPE',
            'contentIds' => 'CONTENTIDS',
        ];

        // Records one physical node in $this->physicalStructureInfo and returns its ID.
        $registerNode = function (\SimpleXMLElement $node) use ($fileUse, $attributeMap): string {
            $nodeId = (string) $node['ID'];

            foreach ($attributeMap as $infoKey => $attribute) {
                $this->physicalStructureInfo[$nodeId][$infoKey] = isset($node[$attribute]) ? (string) $node[$attribute] : '';
            }

            // Get the file representations from fileSec node.
            foreach ($node->children('http://www.loc.gov/METS/')->fptr as $fptr) {
                $fileId = (string) $fptr->attributes()->FILEID;
                // Only files of a configured file group (valid @USE) are recorded.
                if (!empty($fileUse[$fileId])) {
                    $this->physicalStructureInfo[$nodeId]['files'][$fileUse[$fileId]] = $fileId;
                }
            }

            return $nodeId;
        };

        // Get the physical sequence's metadata.
        $sequenceNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]');
        $physSeq = [$registerNode($sequenceNodes[0])];

        // Build the physical elements' array from the physical structMap node.
        $elements = [];
        foreach ($pageNodes as $pageNode) {
            $elements[(int) $pageNode['ORDER']] = $registerNode($pageNode);
        }

        // Sort array by keys (= @ORDER).
        ksort($elements);
        // Set total number of pages/tracks.
        $this->numPages = count($elements);
        // Merge and re-index the array to get numeric indexes.
        $this->physicalStructure = array_merge($physSeq, $elements);
    }

    $this->physicalStructureLoaded = true;

    return $this->physicalStructure;
}
|
1033
|
|
|
|
|
1034
|
|
|
/**
 * Builds (once) the concordance between logical and physical structure
 * nodes from the <mets:smLink> elements: "l2p" maps each xlink:from to
 * its xlink:to values, "p2l" maps the opposite direction.
 *
 * @see AbstractDocument::_getSmLinks()
 *
 * @access protected
 *
 * @return array The links between logical and physical nodes
 */
protected function _getSmLinks(): array
{
    if ($this->smLinksLoaded) {
        return $this->smLinks;
    }

    $linkNodes = $this->mets->xpath('./mets:structLink/mets:smLink');

    if (!empty($linkNodes)) {
        foreach ($linkNodes as $linkNode) {
            $xlink = $linkNode->attributes('http://www.w3.org/1999/xlink');
            $from = (string) $xlink->from;
            $to = (string) $xlink->to;

            $this->smLinks['l2p'][$from][] = $to;
            $this->smLinks['p2l'][$to][] = $from;
        }
    }

    $this->smLinksLoaded = true;

    return $this->smLinks;
}
|
1051
|
|
|
|
|
1052
|
|
|
/**
 * Determines (once, unless a reload is forced) the location of the
 * document's thumbnail file.
 *
 * The structure type of the title data is looked up in "tx_dlf_structures".
 * If that record names another structure type for thumbnails and the
 * document contains such a structure, that structure is used instead of the
 * toplevel structure. For each configured thumbnail file group, the first
 * physical page linked to the chosen structure is tried first, then the
 * first page of the document.
 *
 * @see AbstractDocument::_getThumbnail()
 *
 * @access protected
 *
 * @param bool $forceReload Force reloading the thumbnail instead of returning the cached value
 *
 * @return string The value of $this->thumbnail; it stays unchanged when no thumbnail could be determined
 */
protected function _getThumbnail(bool $forceReload = false): string
{
    if ($this->thumbnailLoaded && !$forceReload) {
        return $this->thumbnail;
    }

    // Retain current PID.
    $cPid = ($this->cPid ? $this->cPid : $this->pid);
    if (!$cPid) {
        $this->logger->error('Invalid PID ' . $cPid . ' for structure definitions');
        $this->thumbnailLoaded = true;
        return $this->thumbnail;
    }

    // Load extension configuration.
    $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
    if (empty($extConf['fileGrpThumbs'])) {
        $this->logger->warning('No fileGrp for thumbnails specified');
        $this->thumbnailLoaded = true;
        return $this->thumbnail;
    }

    $strctId = $this->_getToplevelId();
    $metadata = $this->getTitledata($cPid);
    // The title data may lack a type, so do not read the index blindly.
    $structureType = (string) ($metadata['type'][0] ?? '');

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('tx_dlf_structures');

    // Get structure element to get thumbnail from.
    $result = $queryBuilder
        ->select('tx_dlf_structures.thumbnail AS thumbnail')
        ->from('tx_dlf_structures')
        ->where(
            $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($cPid)),
            $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($structureType)),
            Helper::whereExpression('tx_dlf_structures')
        )
        ->setMaxResults(1)
        ->execute();

    $allResults = $result->fetchAll();

    if (count($allResults) == 1) {
        $resArray = $allResults[0];

        // Get desired thumbnail structure if not the toplevel structure itself.
        if (!empty($resArray['thumbnail'])) {
            $strctType = Helper::getIndexNameFromUid($resArray['thumbnail'], 'tx_dlf_structures', $cPid);
            // Check if this document has a structure element of the desired type.
            $strctIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@TYPE="' . $strctType . '"]/@ID');
            if (!empty($strctIds)) {
                $strctId = (string) $strctIds[0];
            }
        }

        // Load smLinks.
        $this->_getSmLinks();
        $physicalStructure = $this->_getPhysicalStructure();

        // Candidate pages in order of preference: the first page linked to the
        // structure, then the first page of the document (index 0 is the sequence).
        $candidatePages = [];
        $linkedPage = $this->smLinks['l2p'][$strctId][0] ?? null;
        if (!empty($physicalStructure) && $linkedPage !== null) {
            $candidatePages[] = $linkedPage;
        }
        if (isset($physicalStructure[1])) {
            $candidatePages[] = $physicalStructure[1];
        }

        // Get thumbnail location. Empty list entries are dropped so that they
        // cannot end the search before all configured file groups were tried.
        $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs'], true);
        foreach ($fileGrpsThumb as $fileGrpThumb) {
            foreach ($candidatePages as $candidatePage) {
                if (!empty($this->physicalStructureInfo[$candidatePage]['files'][$fileGrpThumb])) {
                    $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$candidatePage]['files'][$fileGrpThumb]);
                    break 2;
                }
            }
        }
    } else {
        $this->logger->error('No structure of type "' . $structureType . '" found in database');
    }

    $this->thumbnailLoaded = true;

    return $this->thumbnail;
}
|
1130
|
|
|
|
|
1131
|
|
|
/**
 * Determines (once) the ID of the toplevel logical structure node.
 *
 * Candidates are the logical divs that have a DMDID and no METS pointer
 * child. The first candidate linked to a physical structure wins; if none
 * is linked, the first candidate is used.
 *
 * @see AbstractDocument::_getToplevelId()
 *
 * @access protected
 *
 * @return string The ID of the toplevel logical structure node
 */
protected function _getToplevelId(): string
{
    if (!empty($this->toplevelId)) {
        return $this->toplevelId;
    }

    // Get all logical structure nodes with metadata, but without associated METS-Pointers.
    $candidates = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID and not(./mets:mptr)]');
    if (empty($candidates)) {
        return $this->toplevelId;
    }

    // Load smLinks.
    $this->_getSmLinks();

    foreach ($candidates as $candidate) {
        $candidateId = (string) $candidate['ID'];

        // Are there physical structure nodes for this logical structure?
        if (array_key_exists($candidateId, $this->smLinks['l2p'])) {
            // Yes. That's what we're looking for.
            $this->toplevelId = $candidateId;
            break;
        }

        if (empty($this->toplevelId)) {
            // No. Remember this anyway, but keep looking for a better one.
            $this->toplevelId = $candidateId;
        }
    }

    return $this->toplevelId;
}
|
1158
|
|
|
|
|
1159
|
|
|
/**
 * Try to determine URL of parent document.
 *
 * The URL is the xlink:href of the METS pointer of the closest ancestor
 * of the toplevel structure node that has such a pointer.
 *
 * @access public
 *
 * @return string The value of $this->parentHref; it stays unchanged when no such ancestor exists
 */
public function _getParentHref(): string
{
    if (empty($this->parentHref)) {
        // Resolve the toplevel ID through its getter: the property is filled
        // lazily and would still be empty if nothing has requested it before.
        $toplevelId = $this->_getToplevelId();

        // Get the closest ancestor of the current document which has a MPTR child.
        $parentMptr = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $toplevelId . '"]/ancestor::mets:div[./mets:mptr][1]/mets:mptr');
        if (!empty($parentMptr)) {
            $this->parentHref = (string) $parentMptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
        }
    }

    return $this->parentHref;
}
|
1178
|
|
|
|
|
1179
|
|
|
/**
 * This magic method is executed prior to any serialization of the object.
 * It stores the XML as a string in $this->asXML and names the properties
 * to serialize.
 *
 * @see __wakeup()
 *
 * @access public
 *
 * @return array Properties to be serialized
 */
public function __sleep(): array
{
    // \SimpleXMLElement objects can't be serialized, thus the XML travels as a string.
    $this->asXML = $this->xml->asXML();

    $serializableProperties = ['uid', 'pid', 'recordId', 'parentId', 'asXML'];

    return $serializableProperties;
}
|
1193
|
|
|
|
|
1194
|
|
|
/**
 * This magic method is used for getting a string value for the object.
 * The METS node is copied into a new DOM document and serialized with
 * formatted output.
 *
 * @access public
 *
 * @return string String representing the METS object
 */
public function __toString(): string
{
    $document = new \DOMDocument('1.0', 'utf-8');
    $document->formatOutput = true;

    // Deep-copy the METS node into the new document.
    $metsNode = dom_import_simplexml($this->mets);
    $document->appendChild($document->importNode($metsNode, true));

    return $document->saveXML();
}
|
1208
|
|
|
|
|
1209
|
|
|
/**
 * This magic method is executed after the object is deserialized.
 * It restores $this->xml from the serialized XML string and re-runs
 * init(); on failure only an error is logged.
 *
 * @see __sleep()
 *
 * @access public
 *
 * @return void
 */
public function __wakeup(): void
{
    $restoredXml = Helper::getXmlFileAsString($this->asXML);

    if ($restoredXml === false) {
        // init() is not run in this case, so the logger has to be created here.
        $this->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(static::class);
        $this->logger->error('Could not load XML after deserialization');
        return;
    }

    $this->asXML = '';
    $this->xml = $restoredXml;
    // Rebuild the unserializable properties.
    $this->init('');
}
|
1230
|
|
|
} |
|
1231
|
|
|
|