Passed
Pull Request — master (#123)
by
unknown
04:16
created

Mods::getHolderFromXml()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 3
c 1
b 0
f 0
nc 2
nop 2
dl 0
loc 6
rs 10
1
<?php
2
3
/**
4
 * (c) Kitodo. Key to digital objects e.V. <[email protected]>
5
 *
6
 * This file is part of the Kitodo and TYPO3 projects.
7
 *
8
 * @license GNU General Public License version 3 or later.
9
 * For the full copyright and license information, please read the
10
 * LICENSE.txt file that was distributed with this source code.
11
 */
12
13
namespace Kitodo\Dlf\Format;
14
15
use Kitodo\Dlf\Api\Orcid\Profile as OrcidProfile;
16
use Kitodo\Dlf\Api\Viaf\Profile as ViafProfile;
17
use Kitodo\Dlf\Common\MetadataInterface;
18
use Slub\Mods\Element\Name;
19
use Slub\Mods\ModsReader;
20
21
/**
 * Metadata MODS format class for the 'dlf' extension
 *
 * Reads authors, holders, places and years from a MODS record and writes
 * them into the metadata array handed to extractMetadata().
 *
 * @package TYPO3
 * @subpackage dlf
 *
 * @access public
 */
class Mods implements MetadataInterface
{
    /**
     * @access private
     * @var \SimpleXMLElement The metadata XML
     **/
    private $xml;

    /**
     * @access private
     * @var ModsReader The reader created from the metadata XML
     **/
    private $modsReader;

    /**
     * @access private
     * @var array The metadata array
     **/
    private $metadata;

    /**
     * @access private
     * @var bool Whether external APIs should be called
     **/
    private $useExternalApis;

    /**
     * This extracts the essential MODS metadata from XML
     *
     * @access public
     *
     * @param \SimpleXMLElement $xml The XML to extract the metadata from
     * @param array &$metadata The metadata array to fill
     * @param bool $useExternalApis true if external APIs should be called, false otherwise
     *
     * @return void
     */
    public function extractMetadata(\SimpleXMLElement $xml, array &$metadata, bool $useExternalApis): void
    {
        $this->xml = $xml;
        $this->metadata = $metadata;
        $this->useExternalApis = $useExternalApis;

        $this->modsReader = new ModsReader($this->xml);

        $this->getAuthors();
        $this->getHolders();
        $this->getPlaces();
        $this->getYears();

        // The getters work on the internal copy; hand the result back through the reference.
        $metadata = $this->metadata;
    }

    /**
     * Get "author".
     *
     * Names carrying the marcrelator role code "aut" are preferred; if there
     * are none, all names without any role are used instead.
     *
     * @access private
     *
     * @return void
     */
    private function getAuthors(): void
    {
        $authors = $this->modsReader->getNames('[./mods:role/mods:roleTerm[@type="code" and @authority="marcrelator"]="aut"]');
        // Get "author" again if that was too sophisticated.
        if (empty($authors)) {
            // Get all names which do not have any role term assigned and assume these are authors.
            $authors = $this->modsReader->getNames('[not(./mods:role)]');
        }
        if (empty($authors)) {
            return;
        }

        for ($i = 0, $j = count($authors); $i < $j; $i++) {
            $identifiers = $authors[$i]->getNameIdentifiers('[@type="orcid"]');
            if ($this->useExternalApis && !empty($identifiers)) {
                $this->getAuthorFromOrcidApi($identifiers[0]->getValue(), $authors, $i);
            } else {
                $this->getAuthorFromXml($authors, $i);
            }
        }
    }

    /**
     * Get author from ORCID API.
     *
     * Stores an array with "name" and "url" when the profile yields a name,
     * otherwise falls back to the display form found in the XML.
     *
     * @access private
     *
     * @param string $orcidId
     * @param Name[] $authors
     * @param int $i
     *
     * @return void
     */
    private function getAuthorFromOrcidApi(string $orcidId, array $authors, int $i): void
    {
        $profile = new OrcidProfile($orcidId);
        $name = $profile->getFullName();
        if (!empty($name)) {
            $this->metadata['author'][$i] = [
                'name' => $name,
                'url' => 'https://orcid.org/' . $orcidId
            ];
        } else {
            //fallback into display form
            $this->getAuthorFromXmlDisplayForm($authors, $i);
        }
    }

    /**
     * Get author from XML.
     *
     * Uses the display form if there is one, otherwise joins the name parts
     * with ", ". A value URI, if present, is appended after a unit separator.
     *
     * @access private
     *
     * @param Name[] $authors
     * @param int $i
     *
     * @return void
     */
    private function getAuthorFromXml(array $authors, int $i): void
    {
        $this->getAuthorFromXmlDisplayForm($authors, $i);

        $nameParts = $authors[$i]->getNameParts();
        if (empty($this->metadata['author'][$i]) && $nameParts) {
            // Typed parts get fixed slots so the joined name always reads
            // family, given, termsOfAddress, date; all other parts follow
            // in document order.
            $slots = [
                'family' => 0,
                'given' => 1,
                'termsOfAddress' => 2,
                'date' => 3
            ];
            $name = [];
            $k = 4;
            foreach ($nameParts as $namePart) {
                $type = $namePart->getType();
                if (is_string($type) && isset($slots[$type])) {
                    $name[$slots[$type]] = $namePart->getValue();
                } else {
                    $name[$k] = $namePart->getValue();
                }
                $k++;
            }
            ksort($name);
            $this->metadata['author'][$i] = trim(implode(', ', $name));
        }
        // Append "valueURI" to name using Unicode unit separator.
        $valueUri = $authors[$i]->getValueURI();
        if (!empty($valueUri)) {
            // The entry may not exist yet (no display form, no name parts),
            // so start from an empty string instead of reading an undefined key.
            $this->metadata['author'][$i] = ($this->metadata['author'][$i] ?? '') . pack('C', 31) . $valueUri;
        }
    }

    /**
     * Get author from XML display form.
     *
     * Leaves the metadata untouched when the name has no display form.
     *
     * @access private
     *
     * @param Name[] $authors
     * @param int $i
     *
     * @return void
     */
    private function getAuthorFromXmlDisplayForm(array $authors, int $i): void
    {
        $displayForms = $authors[$i]->getDisplayForms();
        if ($displayForms) {
            $this->metadata['author'][$i] = $displayForms[0]->getValue();
        }
    }

    /**
     * Get holder.
     *
     * Holders are the names carrying the marcrelator role code "prv".
     *
     * @access private
     *
     * @return void
     */
    private function getHolders(): void
    {
        $holders = $this->modsReader->getNames('[./mods:role/mods:roleTerm[@type="code" and @authority="marcrelator"]="prv"]');
        if (empty($holders)) {
            return;
        }

        for ($i = 0, $j = count($holders); $i < $j; $i++) {
            $identifiers = $holders[$i]->getNameIdentifiers('[@type="viaf"]');
            if ($this->useExternalApis && !empty($identifiers)) {
                $this->getHolderFromViafApi($identifiers[0]->getValue(), $holders, $i);
            } else {
                $this->getHolderFromXml($holders, $i);
            }
        }
    }

    /**
     * Get holder from VIAF API.
     *
     * Stores an array with "name" and "url" when the profile yields a name,
     * otherwise falls back to the display form found in the XML.
     *
     * @access private
     *
     * @param string $viafId
     * @param Name[] $holders
     * @param int $i
     *
     * @return void
     */
    private function getHolderFromViafApi(string $viafId, array $holders, int $i): void
    {
        $profile = new ViafProfile($viafId);
        $name = $profile->getFullName();
        if (!empty($name)) {
            $this->metadata['holder'][$i] = [
                'name' => $name,
                'url' => 'http://viaf.org/viaf/' . $viafId
            ];
        } else {
            //fallback into display form
            $this->getHolderFromXmlDisplayForm($holders, $i);
        }
    }

    /**
     * Get holder from XML.
     *
     * Uses the display form; a value URI, if present, is appended after a
     * unit separator.
     *
     * @access private
     *
     * @param Name[] $holders
     * @param int $i
     *
     * @return void
     */
    private function getHolderFromXml(array $holders, int $i): void
    {
        $this->getHolderFromXmlDisplayForm($holders, $i);
        // Append "valueURI" to name using Unicode unit separator.
        $valueUri = $holders[$i]->getValueURI();
        if (!empty($valueUri)) {
            // The entry may not exist yet (no display form), so start from
            // an empty string instead of reading an undefined key.
            $this->metadata['holder'][$i] = ($this->metadata['holder'][$i] ?? '') . pack('C', 31) . $valueUri;
        }
    }

    /**
     * Get holder from XML display form.
     *
     * Leaves the metadata untouched when the name has no display form.
     *
     * @access private
     *
     * @param Name[] $holders
     * @param int $i
     *
     * @return void
     */
    private function getHolderFromXmlDisplayForm(array $holders, int $i): void
    {
        // Check if there is a display form. Holders come from the same
        // getNames() call as authors, so the same accessor is used here.
        $displayForms = $holders[$i]->getDisplayForms();
        if ($displayForms) {
            $this->metadata['holder'][$i] = $displayForms[0]->getValue();
        }
    }

    /**
     * Get "place" and "place_sorting".
     *
     * Origin infos of electronic editions are skipped first; if that yields
     * nothing, the places of all origin infos are used.
     *
     * @access private
     *
     * @return void
     */
    private function getPlaces(): void
    {
        $places = $this->collectPlaceTerms(
            $this->modsReader->getOriginInfos('[not(./mods:edition="[Electronic ed.]")]')
        );

        // Get "place" and "place_sorting" again if that was too sophisticated.
        if (empty($places)) {
            // Get all places and assume these are places of publication.
            $places = $this->collectPlaceTerms($this->modsReader->getOriginInfos());
        }

        foreach ($places as $place) {
            $this->metadata['place'][] = $place;
            // Only the first non-empty place is used for sorting.
            if (empty($this->metadata['place_sorting'][0])) {
                $this->metadata['place_sorting'][0] = preg_replace('/[[:punct:]]/', '', $place);
            }
        }
    }

    /**
     * Collect the values of all place terms of the given origin infos.
     *
     * @access private
     *
     * @param iterable $originInfos The origin infos as returned by the MODS reader
     *
     * @return array The place term values in document order
     */
    private function collectPlaceTerms(iterable $originInfos): array
    {
        $places = [];
        foreach ($originInfos as $originInfo) {
            foreach ($originInfo->getPlaces() as $place) {
                foreach ($place->getPlaceTerms() as $placeTerm) {
                    $places[] = $placeTerm->getValue();
                }
            }
        }
        return $places;
    }

    /**
     * Get "year" and "year_sorting".
     *
     * @access private
     *
     * @return void
     */
    private function getYears(): void
    {
        // Get "year_sorting".
        $yearsSorting = $this->modsReader->getOriginInfos('[not(./mods:edition="[Electronic ed.]")]/mods:dateOther[@type="order" and @encoding="w3cdtf"]');
        if ($yearsSorting) {
            foreach ($yearsSorting as $yearSorting) {
                $otherDates = $yearSorting->getOtherDates();
                if (!empty($otherDates)) {
                    $this->metadata['year_sorting'][0] = $otherDates[0]->getValue();
                }
            }
        }
        // Get "year" and "year_sorting" if not specified separately.
        // NOTE(review): unlike the other getOriginInfos() calls in this class,
        // this query starts with "./mods:originInfo" - confirm that the reader
        // accepts a full path here.
        $years = $this->modsReader->getOriginInfos('./mods:originInfo[not(./mods:edition="[Electronic ed.]")]/mods:dateIssued[@keyDate="yes"]');
        // Get "year" and "year_sorting" again if that was too sophisticated.
        if (empty($years)) {
            // Get all dates and assume these are dates of publication.
            $years = $this->modsReader->getOriginInfos();
        }
        if (empty($years)) {
            return;
        }

        foreach ($years as $year) {
            $issued = $year->getIssuedDates();
            if (empty($issued)) {
                continue;
            }
            $value = $issued[0]->getValue();
            $this->metadata['year'][] = $value;
            if (!empty($this->metadata['year_sorting'][0])) {
                continue;
            }
            // Keep digits, dots and "x" only, then read "x" as "5".
            // preg_replace() returns null on failure, hence the cast before
            // the value is passed to the string functions below.
            $yearSorting = str_ireplace('x', '5', (string) preg_replace('/[^\d.x]/i', '', $value));
            // strpos() is deliberately used for its truthiness: a dot at
            // position 0 does not count, a dot anywhere later does.
            if (
                strpos($yearSorting, '.')
                || strlen($yearSorting) < 3
            ) {
                $yearSorting = (((int) trim($yearSorting, '.') - 1) * 100) + 50;
            }
            $this->metadata['year_sorting'][0] = (int) $yearSorting;
        }
    }
}
381