Passed
Push — master ( ddb420...13a60b )
by Dispositif
02:30
created

BnfMapper::formatISNI()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 5
c 0
b 0
f 0
nc 3
nop 1
dl 0
loc 11
rs 10
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 : Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Publisher;
11
12
use App\Domain\Enums\Language;
13
use SimpleXMLElement;
14
15
/**
 * Class BnfMapper
 * Maps a BnF catalogue SRU response (UniMarc XML) to a flat array of book data.
 *
 * Example request:
 * http://catalogue.bnf.fr/api/SRU?version=1.2&operation=searchRetrieve&query=bib.isbn%2Badj%2B%25222844940404%2522.
 */
class BnfMapper extends AbstractBookMapper implements MapperInterface
{
    /**
     * XML document currently being mapped; set by process() before any XPath lookup.
     *
     * @var SimpleXMLElement
     */
    private $xml;

    /**
     * Extract book data from an XML document in UniMarc format.
     * See http://api.bnf.fr/formats-bibliographiques-intermarc-unimarc
     * https://www.transition-bibliographique.fr/systemes-et-donnees/manuel-unimarc-format-bibliographique/.
     *
     * Returns an empty array when $xml is not a SimpleXMLElement or when the
     * response does not contain exactly one record.
     *
     * @param mixed $xml
     *
     * @return array
     */
    public function process($xml): array
    {
        if (!$xml instanceof SimpleXMLElement) {
            return [];
        }
        $this->xml = $xml;

        // Skip multi-records. The cast must wrap the whole "?? 0" expression:
        // "(int)$a[0] ?? 0" casts first, which makes the fallback dead code and
        // reads an undefined offset when the XPath query matches nothing.
        $records = $xml->xpath('//srw:numberOfRecords[1]');
        $nbResults = (int) ($records[0] ?? 0);
        if (1 !== $nbResults) {
            echo "BNF : $nbResults records (skip)\n";

            return [];
        }

        return [
            // 'bnf' => $this->convertBnfIdent(), // relevant when there is an ISBN?
            'isbn' => $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][1]'),
            'isbn2' => $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][2]'),

            // Language
            'langue' => $this->lang2wiki($this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="a"][1]')),
            // c : language of the original work
            'langue originale' => $this->stripLangFR(
                $this->lang2wiki(
                    $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="c"][1]')
                )
            ),
            // g : language of the title proper (if different)
            'langue titre' => $this->stripLangFR(
                $this->lang2wiki(
                    $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="g"][1]')
                )
            ),
            /*
             * Block 200.
             * https://www.transition-bibliographique.fr/wp-content/uploads/2019/11/B200-2018.pdf
             */
            // a : title proper
            'titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="a"][1]'),
            // d : parallel title (other language)
            'titre original' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="d"][1]'),
            // e : other title information
            'sous-titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="e"][1]', ', '),
            // f : first statement of responsibility, e.g. "Pierre Durand, Paul Dupond"
            'auteur1' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="f"]', ', '),
            // g : subsequent statement of responsibility
            'auteur2' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="g"]', ', '),
            // h : number of a part (not mapped)
            // 'volume' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="h"]'),
            // i : name of a part (not mapped)
            // v : volume number
            'volume' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="v"][1]'),

            // 410 : collection
            'collection' => $this->xpath2string('//mxc:datafield[@tag="410"]/mxc:subfield[@code="a"][1]'),

            // Author: 7XX would be a better source, see
            // https://www.transition-bibliographique.fr/wp-content/uploads/2018/07/B7XX-6-2011.pdf

            // Values looked up in several candidate fields
            'lieu' => $this->getLocation(),
            'éditeur' => $this->getPublisher(),
            'date' => $this->getPublishDate(),
            // 215
            'pages totales' => $this->convertPages(),

            // hidden data
            'infos' => [
                'source' => 'BnF',
                'sourceTag' => $this->sourceTag(),
                'bnfAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="3"][1]'),
                'ISNIAuteur1' => $this->formatISNI(
                    $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="o"][1]')
                ),
                'yearsAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="f"][1]'),
            ],
        ];
    }

    /**
     * Run an XPath query on the current document and join the text of every match.
     *
     * @param string      $path XPath expression
     * @param string|null $glue Separator placed between matches; null is treated as ''
     *
     * @return string|null Joined text, or null when nothing matched
     */
    private function xpath2string(string $path, ?string $glue = ', '): ?string
    {
        $elements = $this->xml->xpath($path);
        // Covers both "no match" (empty array) and a false/null failure result.
        if (empty($elements)) {
            return null;
        }

        $res = [];
        foreach ($elements as $element) {
            if ($element instanceof SimpleXMLElement) {
                $res[] = (string) $element;
            }
        }

        if ([] === $res) {
            return null;
        }

        // implode() only accepts a string separator: never hand it the nullable $glue as-is.
        return implode($glue ?? '', $res);
    }

    /**
     * Return the first truthy xpath2string() result among several XPath expressions.
     *
     * @param string[]    $paths XPath expressions, tried in order
     * @param string|null $glue  Separator forwarded to xpath2string()
     *
     * @return string|null
     */
    private function firstFilled(array $paths, ?string $glue = ', '): ?string
    {
        foreach ($paths as $path) {
            $value = $this->xpath2string($path, $glue);
            // Loose truthiness on purpose: null, '' and '0' all mean "no value here".
            if ($value) {
                return $value;
            }
        }

        return null;
    }

    /**
     * Validate an ISNI string taken from the record.
     *
     * A bare 16-character value matching the expected pattern is returned unchanged;
     * the BnF "ISNI" + 16 characters form is returned as "0000 XXXX XXXX XXXX".
     *
     * @param string|null $raw
     *
     * @return string|null null when $raw is empty or matches neither pattern
     */
    private function formatISNI(?string $raw = ''): ?string
    {
        // Under strict_types, preg_match() rejects a null subject with a TypeError,
        // and process() passes null whenever the record carries no ISNI.
        if (null === $raw || '' === $raw) {
            return null;
        }

        if (1 === preg_match('#^0000(000[0-4])([0-9]{4})([0-9]{3}[0-9X])$#', $raw)) {
            return $raw;
        }
        // BnF curious format of ISNI
        if (1 === preg_match('#^ISNI0000(000[0-4])([0-9]{4})([0-9]{3}[0-9X])$#', $raw, $matches)) {
            return sprintf('0000 %s %s %s', $matches[1], $matches[2], $matches[3]);
        }

        return null;
    }

    /**
     * Strip FR lang: 'fr' becomes null, any other value is returned unchanged.
     *
     * @param string|null $lang
     *
     * @return string|null
     */
    private function stripLangFR(?string $lang = null): ?string
    {
        return ('fr' !== $lang) ? $lang : null;
    }

    /**
     * Convert number of pages.
     * "1 vol. (126 p.)" gives "126".
     *
     * @return string|null Page count (two digits or more), or null when not found
     */
    private function convertPages(): ?string
    {
        $raw = $this->xpath2string('//mxc:datafield[@tag="215"]/mxc:subfield[@code="a"]');
        if (null !== $raw && preg_match('#([0-9]{2,}) p\.#', $raw, $matches) > 0) {
            return (string) $matches[1];
        }

        return null;
    }

    /**
     * Convert a language code with Language::iso2b2wiki(); an empty value gives null.
     * todo: bilingual handling fr+en
     * ISO 639-1 http://www.loc.gov/standards/iso639-2/php/French_list.php.
     *
     * @param string|null $lang
     *
     * @return string|null
     */
    private function lang2wiki(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }

        return Language::iso2b2wiki($lang);
    }

    /**
     * Publisher name: subfield "c" of the first field that provides one, joined with ' / '.
     *
     * @return string|null
     */
    private function getPublisher(): ?string
    {
        return $this->firstFilled(
            [
                // field 210
                '//mxc:datafield[@tag="210"]/mxc:subfield[@code="c"]',
                // 214 : new field (2019)
                '//mxc:datafield[@tag="214"]/mxc:subfield[@code="c"]',
                // 219 : older field?
                '//mxc:datafield[@tag="219"]/mxc:subfield[@code="c"]',
            ],
            ' / '
        );
    }

    /**
     * Place of publication: subfield "a" of the first field that provides one, joined with '/'.
     *
     * @return string|null
     */
    private function getLocation(): ?string
    {
        return $this->firstFilled(
            [
                // field 210
                '//mxc:datafield[@tag="210"]/mxc:subfield[@code="a"]',
                // 214 : new field (2019)
                '//mxc:datafield[@tag="214"]/mxc:subfield[@code="a"]',
                // 219 : older field?
                '//mxc:datafield[@tag="219"]/mxc:subfield[@code="a"]',
            ],
            '/'
        );
    }

    /**
     * Publication date: subfield "d" of field 210, else of field 214.
     *
     * @return string|null
     */
    private function getPublishDate(): ?string
    {
        return $this->firstFilled(
            [
                // field 210 d : date of publication, distribution, etc.
                '//mxc:datafield[@tag="210"]/mxc:subfield[@code="d"][1]',
                // 214 : new field (2019)
                '//mxc:datafield[@tag="214"]/mxc:subfield[@code="d"][1]',
            ]
        );
    }

    /**
     * Extract the numeric part of the record identifier, e.g. "453986124"
     * from "ark:/12148/cb453986124".
     *
     * Not called anywhere in this class at the moment: the 'bnf' entry in process()
     * is commented out.
     *
     * @return string|null
     */
    private function convertBnfIdent(): ?string
    {
        // ark:/12148/cb453986124
        $raw = $this->xpath2string('//srw:recordIdentifier[1]/text()');

        if (null !== $raw && preg_match('#ark:/[0-9]+/cb([0-9]+)#', $raw, $matches) > 0) {
            return (string) $matches[1];
        }

        return null;
    }

    /**
     * Build the source tag "BnF:<year>" from the record's LastModificationDate attribute.
     *
     * @return string|null null when the attribute is missing or is not 8 digits
     */
    private function sourceTag(): ?string
    {
        $raw = $this->xpath2string('//srw:extraRecordData[1]/ixm:attr[@name="LastModificationDate"][1]');
        // 20190922
        if (null !== $raw && preg_match('#^([0-9]{4})[0-9]{4}$#', $raw, $matches) > 0) {
            return sprintf('BnF:%s', $matches[1]);
        }

        return null;
    }
}
262