Passed
Push — master ( 66711c...b5507b )
by Dispositif
08:29
created

BnfMapper::getLocation()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 16
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 7
c 0
b 0
f 0
nc 4
nop 0
dl 0
loc 16
rs 10
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 : Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Publisher;
11
12
use App\Domain\Enums\Language;
13
use SimpleXMLElement;
14
15
/**
16
 * Class BnfMapper
17
 * http://catalogue.bnf.fr/api/SRU?version=1.2&operation=searchRetrieve&query=bib.isbn%2Badj%2B%25222844940404%2522.
18
 */
19
class BnfMapper extends AbstractBookMapper implements MapperInterface
20
{
21
    /**
22
     * @var SimpleXMLElement
23
     */
24
    private $xml;
25
26
    /**
     * Map a BnF SRU response (XML in UniMarc format) to an array of book data.
     * See http://api.bnf.fr/formats-bibliographiques-intermarc-unimarc
     * https://www.transition-bibliographique.fr/systemes-et-donnees/manuel-unimarc-format-bibliographique/.
     *
     * Returns an empty array when $xml is not a SimpleXMLElement or when the
     * response does not announce exactly one record.
     *
     * @param mixed $xml the parsed SRU response; anything but a SimpleXMLElement is rejected
     *
     * @return array
     */
    public function process($xml): array
    {
        if (!$xml instanceof SimpleXMLElement) {
            return [];
        }
        $this->xml = $xml;

        // Skip multi-records. The default is applied BEFORE the cast: a cast binds
        // tighter than "??", so "(int)$a[0] ?? 0" never reaches its fallback.
        $records = $xml->xpath('//srw:numberOfRecords[1]');
        $nbResults = (is_array($records) && isset($records[0])) ? (int)$records[0] : 0;
        if (1 !== $nbResults) {
            echo "BNF : $nbResults records (skip)\n";

            return [];
        }

        return [
            // Language
            'langue' => $this->lang2wiki($this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="a"][1]')),
            // c: language of the original work
            'langue originale' => $this->stripLangFR(
                $this->lang2wiki(
                    $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="c"][1]')
                )
            ),
            // g: language of the title proper (if different)
            'langue titre' => $this->stripLangFR(
                $this->lang2wiki(
                    $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="g"][1]')
                )
            ),
            /*
             * Block 200.
             * https://www.transition-bibliographique.fr/wp-content/uploads/2019/11/B200-2018.pdf
             */ // a: title proper
            'titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="a"][1]'),
            // d: parallel title (other language)
            'titre original' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="d"][1]'),
            // e: other title information
            'sous-titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="e"][1]', ', '),

            // Responsibility: field 200 is too messy ("Pierre Durand, Paul Dupond" or "Paul Durand,..."),
            // so the names are read from fields 700 and 701 instead.
            'prénom1' => $this->xpath2string('//mxc:datafield[@tag="700"]/mxc:subfield[@code="b"]'),
            'nom1' => $this->xpath2string('//mxc:datafield[@tag="700"]/mxc:subfield[@code="a"]'),

            'prénom2' => $this->xpath2string('//mxc:datafield[@tag="701"]/mxc:subfield[@code="b"]'),
            'nom2' => $this->xpath2string('//mxc:datafield[@tag="701"]/mxc:subfield[@code="a"]'),

            // field 200
            // h: part number
            //            'volume' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="h"]'),
            // i: part title
            // v: volume number
            'volume' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="v"][1]'),

            // 410: series
            'collection' => $this->xpath2string('//mxc:datafield[@tag="410"]/mxc:subfield[@code="a"][1]'),

            // Author: see 7XX instead
            //  https://www.transition-bibliographique.fr/wp-content/uploads/2018/07/B7XX-6-2011.pdf

            // values looked up in several candidate fields
            'lieu' => $this->getLocation(),
            'éditeur' => $this->getPublisher(),
            'date' => $this->getPublishDate(),
            // 215
            'pages totales' => $this->convertPages(),

            //            'bnf' => $this->convertBnfIdent(), // relevant when there is an ISBN?
            'isbn2' => $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][2]'),
            'isbn' => $this->extractISBN(),

            // hidden data
            'infos' => [
                'source' => 'BnF',
                'sourceTag' => $this->sourceTag(),
                'bnfAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="3"][1]'),
                'ISNIAuteur1' => $this->formatISNI(
                    $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="o"][1]')
                ),
                'yearsAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="f"][1]'),
            ],
        ];
    }
118
119
    /**
     * Evaluate an XPath expression on the current record and join the matches.
     *
     * @param string      $path XPath expression evaluated against $this->xml
     * @param string|null $glue separator inserted between matches; null is treated as an empty string
     *
     * @return string|null the matched elements cast to string and joined with $glue,
     *                     or null when nothing matched
     */
    private function xpath2string(string $path, ?string $glue = ', '): ?string
    {
        $elements = $this->xml->xpath($path);
        // xpath() does not yield an array when the expression cannot be evaluated:
        // treat that like "no match" instead of iterating over a non-array.
        if (!is_array($elements)) {
            return null;
        }

        $res = [];
        foreach ($elements as $element) {
            if ($element instanceof SimpleXMLElement) {
                $res[] = (string)$element;
            }
        }

        if ([] === $res) {
            return null;
        }

        // implode() only accepts a string separator, while $glue is declared nullable.
        return implode($glue ?? '', $res);
    }
136
137
    /**
     * Extract one ISBN from the first subfield "a" of field 010.
     *
     * The raw value is frequently dirty (e.g. "9789004232891, 9004232893"), so the
     * patterns are tried from the most to the least specific: 13 consecutive digits,
     * then 10 consecutive digits, then a run of 10 to 17 digits and hyphens.
     *
     * @return string|null the first matching ISBN, or null when the field is missing
     *                     or holds nothing that looks like an ISBN
     */
    private function extractISBN(): ?string
    {
        $isbn = $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][1]');

        // xpath2string() returns null when the record has no 010$a; preg_match()
        // must not receive null (TypeError under strict_types).
        if (null === $isbn || '' === $isbn) {
            return null;
        }

        $patterns = [
            '#([0-9]{13})#',
            '#([0-9]{10})#',
            // ISBN written with hyphens
            '#([0-9\-]{10,17})#',
        ];
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $isbn, $matches)) {
                return $matches[1];
            }
        }

        return null;
    }
155
156
    /**
     * Validate an ISNI string and normalise the BnF-specific notation.
     *
     * A bare 16-character identifier starting with "0000" is returned unchanged.
     * The BnF notation (same identifier prefixed with "ISNI") is rewritten as four
     * space-separated groups. Anything else yields null.
     *
     * @param string|null $raw
     *
     * @return string|null
     */
    private function formatISNI(?string $raw = null): ?string
    {
        if (empty($raw)) {
            return null;
        }

        $isBare = preg_match('#^0000(000[0-4])([0-9]{4})([0-9]{3}[0-9X])$#', $raw, $groups);
        if ($isBare > 0) {
            return $raw;
        }

        // BnF curious format of ISNI
        $isPrefixed = preg_match('#^ISNI0000(000[0-4])([0-9]{4})([0-9]{3}[0-9X])$#', $raw, $groups);

        return ($isPrefixed > 0)
            ? sprintf('0000 %s %s %s', $groups[1], $groups[2], $groups[3])
            : null;
    }
171
172
    /**
     * Drop the language when it is exactly "fr"; any other value (null included)
     * is passed through untouched.
     *
     * @param string|null $lang
     *
     * @return string|null
     */
    private function stripLangFR(?string $lang = null): ?string
    {
        if ('fr' === $lang) {
            return null;
        }

        return $lang;
    }
183
184
    /**
     * Extract the page count from subfield "a" of field 215.
     * Example of raw value: "1 vol. (126 p.)" gives "126".
     *
     * Only a run of at least two digits directly followed by " p." is accepted.
     *
     * @return string|null the digits found, or null when the field is missing or has no such run
     */
    private function convertPages(): ?string
    {
        $extent = $this->xpath2string('//mxc:datafield[@tag="215"]/mxc:subfield[@code="a"]');
        if (empty($extent)) {
            return null;
        }

        $found = preg_match('#([0-9]{2,}) p\.#', $extent, $digits);

        return ($found > 0) ? (string)$digits[1] : null;
    }
199
200
    /**
     * Translate a language code through Language::iso2b2wiki().
     * todo: handle bilingual records (fr+en).
     * Code list: http://www.loc.gov/standards/iso639-2/php/French_list.php.
     * NOTE(review): the helper name suggests ISO 639-2/B input codes - confirm against Language.
     *
     * @param string|null $lang
     *
     * @return string|null null when $lang is empty, otherwise whatever the helper returns
     */
    private function lang2wiki(?string $lang = null): ?string
    {
        return empty($lang) ? null : Language::iso2b2wiki($lang);
    }
216
217
    /**
     * Read subfield "c" from the first field that yields a non-empty value,
     * trying 210, then 214, then 219. Multiple matches are joined with " / ".
     *
     * @return string|null
     */
    private function getPublisher(): ?string
    {
        $candidates = [
            // field 210
            '//mxc:datafield[@tag="210"]/mxc:subfield[@code="c"]',
            // 214: new field (2019)
            '//mxc:datafield[@tag="214"]/mxc:subfield[@code="c"]',
            // 219: older field?
            '//mxc:datafield[@tag="219"]/mxc:subfield[@code="c"]',
        ];

        foreach ($candidates as $candidate) {
            $value = $this->xpath2string($candidate, ' / ');
            if ($value) {
                return $value;
            }
        }

        return null;
    }
235
236
    /**
     * Read subfield "a" from the first field that yields a non-empty value,
     * trying 210, then 214, then 219. Multiple matches are joined with "/".
     *
     * @return string|null
     */
    private function getLocation(): ?string
    {
        $candidates = [
            // field 210
            '//mxc:datafield[@tag="210"]/mxc:subfield[@code="a"]',
            // 214: new field (2019)
            '//mxc:datafield[@tag="214"]/mxc:subfield[@code="a"]',
            // older field?
            '//mxc:datafield[@tag="219"]/mxc:subfield[@code="a"]',
        ];

        foreach ($candidates as $candidate) {
            $value = $this->xpath2string($candidate, '/');
            if ($value) {
                return $value;
            }
        }

        return null;
    }
253
254
    /**
     * Read the first subfield "d" (date of publication, distribution, etc.) from
     * field 210, falling back to field 214 when 210 yields nothing.
     *
     * @return string|null
     */
    private function getPublishDate(): ?string
    {
        $candidates = [
            // field 210
            '//mxc:datafield[@tag="210"]/mxc:subfield[@code="d"][1]',
            // 214: new field (2019)
            '//mxc:datafield[@tag="214"]/mxc:subfield[@code="d"][1]',
        ];

        foreach ($candidates as $candidate) {
            $value = $this->xpath2string($candidate);
            if ($value) {
                return $value;
            }
        }

        return null;
    }
267
268
    /**
     * Extract the numeric part of the BnF ARK record identifier.
     * Example of raw value: "ark:/12148/cb453986124" gives "453986124".
     *
     * Currently not called: its only use in process() is commented out.
     *
     * @return string|null the digits following "cb", or null when no ARK is found
     */
    private function convertBnfIdent(): ?string
    {
        $ark = $this->xpath2string('//srw:recordIdentifier[1]/text()');
        if (empty($ark)) {
            return null;
        }

        $found = preg_match('#ark:/[0-9]+/cb([0-9]+)#', $ark, $parts);

        return ($found > 0) ? (string)$parts[1] : null;
    }
279
280
    /**
     * Build a "BnF:<year>" tag from the record's LastModificationDate attribute.
     * The value must be exactly eight digits (e.g. 20190922); the first four are kept.
     *
     * @return string|null null when the attribute is missing or not eight digits
     */
    private function sourceTag(): ?string
    {
        $stamp = $this->xpath2string('//srw:extraRecordData[1]/ixm:attr[@name="LastModificationDate"][1]');
        if (empty($stamp)) {
            return null;
        }

        $found = preg_match('#^([0-9]{4})[0-9]{4}$#', $stamp, $parts);
        if ($found > 0) {
            return sprintf('BnF:%s', $parts[1]);
        }

        return null;
    }
290
}
291