Issues (106)

src/Domain/Publisher/BnfMapper.php (1 issue)

Labels
Severity
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe/Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Publisher;
11
12
use App\Domain\Enums\Language;
13
use SimpleXMLElement;
14
15
/**
16
 * Maps a BnF catalogue SRU response (UniMarc XML) to an array of book data. Example query:
17
 * http://catalogue.bnf.fr/api/SRU?version=1.2&operation=searchRetrieve&query=bib.isbn%2Badj%2B%25222844940404%2522.
18
 */
19
class BnfMapper extends AbstractBookMapper implements MapperInterface
20
{
21
    private ?SimpleXMLElement $xml = null;
22
23
    /**
     * Map a single-record BnF SRU response (XML in UniMarc format) to book data.
     *
     * See http://api.bnf.fr/formats-bibliographiques-intermarc-unimarc
     * https://www.transition-bibliographique.fr/systemes-et-donnees/manuel-unimarc-format-bibliographique/.
     *
     * @param mixed $xml expected to be a SimpleXMLElement; anything else yields []
     *
     * @return array field name => value (values may be null); an empty array when
     *               $xml is not a SimpleXMLElement or the response does not hold
     *               exactly one record
     */
    public function process($xml): array
    {
        if (!$xml instanceof SimpleXMLElement) {
            return [];
        }
        $this->xml = $xml;

        // Skip multi-records. The parentheses matter: a cast binds tighter than `??`,
        // so without them the fallback would never apply to a missing element.
        $nbResults = (int) ($xml->xpath('//srw:numberOfRecords[1]')[0] ?? 0);
        if (1 !== $nbResults) {
            return [];
        }

        return [
            // Language
            'langue' => $this->lang2wiki($this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="a"][1]')),
            // c: language of the original work
            'langue originale' => $this->stripLangFR(
                $this->lang2wiki(
                    $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="c"][1]')
                )
            ),
            // g: language of the title proper (if different)
            'langue titre' => $this->stripLangFR(
                $this->lang2wiki(
                    $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="g"][1]')
                )
            ),
            /*
             * Block 200.
             * https://www.transition-bibliographique.fr/wp-content/uploads/2019/11/B200-2018.pdf
             */
            // a: title proper
            'titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="a"][1]'),
            // d: parallel title (other language)
            'titre original' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="d"][1]'),
            // e: other title information
            'sous-titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="e"][1]', ', '),

            // Responsibility: field 200 is too messy ("Pierre Durand, Paul Dupond" or "Paul Durand,..."),
            // so names are read from fields 700/701.
            'prénom1' => $this->xpath2string('//mxc:datafield[@tag="700"]/mxc:subfield[@code="b"]'),
            'nom1' => $this->xpath2string('//mxc:datafield[@tag="700"]/mxc:subfield[@code="a"]'),

            'prénom2' => $this->xpath2string('//mxc:datafield[@tag="701"]/mxc:subfield[@code="b"]'),
            'nom2' => $this->xpath2string('//mxc:datafield[@tag="701"]/mxc:subfield[@code="a"]'),

            // field 200
            // h: part number
            //            'volume' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="h"]'),
            // i: part title
            // v: volume number
            'volume' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="v"][1]'),

            // 410: collection
            'collection' => $this->xpath2string('//mxc:datafield[@tag="410"]/mxc:subfield[@code="a"][1]'),

            // Author: see 7XX instead
            //  https://www.transition-bibliographique.fr/wp-content/uploads/2018/07/B7XX-6-2011.pdf

            // values looked up in several fields
            'lieu' => $this->getLocation(),
            'éditeur' => $this->getPublisher(),
            'date' => $this->getPublishDate(),
            // 215
            'pages totales' => $this->convertPages(),

            //            'bnf' => $this->convertBnfIdent(), // relevant when there is an ISBN?
            'isbn2' => $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][2]'),
            'isbn' => $this->extractISBN(),

            // hidden data
            'infos' => [
                'source' => 'BnF',
                'sourceTag' => $this->sourceTag(),
                'bnfAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="3"][1]'),
                'ISNIAuteur1' => $this->formatISNI(
                    $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="o"][1]')
                ),
                'yearsAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="f"][1]'),
            ],
        ];
    }
113
114 1
    /**
     * Run an XPath query on the stored XML and join the matched values.
     *
     * @param string      $path XPath expression evaluated against $this->xml
     * @param string|null $glue separator placed between matches; null falls back to ', '
     *
     * @return string|null the matched node values joined with $glue, or null when
     *                     no XML is stored, the query yields no array or nothing matches
     */
    private function xpath2string(string $path, ?string $glue = ', '): ?string
    {
        // $xml is declared nullable and defaults to null, so guard before dereferencing.
        if (null === $this->xml) {
            return null;
        }

        $elements = $this->xml->xpath($path);
        // xpath() can return a non-array value on failure; iterating it would warn.
        if (!is_array($elements)) {
            return null;
        }

        $res = [];
        foreach ($elements as $element) {
            if ($element instanceof SimpleXMLElement) {
                $res[] = (string) $element;
            }
        }

        return [] === $res ? null : implode($glue ?? ', ', $res);
    }
134 1
135
    /**
     * Extract one ISBN from field 010, subfield "a".
     *
     * The raw value is frequently dirty (e.g. "9789004232891, 9004232893"), so the
     * patterns are tried from most to least specific: 13 digits, then a 10-character
     * ISBN, then a hyphenated ISBN.
     *
     * @return string|null the first match of the first pattern that applies, or null
     */
    private function extractISBN(): ?string
    {
        $isbn = $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][1]') ?? '';

        $patterns = [
            // ISBN-13 without separators
            '#(\d{13})#',
            // ISBN-10 without separators; the check digit may be "X"
            '#(\d{9}[\dXx])#',
            // hyphenated ISBN (10 to 17 characters), starting with a digit and
            // ending with a digit or the "X" check digit
            '#(\d[\d\-]{8,15}[\dXx])#',
        ];

        foreach ($patterns as $pattern) {
            if (1 === preg_match($pattern, $isbn, $matches)) {
                return $matches[1];
            }
        }

        return null;
    }
153 1
154
    /**
     * Validate an ISNI value and normalise the BnF-prefixed form.
     *
     * A bare 16-character ISNI is returned exactly as received; a value prefixed
     * with "ISNI" is returned as four space-separated groups.
     *
     * @param string|null $raw value to check
     *
     * @return string|null the accepted ISNI, or null when $raw is empty or matches neither form
     */
    private function formatISNI(?string $raw = null): ?string
    {
        if (empty($raw)) {
            return null;
        }

        // Already a bare ISNI: hand it back untouched.
        $isBare = 1 === preg_match('#^0000(000[0-4])(\d{4})(\d{3}[0-9X])$#', $raw);
        if ($isBare) {
            return $raw;
        }

        // BnF's own notation: "ISNI" immediately followed by the 16 characters.
        $groups = [];
        $isPrefixed = 1 === preg_match('#^ISNI0000(000[0-4])(\d{4})(\d{3}[0-9X])$#', $raw, $groups);

        return $isPrefixed
            ? sprintf('0000 %s %s %s', $groups[1], $groups[2], $groups[3])
            : null;
    }
169 1
170
    /**
     * Drop the "fr" language code.
     *
     * @param string|null $lang language code to filter
     *
     * @return string|null null when $lang is exactly 'fr', otherwise $lang unchanged
     */
    private function stripLangFR(?string $lang = null): ?string
    {
        if ('fr' === $lang) {
            return null;
        }

        return $lang;
    }
179
180
    /**
     * Read the page count out of field 215, subfield "a".
     * Example input: "1 vol. (126 p.)".
     *
     * @return string|null the digits preceding " p." (two digits at least), or null
     *                     when the field is empty or has no such number
     */
    private function convertPages(): ?string
    {
        $extent = $this->xpath2string('//mxc:datafield[@tag="215"]/mxc:subfield[@code="a"]');
        if (empty($extent)) {
            return null;
        }

        $found = [];
        if (1 !== preg_match('#(\d{2,}) p\.#', $extent, $found)) {
            return null;
        }

        return $found[1];
    }
193 1
194
    /**
     * Convert a language code through Language::iso2b2wiki().
     *
     * TODO: handle bilingual records (fr+en).
     * ISO 639-1 http://www.loc.gov/standards/iso639-2/php/French_list.php.
     * NOTE(review): the helper name suggests the input is an ISO 639-2/B code — confirm.
     *
     * @param string|null $lang language code read from the record
     *
     * @return string|null the converted code, or null when $lang is empty
     */
    private function lang2wiki(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }

        return Language::iso2b2wiki($lang);
    }
208
209
    /**
     * Find the publisher name (subfield "c"), trying fields 210, 214 and 219 in turn.
     *
     * @return string|null the first non-empty value, multiple matches joined by ' / ', or null
     */
    private function getPublisher(): ?string
    {
        $candidates = [
            // field 210
            '//mxc:datafield[@tag="210"]/mxc:subfield[@code="c"]',
            // 214: new field (2019)
            '//mxc:datafield[@tag="214"]/mxc:subfield[@code="c"]',
            // 219: former field?
            '//mxc:datafield[@tag="219"]/mxc:subfield[@code="c"]',
        ];

        foreach ($candidates as $xpath) {
            $publisher = $this->xpath2string($xpath, ' / ');
            if ($publisher) {
                return $publisher;
            }
        }

        return null;
    }
227
228
    /**
     * Find the place of publication (subfield "a"), trying fields 210, 214 and 219 in turn.
     *
     * @return string|null the first non-empty value, multiple matches joined by '/', or null
     */
    private function getLocation(): ?string
    {
        $candidates = [
            // field 210
            '//mxc:datafield[@tag="210"]/mxc:subfield[@code="a"]',
            // 214: new field (2019)
            '//mxc:datafield[@tag="214"]/mxc:subfield[@code="a"]',
            // former field?
            '//mxc:datafield[@tag="219"]/mxc:subfield[@code="a"]',
        ];

        foreach ($candidates as $xpath) {
            $place = $this->xpath2string($xpath, '/');
            if ($place) {
                return $place;
            }
        }

        return null;
    }
245
246
    /**
     * Find the publication date (subfield "d"), trying field 210 and then field 214.
     *
     * @return string|null the first non-empty value, or null
     */
    private function getPublishDate(): ?string
    {
        $candidates = [
            // field 210, d: date of publication, distribution, etc.
            '//mxc:datafield[@tag="210"]/mxc:subfield[@code="d"][1]',
            // 214: new field (2019)
            '//mxc:datafield[@tag="214"]/mxc:subfield[@code="d"][1]',
        ];

        foreach ($candidates as $xpath) {
            $date = $this->xpath2string($xpath);
            if ($date) {
                return $date;
            }
        }

        return null;
    }
259
260 1
    //    private function convertBnfIdent(): ?string
261 1
    //    {
262
    //        // ark:/12148/cb453986124
263
    //        $raw = $this->xpath2string('//srw:recordIdentifier[1]/text()');
264
    //
265
    //        if ($raw && preg_match('#ark:/[0-9]+/cb([0-9]+)#', $raw, $matches) > 0) {
266
    //            return (string)$matches[1];
267
    //        }
268
    //
269
    //        return null;
270
    //    }
271
272
    /**
     * Build the source tag from the record's LastModificationDate attribute.
     *
     * @return string|null "BnF:" followed by the first four digits of an
     *                     eight-digit value (e.g. 20190922), or null otherwise
     */
    private function sourceTag(): ?string
    {
        $stamp = $this->xpath2string('//srw:extraRecordData[1]/ixm:attr[@name="LastModificationDate"][1]');
        if (!$stamp) {
            return null;
        }

        // Expected shape: 20190922
        $parts = [];
        if (1 !== preg_match('#^(\d{4})\d{4}$#', $stamp, $parts)) {
            return null;
        }

        return sprintf('BnF:%s', $parts[1]);
    }
282
}
283