1 | <?php |
||
2 | /* |
||
3 | * This file is part of dispositif/wikibot application (@github) |
||
4 | * 2019/2020 © Philippe/Irønie <[email protected]> |
||
5 | * For the full copyright and MIT license information, view the license file. |
||
6 | */ |
||
7 | |||
8 | declare(strict_types=1); |
||
9 | |||
10 | namespace App\Domain\Publisher; |
||
11 | |||
12 | use App\Domain\Enums\Language; |
||
13 | use SimpleXMLElement; |
||
14 | |||
/**
 * Maps a BnF catalogue SRU response (UNIMARC XML) to an array of bibliographic fields.
 *
 * Example request:
 * http://catalogue.bnf.fr/api/SRU?version=1.2&operation=searchRetrieve&query=bib.isbn%2Badj%2B%25222844940404%2522.
 */
class BnfMapper extends AbstractBookMapper implements MapperInterface
{
    /**
     * XML document currently being mapped; assigned by process() before any XPath lookup.
     */
    private ?SimpleXMLElement $xml = null;

    /**
     * Extract bibliographic data from an XML document in UNIMARC format.
     *
     * See http://api.bnf.fr/formats-bibliographiques-intermarc-unimarc
     * https://www.transition-bibliographique.fr/systemes-et-donnees/manuel-unimarc-format-bibliographique/.
     *
     * @param mixed $xml expected to be a SimpleXMLElement; anything else yields an empty array
     *
     * @return array mapped fields, or an empty array when the input is not a
     *               SimpleXMLElement or the response does not hold exactly one record
     */
    public function process($xml): array
    {
        if (!$xml instanceof SimpleXMLElement) {
            return [];
        }
        $this->xml = $xml;

        // Skip multi-record responses: only an unambiguous single hit is mapped.
        // The XPath result is checked explicitly: a cast binds tighter than "??", so
        // "(int) $result[0] ?? 0" would never fall back and would warn on an empty result.
        $counts = $xml->xpath('//srw:numberOfRecords[1]');
        $nbResults = (is_array($counts) && isset($counts[0])) ? (int) (string) $counts[0] : 0;
        if (1 !== $nbResults) {
            return [];
        }

        return [
            // 101 a : language of the text
            'langue' => $this->lang2wiki($this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="a"][1]')),
            // 101 c : language of the original work
            'langue originale' => $this->stripLangFR(
                $this->lang2wiki(
                    $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="c"][1]')
                )
            ),
            // 101 g : language of the title proper (when different)
            'langue titre' => $this->stripLangFR(
                $this->lang2wiki(
                    $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="g"][1]')
                )
            ),
            /*
             * Block 200.
             * https://www.transition-bibliographique.fr/wp-content/uploads/2019/11/B200-2018.pdf
             */
            // 200 a : title proper
            'titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="a"][1]'),
            // 200 d : parallel title (other language)
            'titre original' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="d"][1]'),
            // 200 e : other title information
            'sous-titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="e"][1]', ', '),

            // Statement of responsibility: field 200 is too messy ("Pierre Durand, Paul Dupond"
            // or "Paul Durand,..."), so author names are read from 700/701 instead.
            'prénom1' => $this->xpath2string('//mxc:datafield[@tag="700"]/mxc:subfield[@code="b"]'),
            'nom1' => $this->xpath2string('//mxc:datafield[@tag="700"]/mxc:subfield[@code="a"]'),

            'prénom2' => $this->xpath2string('//mxc:datafield[@tag="701"]/mxc:subfield[@code="b"]'),
            'nom2' => $this->xpath2string('//mxc:datafield[@tag="701"]/mxc:subfield[@code="a"]'),

            // Field 200
            // h : number of a part (disabled)
            // 'volume' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="h"]'),
            // i : name of a part
            // v : volume number
            'volume' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="v"][1]'),

            // 410 : series
            'collection' => $this->xpath2string('//mxc:datafield[@tag="410"]/mxc:subfield[@code="a"][1]'),

            // Author: see 7XX rather than 200
            // https://www.transition-bibliographique.fr/wp-content/uploads/2018/07/B7XX-6-2011.pdf

            // Values looked up in several alternative fields
            'lieu' => $this->getLocation(),
            'éditeur' => $this->getPublisher(),
            'date' => $this->getPublishDate(),
            // 215
            'pages totales' => $this->convertPages(),

            // 'bnf' => $this->convertBnfIdent(), // relevant when an ISBN is present?
            'isbn2' => $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][2]'),
            'isbn' => $this->extractISBN(),

            // hidden data
            'infos' => [
                'source' => 'BnF',
                'sourceTag' => $this->sourceTag(),
                'bnfAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="3"][1]'),
                'ISNIAuteur1' => $this->formatISNI(
                    $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="o"][1]')
                ),
                'yearsAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="f"][1]'),
            ],
        ];
    }

    /**
     * Evaluate an XPath expression on the current document and join the matches.
     *
     * @param string      $path XPath expression
     * @param string|null $glue separator between matches; null means ', '
     *
     * @return string|null the joined string values, or null when no document is loaded,
     *                     the query fails, or nothing matches
     */
    private function xpath2string(string $path, ?string $glue = ', '): ?string
    {
        if (null === $this->xml) {
            return null;
        }

        // xpath() does not always return an array (false on error), so the result is
        // checked before being iterated.
        $elements = $this->xml->xpath($path);
        if (!is_array($elements)) {
            return null;
        }

        $values = [];
        foreach ($elements as $element) {
            if ($element instanceof SimpleXMLElement) {
                $values[] = (string) $element;
            }
        }

        return ([] !== $values) ? implode($glue ?? ', ', $values) : null;
    }

    /**
     * Return the first non-empty result among several XPath expressions.
     *
     * "Empty" follows PHP's empty() semantics, as the rest of this class does.
     *
     * @param string[]    $paths XPath expressions, tried in order
     * @param string|null $glue  separator forwarded to xpath2string()
     */
    private function firstNonEmptyXpath(array $paths, ?string $glue = null): ?string
    {
        foreach ($paths as $path) {
            $value = $this->xpath2string($path, $glue);
            if (!empty($value)) {
                return $value;
            }
        }

        return null;
    }

    /**
     * Extract one ISBN from the first 010$a subfield.
     *
     * Preference order: 13 digits, then 10 characters (9 digits plus a digit or the
     * "X" check character), then a hyphenated form.
     */
    private function extractISBN(): ?string
    {
        $isbn = $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][1]') ?? '';

        // Dirty data is frequent: "9789004232891, 9004232893"
        if (preg_match('#(\d{13})#', $isbn, $matches)) {
            return $matches[1];
        }
        // ISBN-10: the last character is a check digit that may be "X".
        if (preg_match('#(\d{9}[\dX])#', $isbn, $matches)) {
            return $matches[1];
        }
        // Hyphenated ISBN; must end on a digit or the "X" check character.
        if (preg_match('#([0-9\-]{9,16}[0-9X])#', $isbn, $matches)) {
            return $matches[1];
        }

        return null;
    }

    /**
     * Validate an ISNI identifier, reformatting the BnF-prefixed variant.
     *
     * @param string|null $raw either 16 characters starting with "0000000" or the same
     *                         prefixed with "ISNI"
     *
     * @return string|null the bare 16-character form is returned unchanged; the
     *                     "ISNI"-prefixed form is returned as four space-separated groups;
     *                     anything else gives null
     */
    private function formatISNI(?string $raw = null): ?string
    {
        if (!$raw) {
            return null;
        }
        if (1 === preg_match('#^0000(000[0-4])(\d{4})(\d{3}[0-9X])$#', $raw)) {
            return $raw;
        }
        // BnF curious format of ISNI
        if (1 === preg_match('#^ISNI0000(000[0-4])(\d{4})(\d{3}[0-9X])$#', $raw, $matches)) {
            return sprintf('0000 %s %s %s', $matches[1], $matches[2], $matches[3]);
        }

        return null;
    }

    /**
     * Drop the language code when it is 'fr'; any other value is returned unchanged.
     */
    private function stripLangFR(?string $lang = null): ?string
    {
        return ('fr' !== $lang) ? $lang : null;
    }

    /**
     * Extract the page count from field 215$a.
     *
     * Example input: "1 vol. (126 p.)". Only counts of two or more digits followed
     * by " p." are recognised.
     */
    private function convertPages(): ?string
    {
        $raw = $this->xpath2string('//mxc:datafield[@tag="215"]/mxc:subfield[@code="a"]');
        if (!empty($raw) && preg_match('#(\d{2,}) p\.#', $raw, $matches) > 0) {
            return $matches[1];
        }

        return null;
    }

    /**
     * Convert a catalogue language code through Language::iso2b2wiki().
     *
     * todo: handle bilingual records (fr+en)
     * ISO 639-1 http://www.loc.gov/standards/iso639-2/php/French_list.php.
     *
     * @return string|null null when no code is given
     */
    private function lang2wiki(?string $lang = null): ?string
    {
        if (!empty($lang)) {
            return Language::iso2b2wiki($lang);
        }

        return null;
    }

    /**
     * Publisher name(s), joined with ' / '.
     *
     * Looked up in field 210, then 214 (new field, 2019), then 219 (older field?).
     */
    private function getPublisher(): ?string
    {
        return $this->firstNonEmptyXpath(
            [
                '//mxc:datafield[@tag="210"]/mxc:subfield[@code="c"]',
                '//mxc:datafield[@tag="214"]/mxc:subfield[@code="c"]',
                '//mxc:datafield[@tag="219"]/mxc:subfield[@code="c"]',
            ],
            ' / '
        );
    }

    /**
     * Place(s) of publication, joined with '/'.
     *
     * Looked up in field 210, then 214 (new field, 2019), then 219 (older field?).
     */
    private function getLocation(): ?string
    {
        return $this->firstNonEmptyXpath(
            [
                '//mxc:datafield[@tag="210"]/mxc:subfield[@code="a"]',
                '//mxc:datafield[@tag="214"]/mxc:subfield[@code="a"]',
                '//mxc:datafield[@tag="219"]/mxc:subfield[@code="a"]',
            ],
            '/'
        );
    }

    /**
     * Date of publication (subfield d), looked up in field 210, then 214 (new field, 2019).
     */
    private function getPublishDate(): ?string
    {
        return $this->firstNonEmptyXpath(
            [
                '//mxc:datafield[@tag="210"]/mxc:subfield[@code="d"][1]',
                '//mxc:datafield[@tag="214"]/mxc:subfield[@code="d"][1]',
            ]
        );
    }

    // Disabled: extraction of the BnF record identifier.
    //    private function convertBnfIdent(): ?string
    //    {
    //        // ark:/12148/cb453986124
    //        $raw = $this->xpath2string('//srw:recordIdentifier[1]/text()');
    //
    //        if ($raw && preg_match('#ark:/[0-9]+/cb([0-9]+)#', $raw, $matches) > 0) {
    //            return (string)$matches[1];
    //        }
    //
    //        return null;
    //    }

    /**
     * Build a source tag "BnF:<year>" from the record's LastModificationDate attribute.
     *
     * @return string|null null when the attribute is missing or is not 8 digits (e.g. 20190922)
     */
    private function sourceTag(): ?string
    {
        $raw = $this->xpath2string('//srw:extraRecordData[1]/ixm:attr[@name="LastModificationDate"][1]');
        // 20190922
        if ($raw && preg_match('#^(\d{4})\d{4}$#', $raw, $matches) > 0) {
            return sprintf('BnF:%s', $matches[1]);
        }

        return null;
    }
}
||
283 |
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely a typographical error or the method has been renamed.