Total Complexity | 40 |
Total Lines | 270 |
Duplicated Lines | 0 % |
Changes | 3 | ||
Bugs | 0 | Features | 0 |
Complex classes like BnfMapper often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use BnfMapper, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
19 | class BnfMapper extends AbstractBookMapper implements MapperInterface |
||
20 | { |
||
21 | /** |
||
22 | * @var SimpleXMLElement |
||
23 | */ |
||
24 | private $xml; |
||
25 | |||
/**
 * Map a BnF XML response in UniMarc format to an array of book fields.
 *
 * See http://api.bnf.fr/formats-bibliographiques-intermarc-unimarc
 * https://www.transition-bibliographique.fr/systemes-et-donnees/manuel-unimarc-format-bibliographique/.
 *
 * Returns an empty array when $xml is not a SimpleXMLElement, or when the
 * response does not announce exactly one record (a skip notice is echoed
 * in that second case).
 *
 * @param mixed $xml expected to be a SimpleXMLElement
 *
 * @return array
 */
public function process($xml): array
{
    if (!$xml instanceof SimpleXMLElement) {
        return [];
    }
    $this->xml = $xml;

    // Skip multi-records.
    // The cast has to wrap the whole `??` expression: a cast binds tighter
    // than `??`, so `(int)$a[0] ?? 0` casts first and the fallback never
    // applies (and a missing offset raises a warning).
    $records = $xml->xpath('//srw:numberOfRecords[1]');
    $nbResults = (int)($records[0] ?? 0);
    if (1 !== $nbResults) {
        echo "BNF : $nbResults records (skip)\n";

        return [];
    }

    return [
        // Language
        'langue' => $this->lang2wiki($this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="a"][1]')),
        // c : language of the original work
        'langue originale' => $this->stripLangFR(
            $this->lang2wiki(
                $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="c"][1]')
            )
        ),
        // g : language of the title proper (if different)
        'langue titre' => $this->stripLangFR(
            $this->lang2wiki(
                $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="g"][1]')
            )
        ),
        /*
         * Block 200.
         * https://www.transition-bibliographique.fr/wp-content/uploads/2019/11/B200-2018.pdf
         */
        // a : title proper
        'titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="a"][1]'),
        // d : parallel title (other language)
        'titre original' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="d"][1]'),
        // e : other title information
        'sous-titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="e"][1]', ', '),

        // Responsibility: field 200 is too messy ("Pierre Durand, Paul Dupond"
        // or "Paul Durand,..."), so names are read from fields 700/701 instead.
        'prénom1' => $this->xpath2string('//mxc:datafield[@tag="700"]/mxc:subfield[@code="b"]'),
        'nom1' => $this->xpath2string('//mxc:datafield[@tag="700"]/mxc:subfield[@code="a"]'),

        'prénom2' => $this->xpath2string('//mxc:datafield[@tag="701"]/mxc:subfield[@code="b"]'),
        'nom2' => $this->xpath2string('//mxc:datafield[@tag="701"]/mxc:subfield[@code="a"]'),

        // field 200
        // h : part number
        // 'volume' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="h"]'),
        // i : part title
        // v : volume number
        'volume' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="v"][1]'),

        // 410 : series (collection)
        'collection' => $this->xpath2string('//mxc:datafield[@tag="410"]/mxc:subfield[@code="a"][1]'),

        // Author: prefer the 7XX fields
        // https://www.transition-bibliographique.fr/wp-content/uploads/2018/07/B7XX-6-2011.pdf

        // values looked up across several fields
        'lieu' => $this->getLocation(),
        'éditeur' => $this->getPublisher(),
        'date' => $this->getPublishDate(),
        // 215
        'pages totales' => $this->convertPages(),

        // 'bnf' => $this->convertBnfIdent(), // relevant when an ISBN is present?
        'isbn2' => $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][2]'),
        'isbn' => $this->extractISBN(),

        // hidden data
        'infos' => [
            'source' => 'BnF',
            'sourceTag' => $this->sourceTag(),
            'bnfAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="3"][1]'),
            'ISNIAuteur1' => $this->formatISNI(
                $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="o"][1]')
            ),
            'yearsAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="f"][1]'),
        ],
    ];
}
||
118 | |||
119 | private function xpath2string(string $path, ?string $glue = ', '): ?string |
||
135 | } |
||
136 | |||
/**
 * Extract one ISBN from the first 010$a subfield.
 *
 * The subfield frequently holds dirty data such as
 * "9789004232891, 9004232893"; candidates are tried in this order:
 * 13 consecutive digits, then a 10-character ISBN (the last character may
 * be the check character "X"), then a hyphenated ISBN.
 *
 * @return string|null the first matching ISBN, or null when the subfield is
 *                     missing/empty or contains nothing ISBN-like
 */
private function extractISBN(): ?string
{
    $isbn = $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][1]');

    // xpath2string() is declared nullable; never hand null to preg_match().
    if (null === $isbn || '' === $isbn) {
        return null;
    }

    // frequent dirty data: "9789004232891, 9004232893"
    if (preg_match('#([0-9]{13})#', $isbn, $matches)) {
        return $matches[1];
    }
    // ISBN-10: the final check character may be "X"
    if (preg_match('#([0-9]{9}[0-9Xx])#', $isbn, $matches)) {
        return $matches[1];
    }
    // hyphenated ISBN: starts with a digit, ends with a digit or "X",
    // 10 to 17 characters overall
    if (preg_match('#([0-9][0-9\-]{8,15}[0-9Xx])#', $isbn, $matches)) {
        return $matches[1];
    }

    return null;
}
||
155 | |||
/**
 * Validate an ISNI and normalise the BnF-specific notation.
 *
 * A bare 16-character ISNI is returned unchanged. The BnF variant prefixed
 * with "ISNI" is returned as four space-separated groups, e.g.
 * "ISNI0000000121032683" gives "0000 0001 2103 2683".
 *
 * @param string|null $raw raw identifier
 *
 * @return string|null null when $raw is empty or matches neither form
 */
private function formatISNI(?string $raw = null): ?string
{
    if (empty($raw)) {
        return null;
    }

    // already a bare ISNI: hand it back untouched
    $isBareIsni = 1 === preg_match('#^0000(000[0-4])([0-9]{4})([0-9]{3}[0-9X])$#', $raw);
    if ($isBareIsni) {
        return $raw;
    }

    // BnF curious format of ISNI
    $groups = [];
    $isBnfIsni = 1 === preg_match('#^ISNI0000(000[0-4])([0-9]{4})([0-9]{3}[0-9X])$#', $raw, $groups);

    return $isBnfIsni
        ? sprintf('0000 %s %s %s', $groups[1], $groups[2], $groups[3])
        : null;
}
||
171 | |||
/**
 * Drop the language when it is exactly 'fr'.
 *
 * @param string|null $lang language value to filter
 *
 * @return string|null null for 'fr', otherwise $lang unchanged (including null)
 */
private function stripLangFR(?string $lang = null): ?string
{
    if ('fr' === $lang) {
        return null;
    }

    return $lang;
}
||
183 | |||
184 | /** |
||
185 | * Convert number of pages. |
||
186 | * "1 vol. (126 p.)". |
||
187 | * |
||
188 | * @return string|null |
||
189 | */ |
||
190 | private function convertPages(): ?string |
||
198 | } |
||
199 | |||
/**
 * Convert a language code through Language::iso2b2wiki().
 *
 * todo: handle bilingual values (fr+en)
 * Code list: http://www.loc.gov/standards/iso639-2/php/French_list.php.
 * NOTE(review): the input is presumably an ISO 639-2/B code, judging by the
 * helper's name — confirm against Language::iso2b2wiki().
 *
 * @param string|null $lang language code
 *
 * @return string|null null when $lang is empty, otherwise the helper's result
 */
private function lang2wiki(?string $lang = null): ?string
{
    // nothing to convert
    if (empty($lang)) {
        return null;
    }

    return Language::iso2b2wiki($lang);
}
||
216 | |||
/**
 * Read the publisher name (subfield c) from the first field that has one.
 *
 * Fields are tried in order: 210, then 214 (new field introduced in 2019),
 * then 219 (older field?). Multiple values are joined with ' / '.
 *
 * @return string|null null when none of the fields yields a non-empty value
 */
private function getPublisher(): ?string
{
    $candidatePaths = [
        // field 210
        '//mxc:datafield[@tag="210"]/mxc:subfield[@code="c"]',
        // 214 : new field (2019)
        '//mxc:datafield[@tag="214"]/mxc:subfield[@code="c"]',
        // 219 : older field?
        '//mxc:datafield[@tag="219"]/mxc:subfield[@code="c"]',
    ];

    foreach ($candidatePaths as $candidatePath) {
        $publisher = $this->xpath2string($candidatePath, ' / ');
        if ($publisher) {
            return $publisher;
        }
    }

    return null;
}
||
235 | |||
/**
 * Read the place of publication (subfield a) from the first field that has one.
 *
 * Fields are tried in order: 210, then 214 (new field introduced in 2019),
 * then 219 (older field?). Multiple values are joined with '/'.
 *
 * @return string|null null when none of the fields yields a non-empty value
 */
private function getLocation(): ?string
{
    $candidatePaths = [
        // field 210
        '//mxc:datafield[@tag="210"]/mxc:subfield[@code="a"]',
        // 214 : new field (2019)
        '//mxc:datafield[@tag="214"]/mxc:subfield[@code="a"]',
        // 219 : older field?
        '//mxc:datafield[@tag="219"]/mxc:subfield[@code="a"]',
    ];

    foreach ($candidatePaths as $candidatePath) {
        $place = $this->xpath2string($candidatePath, '/');
        if ($place) {
            return $place;
        }
    }

    return null;
}
||
253 | |||
/**
 * Read the publication date (first subfield d) from field 210, falling
 * back to field 214 (new field introduced in 2019).
 *
 * @return string|null null when neither field yields a non-empty value
 */
private function getPublishDate(): ?string
{
    $candidatePaths = [
        // field 210 d : date of publication, distribution, etc.
        '//mxc:datafield[@tag="210"]/mxc:subfield[@code="d"][1]',
        // 214 : new field (2019)
        '//mxc:datafield[@tag="214"]/mxc:subfield[@code="d"][1]',
    ];

    foreach ($candidatePaths as $candidatePath) {
        $date = $this->xpath2string($candidatePath);
        if ($date) {
            return $date;
        }
    }

    return null;
}
||
267 | |||
268 | private function convertBnfIdent(): ?string |
||
278 | } |
||
279 | |||
280 | private function sourceTag(): ?string |
||
281 | { |
||
289 | } |
||
290 | } |
||
291 |