| Total Complexity | 40 |
| Total Lines | 270 |
| Duplicated Lines | 0 % |
| Changes | 3 | ||
| Bugs | 0 | Features | 0 |
Complex classes like BnfMapper often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use BnfMapper, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 19 | class BnfMapper extends AbstractBookMapper implements MapperInterface |
||
| 20 | { |
||
| 21 | /** |
||
| 22 | * @var SimpleXMLElement |
||
| 23 | */ |
||
| 24 | private $xml; |
||
| 25 | |||
/**
 * Map a UniMarc XML response to an associative array of book data.
 *
 * See http://api.bnf.fr/formats-bibliographiques-intermarc-unimarc
 * https://www.transition-bibliographique.fr/systemes-et-donnees/manuel-unimarc-format-bibliographique/.
 *
 * The response is only mapped when it announces exactly one record;
 * otherwise a notice is echoed and an empty array is returned.
 *
 * @param mixed $xml Expected to be a SimpleXMLElement; any other value yields an empty array.
 *
 * @return array Mapped data, or an empty array when the input is skipped.
 */
public function process($xml): array
{
    if (!$xml instanceof SimpleXMLElement) {
        return [];
    }
    $this->xml = $xml;

    // Skip multi-record (or empty) responses.
    // The null-coalescing fallback must be applied BEFORE the int cast: a cast
    // binds tighter than "??", so "(int)$a[0] ?? 0" never used the fallback and
    // raised an undefined-offset warning when the node was missing.
    $recordCounts = $xml->xpath('//srw:numberOfRecords[1]');
    $nbResults = is_array($recordCounts) ? (int)($recordCounts[0] ?? 0) : 0;
    if (1 !== $nbResults) {
        echo "BNF : $nbResults records (skip)\n";

        return [];
    }

    return [
        // 101 a : language
        'langue' => $this->lang2wiki($this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="a"][1]')),
        // 101 c : language of the original work
        'langue originale' => $this->stripLangFR(
            $this->lang2wiki(
                $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="c"][1]')
            )
        ),
        // 101 g : language of the title proper (if different)
        'langue titre' => $this->stripLangFR(
            $this->lang2wiki(
                $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="g"][1]')
            )
        ),
        /*
         * Block 200.
         * https://www.transition-bibliographique.fr/wp-content/uploads/2019/11/B200-2018.pdf
         */
        // a : title proper
        'titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="a"][1]'),
        // d : parallel title (other language)
        'titre original' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="d"][1]'),
        // e : other title information
        'sous-titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="e"][1]', ', '),

        // Responsibility: field 200 is too messy ("Pierre Durand, Paul Dupond" or "Paul Durand,..."),
        // so names are read from fields 700/701 instead.
        'prénom1' => $this->xpath2string('//mxc:datafield[@tag="700"]/mxc:subfield[@code="b"]'),
        'nom1' => $this->xpath2string('//mxc:datafield[@tag="700"]/mxc:subfield[@code="a"]'),

        'prénom2' => $this->xpath2string('//mxc:datafield[@tag="701"]/mxc:subfield[@code="b"]'),
        'nom2' => $this->xpath2string('//mxc:datafield[@tag="701"]/mxc:subfield[@code="a"]'),

        // Field 200, other subfields:
        // h : part number (not mapped)
        // i : part title (not mapped)
        // v : volume number
        'volume' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="v"][1]'),

        // 410 : collection
        'collection' => $this->xpath2string('//mxc:datafield[@tag="410"]/mxc:subfield[@code="a"][1]'),

        // Author: see 7XX rather than 200
        // https://www.transition-bibliographique.fr/wp-content/uploads/2018/07/B7XX-6-2011.pdf

        // Values looked up across several fields
        'lieu' => $this->getLocation(),
        'éditeur' => $this->getPublisher(),
        'date' => $this->getPublishDate(),
        // 215
        'pages totales' => $this->convertPages(),

        // 'bnf' (see convertBnfIdent) is not exported: is it relevant when an ISBN exists?
        'isbn2' => $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][2]'),
        'isbn' => $this->extractISBN(),

        // hidden data
        'infos' => [
            'source' => 'BnF',
            'sourceTag' => $this->sourceTag(),
            'bnfAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="3"][1]'),
            'ISNIAuteur1' => $this->formatISNI(
                $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="o"][1]')
            ),
            'yearsAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="f"][1]'),
        ],
    ];
}
||
| 118 | |||
| 119 | private function xpath2string(string $path, ?string $glue = ', '): ?string |
||
| 135 | } |
||
| 136 | |||
/**
 * Extract a single ISBN from the first "a" subfield of field 010.
 *
 * The raw value is frequently dirty (e.g. "9789004232891, 9004232893"), so the
 * first usable candidate is returned, tried in this order: 13 consecutive
 * digits, a 10-character ISBN (9 digits + digit or X check character), then a
 * hyphenated ISBN.
 *
 * @return string|null The ISBN as found in the data, or null when none is found.
 */
private function extractISBN(): ?string
{
    $isbn = $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][1]');

    // xpath2string() is nullable: never hand null to preg_match().
    if (null === $isbn || '' === $isbn) {
        return null;
    }

    // Frequent dirty data: "9789004232891, 9004232893"
    if (preg_match('#([0-9]{13})#', $isbn, $matches)) {
        return $matches[1];
    }
    // ISBN-10: the check character may be "X", which a digits-only pattern drops.
    if (preg_match('#([0-9]{9}[0-9Xx])#', $isbn, $matches)) {
        return $matches[1];
    }
    // Hyphenated ISBN (10 to 17 characters): must start with a digit and end with
    // a digit or "X", so no trailing hyphen is captured and a final "X" is kept.
    if (preg_match('#([0-9][0-9\-]{8,15}[0-9Xx])#', $isbn, $matches)) {
        return $matches[1];
    }

    return null;
}
||
| 155 | |||
/**
 * Validate an ISNI and normalise the "ISNI"-prefixed variant.
 *
 * A bare 16-character ISNI matching the accepted pattern is returned unchanged.
 * A value prefixed with "ISNI" is returned as four space-separated groups
 * ("0000 xxxx xxxx xxxx"). Anything else gives null.
 *
 * @param string|null $raw Raw identifier.
 *
 * @return string|null
 */
private function formatISNI(?string $raw = null): ?string
{
    if (empty($raw)) {
        return null;
    }

    $groups = [];

    // Bare identifier: returned as is.
    $isBare = preg_match('#^0000(000[0-4])([0-9]{4})([0-9]{3}[0-9X])$#', $raw, $groups) > 0;
    if ($isBare) {
        return $raw;
    }

    // Prefixed "ISNI..." variant: split into space-separated groups.
    $isPrefixed = preg_match('#^ISNI0000(000[0-4])([0-9]{4})([0-9]{3}[0-9X])$#', $raw, $groups) > 0;

    return $isPrefixed
        ? sprintf('0000 %s %s %s', $groups[1], $groups[2], $groups[3])
        : null;
}
||
| 171 | |||
/**
 * Drop the language when it is exactly 'fr'.
 *
 * @param string|null $lang Language code to filter.
 *
 * @return string|null Null when $lang is 'fr'; otherwise $lang unchanged (null stays null).
 */
private function stripLangFR(?string $lang = null): ?string
{
    if ('fr' === $lang) {
        return null;
    }

    return $lang;
}
||
| 183 | |||
| 184 | /** |
||
| 185 | * Convert number of pages. |
||
| 186 | * "1 vol. (126 p.)". |
||
| 187 | * |
||
| 188 | * @return string|null |
||
| 189 | */ |
||
| 190 | private function convertPages(): ?string |
||
| 198 | } |
||
| 199 | |||
/**
 * Convert a language code through Language::iso2b2wiki().
 *
 * todo: handle bilingual values (fr+en)
 * ISO 639-1 http://www.loc.gov/standards/iso639-2/php/French_list.php.
 *
 * @param string|null $lang Language code; an empty value is not converted.
 *
 * @return string|null Null when $lang is empty, otherwise the result of the conversion.
 */
private function lang2wiki(?string $lang = null): ?string
{
    return empty($lang) ? null : Language::iso2b2wiki($lang);
}
||
| 216 | |||
/**
 * Read the publisher from subfield "c" of the first field that provides one.
 *
 * Fields are tried in order: 210, then 214 (new field, 2019), then 219
 * (old field?). Multiple values are glued with ' / '.
 *
 * @return string|null Null when none of the fields yields a non-empty value.
 */
private function getPublisher(): ?string
{
    $candidatePaths = [
        '//mxc:datafield[@tag="210"]/mxc:subfield[@code="c"]',
        '//mxc:datafield[@tag="214"]/mxc:subfield[@code="c"]',
        '//mxc:datafield[@tag="219"]/mxc:subfield[@code="c"]',
    ];

    foreach ($candidatePaths as $path) {
        $publisher = $this->xpath2string($path, ' / ');
        if ($publisher) {
            return $publisher;
        }
    }

    return null;
}
||
| 235 | |||
/**
 * Read the publication place from subfield "a" of the first field that provides one.
 *
 * Fields are tried in order: 210, then 214 (new field, 2019), then 219
 * (old field?). Multiple values are glued with '/'.
 *
 * @return string|null Null when none of the fields yields a non-empty value.
 */
private function getLocation(): ?string
{
    $candidatePaths = [
        '//mxc:datafield[@tag="210"]/mxc:subfield[@code="a"]',
        '//mxc:datafield[@tag="214"]/mxc:subfield[@code="a"]',
        '//mxc:datafield[@tag="219"]/mxc:subfield[@code="a"]',
    ];

    foreach ($candidatePaths as $path) {
        $place = $this->xpath2string($path, '/');
        if ($place) {
            return $place;
        }
    }

    return null;
}
||
| 253 | |||
/**
 * Read the publication date from the first "d" subfield.
 *
 * Field 210 (d: date of publication, distribution, etc.) is tried first,
 * then 214 (new field, 2019).
 *
 * @return string|null Null when neither field yields a non-empty value.
 */
private function getPublishDate(): ?string
{
    $candidatePaths = [
        '//mxc:datafield[@tag="210"]/mxc:subfield[@code="d"][1]',
        '//mxc:datafield[@tag="214"]/mxc:subfield[@code="d"][1]',
    ];

    foreach ($candidatePaths as $path) {
        $date = $this->xpath2string($path);
        if ($date) {
            return $date;
        }
    }

    return null;
}
||
| 267 | |||
| 268 | private function convertBnfIdent(): ?string |
||
| 278 | } |
||
| 279 | |||
| 280 | private function sourceTag(): ?string |
||
| 281 | { |
||
| 289 | } |
||
| 290 | } |
||
| 291 |