Dispositif /
Wikibot
| 1 | <?php |
||
| 2 | /* |
||
| 3 | * This file is part of dispositif/wikibot application (@github) |
||
| 4 | * 2019/2020 © Philippe/Irønie <[email protected]> |
||
| 5 | * For the full copyright and MIT license information, view the license file. |
||
| 6 | */ |
||
| 7 | |||
| 8 | declare(strict_types=1); |
||
| 9 | |||
| 10 | namespace App\Domain\Publisher; |
||
| 11 | |||
| 12 | use App\Domain\Enums\Language; |
||
| 13 | use SimpleXMLElement; |
||
| 14 | |||
/**
 * Maps a BnF catalogue SRU response (UniMarc XML) to an associative array of
 * bibliographic fields.
 *
 * Example request:
 * http://catalogue.bnf.fr/api/SRU?version=1.2&operation=searchRetrieve&query=bib.isbn%2Badj%2B%25222844940404%2522.
 */
class BnfMapper extends AbstractBookMapper implements MapperInterface
{
    /** XML document currently being mapped; set by process(). */
    private ?SimpleXMLElement $xml = null;

    /**
     * Map an XML response in UniMarc format.
     * See http://api.bnf.fr/formats-bibliographiques-intermarc-unimarc
     * https://www.transition-bibliographique.fr/systemes-et-donnees/manuel-unimarc-format-bibliographique/.
     *
     * @param mixed $xml Expected to be a SimpleXMLElement; anything else yields an empty array.
     *
     * @return array Mapped fields, or an empty array when the input is not XML
     *               or the response does not contain exactly one record.
     */
    public function process($xml): array
    {
        if (!$xml instanceof SimpleXMLElement) {
            return [];
        }
        $this->xml = $xml;

        // Skip multi-records (and empty results). The cast must wrap the whole
        // "?? 0" expression: "(int) $a[0] ?? 0" casts first, which makes the
        // fallback dead code and raises a warning on an empty xpath result.
        $nbResults = (int) ($xml->xpath('//srw:numberOfRecords[1]')[0] ?? 0);
        if (1 !== $nbResults) {
            return [];
        }

        return [
            // Zone 101, a : language of the text
            'langue' => $this->lang2wiki(
                $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="a"][1]')
            ),
            // c : language of the original work
            'langue originale' => $this->stripLangFR(
                $this->lang2wiki(
                    $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="c"][1]')
                )
            ),
            // g : language of the title proper (when different)
            'langue titre' => $this->stripLangFR(
                $this->lang2wiki(
                    $this->xpath2string('//mxc:datafield[@tag="101"]/mxc:subfield[@code="g"][1]')
                )
            ),

            /*
             * Zone 200.
             * https://www.transition-bibliographique.fr/wp-content/uploads/2019/11/B200-2018.pdf
             */
            // a : title proper
            'titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="a"][1]'),
            // d : parallel title (other language)
            'titre original' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="d"][1]'),
            // e : other title information
            'sous-titre' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="e"][1]', ', '),

            // Authors are read from zones 700/701: the statement of responsibility
            // in zone 200 is too inconsistent ("Pierre Durand, Paul Dupond" or "Paul Durand,...").
            // https://www.transition-bibliographique.fr/wp-content/uploads/2018/07/B7XX-6-2011.pdf
            'prénom1' => $this->xpath2string('//mxc:datafield[@tag="700"]/mxc:subfield[@code="b"]'),
            'nom1' => $this->xpath2string('//mxc:datafield[@tag="700"]/mxc:subfield[@code="a"]'),

            'prénom2' => $this->xpath2string('//mxc:datafield[@tag="701"]/mxc:subfield[@code="b"]'),
            'nom2' => $this->xpath2string('//mxc:datafield[@tag="701"]/mxc:subfield[@code="a"]'),

            // Zone 200, v : volume number.
            // (h : part number and i : part title are not mapped.)
            'volume' => $this->xpath2string('//mxc:datafield[@tag="200"]/mxc:subfield[@code="v"][1]'),

            // Zone 410 : series ("collection")
            'collection' => $this->xpath2string('//mxc:datafield[@tag="410"]/mxc:subfield[@code="a"][1]'),

            // Values looked up in several zones
            'lieu' => $this->getLocation(),
            'éditeur' => $this->getPublisher(),
            'date' => $this->getPublishDate(),
            // Zone 215
            'pages totales' => $this->convertPages(),

            // Note: no 'bnf' (ark identifier) field is emitted for now.
            'isbn2' => $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][2]'),
            'isbn' => $this->extractISBN(),

            // hidden data
            'infos' => [
                'source' => 'BnF',
                'sourceTag' => $this->sourceTag(),
                'bnfAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="3"][1]'),
                'ISNIAuteur1' => $this->formatISNI(
                    $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="o"][1]')
                ),
                'yearsAuteur1' => $this->xpath2string('//mxc:datafield[@tag="700"][1]/mxc:subfield[@code="f"][1]'),
            ],
        ];
    }

    /**
     * Evaluate an XPath expression on the current document and join the text
     * of every matched element.
     *
     * @param string      $path XPath expression
     * @param string|null $glue Separator between matched values; null means ', '
     *
     * @return string|null Joined values, or null when nothing matched, no
     *                     document is loaded, or the XPath evaluation failed
     */
    private function xpath2string(string $path, ?string $glue = ', '): ?string
    {
        $glue = $glue ?? ', ';

        // process() has not stored a document yet: nothing to query.
        if (null === $this->xml) {
            return null;
        }

        // SimpleXMLElement::xpath() does not always return an array
        // (false/null on failure), so guard before iterating.
        $elements = $this->xml->xpath($path);
        if (!is_array($elements)) {
            return null;
        }

        $values = [];
        foreach ($elements as $element) {
            if ($element instanceof SimpleXMLElement) {
                $values[] = (string) $element;
            }
        }

        return [] !== $values ? implode($glue, $values) : null;
    }

    /**
     * Return the first truthy value found for a subfield across several zones.
     *
     * @param string[] $tags             Datafield tags, tried in order
     * @param string   $subfieldSelector XPath step selecting the subfield
     * @param string   $glue             Separator used when several subfields match
     */
    private function firstZoneValue(array $tags, string $subfieldSelector, string $glue = ', '): ?string
    {
        foreach ($tags as $tag) {
            $value = $this->xpath2string(
                sprintf('//mxc:datafield[@tag="%s"]/%s', $tag, $subfieldSelector),
                $glue
            );
            // Truthiness test on purpose: '' and '0' fall through to the next zone.
            if ($value) {
                return $value;
            }
        }

        return null;
    }

    /**
     * Extract a single ISBN from zone 010 $a.
     *
     * The data is frequently dirty, e.g. "9789004232891, 9004232893". The
     * patterns are tried in order: 13 digits, then 10 characters (9 digits
     * plus a digit or the "X" check digit), then a hyphenated form.
     *
     * @return string|null The ISBN, or null when none is recognised
     */
    private function extractISBN(): ?string
    {
        $isbn = $this->xpath2string('//mxc:datafield[@tag="010"]/mxc:subfield[@code="a"][1]') ?? '';

        $patterns = [
            '#(\d{13})#',
            // An ISBN-10 may end with the check digit "X".
            '#(\d{9}[\dX])#',
            // Hyphenated ISBN; the last character is a digit or "X", never a hyphen.
            '#([0-9\-]{9,16}[0-9X])#',
        ];
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $isbn, $matches)) {
                return $matches[1];
            }
        }

        return null;
    }

    /**
     * Validate an ISNI taken from the record.
     *
     * A bare 16-character ISNI is returned unchanged; the BnF form prefixed
     * with "ISNI" is returned as four space-separated groups.
     *
     * @return string|null The ISNI, or null when empty or not recognised
     */
    private function formatISNI(?string $raw = null): ?string
    {
        if (!$raw) {
            return null;
        }
        if (preg_match('#^0000(000[0-4])(\d{4})(\d{3}[0-9X])$#', $raw) > 0) {
            return $raw;
        }
        // BnF curious format of ISNI
        if (preg_match('#^ISNI0000(000[0-4])(\d{4})(\d{3}[0-9X])$#', $raw, $matches) > 0) {
            return sprintf('0000 %s %s %s', $matches[1], $matches[2], $matches[3]);
        }

        return null;
    }

    /**
     * Strip the FR language: returns null for 'fr', the given value otherwise.
     */
    private function stripLangFR(?string $lang = null): ?string
    {
        return ('fr' !== $lang) ? $lang : null;
    }

    /**
     * Convert the number of pages from zone 215 $a.
     * "1 vol. (126 p.)" gives "126". Only counts of two or more digits match.
     */
    private function convertPages(): ?string
    {
        $raw = $this->xpath2string('//mxc:datafield[@tag="215"]/mxc:subfield[@code="a"]');
        if (null === $raw) {
            return null;
        }
        if (preg_match('#(\d{2,}) p\.#', $raw, $matches) > 0) {
            return (string) $matches[1];
        }

        return null;
    }

    /**
     * Convert a language code through Language::iso2b2wiki().
     * todo: handle bilingual values (fr+en)
     * ISO 639 code list: http://www.loc.gov/standards/iso639-2/php/French_list.php.
     *
     * @return string|null null when the code is empty
     */
    private function lang2wiki(?string $lang = null): ?string
    {
        if (empty($lang)) {
            return null;
        }

        return Language::iso2b2wiki($lang);
    }

    /**
     * Publisher name ($c), joined with ' / ' when repeated.
     * Zones tried in order: 210, 214 (new zone, 2019), 219 (old zone?).
     */
    private function getPublisher(): ?string
    {
        return $this->firstZoneValue(['210', '214', '219'], 'mxc:subfield[@code="c"]', ' / ');
    }

    /**
     * Place of publication ($a), joined with '/' when repeated.
     * Zones tried in order: 210, 214 (new zone, 2019), 219 (old zone?).
     */
    private function getLocation(): ?string
    {
        return $this->firstZoneValue(['210', '214', '219'], 'mxc:subfield[@code="a"]', '/');
    }

    /**
     * Date of publication, distribution, etc. ($d).
     * Zones tried in order: 210, 214 (new zone, 2019).
     */
    private function getPublishDate(): ?string
    {
        return $this->firstZoneValue(['210', '214'], 'mxc:subfield[@code="d"][1]');
    }

    /**
     * Build the source tag from the record's last modification date:
     * "20190922" gives "BnF:2019".
     *
     * @return string|null null when the date is missing or not 8 digits
     */
    private function sourceTag(): ?string
    {
        $raw = $this->xpath2string('//srw:extraRecordData[1]/ixm:attr[@name="LastModificationDate"][1]');
        // 20190922
        if ($raw && preg_match('#^(\d{4})\d{4}$#', $raw, $matches) > 0) {
            return sprintf('BnF:%s', $matches[1]);
        }

        return null;
    }
}
||
| 283 |
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely a typographical error or the method has been renamed.