|
1
|
|
|
<?php |
|
2
|
|
|
/**
 * This file is part of the dispositif/wikibot application
 * 2019 : Philippe M. <[email protected]>
 * For the full copyright and MIT license information, please view the LICENSE file.
 */
|
7
|
|
|
|
|
8
|
|
|
declare(strict_types=1); |
|
9
|
|
|
|
|
10
|
|
|
namespace App\Domain\Publisher; |
|
11
|
|
|
|
|
12
|
|
|
use SimpleXMLElement; |
|
13
|
|
|
|
|
14
|
|
|
/**
 * Class BnfMapper
 * Maps an XML record returned by the BnF catalogue SRU API to a flat array of book fields.
 *
 * Example request:
 * http://catalogue.bnf.fr/api/SRU?version=1.2&operation=searchRetrieve&query=bib.isbn%2Badj%2B%25222844940404%2522
 *
 * @package App\Domain\Publisher
 */
class BnfMapper extends AbstractBookMapper implements MapperInterface
{
    /**
     * sprintf() template selecting one subfield (code) of one datafield (tag).
     */
    private const SUBFIELD_XPATH = '//mxc:datafield[@tag="%s"]/mxc:subfield[@code="%s"]';

    /**
     * Record currently being mapped; set by process().
     *
     * @var SimpleXMLElement
     */
    private $xml;

    /**
     * Extract book data from an XML record in UniMarc format.
     *
     * See http://api.bnf.fr/formats-bibliographiques-intermarc-unimarc
     * https://www.transition-bibliographique.fr/systemes-et-donnees/manuel-unimarc-format-bibliographique/
     *
     * @param SimpleXMLElement|mixed $xml Parsed record; any other value yields an empty array.
     *
     * @return array Field name => string|null (null when the value is not found in the record).
     */
    public function process($xml): array
    {
        if (!$xml instanceof SimpleXMLElement) {
            return [];
        }
        $this->xml = $xml;

        return [
            'bnf' => $this->convertBnfIdent(),
            'isbn' => $this->subfield('010', 'a'),

            // Zone 101: languages
            // a : language of the text
            'langue' => $this->lang2wiki($this->subfield('101', 'a')),
            // c : language of the original work
            'langue originale' => $this->lang2wiki($this->subfield('101', 'c')),
            // g : language of the title proper (when different)
            'langue titre' => $this->lang2wiki($this->subfield('101', 'g')),

            // Zone 200
            // a : title proper
            'titre' => $this->subfield('200', 'a'),
            // e : other title information
            'sous-titre' => $this->subfield('200', 'e'),
            // f : first statement of responsibility, e.g. "Pierre Durand, Paul Dupond"
            //     (the XML is convoluted for this)
            'auteur1' => $this->subfield('200', 'f'),
            // g : subsequent statement of responsibility
            'auteur2' => $this->subfield('200', 'g'),
            // h (part number) and i (part title) are not mapped
            // v : volume number
            'volume' => $this->subfield('200', 'v'),

            // Zone 410: collection (series)
            'collection' => $this->subfield('410', 'a'),

            // Authors: the 7XX zones would be a better source, see
            // https://www.transition-bibliographique.fr/wp-content/uploads/2018/07/B7XX-6-2011.pdf

            // Values that may live in several zones
            'lieu' => $this->getLocation(),
            'éditeur' => $this->getPublisher(),
            'date' => $this->getPublishDate(),
            // Zone 215
            'pages totales' => $this->convertPages(),
        ];
    }

    /**
     * Evaluate an XPath expression on the current record and return the first match as a string.
     *
     * @param string $path XPath expression.
     *
     * @return string|null Text of the first matching node (possibly ''), or null when nothing matches.
     */
    private function xpath2string(string $path): ?string
    {
        $nodes = $this->xml->xpath($path);

        // xpath() returns false/null on error and an empty array when nothing matches.
        if (!is_array($nodes) || !isset($nodes[0])) {
            return null;
        }

        return (string) $nodes[0];
    }

    /**
     * Read the first occurrence of a subfield of a datafield.
     *
     * @param string $tag  Datafield tag, e.g. "200".
     * @param string $code Subfield code, e.g. "a".
     *
     * @return string|null
     */
    private function subfield(string $tag, string $code): ?string
    {
        return $this->xpath2string(sprintf(self::SUBFIELD_XPATH, $tag, $code));
    }

    /**
     * Return the first non-empty value of a subfield, trying the given datafields in order.
     *
     * Only null and '' count as "missing", so a literal "0" is a valid value.
     *
     * @param string[] $tags Datafield tags, by decreasing priority.
     * @param string   $code Subfield code.
     *
     * @return string|null
     */
    private function firstSubfield(array $tags, string $code): ?string
    {
        foreach ($tags as $tag) {
            $value = $this->subfield($tag, $code);
            if ($value !== null && $value !== '') {
                return $value;
            }
        }

        return null;
    }

    /**
     * Convert number of pages.
     * "1 vol. (126 p.)" gives "126".
     *
     * The pattern requires at least two digits, so single-digit page counts are ignored.
     *
     * @return string|null
     */
    private function convertPages(): ?string
    {
        $raw = $this->subfield('215', 'a');
        if ($raw === null) {
            return null;
        }

        if (preg_match('#([0-9]{2,}) p\.#', $raw, $matches) === 1) {
            return $matches[1];
        }

        return null;
    }

    /**
     * Translate a language code found in the record into the value stored in the language table.
     *
     * todo refac and move.
     * ISO 639-1 http://www.loc.gov/standards/iso639-2/php/French_list.php
     *
     * @param string|null $lang
     *
     * @return string|null Null when the code is empty or unknown.
     */
    private function lang2wiki(?string $lang = null): ?string
    {
        if ($lang === null || $lang === '') {
            return null;
        }

        return $this->languageTable()[$lang] ?? null;
    }

    /**
     * Load the language table from Enums/languageData.php once and reuse it afterwards.
     *
     * @return array Language code => label, as provided by the data file.
     */
    private function languageTable(): array
    {
        static $table = null;

        if ($table === null) {
            // The data file is expected to fill $iso2b_to_frlang in the including scope;
            // the empty default keeps lookups safe if it does not.
            $iso2b_to_frlang = [];
            require __DIR__.'/../Enums/languageData.php';
            $table = $iso2b_to_frlang;
        }

        return $table;
    }

    /**
     * Publisher name: subfield c of zone 210, then 214 (new zone, 2019), then 219 (older zone?).
     *
     * @return string|null
     */
    private function getPublisher(): ?string
    {
        return $this->firstSubfield(['210', '214', '219'], 'c');
    }

    /**
     * Place of publication: subfield a of zone 210, then 214 (new zone, 2019), then 219 (older zone?).
     *
     * @return string|null
     */
    private function getLocation(): ?string
    {
        return $this->firstSubfield(['210', '214', '219'], 'a');
    }

    /**
     * Date of publication, distribution, etc.: subfield d of zone 210, then 214 (new zone, 2019).
     *
     * NOTE(review): unlike publisher and location, zone 219 is not consulted here;
     * confirm whether that is intentional.
     *
     * @return string|null
     */
    private function getPublishDate(): ?string
    {
        return $this->firstSubfield(['210', '214'], 'd');
    }

    /**
     * Extract the digits following "cb" from the record's ARK identifier.
     * "ark:/12148/cb453986124" gives "453986124".
     *
     * @return string|null
     */
    private function convertBnfIdent(): ?string
    {
        $raw = $this->xpath2string('//srw:recordIdentifier[1]/text()');
        if ($raw === null) {
            return null;
        }

        if (preg_match('#ark:/[0-9]+/cb([0-9]+)#', $raw, $matches) === 1) {
            return $matches[1];
        }

        return null;
    }
}
|
200
|
|
|
|