|
1
|
|
|
<?php |
|
2
|
|
|
namespace EWW\Dpf\Services\ImportExternalMetadata; |
|
3
|
|
|
|
|
4
|
|
|
/* |
|
5
|
|
|
* This file is part of the TYPO3 CMS project. |
|
6
|
|
|
* |
|
7
|
|
|
* It is free software; you can redistribute it and/or modify it under |
|
8
|
|
|
* the terms of the GNU General Public License, either version 2 |
|
9
|
|
|
* of the License, or any later version. |
|
10
|
|
|
* |
|
11
|
|
|
* For the full copyright and license information, please read the |
|
12
|
|
|
* LICENSE.txt file that was distributed with this source code. |
|
13
|
|
|
* |
|
14
|
|
|
* The TYPO3 project - inspiring people to share! |
|
15
|
|
|
*/ |
|
16
|
|
|
|
|
17
|
|
|
\Httpful\Bootstrap::init(); |
|
18
|
|
|
|
|
19
|
|
|
use \Httpful\Request; |
|
20
|
|
|
use Symfony\Component\Serializer\Encoder\XmlEncoder; |
|
21
|
|
|
use EWW\Dpf\Services\Transformer\DocumentTransformer; |
|
22
|
|
|
use EWW\Dpf\Services\ProcessNumber\ProcessNumberGenerator; |
|
23
|
|
|
use EWW\Dpf\Domain\Model\DocumentType; |
|
24
|
|
|
use EWW\Dpf\Domain\Model\PubMedMetadata; |
|
25
|
|
|
use EWW\Dpf\Domain\Model\ExternalMetadata; |
|
26
|
|
|
|
|
27
|
|
|
/**
 * Importer that fetches publication metadata from the NCBI E-utilities
 * (PubMed) web service and wraps it in PubMedMetadata objects.
 */
class PubMedImporter extends AbstractImporter implements Importer
{
    /**
     * Base URL of the NCBI E-utilities API.
     *
     * @var string
     */
    protected $apiUrl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";

    /**
     * Path and query prefix of the summary endpoint; the identifier (or a
     * comma separated identifier list) is appended to it.
     *
     * @var string
     */
    protected $resource = "/esummary.fcgi?version=2.0&db=pubmed&id=";

    /**
     * Path and query prefix of the search endpoint.
     *
     * @var string
     */
    protected $searchPath = "/esearch.fcgi?version=2.0&db=pubmed";

    /**
     * Returns the list of all publication types
     *
     * @return array
     */
    public static function types()
    {
        return [
            'Collected Works',
            'Congress',
            'Dataset',
            'Dictionary',
            'Journal Article'
        ];
    }

    /**
     * Fetches the summary record for a single PubMed identifier.
     *
     * @param string $identifier
     * @return ExternalMetadata|null The metadata object, or null when the request
     *                               failed or the service answered with an ERROR element.
     * @throws \Throwable Any error raised while requesting or processing is logged and rethrown.
     */
    public function findByIdentifier($identifier)
    {
        try {
            // The identifier ends up in the query string, so encode it the same
            // way search() encodes its term.
            $requestUri = $this->apiUrl . $this->resource . urlencode((string)$identifier);
            $response = Request::get($requestUri)->send();

            if ($response->hasErrors() || $response->code != 200) {
                return null;
            }

            /** @var PubMedMetadata $pubMedMetadata */
            $pubMedMetadata = $this->objectManager->get(PubMedMetadata::class);

            $pubMedMetadata->setSource(get_class($this));
            $pubMedMetadata->setFeUser($this->security->getUser()->getUid());
            $pubMedMetadata->setData($response);
            $pubMedMetadata->setPublicationIdentifier($identifier);

            $errorNodes = $pubMedMetadata->getDataXpath()->query('/eSummaryResult/ERROR');
            if ($errorNodes->length > 0) {
                return null;
            }

            return $pubMedMetadata;
        } catch (\Throwable $throwable) {
            $this->logger->error($throwable->getMessage());
            throw $throwable;
        }
        // No trailing return: every path above either returns or rethrows.
    }

    /**
     * Searches PubMed and resolves the matching identifiers to metadata objects.
     *
     * @param string $query
     * @param int $rows Maximum number of identifiers to request (sent as retmax).
     * @param int $offset Index of the first result (sent as retstart when greater than 0).
     * @param string $searchField Appended to the query as a "[Field]" qualifier when not empty.
     * @return array|null Array with the keys 'total-results', 'items-per-page' (each only
     *                    when present in the response) and 'items'; null when the request
     *                    failed, the service reported an ERROR element, or no identifier
     *                    was found.
     * @throws \Throwable Any error raised while requesting or processing is logged and rethrown.
     */
    public function search($query, $rows = 10, $offset = 0, $searchField = 'author')
    {
        $requestUri = $this->apiUrl . $this->searchPath . '&retmax=' . $rows;

        if ($offset > 0) {
            $requestUri .= '&retstart=' . $offset;
        }

        if ($searchField) {
            $query .= '[' . ucfirst(strtolower($searchField)) . ']';
        }

        $requestUri .= "&term=" . urlencode($query);
        $results = [];

        try {
            $response = Request::get($requestUri)->send();

            if (!$response->hasErrors() && $response->code == 200) {
                $dom = new \DOMDocument();
                // loadXML() reports failure with false, never with null.
                if (@$dom->loadXML((string)$response) === false) {
                    throw new \Exception("Invalid XML: " . get_class($this));
                }
                $xmlDataXpath = \EWW\Dpf\Helper\XPath::create($dom);

                $errorNodes = $xmlDataXpath->query('/eSearchResult/ERROR');
                if ($errorNodes->length > 0) {
                    return null;
                }

                $countNodes = $xmlDataXpath->query('/eSearchResult/Count');
                if ($countNodes->length > 0) {
                    $results['total-results'] = $countNodes->item(0)->nodeValue;
                }

                $retMaxNodes = $xmlDataXpath->query('/eSearchResult/RetMax');
                if ($retMaxNodes->length > 0) {
                    $results['items-per-page'] = $retMaxNodes->item(0)->nodeValue;
                }

                $idNodes = $xmlDataXpath->query('/eSearchResult/IdList/Id');
                if ($idNodes->length > 0) {
                    $identifierList = [];
                    foreach ($idNodes as $idNode) {
                        $identifierList[] = $idNode->nodeValue;
                    }

                    $results['items'] = $this->findByIdentifierList($identifierList);

                    return $results;
                }
            }
        } catch (\Throwable $throwable) {
            $this->logger->error($throwable->getMessage());
            throw $throwable;
        }

        // Reached when the request failed or the response listed no identifiers.
        return null;
    }

    /**
     * Returns the PubMed transformation file of the first client record.
     *
     * @return \EWW\Dpf\Domain\Model\TransformationFile|null Null when there is no client
     *                                                      or no transformation file.
     */
    protected function getDefaultXsltTransformation() : ?\EWW\Dpf\Domain\Model\TransformationFile
    {
        /** @var \EWW\Dpf\Domain\Model\Client $client */
        $client = $this->clientRepository->findAll()->current();

        // Without a client record there is nothing to look up; avoid calling a
        // method on a non-object.
        if (!$client) {
            return null;
        }

        /** @var \EWW\Dpf\Domain\Model\TransformationFile $xsltTransformationFile */
        $xsltTransformationFile = $client->getPubmedTransformation()->current();

        // Normalise an empty result to null so the nullable return type holds.
        return $xsltTransformationFile ?: null;
    }

    /**
     * Returns the absolute path of the XSLT file shipped with the extension.
     *
     * @return string
     */
    protected function getDefaultXsltFilePath() : string
    {
        return \TYPO3\CMS\Core\Utility\GeneralUtility::getFileAbsFileName(
            'EXT:dpf/Resources/Private/Xslt/pubmed-default.xsl'
        );
    }

    /**
     * Returns the name of this importer.
     *
     * @return string
     */
    protected function getImporterName()
    {
        return 'pubmed';
    }

    /**
     * Fetches the summary records for several identifiers with one request and
     * builds one metadata object per DocumentSummary element.
     *
     * @param array $identifierList
     * @return array Metadata objects keyed by the record's uid attribute; empty when the
     *               request failed or the service reported an ERROR element.
     * @throws \Throwable Any error raised while requesting or processing is logged and rethrown.
     */
    protected function findByIdentifierList($identifierList)
    {
        try {
            $identifiers = implode(',', $identifierList);
            $response = Request::get($this->apiUrl . $this->resource . $identifiers)->send();

            if ($response->hasErrors() || $response->code != 200) {
                return [];
            }

            $dom = new \DOMDocument();
            // loadXML() reports failure with false, never with null.
            if (@$dom->loadXML((string)$response) === false) {
                throw new \Exception("Invalid XML: " . get_class($this));
            }
            $xmlDataXpath = \EWW\Dpf\Helper\XPath::create($dom);

            $errorNodes = $xmlDataXpath->query('/eSummaryResult/ERROR');
            if ($errorNodes->length > 0) {
                return [];
            }

            $results = [];
            $summaryNodes = $xmlDataXpath->query('/eSummaryResult/DocumentSummarySet/DocumentSummary');

            foreach ($summaryNodes as $nodeItem) {
                $idNode = $xmlDataXpath->query('@uid', $nodeItem);
                if ($idNode->length === 0) {
                    continue;
                }

                $identifier = $idNode->item(0)->nodeValue;
                if (!$identifier) {
                    continue;
                }

                // Wrap the single record in the same envelope a one-record
                // summary response has.
                $xml = '<eSummaryResult><DocumentSummarySet>';
                $xml .= $dom->saveXML($nodeItem);
                $xml .= '</DocumentSummarySet></eSummaryResult>';

                // Parsed only to verify that the extracted record is well-formed.
                $itemDom = new \DOMDocument();
                if (@$itemDom->loadXML($xml) === false) {
                    throw new \Exception("Invalid XML: " . get_class($this));
                }

                /** @var PubMedMetadata $pubMedMetadata */
                $pubMedMetadata = $this->objectManager->get(PubMedMetadata::class);

                $pubMedMetadata->setSource(get_class($this));
                $pubMedMetadata->setFeUser($this->security->getUser()->getUid());
                $pubMedMetadata->setData($xml);
                $pubMedMetadata->setPublicationIdentifier($identifier);

                $results[$identifier] = $pubMedMetadata;
            }

            return $results;
        } catch (\Throwable $throwable) {
            $this->logger->error($throwable->getMessage());
            throw $throwable;
        }
    }
}
|
267
|
|
|
|
This check looks for unreachable code. It uses sophisticated control flow analysis techniques to find statements which will never be executed.
Unreachable code is most often the result of return, die or exit statements that have been added for debug purposes.
In the above example, the last return false will never be executed, because a return statement has already been met in every possible execution path.