1
|
|
|
<?php namespace Scriptotek\OaiPmh; |
2
|
|
|
|
3
|
|
|
use GuzzleHttp\Client as HttpClient; |
4
|
|
|
use Evenement\EventEmitter; |
5
|
|
|
|
6
|
|
|
/**
 * OAI-PMH client.
 *
 * Builds request URLs, performs HTTP GET requests through the injected HTTP
 * client (retrying on connection and server errors) and emits the events
 * 'request.start', 'request.error' and 'request.complete' along the way.
 */
class Client extends EventEmitter
{

    // When we no longer need to support PHP 5.3:
    // - Upgrade to Evenement 2.0 and use trait instead of extending

    /** @var HttpClient */
    protected $httpClient;

    /** @var string OAI service base URL */
    public $url;

    /** @var string Requested schema for the returned records */
    public $schema;

    /** @var string Some user agent string to identify our client */
    public $userAgent;

    /**
     * @var string|string[] Proxy configuration details.
     *
     * Either a string 'host:port' or an
     * array('host:port', 'username', 'password').
     */
    public $proxy;

    /**
     * @var string[] Array containing username and password
     */
    public $credentials;

    /**
     * @var integer Max number of retries before giving up
     */
    public $maxRetries;

    /**
     * @var float Sleep time in seconds before retrying when getting an erroneous response
     */
    public $sleepTimeOnError;

    /**
     * @var float Timeout for each request in seconds
     */
    public $timeout;

    /**
     * Look up a key in an array, falling back to a default value.
     *
     * Uses isset(), so a key that exists with a null value also yields
     * the default.
     *
     * @param array  $arr     Array to read from
     * @param string $key     Key to look up
     * @param mixed  $default Value returned when the key is not set
     * @return mixed
     */
    protected function arrayGet($arr, $key, $default = null)
    {
        if (isset($arr[$key])) {
            return $arr[$key];
        }

        return $default;
    }

    /**
     * Create a new client
     *
     * Recognised option keys and their defaults:
     *  - 'schema'              ('marcxchange')
     *  - 'user-agent'          ('php-oaipmh-client')
     *  - 'credentials'         (null)
     *  - 'proxy'               (null)
     *  - 'max-retries'         (30)
     *  - 'sleep-time-on-error' (1.0)
     *  - 'timeout'             (60.0)
     *
     * @param string     $url        Base URL to the OAI-PMH service
     * @param array      $options    Associative array of options
     * @param HttpClient $httpClient HTTP client to use; a new one is created when omitted
     */
    public function __construct($url, $options = array(), $httpClient = null)
    {
        $this->url = $url;
        $this->httpClient = $httpClient ?: new HttpClient;

        $this->schema = $this->arrayGet($options, 'schema', 'marcxchange');
        $this->userAgent = $this->arrayGet($options, 'user-agent', 'php-oaipmh-client');
        $this->credentials = $this->arrayGet($options, 'credentials');
        $this->proxy = $this->arrayGet($options, 'proxy');
        $this->maxRetries = $this->arrayGet($options, 'max-retries', 30);
        $this->sleepTimeOnError = $this->arrayGet($options, 'sleep-time-on-error', 1.0);
        $this->timeout = $this->arrayGet($options, 'timeout', 60.0);
    }

    /**
     * Get HTTP client configuration options (authentication, proxy)
     *
     * The 'auth' and 'proxy' entries are only included when the
     * corresponding property is non-empty.
     *
     * @return array
     */
    public function getHttpOptions()
    {
        $options = array(
            'headers' => $this->getHttpHeaders(),
            'connect_timeout' => $this->timeout,
            'timeout' => $this->timeout,
        );

        // Explicit emptiness checks: both properties may hold arrays
        if (!empty($this->credentials)) {
            $options['auth'] = $this->credentials;
        }
        if (!empty($this->proxy)) {
            $options['proxy'] = $this->proxy;
        }

        return $options;
    }

    /**
     * Get HTTP client headers
     *
     * Always contains an 'Accept' header; 'User-Agent' is added when a
     * user agent string is configured.
     *
     * @return array
     */
    public function getHttpHeaders()
    {
        $headers = array(
            'Accept' => 'application/xml'
        );

        if (!empty($this->userAgent)) {
            $headers['User-Agent'] = $this->userAgent;
        }

        return $headers;
    }

    /**
     * Construct the URL for an OAI-PMH query
     *
     * The query string starts out with 'verb' and 'metadataPrefix' (the
     * configured schema). Entries in $arguments override these; an argument
     * whose value is null removes that key from the query string.
     *
     * @param string $verb      The OAI-PMH verb
     * @param array  $arguments OAI-PMH arguments
     * @return string
     */
    public function urlBuilder($verb, $arguments = array())
    {
        $qs = array(
            'verb' => $verb,
            'metadataPrefix' => $this->schema,
        );

        foreach ($arguments as $key => $value) {
            if (is_null($value)) {
                // Allow removal of default arguments like 'metadataPrefix'
                unset($qs[$key]);
            } else {
                $qs[$key] = $value;
            }
        }

        // A base URL that already carries a query string must be extended
        // with '&'; a second '?' would yield a malformed URL.
        $separator = (strpos($this->url, '?') === false) ? '?' : '&';

        return $this->url . $separator . http_build_query($qs);
    }

    /**
     * Pause for $sleepTimeOnError seconds before the next retry.
     *
     * time_nanosleep() takes the whole seconds and the remaining nanoseconds
     * (0 to 999999999) as two separate arguments, so the configured float is
     * split into its integer and fractional parts. Non-positive values mean
     * no pause at all.
     *
     * @return void
     */
    protected function sleepBeforeRetry()
    {
        $delay = (float) $this->sleepTimeOnError;
        if ($delay <= 0) {
            return;
        }

        $seconds = (int) floor($delay);
        $nanoseconds = (int) round(($delay - $seconds) * 1000000000);
        if ($nanoseconds >= 1000000000) {
            // Rounding pushed the fractional part up to a full second
            $seconds++;
            $nanoseconds = 0;
        }

        time_nanosleep($seconds, $nanoseconds);
    }

    /**
     * Perform a single OAI-PMH request
     *
     * Emits 'request.start' before the first attempt, 'request.error' for
     * every failed attempt and 'request.complete' once a response body has
     * been read. Connection errors and server errors are retried up to
     * $maxRetries times, pausing $sleepTimeOnError seconds between attempts.
     *
     * @param string $verb      The OAI-PMH verb
     * @param array  $arguments OAI-PMH arguments
     * @return string The response body
     * @throws ConnectionError When the number of retries is exhausted
     */
    public function request($verb, $arguments = array())
    {
        $this->emit('request.start', array(
            'verb' => $verb,
            'arguments' => $arguments
        ));

        $url = $this->urlBuilder($verb, $arguments);
        $attempt = 0;

        while (true) {
            // Two separate catch blocks because PHP 5.3 has no multi-catch
            // and only these two exception types should trigger a retry.
            try {
                $res = $this->httpClient->get($url, $this->getHttpOptions());
                break;
            } catch (\GuzzleHttp\Exception\ConnectException $e) {
                // Thrown in case of a networking error (connection timeout, DNS errors, etc.)
                $failure = $e;
            } catch (\GuzzleHttp\Exception\ServerException $e) {
                // Thrown in case of 500 errors
                $failure = $e;
            }

            $this->emit('request.error', array(
                'message' => $failure->getMessage(),
            ));

            $attempt++;
            if ($attempt > $this->maxRetries) {
                throw new ConnectionError('Failed to get a response from the server. Max retries (' . $this->maxRetries . ') exceeded.');
            }

            // Only wait when another attempt is actually going to follow
            $this->sleepBeforeRetry();
        }

        $body = (string) $res->getBody();

        $this->emit('request.complete', array(
            'verb' => $verb,
            'arguments' => $arguments,
            'response' => $body
        ));

        return $body;
    }

    /**
     * Perform a GetRecord request
     *
     * @param string $identifier
     * @return GetRecordResponse
     */
    public function record($identifier)
    {
        $data = $this->request('GetRecord', array('identifier' => $identifier));

        return new GetRecordResponse($data);
    }

    /**
     * Perform a ListRecords request and return an iterator over the records
     *
     * @param string $from            Start date
     * @param string $until           End date
     * @param string $set             Data set
     * @param string $resumptionToken To resume a harvest
     * @param array  $extraParams     Extra GET parameters
     * @return Records
     */
    public function records($from, $until, $set, $resumptionToken = null, $extraParams = array())
    {
        return new Records($from, $until, $set, $this, $resumptionToken, $extraParams);
    }
}
217
|
|
|
|
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using `empty(...)` or `!empty(...)` instead.