1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace SP\Crawler; |
4
|
|
|
|
5
|
|
|
use SP\Spiderling\CrawlerInterface; |
6
|
|
|
use SP\Crawler\Element\ClickRequestInterface; |
7
|
|
|
use SP\Crawler\Element\ClickableInterface; |
8
|
|
|
use SP\Crawler\Element\SelectableInterface; |
9
|
|
|
use Psr\Http\Message\RequestInterface; |
10
|
|
|
use SP\Spiderling\Query\AbstractQuery; |
11
|
|
|
use GuzzleHttp\Psr7\Uri; |
12
|
|
|
use Psr\Http\Message\UriInterface; |
13
|
|
|
use DOMDocument; |
14
|
|
|
use DOMElement; |
15
|
|
|
use DOMXPath; |
16
|
|
|
use InvalidArgumentException; |
17
|
|
|
use BadMethodCallException; |
18
|
|
|
|
19
|
|
|
/**
 * Crawler backed by a single in-memory DOMDocument.
 *
 * Elements are identified by XPath expressions (called "ids" in the method
 * signatures). This class has no HTTP transport of its own: open() and
 * sendRequest() always throw BadMethodCallException.
 *
 * @author Ivan Kerin <[email protected]>
 * @copyright 2015, Clippings Ltd.
 * @license http://spdx.org/licenses/BSD-3-Clause
 */
class Reader implements CrawlerInterface
{
    /**
     * The document all queries run against.
     *
     * @var DOMDocument
     */
    private $document;

    /**
     * XPath evaluator bound to $document.
     *
     * @var DOMXPath
     */
    private $xpath;

    /**
     * Resolves DOM elements to their input wrappers.
     *
     * @var InputMap
     */
    private $inputMap;

    /**
     * @param DOMDocument $document document to crawl
     */
    public function __construct(DOMDocument $document)
    {
        $this->document = $document;
        $this->xpath = new DOMXPath($document);
        $this->inputMap = new InputMap($this);
    }

    /**
     * @return DOMDocument
     */
    public function getDocument()
    {
        return $this->document;
    }

    /**
     * @return DOMXPath
     */
    public function getXPath()
    {
        return $this->xpath;
    }

    /**
     * @return InputMap
     */
    public function getInputMap()
    {
        return $this->inputMap;
    }

    /**
     * Click the element identified by $id.
     *
     * Inputs implementing ClickableInterface are clicked directly. Inputs
     * implementing ClickRequestInterface produce a request that is handed to
     * sendRequest(), which always throws in this class.
     *
     * @param  string                   $id xpath of the element
     * @throws InvalidArgumentException when no element matches $id
     * @throws BadMethodCallException   when the input cannot be clicked or
     *                                  the click requires sending a request
     */
    public function click($id)
    {
        $input = $this->getInput($this->getElement($id));

        if ($input instanceof ClickableInterface) {
            $input->click();
        } elseif ($input instanceof ClickRequestInterface) {
            $this->sendRequest($input->clickRequest());
        } else {
            throw new BadMethodCallException(
                sprintf('Cannot click on %s, %s', get_class($input), $id)
            );
        }
    }

    /**
     * Select the element identified by $id.
     *
     * @param  string                   $id xpath of the element
     * @throws InvalidArgumentException when no element matches $id
     * @throws BadMethodCallException   when the input is not a SelectableInterface
     */
    public function select($id)
    {
        $input = $this->getInput($this->getElement($id));

        if ($input instanceof SelectableInterface) {
            $input->select();
        } else {
            throw new BadMethodCallException(
                sprintf('Cannot select on %s, %s', get_class($input), $id)
            );
        }
    }

    /**
     * Not supported by this class: it always throws.
     *
     * @param  RequestInterface       $request
     * @throws BadMethodCallException always
     */
    public function sendRequest(RequestInterface $request)
    {
        throw new BadMethodCallException(
            sprintf('Cannot send request to %s', $request->getUri())
        );
    }

    /**
     * Not supported by this class: it always throws.
     *
     * @param  UriInterface           $url
     * @throws BadMethodCallException always
     */
    public function open(UriInterface $url)
    {
        throw new BadMethodCallException(
            sprintf('Method %s not supported by %s', __METHOD__, __CLASS__)
        );
    }

    /**
     * Always returns an empty Uri.
     *
     * @return UriInterface
     */
    public function getUri()
    {
        return new Uri('');
    }

    /**
     * Evaluate an XPath expression against the document.
     *
     * @param  string            $xpath
     * @param  DOMElement|null   $scope context node; null means the whole document
     * @return \DOMNodeList|false false when DOMXPath rejects the expression
     */
    public function query($xpath, DOMElement $scope = null)
    {
        return $this->getXPath()->query($xpath, $scope);
    }

    /**
     * Return the first node matching the given XPath.
     *
     * @param  string                   $xpath
     * @throws InvalidArgumentException when nothing matches or the
     *                                  expression cannot be evaluated
     * @return DOMElement
     */
    public function getElement($xpath)
    {
        $items = $this->query($xpath);

        // DOMXPath::query() yields false for a malformed expression; treat
        // that the same as "not found" instead of failing on ->item() below.
        if (false === $items || 0 === $items->length) {
            throw new InvalidArgumentException(
                sprintf('Node with id %s does not exist', $xpath)
            );
        }

        return $items->item(0);
    }

    /**
     * Text content of the element with whitespace runs (including
     * non-breaking spaces) collapsed to a single space and the ends trimmed.
     *
     * @param  string                   $id xpath of the element
     * @throws InvalidArgumentException when id not found
     * @return string
     */
    public function getText($id)
    {
        $element = $this->getElement($id);

        // \x{00A0} is spelled out because \s does not cover the non-breaking
        // space here and a literal one is invisible in the source.
        return trim(preg_replace('/[ \s\f\n\r\t\v\x{00A0}]+/u', ' ', $element->textContent));
    }

    /**
     * @param  string                   $id xpath of the element
     * @throws InvalidArgumentException when id not found
     * @return string
     */
    public function getTagName($id)
    {
        return $this->getElement($id)->tagName;
    }

    /**
     * @param  string                   $id   xpath of the element
     * @param  string                   $name attribute name
     * @throws InvalidArgumentException when id not found
     * @return string
     */
    public function getAttribute($id, $name)
    {
        return $this->getElement($id)->getAttribute($name);
    }

    /**
     * Markup of a single element, serialized with DOMDocument::saveXML().
     *
     * @param  string                   $id xpath of the element
     * @throws InvalidArgumentException when id not found
     * @return string
     */
    public function getHtml($id)
    {
        return $this->document->saveXML($this->getElement($id));
    }

    /**
     * Markup of the whole document, serialized with DOMDocument::saveHTML().
     *
     * @return string
     */
    public function getFullHtml()
    {
        return $this->document->saveHTML();
    }

    /**
     * Whether the element is considered visible.
     *
     * An element is hidden when it, or any of its ancestors, has an inline
     * style containing "display:none" / "display: none", or is a script or
     * head element. Nothing else (e.g. stylesheets) is taken into account.
     *
     * @param  string                   $id xpath of the element
     * @throws InvalidArgumentException when id not found
     * @return boolean
     */
    public function isVisible($id)
    {
        $element = $this->getElement($id);

        $conditions = [
            "contains(@style, 'display:none')",
            "contains(@style, 'display: none')",
            "self::script",
            "self::head",
        ];

        $hidden = $this->xpath->query(
            './ancestor-or-self::*['.implode(' or ', $conditions).']',
            $element
        );

        return 0 === $hidden->length;
    }

    /**
     * Whether the element carries a "selected" attribute.
     *
     * @param  string                   $id xpath of the element
     * @throws InvalidArgumentException when id not found
     * @return boolean
     */
    public function isSelected($id)
    {
        return $this->getElement($id)->hasAttribute('selected');
    }

    /**
     * Whether the element carries a "checked" attribute.
     *
     * @param  string                   $id xpath of the element
     * @throws InvalidArgumentException when id not found
     * @return boolean
     */
    public function isChecked($id)
    {
        return $this->getElement($id)->hasAttribute('checked');
    }

    /**
     * Look up the input wrapper for a DOM element via the input map.
     *
     * @param  DOMElement               $element
     * @throws InvalidArgumentException raised by InputMap::get(); the exact
     *                                  condition is defined there (TODO confirm)
     * @return Element\AbstractElement
     */
    public function getInput(DOMElement $element)
    {
        return $this->inputMap->get($element);
    }

    /**
     * Current value of the input identified by $id.
     *
     * @param  string                   $id xpath of the element
     * @throws InvalidArgumentException when id not found
     * @return mixed
     */
    public function getValue($id)
    {
        $input = $this->getInput($this->getElement($id));

        return $input->getValue();
    }

    /**
     * Set the value of the input identified by $id.
     *
     * Disabled inputs are left untouched; no exception is raised for them.
     *
     * @param  string                   $id    xpath of the element
     * @param  string                   $value
     * @throws InvalidArgumentException when id not found
     */
    public function setValue($id, $value)
    {
        $input = $this->getInput($this->getElement($id));

        if (false === $input->isDisabled()) {
            $input->setValue($value);
        }
    }

    /**
     * Build one id per node matched by the query, then let the query's
     * filters narrow the list down.
     *
     * Each id has the form "(<xpath>)[<position>]", so it can be passed back
     * to getElement() to address exactly that node.
     *
     * @param  AbstractQuery $query
     * @param  string        $parent xpath prefix prepended to the query's xpath
     * @return array
     */
    public function queryIds(AbstractQuery $query, $parent = null)
    {
        $xpath = $parent.$query->getXPath();

        $nodes = $this->query($xpath);
        $count = $nodes ? $nodes->length : 0;

        $ids = [];

        // XPath positions are 1-based.
        for ($position = 1; $position <= $count; $position++) {
            $ids []= sprintf('(%s)[%d]', $xpath, $position);
        }

        return $query->getFilters()->matchAll($this, $ids);
    }
}
319
|
|
|
|
Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.
Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.