1 | <?php |
||||
2 | namespace VDB\Spider; |
||||
3 | |||||
4 | use Exception; |
||||
5 | use GuzzleHttp\Psr7\Response; |
||||
6 | use PHPUnit_Framework_MockObject_MockObject; |
||||
0 ignored issues
–
show
|
|||||
7 | use VDB\Spider\Discoverer\XPathExpressionDiscoverer; |
||||
8 | use VDB\Spider\Tests\TestCase; |
||||
9 | use VDB\Spider\QueueManager\InMemoryQueueManager; |
||||
10 | use VDB\Spider\Uri\DiscoveredUri; |
||||
11 | use VDB\Uri\Uri; |
||||
12 | |||||
/**
 * Tests Spider::crawl() against a small, fully stubbed link graph.
 *
 * No real HTTP traffic happens: the request handler is a mock whose
 * request() method is answered by doTestRequest(), which serves responses
 * built from the HTML fixtures in the Fixtures directory.
 */
class SpiderTest extends TestCase
{
    /**
     * @var Spider
     */
    protected $spider;

    /**
     * Mocked request handler; created from RequestHandlerInterface in setUp().
     *
     * @var \PHPUnit\Framework\MockObject\MockObject
     */
    protected $requestHandler;

    /** @var DiscoveredUri */
    protected $linkA;
    /** @var DiscoveredUri */
    protected $linkB;
    /** @var DiscoveredUri */
    protected $linkC;
    /** @var DiscoveredUri */
    protected $linkD;
    /** @var DiscoveredUri */
    protected $linkE;
    /** @var DiscoveredUri */
    protected $linkF;
    /** @var DiscoveredUri */
    protected $linkG;

    /** @var Response */
    protected $responseA;
    /** @var Response */
    protected $responseB;
    /** @var Response */
    protected $responseC;
    /** @var Response */
    protected $responseD;
    /** @var Response */
    protected $responseE;
    /** @var Response */
    protected $responseF;
    /** @var Response */
    protected $responseG;

    /** @var string */
    protected $hrefA;
    /** @var string */
    protected $hrefB;
    /** @var string */
    protected $hrefC;
    /** @var string */
    protected $hrefD;
    /** @var string */
    protected $hrefE;
    /** @var string */
    protected $hrefF;
    /** @var string */
    protected $hrefG;

    /**
     * @var array An associative array, containing a map of $this->linkX to $this->responseX.
     *            Keys are the string form of each link (DiscoveredUri::toString()).
     */
    protected $linkToResponseMap = [];

    /**
     * Sets up the fixture. This method is called before each test is executed.
     *
     * Setting up the following structure (row number = depth found):
     *
     * 0:        A
     *          /|\
     * 1:      B C E
     *        /| | |
     * 2:    D F G |
     *         | _ |
     *
     * Note: E links to F.
     */
    protected function setUp(): void
    {
        $this->spider = new Spider('http://php-spider.org/A');

        $this->requestHandler = $this
            ->getMockBuilder(\VDB\Spider\RequestHandler\RequestHandlerInterface::class)
            ->getMock();

        $this->hrefA = 'http://php-spider.org/A';
        $this->hrefB = 'http://php-spider.org/B';
        $this->hrefC = 'http://php-spider.org/C';
        $this->hrefD = 'http://php-spider.org/D';
        $this->hrefE = 'http://php-spider.org/E';
        $this->hrefF = 'http://php-spider.org/F';
        $this->hrefG = 'http://php-spider.org/G';

        $this->linkA = new DiscoveredUri(new Uri($this->hrefA));
        $this->linkB = new DiscoveredUri(new Uri($this->hrefB));
        $this->linkC = new DiscoveredUri(new Uri($this->hrefC));
        $this->linkD = new DiscoveredUri(new Uri($this->hrefD));
        $this->linkE = new DiscoveredUri(new Uri($this->hrefE));
        $this->linkF = new DiscoveredUri(new Uri($this->hrefF));
        $this->linkG = new DiscoveredUri(new Uri($this->hrefG));

        // Depths mirror the rows of the diagram in this method's docblock.
        $this->linkA->setDepthFound(0);
        $this->linkB->setDepthFound(1);
        $this->linkC->setDepthFound(1);
        $this->linkD->setDepthFound(2);
        $this->linkE->setDepthFound(1);
        $this->linkF->setDepthFound(2);
        $this->linkG->setDepthFound(2);

        $this->responseA = $this->loadHtmlFixtureResponse('A');
        $this->responseB = $this->loadHtmlFixtureResponse('B');
        $this->responseC = $this->loadHtmlFixtureResponse('C');
        $this->responseD = $this->loadHtmlFixtureResponse('D');
        $this->responseE = $this->loadHtmlFixtureResponse('E');
        $this->responseF = $this->loadHtmlFixtureResponse('F');
        $this->responseG = $this->loadHtmlFixtureResponse('G');

        $this->linkToResponseMap = [
            $this->linkA->toString() => $this->responseA,
            $this->linkB->toString() => $this->responseB,
            $this->linkC->toString() => $this->responseC,
            $this->linkD->toString() => $this->responseD,
            $this->linkE->toString() => $this->responseE,
            $this->linkF->toString() => $this->responseF,
            $this->linkG->toString() => $this->responseG,
        ];

        // Every request() call on the mock is answered from the stub map.
        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->willReturnCallback([$this, 'doTestRequest']);

        // NOTE(review): setRequestHandler() is not declared on DownloaderInterface
        // (static analysis flags this call); it relies on the concrete downloader
        // that the spider returns.
        $this->spider->getDownloader()->setRequestHandler($this->requestHandler);

        $this->spider->getDiscovererSet()->set(new XPathExpressionDiscoverer('//a'));
    }

    /**
     * Builds a 200 response whose body is the HTML fixture for one node of the graph.
     *
     * @param string $letter Node letter; selects Fixtures/SpiderTestHTMLResource{letter}.html
     *
     * @return Response
     * @throws \RuntimeException when the fixture file cannot be read
     */
    private function loadHtmlFixtureResponse(string $letter): Response
    {
        $path = __DIR__ . '/Fixtures/SpiderTestHTMLResource' . $letter . '.html';
        $html = file_get_contents($path);

        // file_get_contents() returns false on failure; fail with the path in the
        // message instead of handing a boolean to the Response constructor.
        if ($html === false) {
            throw new \RuntimeException('Unable to read test fixture: ' . $path);
        }

        return new Response(200, [], $html);
    }

    /**
     * Callback behind the mocked request(): serves the stubbed response for a link.
     *
     * Must stay public because the mock invokes it as a callable.
     *
     * @param mixed $link The argument the mock received in request(); it must
     *                    expose toString(), which is used as the lookup key.
     *
     * @return Resource The result of getResource() for the link and its stubbed response
     * @throws \ErrorException when no response was stubbed for the requested URI
     */
    public function doTestRequest($link)
    {
        $href = $link->toString();

        if (!array_key_exists($href, $this->linkToResponseMap)) {
            throw new \ErrorException('The requested URI was not stubbed: ' . $href);
        }

        return $this->getResource($link, $this->linkToResponseMap[$href]);
    }

    /**
     * @covers VDB\Spider\Spider
     *
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
     */
    public function testCrawlDFSDefaultBehaviour(): void
    {
        $this->spider->getDiscovererSet()->maxDepth = 10;

        $this->spider->crawl();

        $expected = [
            $this->linkA,
            $this->linkE,
            $this->linkF,
            $this->linkC,
            $this->linkG,
            $this->linkB,
            $this->linkD,
        ];

        $this->compareUriArray($expected, $this->spider->getDownloader()->getPersistenceHandler());
    }

    /**
     * @covers VDB\Spider\Spider
     */
    public function testCrawlBFSDefaultBehaviour(): void
    {
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $this->spider->getDiscovererSet()->maxDepth = 1000;

        $this->spider->crawl();

        $expected = [
            $this->linkA,
            $this->linkB,
            $this->linkC,
            $this->linkE,
            $this->linkD,
            $this->linkF,
            $this->linkG,
        ];

        $this->compareUriArray($expected, $this->spider->getDownloader()->getPersistenceHandler());
    }

    /**
     * Asserts that the persisted resources match the expected links, in order.
     *
     * The count is asserted first: iterating over $actual alone would pass
     * vacuously if nothing (or too little) had been persisted.
     *
     * @param array $expected Expected links, in crawl order
     * @param mixed $actual   Countable/iterable of resources exposing getUri()
     */
    private function compareUriArray($expected, $actual): void
    {
        $this->assertCount(count($expected), $actual, 'Number of persisted resources');

        foreach ($actual as $index => $resource) {
            $this->assertEquals($expected[$index], $resource->getUri());
        }
    }

    /**
     * @covers VDB\Spider\Spider
     *
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
     *
     * Given the following structure:
     *
     * 0:        A
     *          /|\
     * 1:      B C E
     *        /| | |
     * 2:    D F G |
     *         | _ |
     *
     * We expect the following result: A, E, C, B
     *
     */
    public function testCrawlDFSMaxDepthOne(): void
    {
        $this->spider->getDiscovererSet()->maxDepth = 1;

        $this->spider->crawl();

        $expected = [
            $this->linkA,
            $this->linkE,
            $this->linkC,
            $this->linkB,
        ];

        $this->compareUriArray($expected, $this->spider->getDownloader()->getPersistenceHandler());
    }

    /**
     * @covers VDB\Spider\Spider
     */
    public function testCrawlBFSMaxDepthOne(): void
    {
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $this->spider->getDiscovererSet()->maxDepth = 1;

        $this->spider->crawl();

        $expected = [
            $this->linkA,
            $this->linkB,
            $this->linkC,
            $this->linkE,
        ];

        $this->compareUriArray($expected, $this->spider->getDownloader()->getPersistenceHandler());
    }

    /**
     * Depth-first crawl capped by the downloader's download limit (3).
     *
     * @covers VDB\Spider\Spider
     */
    public function testCrawlDFSMaxQueueSize(): void
    {
        $this->spider->getDiscovererSet()->maxDepth = 1000;
        $this->spider->getDownloader()->setDownloadLimit(3);

        $this->spider->crawl();

        $expected = [
            $this->linkA,
            $this->linkE,
            $this->linkF,
        ];

        $this->compareUriArray($expected, $this->spider->getDownloader()->getPersistenceHandler());
    }

    /**
     * Breadth-first crawl capped by the downloader's download limit (3).
     *
     * @covers VDB\Spider\Spider
     */
    public function testCrawlBFSMaxQueueSize(): void
    {
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $this->spider->getDiscovererSet()->maxDepth = 1000;
        $this->spider->getDownloader()->setDownloadLimit(3);

        $this->spider->crawl();

        $expected = [
            $this->linkA,
            $this->linkB,
            $this->linkC,
        ];

        $this->compareUriArray($expected, $this->spider->getDownloader()->getPersistenceHandler());
    }

    /**
     * When every request throws, the crawl must persist nothing.
     *
     * @covers VDB\Spider\Spider
     */
    public function testCrawlFailedRequest(): void
    {
        // Registered in addition to the stub from setUp().
        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->willThrowException(new Exception('Failed mock request!'));

        $this->spider->crawl();

        $this->assertCount(0, $this->spider->getDownloader()->getPersistenceHandler(), 'Persisted count');
    }
}
||||
333 |
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list instead. For further information, see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths