Failed Conditions
Push — master ( fc7301...8aadf7 )
by Matthijs
06:40
created

tests/VDB/Spider/Tests/SpiderTest.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
namespace VDB\Spider;
3
4
use Exception;
5
use GuzzleHttp\Psr7\Response;
6
use PHPUnit_Framework_MockObject_MockObject;
7
use VDB\Spider\Discoverer\XPathExpressionDiscoverer;
8
use VDB\Spider\Tests\TestCase;
9
use VDB\Spider\QueueManager\InMemoryQueueManager;
10
use VDB\Spider\Uri\DiscoveredUri;
11
use VDB\Uri\Uri;
12
13
/**
14
 */
15
class SpiderTest extends TestCase
16
{
17
    /**
18
     * @var Spider
19
     */
20
    protected $spider;
21
22
    /**
23
     * @var \PHPUnit\Framework\MockObject\MockObject
24
     */
25
    protected $requestHandler;
26
27
    /** @var DiscoveredUri */
28
    protected $linkA;
29
    /** @var DiscoveredUri */
30
    protected $linkB;
31
    /** @var DiscoveredUri */
32
    protected $linkC;
33
    /** @var DiscoveredUri */
34
    protected $linkD;
35
    /** @var DiscoveredUri */
36
    protected $linkE;
37
    /** @var DiscoveredUri */
38
    protected $linkF;
39
    /** @var DiscoveredUri */
40
    protected $linkG;
41
42
    /** @var Response */
43
    protected $responseA;
44
    /** @var Response */
45
    protected $responseB;
46
    /** @var Response */
47
    protected $responseC;
48
    /** @var Response */
49
    protected $responseD;
50
    /** @var Response */
51
    protected $responseE;
52
    /** @var Response */
53
    protected $responseF;
54
    /** @var Response */
55
    protected $responseG;
56
57
    /** @var string */
58
    protected $hrefA;
59
    protected $hrefB;
60
    protected $hrefC;
61
    protected $hrefD;
62
    protected $hrefE;
63
    protected $hrefF;
64
    protected $hrefG;
65
66
    /**
67
     * @var array An associative array, containing a map of $this->linkX to $this->responseX.
68
     */
69
    protected $linkToResponseMap = [];
70
71
    /**
72
     * Sets up the fixture, for example, opens a network connection.
73
     * This method is called before a test is executed.
74
     *
75
     * Setting up the following structure:
76
     *
77
     * 0:        A
78
     *          /|\
79
     * 1:      B C E
80
     *        /| | |
81
     * 2:    D F G |
82
     *         | _ |
83
     *
84
     * Note: E links to F.
85
     */
86
    protected function setUp(): void
    {
        $this->spider = new Spider('http://php-spider.org/A');

        // getMock() yields a PHPUnit\Framework\MockObject\MockObject instance.
        $this->requestHandler = $this
            ->getMockBuilder('VDB\Spider\RequestHandler\RequestHandlerInterface')
            ->getMock();

        // Page letter => [absolute URI, depth assigned to the link via setDepthFound()].
        $pages = [
            'A' => ['http://php-spider.org/A', 0],
            'B' => ['http://php-spider.org/B', 1],
            'C' => ['http://php-spider.org/C', 1],
            'D' => ['http://php-spider.org/D', 2],
            'E' => ['http://php-spider.org/E', 1],
            'F' => ['http://php-spider.org/F', 2],
            'G' => ['http://php-spider.org/G', 2],
        ];

        foreach ($pages as $page => [$href, $depth]) {
            $link = new DiscoveredUri(new Uri($href));
            $link->setDepthFound($depth);

            // Each page's body comes from its own HTML fixture file next to this test.
            $html = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResource' . $page . '.html');
            $response = new Response(200, [], $html);

            // Fills the declared properties $hrefA..G, $linkA..G and $responseA..G.
            $this->{'href' . $page} = $href;
            $this->{'link' . $page} = $link;
            $this->{'response' . $page} = $response;

            $this->linkToResponseMap[$link->toString()] = $response;
        }

        // Route every request() call on the mock through doTestRequest().
        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->willReturnCallback([$this, 'doTestRequest']);

        $this->spider->getDownloader()->setRequestHandler($this->requestHandler);

        $this->spider->getDiscovererSet()->set(new XPathExpressionDiscoverer('//a'));
    }
154
155
    /**
156
     * @return Resource
157
     * @throws \ErrorException
158
     */
159
    public function doTestRequest()
    {
        // Registered in setUp() as the callback for the mocked request() method;
        // the first argument is the link being requested.
        $link = func_get_arg(0);
        $uri = $link->toString();

        // Fail for any URI that has no entry in the link-to-response map.
        if (!array_key_exists($uri, $this->linkToResponseMap)) {
            throw new \ErrorException('The requested URI was not stubbed: ' . $uri);
        }

        return $this->getResource($link, $this->linkToResponseMap[$uri]);
    }
169
170
    /**
171
     * @covers VDB\Spider\Spider
172
     *
173
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
174
     */
175 View Code Duplication
    public function testCrawlDFSDefaultBehaviour()
    {
        // No traversal algorithm is selected here, so the queue manager's default applies.
        $spider = $this->spider;
        $spider->getDiscovererSet()->maxDepth = 10;

        $spider->crawl();

        $persisted = $spider->getDownloader()->getPersistenceHandler();

        // Expected visiting order for the fixture graph built in setUp().
        $this->compareUriArray(
            [
                $this->linkA,
                $this->linkE,
                $this->linkF,
                $this->linkC,
                $this->linkG,
                $this->linkB,
                $this->linkD,
            ],
            $persisted
        );
    }
193
194
    /**
195
     * @covers VDB\Spider\Spider
196
     */
197 View Code Duplication
    public function testCrawlBFSDefaultBehaviour()
    {
        $spider = $this->spider;

        // Select breadth-first traversal before crawling.
        $spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $spider->getDiscovererSet()->maxDepth = 1000;

        $spider->crawl();

        $persisted = $spider->getDownloader()->getPersistenceHandler();

        // Expected visiting order for the fixture graph built in setUp().
        $this->compareUriArray(
            [
                $this->linkA,
                $this->linkB,
                $this->linkC,
                $this->linkE,
                $this->linkD,
                $this->linkF,
                $this->linkG,
            ],
            $persisted
        );
    }
216
217
    /**
     * Asserts that the persisted resources match the expected URIs, position by position.
     *
     * @param array $expected Expected URIs, keyed by the position at which they should appear.
     * @param mixed $actual   Countable/iterable collection of persisted resources; each item
     *                        must expose getUri().
     */
    private function compareUriArray($expected, $actual)
    {
        // The loop below only visits items that were actually persisted, so without this
        // check an empty or truncated result would pass, and a longer one would hit an
        // undefined index in $expected instead of failing with a readable message.
        $this->assertCount(count($expected), $actual, 'Number of persisted resources');

        foreach ($actual as $index => $resource) {
            // Expected value first, actual second, so failure messages read correctly.
            $this->assertEquals($expected[$index], $resource->getUri());
        }
    }
223
224
    /**
225
     * @covers VDB\Spider\Spider
226
     *
227
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
228
     *
229
     * Given the following structure:
230
     *
231
     * 0:        A
232
     *          /|\
233
     * 1:      B C E
234
     *        /| | |
235
     * 2:    D F G |
236
     *         | _ |
237
     *
238
     * We expect the following result: A, E, C, B
239
     *
240
     */
241
    public function testCrawlDFSMaxDepthOne()
    {
        $spider = $this->spider;
        $spider->getDiscovererSet()->maxDepth = 1;

        $spider->crawl();

        $persisted = $spider->getDownloader()->getPersistenceHandler();

        // Only the links given depth 0 or 1 in setUp() are expected.
        $this->compareUriArray(
            [
                $this->linkA,
                $this->linkE,
                $this->linkC,
                $this->linkB,
            ],
            $persisted
        );
    }
256
257
    /**
258
     * @covers VDB\Spider\Spider
259
     */
260 View Code Duplication
    public function testCrawlBFSMaxDepthOne()
    {
        $spider = $this->spider;

        // Select breadth-first traversal before crawling.
        $spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $spider->getDiscovererSet()->maxDepth = 1;

        $spider->crawl();

        $persisted = $spider->getDownloader()->getPersistenceHandler();

        // Only the links given depth 0 or 1 in setUp() are expected.
        $this->compareUriArray(
            [
                $this->linkA,
                $this->linkB,
                $this->linkC,
                $this->linkE,
            ],
            $persisted
        );
    }
276
277
    /**
278
     * @covers VDB\Spider\Spider
279
     */
280 View Code Duplication
    public function testCrawlDFSMaxQueueSize()
    {
        $spider = $this->spider;
        $spider->getDiscovererSet()->maxDepth = 1000;

        // Download limit of 3; the expected list below has exactly three entries.
        $spider->getDownloader()->setDownloadLimit(3);

        $spider->crawl();

        $persisted = $spider->getDownloader()->getPersistenceHandler();

        $this->compareUriArray(
            [
                $this->linkA,
                $this->linkE,
                $this->linkF,
            ],
            $persisted
        );
    }
295
296
    /**
297
     * @covers VDB\Spider\Spider
298
     */
299 View Code Duplication
    public function testCrawlBFSMaxQueueSize()
    {
        $spider = $this->spider;

        // Select breadth-first traversal before crawling.
        $spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $spider->getDiscovererSet()->maxDepth = 1000;

        // Download limit of 3; the expected list below has exactly three entries.
        $spider->getDownloader()->setDownloadLimit(3);

        $spider->crawl();

        $persisted = $spider->getDownloader()->getPersistenceHandler();

        $this->compareUriArray(
            [
                $this->linkA,
                $this->linkB,
                $this->linkC,
            ],
            $persisted
        );
    }
315
316
    /**
317
     * @covers VDB\Spider\Spider
318
     */
319
    public function testCrawlFailedRequest()
    {
        // Register an expectation on the mocked handler that makes request() throw.
        $failure = new Exception('Failed mock request!');

        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->willThrowException($failure);

        $this->spider->crawl();

        // Nothing should have been persisted.
        $persisted = $this->spider->getDownloader()->getPersistenceHandler();
        $this->assertCount(0, $persisted, 'Persisted count');
    }
332
}
333