Completed
Push — master ( 5f2915...02e675 )
by Matthijs
03:27
created

SpiderTest::testCrawlFailedRequest()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 16
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 4
Bugs 0 Features 1
Metric Value
c 4
b 0
f 1
dl 0
loc 16
rs 9.4286
cc 1
eloc 11
nc 1
nop 0
1
<?php
2
namespace VDB\Spider;
3
4
use Exception;
5
use Guzzle\Http\Message\Response;
6
use PHPUnit_Framework_MockObject_MockObject;
7
use VDB\Spider\Discoverer\XPathExpressionDiscoverer;
8
use VDB\Spider\Tests\TestCase;
9
use VDB\Spider\QueueManager\InMemoryQueueManager;
10
use VDB\Spider\StatsHandler;
11
use VDB\Spider\Uri\DiscoveredUri;
12
use VDB\Uri\Uri;
13
14
/**
15
 */
16
class SpiderTest extends TestCase
{
    /**
     * @var Spider The spider under test, seeded with the URI of page A.
     */
    protected $spider;

    /**
     * @var LogHandler Subscribed to the spider, queue manager and downloader dispatchers.
     */
    protected $logHandler;

    /**
     * @var StatsHandler Subscribed to the same dispatchers; the tests read their results from it.
     */
    protected $statsHandler;

    /**
     * @var PHPUnit_Framework_MockObject_MockObject Mock of RequestHandlerInterface.
     */
    protected $requestHandler;

    /** @var DiscoveredUri */
    protected $linkA;
    /** @var DiscoveredUri */
    protected $linkB;
    /** @var DiscoveredUri */
    protected $linkC;
    /** @var DiscoveredUri */
    protected $linkD;
    /** @var DiscoveredUri */
    protected $linkE;
    /** @var DiscoveredUri */
    protected $linkF;
    /** @var DiscoveredUri */
    protected $linkG;

    /** @var Response */
    protected $responseA;
    /** @var Response */
    protected $responseB;
    /** @var Response */
    protected $responseC;
    /** @var Response */
    protected $responseD;
    /** @var Response */
    protected $responseE;
    /** @var Response */
    protected $responseF;
    /** @var Response */
    protected $responseG;

    /** @var string */
    protected $hrefA;
    /** @var string */
    protected $hrefB;
    /** @var string */
    protected $hrefC;
    /** @var string */
    protected $hrefD;
    /** @var string */
    protected $hrefE;
    /** @var string */
    protected $hrefF;
    /** @var string */
    protected $hrefG;

    /**
     * @var array An associative array mapping the string form of each $this->linkX
     *            (as returned by toString()) to the matching $this->responseX.
     */
    protected $linkToResponseMap = [];

    /**
     * Sets up the fixture; this method is called before a test is executed.
     *
     * Setting up the following structure:
     *
     * 0:        A
     *          /|\
     * 1:      B C E
     *        /| | |
     * 2:    D F G |
     *         | _ |
     *
     * Note: E links to F.
     *
     * Every page is backed by an HTML file in the Fixtures directory. The mocked
     * request handler serves those files through doTestRequest(), so no real
     * HTTP request is made by the code in this class.
     */
    protected function setUp()
    {
        $this->hrefA = 'http://php-spider.org/A';
        $this->hrefB = 'http://php-spider.org/B';
        $this->hrefC = 'http://php-spider.org/C';
        $this->hrefD = 'http://php-spider.org/D';
        $this->hrefE = 'http://php-spider.org/E';
        $this->hrefF = 'http://php-spider.org/F';
        $this->hrefG = 'http://php-spider.org/G';

        // The crawl starts at page A.
        $this->spider = new Spider($this->hrefA);

        $this->requestHandler = $this->getMock('VDB\Spider\RequestHandler\RequestHandlerInterface');

        // The depth of each link mirrors the row it occupies in the diagram above.
        $this->linkA = $this->buildLinkAtDepth($this->hrefA, 0);
        $this->linkB = $this->buildLinkAtDepth($this->hrefB, 1);
        $this->linkC = $this->buildLinkAtDepth($this->hrefC, 1);
        $this->linkD = $this->buildLinkAtDepth($this->hrefD, 2);
        $this->linkE = $this->buildLinkAtDepth($this->hrefE, 1);
        $this->linkF = $this->buildLinkAtDepth($this->hrefF, 2);
        $this->linkG = $this->buildLinkAtDepth($this->hrefG, 2);

        $this->responseA = $this->loadFixtureResponse('A');
        $this->responseB = $this->loadFixtureResponse('B');
        $this->responseC = $this->loadFixtureResponse('C');
        $this->responseD = $this->loadFixtureResponse('D');
        $this->responseE = $this->loadFixtureResponse('E');
        $this->responseF = $this->loadFixtureResponse('F');
        $this->responseG = $this->loadFixtureResponse('G');

        $this->linkToResponseMap = [
            $this->linkA->toString() => $this->responseA,
            $this->linkB->toString() => $this->responseB,
            $this->linkC->toString() => $this->responseC,
            $this->linkD->toString() => $this->responseD,
            $this->linkE->toString() => $this->responseE,
            $this->linkF->toString() => $this->responseF,
            $this->linkG->toString() => $this->responseG,
        ];

        // Route every request() call on the mock to doTestRequest().
        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->will($this->returnCallback([$this, 'doTestRequest']));

        $this->spider->getDownloader()->setRequestHandler($this->requestHandler);

        // Discover links from every anchor element in the fixture pages.
        $this->spider->getDiscovererSet()->set(new XPathExpressionDiscoverer('//a'));

        $this->statsHandler = new StatsHandler();
        $this->subscribeToAllDispatchers($this->statsHandler);

        $this->logHandler = new LogHandler();
        $this->subscribeToAllDispatchers($this->logHandler);
    }

    /**
     * Creates a DiscoveredUri for the given href and records the depth it is found at.
     *
     * @param string $href       Absolute URI of the page.
     * @param int    $depthFound Depth passed to DiscoveredUri::setDepthFound().
     *
     * @return DiscoveredUri
     */
    private function buildLinkAtDepth($href, $depthFound)
    {
        $link = new DiscoveredUri(new Uri($href));
        $link->setDepthFound($depthFound);

        return $link;
    }

    /**
     * Builds a 200 response whose body is the HTML fixture for the given page.
     *
     * @param string $letter Page letter; selects Fixtures/SpiderTestHTMLResource<letter>.html.
     *
     * @return Response
     * @throws \RuntimeException When the fixture file cannot be read.
     */
    private function loadFixtureResponse($letter)
    {
        $path = __DIR__ . '/Fixtures/SpiderTestHTMLResource' . $letter . '.html';
        $html = file_get_contents($path);

        // file_get_contents() returns false on failure; fail with a clear message
        // instead of building a response with a bogus body.
        if ($html === false) {
            throw new \RuntimeException('Unable to read fixture file: ' . $path);
        }

        return new Response(200, null, $html);
    }

    /**
     * Registers a subscriber on the spider, queue manager and downloader dispatchers.
     *
     * @param object $subscriber The subscriber handed to each addSubscriber() call.
     */
    private function subscribeToAllDispatchers($subscriber)
    {
        $this->spider->getDispatcher()->addSubscriber($subscriber);
        $this->spider->getQueueManager()->getDispatcher()->addSubscriber($subscriber);
        $this->spider->getDownloader()->getDispatcher()->addSubscriber($subscriber);
    }

    /**
     * Callback behind the mocked request handler's request() method.
     *
     * Looks the requested link up in $linkToResponseMap and wraps the stubbed
     * response via getResource(), which is not defined in this class
     * (presumably inherited from the TestCase base class; verify there).
     *
     * @param mixed $link The first argument passed to request(); it must expose
     *                    toString(). Presumably a DiscoveredUri; confirm against
     *                    RequestHandlerInterface.
     *
     * @return Resource
     * @throws \ErrorException When no response was stubbed for the requested URI.
     */
    public function doTestRequest($link)
    {
        $uri = $link->toString();

        if (!array_key_exists($uri, $this->linkToResponseMap)) {
            throw new \ErrorException('The requested URI was not stubbed: ' . $uri);
        }

        return $this->getResource($link, $this->linkToResponseMap[$uri]);
    }

    /**
     * @covers VDB\Spider\Spider::crawl
     *
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
     *
     * No traversal algorithm is set here, so the queue manager's default is used.
     * With a max depth of 10 we expect the following result: A, E, F, C, G, B, D
     */
    public function testCrawlDFSDefaultBehaviour()
    {
        $this->spider->getDiscovererSet()->maxDepth = 10;

        $this->spider->crawl();

        $expected = [
            $this->linkA,
            $this->linkE,
            $this->linkF,
            $this->linkC,
            $this->linkG,
            $this->linkB,
            $this->linkD,
        ];

        $this->assertEquals($expected, $this->statsHandler->getPersisted());
    }

    /**
     * @covers VDB\Spider\Spider::crawl
     *
     * With breadth-first traversal and a max depth of 1000 we expect the
     * following result: A, B, C, E, D, F, G
     */
    public function testCrawlBFSDefaultBehaviour()
    {
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $this->spider->getDiscovererSet()->maxDepth = 1000;

        $this->spider->crawl();

        $expected = [
            $this->linkA,
            $this->linkB,
            $this->linkC,
            $this->linkE,
            $this->linkD,
            $this->linkF,
            $this->linkG,
        ];

        $this->assertEquals($expected, $this->statsHandler->getPersisted());
    }

    /**
     * @covers VDB\Spider\Spider::crawl
     *
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
     *
     * Given the following structure:
     *
     * 0:        A
     *          /|\
     * 1:      B C E
     *        /| | |
     * 2:    D F G |
     *         | _ |
     *
     * We expect the following result: A, E, C, B
     */
    public function testCrawlDFSMaxDepthOne()
    {
        $this->spider->getDiscovererSet()->maxDepth = 1;

        $this->spider->crawl();

        $expected = [
            $this->linkA,
            $this->linkE,
            $this->linkC,
            $this->linkB,
        ];

        $this->assertEquals($expected, $this->statsHandler->getPersisted());
    }

    /**
     * @covers VDB\Spider\Spider::crawl
     *
     * With breadth-first traversal and a max depth of 1 we expect the
     * following result: A, B, C, E
     */
    public function testCrawlBFSMaxDepthOne()
    {
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $this->spider->getDiscovererSet()->maxDepth = 1;

        $this->spider->crawl();

        $expected = [
            $this->linkA,
            $this->linkB,
            $this->linkC,
            $this->linkE,
        ];

        $this->assertEquals($expected, $this->statsHandler->getPersisted());
    }

    /**
     * @covers VDB\Spider\Spider::crawl
     *
     * Despite the method name, the limit configured here is the downloader's
     * download limit (3). We expect the following result: A, E, F
     */
    public function testCrawlDFSMaxQueueSize()
    {
        $this->spider->getDiscovererSet()->maxDepth = 1000;
        $this->spider->getDownloader()->setDownloadLimit(3);

        $this->spider->crawl();

        $expected = [
            $this->linkA,
            $this->linkE,
            $this->linkF,
        ];

        $this->assertEquals($expected, $this->statsHandler->getPersisted());
    }

    /**
     * @covers VDB\Spider\Spider::crawl
     *
     * Despite the method name, the limit configured here is the downloader's
     * download limit (3). With breadth-first traversal we expect the following
     * result: A, B, C
     */
    public function testCrawlBFSMaxQueueSize()
    {
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $this->spider->getDiscovererSet()->maxDepth = 1000;
        $this->spider->getDownloader()->setDownloadLimit(3);

        $this->spider->crawl();

        $expected = [
            $this->linkA,
            $this->linkB,
            $this->linkC,
        ];

        $this->assertEquals($expected, $this->statsHandler->getPersisted());
    }

    /**
     * @covers VDB\Spider\Spider::crawl
     *
     * Makes every request throw and checks that the crawl records exactly one
     * failed item and nothing filtered or persisted.
     */
    public function testCrawlFailedRequest()
    {
        // NOTE(review): setUp() already registers a stub for request(); this test
        // relies on the throwing stub below taking effect alongside it. Confirm
        // this holds for the PHPUnit version in use.
        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->will(
                $this->throwException(new Exception('Failed mock request!'))
            );

        $this->spider->crawl();
        $stats = $this->statsHandler;

        $this->assertCount(0, $stats->getFiltered(), 'Filtered count');
        $this->assertCount(0, $stats->getPersisted(), 'Persisted count');
        $this->assertCount(1, $stats->getFailed(), 'Failed count');
    }
}
345