Completed
Pull Request — master (#52)
by Tobias
03:22
created

SpiderTest::testCrawlFailedRequest()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 13
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
dl 0
loc 13
rs 9.4285
c 2
b 0
f 0
cc 1
eloc 8
nc 1
nop 0
1
<?php
2
namespace VDB\Spider;
3
4
use Exception;
5
use GuzzleHttp\Psr7\Response;
6
use PHPUnit_Framework_MockObject_MockObject;
7
use VDB\Spider\Discoverer\XPathExpressionDiscoverer;
8
use VDB\Spider\Tests\TestCase;
9
use VDB\Spider\QueueManager\InMemoryQueueManager;
10
use VDB\Spider\Uri\DiscoveredUri;
11
use VDB\Uri\Uri;
12
13
/**
 * Tests for Spider crawling, run against a stubbed request handler.
 *
 * No real HTTP requests are made: the mocked request handler answers every
 * request from $linkToResponseMap, which setUp() fills from the HTML files in
 * the Fixtures directory.
 */
class SpiderTest extends TestCase
{
    /**
     * @var Spider
     */
    protected $spider;

    /**
     * @var PHPUnit_Framework_MockObject_MockObject
     */
    protected $requestHandler;

    /** @var DiscoveredUri */
    protected $linkA;
    /** @var DiscoveredUri */
    protected $linkB;
    /** @var DiscoveredUri */
    protected $linkC;
    /** @var DiscoveredUri */
    protected $linkD;
    /** @var DiscoveredUri */
    protected $linkE;
    /** @var DiscoveredUri */
    protected $linkF;
    /** @var DiscoveredUri */
    protected $linkG;

    /** @var Response */
    protected $responseA;
    /** @var Response */
    protected $responseB;
    /** @var Response */
    protected $responseC;
    /** @var Response */
    protected $responseD;
    /** @var Response */
    protected $responseE;
    /** @var Response */
    protected $responseF;
    /** @var Response */
    protected $responseG;

    /** @var string */
    protected $hrefA;
    /** @var string */
    protected $hrefB;
    /** @var string */
    protected $hrefC;
    /** @var string */
    protected $hrefD;
    /** @var string */
    protected $hrefE;
    /** @var string */
    protected $hrefF;
    /** @var string */
    protected $hrefG;

    /**
     * @var array An associative array, containing a map of $this->linkX to $this->responseX.
     */
    protected $linkToResponseMap = [];

    /**
     * Builds the spider, the stubbed request handler and the seven test pages.
     * This method is called before a test is executed.
     *
     * Setting up the following structure:
     *
     * 0:        A
     *          /|\
     * 1:      B C E
     *        /| | |
     * 2:    D F G |
     *         | _ |
     *
     * Note: E links to F.
     */
    protected function setUp()
    {
        $this->spider = new Spider('http://php-spider.org/A');

        $this->requestHandler = $this->getMockBuilder('VDB\Spider\RequestHandler\RequestHandlerInterface')->getMock();

        // Page letter => depth at which that page is found in the structure above.
        $depthByPage = ['A' => 0, 'B' => 1, 'C' => 1, 'D' => 2, 'E' => 1, 'F' => 2, 'G' => 2];

        foreach ($depthByPage as $page => $depth) {
            $href = 'http://php-spider.org/' . $page;

            $link = new DiscoveredUri(new Uri($href));
            $link->setDepthFound($depth);

            $html = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResource' . $page . '.html');
            $response = new Response(200, [], $html);

            // Fill the declared $hrefX / $linkX / $responseX properties for this page.
            $this->{'href' . $page} = $href;
            $this->{'link' . $page} = $link;
            $this->{'response' . $page} = $response;

            $this->linkToResponseMap[$link->toString()] = $response;
        }

        // Route every request made through the mock to doTestRequest().
        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->will($this->returnCallback([$this, 'doTestRequest']));

        $this->spider->getDownloader()->setRequestHandler($this->requestHandler);

        $this->spider->getDiscovererSet()->set(new XPathExpressionDiscoverer('//a'));
    }

    /**
     * Callback behind the mocked request handler's request() method.
     *
     * Declared without parameters; the requested link is read from the first
     * call argument. Public because it is handed to the mock as a callable.
     *
     * @return Resource The resource built by getResource() for the stubbed response.
     * @throws \ErrorException When the requested URI has no entry in $linkToResponseMap.
     */
    public function doTestRequest()
    {
        $link = func_get_arg(0);

        if (array_key_exists($link->toString(), $this->linkToResponseMap)) {
            return $this->getResource($link, $this->linkToResponseMap[$link->toString()]);
        }

        throw new \ErrorException('The requested URI was not stubbed: ' . $link->toString());
    }

    /**
     * @covers VDB\Spider\Spider
     *
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
     */
    public function testCrawlDFSDefaultBehaviour()
    {
        $this->spider->getDiscovererSet()->maxDepth = 10;

        $this->crawlAndAssertOrder([
            $this->linkA,
            $this->linkE,
            $this->linkF,
            $this->linkC,
            $this->linkG,
            $this->linkB,
            $this->linkD,
        ]);
    }

    /**
     * @covers VDB\Spider\Spider
     */
    public function testCrawlBFSDefaultBehaviour()
    {
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $this->spider->getDiscovererSet()->maxDepth = 1000;

        $this->crawlAndAssertOrder([
            $this->linkA,
            $this->linkB,
            $this->linkC,
            $this->linkE,
            $this->linkD,
            $this->linkF,
            $this->linkG,
        ]);
    }

    /**
     * Runs the crawl and checks what was persisted against the expected order.
     *
     * @param array $expected Expected DiscoveredUri instances, in persisted order.
     */
    private function crawlAndAssertOrder(array $expected)
    {
        $this->spider->crawl();

        $this->compareUriArray($expected, $this->spider->getDownloader()->getPersistenceHandler());
    }

    /**
     * Asserts that the persisted resources match the expected URIs, in order.
     *
     * The compared entries are counted because iterating $actual alone would
     * let an empty or truncated crawl result pass without any real comparison.
     *
     * @param array              $expected Expected DiscoveredUri instances, keyed by position.
     * @param array|\Traversable $actual   Persisted resources; each must expose getUri().
     */
    private function compareUriArray($expected, $actual)
    {
        $compared = 0;

        foreach ($actual as $index => $resource) {
            $this->assertArrayHasKey($index, $expected, 'More resources were persisted than expected');
            $this->assertEquals($expected[$index], $resource->getUri());
            $compared++;
        }

        $this->assertSame(count($expected), $compared, 'Number of persisted resources');
    }

    /**
     * @covers VDB\Spider\Spider
     *
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
     *
     * Given the following structure:
     *
     * 0:        A
     *          /|\
     * 1:      B C E
     *        /| | |
     * 2:    D F G |
     *         | _ |
     *
     * We expect the following result: A, E, C, B
     *
     */
    public function testCrawlDFSMaxDepthOne()
    {
        $this->spider->getDiscovererSet()->maxDepth = 1;

        $this->crawlAndAssertOrder([
            $this->linkA,
            $this->linkE,
            $this->linkC,
            $this->linkB,
        ]);
    }

    /**
     * @covers VDB\Spider\Spider
     */
    public function testCrawlBFSMaxDepthOne()
    {
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $this->spider->getDiscovererSet()->maxDepth = 1;

        $this->crawlAndAssertOrder([
            $this->linkA,
            $this->linkB,
            $this->linkC,
            $this->linkE,
        ]);
    }

    /**
     * @covers VDB\Spider\Spider
     */
    public function testCrawlDFSMaxQueueSize()
    {
        $this->spider->getDiscovererSet()->maxDepth = 1000;
        $this->spider->getDownloader()->setDownloadLimit(3);

        $this->crawlAndAssertOrder([
            $this->linkA,
            $this->linkE,
            $this->linkF,
        ]);
    }

    /**
     * @covers VDB\Spider\Spider
     */
    public function testCrawlBFSMaxQueueSize()
    {
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $this->spider->getDiscovererSet()->maxDepth = 1000;
        $this->spider->getDownloader()->setDownloadLimit(3);

        $this->crawlAndAssertOrder([
            $this->linkA,
            $this->linkB,
            $this->linkC,
        ]);
    }

    /**
     * Nothing may be persisted when every request throws.
     *
     * @covers VDB\Spider\Spider
     */
    public function testCrawlFailedRequest()
    {
        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->will(
                $this->throwException(new Exception('Failed mock request!'))
            );

        $this->spider->crawl();

        $this->assertCount(0, $this->spider->getDownloader()->getPersistenceHandler(), 'Persisted count');
    }

    /**
     * @covers VDB\Spider\Spider
     * @covers VDB\Spider\Downloader\Downloader::getDispatcher
     */
    public function testDownloaderEventDispatcher()
    {
        $this->assertSame(
            $this->spider->getDispatcher(),
            $this->spider->getDownloader()->getDispatcher(),
            'Default Spider dispatcher is the same as default Downloader dispatcher'
        );
    }

    /**
     * @covers VDB\Spider\Spider
     * @covers VDB\Spider\QueueManager\InMemoryQueueManager::getDispatcher
     */
    public function testQueueManagerEventDispatcher()
    {
        $this->assertSame(
            $this->spider->getDispatcher(),
            $this->spider->getQueueManager()->getDispatcher(),
            'Default Spider dispatcher is the same as default Queue manager dispatcher'
        );
    }
}
359