Passed
Push — master ( 88bf51...34820f )
by Matthijs
08:16
created

tests/VDB/Spider/Tests/SpiderTest.php (4 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
namespace VDB\Spider;
3
4
use Exception;
5
use Guzzle\Http\Message\Response;
6
use PHPUnit_Framework_MockObject_MockObject;
7
use VDB\Spider\Discoverer\XPathExpressionDiscoverer;
8
use VDB\Spider\Tests\TestCase;
9
use VDB\Spider\QueueManager\InMemoryQueueManager;
10
use VDB\Spider\StatsHandler;
11
use VDB\Spider\Uri\DiscoveredUri;
12
use VDB\Uri\Uri;
13
14
/**
15
 */
16
class SpiderTest extends TestCase
17
{
18
    /**
19
     * @var Spider The spider under test; setUp() seeds it with http://php-spider.org/A
20
     */
21
    protected $spider;
22
23
    /**
24
     * @var LogHandler Event subscriber that setUp() attaches to the spider and queue manager dispatchers
25
     */
26
    protected $logHandler;
27
28
    /**
29
     * @var StatsHandler Event subscriber whose getPersisted(), getFiltered() and getFailed() results the tests assert on
30
     */
31
    protected $statsHandler;
32
33
    /**
34
     * @var PHPUnit_Framework_MockObject_MockObject Mock of RequestHandlerInterface whose request() method is stubbed in setUp()
35
     */
36
    protected $requestHandler;
37
38
    /** @var DiscoveredUri Link to page A (the seed URI), depth found 0 */
39
    protected $linkA;
40
    /** @var DiscoveredUri Link to page B, depth found 1 */
41
    protected $linkB;
42
    /** @var DiscoveredUri Link to page C, depth found 1 */
43
    protected $linkC;
44
    /** @var DiscoveredUri Link to page D, depth found 2 */
45
    protected $linkD;
46
    /** @var DiscoveredUri Link to page E, depth found 1 */
47
    protected $linkE;
48
    /** @var DiscoveredUri Link to page F, depth found 2 */
49
    protected $linkF;
50
    /** @var DiscoveredUri Link to page G, depth found 2 */
51
    protected $linkG;
52
53
    /** @var Response Canned 200 response carrying Fixtures/SpiderTestHTMLResourceA.html */
54
    protected $responseA;
55
    /** @var Response Canned 200 response carrying Fixtures/SpiderTestHTMLResourceB.html */
56
    protected $responseB;
57
    /** @var Response Canned 200 response carrying Fixtures/SpiderTestHTMLResourceC.html */
58
    protected $responseC;
59
    /** @var Response Canned 200 response carrying Fixtures/SpiderTestHTMLResourceD.html */
60
    protected $responseD;
61
    /** @var Response Canned 200 response carrying Fixtures/SpiderTestHTMLResourceE.html */
62
    protected $responseE;
63
    /** @var Response Canned 200 response carrying Fixtures/SpiderTestHTMLResourceF.html */
64
    protected $responseF;
65
    /** @var Response Canned 200 response carrying Fixtures/SpiderTestHTMLResourceG.html */
66
    protected $responseG;
67
68
    /** @var string Absolute URI of page A; $hrefB through $hrefG below hold the URIs of pages B-G */
69
    protected $hrefA;
70
    protected $hrefB;
71
    protected $hrefC;
72
    protected $hrefD;
73
    protected $hrefE;
74
    protected $hrefF;
75
    protected $hrefG;
76
77
    /**
     * Builds the fixture shared by every test in this class.
     *
     * A Spider seeded with page A is wired to a mocked request handler that
     * answers with canned responses read from the Fixtures directory.
     * The pages form the following structure:
     *
     * 0:        A
     *          /|\
     * 1:      B C E
     *        /| | |
     * 2:    D F G |
     *         | _ |
     *
     * Note: E links to F.
     *
     * The number in front of each row is the depth recorded on that row's
     * links through setDepthFound().
     */
    protected function setUp()
    {
        $this->spider = new Spider('http://php-spider.org/A');
        $this->requestHandler = $this->getMock('VDB\Spider\RequestHandler\RequestHandlerInterface');

        // Page letter => depth at which that page is found.
        $depthByPage = array('A' => 0, 'B' => 1, 'C' => 1, 'D' => 2, 'E' => 1, 'F' => 2, 'G' => 2);

        foreach ($depthByPage as $page => $depth) {
            $href = 'http://php-spider.org/' . $page;

            $link = new DiscoveredUri(new Uri($href));
            $link->setDepthFound($depth);

            $html = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResource' . $page . '.html');

            // Populate $hrefA..$hrefG, $linkA..$linkG and $responseA..$responseG.
            $this->{'href' . $page} = $href;
            $this->{'link' . $page} = $link;
            $this->{'response' . $page} = new Response(200, null, $html);
        }

        // Every request() call on the mock is answered by doTestRequest().
        $serveFixture = $this->returnCallback(array($this, 'doTestRequest'));
        $this->requestHandler->expects($this->any())->method('request')->will($serveFixture);

        $this->spider->setRequestHandler($this->requestHandler);
        $this->spider->getDiscovererSet()->set(new XPathExpressionDiscoverer('//a'));

        $this->statsHandler = new StatsHandler();
        $this->logHandler = new LogHandler();

        // Each handler listens on both the spider's and the queue manager's dispatcher.
        foreach (array($this->statsHandler, $this->logHandler) as $subscriber) {
            $this->spider->getDispatcher()->addSubscriber($subscriber);
            $this->spider->getQueueManager()->getDispatcher()->addSubscriber($subscriber);
        }
    }
160
161
    /**
     * Callback behind the mocked request handler's request() method.
     *
     * Compares the string form of the requested link with fixture links A-G
     * and returns the canned resource for the one that matches.
     *
     * @param object $link the link being requested; only its toString() method is used here
     *                     (presumably a DiscoveredUri - verify against RequestHandlerInterface::request())
     *
     * @return Resource whatever getResource() builds from the matching fixture link and response
     * @throws \ErrorException when the requested URI is not one of the stubbed pages
     */
    public function doTestRequest($link)
    {
        // Declared as a real parameter (instead of func_get_arg(0)) so the method's arity is visible.
        switch ($link->toString()) {
            case $this->linkA->toString():
                return $this->getResource($this->linkA, $this->responseA);
            case $this->linkB->toString():
                return $this->getResource($this->linkB, $this->responseB);
            case $this->linkC->toString():
                return $this->getResource($this->linkC, $this->responseC);
            case $this->linkD->toString():
                return $this->getResource($this->linkD, $this->responseD);
            case $this->linkE->toString():
                return $this->getResource($this->linkE, $this->responseE);
            case $this->linkF->toString():
                return $this->getResource($this->linkF, $this->responseF);
            case $this->linkG->toString():
                return $this->getResource($this->linkG, $this->responseG);
            default:
                throw new \ErrorException('The requested URI was not stubbed: ' . $link->toString());
        }
    }
188
189
    /**
     * Crawls with a maximum depth of 10 and no explicit traversal algorithm,
     * then checks the order in which the stats handler reports pages as persisted.
     *
     * The expected order is depth-first, as explained here:
     * https://en.wikipedia.org/wiki/Depth-first_search#Example
     *
     * NOTE(review): static analysis reports this test as duplicated code
     * alongside the other crawl-order tests of this class.
     *
     * @covers VDB\Spider\Spider::crawl
     */
    public function testCrawlDFSDefaultBehaviour()
    {
        $discovererSet = $this->spider->getDiscovererSet();
        $discovererSet->maxDepth = 10;

        $this->spider->crawl();

        $visitOrder = array(
            $this->linkA,
            $this->linkE,
            $this->linkF,
            $this->linkC,
            $this->linkG,
            $this->linkB,
            $this->linkD,
        );
        $persisted = $this->statsHandler->getPersisted();

        $this->assertEquals($visitOrder, $persisted);
    }
212
213
    /**
     * Switches the queue manager to breadth-first traversal, crawls with a
     * maximum depth of 1000, and checks the order in which the stats handler
     * reports pages as persisted: A, B, C, E, D, F, G.
     *
     * NOTE(review): static analysis reports this test as duplicated code
     * alongside the other crawl-order tests of this class.
     *
     * @covers VDB\Spider\Spider::crawl
     */
    public function testCrawlBFSDefaultBehaviour()
    {
        $queueManager = $this->spider->getQueueManager();
        $queueManager->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);

        $discovererSet = $this->spider->getDiscovererSet();
        $discovererSet->maxDepth = 1000;

        $this->spider->crawl();

        $visitOrder = array(
            $this->linkA,
            $this->linkB,
            $this->linkC,
            $this->linkE,
            $this->linkD,
            $this->linkF,
            $this->linkG,
        );
        $persisted = $this->statsHandler->getPersisted();

        $this->assertEquals($visitOrder, $persisted);
    }
236
237
    /**
     * Crawls with a maximum depth of 1 and no explicit traversal algorithm,
     * so pages found at depth 2 must not show up among the persisted pages.
     *
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
     *
     * Given the following structure:
     *
     * 0:        A
     *          /|\
     * 1:      B C E
     *        /| | |
     * 2:    D F G |
     *         | _ |
     *
     * We expect the following result: A, E, C, B
     *
     * @covers VDB\Spider\Spider::crawl
     */
    public function testCrawlDFSMaxDepthOne()
    {
        $discovererSet = $this->spider->getDiscovererSet();
        $discovererSet->maxDepth = 1;

        $this->spider->crawl();

        $visitOrder = array($this->linkA, $this->linkE, $this->linkC, $this->linkB);
        $persisted = $this->statsHandler->getPersisted();

        $this->assertEquals($visitOrder, $persisted);
    }
269
270
    /**
     * Switches the queue manager to breadth-first traversal and crawls with a
     * maximum depth of 1, so pages found at depth 2 must not show up among
     * the persisted pages.
     *
     * We expect the following result: A, B, C, E
     *
     * @covers VDB\Spider\Spider::crawl
     */
    public function testCrawlBFSMaxDepthOne()
    {
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $this->spider->getDiscovererSet()->maxDepth = 1;

        $this->spider->crawl();

        $expected = array(
            $this->linkA,
            $this->linkB,
            $this->linkC,
            $this->linkE,
        );

        $this->assertEquals($expected, $this->statsHandler->getPersisted());
    }
286
287
    /**
     * Crawls with a maximum depth of 1000, no explicit traversal algorithm and
     * the spider's downloadLimit set to 3, and expects exactly the pages
     * A, E, F to be reported as persisted.
     *
     * Despite the method name, the limit configured here is the spider's
     * downloadLimit property.
     *
     * NOTE(review): static analysis reports this test as duplicated code
     * alongside the other crawl-order tests of this class.
     *
     * @covers VDB\Spider\Spider::crawl
     */
    public function testCrawlDFSMaxQueueSize()
    {
        $discovererSet = $this->spider->getDiscovererSet();
        $discovererSet->maxDepth = 1000;
        $this->spider->downloadLimit = 3;

        $this->spider->crawl();

        $visitOrder = array($this->linkA, $this->linkE, $this->linkF);
        $persisted = $this->statsHandler->getPersisted();

        $this->assertEquals($visitOrder, $persisted);
    }
305
306
    /**
     * Switches the queue manager to breadth-first traversal, crawls with a
     * maximum depth of 1000 and the spider's downloadLimit set to 3, and
     * expects exactly the pages A, B, C to be reported as persisted.
     *
     * Despite the method name, the limit configured here is the spider's
     * downloadLimit property.
     *
     * NOTE(review): static analysis reports this test as duplicated code
     * alongside the other crawl-order tests of this class.
     *
     * @covers VDB\Spider\Spider::crawl
     */
    public function testCrawlBFSMaxQueueSize()
    {
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $this->spider->getDiscovererSet()->maxDepth = 1000;
        $this->spider->downloadLimit = 3;

        $this->spider->crawl();

        $expected = array(
            $this->linkA,
            $this->linkB,
            $this->linkC,
        );

        $this->assertEquals($expected, $this->statsHandler->getPersisted());
    }
322
323
    /**
     * Makes every request() call on the mocked request handler throw, crawls,
     * and checks that the stats handler reports one failed URI and nothing
     * filtered or persisted.
     *
     * @covers VDB\Spider\Spider::crawl
     */
    public function testCrawlFailedRequest()
    {
        $failure = new Exception('Failed mock request!');
        $this->requestHandler->expects($this->any())->method('request')->will($this->throwException($failure));

        $this->spider->crawl();

        $this->assertCount(0, $this->statsHandler->getFiltered(), 'Filtered count');
        $this->assertCount(0, $this->statsHandler->getPersisted(), 'Persisted count');
        $this->assertCount(1, $this->statsHandler->getFailed(), 'Failed count');
    }
342
}
343