Completed
Pull Request — master (#16)
by Matthijs
05:12
created

SpiderTest::testCrawlBFSMaxDepthOne()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 16
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 4
Bugs 0 Features 1
Metric Value
c 4
b 0
f 1
dl 0
loc 16
rs 9.4286
cc 1
eloc 10
nc 1
nop 0
1
<?php
2
namespace VDB\Spider;
3
4
use Exception;
5
use Guzzle\Http\Message\Response;
6
use PHPUnit_Framework_MockObject_MockObject;
7
use VDB\Spider\Discoverer\XPathExpressionDiscoverer;
8
use VDB\Spider\Tests\TestCase;
9
use VDB\Spider\QueueManager\InMemoryQueueManager;
10
use VDB\Spider\StatsHandler;
11
use VDB\Spider\Uri\FilterableUri;
12
13
/**
14
 */
15
class SpiderTest extends TestCase
16
{
17
    /**
18
     * @var Spider
19
     */
20
    protected $spider;
21
22
    /**
23
     * @var LogHandler
24
     */
25
    protected $logHandler;
26
27
    /**
28
     * @var StatsHandler
29
     */
30
    protected $statsHandler;
31
32
    /**
33
     * @var PHPUnit_Framework_MockObject_MockObject
34
     */
35
    protected $requestHandler;
36
37
    /** @var FilterableUri */
38
    protected $linkA;
39
    /** @var FilterableUri */
40
    protected $linkB;
41
    /** @var FilterableUri */
42
    protected $linkC;
43
    /** @var FilterableUri */
44
    protected $linkD;
45
    /** @var FilterableUri */
46
    protected $linkE;
47
    /** @var FilterableUri */
48
    protected $linkF;
49
    /** @var FilterableUri */
50
    protected $linkG;
51
52
    /** @var Response */
53
    protected $responseA;
54
    /** @var Response */
55
    protected $responseB;
56
    /** @var Response */
57
    protected $responseC;
58
    /** @var Response */
59
    protected $responseD;
60
    /** @var Response */
61
    protected $responseE;
62
    /** @var Response */
63
    protected $responseF;
64
    /** @var Response */
65
    protected $responseG;
66
67
    /** @var string */
68
    protected $hrefA;
69
    protected $hrefB;
70
    protected $hrefC;
71
    protected $hrefD;
72
    protected $hrefE;
73
    protected $hrefF;
74
    protected $hrefG;
75
76
    /**
     * Builds the fixture shared by the tests in this class.
     *
     * A Spider seeded with http://php-spider.org/A is given a mocked request
     * handler whose request() calls are routed to doTestRequest(), so requests
     * are answered from the prepared fixtures. For each of the pages A-G this
     * method stores the href, a FilterableUri with its depth set, and a 200
     * Response whose body is read from the matching HTML fixture file.
     *
     * Setting up the following structure:
     *
     * 0:        A
     *          /|\
     * 1:      B C E
     *        /| | |
     * 2:    D F G |
     *         | _ |
     *
     * Note: E links to F.
     *
     * Finally a StatsHandler and a LogHandler are subscribed to both the
     * spider's dispatcher and the queue manager's dispatcher.
     */
    protected function setUp()
    {
        $this->spider = new Spider('http://php-spider.org/A');

        $this->requestHandler = $this->getMock('VDB\Spider\RequestHandler\RequestHandlerInterface');

        // letter => array(href, depth found, fixture path relative to this directory)
        $pages = array(
            'A' => array('http://php-spider.org/A', 0, '/Fixtures/SpiderTestHTMLResourceA.html'),
            'B' => array('http://php-spider.org/B', 1, '/Fixtures/SpiderTestHTMLResourceB.html'),
            'C' => array('http://php-spider.org/C', 1, '/Fixtures/SpiderTestHTMLResourceC.html'),
            'D' => array('http://php-spider.org/D', 2, '/Fixtures/SpiderTestHTMLResourceD.html'),
            'E' => array('http://php-spider.org/E', 1, '/Fixtures/SpiderTestHTMLResourceE.html'),
            'F' => array('http://php-spider.org/F', 2, '/Fixtures/SpiderTestHTMLResourceF.html'),
            'G' => array('http://php-spider.org/G', 2, '/Fixtures/SpiderTestHTMLResourceG.html'),
        );

        foreach ($pages as $letter => $page) {
            list($href, $depth, $fixture) = $page;

            $uri = new FilterableUri($href);
            $uri->setDepthFound($depth);

            // Fills the declared properties $hrefX, $linkX and $responseX for this letter.
            $this->{'href' . $letter} = $href;
            $this->{'link' . $letter} = $uri;
            $this->{'response' . $letter} = new Response(200, null, file_get_contents(__DIR__ . $fixture));
        }

        // Any number of request() calls on the mock is answered by doTestRequest().
        $routeToFixtures = $this->returnCallback(array($this, 'doTestRequest'));
        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->will($routeToFixtures);

        $spider = $this->spider;
        $spider->setRequestHandler($this->requestHandler);
        $spider->getDiscovererSet()->set(new XPathExpressionDiscoverer('//a'));

        $this->statsHandler = new StatsHandler();
        $spider->getDispatcher()->addSubscriber($this->statsHandler);
        $spider->getQueueManager()->getDispatcher()->addSubscriber($this->statsHandler);

        $this->logHandler = new LogHandler();
        $spider->getDispatcher()->addSubscriber($this->logHandler);
        $spider->getQueueManager()->getDispatcher()->addSubscriber($this->logHandler);
    }
159
160
    /**
     * Callback behind the mocked request handler's request() method.
     *
     * Takes the first call argument as the requested link, compares its string
     * form against the stubbed links A-G and returns the result of
     * getResource() for the first match.
     *
     * @return Resource
     * @throws \ErrorException when the requested URI is not one of the stubbed links
     */
    public function doTestRequest()
    {
        $link = func_get_arg(0);
        $requested = $link->toString();

        $stubs = array(
            array($this->linkA, $this->responseA),
            array($this->linkB, $this->responseB),
            array($this->linkC, $this->responseC),
            array($this->linkD, $this->responseD),
            array($this->linkE, $this->responseE),
            array($this->linkF, $this->responseF),
            array($this->linkG, $this->responseG),
        );

        foreach ($stubs as $stub) {
            list($stubbedLink, $stubbedResponse) = $stub;

            // Loose comparison on purpose: it mirrors how a switch statement matches.
            if ($requested == $stubbedLink->toString()) {
                return $this->getResource($stubbedLink, $stubbedResponse);
            }
        }

        throw new \ErrorException('The requested URI was not stubbed: ' . $link->toString());
    }
187
188
    /**
     * @covers VDB\Spider\Spider::crawl
     *
     * Crawls with a discovery depth limit of 10 and without selecting a
     * traversal algorithm (the test name indicates depth-first is the default),
     * then checks the order in which the links were persisted.
     *
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
     */
    public function testCrawlDFSDefaultBehaviour()
    {
        $spider = $this->spider;
        $spider->getDiscovererSet()->maxDepth = 10;
        $spider->crawl();

        $this->assertEquals(
            array(
                $this->linkA,
                $this->linkE,
                $this->linkF,
                $this->linkC,
                $this->linkG,
                $this->linkB,
                $this->linkD,
            ),
            $this->statsHandler->getPersisted()
        );
    }
211
212
    /**
     * @covers VDB\Spider\Spider::crawl
     *
     * Switches the queue manager to breadth-first traversal, sets the discovery
     * depth limit to 1000 and checks that the links are persisted level by
     * level: A, then B, C, E, then D, F, G.
     */
    public function testCrawlBFSDefaultBehaviour()
    {
        $spider = $this->spider;
        $spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $spider->getDiscovererSet()->maxDepth = 1000;
        $spider->crawl();

        $this->assertEquals(
            array(
                $this->linkA,
                $this->linkB,
                $this->linkC,
                $this->linkE,
                $this->linkD,
                $this->linkF,
                $this->linkG,
            ),
            $this->statsHandler->getPersisted()
        );
    }
235
236
    /**
     * @covers VDB\Spider\Spider::crawl
     *
     * Crawls with the discovery depth limit set to 1 and without selecting a
     * traversal algorithm (the test name indicates depth-first).
     *
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
     *
     * Given the following structure:
     *
     * 0:        A
     *          /|\
     * 1:      B C E
     *        /| | |
     * 2:    D F G |
     *         | _ |
     *
     * We expect the following result: A, E, C, B
     */
    public function testCrawlDFSMaxDepthOne()
    {
        $spider = $this->spider;
        $spider->getDiscovererSet()->maxDepth = 1;
        $spider->crawl();

        $this->assertEquals(
            array(
                $this->linkA,
                $this->linkE,
                $this->linkC,
                $this->linkB,
            ),
            $this->statsHandler->getPersisted()
        );
    }
268
269
    /**
     * Breadth-first counterpart of testCrawlDFSMaxDepthOne().
     *
     * Switches the queue manager to breadth-first traversal, limits the
     * discovery depth to 1 and expects the persisted order A, B, C, E.
     */
    public function testCrawlBFSMaxDepthOne()
    {
        $spider = $this->spider;
        $spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $spider->getDiscovererSet()->maxDepth = 1;
        $spider->crawl();

        $this->assertEquals(
            array(
                $this->linkA,
                $this->linkB,
                $this->linkC,
                $this->linkE,
            ),
            $this->statsHandler->getPersisted()
        );
    }
285
286
    /**
     * @covers VDB\Spider\Spider::crawl
     *
     * Crawls without selecting a traversal algorithm (the test name indicates
     * depth-first) with a download limit of 3 and expects exactly A, E, F to
     * be persisted, in that order.
     *
     * The depth limit is raised to 1000 on the discoverer set, as in
     * testCrawlBFSMaxQueueSize(), so that the download limit is the setting
     * under test rather than the depth limit.
     */
    public function testCrawlDFSMaxQueueSize()
    {
        // maxDepth is a setting of the discoverer set; assigning it on the
        // queue manager only created an unused property on that object.
        $this->spider->getDiscovererSet()->maxDepth = 1000;
        $this->spider->downloadLimit = 3;

        $this->spider->crawl();

        $expected = array(
            $this->linkA,
            $this->linkE,
            $this->linkF,
        );

        $this->assertEquals($expected, $this->statsHandler->getPersisted());
    }
304
305 View Code Duplication
    /**
     * Breadth-first counterpart of testCrawlDFSMaxQueueSize().
     *
     * Switches the queue manager to breadth-first traversal, sets the discovery
     * depth limit to 1000 and the download limit to 3, and expects exactly
     * A, B, C to be persisted, in that order.
     */
    public function testCrawlBFSMaxQueueSize()
    {
        $spider = $this->spider;
        $spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $spider->getDiscovererSet()->maxDepth = 1000;
        $spider->downloadLimit = 3;
        $spider->crawl();

        $this->assertEquals(
            array(
                $this->linkA,
                $this->linkB,
                $this->linkC,
            ),
            $this->statsHandler->getPersisted()
        );
    }
321
322
    /**
     * @covers VDB\Spider\Spider::crawl
     *
     * Configures the mocked request handler to throw on request(), crawls, and
     * checks the stats handler: nothing filtered, nothing persisted, one failure.
     */
    public function testCrawlFailedRequest()
    {
        $failure = new Exception('Failed mock request!');

        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->will($this->throwException($failure));

        $this->spider->crawl();

        $this->assertCount(0, $this->statsHandler->getFiltered(), 'Filtered count');
        $this->assertCount(0, $this->statsHandler->getPersisted(), 'Persisted count');
        $this->assertCount(1, $this->statsHandler->getFailed(), 'Failed count');
    }
341
}
342