Completed
Pull Request — master (#16)
by Matthijs
05:11
created

SpiderTest::testCrawlDFSMaxQueueSize()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 15
Code Lines 9

Duplication

Lines 15
Ratio 100 %

Importance

Changes 4
Bugs 0 Features 1
Metric Value
c 4
b 0
f 1
dl 15
loc 15
rs 9.4286
cc 1
eloc 9
nc 1
nop 0
1
<?php
2
namespace VDB\Spider;
3
4
use Exception;
5
use Guzzle\Http\Message\Response;
6
use PHPUnit_Framework_MockObject_MockObject;
7
use VDB\Spider\Discoverer\XPathExpressionDiscoverer;
8
use VDB\Spider\Tests\TestCase;
9
use VDB\Spider\QueueManager\InMemoryQueueManager;
10
use VDB\Spider\StatsHandler;
11
use VDB\Spider\Uri\FilterableUri;
12
13
/**
14
 */
15
class SpiderTest extends TestCase
16
{
17
    /**
18
     * @var Spider
19
     */
20
    protected $spider;
21
22
    /**
23
     * @var LogHandler
24
     */
25
    protected $logHandler;
26
27
    /**
28
     * @var StatsHandler
29
     */
30
    protected $statsHandler;
31
32
    /**
33
     * @var PHPUnit_Framework_MockObject_MockObject
34
     */
35
    protected $requestHandler;
36
37
    /** @var FilterableUri */
38
    protected $linkA;
39
    /** @var FilterableUri */
40
    protected $linkB;
41
    /** @var FilterableUri */
42
    protected $linkC;
43
    /** @var FilterableUri */
44
    protected $linkD;
45
    /** @var FilterableUri */
46
    protected $linkE;
47
    /** @var FilterableUri */
48
    protected $linkF;
49
    /** @var FilterableUri */
50
    protected $linkG;
51
52
    /** @var Response */
53
    protected $responseA;
54
    /** @var Response */
55
    protected $responseB;
56
    /** @var Response */
57
    protected $responseC;
58
    /** @var Response */
59
    protected $responseD;
60
    /** @var Response */
61
    protected $responseE;
62
    /** @var Response */
63
    protected $responseF;
64
    /** @var Response */
65
    protected $responseG;
66
67
    /** @var string */
68
    protected $hrefA;
69
    protected $hrefB;
70
    protected $hrefC;
71
    protected $hrefD;
72
    protected $hrefE;
73
    protected $hrefF;
74
    protected $hrefG;
75
76
    /**
77
     * Sets up the fixture, for example, opens a network connection.
78
     * This method is called before a test is executed.
79
     */
80
    protected function setUp()
    {
        // The crawl is seeded with resource A; all HTTP traffic is answered by the mock below.
        $this->spider = new Spider('http://php-spider.org/A');
        $this->requestHandler = $this->getMock('VDB\Spider\RequestHandler\RequestHandlerInterface');

        // Build the href, URI and canned HTML response for each stubbed resource A through G.
        foreach (array('A', 'B', 'C', 'D', 'E', 'F', 'G') as $letter) {
            $href = 'http://php-spider.org/' . $letter;
            $html = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResource' . $letter . '.html');

            $this->{'href' . $letter} = $href;
            $this->{'link' . $letter} = new FilterableUri($href);
            $this->{'response' . $letter} = new Response(200, null, $html);
        }

        // Route every request() call on the mock through doTestRequest().
        $stubbedRequest = $this->returnCallback(array($this, 'doTestRequest'));
        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->will($stubbedRequest);
        $this->spider->setRequestHandler($this->requestHandler);

        // Discover links from every anchor element in the fixture pages.
        $this->spider->getDiscovererSet()->set(new XPathExpressionDiscoverer('//a'));

        // Register the stats handler first and the log handler second on both dispatchers,
        // so the registration order on each dispatcher is unchanged.
        $this->statsHandler = new StatsHandler();
        $this->logHandler = new LogHandler();
        foreach (array($this->statsHandler, $this->logHandler) as $subscriber) {
            $this->spider->getDispatcher()->addSubscriber($subscriber);
            $this->spider->getQueueManager()->getDispatcher()->addSubscriber($subscriber);
        }
    }
140
141
    /**
142
     * @return Resource
143
     * @throws \ErrorException
144
     */
145
    public function doTestRequest()
    {
        // Registered in setUp() as the callback for the mocked request() method;
        // the first argument is the URI being requested.
        $requested = func_get_arg(0);
        $requestedHref = $requested->toString();

        // Stubbed URI/response pairs, checked in order A through G.
        $stubs = array(
            array($this->linkA, $this->responseA),
            array($this->linkB, $this->responseB),
            array($this->linkC, $this->responseC),
            array($this->linkD, $this->responseD),
            array($this->linkE, $this->responseE),
            array($this->linkF, $this->responseF),
            array($this->linkG, $this->responseG),
        );

        foreach ($stubs as $stub) {
            list($uri, $response) = $stub;

            // Loose comparison on purpose: it mirrors how a switch statement compares its cases.
            if ($requestedHref == $uri->toString()) {
                return $this->getResource($uri, $response);
            }
        }

        // No stub matched: fail loudly so a missing fixture is noticed.
        throw new \ErrorException('The requested URI was not stubbed: ' . $requestedHref);
    }
168
169
    /**
170
     * @covers VDB\Spider\Spider::crawl
171
     *
172
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
173
     */
174 View Code Duplication
    public function testCrawlDFSDefaultBehaviour()
    {
        // NOTE(review): static analysis flags this test as duplicated code, and flags the
        // maxDepth assignment because the property is accessed on QueueManagerInterface,
        // i.e. the test is coded against a concrete queue manager.
        $queueManager = $this->spider->getQueueManager();
        $queueManager->maxDepth = 10;

        $this->spider->crawl();
        $persisted = $this->statsHandler->getPersisted();

        // Order in which the stubbed resources are expected to be persisted.
        $expectedOrder = array(
            $this->linkA,
            $this->linkE,
            $this->linkF,
            $this->linkC,
            $this->linkG,
            $this->linkB,
            $this->linkD
        );

        $this->assertEquals($expectedOrder, $persisted);
    }
192
193
    /**
194
     * @covers VDB\Spider\Spider::crawl
195
     *
196
     */
197 View Code Duplication
    public function testCrawlBFSDefaultBehaviour()
    {
        // NOTE(review): static analysis flags this test as duplicated code, and flags the
        // maxDepth assignment because the property is accessed on QueueManagerInterface,
        // i.e. the test is coded against a concrete queue manager.
        $queueManager = $this->spider->getQueueManager();
        $queueManager->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $queueManager->maxDepth = 1000;

        $this->spider->crawl();
        $persisted = $this->statsHandler->getPersisted();

        // Order in which the stubbed resources are expected to be persisted.
        $expectedOrder = array(
            $this->linkA,
            $this->linkB,
            $this->linkC,
            $this->linkE,
            $this->linkD,
            $this->linkF,
            $this->linkG
        );

        $this->assertEquals($expectedOrder, $persisted);
    }
216
217
    /**
218
     * @covers VDB\Spider\Spider::crawl
219
     *
220
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
221
     */
222
    public function testCrawlDFSMaxDepthOne()
    {
        // NOTE(review): static analysis flags the maxDepth assignment because the property
        // is accessed on QueueManagerInterface rather than on a concrete implementation.
        $queueManager = $this->spider->getQueueManager();
        $queueManager->maxDepth = 1;

        $this->spider->crawl();
        $persisted = $this->statsHandler->getPersisted();

        // With maxDepth set to 1, only these four resources are expected, in this order.
        $expectedOrder = array(
            $this->linkA,
            $this->linkE,
            $this->linkC,
            $this->linkB,
        );

        $this->assertEquals($expectedOrder, $persisted);
    }
237
238
    public function testCrawlBFSMaxDepthOne()
    {
        // NOTE(review): static analysis flags the maxDepth assignment because the property
        // is accessed on QueueManagerInterface rather than on a concrete implementation.
        $queueManager = $this->spider->getQueueManager();
        $queueManager->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $queueManager->maxDepth = 1;

        $this->spider->crawl();
        $persisted = $this->statsHandler->getPersisted();

        // With maxDepth set to 1, only these four resources are expected, in this order.
        $expectedOrder = array(
            $this->linkA,
            $this->linkB,
            $this->linkC,
            $this->linkE,
        );

        $this->assertEquals($expectedOrder, $persisted);
    }
254
255
    /**
256
     * @covers VDB\Spider\Spider::crawl
257
     */
258 View Code Duplication
    public function testCrawlDFSMaxQueueSize()
    {
        // NOTE(review): static analysis flags this test as duplicated code, and flags the
        // maxDepth assignment because the property is accessed on QueueManagerInterface.
        $queueManager = $this->spider->getQueueManager();
        $queueManager->maxDepth = 1000;

        // NOTE(review): static analysis reports the integer as incompatible with the declared
        // type of Spider::$downloadLimit; that property's docblock probably needs fixing - confirm.
        $this->spider->downloadLimit = 3;

        $this->spider->crawl();
        $persisted = $this->statsHandler->getPersisted();

        // With downloadLimit set to 3, only these three resources are expected, in this order.
        $expectedOrder = array(
            $this->linkA,
            $this->linkE,
            $this->linkF,
        );

        $this->assertEquals($expectedOrder, $persisted);
    }
273
274 View Code Duplication
    public function testCrawlBFSMaxQueueSize()
    {
        // NOTE(review): static analysis flags this test as duplicated code, and flags the
        // maxDepth assignment because the property is accessed on QueueManagerInterface.
        $queueManager = $this->spider->getQueueManager();
        $queueManager->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $queueManager->maxDepth = 1000;

        // NOTE(review): static analysis reports the integer as incompatible with the declared
        // type of Spider::$downloadLimit; that property's docblock probably needs fixing - confirm.
        $this->spider->downloadLimit = 3;

        $this->spider->crawl();
        $persisted = $this->statsHandler->getPersisted();

        // With downloadLimit set to 3, only these three resources are expected, in this order.
        $expectedOrder = array(
            $this->linkA,
            $this->linkB,
            $this->linkC,
        );

        $this->assertEquals($expectedOrder, $persisted);
    }
290
291
    /**
292
     * @covers VDB\Spider\Spider::crawl
293
     */
294
    public function testCrawlFailedRequest()
    {
        // Add an expectation that makes every request() call on the mock throw.
        $failure = new Exception('Failed mock request!');
        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->will($this->throwException($failure));

        $this->spider->crawl();

        // Expect nothing filtered or persisted, and exactly one failed entry.
        $this->assertCount(0, $this->statsHandler->getFiltered(), 'Filtered count');
        $this->assertCount(0, $this->statsHandler->getPersisted(), 'Persisted count');
        $this->assertCount(1, $this->statsHandler->getFailed(), 'Failed count');
    }
310
}
311