Passed
Branch feature/cleanup (2bd333)
by Matthijs
06:03
created

SpiderTest::testCrawlDFSDefaultBehaviour()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 18
Code Lines 12

Duplication

Lines 18
Ratio 100 %

Importance

Changes 5
Bugs 0 Features 2
Metric Value
c 5
b 0
f 2
dl 18
loc 18
rs 9.4286
cc 1
eloc 12
nc 1
nop 0
1
<?php
2
namespace VDB\Spider;
3
4
use Exception;
5
use Guzzle\Http\Message\Response;
6
use PHPUnit_Framework_MockObject_MockObject;
7
use VDB\Spider\Discoverer\XPathExpressionDiscoverer;
8
use VDB\Spider\Tests\TestCase;
9
use VDB\Spider\QueueManager\InMemoryQueueManager;
10
use VDB\Spider\StatsHandler;
11
use VDB\Spider\Uri\FilterableUri;
12
13
/**
14
 */
15
class SpiderTest extends TestCase
16
{
17
    /**
18
     * @var Spider
19
     */
20
    protected $spider;
21
22
    /**
23
     * @var StatsHandler
24
     */
25
    protected $statsHandler;
26
27
    /**
28
     * @var PHPUnit_Framework_MockObject_MockObject
29
     */
30
    protected $requestHandler;
31
32
    /** @var FilterableUri */
33
    protected $linkA;
34
    /** @var FilterableUri */
35
    protected $linkB;
36
    /** @var FilterableUri */
37
    protected $linkC;
38
    /** @var FilterableUri */
39
    protected $linkD;
40
    /** @var FilterableUri */
41
    protected $linkE;
42
    /** @var FilterableUri */
43
    protected $linkF;
44
    /** @var FilterableUri */
45
    protected $linkG;
46
47
    /** @var Response */
48
    protected $responseA;
49
    /** @var Response */
50
    protected $responseB;
51
    /** @var Response */
52
    protected $responseC;
53
    /** @var Response */
54
    protected $responseD;
55
    /** @var Response */
56
    protected $responseE;
57
    /** @var Response */
58
    protected $responseF;
59
    /** @var Response */
60
    protected $responseG;
61
62
    /** @var string */
63
    protected $hrefA;
64
    protected $hrefB;
65
    protected $hrefC;
66
    protected $hrefD;
67
    protected $hrefE;
68
    protected $hrefF;
69
    protected $hrefG;
70
71
    /**
72
     * Sets up the fixture, for example, opens a network connection.
73
     * This method is called before a test is executed.
74
     */
75
    protected function setUp()
    {
        // The spider is seeded with page A.
        $this->spider = new Spider('http://php-spider.org/A');

        $this->requestHandler = $this->getMock('VDB\Spider\RequestHandler\RequestHandler');

        // One href / URI / response triple per stubbed page, A through G.
        $this->hrefA = 'http://php-spider.org/A';
        $this->hrefB = 'http://php-spider.org/B';
        $this->hrefC = 'http://php-spider.org/C';
        $this->hrefD = 'http://php-spider.org/D';
        $this->hrefE = 'http://php-spider.org/E';
        $this->hrefF = 'http://php-spider.org/F';
        $this->hrefG = 'http://php-spider.org/G';

        $this->linkA = new FilterableUri($this->hrefA);
        $this->linkB = new FilterableUri($this->hrefB);
        $this->linkC = new FilterableUri($this->hrefC);
        $this->linkD = new FilterableUri($this->hrefD);
        $this->linkE = new FilterableUri($this->hrefE);
        $this->linkF = new FilterableUri($this->hrefF);
        $this->linkG = new FilterableUri($this->hrefG);

        // Response bodies are read from the HTML fixtures stored next to this test.
        $htmlA = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceA.html');
        $this->responseA = new Response(200, null, $htmlA);

        $htmlB = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceB.html');
        $this->responseB = new Response(200, null, $htmlB);

        $htmlC = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceC.html');
        $this->responseC = new Response(200, null, $htmlC);

        $htmlD = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceD.html');
        $this->responseD = new Response(200, null, $htmlD);

        $htmlE = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceE.html');
        $this->responseE = new Response(200, null, $htmlE);

        $htmlF = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceF.html');
        $this->responseF = new Response(200, null, $htmlF);

        $htmlG = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceG.html');
        $this->responseG = new Response(200, null, $htmlG);

        // Calls to request() on the mocked handler are answered by doTestRequest().
        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->will($this->returnCallback(array($this, 'doTestRequest')));

        $this->spider->setRequestHandler($this->requestHandler);

        // The XPath '//a' selects every anchor element in a document.
        $this->spider->getDiscovererSet()->set(new XPathExpressionDiscoverer('//a'));

        // The stats handler subscribes to both dispatchers; the tests read their results from it.
        $this->statsHandler = new StatsHandler();
        $this->spider->getDispatcher()->addSubscriber($this->statsHandler);
        $this->spider->getQueueManager()->getDispatcher()->addSubscriber($this->statsHandler);

        // Kept local: nothing outside setUp() reads the log handler, and assigning it
        // to an undeclared property would create a dynamic property on the test case.
        $logHandler = new LogHandler();
        $this->spider->getDispatcher()->addSubscriber($logHandler);
        $this->spider->getQueueManager()->getDispatcher()->addSubscriber($logHandler);
    }
135
136
    /**
137
     * @return Resource
138
     * @throws \ErrorException
139
     */
140
    public function doTestRequest()
    {
        // Registered in setUp() as the callback for the mocked request();
        // the first argument is the URI being requested.
        $link = func_get_arg(0);
        $requested = $link->toString();

        // Each stubbed URI paired with the response served for it.
        $stubs = array(
            array($this->linkA, $this->responseA),
            array($this->linkB, $this->responseB),
            array($this->linkC, $this->responseC),
            array($this->linkD, $this->responseD),
            array($this->linkE, $this->responseE),
            array($this->linkF, $this->responseF),
            array($this->linkG, $this->responseG),
        );

        foreach ($stubs as $stub) {
            list($uri, $response) = $stub;

            // Loose comparison, matching the semantics of a switch statement.
            if ($requested == $uri->toString()) {
                return $this->getResource($uri, $response);
            }
        }

        // No stub matched: fail loudly rather than return an empty result.
        throw new \ErrorException('The requested URI was not stubbed: ' . $link->toString());
    }
163
164
    /**
165
     * @covers VDB\Spider\Spider::crawl
166
     *
167
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
168
     */
169 View Code Duplication
    public function testCrawlDFSDefaultBehaviour()
1 ignored issue
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
170
    {
171
        $this->spider->getQueueManager()->maxDepth = 10;
0 ignored issues
show
Bug introduced by
Accessing maxDepth on the interface VDB\Spider\QueueManager\QueueManager suggests that you are coding against a concrete implementation. How about adding an instanceof check?

If you access a property on an interface, you most likely code against a concrete implementation of the interface.

Available Fixes

  1. Adding an additional type check:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeInterface $object) {
        if ($object instanceof SomeClass) {
            $a = $object->a;
        }
    }
    
  2. Changing the type hint:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeClass $object) {
        $a = $object->a;
    }
    
Loading history...
172
173
        $this->spider->crawl();
174
175
        $expected = array(
176
            $this->linkA,
177
            $this->linkE,
178
            $this->linkF,
179
            $this->linkC,
180
            $this->linkG,
181
            $this->linkB,
182
            $this->linkD
183
        );
184
185
        $this->assertEquals($expected, $this->statsHandler->getPersisted());
186
    }
187
188
    /**
189
     * @covers VDB\Spider\Spider::crawl
190
     *
191
     */
192 View Code Duplication
    public function testCrawlBFSDefaultBehaviour()
1 ignored issue
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
193
    {
194
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
195
        $this->spider->getQueueManager()->maxDepth = 1000;
0 ignored issues
show
Bug introduced by
Accessing maxDepth on the interface VDB\Spider\QueueManager\QueueManager suggests that you are coding against a concrete implementation. How about adding an instanceof check?

If you access a property on an interface, you most likely code against a concrete implementation of the interface.

Available Fixes

  1. Adding an additional type check:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeInterface $object) {
        if ($object instanceof SomeClass) {
            $a = $object->a;
        }
    }
    
  2. Changing the type hint:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeClass $object) {
        $a = $object->a;
    }
    
Loading history...
196
197
        $this->spider->crawl();
198
199
        $expected = array(
200
            $this->linkA,
201
            $this->linkB,
202
            $this->linkC,
203
            $this->linkE,
204
            $this->linkD,
205
            $this->linkF,
206
            $this->linkG
207
        );
208
209
        $this->assertEquals($expected, $this->statsHandler->getPersisted());
210
    }
211
212
    /**
213
     * @covers VDB\Spider\Spider::crawl
214
     *
215
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
216
     */
217
    public function testCrawlDFSMaxDepthOne()
218
    {
219
        $this->spider->getQueueManager()->maxDepth = 1;
0 ignored issues
show
Bug introduced by
Accessing maxDepth on the interface VDB\Spider\QueueManager\QueueManager suggests that you are coding against a concrete implementation. How about adding an instanceof check?

If you access a property on an interface, you most likely code against a concrete implementation of the interface.

Available Fixes

  1. Adding an additional type check:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeInterface $object) {
        if ($object instanceof SomeClass) {
            $a = $object->a;
        }
    }
    
  2. Changing the type hint:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeClass $object) {
        $a = $object->a;
    }
    
Loading history...
220
221
        $this->spider->crawl();
222
223
        $expected = array(
224
            $this->linkA,
225
            $this->linkE,
226
            $this->linkC,
227
            $this->linkB,
228
        );
229
230
        $this->assertEquals($expected, $this->statsHandler->getPersisted());
231
    }
232
233
    public function testCrawlBFSMaxDepthOne()
234
    {
235
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
236
        $this->spider->getQueueManager()->maxDepth = 1;
0 ignored issues
show
Bug introduced by
Accessing maxDepth on the interface VDB\Spider\QueueManager\QueueManager suggests that you are coding against a concrete implementation. How about adding an instanceof check?

If you access a property on an interface, you most likely code against a concrete implementation of the interface.

Available Fixes

  1. Adding an additional type check:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeInterface $object) {
        if ($object instanceof SomeClass) {
            $a = $object->a;
        }
    }
    
  2. Changing the type hint:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeClass $object) {
        $a = $object->a;
    }
    
Loading history...
237
238
        $this->spider->crawl();
239
240
        $expected = array(
241
            $this->linkA,
242
            $this->linkB,
243
            $this->linkC,
244
            $this->linkE,
245
        );
246
247
        $this->assertEquals($expected, $this->statsHandler->getPersisted());
248
    }
249
250
    /**
251
     * @covers VDB\Spider\Spider::crawl
252
     */
253 View Code Duplication
    public function testCrawlDFSMaxQueueSize()
1 ignored issue
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
254
    {
255
        $this->spider->getQueueManager()->maxDepth = 1000;
0 ignored issues
show
Bug introduced by
Accessing maxDepth on the interface VDB\Spider\QueueManager\QueueManager suggests that you are coding against a concrete implementation. How about adding an instanceof check?

If you access a property on an interface, you most likely code against a concrete implementation of the interface.

Available Fixes

  1. Adding an additional type check:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeInterface $object) {
        if ($object instanceof SomeClass) {
            $a = $object->a;
        }
    }
    
  2. Changing the type hint:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeClass $object) {
        $a = $object->a;
    }
    
Loading history...
256
        $this->spider->downloadLimit = 3;
0 ignored issues
show
Documentation Bug introduced by
It seems like 3 of type integer is incompatible with the declared type object<VDB\Spider\the> of property $downloadLimit.

Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.

Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.

Loading history...
257
258
        $this->spider->crawl();
259
260
        $expected = array(
261
            $this->linkA,
262
            $this->linkE,
263
            $this->linkF,
264
        );
265
266
        $this->assertEquals($expected, $this->statsHandler->getPersisted());
267
    }
268
269 View Code Duplication
    public function testCrawlBFSMaxQueueSize()
1 ignored issue
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
270
    {
271
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
272
        $this->spider->getQueueManager()->maxDepth = 1000;
0 ignored issues
show
Bug introduced by
Accessing maxDepth on the interface VDB\Spider\QueueManager\QueueManager suggests that you are coding against a concrete implementation. How about adding an instanceof check?

If you access a property on an interface, you most likely code against a concrete implementation of the interface.

Available Fixes

  1. Adding an additional type check:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeInterface $object) {
        if ($object instanceof SomeClass) {
            $a = $object->a;
        }
    }
    
  2. Changing the type hint:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeClass $object) {
        $a = $object->a;
    }
    
Loading history...
273
        $this->spider->downloadLimit = 3;
0 ignored issues
show
Documentation Bug introduced by
It seems like 3 of type integer is incompatible with the declared type object<VDB\Spider\the> of property $downloadLimit.

Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.

Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.

Loading history...
274
275
        $this->spider->crawl();
276
277
        $expected = array(
278
            $this->linkA,
279
            $this->linkB,
280
            $this->linkC,
281
        );
282
283
        $this->assertEquals($expected, $this->statsHandler->getPersisted());
284
    }
285
286
    /**
287
     * @covers VDB\Spider\Spider::crawl
288
     */
289
    public function testCrawlFailedRequest()
    {
        // Register a stub on the mocked handler that throws from request().
        $failure = $this->throwException(new Exception('Failed mock request!'));
        $this->requestHandler->expects($this->any())->method('request')->will($failure);

        $this->spider->crawl();

        // Expect nothing filtered, nothing persisted and exactly one failure recorded.
        $this->assertCount(0, $this->statsHandler->getFiltered(), 'Filtered count');
        $this->assertCount(0, $this->statsHandler->getPersisted(), 'Persisted count');
        $this->assertCount(1, $this->statsHandler->getFailed(), 'Failed count');
    }
305
}
306