Completed
Pull Request — master (#16)
by Matthijs
02:29
created

SpiderTest::setUp()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 60
Code Lines 43

Duplication

Lines 0
Ratio 0 %

Importance

Changes 8
Bugs 1 Features 2
Metric Value
c 8
b 1
f 2
dl 0
loc 60
rs 9.5556
cc 1
eloc 43
nc 1
nop 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
namespace VDB\Spider;
3
4
use Exception;
5
use Guzzle\Http\Message\Response;
6
use PHPUnit_Framework_MockObject_MockObject;
7
use VDB\Spider\Discoverer\XPathExpressionDiscoverer;
8
use VDB\Spider\Tests\TestCase;
9
use VDB\Spider\QueueManager\InMemoryQueueManager;
10
use VDB\Spider\StatsHandler;
11
use VDB\Spider\Uri\FilterableUri;
12
13
/**
14
 */
15
class SpiderTest extends TestCase
16
{
17
    /**
18
     * @var Spider
19
     */
20
    protected $spider;
21
22
    /**
23
     * @var StatsHandler
24
     */
25
    protected $statsHandler;
26
27
    /**
28
     * @var PHPUnit_Framework_MockObject_MockObject
29
     */
30
    protected $requestHandler;
31
32
    /** @var FilterableUri */
33
    protected $linkA;
34
    /** @var FilterableUri */
35
    protected $linkB;
36
    /** @var FilterableUri */
37
    protected $linkC;
38
    /** @var FilterableUri */
39
    protected $linkD;
40
    /** @var FilterableUri */
41
    protected $linkE;
42
    /** @var FilterableUri */
43
    protected $linkF;
44
    /** @var FilterableUri */
45
    protected $linkG;
46
47
    /** @var Response */
48
    protected $responseA;
49
    /** @var Response */
50
    protected $responseB;
51
    /** @var Response */
52
    protected $responseC;
53
    /** @var Response */
54
    protected $responseD;
55
    /** @var Response */
56
    protected $responseE;
57
    /** @var Response */
58
    protected $responseF;
59
    /** @var Response */
60
    protected $responseG;
61
62
    /** @var string */
63
    protected $hrefA;
64
    protected $hrefB;
65
    protected $hrefC;
66
    protected $hrefD;
67
    protected $hrefE;
68
    protected $hrefF;
69
    protected $hrefG;
70
71
    /**
72
     * Sets up the fixture, for example, opens a network connection.
73
     * This method is called before a test is executed.
74
     */
75
    protected function setUp()
76
    {
77
        $this->spider = new Spider('http://php-spider.org/A');
78
79
        $this->requestHandler = $this->getMock('VDB\Spider\RequestHandler\RequestHandler');
80
81
        $this->hrefA = 'http://php-spider.org/A';
82
        $this->hrefB = 'http://php-spider.org/B';
83
        $this->hrefC = 'http://php-spider.org/C';
84
        $this->hrefD = 'http://php-spider.org/D';
85
        $this->hrefE = 'http://php-spider.org/E';
86
        $this->hrefF = 'http://php-spider.org/F';
87
        $this->hrefG = 'http://php-spider.org/G';
88
89
        $this->linkA = new FilterableUri($this->hrefA);
90
        $this->linkB = new FilterableUri($this->hrefB);
91
        $this->linkC = new FilterableUri($this->hrefC);
92
        $this->linkD = new FilterableUri($this->hrefD);
93
        $this->linkE = new FilterableUri($this->hrefE);
94
        $this->linkF = new FilterableUri($this->hrefF);
95
        $this->linkG = new FilterableUri($this->hrefG);
96
97
        $htmlA = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceA.html');
98
        $this->responseA = new Response(200, null, $htmlA);
99
100
        $htmlB = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceB.html');
101
        $this->responseB = new Response(200, null, $htmlB);
102
103
        $htmlC = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceC.html');
104
        $this->responseC = new Response(200, null, $htmlC);
105
106
        $htmlD = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceD.html');
107
        $this->responseD = new Response(200, null, $htmlD);
108
109
        $htmlE = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceE.html');
110
        $this->responseE = new Response(200, null, $htmlE);
111
112
        $htmlF = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceF.html');
113
        $this->responseF = new Response(200, null, $htmlF);
114
115
        $htmlG = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResourceG.html');
116
        $this->responseG = new Response(200, null, $htmlG);
117
118
        $this->requestHandler
119
            ->expects($this->any())
120
            ->method('request')
121
            ->will($this->returnCallback(array($this, 'doTestRequest')));
122
123
        $this->spider->setRequestHandler($this->requestHandler);
124
125
        $this->spider->getDiscovererSet()->set(new XPathExpressionDiscoverer('//a'));
126
127
        $this->statsHandler = new StatsHandler();
128
        $this->spider->getDispatcher()->addSubscriber($this->statsHandler);
129
        $this->spider->getQueueManager()->getDispatcher()->addSubscriber($this->statsHandler);
130
131
        $this->logHandler = new LogHandler();
0 ignored issues
show
Bug introduced by
The property logHandler does not exist. Did you maybe forget to declare it?

In PHP it is possible to write to properties without declaring them. For example, the following is perfectly valid PHP code:

class MyClass { }

$x = new MyClass();
$x->foo = true;

Generally, it is a good practice to explicitly declare properties to avoid accidental typos and provide IDE auto-completion:

class MyClass {
    public $foo;
}

$x = new MyClass();
$x->foo = true;
Loading history...
132
        $this->spider->getDispatcher()->addSubscriber($this->logHandler);
133
        $this->spider->getQueueManager()->getDispatcher()->addSubscriber($this->logHandler);
134
    }
135
136
    /**
137
     * Callback used as the stub for the mocked request handler's `request` method.
     *
     * Compares the string form of the first call argument against fixture links A
     * through G and returns whatever getResource() yields for the matching
     * link/response pair.
     *
     * @return Resource
138
     * @throws \ErrorException when the requested URI matches none of the fixture links
139
     */
140
    public function doTestRequest()
141
    {
142
        // No parameters are declared, so the requested link is read positionally.
        $link = func_get_arg(0);
143
144
        switch ($link->toString()) {
145
            case $this->linkA->toString():
146
                return $this->getResource($this->linkA, $this->responseA);
147
            case $this->linkB->toString():
148
                return $this->getResource($this->linkB, $this->responseB);
149
            case $this->linkC->toString():
150
                return $this->getResource($this->linkC, $this->responseC);
151
            case $this->linkD->toString():
152
                return $this->getResource($this->linkD, $this->responseD);
153
            case $this->linkE->toString():
154
                return $this->getResource($this->linkE, $this->responseE);
155
            case $this->linkF->toString():
156
                return $this->getResource($this->linkF, $this->responseF);
157
            case $this->linkG->toString():
158
                return $this->getResource($this->linkG, $this->responseG);
159
            default:
160
                // Fail loudly so a crawl that reaches an unexpected URI is noticed.
                throw new \ErrorException('The requested URI was not stubbed: ' . $link->toString());
161
        }
162
    }
163
164
    /**
165
     * @covers VDB\Spider\Spider::crawl
166
     *
167
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
168
     */
169
    public function testCrawlDFSDefaultBehaviour()
170
    {
171
        $this->spider->getQueueManager()->maxDepth = 10;
0 ignored issues
show
Bug introduced by
Accessing maxDepth on the interface VDB\Spider\QueueManager\QueueManager suggests that you are coding against a concrete implementation. How about adding an instanceof check?

If you access a property on an interface, you most likely code against a concrete implementation of the interface.

Available Fixes

  1. Adding an additional type check:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeInterface $object) {
        if ($object instanceof SomeClass) {
            $a = $object->a;
        }
    }
    
  2. Changing the type hint:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeClass $object) {
        $a = $object->a;
    }
    
Loading history...
172
173
        $this->spider->crawl();
174
175
        $expected = array(
176
            $this->linkA,
177
            $this->linkE,
178
            $this->linkF,
179
            $this->linkC,
180
            $this->linkG,
181
            $this->linkB,
182
            $this->linkD
183
        );
184
185
        $this->assertEquals($expected, $this->statsHandler->getPersisted());
186
    }
187
188
    /**
189
     * @covers VDB\Spider\Spider::crawl
190
     *
191
     */
192
    public function testCrawlBFSDefaultBehaviour()
193
    {
194
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
195
        $this->spider->getQueueManager()->maxDepth = 1000;
0 ignored issues
show
Bug introduced by
Accessing maxDepth on the interface VDB\Spider\QueueManager\QueueManager suggests that you are coding against a concrete implementation. How about adding an instanceof check?

If you access a property on an interface, you most likely code against a concrete implementation of the interface.

Available Fixes

  1. Adding an additional type check:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeInterface $object) {
        if ($object instanceof SomeClass) {
            $a = $object->a;
        }
    }
    
  2. Changing the type hint:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeClass $object) {
        $a = $object->a;
    }
    
Loading history...
196
197
        $this->spider->crawl();
198
199
        $expected = array(
200
            $this->linkA,
201
            $this->linkB,
202
            $this->linkC,
203
            $this->linkE,
204
            $this->linkD,
205
            $this->linkF,
206
            $this->linkG
207
        );
208
209
        $this->assertEquals($expected, $this->statsHandler->getPersisted());
210
    }
211
212
    /**
213
     * @covers VDB\Spider\Spider::crawl
214
     *
215
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
216
     */
217
    public function testCrawlDFSMaxDepthOne()
218
    {
219
        $this->spider->getQueueManager()->maxDepth = 1;
0 ignored issues
show
Bug introduced by
Accessing maxDepth on the interface VDB\Spider\QueueManager\QueueManager suggests that you are coding against a concrete implementation. How about adding an instanceof check?

If you access a property on an interface, you most likely code against a concrete implementation of the interface.

Available Fixes

  1. Adding an additional type check:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeInterface $object) {
        if ($object instanceof SomeClass) {
            $a = $object->a;
        }
    }
    
  2. Changing the type hint:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeClass $object) {
        $a = $object->a;
    }
    
Loading history...
220
221
        $this->spider->crawl();
222
223
        $expected = array(
224
            $this->linkA,
225
            $this->linkE,
226
            $this->linkC,
227
            $this->linkB,
228
        );
229
230
        $this->assertEquals($expected, $this->statsHandler->getPersisted());
231
    }
232
233
    public function testCrawlBFSMaxDepthOne()
234
    {
235
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
236
        $this->spider->getQueueManager()->maxDepth = 1;
0 ignored issues
show
Bug introduced by
Accessing maxDepth on the interface VDB\Spider\QueueManager\QueueManager suggests that you are coding against a concrete implementation. How about adding an instanceof check?

If you access a property on an interface, you most likely code against a concrete implementation of the interface.

Available Fixes

  1. Adding an additional type check:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeInterface $object) {
        if ($object instanceof SomeClass) {
            $a = $object->a;
        }
    }
    
  2. Changing the type hint:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeClass $object) {
        $a = $object->a;
    }
    
Loading history...
237
238
        $this->spider->crawl();
239
240
        $expected = array(
241
            $this->linkA,
242
            $this->linkB,
243
            $this->linkC,
244
            $this->linkE,
245
        );
246
247
        $this->assertEquals($expected, $this->statsHandler->getPersisted());
248
    }
249
250
    /**
251
     * @covers VDB\Spider\Spider::crawl
252
     */
253
    public function testCrawlDFSMaxQueueSize()
254
    {
255
        $this->spider->getQueueManager()->maxDepth = 1000;
0 ignored issues
show
Bug introduced by
Accessing maxDepth on the interface VDB\Spider\QueueManager\QueueManager suggests that you are coding against a concrete implementation. How about adding an instanceof check?

If you access a property on an interface, you most likely code against a concrete implementation of the interface.

Available Fixes

  1. Adding an additional type check:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeInterface $object) {
        if ($object instanceof SomeClass) {
            $a = $object->a;
        }
    }
    
  2. Changing the type hint:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeClass $object) {
        $a = $object->a;
    }
    
Loading history...
256
        $this->spider->downloadLimit = 3;
0 ignored issues
show
Documentation Bug introduced by
It seems like 3 of type integer is incompatible with the declared type object<VDB\Spider\the> of property $downloadLimit.

Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.

Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.

Loading history...
257
258
        $this->spider->crawl();
259
260
        $expected = array(
261
            $this->linkA,
262
            $this->linkE,
263
            $this->linkF,
264
        );
265
266
        $this->assertEquals($expected, $this->statsHandler->getPersisted());
267
    }
268
269
    public function testCrawlBFSMaxQueueSize()
270
    {
271
        $this->spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
272
        $this->spider->getQueueManager()->maxDepth = 1000;
0 ignored issues
show
Bug introduced by
Accessing maxDepth on the interface VDB\Spider\QueueManager\QueueManager suggests that you are coding against a concrete implementation. How about adding an instanceof check?

If you access a property on an interface, you most likely code against a concrete implementation of the interface.

Available Fixes

  1. Adding an additional type check:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeInterface $object) {
        if ($object instanceof SomeClass) {
            $a = $object->a;
        }
    }
    
  2. Changing the type hint:

    interface SomeInterface { }
    class SomeClass implements SomeInterface {
        public $a;
    }
    
    function someFunction(SomeClass $object) {
        $a = $object->a;
    }
    
Loading history...
273
        $this->spider->downloadLimit = 3;
0 ignored issues
show
Documentation Bug introduced by
It seems like 3 of type integer is incompatible with the declared type object<VDB\Spider\the> of property $downloadLimit.

Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.

Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.

Loading history...
274
275
        $this->spider->crawl();
276
277
        $expected = array(
278
            $this->linkA,
279
            $this->linkB,
280
            $this->linkC,
281
        );
282
283
        $this->assertEquals($expected, $this->statsHandler->getPersisted());
284
    }
285
286
    /**
287
     * Checks how a crawl is recorded when the request handler throws.
     *
     * Registers a stub on the mocked request handler whose `request` method
     * throws an Exception, runs the crawl, and asserts that the stats handler
     * reports no filtered items, no persisted items and exactly one failure.
     *
     * @covers VDB\Spider\Spider::crawl
288
     */
289
    public function testCrawlFailedRequest()
290
    {
291
        // NOTE(review): setUp() already registers a `request` stub on this mock;
        // this test relies on the throwing stub being the one that takes effect — confirm.
        $this->requestHandler
292
            ->expects($this->any())
293
            ->method('request')
294
            ->will(
295
                $this->throwException(new Exception('Failed mock request!'))
296
            );
297
298
        $this->spider->crawl();
299
        $stats = $this->statsHandler;
300
301
        $this->assertCount(0, $stats->getFiltered(), 'Filtered count');
302
        $this->assertCount(0, $stats->getPersisted(), 'Persisted count');
303
        $this->assertCount(1, $stats->getFailed(), 'Failed count');
304
    }
305
}
306