Completed
Pull Request — master (#27)
by Matthijs
03:18
created

SpiderTest   A

Complexity

Total Complexity 12

Size/Duplication

Total Lines 341
Duplicated Lines 24.63 %

Coupling/Cohesion

Components 1
Dependencies 12

Importance

Changes 22
Bugs 2 Features 5
Metric Value
wmc 12
c 22
b 2
f 5
lcom 1
cbo 12
dl 84
loc 341
rs 10

10 Methods

Rating   Name   Duplication   Size   Complexity  
B setUp() 0 78 1
A doTestRequest() 0 10 2
A testCrawlDFSDefaultBehaviour() 18 18 1
A testCrawlBFSDefaultBehaviour() 19 19 1
A compareUriArray() 0 6 2
A testCrawlDFSMaxDepthOne() 0 15 1
A testCrawlBFSMaxDepthOne() 16 16 1
A testCrawlDFSMaxQueueSize() 15 15 1
A testCrawlBFSMaxQueueSize() 16 16 1
A testCrawlFailedRequest() 0 16 1

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A common rule of thumb is to restructure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
namespace VDB\Spider;
3
4
use Exception;
5
use GuzzleHttp\Psr7\Response;
6
use PHPUnit_Framework_MockObject_MockObject;
7
use VDB\Spider\Discoverer\XPathExpressionDiscoverer;
8
use VDB\Spider\Tests\TestCase;
9
use VDB\Spider\QueueManager\InMemoryQueueManager;
10
use VDB\Spider\StatsHandler;
11
use VDB\Spider\Uri\DiscoveredUri;
12
use VDB\Uri\Uri;
13
14
/**
15
 */
16
class SpiderTest extends TestCase
17
{
18
    /**
19
     * @var Spider
20
     */
21
    protected $spider;
22
23
    /**
24
     * @var LogHandler
25
     */
26
    protected $logHandler;
27
28
    /**
29
     * @var StatsHandler
30
     */
31
    protected $statsHandler;
32
33
    /**
34
     * @var PHPUnit_Framework_MockObject_MockObject
35
     */
36
    protected $requestHandler;
37
38
    /** @var DiscoveredUri */
39
    protected $linkA;
40
    /** @var DiscoveredUri */
41
    protected $linkB;
42
    /** @var DiscoveredUri */
43
    protected $linkC;
44
    /** @var DiscoveredUri */
45
    protected $linkD;
46
    /** @var DiscoveredUri */
47
    protected $linkE;
48
    /** @var DiscoveredUri */
49
    protected $linkF;
50
    /** @var DiscoveredUri */
51
    protected $linkG;
52
53
    /** @var Response */
54
    protected $responseA;
55
    /** @var Response */
56
    protected $responseB;
57
    /** @var Response */
58
    protected $responseC;
59
    /** @var Response */
60
    protected $responseD;
61
    /** @var Response */
62
    protected $responseE;
63
    /** @var Response */
64
    protected $responseF;
65
    /** @var Response */
66
    protected $responseG;
67
68
    /** @var string */
69
    protected $hrefA;
70
    protected $hrefB;
71
    protected $hrefC;
72
    protected $hrefD;
73
    protected $hrefE;
74
    protected $hrefF;
75
    protected $hrefG;
76
77
    /**
78
     * @var array An associative array, containing a map of $this->linkX to $this->responseX.
79
     */
80
    protected $linkToResponseMap = [];
81
82
    /**
83
     * Sets up the fixture, for example, opens a network connection.
84
     * This method is called before a test is executed.
85
     *
86
     * Setting up the following structure:
87
     *
88
     * 0:        A
89
     *          /|\
90
     * 1:      B C E
91
     *        /| | |
92
     * 2:    D F G |
93
     *         | _ |
94
     *
95
     * Note: E links to F.
96
     */
97
    protected function setUp()
    {
        $this->spider = new Spider('http://php-spider.org/A');

        $this->requestHandler = $this->getMock('VDB\Spider\RequestHandler\RequestHandlerInterface');

        // Node letter => depth assigned to the corresponding link fixture.
        // Iteration order (A..G) is also the insertion order of linkToResponseMap.
        $depthByNode = [
            'A' => 0,
            'B' => 1,
            'C' => 1,
            'D' => 2,
            'E' => 1,
            'F' => 2,
            'G' => 2,
        ];

        foreach ($depthByNode as $node => $depth) {
            $href = 'http://php-spider.org/' . $node;

            $link = new DiscoveredUri(new Uri($href));
            $link->setDepthFound($depth);

            // Each node has its own HTML fixture file that becomes the stubbed response body.
            $html = file_get_contents(__DIR__ . '/Fixtures/SpiderTestHTMLResource' . $node . '.html');
            $response = new Response(200, [], $html);

            // Populate the declared hrefX / linkX / responseX properties.
            $this->{'href' . $node} = $href;
            $this->{'link' . $node} = $link;
            $this->{'response' . $node} = $response;

            $this->linkToResponseMap[$link->toString()] = $response;
        }

        // Every call to request() on the mock is answered by doTestRequest().
        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->will($this->returnCallback([$this, 'doTestRequest']));

        $this->spider->getDownloader()->setRequestHandler($this->requestHandler);

        $this->spider->getDiscovererSet()->set(new XPathExpressionDiscoverer('//a'));

        // Registers a subscriber on the spider, queue manager and downloader
        // dispatchers, in that order.
        $spider = $this->spider;
        $subscribeEverywhere = function ($subscriber) use ($spider) {
            $spider->getDispatcher()->addSubscriber($subscriber);
            $spider->getQueueManager()->getDispatcher()->addSubscriber($subscriber);
            $spider->getDownloader()->getDispatcher()->addSubscriber($subscriber);
        };

        $this->statsHandler = new StatsHandler();
        $subscribeEverywhere($this->statsHandler);

        $this->logHandler = new LogHandler();
        $subscribeEverywhere($this->logHandler);
    }
175
176
    /**
177
     * @return Resource
178
     * @throws \ErrorException
179
     */
180
    public function doTestRequest()
    {
        // The URI being requested is the first argument handed to the callback.
        $requested = func_get_arg(0);
        $key = $requested->toString();

        // Fail loudly for any URI that setUp() did not register a response for.
        if (!array_key_exists($key, $this->linkToResponseMap)) {
            throw new \ErrorException('The requested URI was not stubbed: ' . $key);
        }

        return $this->getResource($requested, $this->linkToResponseMap[$key]);
    }
190
191
    /**
192
     * @covers VDB\Spider\Spider
193
     *
194
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
195
     */
196 View Code Duplication
    public function testCrawlDFSDefaultBehaviour()
    {
        $spider = $this->spider;
        $spider->getDiscovererSet()->maxDepth = 10;

        $spider->crawl();

        // Order in which the fixture links are expected to be persisted.
        $visitOrder = [
            $this->linkA,
            $this->linkE,
            $this->linkF,
            $this->linkC,
            $this->linkG,
            $this->linkB,
            $this->linkD,
        ];

        $this->compareUriArray($visitOrder, $spider->getDownloader()->getPersistenceHandler());
    }
214
215
    /**
216
     * @covers VDB\Spider\Spider
217
     */
218 View Code Duplication
    public function testCrawlBFSDefaultBehaviour()
    {
        $spider = $this->spider;
        $spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $spider->getDiscovererSet()->maxDepth = 1000;

        $spider->crawl();

        // Order in which the fixture links are expected to be persisted.
        $visitOrder = [
            $this->linkA,
            $this->linkB,
            $this->linkC,
            $this->linkE,
            $this->linkD,
            $this->linkF,
            $this->linkG,
        ];

        $this->compareUriArray($visitOrder, $spider->getDownloader()->getPersistenceHandler());
    }
237
238
    /**
     * Asserts that the persisted resources match the expected URIs, in order.
     *
     * Each resource yielded by $actual must expose (via getUri()) a URI equal to
     * the entry of $expected stored under the same key, and $actual must yield
     * exactly as many resources as $expected contains. The count check matters:
     * iterating $actual alone would let a crawl that persisted too few (or no)
     * resources pass with fewer (or zero) assertions.
     *
     * @param array              $expected URIs in the order they should have been persisted
     * @param array|\Traversable $actual   Iterable of resources, each providing getUri()
     */
    private function compareUriArray($expected, $actual)
    {
        $seen = 0;

        foreach ($actual as $index => $resource) {
            // Report surplus resources as an assertion failure rather than an undefined-index notice.
            $this->assertArrayHasKey($index, $expected, 'Unexpected extra persisted resource');

            // PHPUnit's argument order is (expected, actual); keeps failure output labelled correctly.
            $this->assertEquals($expected[$index], $resource->getUri());
            $seen++;
        }

        $this->assertSame(count($expected), $seen, 'Persisted resource count');
    }
244
245
    /**
246
     * @covers VDB\Spider\Spider
247
     *
248
     * Behaviour as explained here: https://en.wikipedia.org/wiki/Depth-first_search#Example
249
     *
250
     * Given the following structure:
251
     *
252
     * 0:        A
253
     *          /|\
254
     * 1:      B C E
255
     *        /| | |
256
     * 2:    D F G |
257
     *         | _ |
258
     *
259
     * We expect the following result: A, E, C, B
260
     *
261
     */
262
    public function testCrawlDFSMaxDepthOne()
    {
        $spider = $this->spider;
        $spider->getDiscovererSet()->maxDepth = 1;

        $spider->crawl();

        // Only the root and its direct children are expected to be persisted.
        $visitOrder = [
            $this->linkA,
            $this->linkE,
            $this->linkC,
            $this->linkB,
        ];

        $this->compareUriArray($visitOrder, $spider->getDownloader()->getPersistenceHandler());
    }
277
278
    /**
279
     * @covers VDB\Spider\Spider
280
     */
281 View Code Duplication
    public function testCrawlBFSMaxDepthOne()
    {
        $spider = $this->spider;
        $spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $spider->getDiscovererSet()->maxDepth = 1;

        $spider->crawl();

        // Only the root and its direct children are expected to be persisted.
        $visitOrder = [
            $this->linkA,
            $this->linkB,
            $this->linkC,
            $this->linkE,
        ];

        $this->compareUriArray($visitOrder, $spider->getDownloader()->getPersistenceHandler());
    }
297
298
    /**
299
     * @covers VDB\Spider\Spider
300
     */
301 View Code Duplication
    public function testCrawlDFSMaxQueueSize()
    {
        $spider = $this->spider;
        $spider->getDiscovererSet()->maxDepth = 1000;
        $spider->getDownloader()->setDownloadLimit(3);

        $spider->crawl();

        // With a download limit of 3, three links are expected to be persisted.
        $visitOrder = [
            $this->linkA,
            $this->linkE,
            $this->linkF,
        ];

        $this->compareUriArray($visitOrder, $spider->getDownloader()->getPersistenceHandler());
    }
316
317
    /**
318
     * @covers VDB\Spider\Spider
319
     */
320 View Code Duplication
    public function testCrawlBFSMaxQueueSize()
    {
        $spider = $this->spider;
        $spider->getQueueManager()->setTraversalAlgorithm(InMemoryQueueManager::ALGORITHM_BREADTH_FIRST);
        $spider->getDiscovererSet()->maxDepth = 1000;
        $spider->getDownloader()->setDownloadLimit(3);

        $spider->crawl();

        // With a download limit of 3, three links are expected to be persisted.
        $visitOrder = [
            $this->linkA,
            $this->linkB,
            $this->linkC,
        ];

        $this->compareUriArray($visitOrder, $spider->getDownloader()->getPersistenceHandler());
    }
336
337
    /**
338
     * @covers VDB\Spider\Spider
339
     */
340
    public function testCrawlFailedRequest()
    {
        $failure = new Exception('Failed mock request!');

        // Stub request() on the mock so that it throws.
        $this->requestHandler
            ->expects($this->any())
            ->method('request')
            ->will($this->throwException($failure));

        $this->spider->crawl();

        $persisted = $this->spider->getDownloader()->getPersistenceHandler();
        $failed = $this->statsHandler->getFailed();

        $this->assertCount(0, $persisted, 'Persisted count');
        $this->assertCount(1, $failed, 'Failed count');
    }
356
}
357