Completed
Push — master ( 7d72c0...456733 )
by Nils
02:48
created

Crawler   A

Complexity

Total Complexity 16

Size/Duplication

Total Lines 89
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 4

Importance

Changes 12
Bugs 4 Features 2
Metric Value
wmc 16
c 12
b 4
f 2
lcom 1
cbo 4
dl 0
loc 89
rs 10

8 Methods

Rating   Name   Duplication   Size   Complexity  
A getStartPage() 0 4 1
A setStartPage() 0 4 1
A setHttpClient() 0 4 1
A next() 0 18 4
A getComingFrom() 0 4 1
A init() 0 11 2
B initPageContainer() 0 13 5
A getOriginUri() 0 4 1
1
<?php
2
3
namespace whm\Smoke\Extensions\SmokeResponseRetriever\Retriever\Crawler;
4
5
use Ivory\HttpAdapter\HttpAdapterInterface;
6
use PhmLabs\Components\Init\Init;
7
use Psr\Http\Message\UriInterface;
8
use whm\Crawler\Crawler as whmCrawler;
9
use whm\Crawler\PageContainer\PatternAwareContainer;
10
use whm\Html\Uri;
11
use whm\Smoke\Extensions\SmokeResponseRetriever\Retriever\CrawlingRetriever;
12
use whm\Smoke\Http\Response;
13
14
/**
 * Crawling retriever that wraps a \whm\Crawler\Crawler.
 *
 * The wrapped crawler is not created by init(); it is built lazily on the
 * first successful call to next(), from the HTTP client, page container,
 * start page, parallel-request setting and filters configured beforehand.
 */
class Crawler implements CrawlingRetriever
{
    /**
     * Page the crawl starts from; null until init() or setStartPage() provides one.
     *
     * @var UriInterface|Uri|null
     */
    private $startPage;

    /**
     * HTTP client handed to the wrapped crawler; set through setHttpClient().
     *
     * @var HttpAdapterInterface|null
     */
    private $httpClient;

    /**
     * Value forwarded as the fourth constructor argument of the wrapped crawler
     * (presumably the number of requests issued in parallel - confirm against whm\Crawler\Crawler).
     *
     * @var mixed
     */
    private $parallelRequests;

    /**
     * True once the wrapped crawler has been built successfully.
     *
     * @var bool
     */
    private $started = false;

    /**
     * Result of Init::initializeAll() for the filter configuration given to init();
     * each element is registered on the wrapped crawler via addFilter().
     *
     * @var mixed
     */
    private $filters;

    /**
     * Result of Init::initialize() for the page container configuration given to init().
     *
     * @var mixed
     */
    private $pageContainer;

    /**
     * Wrapped crawler; null until the first successful call to next().
     *
     * @var \whm\Crawler\Crawler|null
     */
    private $crawler;

    /**
     * Configures the retriever.
     *
     * @param array       $filters          filter configuration, passed to Init::initializeAll()
     * @param mixed       $pageContainer    page container configuration, passed to Init::initialize()
     * @param string|null $startPage        optional start page; wrapped in a Uri when not null
     * @param int         $parallelRequests forwarded to the wrapped crawler's constructor
     */
    public function init(array $filters, $pageContainer, $startPage = null, $parallelRequests = 5)
    {
        $this->filters = Init::initializeAll($filters);
        if (!is_null($startPage)) {
            $this->startPage = new Uri($startPage);
        }

        $this->initPageContainer($pageContainer);

        $this->parallelRequests = $parallelRequests;
    }

    /**
     * Creates the page container and, for pattern-aware containers, registers
     * every entry found under ['parameters']['pattern'] of the configuration.
     *
     * @param mixed $pageContainerArray page container configuration
     */
    private function initPageContainer($pageContainerArray)
    {
        $this->pageContainer = Init::initialize($pageContainerArray);

        // @todo this should be done inside a factory
        if (!($this->pageContainer instanceof PatternAwareContainer)) {
            return;
        }

        // Guard the nested lookup: array_key_exists() must only ever see an array,
        // otherwise a missing or malformed 'parameters' entry raises a warning
        // (or a TypeError on PHP 8) instead of simply registering no patterns.
        $parameters = isset($pageContainerArray['parameters']) ? $pageContainerArray['parameters'] : null;
        if (!is_array($parameters) || !array_key_exists('pattern', $parameters)) {
            return;
        }

        foreach ($parameters['pattern'] as $name => $pattern) {
            $this->pageContainer->registerPattern($name, $pattern);
        }
    }

    /**
     * @return UriInterface|Uri|null the configured start page, or null when none is set
     */
    public function getStartPage()
    {
        return $this->startPage;
    }

    /**
     * Sets (or overrides) the start page of the crawl.
     *
     * @param UriInterface $startPage
     */
    public function setStartPage(UriInterface $startPage)
    {
        $this->startPage = $startPage;
    }

    /**
     * Sets the HTTP client that is handed to the wrapped crawler.
     *
     * @param HttpAdapterInterface $httpClient
     */
    public function setHttpClient(HttpAdapterInterface $httpClient)
    {
        $this->httpClient = $httpClient;
    }

    /**
     * Returns the result of the wrapped crawler's next(), building the crawler
     * on the first call.
     *
     * @return Response
     *
     * @throws \RuntimeException when no start page has been defined
     */
    public function next()
    {
        if (!$this->started) {
            if (is_null($this->startPage)) {
                throw new \RuntimeException('The crawler you are using needs a start page to work, but it is not defined. ');
            }

            $crawler = new whmCrawler($this->httpClient, $this->pageContainer, $this->startPage, $this->parallelRequests);

            foreach ($this->filters as $filter) {
                $crawler->addFilter($filter);
            }

            // Publish the crawler and flip the flag only after set-up succeeded.
            // Otherwise a failed first call would leave "started" set with no
            // crawler, and every later call would dereference null.
            $this->crawler = $crawler;
            $this->started = true;
        }

        return $this->crawler->next();
    }

    /**
     * Delegates to the wrapped crawler's getComingFrom() for the given URI.
     *
     * @param UriInterface $uri
     *
     * @return mixed whatever the wrapped crawler returns
     *
     * @throws \RuntimeException when the crawler has not been built yet, i.e. next() was never called successfully
     */
    public function getComingFrom(UriInterface $uri)
    {
        if (is_null($this->crawler)) {
            throw new \RuntimeException('The crawler has not been started yet. Call next() before getComingFrom().');
        }

        return $this->crawler->getComingFrom($uri);
    }

    /**
     * Returns the given URI unchanged.
     *
     * @param UriInterface $uri
     *
     * @return UriInterface
     */
    public function getOriginUri(UriInterface $uri)
    {
        return $uri;
    }
}
103