Completed
Push — master ( 85e5eb...cc11ea )
by Nils
02:13
created

Crawler::addPage()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 4
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 1
1
<?php
2
3
namespace whm\Smoke\Extensions\SmokeResponseRetriever\Retriever\Crawler;
4
5
use phm\HttpWebdriverClient\Http\Client\HttpClient;
6
use PhmLabs\Components\Init\Init;
7
use Psr\Http\Message\ResponseInterface;
8
use Psr\Http\Message\UriInterface;
9
use whm\Crawler\Crawler as whmCrawler;
10
use whm\Crawler\PageContainer\PageContainer;
11
use whm\Crawler\PageContainer\PatternAwareContainer;
12
use whm\Html\Uri;
13
use whm\Smoke\Extensions\SmokeResponseRetriever\Retriever\CrawlingRetriever;
14
use whm\Smoke\Scanner\SessionContainer;
15
16
/**
 * Retriever that wraps a \whm\Crawler\Crawler and hands out its responses one by one.
 *
 * The underlying crawler is created lazily on the first call to next(), using the
 * start page, HTTP client, page container and filters configured beforehand.
 */
class Crawler implements CrawlingRetriever
{
    /**
     * @var UriInterface|null page handed to the underlying crawler as its start page
     */
    private $startPage;

    /**
     * @var HttpClient|null client handed to the underlying crawler; set via setHttpClient()
     */
    private $httpClient;

    /**
     * @var int|null value forwarded as the last constructor argument of the underlying crawler
     */
    private $parallelRequests;

    /**
     * @var bool true once the underlying crawler has been created and all filters were added
     */
    private $started = false;

    /**
     * @var mixed result of Init::initializeAll(); iterated in next() to register filters
     */
    private $filters;

    /**
     * @var PageContainer
     */
    private $pageContainer;

    /**
     * @var \whm\Crawler\Crawler|null
     */
    private $crawler;

    /**
     * Configures the retriever.
     *
     * @param array       $filters          filter definitions, passed to Init::initializeAll()
     * @param mixed       $pageContainer    page container definition, passed to Init::initialize();
     *                                      may carry a ['parameters']['pattern'] map (see initPageContainer())
     * @param string|null $startPage        start page; wrapped in a Uri when not null
     * @param int         $parallelRequests forwarded to the underlying crawler
     */
    public function init(array $filters, $pageContainer, $startPage = null, $parallelRequests = 5)
    {
        $this->filters = Init::initializeAll($filters);

        if ($startPage !== null) {
            $this->startPage = new Uri($startPage);
        }

        $this->initPageContainer($pageContainer);
        $this->parallelRequests = $parallelRequests;
    }

    /**
     * Pushes an additional URI onto the page container.
     *
     * @param UriInterface $uri
     */
    public function addPage(UriInterface $uri)
    {
        // NOTE(review): the meaning of the second argument (true) is defined by
        // PageContainer::push(), which is not visible here - confirm before changing.
        $this->pageContainer->push($uri, true);
    }

    /**
     * Creates the page container and, for pattern-aware containers, registers the
     * patterns found under ['parameters']['pattern'] of the definition.
     *
     * @param mixed $pageContainerArray page container definition
     */
    private function initPageContainer($pageContainerArray)
    {
        $this->pageContainer = Init::initialize($pageContainerArray);

        // @todo this should be done inside a factory
        if (!($this->pageContainer instanceof PatternAwareContainer)) {
            return;
        }

        if (!array_key_exists('parameters', $pageContainerArray)
            || !array_key_exists('pattern', $pageContainerArray['parameters'])
        ) {
            return;
        }

        foreach ($pageContainerArray['parameters']['pattern'] as $name => $pattern) {
            $this->pageContainer->registerPattern($name, $pattern);
        }
    }

    /**
     * @return UriInterface|null the configured start page, or null if none was set
     */
    public function getStartPage()
    {
        return $this->startPage;
    }

    /**
     * @param UriInterface $startPage
     */
    public function setStartPage(UriInterface $startPage)
    {
        $this->startPage = $startPage;
    }

    /**
     * @param HttpClient $httpClient
     */
    public function setHttpClient(HttpClient $httpClient)
    {
        $this->httpClient = $httpClient;
    }

    /**
     * Returns the next result of the underlying crawler, creating the crawler on first use.
     *
     * @return ResponseInterface
     *
     * @throws \RuntimeException if no start page has been defined
     */
    public function next()
    {
        if (!$this->started) {
            if ($this->startPage === null) {
                throw new \RuntimeException('The crawler you are using needs a start page to work, but it is not defined. ');
            }

            $crawler = new whmCrawler($this->httpClient, $this->pageContainer, $this->startPage, $this->parallelRequests);

            foreach ($this->filters as $filter) {
                $crawler->addFilter($filter);
            }

            // Mark as started only after setup succeeded. Otherwise a failed first call
            // (e.g. missing start page) would make every later call dereference a null crawler.
            $this->crawler = $crawler;
            $this->started = true;
        }

        return $this->crawler->next();
    }

    /**
     * Delegates to the underlying crawler's getComingFrom().
     *
     * @param UriInterface $uri
     *
     * @return mixed whatever the underlying crawler returns
     *
     * @throws \RuntimeException if the crawler has not been created yet (next() was never called successfully)
     */
    public function getComingFrom(UriInterface $uri)
    {
        if ($this->crawler === null) {
            throw new \RuntimeException('The crawler has not been started yet; call next() before getComingFrom().');
        }

        return $this->crawler->getComingFrom($uri);
    }

    /**
     * Returns the given URI unchanged.
     *
     * @param UriInterface $uri
     *
     * @return UriInterface
     */
    public function getOriginUri(UriInterface $uri)
    {
        return $uri;
    }

    /**
     * No-op: this implementation does nothing with the session container.
     *
     * @param SessionContainer $sessionContainer
     */
    public function setSessionContainer(SessionContainer $sessionContainer)
    {
    }
}
115