1 | <?php |
||
16 | class Crawler implements CrawlingRetriever |
||
17 | { |
||
/**
 * Page the crawl starts from. init() stores a Uri built from its
 * $startPage argument here when that argument is not null.
 */
private $startPage;

/**
 * HTTP client used by the crawler.
 * NOTE(review): presumably assigned through setHttpClient() - confirm.
 */
private $httpClient;

/**
 * Parallel request setting as passed to init() (init() defaults it to 5).
 */
private $parallelRequests;

/**
 * Starts out as false.
 * NOTE(review): presumably flipped once crawling has begun - confirm.
 */
private $started = false;

/**
 * Whatever Init::initializeAll() returned for the filter configuration
 * handed to init().
 */
private $filters;

/**
 * @var PageContainer
 */
private $pageContainer;

/**
 * @var \whm\Crawler\Crawler
 */
private $crawler;
||
34 | |||
/**
 * Configures the retriever before crawling.
 *
 * Steps, in order: the filter configuration is run through
 * Init::initializeAll() and stored; an optional start page is wrapped in a
 * Uri; the page container configuration is handed to initPageContainer();
 * finally the parallel request setting is recorded.
 *
 * @param array $filters          filter configuration passed to Init::initializeAll()
 * @param mixed $pageContainer    forwarded unchanged to initPageContainer()
 *                                (presumably a configuration array - confirm)
 * @param mixed $startPage        value accepted by the Uri constructor, or null
 *                                to leave the start page untouched
 * @param int   $parallelRequests stored as-is; defaults to 5
 */
public function init(array $filters, $pageContainer, $startPage = null, $parallelRequests = 5)
{
    $initializedFilters = Init::initializeAll($filters);
    $this->filters = $initializedFilters;

    // Only replace the start page when the caller actually supplied one.
    if ($startPage !== null) {
        $startUri = new Uri($startPage);
        $this->startPage = $startUri;
    }

    $this->initPageContainer($pageContainer);

    $this->parallelRequests = $parallelRequests;
}
||
45 | |||
46 | public function addPage(UriInterface $uri) |
||
50 | |||
51 | private function initPageContainer($pageContainerArray) |
||
64 | |||
65 | public function getStartPage() |
||
69 | |||
70 | public function setStartPage(UriInterface $startPage) |
||
74 | |||
75 | public function setHttpClient(HttpClient $httpClient) |
||
79 | |||
80 | /** |
||
81 | * @return ResponseInterface |
||
82 | */ |
||
83 | public function next() |
||
101 | |||
102 | public function getComingFrom(UriInterface $uri) |
||
106 | |||
107 | public function getOriginUri(UriInterface $uri) |
||
111 | |||
112 | public function setSessionContainer(SessionContainer $sessionContainer) |
||
115 | |||
116 | public function getOccuredExceptions() |
||
120 | } |
||
121 |