CrawlerUrlFromCache   A
last analyzed

Complexity

Total Complexity 8

Size/Duplication

Total Lines 34
Duplicated Lines 0 %

Importance

Changes 3
Bugs 1 Features 0
Metric Value
eloc 21
c 3
b 1
f 0
dl 0
loc 34
rs 10
wmc 8

1 Method

Rating   Name   Duplication   Size   Complexity  
B getHarvester() 0 32 8
1
<?php
2
3
namespace PiedWeb\SeoPocketCrawler;
4
5
use PiedWeb\Curl\ResponseFromCache;
6
use PiedWeb\UrlHarvester\Harvest;
7
8
/**
 * Crawler URL whose harvest is rebuilt from the recorder's cache files when
 * they exist, delegating to the parent implementation otherwise.
 */
class CrawlerUrlFromCache extends CrawlerUrl
{
    /**
     * Return the harvest for this URL, memoized in $this->harvest.
     *
     * Resolution order:
     *  - already computed: return the stored value;
     *  - no cache file (or the cache file cannot be read): delegate to
     *    parent::getHarvester();
     *  - cache file starting with 'curl_error_code:': keep the recorded code
     *    when it is 42, otherwise delegate to parent::getHarvester();
     *  - any other cache file: build a Harvest from a ResponseFromCache.
     *
     * @return mixed a Harvest, the cached error code (string) when that code is 42,
     *               or whatever parent::getHarvester() returns
     */
    public function getHarvester()
    {
        if (null !== $this->harvest) {
            return $this->harvest;
        }

        $filePath = $this->config->getRecorder()->getCacheFilePath($this->url);

        // file_get_contents() returns false on failure; an unreadable cache file
        // is handled exactly like a missing one instead of being parsed as a response.
        $cachedContent = (null !== $filePath && file_exists($filePath))
            ? file_get_contents($filePath)
            : false;

        if (false === $cachedContent) {
            $this->harvest = parent::getHarvester();
        } elseif (0 === strpos($cachedContent, 'curl_error_code:')) {
            // The cache holds a recorded error code rather than a response body.
            $this->harvest = substr($cachedContent, strlen('curl_error_code:'));
            // Loose comparison kept on purpose: the code is read back as a string.
            // NOTE(review): 42 presumably is cURL's "aborted by callback" code, used
            // here for "response too big" -- confirm against PiedWeb\Curl.
            if (42 != $this->harvest) {
                // NOTE(review): $this->harvest still holds the error code while the
                // parent runs; confirm the parent does not short-circuit on it.
                $this->harvest = parent::getHarvester(); // retry if was not stopped because too big
            }
        } else {
            $response = new ResponseFromCache(
                $filePath, // todo: push a PR on PiedWeb\Curl to permit to create ResponseFromCacheString
                $this->config->getBase().$this->url->getUri(),
                json_decode(file_get_contents($filePath.'---info'), true)
            );

            $this->harvest = new Harvest($response);
        }

        if ($this->harvest instanceof Harvest && null !== $this->config->getRobotsTxtCached()) {
            $this->harvest->setRobotsTxt($this->config->getRobotsTxtCached());
        }

        // Return the memoized value directly: re-entering getHarvester() here
        // would recurse without bound if the harvest ended up null.
        return $this->harvest;
    }
}
44