Passed
Push — master ( b2f1dc...ac4dad )
by Dev
13:52
created

CrawlerContinue::loadFromPreviousCrawl()   B

Complexity

Conditions 7
Paths 8

Size

Total Lines 27
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 16
dl 0
loc 27
rs 8.8333
c 0
b 0
f 0
cc 7
nc 8
nop 1
1
<?php
2
3
namespace PiedWeb\SeoPocketCrawler;
4
5
use Spatie\Robots\RobotsTxt;
6
use League\Csv\Reader;
7
8
/**
 * Crawler that resumes a previous crawl from the files stored in its data folder.
 *
 * The constructor does not call the parent constructor: every setting is read back
 * from `config.json`, and the already-known URLs are rebuilt from `index.csv`.
 */
class CrawlerContinue extends Crawler
{
    /**
     * Restores the crawler settings and the known URLs of a previous crawl.
     *
     * @param string $id identifier of the previous crawl (surrounding whitespace is trimmed)
     *
     * @throws \Exception when `config.json` is missing or cannot be decoded,
     *                    or when `index.csv` is missing
     */
    public function __construct(string $id)
    {
        $this->id = trim($id);

        // NOTE(review): getDataFolder() is not defined in this class; presumably
        // inherited from Crawler and derived from $this->id — confirm.
        $configFilePath = $this->getDataFolder().'/config.json';
        if (!file_exists($configFilePath)) {
            throw new \Exception('previous crawl not found (config.json)');
        }

        // An unreadable file (false) or malformed JSON (null) must not reach the
        // array accesses below, so both are turned into an explicit failure.
        $config = json_decode((string) file_get_contents($configFilePath), true);
        if (!is_array($config)) {
            throw new \Exception('previous crawl config is not valid (config.json)');
        }

        $this->ignore = new RobotsTxt($config['ignore']);
        $this->limit = $config['limit'];
        $this->userAgent = $config['userAgent'];
        $this->wait = $config['wait'];
        $this->base = $config['base'];

        $this->recorder = new Recorder($this->getDataFolder(), (int) $config['cacheMethod']);
        $this->loadFromPreviousCrawl($config['startUrl']);
    }

    /**
     * Rebuilds `$this->urls`, `$this->counter` and `$this->currentClick` from `index.csv`.
     *
     * Every CSV row becomes a Url keyed by its `uri` column, and every column of the
     * row is copied onto that Url as a property. Rows with a non-empty
     * `can_be_crawled` value increment the counter. `currentClick` takes the `click`
     * value of the last row; when the file holds no data row it is left untouched.
     *
     * @param string $startUrl start URL of the previous crawl, returned unchanged
     *
     * @return string the given `$startUrl`
     *
     * @throws \Exception when `index.csv` does not exist in the data folder
     */
    protected function loadFromPreviousCrawl(string $startUrl)
    {
        $resultFilePath = $this->getDataFolder().'/index.csv';
        if (!file_exists($resultFilePath)) {
            throw new \Exception('previous crawl not found (index.csv)');
        }

        $csv = Reader::createFromPath($resultFilePath, 'r');
        // The first line holds the column names, so records are keyed by header.
        $csv->setHeaderOffset(0);

        $lastRow = null;
        foreach ($csv->getRecords() as $row) {
            $uri = $row['uri'];
            $url = new Url($this->base.$uri, 0);
            $this->urls[$uri] = $url;

            foreach ($row as $column => $value) {
                // Strict comparison: a numeric header becomes an int key, which a
                // loose `==` against a string could match on older PHP versions.
                if ('can_be_crawled' === $column && !empty($value)) {
                    $value = (bool) $value;
                }
                $url->$column = $value;
            }

            if (!empty($row['can_be_crawled'])) {
                ++$this->counter;
            }

            $lastRow = $row;
        }

        // The loop variable does not exist when the CSV has no data row, so the
        // click depth is only restored when at least one record was read.
        if (null !== $lastRow) {
            $this->currentClick = $lastRow['click'];
        }

        return $startUrl;
    }
}
59