Completed
Pull Request — master (#150)
by Brent
03:17
created

CrawlRequestFulfilled::handleCrawled()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 10
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 10
rs 9.4285
cc 2
eloc 6
nc 2
nop 2
1
<?php
2
3
namespace Spatie\Crawler\Handlers;
4
5
use Spatie\Crawler\Crawler;
6
use Spatie\Crawler\CrawlUrl;
7
use Spatie\Crawler\LinkAdder;
8
use Spatie\Robots\RobotsMeta;
9
use Spatie\Robots\RobotsHeaders;
10
use Spatie\Crawler\CrawlSubdomains;
11
use Psr\Http\Message\StreamInterface;
12
use Psr\Http\Message\ResponseInterface;
13
14
class CrawlRequestFulfilled
15
{
16
    /**
     * Crawler this handler works for. It is queried for the crawl queue,
     * the maximum response size, the crawl profile, the base URL, the
     * registered observers and the robots setting.
     *
     * @var \Spatie\Crawler\Crawler
     */
17
    protected $crawler;
18
19
    /**
     * Helper created in the constructor from the crawler; it receives the
     * response HTML and the URL it came from via addFromHtml().
     *
     * @var \Spatie\Crawler\LinkAdder
     */
20
    protected $linkAdder;
21
22
    /**
     * Keep a reference to the crawler and create the link adder bound to it.
     *
     * @param \Spatie\Crawler\Crawler $crawler
     */
    public function __construct(Crawler $crawler)
    {
        $this->crawler = $crawler;
        $this->linkAdder = new LinkAdder($crawler);
    }
28
29
    /**
     * Process the response of a fulfilled crawl request.
     *
     * Steps, in order:
     *  1. Look up the queued URL that belongs to $index.
     *  2. Read the response body (capped at the crawler's maximum response size)
     *     and build the robots header / robots meta objects from it.
     *  3. Stop if indexing is not allowed; otherwise notify the crawl observers.
     *  4. Unless the crawl profile is CrawlSubdomains, stop when the URL's host
     *     differs from the base URL's host.
     *  5. If following is allowed, hand the body to the link adder.
     *
     * @param \Psr\Http\Message\ResponseInterface $response
     * @param mixed $index Identifier passed to the crawl queue's getUrlById().
     *
     * @return void
     */
    public function __invoke(ResponseInterface $response, $index)
    {
        $crawlUrl = $this->crawler->getCrawlQueue()->getUrlById($index);

        $html = $this->convertBodyToString(
            $response->getBody(),
            $this->crawler->getMaximumResponseSize()
        );

        $headerDirectives = RobotsHeaders::create($response->getHeaders());
        $metaDirectives = RobotsMeta::create($html);

        if (! $this->mayIndex($headerDirectives, $metaDirectives)) {
            return;
        }

        $this->handleCrawled($response, $crawlUrl);

        // Only the CrawlSubdomains profile may collect links from a host other than the base host.
        if (
            ! ($this->crawler->getCrawlProfile() instanceof CrawlSubdomains)
            && $crawlUrl->url->getHost() !== $this->crawler->getBaseUrl()->getHost()
        ) {
            return;
        }

        if ($this->mayFollow($headerDirectives, $metaDirectives)) {
            $this->linkAdder->addFromHtml($html, $crawlUrl->url);
        }
    }
57
58
    /**
     * Read at most $readMaximumBytes bytes of the response body into a string.
     *
     * The stream is rewound first, but only when it reports itself as seekable:
     * calling rewind() on a non-seekable stream raises an exception. The body is
     * then read in a loop, because a single read() call is allowed to return
     * fewer bytes than requested even though more data is still available.
     *
     * @param \Psr\Http\Message\StreamInterface $bodyStream
     * @param int $readMaximumBytes Upper bound on the number of bytes returned;
     *                              zero or a negative value yields an empty string.
     *
     * @return string
     */
    protected function convertBodyToString(StreamInterface $bodyStream, $readMaximumBytes = 1024 * 1024 * 2): string
    {
        if ($bodyStream->isSeekable()) {
            $bodyStream->rewind();
        }

        $body = '';
        $remaining = (int) $readMaximumBytes;

        while ($remaining > 0 && ! $bodyStream->eof()) {
            $chunk = $bodyStream->read($remaining);

            // An empty (or non-string) read means no progress can be made; stop
            // instead of spinning on a stream that never reports EOF.
            if (! is_string($chunk) || $chunk === '') {
                break;
            }

            $body .= $chunk;
            $remaining -= strlen($chunk);
        }

        return $body;
    }
66
67
    /**
     * Notify every crawl observer registered on the crawler that a URL was crawled.
     *
     * Each observer's crawled() method receives the crawled URL, the response
     * and the URL on which the crawled URL was found.
     *
     * @param \Psr\Http\Message\ResponseInterface $response
     * @param \Spatie\Crawler\CrawlUrl $crawlUrl
     *
     * @return void
     */
    protected function handleCrawled(ResponseInterface $response, CrawlUrl $crawlUrl)
    {
        $observers = $this->crawler->getCrawlObservers();

        foreach ($observers as $observer) {
            $observer->crawled($crawlUrl->url, $response, $crawlUrl->foundOnUrl);
        }
    }
77
78 View Code Duplication
    /**
     * Decide whether the crawled page may be indexed.
     *
     * Always true when the crawler does not have to respect robots rules;
     * otherwise both the robots headers and the robots meta must allow indexing.
     *
     * NOTE(review): automated analysis flagged this method as duplicated code;
     * it has the same shape as mayFollow().
     *
     * @param \Spatie\Robots\RobotsHeaders $robotsHeaders
     * @param \Spatie\Robots\RobotsMeta $robotsMeta
     *
     * @return bool
     */
    protected function mayIndex(RobotsHeaders $robotsHeaders, RobotsMeta $robotsMeta): bool
    {
        if (! $this->crawler->mustRespectRobots()) {
            return true;
        }

        // Headers are consulted first; the meta check is skipped when they already forbid indexing.
        return $robotsHeaders->mayIndex() && $robotsMeta->mayIndex();
    }
94
95 View Code Duplication
    /**
     * Decide whether links found on the crawled page may be followed.
     *
     * Always true when the crawler does not have to respect robots rules;
     * otherwise both the robots headers and the robots meta must allow following.
     *
     * NOTE(review): automated analysis flagged this method as duplicated code;
     * it has the same shape as mayIndex().
     *
     * @param \Spatie\Robots\RobotsHeaders $robotsHeaders
     * @param \Spatie\Robots\RobotsMeta $robotsMeta
     *
     * @return bool
     */
    protected function mayFollow(RobotsHeaders $robotsHeaders, RobotsMeta $robotsMeta): bool
    {
        if (! $this->crawler->mustRespectRobots()) {
            return true;
        }

        // Headers are consulted first; the meta check is skipped when they already forbid following.
        return $robotsHeaders->mayFollow() && $robotsMeta->mayFollow();
    }
111
}
112