Completed
Pull Request — master (#150)
by Brent
01:28
created

CrawlRequestFulfilled::__invoke()   B

Complexity

Conditions 5
Paths 6

Size

Total Lines 26
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 26
rs 8.439
c 0
b 0
f 0
cc 5
eloc 13
nc 6
nop 2
1
<?php
2
3
namespace Spatie\Crawler\Handlers;
4
5
use Spatie\Crawler\Crawler;
6
use Spatie\Crawler\CrawlerRobots;
7
use Spatie\Crawler\CrawlUrl;
8
use Spatie\Crawler\LinkAdder;
9
use Spatie\Crawler\CrawlSubdomains;
10
use Psr\Http\Message\StreamInterface;
11
use Psr\Http\Message\ResponseInterface;
12
13
/**
 * Invokable handler for a successfully received crawl response.
 *
 * Given a response and the id of the queued URL it belongs to, it notifies
 * the crawl observers and, when robots rules and the host check allow it,
 * hands the response body to the LinkAdder so new links can be queued.
 */
class CrawlRequestFulfilled
{
    /** @var \Spatie\Crawler\Crawler */
    protected $crawler;

    /** @var \Spatie\Crawler\LinkAdder */
    protected $linkAdder;

    /**
     * @param \Spatie\Crawler\Crawler $crawler Crawler this handler reports to;
     *                                         also used to build the LinkAdder.
     */
    public function __construct(Crawler $crawler)
    {
        $this->crawler = $crawler;

        $this->linkAdder = new LinkAdder($this->crawler);
    }

    /**
     * Process a fulfilled response.
     *
     * @param \Psr\Http\Message\ResponseInterface $response
     * @param mixed $index Id used to look up the matching CrawlUrl in the crawl queue.
     */
    public function __invoke(ResponseInterface $response, $index)
    {
        $robots = new CrawlerRobots($response, $this->crawler->mustRespectRobots());

        // Not indexable: bail out before the observers are notified.
        if (! $robots->mayIndex()) {
            return;
        }

        $crawlUrl = $this->crawler->getCrawlQueue()->getUrlById($index);

        $this->handleCrawled($response, $crawlUrl);

        // Unless the profile is CrawlSubdomains, links are only collected from
        // pages whose host is exactly the base URL's host.
        if (! $this->crawler->getCrawlProfile() instanceof CrawlSubdomains) {
            if ($crawlUrl->url->getHost() !== $this->crawler->getBaseUrl()->getHost()) {
                return;
            }
        }

        // The page was reported as crawled, but its links must not be followed.
        if (! $robots->mayFollow()) {
            return;
        }

        $body = $this->convertBodyToString($response->getBody(), $this->crawler->getMaximumResponseSize());

        $this->linkAdder->addFromHtml($body, $crawlUrl->url);
    }

    /**
     * Notify the crawler's observers that the given URL was crawled.
     *
     * @param \Psr\Http\Message\ResponseInterface $response
     * @param \Spatie\Crawler\CrawlUrl $crawlUrl
     */
    protected function handleCrawled(ResponseInterface $response, CrawlUrl $crawlUrl)
    {
        $this->crawler->getCrawlObservers()->crawled($crawlUrl, $response);
    }

    /**
     * Read at most $readMaximumBytes bytes from the stream and return them as a string.
     *
     * @param \Psr\Http\Message\StreamInterface $bodyStream
     * @param int $readMaximumBytes Upper bound on the number of bytes read (2 MiB by default).
     *
     * @return string
     */
    protected function convertBodyToString(StreamInterface $bodyStream, $readMaximumBytes = 1024 * 1024 * 2): string
    {
        // rewind() throws on streams that cannot seek (e.g. streamed bodies),
        // so only rewind when the stream supports it.
        if ($bodyStream->isSeekable()) {
            $bodyStream->rewind();
        }

        $body = '';

        // A single read() may legitimately return fewer bytes than requested,
        // so keep reading until EOF or until the byte limit is reached.
        while (! $bodyStream->eof()) {
            $remainingBytes = $readMaximumBytes - strlen($body);

            if ($remainingBytes <= 0) {
                break;
            }

            $chunk = (string) $bodyStream->read($remainingBytes);

            // No data available although EOF was not reported: stop instead of spinning.
            if ($chunk === '') {
                break;
            }

            $body .= $chunk;
        }

        return $body;
    }
}
69