Completed
Push — master ( c3dfe8...698260 )
by Freek
05:56
created

SitemapGenerator::setMaximumCrawlCount()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 4
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 1
1
<?php
2
3
namespace Spatie\Sitemap;
4
5
use Spatie\Crawler\Crawler;
6
use Spatie\Sitemap\Tags\Url;
7
use Spatie\Crawler\CrawlProfile;
8
use Spatie\Sitemap\Crawler\Profile;
9
use Spatie\Sitemap\Crawler\Observer;
10
use Spatie\Crawler\Url as CrawlerUrl;
11
use Psr\Http\Message\ResponseInterface;
12
13
/**
 * Builds a Sitemap by crawling a site, starting from a given URL.
 *
 * All configuration methods return the generator itself so calls can be
 * chained, e.g. create($url)->setConcurrency(1)->writeToFile($path).
 */
class SitemapGenerator
{
    /** @var \Spatie\Sitemap\Sitemap Sitemap the crawl results are added to. */
    protected $sitemap;

    /** @var string URL handed to the crawler as the starting point. */
    protected $urlToBeCrawled = '';

    /** @var \Spatie\Crawler\Crawler */
    protected $crawler;

    /** @var callable|null Optional user filter; stays null until shouldCrawl() is called. */
    protected $shouldCrawl;

    /** @var callable Receives (Url, ResponseInterface|null); a truthy return value is added to the sitemap. */
    protected $hasCrawled;

    /** @var int Concurrency value passed to the crawler. */
    protected $concurrency = 10;

    /** @var int|null Crawl limit passed to the crawler; null means the limit is never set by this class. */
    protected $maximumCrawlCount = null;

    /**
     * Resolve a generator through the application container and point it at a URL.
     *
     * @param string $urlToBeCrawled
     *
     * @return static
     */
    public static function create(string $urlToBeCrawled)
    {
        return app(static::class)->setUrl($urlToBeCrawled);
    }

    /**
     * @param \Spatie\Crawler\Crawler $crawler Crawler used by getSitemap().
     */
    public function __construct(Crawler $crawler)
    {
        $this->crawler = $crawler;

        $this->sitemap = new Sitemap();

        // Default callback: keep every crawled URL unchanged.
        // $response is intentionally unused here; it is declared so the default
        // mirrors the two arguments every hasCrawled callback is invoked with.
        $this->hasCrawled = function (Url $url, ResponseInterface $response = null) {
            return $url;
        };
    }

    /**
     * Set the concurrency value that is passed to the crawler.
     *
     * @param int $concurrency
     *
     * @return $this
     */
    public function setConcurrency(int $concurrency)
    {
        $this->concurrency = $concurrency;

        return $this;
    }

    /**
     * Set the maximum crawl count that is passed to the crawler.
     *
     * @param int $maximumCrawlCount
     *
     * @return $this
     */
    public function setMaximumCrawlCount(int $maximumCrawlCount)
    {
        $this->maximumCrawlCount = $maximumCrawlCount;

        return $this;
    }

    /**
     * Set the URL at which crawling starts.
     *
     * @param string $urlToBeCrawled
     *
     * @return $this
     */
    public function setUrl(string $urlToBeCrawled)
    {
        $this->urlToBeCrawled = $urlToBeCrawled;

        return $this;
    }

    /**
     * Register a filter that decides whether a same-host URL is crawled.
     *
     * @param callable $shouldCrawl Called with the \Spatie\Crawler\Url under consideration.
     *
     * @return $this
     */
    public function shouldCrawl(callable $shouldCrawl)
    {
        $this->shouldCrawl = $shouldCrawl;

        return $this;
    }

    /**
     * Register the callback that turns a crawled URL into a sitemap entry.
     *
     * @param callable $hasCrawled Called with (Url, ResponseInterface|null); return a
     *                             falsy value to keep the URL out of the sitemap.
     *
     * @return $this
     */
    public function hasCrawled(callable $hasCrawled)
    {
        $this->hasCrawled = $hasCrawled;

        return $this;
    }

    /**
     * Configure the crawler, run the crawl and return the sitemap.
     *
     * @return \Spatie\Sitemap\Sitemap
     */
    public function getSitemap(): Sitemap
    {
        if (config('sitemap.execute_javascript')) {
            $this->crawler->executeJavaScript(config('sitemap.chrome_binary_path'));
        }

        // Only touch the crawler's limit when one was explicitly configured.
        if (! is_null($this->maximumCrawlCount)) {
            $this->crawler->setMaximumCrawlCount($this->maximumCrawlCount);
        }

        $this->crawler
            ->setCrawlProfile($this->getCrawlProfile())
            ->setCrawlObserver($this->getCrawlObserver())
            ->setConcurrency($this->concurrency)
            ->startCrawling($this->urlToBeCrawled);

        return $this->sitemap;
    }

    /**
     * Generate the sitemap and write it to the given path.
     *
     * @param string $path
     *
     * @return $this
     */
    public function writeToFile(string $path)
    {
        $this->getSitemap()->writeToFile($path);

        return $this;
    }

    /**
     * Build the crawl profile that decides which URLs get crawled.
     *
     * The profile class is read from the `sitemap.crawl_profile` config key and
     * falls back to the bundled Profile class.
     *
     * @return \Spatie\Crawler\CrawlProfile
     */
    protected function getCrawlProfile(): CrawlProfile
    {
        $shouldCrawl = function (CrawlerUrl $url) {
            // Never leave the host of the start URL.
            if ($url->host !== CrawlerUrl::create($this->urlToBeCrawled)->host) {
                return false;
            }

            // Without a user-supplied filter every same-host URL is crawled.
            if (! is_callable($this->shouldCrawl)) {
                return true;
            }

            return ($this->shouldCrawl)($url);
        };

        $profileClass = config('sitemap.crawl_profile', Profile::class);
        $profile = new $profileClass($this->urlToBeCrawled);

        // A configured profile class is not required to accept the callback.
        if (method_exists($profile, 'shouldCrawlCallback')) {
            $profile->shouldCrawlCallback($shouldCrawl);
        }

        return $profile;
    }

    /**
     * Build the observer that feeds crawled URLs into the sitemap.
     *
     * @return \Spatie\Sitemap\Crawler\Observer
     */
    protected function getCrawlObserver(): Observer
    {
        // NOTE(review): `?ResponseInterface` would be the modern spelling of this
        // nullable parameter, but it needs PHP 7.1+ - confirm the package's
        // minimum PHP version before changing it.
        $performAfterUrlHasBeenCrawled = function (CrawlerUrl $crawlerUrl, ResponseInterface $response = null) {
            $sitemapUrl = ($this->hasCrawled)(Url::create($crawlerUrl), $response);

            // A falsy result from the callback means "skip this URL".
            if ($sitemapUrl) {
                $this->sitemap->add($sitemapUrl);
            }
        };

        return new Observer($performAfterUrlHasBeenCrawled);
    }
}
157