Completed
Pull Request — master (#104)
by
unknown
01:24
created

SitemapGenerator::getCrawlProfile()   B

Complexity

Conditions 4
Paths 2

Size

Total Lines 23
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 23
rs 8.7972
c 0
b 0
f 0
cc 4
eloc 12
nc 2
nop 0
1
<?php
2
3
namespace Spatie\Sitemap;
4
5
use Spatie\Crawler\Crawler;
6
use Spatie\Sitemap\Tags\Url;
7
use Spatie\Crawler\CrawlProfile;
8
use Spatie\Sitemap\Crawler\Profile;
9
use Spatie\Sitemap\Crawler\Observer;
10
use Spatie\Crawler\Url as CrawlerUrl;
11
use Psr\Http\Message\ResponseInterface;
12
13
/**
 * Crawls a site starting from a given URL and collects the crawled URLs
 * into a Sitemap.
 *
 * Typical use is fluent: create() the generator, optionally configure it
 * through the chainable setters, then call getSitemap() or writeToFile().
 */
class SitemapGenerator
{
    /** @var \Spatie\Sitemap\Sitemap */
    protected $sitemap;

    /** @var string */
    protected $urlToBeCrawled = '';

    /** @var \Spatie\Crawler\Crawler */
    protected $crawler;

    /** @var callable|null Optional user filter; stays null until shouldCrawl() is called. */
    protected $shouldCrawl;

    /** @var callable */
    protected $hasCrawled;

    /** @var int */
    protected $concurrency = 10;

    /**
     * Resolve a generator through app() and point it at the given URL.
     *
     * @param string $urlToBeCrawled
     *
     * @return static
     */
    public static function create(string $urlToBeCrawled)
    {
        return app(static::class)->setUrl($urlToBeCrawled);
    }

    /**
     * @param \Spatie\Crawler\Crawler $crawler
     */
    public function __construct(Crawler $crawler)
    {
        $this->crawler = $crawler;

        $this->sitemap = new Sitemap();

        // Default "has crawled" callback: keep every crawled URL unchanged.
        // The observer also passes the response as a second argument; this
        // default has no use for it, so it is not declared (PHP ignores
        // surplus arguments passed to a closure).
        $this->hasCrawled = function (Url $url) {
            return $url;
        };
    }

    /**
     * Set the concurrency value that getSitemap() hands to the crawler.
     *
     * @param int $concurrency
     *
     * @return $this
     */
    public function setConcurrency(int $concurrency)
    {
        $this->concurrency = $concurrency;

        // Returned for chaining, consistent with the other setters.
        return $this;
    }

    /**
     * Set the URL at which crawling starts.
     *
     * @param string $urlToBeCrawled
     *
     * @return $this
     */
    public function setUrl(string $urlToBeCrawled)
    {
        $this->urlToBeCrawled = $urlToBeCrawled;

        return $this;
    }

    /**
     * Register a filter deciding whether a URL should be crawled.
     *
     * The callable receives the crawler URL and its return value is used as
     * the crawl decision. It is only consulted for URLs whose host matches
     * the host of the URL being crawled.
     *
     * @param callable $shouldCrawl
     *
     * @return $this
     */
    public function shouldCrawl(callable $shouldCrawl)
    {
        $this->shouldCrawl = $shouldCrawl;

        return $this;
    }

    /**
     * Register a callback invoked for every crawled URL.
     *
     * The callable receives the sitemap Url tag and the response (which may
     * be null). A truthy return value is added to the sitemap; a falsy one
     * causes the URL to be left out.
     *
     * @param callable $hasCrawled
     *
     * @return $this
     */
    public function hasCrawled(callable $hasCrawled)
    {
        $this->hasCrawled = $hasCrawled;

        return $this;
    }

    /**
     * Configure the crawler, start crawling and return the sitemap.
     *
     * @return \Spatie\Sitemap\Sitemap
     */
    public function getSitemap(): Sitemap
    {
        if (config('sitemap.execute_javascript')) {
            $this->crawler->executeJavaScript(config('sitemap.chrome_binary_path'));
        }

        $this->crawler
            ->setCrawlProfile($this->getCrawlProfile())
            ->setCrawlObserver($this->getCrawlObserver())
            ->setConcurrency($this->concurrency)
            ->startCrawling($this->urlToBeCrawled);

        return $this->sitemap;
    }

    /**
     * Generate the sitemap and write it to the given path.
     *
     * @param string $path
     *
     * @return $this
     */
    public function writeToFile(string $path)
    {
        $this->getSitemap()->writeToFile($path);

        return $this;
    }

    /**
     * Build the crawl profile used to decide which URLs get crawled.
     *
     * The profile class is read from the 'sitemap.crawl_profile' config key
     * and falls back to the bundled Profile class.
     *
     * @return \Spatie\Crawler\CrawlProfile
     */
    protected function getCrawlProfile(): CrawlProfile
    {
        $shouldCrawl = function (CrawlerUrl $url) {
            // Never leave the host of the start URL.
            if ($url->host !== CrawlerUrl::create($this->urlToBeCrawled)->host) {
                return false;
            }

            // Without a user-supplied filter every same-host URL is crawled.
            if (! is_callable($this->shouldCrawl)) {
                return true;
            }

            return ($this->shouldCrawl)($url);
        };

        $profileClass = config('sitemap.crawl_profile', Profile::class);
        $profile = new $profileClass($this->urlToBeCrawled);

        // Configured profiles are not required to accept a callback.
        if (method_exists($profile, 'shouldCrawlCallback')) {
            $profile->shouldCrawlCallback($shouldCrawl);
        }

        return $profile;
    }

    /**
     * Build the observer that feeds crawled URLs into the sitemap.
     *
     * @return \Spatie\Sitemap\Crawler\Observer
     */
    protected function getCrawlObserver(): Observer
    {
        $performAfterUrlHasBeenCrawled = function (CrawlerUrl $crawlerUrl, ResponseInterface $response = null) {
            $sitemapUrl = ($this->hasCrawled)(Url::create($crawlerUrl), $response);

            // A falsy result from the callback means: skip this URL.
            if ($sitemapUrl) {
                $this->sitemap->add($sitemapUrl);
            }
        };

        return new Observer($performAfterUrlHasBeenCrawled);
    }
}
144