Completed
Push — master ( d132d2...52a9d6 )
by Freek
02:35
created

SitemapGenerator::shouldAddSitemap()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 4
rs 10
c 0
b 0
f 0
cc 2
eloc 2
nc 2
nop 0
1
<?php
2
3
namespace Spatie\Sitemap;
4
5
use GuzzleHttp\Psr7\Uri;
6
use Illuminate\Support\Collection;
7
use Spatie\Crawler\Crawler;
8
use Spatie\Sitemap\Tags\Url;
9
use Spatie\Crawler\CrawlProfile;
10
use Psr\Http\Message\UriInterface;
11
use Spatie\Sitemap\Crawler\Profile;
12
use Spatie\Sitemap\Crawler\Observer;
13
use Psr\Http\Message\ResponseInterface;
14
15
class SitemapGenerator
16
{
17
    /** @var \Illuminate\Support\Collection Sitemaps built during the crawl; the first item is the one currently being filled. */
18
    protected $sitemaps;
19
20
    /** @var \GuzzleHttp\Psr7\Uri|string Start URL of the crawl; an empty string until setUrl() replaces it with a Uri. */
21
    protected $urlToBeCrawled = '';
22
23
    /** @var \Spatie\Crawler\Crawler Crawler instance injected through the constructor. */
24
    protected $crawler;
25
26
    /** @var callable|null Optional filter consulted for same-host URLs; unset until shouldCrawl() is called. */
27
    protected $shouldCrawl;
28
29
    /** @var callable Invoked for every crawled URL; a truthy return value is what gets added to the sitemap. */
30
    protected $hasCrawled;
31
32
    /** @var int Concurrency value handed to the crawler in getSitemap(). */
33
    protected $concurrency = 10;
34
35
    /** @var bool|int Maximum number of tags per sitemap, or false to keep everything in a single sitemap. */
36
    protected $chunk = false;
37
38
    /** @var int|null Crawl limit forwarded to the crawler; null means the crawler's own setting is left untouched. */
39
    protected $maximumCrawlCount = null;
40
41
    /**
     * Build a generator through the app() helper and point it at the given URL.
     *
     * @param string $urlToBeCrawled URL the crawl starts from.
     *
     * @return static
     */
    public static function create(string $urlToBeCrawled)
    {
        $generator = app(static::class);

        return $generator->setUrl($urlToBeCrawled);
    }
50
51
    /**
     * Store the crawler and start with a single empty sitemap.
     *
     * @param \Spatie\Crawler\Crawler $crawler Crawler that getSitemap() configures and runs.
     */
    public function __construct(Crawler $crawler)
    {
        $this->sitemaps = new Collection([new Sitemap()]);
        $this->crawler = $crawler;

        // Default callback: keep every crawled URL exactly as it is.
        // NOTE(review): static analysis reports $response as unused. It is
        // presumably kept so the default has the same signature as callbacks
        // registered via hasCrawled(), which are invoked with the response.
        $this->hasCrawled = function (Url $url, ResponseInterface $response = null) {
            return $url;
        };
    }
61
62
    /**
     * Set the concurrency value that getSitemap() passes to the crawler.
     *
     * @param int $concurrency
     *
     * @return $this
     */
    public function setConcurrency(int $concurrency)
    {
        $this->concurrency = $concurrency;

        // Return the generator so this setter can be chained like setUrl(),
        // shouldCrawl(), hasCrawled() and maxItemsPerSitemap().
        return $this;
    }
66
67
    /**
     * Set the crawl limit that getSitemap() forwards to the crawler.
     *
     * @param int $maximumCrawlCount
     *
     * @return $this
     */
    public function setMaximumCrawlCount(int $maximumCrawlCount)
    {
        $this->maximumCrawlCount = $maximumCrawlCount;

        // Return the generator so this setter can be chained like setUrl(),
        // shouldCrawl(), hasCrawled() and maxItemsPerSitemap().
        return $this;
    }
71
72
    /**
     * Enable chunking: once the current sitemap holds this many tags, a new
     * sitemap is started, and writeToFile() emits a sitemap index.
     *
     * @param int $chunk Maximum number of tags per sitemap.
     *
     * @return self
     */
    public function maxItemsPerSitemap(int $chunk = 50000): self
    {
        $this->chunk = $chunk;

        return $this;
    }
78
79
    /**
     * Set the URL the crawl starts from.
     *
     * The string is wrapped in a Uri; an empty path is replaced by "/".
     *
     * @param string $urlToBeCrawled
     *
     * @return $this
     */
    public function setUrl(string $urlToBeCrawled)
    {
        $uri = new Uri($urlToBeCrawled);

        $this->urlToBeCrawled = $uri->getPath() === ''
            ? $uri->withPath('/')
            : $uri;

        return $this;
    }
89
90
    /**
     * Register a filter that decides whether a same-host URL is crawled.
     *
     * The callable is invoked with the URL (a UriInterface) and its return
     * value is used as the crawl decision.
     *
     * @param callable $shouldCrawl
     *
     * @return $this
     */
    public function shouldCrawl(callable $shouldCrawl)
    {
        $this->shouldCrawl = $shouldCrawl;

        return $this;
    }
96
97
    /**
     * Register the callback run for every crawled URL.
     *
     * The callable receives a Url tag and the response (which may be null);
     * a truthy return value is added to the sitemap, a falsy one is skipped.
     *
     * @param callable $hasCrawled
     *
     * @return $this
     */
    public function hasCrawled(callable $hasCrawled)
    {
        $this->hasCrawled = $hasCrawled;

        return $this;
    }
103
104
    /**
     * Configure the crawler, run the crawl and return the first sitemap in
     * the collection.
     *
     * @return \Spatie\Sitemap\Sitemap
     */
    public function getSitemap(): Sitemap
    {
        $executeJavaScript = config('sitemap.execute_javascript');

        if ($executeJavaScript) {
            // NOTE(review): static analysis flagged this call as passing more
            // arguments than Crawler::executeJavaScript() declares; confirm
            // the installed crawler version accepts the Chrome binary path.
            $this->crawler->executeJavaScript(config('sitemap.chrome_binary_path'));
        }

        // Only forward a crawl limit when one was explicitly configured.
        if ($this->maximumCrawlCount !== null) {
            $this->crawler->setMaximumCrawlCount($this->maximumCrawlCount);
        }

        $configuredCrawler = $this->crawler
            ->setCrawlProfile($this->getCrawlProfile())
            ->setCrawlObserver($this->getCrawlObserver())
            ->setConcurrency($this->concurrency);

        $configuredCrawler->startCrawling($this->urlToBeCrawled);

        return $this->sitemaps->first();
    }
122
123
    /**
     * Crawl the site and write the result to disk.
     *
     * Without chunking, the single sitemap is written to $path. With chunking
     * enabled, every collected sitemap is written to its own numbered file
     * ("foo.xml" becomes "foo_0.xml", "foo_1.xml", ...) and a sitemap index
     * referencing those files is written to $path.
     *
     * @param string $path
     *
     * @return $this
     */
    public function writeToFile(string $path)
    {
        $sitemap = $this->getSitemap();

        if ($this->chunk) {
            $sitemap = SitemapIndex::create();

            // Escape literal "%" characters first so that sprintf() only ever
            // sees the "%d" placeholder inserted here; otherwise a "%" in the
            // path is parsed as a (bogus) conversion specifier.
            $format = str_replace('.xml', '_%d.xml', str_replace('%', '%%', $path));

            // Write each sub-sitemap to its own file and register it in the
            // sitemap index.
            $this->sitemaps->each(function (Sitemap $item, int $key) use ($sitemap, $format) {
                $chunkPath = sprintf($format, $key);

                $item->writeToFile($chunkPath);

                // The index entry is whatever follows the last "public" in
                // the chunk path (the whole path when "public" is absent).
                $sitemap->add(last(explode('public', $chunkPath)));
            });
        }

        $sitemap->writeToFile($path);

        return $this;
    }
150
151
    /**
     * Instantiate the crawl profile and hand it the crawl filter.
     *
     * The profile class comes from the "sitemap.crawl_profile" config entry
     * and falls back to Profile. The filter is only attached when the profile
     * exposes a shouldCrawlCallback() method.
     *
     * @return \Spatie\Crawler\CrawlProfile
     */
    protected function getCrawlProfile(): CrawlProfile
    {
        $filter = function (UriInterface $url) {
            // URLs on a different host than the start URL are never crawled.
            $sameHost = $url->getHost() === $this->urlToBeCrawled->getHost();

            if (! $sameHost) {
                return false;
            }

            // Without a user-supplied filter every same-host URL is accepted.
            return is_callable($this->shouldCrawl)
                ? ($this->shouldCrawl)($url)
                : true;
        };

        $profileClass = config('sitemap.crawl_profile', Profile::class);
        $crawlProfile = new $profileClass($this->urlToBeCrawled);

        if (method_exists($crawlProfile, 'shouldCrawlCallback')) {
            $crawlProfile->shouldCrawlCallback($filter);
        }

        return $crawlProfile;
    }
174
175
    /**
     * Build the observer that collects crawled URLs into the sitemap(s).
     *
     * For each crawled URL the hasCrawled callback is invoked with a Url tag
     * and the response. A truthy result is added to the current (first)
     * sitemap; when chunking is enabled and that sitemap is full, a fresh one
     * is prepended first.
     *
     * @return \Spatie\Sitemap\Crawler\Observer
     */
    protected function getCrawlObserver(): Observer
    {
        $performAfterUrlHasBeenCrawled = function (UriInterface $crawlerUrl, ResponseInterface $response = null) {
            $sitemapUrl = ($this->hasCrawled)(Url::create((string) $crawlerUrl), $response);

            // A falsy result means the callback rejected the URL. Stop before
            // the roll-over check so a rejected URL can never open a new
            // sitemap that might stay empty.
            if (! $sitemapUrl) {
                return;
            }

            if ($this->shouldAddSitemap()) {
                $this->sitemaps->prepend(new Sitemap);
            }

            $this->sitemaps->first()->add($sitemapUrl);
        };

        return new Observer($performAfterUrlHasBeenCrawled);
    }
191
192
    /**
     * Tell whether the current (first) sitemap has reached the chunk size.
     *
     * @return bool Always false when chunking is disabled.
     */
    protected function shouldAddSitemap(): bool
    {
        if (! $this->chunk) {
            return false;
        }

        $currentSize = count($this->sitemaps->first()->getTags());

        return $currentSize >= $this->chunk;
    }
196
}
197