SitemapGenerator   A
last analyzed

Complexity

Total Complexity 24

Size/Duplication

Total Lines 199
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 8

Importance

Changes 0
Metric Value
wmc 24
lcom 1
cbo 8
dl 0
loc 199
rs 10
c 0
b 0
f 0

14 Methods

Rating   Name   Duplication   Size   Complexity  
A create() 0 4 1
A __construct() 0 10 1
A configureCrawler() 0 6 1
A setConcurrency() 0 6 1
A setMaximumCrawlCount() 0 6 1
A maxTagsPerSitemap() 0 6 1
A setUrl() 0 10 2
A shouldCrawl() 0 6 1
A hasCrawled() 0 6 1
A getSitemap() 0 18 3
A writeToFile() 0 22 2
A getCrawlProfile() 0 23 4
A getCrawlObserver() 0 16 3
A shouldStartNewSitemapFile() 0 10 2
1
<?php
2
3
namespace Spatie\Sitemap;
4
5
use Closure;
6
use GuzzleHttp\Psr7\Uri;
7
use Illuminate\Support\Collection;
8
use Psr\Http\Message\ResponseInterface;
9
use Psr\Http\Message\UriInterface;
10
use Spatie\Crawler\Crawler;
11
use Spatie\Crawler\CrawlProfile;
12
use Spatie\Sitemap\Crawler\Observer;
13
use Spatie\Sitemap\Crawler\Profile;
14
use Spatie\Sitemap\Tags\Url;
15
16
class SitemapGenerator
{
    /** @var \Illuminate\Support\Collection Sitemap instances filled during the crawl; a new one is started when the tag limit is hit. */
    protected $sitemaps;

    /** @var \GuzzleHttp\Psr7\Uri */
    protected $urlToBeCrawled = '';

    /** @var \Spatie\Crawler\Crawler */
    protected $crawler;

    /** @var callable|null User callback deciding whether a discovered URL should be crawled. */
    protected $shouldCrawl;

    /** @var callable Maps a crawled URL (and optional response) to a sitemap tag, or a falsy value to skip it. */
    protected $hasCrawled;

    /** @var int Number of concurrent crawl requests. */
    protected $concurrency = 10;

    /** @var bool|int Maximum number of tags per sitemap file, or false for no limit. */
    protected $maximumTagsPerSitemap = false;

    /** @var int|null Maximum number of URLs to crawl, or null for no limit. */
    protected $maximumCrawlCount = null;

    /**
     * Create a generator (resolved from the service container) for the given URL.
     *
     * @param string $urlToBeCrawled
     *
     * @return static
     */
    public static function create(string $urlToBeCrawled)
    {
        return app(static::class)->setUrl($urlToBeCrawled);
    }

    public function __construct(Crawler $crawler)
    {
        $this->crawler = $crawler;

        $this->sitemaps = new Collection([new Sitemap]);

        // Default callback: include every crawled URL as-is. The $response
        // parameter is intentionally unused here; it is kept so the default
        // has the same signature as callbacks registered via hasCrawled().
        $this->hasCrawled = function (Url $url, ResponseInterface $response = null) {
            return $url;
        };
    }

    /**
     * Customize the underlying crawler before the crawl starts.
     *
     * @param \Closure $closure Receives the Crawler instance as its only argument.
     *
     * @return $this
     */
    public function configureCrawler(Closure $closure): self
    {
        call_user_func_array($closure, [$this->crawler]);

        return $this;
    }

    /**
     * Set the number of concurrent requests used while crawling.
     *
     * @param int $concurrency
     *
     * @return $this
     */
    public function setConcurrency(int $concurrency)
    {
        $this->concurrency = $concurrency;

        return $this;
    }

    /**
     * Limit the total number of URLs that will be crawled.
     *
     * @param int $maximumCrawlCount
     *
     * @return $this
     */
    public function setMaximumCrawlCount(int $maximumCrawlCount)
    {
        $this->maximumCrawlCount = $maximumCrawlCount;

        return $this;
    }

    /**
     * Split output into multiple sitemaps once a file holds this many tags.
     * 50000 is the maximum allowed by the sitemaps.org protocol.
     *
     * @param int $maximumTagsPerSitemap
     *
     * @return $this
     */
    public function maxTagsPerSitemap(int $maximumTagsPerSitemap = 50000): self
    {
        $this->maximumTagsPerSitemap = $maximumTagsPerSitemap;

        return $this;
    }

    /**
     * Set the URL to crawl, normalizing an empty path to "/".
     *
     * @param string $urlToBeCrawled
     *
     * @return $this
     */
    public function setUrl(string $urlToBeCrawled)
    {
        $this->urlToBeCrawled = new Uri($urlToBeCrawled);

        if ($this->urlToBeCrawled->getPath() === '') {
            $this->urlToBeCrawled = $this->urlToBeCrawled->withPath('/');
        }

        return $this;
    }

    /**
     * Register a callback that decides whether a discovered URL is crawled.
     *
     * @param callable $shouldCrawl Receives a UriInterface; return false to skip.
     *
     * @return $this
     */
    public function shouldCrawl(callable $shouldCrawl)
    {
        $this->shouldCrawl = $shouldCrawl;

        return $this;
    }

    /**
     * Register a callback invoked for every crawled URL. It receives the
     * Url tag and the ResponseInterface (may be null) and must return the
     * tag to add, or a falsy value to omit the URL from the sitemap.
     *
     * @param callable $hasCrawled
     *
     * @return $this
     */
    public function hasCrawled(callable $hasCrawled)
    {
        $this->hasCrawled = $hasCrawled;

        return $this;
    }

    /**
     * Run the crawl and return the first generated sitemap.
     */
    public function getSitemap(): Sitemap
    {
        if (config('sitemap.execute_javascript')) {
            // NOTE(review): static analysis flags this argument as excess for
            // Crawler::executeJavaScript() — verify against the installed
            // spatie/crawler version before changing the call.
            $this->crawler->executeJavaScript(config('sitemap.chrome_binary_path'));
        }

        if (! is_null($this->maximumCrawlCount)) {
            $this->crawler->setMaximumCrawlCount($this->maximumCrawlCount);
        }

        $this->crawler
            ->setCrawlProfile($this->getCrawlProfile())
            ->setCrawlObserver($this->getCrawlObserver())
            ->setConcurrency($this->concurrency)
            ->startCrawling($this->urlToBeCrawled);

        return $this->sitemaps->first();
    }

    /**
     * Crawl and write the sitemap to the given path. When a tag limit is
     * configured, each sub-sitemap is written to a numbered file
     * (e.g. sitemap_0.xml) and $path receives a sitemap index instead.
     *
     * @param string $path
     *
     * @return $this
     */
    public function writeToFile(string $path)
    {
        $sitemap = $this->getSitemap();

        if ($this->maximumTagsPerSitemap) {
            $sitemap = SitemapIndex::create();
            $format = str_replace('.xml', '_%d.xml', $path);

            // Write each sub-sitemap to its own numbered file and push its
            // public-relative path into the sitemap index.
            $this->sitemaps->each(function (Sitemap $item, int $key) use ($sitemap, $format) {
                $subPath = sprintf($format, $key);

                $item->writeToFile($subPath);
                $sitemap->add(last(explode('public', $subPath)));
            });
        }

        $sitemap->writeToFile($path);

        return $this;
    }

    /**
     * Build the crawl profile: restricts the crawl to the configured host
     * and defers to the user-supplied shouldCrawl callback when present.
     */
    protected function getCrawlProfile(): CrawlProfile
    {
        $shouldCrawl = function (UriInterface $url) {
            // Never leave the host being crawled.
            if ($url->getHost() !== $this->urlToBeCrawled->getHost()) {
                return false;
            }

            if (! is_callable($this->shouldCrawl)) {
                return true;
            }

            return ($this->shouldCrawl)($url);
        };

        $profileClass = config('sitemap.crawl_profile', Profile::class);
        $profile = new $profileClass($this->urlToBeCrawled);

        // Custom profiles may not support a callback; only wire it when they do.
        if (method_exists($profile, 'shouldCrawlCallback')) {
            $profile->shouldCrawlCallback($shouldCrawl);
        }

        return $profile;
    }

    /**
     * Build the observer that converts crawled URLs into sitemap tags,
     * starting a new sitemap whenever the tag limit is reached.
     */
    protected function getCrawlObserver(): Observer
    {
        $performAfterUrlHasBeenCrawled = function (UriInterface $crawlerUrl, ResponseInterface $response = null) {
            $sitemapUrl = ($this->hasCrawled)(Url::create((string) $crawlerUrl), $response);

            if ($this->shouldStartNewSitemapFile()) {
                $this->sitemaps->push(new Sitemap);
            }

            // A falsy return from the hasCrawled callback omits this URL.
            if ($sitemapUrl) {
                $this->sitemaps->last()->add($sitemapUrl);
            }
        };

        return new Observer($performAfterUrlHasBeenCrawled);
    }

    /**
     * Whether the current sitemap is full and a new one should be started.
     */
    protected function shouldStartNewSitemapFile(): bool
    {
        if (! $this->maximumTagsPerSitemap) {
            return false;
        }

        $currentNumberOfTags = count($this->sitemaps->last()->getTags());

        return $currentNumberOfTags >= $this->maximumTagsPerSitemap;
    }
}
215