Completed
Pull Request — master (#245)
by
unknown
01:21
created

SitemapGenerator::setFormatDocument()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 6
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
1
<?php
2
3
namespace Spatie\Sitemap;
4
5
use Closure;
6
use GuzzleHttp\Psr7\Uri;
7
use Spatie\Crawler\Crawler;
8
use Spatie\Sitemap\Tags\Url;
9
use Spatie\Crawler\CrawlProfile;
10
use Illuminate\Support\Collection;
11
use Psr\Http\Message\UriInterface;
12
use Spatie\Sitemap\Crawler\Profile;
13
use Spatie\Sitemap\Crawler\Observer;
14
use Psr\Http\Message\ResponseInterface;
15
16
class SitemapGenerator
{
    /** @var \Illuminate\Support\Collection Sitemaps built so far; starts with a single empty Sitemap and grows when maxTagsPerSitemap() forces a split. */
    protected $sitemaps;

    /** @var \GuzzleHttp\Psr7\Uri|string Root URL the crawl starts from ('' until setUrl() is called). */
    protected $urlToBeCrawled = '';

    /** @var \Spatie\Crawler\Crawler */
    protected $crawler;

    /** @var callable|null User callback deciding whether a discovered URL should be crawled. */
    protected $shouldCrawl;

    /** @var callable Maps a crawled Url (plus optional response) to the tag to add, or a falsy value to skip it. */
    protected $hasCrawled;

    /** @var int Number of concurrent requests the crawler may issue. */
    protected $concurrency = 10;

    /** @var bool|int Maximum tags per sitemap file; false disables splitting into a sitemap index. */
    protected $maximumTagsPerSitemap = false;

    /** @var int|null Upper bound on the number of URLs to crawl; null means unlimited. */
    protected $maximumCrawlCount = null;

    /** @var bool Whether the written XML should be pretty-printed. */
    protected $formatDocument = false;

    /**
     * Resolve a generator from the service container and point it at the given URL.
     *
     * @param string $urlToBeCrawled
     *
     * @return static
     */
    public static function create(string $urlToBeCrawled)
    {
        return app(static::class)->setUrl($urlToBeCrawled);
    }

    public function __construct(Crawler $crawler)
    {
        $this->crawler = $crawler;

        $this->sitemaps = new Collection([new Sitemap]);

        // Default behaviour: keep every crawled URL as-is. The $response
        // parameter is intentionally unused here; it is kept so the default
        // matches the signature user callbacks registered via hasCrawled()
        // are invoked with.
        $this->hasCrawled = function (Url $url, ResponseInterface $response = null) {
            return $url;
        };
    }

    /**
     * Give the caller direct access to the underlying crawler for custom configuration.
     */
    public function configureCrawler(Closure $closure): self
    {
        call_user_func_array($closure, [$this->crawler]);

        return $this;
    }

    /**
     * Set how many requests the crawler may run concurrently.
     *
     * @return $this
     */
    public function setConcurrency(int $concurrency)
    {
        $this->concurrency = $concurrency;

        return $this;
    }

    /**
     * Limit the total number of URLs the crawler will visit.
     *
     * @return $this
     */
    public function setMaximumCrawlCount(int $maximumCrawlCount)
    {
        $this->maximumCrawlCount = $maximumCrawlCount;

        return $this;
    }

    /**
     * Split output into multiple sitemaps (written via a sitemap index) once a
     * sitemap reaches this many tags. 50000 is the limit of the sitemap protocol.
     */
    public function maxTagsPerSitemap(int $maximumTagsPerSitemap = 50000): self
    {
        $this->maximumTagsPerSitemap = $maximumTagsPerSitemap;

        return $this;
    }

    /**
     * Set the URL the crawl starts from. An empty path is normalized to '/'
     * so host-relative comparisons behave consistently.
     *
     * @return $this
     */
    public function setUrl(string $urlToBeCrawled)
    {
        $this->urlToBeCrawled = new Uri($urlToBeCrawled);

        if ($this->urlToBeCrawled->getPath() === '') {
            $this->urlToBeCrawled = $this->urlToBeCrawled->withPath('/');
        }

        return $this;
    }

    /**
     * Toggle pretty-printing of the written XML document.
     *
     * @return $this
     */
    public function setFormatDocument(bool $formatDocument = true)
    {
        $this->formatDocument = $formatDocument;

        return $this;
    }

    /**
     * Register a callback that decides whether a discovered URL is crawled.
     * It receives the UriInterface and must return a bool.
     *
     * @return $this
     */
    public function shouldCrawl(callable $shouldCrawl)
    {
        $this->shouldCrawl = $shouldCrawl;

        return $this;
    }

    /**
     * Register a callback invoked for every crawled URL. It receives the Url
     * tag and the response; whatever it returns is added to the sitemap
     * (return a falsy value to omit the URL).
     *
     * @return $this
     */
    public function hasCrawled(callable $hasCrawled)
    {
        $this->hasCrawled = $hasCrawled;

        return $this;
    }

    /**
     * Run the crawl and return the first generated sitemap.
     */
    public function getSitemap(): Sitemap
    {
        if (config('sitemap.execute_javascript')) {
            // NOTE(review): passing the Chrome binary path here was flagged as
            // "too many arguments" by static analysis — whether
            // Crawler::executeJavaScript() accepts it depends on the installed
            // spatie/crawler version. Confirm against the dependency.
            $this->crawler->executeJavaScript(config('sitemap.chrome_binary_path'));
        }

        if (! is_null($this->maximumCrawlCount)) {
            $this->crawler->setMaximumCrawlCount($this->maximumCrawlCount);
        }

        $this->crawler
            ->setCrawlProfile($this->getCrawlProfile())
            ->setCrawlObserver($this->getCrawlObserver())
            ->setConcurrency($this->concurrency)
            ->startCrawling($this->urlToBeCrawled);

        return $this->sitemaps->first();
    }

    /**
     * Crawl and write the sitemap to disk. When maxTagsPerSitemap() is active,
     * each chunk is written to a numbered file ("<name>_0.xml", "<name>_1.xml", …)
     * and $path receives a sitemap index referencing them.
     *
     * @param string $path
     *
     * @return $this
     */
    public function writeToFile(string $path)
    {
        $sitemap = $this->getSitemap()->setFormatDocument($this->formatDocument);

        if ($this->maximumTagsPerSitemap) {
            $sitemap = SitemapIndex::create();
            $format = str_replace('.xml', '_%d.xml', $path);

            // Write each chunked sitemap to its own numbered file and push it
            // into the index, using the path relative to the `public` segment
            // so the index contains a web-accessible location.
            $this->sitemaps->each(function (Sitemap $item, int $key) use ($sitemap, $format) {
                $chunkPath = sprintf($format, $key);

                $item->writeToFile($chunkPath);
                $sitemap->add(last(explode('public', $chunkPath)));
            });
        }

        $sitemap->writeToFile($path);

        return $this;
    }

    /**
     * Build the crawl profile: only same-host URLs are considered, and the
     * optional shouldCrawl() callback gets the final say.
     */
    protected function getCrawlProfile(): CrawlProfile
    {
        $shouldCrawl = function (UriInterface $url) {
            if ($url->getHost() !== $this->urlToBeCrawled->getHost()) {
                return false;
            }

            if (! is_callable($this->shouldCrawl)) {
                return true;
            }

            return ($this->shouldCrawl)($url);
        };

        $profileClass = config('sitemap.crawl_profile', Profile::class);
        $profile = new $profileClass($this->urlToBeCrawled);

        // Custom profiles configured via `sitemap.crawl_profile` may not
        // support a callback; only wire it up when the hook exists.
        if (method_exists($profile, 'shouldCrawlCallback')) {
            $profile->shouldCrawlCallback($shouldCrawl);
        }

        return $profile;
    }

    /**
     * Build the observer that feeds crawled URLs through the hasCrawled()
     * callback and appends the result to the current sitemap, starting a new
     * sitemap first when the tag limit has been reached.
     */
    protected function getCrawlObserver(): Observer
    {
        $performAfterUrlHasBeenCrawled = function (UriInterface $crawlerUrl, ResponseInterface $response = null) {
            $sitemapUrl = ($this->hasCrawled)(Url::create((string) $crawlerUrl), $response);

            if ($this->shouldStartNewSitemapFile()) {
                $this->sitemaps->push(new Sitemap);
            }

            // A falsy return from the callback drops the URL from the sitemap.
            if ($sitemapUrl) {
                $this->sitemaps->last()->add($sitemapUrl);
            }
        };

        return new Observer($performAfterUrlHasBeenCrawled);
    }

    /**
     * Whether the current sitemap is full and a fresh one should be started.
     */
    protected function shouldStartNewSitemapFile(): bool
    {
        if (! $this->maximumTagsPerSitemap) {
            return false;
        }

        $currentNumberOfTags = count($this->sitemaps->last()->getTags());

        return $currentNumberOfTags >= $this->maximumTagsPerSitemap;
    }
}
225