Completed
Pull Request — master (#155)
created by Robin · 01:23

SitemapGenerator::getSitemap() — grade B

Complexity
    Conditions: 4
    Paths: 8

Size
    Total Lines: 22
    Code Lines: 13

Duplication
    Lines: 0
    Ratio: 0 %

Importance
    Changes: 0

Raw metrics

    Metric                               Value
    dl    (duplicated lines)             0
    loc   (lines of code)                22
    rs                                   8.9197
    c                                    0
    b                                    0
    f                                    0
    cc    (cyclomatic complexity)        4
    eloc  (executable lines of code)     13
    nc    (NPath count)                  8
    nop   (number of parameters)         0
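For reference, the figures are consistent with the code below, assuming the standard definitions: getSitemap() contains three independent if statements, giving a cyclomatic complexity of 3 + 1 = 4 (the four "Conditions" / the cc row) and an NPath count of 2 × 2 × 2 = 8 (the eight "Paths" / the nc row), and it takes no parameters (nop 0).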
<?php

namespace Spatie\Sitemap;

use GuzzleHttp\Psr7\Uri;
use Spatie\Crawler\Crawler;
use Spatie\Sitemap\Tags\Url;
use Spatie\Crawler\CrawlProfile;
use Psr\Http\Message\UriInterface;
use Spatie\Sitemap\Crawler\Profile;
use Spatie\Sitemap\Crawler\Observer;
use Psr\Http\Message\ResponseInterface;

class SitemapGenerator
{
    /** @var \Spatie\Sitemap\Sitemap */
    protected $sitemap;

    /** @var \GuzzleHttp\Psr7\Uri */
    protected $urlToBeCrawled = '';

    /** @var \Spatie\Crawler\Crawler */
    protected $crawler;

    /** @var callable */
    protected $shouldCrawl;

    /** @var callable */
    protected $hasCrawled;

    /** @var int */
    protected $concurrency = 10;

    /** @var int|null */
    protected $maximumCrawlCount = null;

    /** @var int|null */
    protected $maximumDepth = null;

    /**
     * @param string $urlToBeCrawled
     *
     * @return static
     */
    public static function create(string $urlToBeCrawled)
    {
        return app(static::class)->setUrl($urlToBeCrawled);
    }

    public function __construct(Crawler $crawler)
    {
        $this->crawler = $crawler;

        $this->sitemap = new Sitemap();

        $this->hasCrawled = function (Url $url, ResponseInterface $response = null) {
    [issue — Unused Code] The parameter $response is not used and could be removed.
    This check looks for parameters that have been defined for a function or method but are not used in the method body. (A sketch of a possible fix follows the constructor below.)
            return $url;
        };
    }
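    [sketch] One minimal way to resolve the issue above, assuming the default callback never needs the response: drop the parameter. PHP closures silently ignore extra arguments, so the two-argument invocation in getCrawlObserver() keeps working, and user-supplied callbacks can still accept $response.

        // Hypothetical replacement for the default callback:
        $this->hasCrawled = function (Url $url) {
            return $url;
        };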
    public function setConcurrency(int $concurrency)
    {
        $this->concurrency = $concurrency;

        return $this;
    }

    public function setMaximumCrawlCount(int $maximumCrawlCount)
    {
        $this->maximumCrawlCount = $maximumCrawlCount;

        return $this;
    }

    public function setMaximumDepth(int $maximumDepth)
    {
        $this->maximumDepth = $maximumDepth;

        return $this;
    }

    public function setUrl(string $urlToBeCrawled)
    {
        $this->urlToBeCrawled = new Uri($urlToBeCrawled);

        if ($this->urlToBeCrawled->getPath() === '') {
            $this->urlToBeCrawled = $this->urlToBeCrawled->withPath('/');
        }

        return $this;
    }

    public function shouldCrawl(callable $shouldCrawl)
    {
        $this->shouldCrawl = $shouldCrawl;

        return $this;
    }

    public function hasCrawled(callable $hasCrawled)
    {
        $this->hasCrawled = $hasCrawled;

        return $this;
    }

    public function getSitemap(): Sitemap
    {
        if (config('sitemap.execute_javascript')) {
            $this->crawler->executeJavaScript(config('sitemap.chrome_binary_path'));
    [issue — Unused Code] The call to Crawler::executeJavaScript() has too many arguments, starting with config('sitemap.chrome_binary_path').
    This check compares calls to functions or methods with their respective definitions. If a call has more arguments than the definition, it raises an issue.
    If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives; one codebase where this is known to happen is WordPress. In that case you can add the @ignore PhpDoc annotation to the duplicate definition and it will be ignored.
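    [sketch] What that workaround looks like, using a hypothetical function render_widget defined twice behind a function_exists() guard (the WordPress pattern):

        if (! function_exists('render_widget')) {
            /**
             * @ignore  Skip this duplicate definition when checking call signatures.
             */
            function render_widget($id)
            {
                // legacy single-argument variant
            }
        }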
        }

        if (! is_null($this->maximumCrawlCount)) {
            $this->crawler->setMaximumCrawlCount($this->maximumCrawlCount);
        }

        if (! is_null($this->maximumDepth)) {
            $this->crawler->setMaximumDepth($this->maximumDepth);
        }

        $this->crawler
            ->setCrawlProfile($this->getCrawlProfile())
            ->setCrawlObserver($this->getCrawlObserver())
            ->setConcurrency($this->concurrency)
            ->startCrawling($this->urlToBeCrawled);

        return $this->sitemap;
    }

    /**
     * @param string $path
     *
     * @return $this
     */
    public function writeToFile(string $path)
    {
        $this->getSitemap()->writeToFile($path);

        return $this;
    }

    protected function getCrawlProfile(): CrawlProfile
    {
        $shouldCrawl = function (UriInterface $url) {
            if ($url->getHost() !== $this->urlToBeCrawled->getHost()) {
                return false;
            }

            if (! is_callable($this->shouldCrawl)) {
                return true;
            }

            return ($this->shouldCrawl)($url);
        };

        $profileClass = config('sitemap.crawl_profile', Profile::class);
        $profile = new $profileClass($this->urlToBeCrawled);

        if (method_exists($profile, 'shouldCrawlCallback')) {
            $profile->shouldCrawlCallback($shouldCrawl);
        }

        return $profile;
    }

    protected function getCrawlObserver(): Observer
    {
        $performAfterUrlHasBeenCrawled = function (UriInterface $crawlerUrl, ResponseInterface $response = null) {
            $sitemapUrl = ($this->hasCrawled)(Url::create((string) $crawlerUrl), $response);

            if ($sitemapUrl) {
                $this->sitemap->add($sitemapUrl);
            }
        };

        return new Observer($performAfterUrlHasBeenCrawled);
    }
}
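For context, a sketch of how this class's fluent API is typically used; the method names are taken from the code above, while the target URL and file path are placeholders:

    SitemapGenerator::create('https://example.com')
        ->setConcurrency(5)
        ->setMaximumDepth(3)
        ->hasCrawled(function (Url $url, ResponseInterface $response = null) {
            return $url; // return null instead to leave this URL out of the sitemap
        })
        ->writeToFile('sitemap.xml');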