1 | <?php |
||
16 | class SitemapGenerator |
||
17 | { |
||
18 | /** @var \Illuminate\Support\Collection */ |
||
19 | protected $sitemaps; |
||
20 | |||
21 | /** @var \GuzzleHttp\Psr7\Uri */ |
||
22 | protected $urlToBeCrawled = ''; |
||
23 | |||
24 | /** @var \Spatie\Crawler\Crawler */ |
||
25 | protected $crawler; |
||
26 | |||
27 | /** @var callable */ |
||
28 | protected $shouldCrawl; |
||
29 | |||
30 | /** @var callable */ |
||
31 | protected $hasCrawled; |
||
32 | |||
33 | /** @var int */ |
||
34 | protected $concurrency = 10; |
||
35 | |||
36 | /** @var bool|int $maximumTagsPerSitemap */ |
||
37 | protected $maximumTagsPerSitemap = false; |
||
38 | |||
39 | /** @var int|null */ |
||
40 | protected $maximumCrawlCount = null; |
||
41 | |||
/**
 * Obtain a generator instance through app() and point it at the given URL.
 *
 * @param string $urlToBeCrawled
 *
 * @return static
 */
public static function create(string $urlToBeCrawled)
{
    // static::class keeps late static binding, so subclasses get their own type.
    $generator = app(static::class);

    return $generator->setUrl($urlToBeCrawled);
}
||
51 | |||
/**
 * Store the crawler, start with a collection holding one new Sitemap and
 * install a default hasCrawled callback that returns the URL it was given.
 *
 * @param Crawler $crawler
 */
public function __construct(Crawler $crawler)
{
    // Default callback: pass the URL through untouched; the response is ignored.
    $returnUrlUnchanged = function (Url $url, ResponseInterface $response = null) {
        return $url;
    };

    $this->crawler = $crawler;
    $this->sitemaps = new Collection([new Sitemap]);
    $this->hasCrawled = $returnUrlUnchanged;
}
||
62 | |||
/**
 * Hand the underlying crawler to a callback so it can be customised.
 *
 * The callback receives the crawler as its only argument; its return value
 * is ignored.
 *
 * @param Closure $closure
 *
 * @return $this
 */
public function configureCrawler(Closure $closure): self
{
    // A Closure can be invoked directly; no need for call_user_func_array().
    $closure($this->crawler);

    return $this;
}
||
69 | |||
/**
 * Set the concurrency value that getSitemap() passes to the crawler.
 *
 * @param int $concurrency
 *
 * @return $this
 */
public function setConcurrency(int $concurrency)
{
    $this->concurrency = $concurrency;

    // Return the instance so this setter chains like setUrl() and friends.
    return $this;
}
||
74 | |||
/**
 * Set the maximum crawl count; getSitemap() forwards it to the crawler
 * only when it is not null.
 *
 * @param int $maximumCrawlCount
 *
 * @return $this
 */
public function setMaximumCrawlCount(int $maximumCrawlCount)
{
    $this->maximumCrawlCount = $maximumCrawlCount;

    // Return the instance so this setter chains like setUrl() and friends.
    return $this;
}
||
79 | |||
/**
 * Store the per-sitemap tag limit.
 *
 * A truthy value makes writeToFile() emit a sitemap index with one file
 * per collected sitemap instead of a single file.
 *
 * @param int $maximumTagsPerSitemap
 *
 * @return $this
 */
public function maxTagsPerSitemap(int $maximumTagsPerSitemap = 50000): self
{
    $this->maximumTagsPerSitemap = $maximumTagsPerSitemap;
    return $this;
}
||
86 | |||
/**
 * Set the URL the crawl starts from.
 *
 * The string is wrapped in a Uri; when its path is empty, "/" is used as
 * the path instead.
 *
 * @param string $urlToBeCrawled
 *
 * @return $this
 */
public function setUrl(string $urlToBeCrawled)
{
    $uri = new Uri($urlToBeCrawled);

    // Normalise an empty path to the root path.
    $this->urlToBeCrawled = $uri->getPath() === '' ? $uri->withPath('/') : $uri;

    return $this;
}
||
97 | |||
/**
 * Register the callback stored in $shouldCrawl.
 *
 * NOTE(review): presumably consulted by the crawl profile to decide whether
 * a URL is crawled - the consuming code is not visible here; confirm.
 *
 * @param callable $shouldCrawl
 *
 * @return $this
 */
public function shouldCrawl(callable $shouldCrawl)
{
    $this->shouldCrawl = $shouldCrawl;
    return $this;
}
||
104 | |||
/**
 * Register the callback run after a URL has been crawled.
 *
 * The crawl observer calls it with a Url and the response (which may be
 * null); a truthy return value is added to the current sitemap.
 *
 * @param callable $hasCrawled
 *
 * @return $this
 */
public function hasCrawled(callable $hasCrawled)
{
    $this->hasCrawled = $hasCrawled;
    return $this;
}
||
111 | |||
/**
 * Configure the crawler, crawl the configured URL and return the first
 * sitemap of the collection.
 *
 * @return Sitemap
 */
public function getSitemap(): Sitemap
{
    $crawler = $this->crawler;

    // Optional JavaScript execution, driven by the sitemap config values.
    if (config('sitemap.execute_javascript')) {
        $crawler->executeJavaScript(config('sitemap.chrome_binary_path'));
    }

    // Only pass a crawl limit when one was explicitly set.
    if ($this->maximumCrawlCount !== null) {
        $crawler->setMaximumCrawlCount($this->maximumCrawlCount);
    }

    $crawler
        ->setCrawlProfile($this->getCrawlProfile())
        ->setCrawlObserver($this->getCrawlObserver())
        ->setConcurrency($this->concurrency)
        ->startCrawling($this->urlToBeCrawled);

    return $this->sitemaps->first();
}
||
130 | |||
/**
 * Crawl the site (via getSitemap()) and write the result to $path.
 *
 * When a maximum number of tags per sitemap is set, every collected
 * sitemap is written to its own file - $path with "_<key>" inserted before
 * ".xml" - and a sitemap index referencing those files is written to $path.
 * Otherwise the first sitemap is written to $path directly.
 *
 * @param string $path
 *
 * @return $this
 */
public function writeToFile(string $path)
{
    $sitemap = $this->getSitemap();

    if ($this->maximumTagsPerSitemap) {
        $sitemap = SitemapIndex::create();

        // Escape literal percent signs so sprintf() only expands the "%d"
        // placeholder inserted here and never misreads part of the path.
        $format = str_replace('.xml', '_%d.xml', str_replace('%', '%%', $path));

        // Write each sub-sitemap and register it in the sitemap index.
        $this->sitemaps->each(function (Sitemap $item, int $key) use ($sitemap, $format) {
            $chunkPath = sprintf($format, $key);

            $item->writeToFile($chunkPath);

            // The index entry is whatever follows the last "public" in the path.
            $sitemap->add(last(explode('public', $chunkPath)));
        });
    }

    $sitemap->writeToFile($path);

    return $this;
}
||
158 | |||
159 | protected function getCrawlProfile(): CrawlProfile |
||
182 | |||
183 | protected function getCrawlObserver(): Observer |
||
184 | { |
||
185 | $performAfterUrlHasBeenCrawled = function (UriInterface $crawlerUrl, ResponseInterface $response = null) { |
||
186 | $sitemapUrl = ($this->hasCrawled)(Url::create((string) $crawlerUrl), $response); |
||
187 | |||
188 | if ($this->shouldStartNewSitemapFile()) { |
||
189 | $this->sitemaps->push(new Sitemap); |
||
190 | } |
||
191 | |||
192 | if ($sitemapUrl) { |
||
193 | $this->sitemaps->last()->add($sitemapUrl); |
||
199 | |||
200 | protected function shouldStartNewSitemapFile(): bool |
||
210 | } |
||
211 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.