This project does not appear to handle request data directly; as a result, no vulnerable execution paths were found.
Code in this file may still be reached indirectly — for example via `include`, or via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more.
1 | <?php |
||
2 | |||
3 | namespace Spatie\Sitemap; |
||
4 | |||
5 | use Closure; |
||
6 | use GuzzleHttp\Psr7\Uri; |
||
7 | use Illuminate\Support\Collection; |
||
8 | use Psr\Http\Message\ResponseInterface; |
||
9 | use Psr\Http\Message\UriInterface; |
||
10 | use Spatie\Crawler\Crawler; |
||
11 | use Spatie\Crawler\CrawlProfile; |
||
12 | use Spatie\Sitemap\Crawler\Observer; |
||
13 | use Spatie\Sitemap\Crawler\Profile; |
||
14 | use Spatie\Sitemap\Tags\Url; |
||
15 | |||
class SitemapGenerator
{
    /** @var \Illuminate\Support\Collection Generated sitemaps; a new one is appended when the tag limit is reached. */
    protected $sitemaps;

    /** @var \GuzzleHttp\Psr7\Uri|string Start URL of the crawl; an empty string until setUrl() replaces it with a Uri. */
    protected $urlToBeCrawled = '';

    /** @var \Spatie\Crawler\Crawler */
    protected $crawler;

    /** @var callable|null Optional callback deciding whether a discovered URL should be crawled. */
    protected $shouldCrawl;

    /** @var callable Maps a crawled Url (and optional response) to the tag to add; a falsy return skips the URL. */
    protected $hasCrawled;

    /** @var int Number of concurrent requests the crawler is allowed to make. */
    protected $concurrency = 10;

    /** @var bool|int Maximum number of tags per sitemap file, or false for no limit. */
    protected $maximumTagsPerSitemap = false;

    /** @var int|null Maximum number of URLs to crawl, or null for unlimited. */
    protected $maximumCrawlCount = null;

    /**
     * Resolve a generator from the container and point it at the given URL.
     *
     * @param string $urlToBeCrawled
     *
     * @return static
     */
    public static function create(string $urlToBeCrawled)
    {
        return app(static::class)->setUrl($urlToBeCrawled);
    }

    public function __construct(Crawler $crawler)
    {
        $this->crawler = $crawler;

        $this->sitemaps = new Collection([new Sitemap]);

        // Default: add every crawled URL unchanged. The $response parameter is
        // intentionally unused here; it is part of the callback contract so
        // user-supplied callbacks can inspect the HTTP response.
        $this->hasCrawled = function (Url $url, ResponseInterface $response = null) {
            return $url;
        };
    }

    /**
     * Give the caller direct access to the underlying crawler instance.
     */
    public function configureCrawler(Closure $closure): self
    {
        ($closure)($this->crawler);

        return $this;
    }

    /**
     * Set how many requests the crawler may run concurrently.
     *
     * @return $this
     */
    public function setConcurrency(int $concurrency)
    {
        $this->concurrency = $concurrency;

        return $this;
    }

    /**
     * Limit the total number of URLs that will be crawled.
     *
     * @return $this
     */
    public function setMaximumCrawlCount(int $maximumCrawlCount)
    {
        $this->maximumCrawlCount = $maximumCrawlCount;

        return $this;
    }

    /**
     * Split the output into multiple sitemaps holding at most the given
     * number of tags each, tied together by a sitemap index file.
     */
    public function maxTagsPerSitemap(int $maximumTagsPerSitemap = 50000): self
    {
        $this->maximumTagsPerSitemap = $maximumTagsPerSitemap;

        return $this;
    }

    /**
     * Set the URL the crawl starts from. A missing path is normalised to '/'.
     *
     * @param string $urlToBeCrawled
     *
     * @return $this
     */
    public function setUrl(string $urlToBeCrawled)
    {
        $this->urlToBeCrawled = new Uri($urlToBeCrawled);

        if ($this->urlToBeCrawled->getPath() === '') {
            $this->urlToBeCrawled = $this->urlToBeCrawled->withPath('/');
        }

        return $this;
    }

    /**
     * Register a callback that decides whether a discovered URL is crawled.
     *
     * @return $this
     */
    public function shouldCrawl(callable $shouldCrawl)
    {
        $this->shouldCrawl = $shouldCrawl;

        return $this;
    }

    /**
     * Register a callback that may modify (or, by returning a falsy value,
     * discard) each crawled URL before it is added to the sitemap.
     *
     * @return $this
     */
    public function hasCrawled(callable $hasCrawled)
    {
        $this->hasCrawled = $hasCrawled;

        return $this;
    }

    /**
     * Run the crawl and return the first generated sitemap.
     */
    public function getSitemap(): Sitemap
    {
        if (config('sitemap.execute_javascript')) {
            $this->crawler->executeJavaScript(config('sitemap.chrome_binary_path'));
        }

        if (! is_null($this->maximumCrawlCount)) {
            $this->crawler->setMaximumCrawlCount($this->maximumCrawlCount);
        }

        $this->crawler
            ->setCrawlProfile($this->getCrawlProfile())
            ->setCrawlObserver($this->getCrawlObserver())
            ->setConcurrency($this->concurrency)
            ->startCrawling($this->urlToBeCrawled);

        return $this->sitemaps->first();
    }

    /**
     * Crawl the site and write the result to the given path. When a tag limit
     * is configured, numbered sub-sitemaps plus a sitemap index are written.
     *
     * @param string $path
     *
     * @return $this
     */
    public function writeToFile(string $path)
    {
        $sitemap = $this->getSitemap();

        if ($this->maximumTagsPerSitemap) {
            $sitemap = SitemapIndex::create();
            // NOTE(review): str_replace targets every '.xml' occurrence in the
            // path; it assumes '.xml' appears only in the file name.
            $format = str_replace('.xml', '_%d.xml', $path);

            // Write each sub-sitemap to a numbered file and register it in the
            // index under its path relative to the public directory.
            $this->sitemaps->each(function (Sitemap $item, int $key) use ($sitemap, $format) {
                $path = sprintf($format, $key);

                $item->writeToFile($path);
                // Assumes the file lives under Laravel's 'public' directory;
                // everything up to (and including) 'public' is stripped.
                $sitemap->add(last(explode('public', $path)));
            });
        }

        $sitemap->writeToFile($path);

        return $this;
    }

    /**
     * Build the crawl profile: stay on the start host and defer to the
     * optional shouldCrawl() callback for everything else.
     */
    protected function getCrawlProfile(): CrawlProfile
    {
        $shouldCrawl = function (UriInterface $url) {
            // Never leave the host the crawl started on.
            if ($url->getHost() !== $this->urlToBeCrawled->getHost()) {
                return false;
            }

            if (! is_callable($this->shouldCrawl)) {
                return true;
            }

            return ($this->shouldCrawl)($url);
        };

        // The profile class is configurable; custom profiles might not accept
        // a callback, hence the method_exists() guard below.
        $profileClass = config('sitemap.crawl_profile', Profile::class);
        $profile = new $profileClass($this->urlToBeCrawled);

        if (method_exists($profile, 'shouldCrawlCallback')) {
            $profile->shouldCrawlCallback($shouldCrawl);
        }

        return $profile;
    }

    /**
     * Build the observer that feeds every crawled URL into the sitemap(s).
     */
    protected function getCrawlObserver(): Observer
    {
        $performAfterUrlHasBeenCrawled = function (UriInterface $crawlerUrl, ResponseInterface $response = null) {
            $sitemapUrl = ($this->hasCrawled)(Url::create((string) $crawlerUrl), $response);

            // Start a fresh sitemap file once the current one is full.
            if ($this->shouldStartNewSitemapFile()) {
                $this->sitemaps->push(new Sitemap);
            }

            // A falsy return from the hasCrawled callback excludes the URL.
            if ($sitemapUrl) {
                $this->sitemaps->last()->add($sitemapUrl);
            }
        };

        return new Observer($performAfterUrlHasBeenCrawled);
    }

    /**
     * Whether the current sitemap has reached the configured tag limit.
     */
    protected function shouldStartNewSitemapFile(): bool
    {
        if (! $this->maximumTagsPerSitemap) {
            return false;
        }

        $currentNumberOfTags = count($this->sitemaps->last()->getTags());

        return $currentNumberOfTags >= $this->maximumTagsPerSitemap;
    }
}
||
215 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.