|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types=1); |
|
4
|
|
|
|
|
5
|
|
|
/** |
|
6
|
|
|
* This file is part of Wszetko Sitemap. |
|
7
|
|
|
* |
|
8
|
|
|
* (c) Paweł Kłopotek-Główczewski <[email protected]> |
|
9
|
|
|
* |
|
10
|
|
|
* This source file is subject to the MIT license that is bundled |
|
11
|
|
|
* with this source code in the file LICENSE. |
|
12
|
|
|
*/ |
|
13
|
|
|
|
|
14
|
|
|
namespace Wszetko\Sitemap; |
|
15
|
|
|
|
|
16
|
|
|
use Exception; |
|
17
|
|
|
use InvalidArgumentException; |
|
18
|
|
|
use RecursiveDirectoryIterator; |
|
19
|
|
|
use RecursiveIteratorIterator; |
|
20
|
|
|
use RegexIterator; |
|
21
|
|
|
use Wszetko\Sitemap\Drivers\DataCollectors\AbstractDataCollector; |
|
22
|
|
|
use Wszetko\Sitemap\Drivers\Output\OutputXMLWriter; |
|
23
|
|
|
use Wszetko\Sitemap\Interfaces\DataCollector; |
|
24
|
|
|
use Wszetko\Sitemap\Interfaces\XML; |
|
25
|
|
|
use Wszetko\Sitemap\Traits\Domain; |
|
26
|
|
|
|
|
27
|
|
|
/** |
|
28
|
|
|
* Sitemap |
|
29
|
|
|
* This class is used for generating Google Sitemap files. |
|
30
|
|
|
* |
|
31
|
|
|
* @package Sitemap |
|
32
|
|
|
* |
|
33
|
|
|
* @author Paweł Kłopotek-Główczewski <[email protected]> |
|
34
|
|
|
* @copyright 2019 Paweł Kłopotek-Głowczewski (https://pawelkg.com/) |
|
35
|
|
|
* @license https://opensource.org/licenses/MIT MIT License |
|
36
|
|
|
* |
|
37
|
|
|
* @see https://github.com/wszetko/sitemap |
|
38
|
|
|
*/ |
|
39
|
|
|
class Sitemap |
|
40
|
|
|
{ |
|
41
|
|
|
use Domain; |
|
42
|
|
|
|
|
43
|
|
|
/** |
|
44
|
|
|
* Available values for changefreq tag. |
|
45
|
|
|
* |
|
46
|
|
|
* @var array |
|
47
|
|
|
*/ |
|
48
|
|
|
public const CHANGEFREQ = [ |
|
49
|
|
|
'always', |
|
50
|
|
|
'hourly', |
|
51
|
|
|
'daily', |
|
52
|
|
|
'weekly', |
|
53
|
|
|
'monthly', |
|
54
|
|
|
'yearly', |
|
55
|
|
|
'never', |
|
56
|
|
|
]; |
|
57
|
|
|
|
|
58
|
|
|
/** |
|
59
|
|
|
* Extension for sitemap file. |
|
60
|
|
|
* |
|
61
|
|
|
* @var string |
|
62
|
|
|
*/ |
|
63
|
|
|
public const EXT = '.xml'; |
|
64
|
|
|
|
|
65
|
|
|
/** |
|
66
|
|
|
* Extension for gzipped sitemap file. |
|
67
|
|
|
* |
|
68
|
|
|
* @var string |
|
69
|
|
|
*/ |
|
70
|
|
|
public const GZ_EXT = '.xml.gz'; |
|
71
|
|
|
|
|
72
|
|
|
/** |
|
73
|
|
|
* URL to Sitemap Schema. |
|
74
|
|
|
* |
|
75
|
|
|
* @var string |
|
76
|
|
|
*/ |
|
77
|
|
|
public const SCHEMA = 'http://www.sitemaps.org/schemas/sitemap/0.9'; |
|
78
|
|
|
|
|
79
|
|
|
/** |
|
80
|
|
|
* Limit of items in Sitemap files. |
|
81
|
|
|
* |
|
82
|
|
|
* @var int |
|
83
|
|
|
*/ |
|
84
|
|
|
public const ITEM_PER_SITEMAP = 50000; |
|
85
|
|
|
|
|
86
|
|
|
/** |
|
87
|
|
|
* Limit of Sitemaps in SitemapsIndex. |
|
88
|
|
|
* |
|
89
|
|
|
* @var int |
|
90
|
|
|
*/ |
|
91
|
|
|
public const SITEMAP_PER_SITEMAPINDEX = 1000; |
|
92
|
|
|
|
|
93
|
|
|
/** |
|
94
|
|
|
* Size limit of a single sitemap file in bytes (~49.6 MB, to have some limit at which the file is closed). |
|
95
|
|
|
* |
|
96
|
|
|
* @var int |
|
97
|
|
|
*/ |
|
98
|
|
|
public const SITEMAP_MAX_SIZE = 52000000; |
|
99
|
|
|
|
|
100
|
|
|
/** |
|
101
|
|
|
* Path on disk to public directory. |
|
102
|
|
|
* |
|
103
|
|
|
* @var string |
|
104
|
|
|
*/ |
|
105
|
|
|
private $publicDirectory = ''; |
|
106
|
|
|
|
|
107
|
|
|
/** |
|
108
|
|
|
* Path related to public directory to dir where sitemaps will be. |
|
109
|
|
|
* |
|
110
|
|
|
* @var string |
|
111
|
|
|
*/ |
|
112
|
|
|
private $sitepamsDirectory = ''; |
|
113
|
|
|
|
|
114
|
|
|
/** |
|
115
|
|
|
* Path to temporary directory. |
|
116
|
|
|
* |
|
117
|
|
|
* @var string |
|
118
|
|
|
*/ |
|
119
|
|
|
private $sitemapTempDirectory = ''; |
|
120
|
|
|
|
|
121
|
|
|
/** |
|
122
|
|
|
* Default filename for sitemap file. |
|
123
|
|
|
* |
|
124
|
|
|
* @var string |
|
125
|
|
|
*/ |
|
126
|
|
|
private $defaultFilename = 'sitemap'; |
|
127
|
|
|
|
|
128
|
|
|
/** |
|
129
|
|
|
* Name of index file. |
|
130
|
|
|
* |
|
131
|
|
|
* @var string |
|
132
|
|
|
*/ |
|
133
|
|
|
private $indexFilename = 'index'; |
|
134
|
|
|
|
|
135
|
|
|
/** |
|
136
|
|
|
* DataCollector instance. |
|
137
|
|
|
* |
|
138
|
|
|
* @var DataCollector |
|
139
|
|
|
*/ |
|
140
|
|
|
private $dataCollector; |
|
141
|
|
|
|
|
142
|
|
|
/** |
|
143
|
|
|
* Use compression. |
|
144
|
|
|
* |
|
145
|
|
|
* @var bool |
|
146
|
|
|
*/ |
|
147
|
|
|
private $useCompression = false; |
|
148
|
|
|
|
|
149
|
|
|
/** |
|
150
|
|
|
* XML Writer object. |
|
151
|
|
|
* |
|
152
|
|
|
* @var XML |
|
153
|
|
|
*/ |
|
154
|
|
|
private $xml; |
|
155
|
|
|
|
|
156
|
|
|
/** |
|
157
|
|
|
* Separator to be used in Sitemap filenames. |
|
158
|
|
|
* |
|
159
|
|
|
* @var string |
|
160
|
|
|
*/ |
|
161
|
|
|
private $separator = '-'; // (stray note, it describes SITEMAP_MAX_SIZE: ~49,6MB - to have some limit to close file) |
|
162
|
|
|
|
|
163
|
|
|
/**
 * Constructor.
 *
 * When a domain is given it is handed to setDomain(); when it is null the
 * constructor does nothing.
 *
 * @param null|string $domain domain to configure, or null to skip
 *
 * @throws \InvalidArgumentException presumably raised by setDomain() (defined in the Domain trait, not visible here)
 */
public function __construct(?string $domain = null)
{
    // Explicit "?string": implicit nullability via "= null" is deprecated since PHP 8.4.
    if (null !== $domain) {
        $this->setDomain($domain);
    }
}
|
176
|
|
|
|
|
177
|
|
|
/**
 * Add a single URL item to the data collector.
 *
 * Every non-word character is stripped from the group name. If no group was
 * given, or nothing is left after stripping, the default filename is used
 * as the group. The group name is lower-cased before it is stored.
 *
 * @param Items\Url   $item  item to add; its domain is set to this sitemap's domain
 * @param null|string $group optional group name
 *
 * @throws \Exception when no data collector is set
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function addItem(Items\Url $item, ?string $group = null): self
{
    $name = is_string($group) ? preg_replace('/\W+/', '', $group) : $group;

    if (null === $name || '' === $name) {
        $name = $this->getDefaultFilename();
    }

    $item->setDomain($this->getDomain());
    $this->getDataCollector()->add($item, mb_strtolower($name));

    return $this;
}
|
201
|
|
|
|
|
202
|
|
|
/**
 * Add several URL items to the same group.
 *
 * Each element is passed to addItem() in iteration order; array keys are
 * ignored.
 *
 * @param array       $items items to add (each must be accepted by addItem())
 * @param null|string $group optional group name shared by all items
 *
 * @throws \Exception
 *
 * @return $this
 */
public function addItems(array $items, ?string $group = null): self
{
    foreach ($items as $url) {
        $this->addItem($url, $group);
    }

    return $this;
}
|
218
|
|
|
|
|
219
|
|
|
/**
 * Get the default filename for sitemap files.
 *
 * addItem() falls back to this value when it receives no usable group name.
 *
 * @return string
 */
public function getDefaultFilename(): string
{
    return $this->defaultFilename;
}
|
228
|
|
|
|
|
229
|
|
|
/**
 * Set the default filename for sitemap files.
 *
 * The value is stored as given; no validation is performed here.
 *
 * @param string $defaultFilename new default filename
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setDefaultFilename(string $defaultFilename): self
{
    $this->defaultFilename = $defaultFilename;

    return $this;
}
|
242
|
|
|
|
|
243
|
|
|
/**
 * Get the DataCollector object.
 *
 * @return DataCollector
 *
 * @throws \Exception when no data collector has been set yet
 */
public function getDataCollector(): DataCollector
{
    if (null !== $this->dataCollector) {
        return $this->dataCollector;
    }

    throw new Exception('DataCollector is not set.');
}
|
257
|
|
|
|
|
258
|
|
|
/**
 * Instantiate and register the data collector driver.
 *
 * The driver class is instantiated with $config as its only constructor
 * argument and is accepted only when the instance is an
 * AbstractDataCollector.
 *
 * @param string $driver fully qualified class name of the driver
 * @param array  $config configuration handed to the driver constructor
 *
 * @throws \InvalidArgumentException when the class does not exist or is not an AbstractDataCollector
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setDataCollector(string $driver, $config = []): self
{
    if (!class_exists($driver)) {
        throw new InvalidArgumentException($driver . ' data collector does not exists.');
    }

    $collector = new $driver($config);

    if (!($collector instanceof AbstractDataCollector)) {
        throw new InvalidArgumentException($driver . ' data collector does not exists.');
    }

    $this->dataCollector = $collector;

    return $this;
}
|
282
|
|
|
|
|
283
|
|
|
/**
 * Run the whole generation pipeline.
 *
 * Steps, in order: check that the public directory and the domain are set,
 * fall back to OutputXMLWriter when no XML writer is configured, pass the
 * temporary directory to removeDir(), write the sitemap files, write the
 * sitemap index file(s), and publish everything to the public directory.
 *
 * @throws Exception when the public directory or the domain is not set
 */
public function generate(): void
{
    if ('' === $this->getPublicDirectory()) {
        throw new Exception('Public directory is not set.');
    }

    if ('' === $this->getDomain()) {
        throw new Exception('Domain is not set.');
    }

    if (null === $this->xml) {
        $this->setXml(OutputXMLWriter::class, ['domain' => $this->getDomain()]);
    }

    $this->removeDir($this->getTempDirectory());

    $writer = $this->getXml();

    // Sitemap files go into the sitemaps sub-directory of the temp dir ...
    $writer->setWorkDir($this->getSitepamsTempDirectory());
    $sitemaps = $this->generateSitemaps();

    // ... while index files are written to the temp dir root.
    $writer->setWorkDir($this->getTempDirectory());
    $this->generateSitemapsIndex($sitemaps);

    $this->publishSitemap();
}
|
307
|
|
|
|
|
308
|
|
|
/**
 * Get the path on disk to the public directory.
 *
 * @return string the stored path; an empty string until setPublicDirectory() succeeds
 */
public function getPublicDirectory(): string
{
    return $this->publicDirectory;
}
|
315
|
|
|
|
|
316
|
|
|
/**
 * Set the public directory.
 *
 * The path is resolved with realpath() and the resolved form is stored.
 *
 * @param string $publicDirectory path to an existing directory
 *
 * @throws Exception when the path cannot be resolved
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setPublicDirectory(string $publicDirectory): self
{
    $resolved = realpath($publicDirectory);

    if (false === $resolved) {
        throw new Exception('Sitemap directory does not exists.');
    }

    $this->publicDirectory = $resolved;

    return $this;
}
|
335
|
|
|
|
|
336
|
|
|
/**
 * Get the XML writer object.
 *
 * @return XML
 *
 * @throws \Exception when no XML writer has been set yet
 */
public function getXml(): XML
{
    if (null !== $this->xml) {
        return $this->xml;
    }

    throw new Exception('XML writer class is not set.');
}
|
349
|
|
|
|
|
350
|
|
|
/**
 * Instantiate and register the XML writer driver.
 *
 * When $config has no usable "domain" entry, this sitemap's domain is
 * filled in. A driver class that does not exist, or whose instance does not
 * implement the XML interface, is silently ignored and the previously set
 * writer (if any) stays in place.
 *
 * @param string $driver fully qualified class name of the driver
 * @param array  $config configuration handed to the driver constructor
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setXml(string $driver, array $config = []): self
{
    if (!class_exists($driver)) {
        return $this;
    }

    $config['domain'] = $config['domain'] ?? $this->getDomain();
    $writer = new $driver($config);

    if ($writer instanceof XML) {
        $this->xml = $writer;
    }

    return $this;
}
|
372
|
|
|
|
|
373
|
|
|
/**
 * Get the temporary working directory, creating it on first use.
 *
 * On the first call a directory named "sitemap" plus the md5 of microtime()
 * is created inside the system temp directory. Its resolved path is
 * remembered and returned by every later call.
 *
 * @throws \Exception when the directory cannot be resolved
 *
 * @return string
 */
public function getTempDirectory(): string
{
    // The cast folds the "null" and "empty string" cases into one test.
    if ('' === (string) $this->sitemapTempDirectory) {
        $path = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'sitemap' . md5(microtime());

        if (!is_dir($path)) {
            mkdir($path);
        }

        $resolved = realpath($path);

        if (false === $resolved) {
            // @codeCoverageIgnoreStart
            throw new Exception('Can\'t get temporary directory.');
            // @codeCoverageIgnoreEnd
        }

        $this->sitemapTempDirectory = $resolved;
    }

    return $this->sitemapTempDirectory;
}
|
400
|
|
|
|
|
401
|
|
|
/**
 * Get the directory inside the temp directory where sitemap files are written.
 *
 * The path is the temp directory followed by the configured sitemaps
 * directory. It is created recursively when it does not exist yet.
 *
 * @throws \Exception when the directory cannot be resolved even after creating it
 *
 * @return string
 */
public function getSitepamsTempDirectory(): string
{
    $target = $this->getTempDirectory() . DIRECTORY_SEPARATOR . $this->sitepamsDirectory;
    $resolved = realpath($target);

    if (false === $resolved) {
        mkdir($target, 0777, true);
        $resolved = realpath($target);
    }

    if (false === $resolved) {
        // @codeCoverageIgnoreStart
        throw new Exception('Can\'t get temporary directory.');
        // @codeCoverageIgnoreEnd
    }

    return $resolved;
}
|
427
|
|
|
|
|
428
|
|
|
/**
 * Write all collected items into sitemap files.
 *
 * Every group with at least one item produces files named
 * "<group><separator><n>.xml", with n starting at 0. A new file is started
 * when the current one reaches ITEM_PER_SITEMAP items or comes within 20
 * of SITEMAP_MAX_SIZE, provided the collector reports more items for the
 * group. When compression is enabled the files are gzipped afterwards and
 * the returned keys carry the gz extension.
 *
 * @throws Exception
 *
 * @return array map of file name => most recent "lastmod" seen in that file (null when none)
 */
public function generateSitemaps(): array
{
    $collector = $this->getDataCollector();

    if (0 == $collector->getCount()) {
        return [];
    }

    $xml = $this->getXml();
    $files = [];

    foreach ($collector->getGroups() as $group) {
        if (!($collector->getGroupCount($group) > 0)) {
            continue;
        }

        $groupNo = 0;
        $itemsInFile = 0;
        $filename = $group . $this->getSeparator() . $groupNo . self::EXT;
        $xml->openSitemap($filename, $collector->getExtensions());
        $files[$filename] = null;
        // Tracks whether a sitemap is open, so it is never closed twice.
        $isOpen = true;

        while ($element = $collector->fetch($group)) {
            $xml->addUrl($element);
            ++$itemsInFile;

            if (isset($element['lastmod'])) {
                $latest = $files[$filename];

                // Keep the most recent lastmod of the file for the sitemap index.
                if (!$latest || strtotime($element['lastmod']) > strtotime($latest)) {
                    $files[$filename] = $element['lastmod'];
                }
            }

            // self::SITEMAP_MAX_SIZE - 20 leaves a buffer for the closing tag.
            $limitReached = $itemsInFile >= self::ITEM_PER_SITEMAP
                || $xml->getSitemapSize() >= (self::SITEMAP_MAX_SIZE - 20);

            if (!$limitReached) {
                continue;
            }

            $xml->closeSitemap();
            $isOpen = false;

            if (!$collector->isLast($group)) {
                ++$groupNo;
                $itemsInFile = 0;
                $filename = $group . $this->getSeparator() . $groupNo . self::EXT;
                $xml->openSitemap($filename, $collector->getExtensions());
                $files[$filename] = null;
                $isOpen = true;
            }
        }

        // Skip the final close when the last item already closed the file.
        if ($isOpen) {
            $xml->closeSitemap();
        }
    }

    if ($this->isUseCompression() && [] !== $files) {
        $this->compressFiles($this->getSitepamsTempDirectory(), $files);
    }

    return $files;
}
|
501
|
|
|
|
|
502
|
|
|
/**
 * Get the separator placed between a file's base name and its sequence number.
 *
 * @return string
 */
public function getSeparator(): string
{
    return $this->separator;
}
|
509
|
|
|
|
|
510
|
|
|
/**
 * Set the separator placed between a file's base name and its sequence number.
 *
 * The value is stored as given; no validation is performed here.
 *
 * @param string $separator new separator
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setSeparator(string $separator): self
{
    $this->separator = $separator;

    return $this;
}
|
521
|
|
|
|
|
522
|
|
|
/**
 * Tell whether generated files are gzip-compressed.
 *
 * @return bool
 */
public function isUseCompression(): bool
{
    return $this->useCompression;
}
|
531
|
|
|
|
|
532
|
|
|
/**
 * Set whether to use compression or not.
 *
 * Compression is enabled only when it is requested AND the zlib extension
 * is loaded; otherwise it is switched off. Passing false always disables
 * it. Previously a false argument was ignored, so compression could never
 * be turned off again once enabled.
 *
 * @param bool $useCompression true to request gzip compression
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setUseCompression(bool $useCompression): self
{
    $this->useCompression = $useCompression && extension_loaded('zlib');

    return $this;
}
|
547
|
|
|
|
|
548
|
|
|
/**
 * Write the sitemap index file(s) that list the given sitemap files.
 *
 * The first index is "<indexFilename>.xml". After SITEMAP_PER_SITEMAPINDEX
 * entries a further index "<indexFilename><separator><n>.xml" is started,
 * as long as sitemaps remain. Each entry URL is the domain, then the
 * sitemaps directory relative to the public directory (if any), then the
 * sitemap file name. When compression is enabled the index files are
 * gzipped afterwards and the returned keys carry the gz extension.
 *
 * @param array $sitemaps map of sitemap file name => lastmod, as returned by generateSitemaps()
 *
 * @throws Exception
 *
 * @return array map of index file name => null
 */
public function generateSitemapsIndex(array $sitemaps): array
{
    if ([] === $sitemaps) {
        return [];
    }

    // Loop-invariant URL prefix. OS separators are turned into "/", and an
    // empty relative directory no longer produces "domain//file".
    $relativeDir = trim(
        str_replace(
            DIRECTORY_SEPARATOR,
            '/',
            str_replace($this->getPublicDirectory(), '', $this->getSitepamsDirectory())
        ),
        '/'
    );
    $baseUrl = (string) $this->getDomain() . '/' . ('' !== $relativeDir ? $relativeDir . '/' : '');

    $xml = $this->getXml();
    $file = $this->getIndexFilename() . self::EXT;
    $files = [$file => null];
    $xml->openSitemapIndex($file);
    // Tracks whether an index is open, so it is never closed twice.
    $isOpen = true;
    $lastItem = array_key_last($sitemaps);
    $counter = 0;

    foreach ($sitemaps as $sitemap => $lastmod) {
        $xml->addSitemap($baseUrl . $sitemap, $lastmod);
        ++$counter;

        if ($counter < self::SITEMAP_PER_SITEMAPINDEX) {
            continue;
        }

        $xml->closeSitemapIndex();
        $isOpen = false;
        $counter = 0;

        // Open a follow-up index only when more sitemaps remain.
        if ($sitemap !== $lastItem) {
            $file = $this->getIndexFilename() . $this->getSeparator() . count($files) . self::EXT;
            $files[$file] = null;
            $xml->openSitemapIndex($file);
            $isOpen = true;
        }
    }

    if ($isOpen) {
        $xml->closeSitemapIndex();
    }

    if ($this->isUseCompression() && [] !== $files) {
        $this->compressFiles($this->getTempDirectory(), $files);
    }

    return $files;
}
|
596
|
|
|
|
|
597
|
|
|
/**
 * Get the base filename (without extension) of the sitemap index file.
 *
 * @return string
 */
public function getIndexFilename(): string
{
    return $this->indexFilename;
}
|
606
|
|
|
|
|
607
|
|
|
/**
 * Set the base filename (without extension) of the sitemap index file.
 *
 * The value is stored as given; no validation is performed here.
 *
 * @param string $indexFilename new index filename
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setIndexFilename(string $indexFilename): self
{
    $this->indexFilename = $indexFilename;

    return $this;
}
|
620
|
|
|
|
|
621
|
|
|
/**
 * Get the sitemaps directory.
 *
 * This returns the stored value only; it does not create anything.
 *
 * @throws \Exception declared in the original docblock; this getter itself never throws
 *
 * @return string the value stored by setSitepamsDirectory(), or an empty string when never set
 */
public function getSitepamsDirectory(): string
{
    return $this->sitepamsDirectory;
}
|
630
|
|
|
|
|
631
|
|
|
/**
 * Set the directory, below the public directory, that will hold the sitemap files.
 *
 * The directory is created recursively when it does not exist. The value
 * stored is the resolved (realpath) location, not the relative argument.
 *
 * @param string $sitepamsDirectory path relative to the public directory
 *
 * @return \Wszetko\Sitemap\Sitemap
 *
 * @throws \Exception when the directory cannot be resolved even after creating it
 */
public function setSitepamsDirectory(string $sitepamsDirectory): self
{
    $target = $this->getPublicDirectory() . DIRECTORY_SEPARATOR . $sitepamsDirectory;
    $resolved = realpath($target);

    if (false === $resolved) {
        mkdir($target, 0777, true);
        $resolved = realpath($target);
    }

    if (false === $resolved) {
        throw new Exception('Can\'t get sitemap directory.');
    }

    $this->sitepamsDirectory = $resolved;

    return $this;
}
|
655
|
|
|
|
|
656
|
|
|
/**
 * Recursively delete a directory and everything in it.
 *
 * Does nothing when $dir is not a directory, or when its content cannot be
 * listed. The previous guard was inverted: it returned early for real
 * directories, so nothing was ever removed.
 *
 * @param string $dir directory to remove
 *
 * @return void
 */
private function removeDir(string $dir): void
{
    if (!is_dir($dir)) {
        return;
    }

    $objects = scandir($dir);

    if (false === $objects) {
        return;
    }

    foreach ($objects as $object) {
        if ('.' === $object || '..' === $object) {
            continue;
        }

        $path = $dir . '/' . $object;

        // filetype() reports a symlink as "link", so a linked directory is
        // unlinked rather than descended into.
        if ('dir' === filetype($path)) {
            $this->removeDir($path);
        } else {
            unlink($path);
        }
    }

    rmdir($dir);
}
|
683
|
|
|
|
|
684
|
|
|
/**
 * Gzip every listed file in $dir and delete the uncompressed original.
 *
 * On success $files is replaced by a map whose keys carry the gz extension
 * instead of the plain one; the values are kept.
 *
 * @param string $dir   directory containing the files
 * @param array  $files map of file name => lastmod, rewritten in place
 *
 * @throws Exception when zlib is missing, or a file cannot be opened, read or created
 *
 * @return void
 */
private function compressFiles(string $dir, array &$files): void
{
    if (!extension_loaded('zlib')) {
        throw new Exception('Extension zlib is not loaded.');
    }

    $newFiles = [];

    foreach ($files as $file => $lastmod) {
        $file = (string) $file;
        $source = $dir . DIRECTORY_SEPARATOR . $file;
        // Swap the trailing plain extension for the gz one.
        $gzFile = mb_substr($file, 0, mb_strlen($file) - mb_strlen(self::EXT)) . self::GZ_EXT;
        $output = $dir . DIRECTORY_SEPARATOR . $gzFile;

        // Open the source first so a failure leaves no empty archive behind.
        $in = fopen($source, 'rb');

        if (false === $in) {
            throw new Exception('Can\'t open xml file.');
        }

        $out = gzopen($output, 'wb9');

        if (false === $out) {
            fclose($in);

            throw new Exception('Can\'t create GZip archive.');
        }

        try {
            while (!feof($in)) {
                $content = fread($in, 524288);

                // A failed read would otherwise loop forever or truncate
                // the archive before the source is deleted.
                if (false === $content) {
                    throw new Exception('Can\'t read xml file.');
                }

                gzwrite($out, $content);
            }
        } finally {
            // Release both handles on success and on failure.
            fclose($in);
            gzclose($out);
        }

        unlink($source);
        $newFiles[$gzFile] = $lastmod;
    }

    $files = $newFiles;
}
|
731
|
|
|
|
|
732
|
|
|
/**
 * Move the generated files from the temp directory into the public directory.
 *
 * Steps: pass the sitemaps directory to removeDir(), delete previously
 * published index files from the public directory root, move every
 * generated file with the current extension to its public location
 * (creating directories as needed), then pass the temp directory to
 * removeDir().
 *
 * @throws \Exception when a file cannot be moved to the public directory
 *
 * @return void
 */
private function publishSitemap(): void
{
    $publicDir = $this->getPublicDirectory();
    $tempDir = $this->getTempDirectory();
    $ext = preg_quote($this->getExt(), '/');

    // Clear previous sitemaps
    $this->removeDir($this->getSitepamsDirectory());
    $published = scandir($publicDir);

    if (is_array($published)) {
        // Name, separator and extension are quoted so "." and other
        // metacharacters match literally; the configured separator is used
        // instead of a hard-coded "-".
        $indexPattern = '/^' . preg_quote($this->getIndexFilename(), '/')
            . '(' . preg_quote($this->getSeparator(), '/') . '\d+)?'
            . $ext . '$/';

        foreach ($published as $file) {
            if (1 === preg_match($indexPattern, $file)) {
                unlink($publicDir . DIRECTORY_SEPARATOR . $file);
            }
        }
    }

    $iterator = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($tempDir));
    $matches = new RegexIterator(
        $iterator,
        "/^(?'path'(([a-zA-Z]:)|((\\\\|\\/){1,2}\\w+)?)((\\\\|\\/)(\\w[\\w ]*.*))+({$ext}){1})$/",
        RegexIterator::GET_MATCH
    );

    // Collect first, so the directory is not modified while it is iterated.
    $fileList = [];

    foreach ($matches as $match) {
        if (isset($match['path'])) {
            $fileList[] = $match['path'];
        }
    }

    foreach ($fileList as $file) {
        // Path below the temp dir; it starts with a directory separator.
        $relative = substr($file, strlen($tempDir));

        // setSitepamsDirectory() stores an absolute path inside the public
        // directory, and getSitepamsTempDirectory() nests that path below
        // the temp dir. Dropping the public-directory prefix puts the file
        // where the index URL built in generateSitemapsIndex() points.
        // NOTE(review): confirm this matches the intended layout.
        if ('' !== $publicDir && 0 === strpos($relative, $publicDir . DIRECTORY_SEPARATOR)) {
            $relative = substr($relative, strlen($publicDir));
        }

        $destination = $publicDir . $relative;
        $destinationDir = dirname($destination);

        // The sitemaps directory may have just been removed above.
        if (!is_dir($destinationDir)) {
            mkdir($destinationDir, 0777, true);
        }

        if (!rename($file, $destination)) {
            throw new Exception('Can\'t publish sitemap file.');
        }
    }

    $this->removeDir($tempDir);
}
|
782
|
|
|
|
|
783
|
|
|
/**
 * Get the file extension currently in effect.
 *
 * @return string GZ_EXT when compression is enabled, EXT otherwise
 */
private function getExt(): string
{
    return $this->isUseCompression() ? self::GZ_EXT : self::EXT;
}
|
794
|
|
|
} |
|
795
|
|
|
|