1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
/** |
6
|
|
|
* This file is part of Wszetko Sitemap. |
7
|
|
|
* |
8
|
|
|
* (c) Paweł Kłopotek-Główczewski <[email protected]> |
9
|
|
|
* |
10
|
|
|
* This source file is subject to the MIT license that is bundled |
11
|
|
|
* with this source code in the file LICENSE. |
12
|
|
|
*/ |
13
|
|
|
|
14
|
|
|
namespace Wszetko\Sitemap; |
15
|
|
|
|
16
|
|
|
use Exception; |
17
|
|
|
use InvalidArgumentException; |
18
|
|
|
use RecursiveDirectoryIterator; |
19
|
|
|
use RecursiveIteratorIterator; |
20
|
|
|
use RegexIterator; |
21
|
|
|
use Wszetko\Sitemap\Drivers\DataCollectors\AbstractDataCollector; |
22
|
|
|
use Wszetko\Sitemap\Drivers\Output\OutputXMLWriter; |
23
|
|
|
use Wszetko\Sitemap\Interfaces\DataCollector; |
24
|
|
|
use Wszetko\Sitemap\Interfaces\XML; |
25
|
|
|
use Wszetko\Sitemap\Traits\Domain; |
26
|
|
|
|
27
|
|
|
/** |
28
|
|
|
* Sitemap |
29
|
|
|
* This class is used for generating Google Sitemap files. |
30
|
|
|
* |
31
|
|
|
* @package Sitemap |
32
|
|
|
* |
33
|
|
|
* @author Paweł Kłopotek-Główczewski <[email protected]> |
34
|
|
|
* @copyright 2019 Paweł Kłopotek-Głowczewski (https://pawelkg.com/) |
35
|
|
|
* @license https://opensource.org/licenses/MIT MIT License |
36
|
|
|
* |
37
|
|
|
* @see https://github.com/wszetko/sitemap |
38
|
|
|
*/ |
39
|
|
|
class Sitemap |
40
|
|
|
{ |
41
|
|
|
use Domain; |
42
|
|
|
|
43
|
|
|
/** |
44
|
|
|
* Available values for changefreq tag. |
45
|
|
|
* |
46
|
|
|
* @var array |
47
|
|
|
*/ |
48
|
|
|
public const CHANGEFREQ = [ |
49
|
|
|
'always', |
50
|
|
|
'hourly', |
51
|
|
|
'daily', |
52
|
|
|
'weekly', |
53
|
|
|
'monthly', |
54
|
|
|
'yearly', |
55
|
|
|
'never', |
56
|
|
|
]; |
57
|
|
|
|
58
|
|
|
/** |
59
|
|
|
* Extension for sitemap file. |
60
|
|
|
* |
61
|
|
|
* @var string |
62
|
|
|
*/ |
63
|
|
|
public const EXT = '.xml'; |
64
|
|
|
|
65
|
|
|
/** |
66
|
|
|
* Extension for gzipped sitemap file. |
67
|
|
|
* |
68
|
|
|
* @var string |
69
|
|
|
*/ |
70
|
|
|
public const GZ_EXT = '.xml.gz'; |
71
|
|
|
|
72
|
|
|
/** |
73
|
|
|
* URL to Sitemap Schema. |
74
|
|
|
* |
75
|
|
|
* @var string |
76
|
|
|
*/ |
77
|
|
|
public const SCHEMA = 'http://www.sitemaps.org/schemas/sitemap/0.9'; |
78
|
|
|
|
79
|
|
|
/** |
80
|
|
|
* Limit of items in Sitemap files. |
81
|
|
|
* |
82
|
|
|
* @var int |
83
|
|
|
*/ |
84
|
|
|
public const ITEM_PER_SITEMAP = 50000; |
85
|
|
|
|
86
|
|
|
/** |
87
|
|
|
* Limit of Sitemaps in a Sitemaps index. |
88
|
|
|
* |
89
|
|
|
* @var int |
90
|
|
|
*/ |
91
|
|
|
public const SITEMAP_PER_SITEMAPINDEX = 1000; |
92
|
|
|
|
93
|
|
|
/** |
94
|
|
|
* Limit of a single file's size in bytes (~49.6 MB, leaving some headroom to close the file). |
95
|
|
|
* |
96
|
|
|
* @var int |
97
|
|
|
*/ |
98
|
|
|
public const SITEMAP_MAX_SIZE = 52000000; |
99
|
|
|
|
100
|
|
|
/** |
101
|
|
|
* Path on disk to public directory. |
102
|
|
|
* |
103
|
|
|
* @var string |
104
|
|
|
*/ |
105
|
|
|
private $publicDirectory = ''; |
106
|
|
|
|
107
|
|
|
/** |
108
|
|
|
* Path related to public directory to dir where sitemaps will be. |
109
|
|
|
* |
110
|
|
|
* @var string |
111
|
|
|
*/ |
112
|
|
|
private $sitepamsDirectory = ''; |
113
|
|
|
|
114
|
|
|
/** |
115
|
|
|
* Path to temporary directory. |
116
|
|
|
* |
117
|
|
|
* @var string |
118
|
|
|
*/ |
119
|
|
|
private $sitemapTempDirectory = ''; |
120
|
|
|
|
121
|
|
|
/** |
122
|
|
|
* Default filename for sitemap file. |
123
|
|
|
* |
124
|
|
|
* @var string |
125
|
|
|
*/ |
126
|
|
|
private $defaultFilename = 'sitemap'; |
127
|
|
|
|
128
|
|
|
/** |
129
|
|
|
* Name of index file. |
130
|
|
|
* |
131
|
|
|
* @var string |
132
|
|
|
*/ |
133
|
|
|
private $indexFilename = 'index'; |
134
|
|
|
|
135
|
|
|
/** |
136
|
|
|
* DataCollector instance. |
137
|
|
|
* |
138
|
|
|
* @var DataCollector |
139
|
|
|
*/ |
140
|
|
|
private $dataCollector; |
141
|
|
|
|
142
|
|
|
/** |
143
|
|
|
* Use compression. |
144
|
|
|
* |
145
|
|
|
* @var bool |
146
|
|
|
*/ |
147
|
|
|
private $useCompression = false; |
148
|
|
|
|
149
|
|
|
/** |
150
|
|
|
* XML Writer object. |
151
|
|
|
* |
152
|
|
|
* @var XML |
153
|
|
|
*/ |
154
|
|
|
private $xml; |
155
|
|
|
|
156
|
|
|
/** |
157
|
|
|
* Separator to be used in Sitemap filenames. |
158
|
|
|
* |
159
|
|
|
* @var string |
160
|
|
|
*/ |
161
|
|
|
private $separator = '-'; // ~49,6MB - to have some limit to close file |
162
|
|
|
|
163
|
|
|
/**
 * Constructor.
 *
 * When a domain is given it is forwarded to setDomain(); with null the
 * instance is created without touching the domain.
 *
 * @param null|string $domain domain to set, or null to skip setting it
 *
 * @throws \InvalidArgumentException declared by the original docblock;
 *                                   presumably raised by setDomain(), which
 *                                   is not defined in this class body
 */
public function __construct(?string $domain = null)
{
    // Explicit "?string": implicit nullability through a null default is
    // deprecated as of PHP 8.4.
    if (null !== $domain) {
        $this->setDomain($domain);
    }
}
176
|
|
|
|
177
|
|
|
/**
 * Add a single URL item to the data collector.
 *
 * The group name has every run of non-word characters removed and is
 * lower-cased. When no group is given, or nothing is left after the
 * clean-up, the default filename is used as the group. The item is given
 * this sitemap's domain before it is stored.
 *
 * @param Items\Url   $item  item to store
 * @param null|string $group optional group name
 *
 * @throws \Exception when no data collector has been set
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function addItem(Items\Url $item, ?string $group = null): self
{
    // preg_replace() may itself yield null, which is treated like "no group".
    $name = null === $group ? null : preg_replace('/\W+/', '', $group);

    if (null === $name || '' === $name) {
        $name = $this->getDefaultFilename();
    }

    $name = mb_strtolower($name);
    $item->setDomain($this->getDomain());
    $this->getDataCollector()->add($item, $name);

    return $this;
}
201
|
|
|
|
202
|
|
|
/**
 * Add several items at once, all under the same group.
 *
 * Every element is passed to addItem() in iteration order.
 *
 * @param array       $items items to add; each one must be accepted by addItem()
 * @param null|string $group optional group name shared by all items
 *
 * @throws \Exception when addItem() fails for one of the items
 *
 * @return $this
 */
public function addItems(array $items, ?string $group = null): self
{
    foreach ($items as $url) {
        $this->addItem($url, $group);
    }

    return $this;
}
218
|
|
|
|
219
|
|
|
/**
 * Return the filename used for sitemap files when no group is given.
 *
 * @return string
 */
public function getDefaultFilename(): string
{
    return $this->defaultFilename;
}
228
|
|
|
|
229
|
|
|
/**
 * Change the filename used for sitemap files when no group is given.
 *
 * The value is stored as passed, without any validation.
 *
 * @param string $defaultFilename new default filename
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setDefaultFilename(string $defaultFilename): self
{
    $this->defaultFilename = $defaultFilename;

    return $this;
}
242
|
|
|
|
243
|
|
|
/**
 * Return the configured DataCollector.
 *
 * @throws \Exception when no data collector has been set yet
 *
 * @return DataCollector
 */
public function getDataCollector(): DataCollector
{
    if (null !== $this->dataCollector) {
        return $this->dataCollector;
    }

    throw new Exception('DataCollector is not set.');
}
257
|
|
|
|
258
|
|
|
/**
 * Instantiate and register the data collector driver.
 *
 * The driver class is created with $config as its only constructor
 * argument and must be an AbstractDataCollector.
 *
 * @param string $driver class name of the data collector
 * @param array  $config configuration passed to the driver constructor
 *
 * @throws \InvalidArgumentException when the class does not exist or the
 *                                   created object is not an AbstractDataCollector
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setDataCollector(string $driver, $config = []): self
{
    if (!class_exists($driver)) {
        throw new InvalidArgumentException($driver . ' data collector does not exists.');
    }

    $collector = new $driver($config);

    if (!$collector instanceof AbstractDataCollector) {
        throw new InvalidArgumentException($driver . ' data collector does not exists.');
    }

    $this->dataCollector = $collector;

    return $this;
}
282
|
|
|
|
283
|
|
|
/**
 * Run the whole generation: sitemap files, sitemap index, then publish.
 *
 * Falls back to OutputXMLWriter when no XML writer has been set. Sitemap
 * files are written with the sitemaps temp directory as work dir, the index
 * with the temp directory itself as work dir.
 *
 * @throws Exception when the public directory or the domain is not set, or
 *                   when one of the called steps fails
 */
public function generate(): void
{
    if ('' === $this->getPublicDirectory()) {
        throw new Exception('Public directory is not set.');
    }

    if ('' === $this->getDomain()) {
        throw new Exception('Domain is not set.');
    }

    if (null === $this->xml) {
        $this->setXml(OutputXMLWriter::class, ['domain' => $this->getDomain()]);
    }

    $this->removeDir($this->getTempDirectory());

    $writer = $this->getXml();

    // Step 1: the sitemap files themselves.
    $writer->setWorkDir($this->getSitepamsTempDirectory());
    $sitemaps = $this->generateSitemaps();

    // Step 2: the index that lists them.
    $writer->setWorkDir($this->getTempDirectory());
    $this->generateSitemapsIndex($sitemaps);

    // Step 3: move everything to the public directory.
    $this->publishSitemap();
}
307
|
|
|
|
308
|
|
|
/**
 * Return the path of the public directory ('' while it is not set).
 *
 * @return string
 */
public function getPublicDirectory(): string
{
    return $this->publicDirectory;
}
315
|
|
|
|
316
|
|
|
/**
 * Set the public directory.
 *
 * The path is resolved with realpath(), so it has to exist already; the
 * resolved form is what gets stored.
 *
 * @param string $publicDirectory path to an existing directory
 *
 * @throws Exception when the path cannot be resolved
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setPublicDirectory(string $publicDirectory): self
{
    $resolved = realpath($publicDirectory);

    if (false === $resolved) {
        throw new Exception('Sitemap directory does not exists.');
    }

    $this->publicDirectory = $resolved;

    return $this;
}
335
|
|
|
|
336
|
|
|
/**
 * Return the configured XML writer.
 *
 * @throws \Exception when no XML writer has been set yet
 *
 * @return XML
 */
public function getXml(): XML
{
    if (null !== $this->xml) {
        return $this->xml;
    }

    throw new Exception('XML writer class is not set.');
}
349
|
|
|
|
350
|
|
|
/**
 * Instantiate and register the XML writer driver.
 *
 * When $config has no (non-null) 'domain' entry, this sitemap's domain is
 * filled in. An unknown class, or a driver that does not implement XML, is
 * ignored silently and the previous writer (if any) is kept.
 *
 * @param string $driver class name of the XML writer
 * @param array  $config configuration passed to the driver constructor
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setXml(string $driver, array $config = []): self
{
    if (!class_exists($driver)) {
        return $this;
    }

    $config['domain'] = $config['domain'] ?? $this->getDomain();
    $writer = new $driver($config);

    if ($writer instanceof XML) {
        $this->xml = $writer;
    }

    return $this;
}
372
|
|
|
|
373
|
|
|
/**
 * Return the temporary working directory, creating it on first use.
 *
 * The directory is named "sitemap" followed by the md5 of microtime() and
 * lives in the system temp directory. Its resolved path is remembered, so
 * later calls return the same directory.
 *
 * @throws \Exception when the directory path cannot be resolved
 *
 * @return string
 */
public function getTempDirectory(): string
{
    if (null !== $this->sitemapTempDirectory && '' != $this->sitemapTempDirectory) {
        return $this->sitemapTempDirectory;
    }

    $candidate = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'sitemap' . md5(microtime());

    if (!is_dir($candidate)) {
        mkdir($candidate);
    }

    $resolved = realpath($candidate);

    if (false === $resolved) {
        // @codeCoverageIgnoreStart
        throw new Exception('Can\'t get temporary directory.');
        // @codeCoverageIgnoreEnd
    }

    $this->sitemapTempDirectory = $resolved;

    return $this->sitemapTempDirectory;
}
400
|
|
|
|
401
|
|
|
/**
 * Return the directory inside the temp directory where sitemap files are
 * written, creating it (recursively) when it does not exist.
 *
 * The path is the temp directory joined with the configured sitemaps
 * directory.
 *
 * @throws \Exception when the directory path cannot be resolved
 *
 * @return string
 */
public function getSitepamsTempDirectory(): string
{
    $path = $this->getTempDirectory() . DIRECTORY_SEPARATOR . $this->sitepamsDirectory;
    $resolved = realpath($path);

    if (false === $resolved) {
        mkdir($path, 0777, true);
        $resolved = realpath($path);
    }

    if (false === $resolved) {
        // @codeCoverageIgnoreStart
        throw new Exception('Can\'t get temporary directory.');
        // @codeCoverageIgnoreEnd
    }

    return $resolved;
}
427
|
|
|
|
428
|
|
|
/**
 * Write the sitemap files for every group held by the data collector.
 *
 * Files are named "<group><separator><number>.xml", numbered from 0 within
 * each group. A new file is started once the current one holds
 * ITEM_PER_SITEMAP items or its size reaches SITEMAP_MAX_SIZE - 20, unless
 * the collector reports that the group has no more items. When compression
 * is enabled the files are gzipped afterwards and the returned names carry
 * the gzip extension.
 *
 * @throws Exception when the data collector or XML writer is not set, or
 *                   when compression fails
 *
 * @return array map of filename => most recent "lastmod" of the items in
 *               that file (null when no item carried one); empty when the
 *               collector holds no items
 */
public function generateSitemaps(): array
{
    $collector = $this->getDataCollector();

    if (0 == $collector->getCount()) {
        return [];
    }

    $files = [];

    foreach ($collector->getGroups() as $group) {
        if (!($collector->getGroupCount($group) > 0)) {
            continue;
        }

        $xml = $this->getXml();
        $groupNo = 0;
        $itemsInFile = 0;
        $filename = $group . $this->getSeparator() . $groupNo . self::EXT;
        $xml->openSitemap($filename, $collector->getExtensions());
        $files[$filename] = null;
        // Tracks whether a file is currently open so it is closed exactly once.
        $isOpen = true;

        while ($element = $collector->fetch($group)) {
            $xml->addUrl($element);
            ++$itemsInFile;

            // Keep the newest lastmod seen for the current file.
            if (
                isset($element['lastmod'])
                && (
                    !$files[$filename]
                    || strtotime($element['lastmod']) > strtotime($files[$filename])
                )
            ) {
                $files[$filename] = $element['lastmod'];
            }

            // self::SITEMAP_MAX_SIZE - 20 for buffer for close tag
            if (
                $itemsInFile >= self::ITEM_PER_SITEMAP
                || $xml->getSitemapSize() >= (self::SITEMAP_MAX_SIZE - 20)
            ) {
                $xml->closeSitemap();
                $isOpen = false;

                if (!$collector->isLast($group)) {
                    ++$groupNo;
                    $itemsInFile = 0;
                    $filename = $group . $this->getSeparator() . $groupNo . self::EXT;
                    $xml->openSitemap($filename, $collector->getExtensions());
                    $files[$filename] = null;
                    $isOpen = true;
                }
            }
        }

        // Skip the final close when the limit branch already closed the last file.
        if ($isOpen) {
            $xml->closeSitemap();
        }
    }

    if ($this->isUseCompression() && [] !== $files) {
        // $files is passed by reference and comes back with the gzip names.
        $this->compressFiles($this->getSitepamsTempDirectory(), $files);
    }

    return $files;
}
501
|
|
|
|
502
|
|
|
/**
 * Return the separator placed between name and number in generated filenames.
 *
 * @return string
 */
public function getSeparator(): string
{
    return $this->separator;
}
509
|
|
|
|
510
|
|
|
/**
 * Change the separator placed between name and number in generated filenames.
 *
 * The value is stored as passed, without any validation.
 *
 * @param string $separator new separator
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setSeparator(string $separator): self
{
    $this->separator = $separator;

    return $this;
}
521
|
|
|
|
522
|
|
|
/**
 * Tell whether generated files are going to be gzipped.
 *
 * @return bool
 */
public function isUseCompression(): bool
{
    return $this->useCompression;
}
531
|
|
|
|
532
|
|
|
/**
 * Set whether to use compression or not.
 *
 * Compression is only enabled when the zlib extension is loaded; asking
 * for it without zlib leaves compression off. Passing false always turns
 * compression off, including after it was enabled earlier.
 *
 * @param bool $useCompression true to gzip generated files
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setUseCompression(bool $useCompression): self
{
    $this->useCompression = $useCompression && extension_loaded('zlib');

    return $this;
}
547
|
|
|
|
548
|
|
|
/**
 * Write the sitemap index file(s) listing the given sitemap files.
 *
 * The first index is "<indexFilename>.xml"; once SITEMAP_PER_SITEMAPINDEX
 * entries were written and more sitemaps remain, a further file
 * "<indexFilename><separator><n>.xml" is started. Every entry URL is the
 * domain, the sitemaps directory relative to the public directory (when
 * there is one) and the sitemap filename, joined with single forward
 * slashes. When compression is enabled the index files are gzipped
 * afterwards and the returned names carry the gzip extension.
 *
 * @param array $sitemaps map of sitemap filename => lastmod (or null)
 *
 * @throws Exception when the XML writer is not set or compression fails
 *
 * @return array map of index filename => null; empty when $sitemaps is empty
 */
public function generateSitemapsIndex(array $sitemaps): array
{
    if ([] === $sitemaps) {
        return [];
    }

    // Loop-invariant URL prefix. Directory separators become "/" and an empty
    // relative directory no longer produces a double slash.
    $relativeDir = str_replace($this->getPublicDirectory(), '', $this->getSitepamsDirectory());
    $relativeDir = trim(str_replace(DIRECTORY_SEPARATOR, '/', $relativeDir), '/');
    $baseUrl = (string) $this->getDomain() . '/' . ('' === $relativeDir ? '' : $relativeDir . '/');

    $xml = $this->getXml();
    $file = $this->getIndexFilename() . self::EXT;
    $files = [$file => null];
    $xml->openSitemapIndex($file);
    // Tracks whether an index file is currently open so it is closed exactly once.
    $isOpen = true;
    $lastItem = array_key_last($sitemaps);
    $counter = 0;

    foreach ($sitemaps as $sitemap => $lastmod) {
        $xml->addSitemap($baseUrl . $sitemap, $lastmod);
        ++$counter;

        if ($counter < self::SITEMAP_PER_SITEMAPINDEX) {
            continue;
        }

        $xml->closeSitemapIndex();
        $isOpen = false;
        $counter = 0;

        // Both values are keys of the same array, so a strict comparison is safe.
        if ($sitemap !== $lastItem) {
            $file = $this->getIndexFilename() . $this->getSeparator() . count($files) . self::EXT;
            $files[$file] = null;
            $xml->openSitemapIndex($file);
            $isOpen = true;
        }
    }

    // Skip the final close when the limit branch already closed the last file.
    if ($isOpen) {
        $xml->closeSitemapIndex();
    }

    if ($this->isUseCompression() && [] !== $files) {
        // $files is passed by reference and comes back with the gzip names.
        $this->compressFiles($this->getTempDirectory(), $files);
    }

    return $files;
}
596
|
|
|
|
597
|
|
|
/**
 * Return the base filename (without extension) of the sitemap index file.
 *
 * @return string
 */
public function getIndexFilename(): string
{
    return $this->indexFilename;
}
606
|
|
|
|
607
|
|
|
/**
 * Change the base filename (without extension) of the sitemap index file.
 *
 * The value is stored as passed, without any validation.
 *
 * @param string $indexFilename new index filename
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setIndexFilename(string $indexFilename): self
{
    $this->indexFilename = $indexFilename;

    return $this;
}
620
|
|
|
|
621
|
|
|
/**
 * Return the stored sitemaps directory ('' while it is not set).
 *
 * This is a plain getter; it neither creates nor checks the directory.
 *
 * @return string
 */
public function getSitepamsDirectory(): string
{
    return $this->sitepamsDirectory;
}
630
|
|
|
|
631
|
|
|
/**
 * Set the directory, given relative to the public directory, where
 * sitemap files are published.
 *
 * The directory is created (recursively) when it does not exist, and its
 * resolved absolute path is what gets stored.
 *
 * NOTE(review): the stored value is absolute, while
 * getSitepamsTempDirectory() appends it to the temp directory and
 * publishSitemap() maps temp paths onto the public directory. This looks
 * inconsistent for nested sitemap directories - confirm the intended
 * layout.
 *
 * @param string $sitepamsDirectory path relative to the public directory
 *
 * @throws \Exception when the public directory is not set yet, or when
 *                    the directory cannot be resolved or created
 *
 * @return \Wszetko\Sitemap\Sitemap
 */
public function setSitepamsDirectory(string $sitepamsDirectory): self
{
    $publicDirectory = $this->getPublicDirectory();

    // Without a public directory the path would resolve against the
    // filesystem root and mkdir() would be attempted there.
    if ('' === $publicDirectory) {
        throw new Exception('Public directory is not set.');
    }

    $path = $publicDirectory . DIRECTORY_SEPARATOR . $sitepamsDirectory;
    $directory = realpath($path);

    if (false === $directory) {
        mkdir($path, 0777, true);
        $directory = realpath($path);
    }

    if (false === $directory) {
        throw new Exception('Can\'t get sitemap directory.');
    }

    $this->sitepamsDirectory = $directory;

    return $this;
}
655
|
|
|
|
656
|
|
|
/**
 * Recursively delete a directory and everything inside it.
 *
 * Does nothing when $dir is not an existing directory (this includes the
 * empty string). As a safety net the configured public directory itself is
 * never deleted.
 *
 * @param string $dir directory to remove
 *
 * @return void
 */
private function removeDir($dir): void
{
    // The guard used to be inverted: existing directories were skipped and
    // non-directories were handed to scandir().
    if (!is_dir($dir)) {
        return;
    }

    // publishSitemap() clears the sitemaps directory, which can resolve to
    // the public directory itself; wiping that would delete unrelated files.
    if ('' !== $this->publicDirectory && realpath($dir) === $this->publicDirectory) {
        return;
    }

    $objects = scandir($dir);

    if (false === $objects) {
        return;
    }

    foreach ($objects as $object) {
        if ('.' === $object || '..' === $object) {
            continue;
        }

        $path = $dir . '/' . $object;

        if ('dir' === filetype($path)) {
            $this->removeDir($path);
        } else {
            unlink($path);
        }
    }

    rmdir($dir);
}
683
|
|
|
|
684
|
|
|
/**
 * Gzip every listed file inside $dir and delete the uncompressed original.
 *
 * The archive name is the original name with its last four characters
 * (expected to be ".xml") replaced by the gzip extension. On success
 * $files is replaced by a map with the archive names as keys and the
 * original values.
 *
 * @param string $dir   directory that contains the files
 * @param array  $files map of filename => lastmod, updated by reference
 *
 * @throws Exception when zlib is not loaded, or when a file cannot be
 *                   opened, read or written
 *
 * @return void
 */
private function compressFiles(string $dir, array &$files): void
{
    if (!extension_loaded('zlib')) {
        throw new Exception('Extension zlib is not loaded.');
    }

    $newFiles = [];

    foreach ($files as $file => $lastmod) {
        $source = $dir . DIRECTORY_SEPARATOR . $file;
        $gzFile = mb_substr($file, 0, mb_strlen($file) - 4) . self::GZ_EXT;
        $output = $dir . DIRECTORY_SEPARATOR . $gzFile;

        $out = gzopen($output, 'wb9');

        if (false === $out) {
            throw new Exception('Can\'t create GZip archive.');
        }

        $in = fopen($source, 'rb');

        if (false === $in) {
            // Do not leak the archive handle or leave an empty archive behind.
            gzclose($out);
            unlink($output);

            throw new Exception('Can\'t open xml file.');
        }

        try {
            while (!feof($in)) {
                $content = fread($in, 524288);

                // A failing read would otherwise be skipped and could loop forever.
                if (false === $content) {
                    throw new Exception('Can\'t read xml file.');
                }

                gzwrite($out, $content);
            }
        } finally {
            fclose($in);
            gzclose($out);
        }

        unlink($source);
        $newFiles[$gzFile] = $lastmod;
    }

    $files = $newFiles;
}
731
|
|
|
|
732
|
|
|
/**
 * Move the generated files from the temp directory to the public directory.
 *
 * Steps: clear the sitemaps directory, delete previously published index
 * files ("<indexFilename>[<separator><digits>]<ext>") from the public
 * directory, make sure the sitemaps directory exists again, move every
 * temp file with the current extension to the same relative location under
 * the public directory, then remove the temp directory.
 *
 * NOTE(review): destinations are computed by replacing the temp directory
 * prefix with the public directory. With an absolute sitemaps directory
 * (as stored by setSitepamsDirectory()) nested inside the temp directory,
 * this looks like it yields a doubled path - confirm the intended layout.
 *
 * @throws \Exception when the temp directory cannot be resolved
 *
 * @return void
 */
private function publishSitemap(): void
{
    $sitemapsDir = $this->getSitepamsDirectory();
    $publicDir = $this->getPublicDirectory();

    // Clear previous sitemaps
    $this->removeDir($sitemapsDir);
    $listing = scandir($publicDir);

    if (is_array($listing)) {
        // Every dynamic part is quoted so that "." and other metacharacters
        // match literally; the configured separator replaces the fixed "-".
        $indexPattern = '/^(' . preg_quote($this->getIndexFilename(), '/') . ')(('
            . preg_quote($this->getSeparator(), '/') . ')[\d]+)?('
            . preg_quote($this->getExt(), '/') . ')$/';

        foreach ($listing as $entry) {
            if (1 === preg_match($indexPattern, $entry)) {
                unlink($publicDir . DIRECTORY_SEPARATOR . $entry);
            }
        }
    }

    // Recreate the sitemaps directory that was just cleared (the getter
    // called here before did not create anything).
    if ('' !== $sitemapsDir && !is_dir($sitemapsDir)) {
        mkdir($sitemapsDir, 0777, true);
    }

    $tempDir = $this->getTempDirectory();
    $extension = preg_quote($this->getExt(), '/');
    $matches = new RegexIterator(
        new RecursiveIteratorIterator(new RecursiveDirectoryIterator($tempDir)),
        "/^(?'path'(([a-zA-Z]:)|((\\\\|\\/){1,2}\\w+)?)((\\\\|\\/)(\\w[\\w ]*.*))+({$extension}){1})$/",
        RegexIterator::GET_MATCH
    );

    // Collect first, move afterwards, so files are not renamed while the
    // directory is still being iterated.
    $fileList = [];

    foreach ($matches as $match) {
        if (isset($match['path'])) {
            $fileList[] = $match['path'];
        }
    }

    foreach ($fileList as $file) {
        rename($file, str_replace($tempDir, $publicDir, $file));
    }

    $this->removeDir($tempDir);
}
782
|
|
|
|
783
|
|
|
/**
 * Return the extension of the generated files: the gzip extension when
 * compression is enabled, the plain XML extension otherwise.
 *
 * @return string
 */
private function getExt(): string
{
    return $this->isUseCompression() ? self::GZ_EXT : self::EXT;
}
794
|
|
|
} |
795
|
|
|
|