Completed
Push — master ( 209992...5259ee )
by Paweł
02:43
created

Sitemap::publishSitemap()   A

Complexity

Conditions 4
Paths 6

Size

Total Lines 27
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 20

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 4
eloc 17
c 3
b 0
f 0
nc 6
nop 0
dl 0
loc 27
ccs 0
cts 18
cp 0
crap 20
rs 9.7
1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * This file is part of Wszetko Sitemap.
7
 *
8
 * (c) Paweł Kłopotek-Główczewski <[email protected]>
9
 *
10
 * This source file is subject to the MIT license that is bundled
11
 * with this source code in the file LICENSE.
12
 */
13
14
namespace Wszetko\Sitemap;
15
16
use Exception;
17
use InvalidArgumentException;
18
use RecursiveDirectoryIterator;
19
use RecursiveIteratorIterator;
20
use RegexIterator;
21
use Wszetko\Sitemap\Drivers\DataCollectors\AbstractDataCollector;
22
use Wszetko\Sitemap\Drivers\Output\OutputXMLWriter;
23
use Wszetko\Sitemap\Helpers\Directory;
24
use Wszetko\Sitemap\Interfaces\DataCollector;
25
use Wszetko\Sitemap\Interfaces\XML;
26
use Wszetko\Sitemap\Traits\Domain;
27
28
/**
29
 * Sitemap
30
 * This class is used for generating Google Sitemap files.
31
 *
32
 * @package    Sitemap
33
 *
34
 * @author     Paweł Kłopotek-Główczewski <[email protected]>
35
 * @copyright  2019 Paweł Kłopotek-Głowczewski (https://pawelkg.com/)
36
 * @license    https://opensource.org/licenses/MIT MIT License
37
 *
38
 * @see       https://github.com/wszetko/sitemap
39
 */
40
class Sitemap
41
{
42
    use Domain;
43
44
    /**
45
     * Available values for changefreq tag.
46
     *
47
     * @var array
48
     */
49
    public const CHANGEFREQ = [
50
        'always',
51
        'hourly',
52
        'daily',
53
        'weekly',
54
        'monthly',
55
        'yearly',
56
        'never',
57
    ];
58
59
    /**
60
     * Extension for sitemap file.
61
     *
62
     * @var string
63
     */
64
    public const EXT = '.xml';
65
66
    /**
67
     * Extension for gzipped sitemap file.
68
     *
69
     * @var string
70
     */
71
    public const GZ_EXT = '.xml.gz';
72
73
    /**
74
     * URL to Sitemap Schema.
75
     *
76
     * @var string
77
     */
78
    public const SCHEMA = 'http://www.sitemaps.org/schemas/sitemap/0.9';
79
80
    /**
81
     * Limit of items in Sitemap files.
82
     *
83
     * @var int
84
     */
85
    public const ITEM_PER_SITEMAP = 50000;
86
87
    /**
88
     * Limit of Sitemaps in SitemapsIndex.
89
     *
90
     * @var int
91
     */
92
    public const SITEMAP_PER_SITEMAPINDEX = 1000;
93
94
    /**
95
     * Limit of single files size.
96
     *
97
     * @var int
98
     */
99
    public const SITEMAP_MAX_SIZE = 52000000; // ~49,6MB - to have some limit to close file
100
101
    /**
102
     * Path on disk to public directory.
103
     *
104
     * @var string
105
     */
106
    private $publicDirectory = '';
107
108
    /**
109
     * Path related to public directory to dir where sitemaps will be.
110
     *
111
     * @var string
112
     */
113
    private $sitemapsDirectory = '';
114
115
    /**
116
     * Path to temporary directory.
117
     *
118
     * @var string
119
     */
120
    private $sitemapTempDirectory = '';
121
122
    /**
123
     * Default filename for sitemap file.
124
     *
125
     * @var string
126
     */
127
    private $defaultFilename = 'sitemap';
128
129
    /**
130
     * Name of index file.
131
     *
132
     * @var string
133
     */
134
    private $indexFilename = 'index';
135
136
    /**
137
     * DataCollector instance.
138
     *
139
     * @var null|DataCollector
140
     */
141
    private $dataCollector = null;
142
143
    /**
144
     * Use compression.
145
     *
146
     * @var bool
147
     */
148
    private $useCompression = false;
149
150
    /**
151
     * XML Writer object.
152
     *
153
     * @var null|XML
154
     */
155
    private $xml = null;
156
157
    /**
158
     * Separator to be used in Sitemap filenames.
159
     *
160
     * @var string
161
     */
162
    private $separator = '-';
163
164
    /**
     * Class constructor.
     *
     * The domain is optional here; generate() refuses to run while the
     * domain is still empty, so it has to be provided before generating.
     *
     * @param null|string $domain Domain the sitemap is built for, or null to set it later
     *
     * @throws \InvalidArgumentException
     */
    public function __construct(?string $domain = null)
    {
        // The parameter is declared explicitly nullable: relying on the
        // "= null" default to make a typed parameter nullable is deprecated
        // as of PHP 8.4.
        if (null !== $domain) {
            $this->setDomain($domain);
        }
    }
177
178
    /**
     * Register a single URL item under the given group.
     *
     * Every run of non-word characters is stripped from the group name. When
     * no usable name remains (null, empty, or the regex call failed) the
     * default filename is used instead. The final name is lower-cased before
     * the item is handed to the data collector.
     *
     * @param Items\Url   $item
     * @param null|string $group
     *
     * @throws \Exception
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function addItem(Items\Url $item, ?string $group = null): self
    {
        $groupName = null === $group ? null : preg_replace('/\W+/', '', $group);

        if (null === $groupName || '' === $groupName) {
            $groupName = $this->getDefaultFilename();
        }

        $groupName = mb_strtolower($groupName);
        $item->setDomain($this->getDomain());
        $this->getDataCollector()->add($item, $groupName);

        return $this;
    }
204
205
    /**
     * Register several URL items under the same group.
     *
     * Each entry is forwarded to addItem() in iteration order.
     *
     * @param array       $items
     * @param null|string $group
     *
     * @throws \Exception
     *
     * @return $this
     */
    public function addItems(array $items, ?string $group = null): self
    {
        foreach ($items as $url) {
            $this->addItem($url, $group);
        }

        return $this;
    }
223
224
    /**
     * Return the DataCollector object.
     *
     * @throws \Exception when no data collector has been set
     *
     * @return DataCollector
     */
    public function getDataCollector(): DataCollector
    {
        if (null !== $this->dataCollector) {
            return $this->dataCollector;
        }

        throw new Exception('DataCollector is not set.');
    }
239
240
    /**
     * Set DataCollector driver with configuration.
     *
     * The driver must name an existing class that is an AbstractDataCollector;
     * anything else is rejected and the currently configured collector (if
     * any) is left untouched.
     *
     * @param string $driver Fully qualified class name of the collector driver
     * @param array  $config Configuration passed to the driver constructor
     *
     * @throws \InvalidArgumentException when the driver is not a usable data collector
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDataCollector(string $driver, $config = []): self
    {
        $dataCollector = null;

        // Check the class hierarchy before instantiating, so unrelated classes
        // are never constructed just to be thrown away.
        if (class_exists($driver) && is_a($driver, AbstractDataCollector::class, true)) {
            $dataCollector = new $driver($config);
        }

        // Validate the freshly built object, not $this->dataCollector: a
        // previously configured collector must not mask an invalid driver.
        if (!($dataCollector instanceof AbstractDataCollector)) {
            throw new InvalidArgumentException($driver . ' data collector does not exists.');
        }

        $this->dataCollector = $dataCollector;

        return $this;
    }
266
267
    /**
     * Return the XML driver object.
     *
     * @throws \Exception when no XML driver has been set
     *
     * @return XML
     */
    public function getXml(): XML
    {
        if (null !== $this->xml) {
            return $this->xml;
        }

        throw new Exception('XML writer class is not set.');
    }
282
283
    /**
     * Set XML driver with configuration.
     *
     * When the configuration carries no 'domain' entry, the domain of this
     * Sitemap instance is injected before the driver is constructed.
     *
     * @param string $driver Fully qualified class name of the XML driver
     * @param array  $config Configuration passed to the driver constructor
     *
     * @throws \Exception when the driver is not a usable XML writer
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setXml(string $driver, array $config = []): self
    {
        $xml = null;

        // Check the class hierarchy before instantiating, so unrelated classes
        // are never constructed just to be thrown away.
        if (class_exists($driver) && is_a($driver, XML::class, true)) {
            if (!isset($config['domain'])) {
                $config['domain'] = $this->getDomain();
            }

            $xml = new $driver($config);
        }

        // Validate the freshly built object, not $this->xml: a previously
        // configured writer must not mask an invalid driver.
        if (!($xml instanceof XML)) {
            throw new Exception('XML writer class is not set.');
        }

        $this->xml = $xml;

        return $this;
    }
313
314
    /**
     * Build all sitemap files and the sitemap index, then publish them.
     *
     * Falls back to OutputXMLWriter when no XML driver was configured. The
     * temporary directory is cleared first, sitemaps are written to the
     * temporary sitemaps directory, the index to the temporary directory
     * itself, and finally everything is moved to its public location.
     *
     * @throws Exception when the domain is empty
     */
    public function generate(): void
    {
        if ('' === $this->getDomain()) {
            throw new Exception('Domain is not set.');
        }

        if (null === $this->xml) {
            $this->setXml(OutputXMLWriter::class, ['domain' => $this->getDomain()]);
        }

        $writer = $this->getXml();
        Directory::removeDir($this->getTempDirectory());

        $writer->setWorkDir($this->getSitepamsTempDirectory());
        $sitemaps = $this->generateSitemaps();

        $writer->setWorkDir($this->getTempDirectory());
        $this->generateSitemapsIndex($sitemaps);

        $this->publishSitemap();
    }
336
337
    /**
     * Generates sitemaps based on collected data.
     *
     * For every group holding at least one item, a sitemap file named
     * "<group><separator><number>.xml" is written. A new file is started
     * whenever the current one reaches ITEM_PER_SITEMAP items or comes within
     * 20 bytes of SITEMAP_MAX_SIZE, as long as the group still has items.
     *
     * @throws Exception
     *
     * @return array Map of generated filename => latest "lastmod" value seen
     *               in that file (null when no item carried one). When
     *               compression is enabled the keys are the gzipped names.
     */
    public function generateSitemaps(): array
    {
        // Loose comparison kept: the return type of getCount() is not visible
        // from this class.
        if (0 == $this->getDataCollector()->getCount()) {
            return [];
        }

        $files = [];

        foreach ($this->getDataCollector()->getGroups() as $group) {
            if ($this->getDataCollector()->getGroupCount($group) <= 0) {
                continue;
            }

            $groupNo = 0;
            $itemsInFile = 0;
            $filename = $group . $this->getSeparator() . $groupNo . self::EXT;

            $this->getXml()->openSitemap($filename, $this->getDataCollector()->getExtensions());
            $files[$filename] = null;
            // Tracks whether a sitemap is currently open, so the final close
            // is not issued a second time right after a limit-triggered close.
            $isOpen = true;

            while ($element = $this->getDataCollector()->fetch($group)) {
                $this->getXml()->addUrl($element);
                ++$itemsInFile;

                // Remember the most recent lastmod of the items in this file.
                if (
                    isset($element['lastmod']) &&
                    (
                        !$files[$filename] ||
                        strtotime($element['lastmod']) > strtotime($files[$filename])
                    )
                ) {
                    $files[$filename] = $element['lastmod'];
                }

                // self::SITEMAP_MAX_SIZE - 20 for buffer for close tag
                if (
                    $itemsInFile >= self::ITEM_PER_SITEMAP ||
                    $this->getXml()->getSitemapSize() >= (self::SITEMAP_MAX_SIZE - 20)
                ) {
                    $this->getXml()->closeSitemap();
                    $isOpen = false;

                    if (!$this->getDataCollector()->isLast($group)) {
                        ++$groupNo;
                        $itemsInFile = 0;
                        $filename = $group . $this->getSeparator() . $groupNo . self::EXT;

                        $this->getXml()->openSitemap(
                            $filename,
                            $this->getDataCollector()->getExtensions()
                        );
                        $files[$filename] = null;
                        $isOpen = true;
                    }
                }
            }

            if ($isOpen) {
                $this->getXml()->closeSitemap();
            }
        }

        if ($this->isUseCompression() && [] !== $files) {
            $this->compressFiles($this->getSitepamsTempDirectory(), $files);
        }

        return $files;
    }
412
413
    /**
     * Generates sitemap index for generated sitemaps.
     *
     * Every sitemap is listed under the URL
     * "<domain>/<sitemaps dir relative to public dir>/<filename>". A new index
     * file ("<index><separator><n>.xml") is started each time
     * SITEMAP_PER_SITEMAPINDEX entries have been written and more sitemaps
     * remain.
     *
     * @param array $sitemaps Map of sitemap filename => lastmod (or null)
     *
     * @throws Exception
     *
     * @return array Map of generated index filename => null. When compression
     *               is enabled the keys are the gzipped names.
     */
    public function generateSitemapsIndex(array $sitemaps): array
    {
        if ([] === $sitemaps) {
            return [];
        }

        // The URL prefix is identical for every entry, so build it once. The
        // relative directory is converted to URL separators and only added
        // when non-empty; otherwise sitemaps living directly in the public
        // directory would get a "//" in their URL.
        $relativeDir = str_replace($this->getPublicDirectory(), '', $this->getSitemapsDirectory());
        $relativeDir = trim(str_replace(DIRECTORY_SEPARATOR, '/', $relativeDir), '/');
        $baseUrl = (string) $this->getDomain() . '/' . ('' === $relativeDir ? '' : $relativeDir . '/');

        $counter = 0;
        $file = $this->getIndexFilename() . self::EXT;
        $files = [$file => null];
        $this->getXml()->openSitemapIndex($file);
        // Tracks whether an index is currently open, so the final close is not
        // issued a second time right after a limit-triggered close.
        $isOpen = true;
        $lastItem = array_key_last($sitemaps);

        foreach ($sitemaps as $sitemap => $lastmod) {
            $this->getXml()->addSitemap($baseUrl . $sitemap, $lastmod);
            ++$counter;

            if ($counter >= self::SITEMAP_PER_SITEMAPINDEX) {
                $this->getXml()->closeSitemapIndex();
                $isOpen = false;
                $counter = 0;

                // Both values are keys of the same array, so a strict
                // comparison is safe here.
                if ($sitemap !== $lastItem) {
                    $file = $this->getIndexFilename() . $this->getSeparator() . count($files) . self::EXT;
                    $files[$file] = null;
                    $this->getXml()->openSitemapIndex($file);
                    $isOpen = true;
                }
            }
        }

        if ($isOpen) {
            $this->getXml()->closeSitemapIndex();
        }

        if ($this->isUseCompression() && [] !== $files) {
            $this->compressFiles($this->getTempDirectory(), $files);
        }

        return $files;
    }
463
464
    /**
     * Gzip sitemap files in place inside the given directory.
     *
     * Each "<name>.xml" file is compressed to "<name>.xml.gz" in the same
     * directory and the uncompressed source is deleted afterwards. The
     * $files array is replaced with the same map keyed by the new names.
     *
     * @param string $dir   Directory holding the files
     * @param array  $files Map of filename => lastmod; updated by reference
     *
     * @throws Exception when zlib is missing or a file cannot be opened, read or written
     *
     * @return void
     */
    private function compressFiles(string $dir, array &$files): void
    {
        if (!extension_loaded('zlib')) {
            throw new Exception('Extension zlib is not loaded.');
        }

        $newFiles = [];
        $extLength = mb_strlen(self::EXT);

        foreach ($files as $file => $lastmod) {
            $file = (string) $file;
            $source = $dir . DIRECTORY_SEPARATOR . $file;
            $gzFile = mb_substr($file, 0, mb_strlen($file) - $extLength) . self::GZ_EXT;
            $output = $dir . DIRECTORY_SEPARATOR . $gzFile;

            // Open the source first so a missing input does not leave an
            // empty archive behind.
            $in = fopen($source, 'rb');

            if (false === $in) {
                throw new Exception('Can\'t open xml file.');
            }

            $out = gzopen($output, 'wb9');

            if (false === $out) {
                fclose($in);

                throw new Exception('Can\'t create GZip archive.');
            }

            try {
                while (!feof($in)) {
                    $content = fread($in, 524288);

                    // Fail instead of looping on a stream that can no longer
                    // be read.
                    if (false === $content) {
                        throw new Exception('Can\'t read xml file.');
                    }

                    if ('' !== $content && !gzwrite($out, $content)) {
                        throw new Exception('Can\'t write GZip archive.');
                    }
                }
            } finally {
                // Always release both handles, also when reading or writing failed.
                fclose($in);
                gzclose($out);
            }

            // Only reached when the whole file was compressed successfully.
            unlink($source);
            $newFiles[$gzFile] = $lastmod;
        }

        $files = $newFiles;
    }
513
514
    /**
     * Move generated sitemaps from the temporary directory to their destination.
     *
     * Previously published sitemaps are cleared first. Every file below the
     * temporary directory whose path ends with the active extension (see
     * getExt()) is moved to the same relative location below the public
     * directory; afterwards the temporary directory is removed.
     *
     * @throws \Exception when a destination directory cannot be created or a file cannot be moved
     *
     * @return void
     */
    private function publishSitemap(): void
    {
        $this->clearPreviousSitemaps();

        $tempDirectory = $this->getTempDirectory();
        $publicDirectory = $this->getPublicDirectory();
        // Quote the extension so its dots are matched literally.
        $extension = preg_quote($this->getExt(), '/');

        $files = new RegexIterator(
            new RecursiveIteratorIterator(new RecursiveDirectoryIterator($tempDirectory)),
            "/^(?'path'(([a-zA-Z]:)|((\\\\|\\/){1,2}\\w+)?)((\\\\|\\/)(\\w[\\w ]*.*))+({$extension}){1})$/",
            RegexIterator::GET_MATCH
        );

        // Collect the paths before moving anything, so the directory is not
        // modified while it is being iterated.
        $fileList = [];

        foreach ($files as $file) {
            if (isset($file['path'])) {
                $fileList[] = $file['path'];
            }
        }

        foreach ($fileList as $file) {
            $destination = str_replace($tempDirectory, $publicDirectory, $file);
            $destinationDir = dirname($destination);

            // clearPreviousSitemaps() may have removed the target directory.
            if (!is_dir($destinationDir) && !mkdir($destinationDir, 0777, true) && !is_dir($destinationDir)) {
                throw new Exception('Can\'t create directory ' . $destinationDir . '.');
            }

            // The temporary directory is deleted below, so a failed move must
            // not go unnoticed.
            if (!rename($file, $destination)) {
                throw new Exception('Can\'t move sitemap file ' . $file . '.');
            }
        }

        Directory::removeDir($tempDirectory);
    }
549
550
    /**
     * Remove previous sitemap files.
     *
     * When the sitemaps directory is a real sub-directory of the public
     * directory it is removed via Directory::removeDir(). In addition, index
     * files in the public directory ("<index>.xml", "<index><sep><n>.xml" and
     * their ".xml.gz" variants) are deleted.
     *
     * @throws \Exception
     *
     * @return void
     */
    private function clearPreviousSitemaps(): void
    {
        $publicDirectory = $this->getPublicDirectory();
        $sitemapsDirectory = $this->getSitemapsDirectory();

        // Strip separators before comparing: a relative path made only of
        // separators means "the public directory itself", which must never be
        // removed.
        $relativeDir = trim(str_replace($publicDirectory, '', $sitemapsDirectory), '/\\');

        if ('' !== $relativeDir) {
            Directory::removeDir($sitemapsDirectory);
        }

        $entries = scandir($publicDirectory);

        if (!is_array($entries)) {
            return;
        }

        // All dynamic parts are quoted so they match literally. The configured
        // separator is accepted next to the historical "-", because index
        // files are named with getSeparator().
        $pattern = '/^(' . preg_quote($this->getIndexFilename(), '/') . ')'
            . '((' . preg_quote($this->getSeparator(), '/') . '|-)[\d]+)?'
            . '(' . preg_quote(self::EXT, '/') . '|' . preg_quote(self::GZ_EXT, '/') . ')$/';

        foreach ($entries as $entry) {
            $path = $publicDirectory . DIRECTORY_SEPARATOR . $entry;

            if (1 === preg_match($pattern, $entry) && !is_dir($path)) {
                unlink($path);
            }
        }
    }
580
581
    /**
     * Return the base name (without extension) used for sitemap index files.
     *
     * @return string
     */
    public function getIndexFilename(): string
    {
        return $this->indexFilename;
    }
590
591
    /**
     * Change the base name (without extension) used for sitemap index files.
     *
     * @param string $indexFilename
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setIndexFilename(string $indexFilename): self
    {
        $this->indexFilename = $indexFilename;

        return $this;
    }
604
605
    /**
     * Return the path of the public directory.
     *
     * @throws \Exception when the public directory has not been set
     *
     * @return string
     */
    public function getPublicDirectory(): string
    {
        if ('' !== $this->publicDirectory) {
            return $this->publicDirectory;
        }

        throw new Exception('Public directory is not set.');
    }
620
621
    /**
     * Set the path of the public directory.
     *
     * The value stored is whatever Directory::checkDirectory() returns for
     * the given path.
     *
     * @param string $publicDirectory
     *
     * @throws Exception
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setPublicDirectory(string $publicDirectory): self
    {
        $checkedDirectory = Directory::checkDirectory($publicDirectory);
        $this->publicDirectory = $checkedDirectory;

        return $this;
    }
636
637
638
639
    /**
     * Return the path of the directory sitemaps are published to.
     *
     * When no sitemaps directory has been stored yet, it is initialised
     * through setSitemapsDirectory('').
     *
     * @throws \Exception
     *
     * @return string
     */
    public function getSitemapsDirectory(): string
    {
        if ('' !== $this->sitemapsDirectory) {
            return $this->sitemapsDirectory;
        }

        $this->setSitemapsDirectory('');

        return $this->sitemapsDirectory;
    }
654
655
    /**
     * Set the sitemaps directory, given relative to the public directory.
     *
     * The public directory path, a directory separator and the given value
     * are joined and passed through Directory::checkDirectory(); the result
     * is stored.
     *
     * @param string $sitemapsDirectory
     *
     * @throws \Exception
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSitemapsDirectory(string $sitemapsDirectory): self
    {
        $fullPath = $this->getPublicDirectory() . DIRECTORY_SEPARATOR . $sitemapsDirectory;
        $this->sitemapsDirectory = Directory::checkDirectory($fullPath);

        return $this;
    }
671
672
    /**
     * Get temporary directory path.
     *
     * When no temporary directory has been set, one is created lazily as
     * "<system temp dir>/sitemap<random hex>" and remembered for later calls.
     *
     * @throws \Exception
     *
     * @return string
     */
    public function getTempDirectory(): string
    {
        if ('' === (string) $this->sitemapTempDirectory) {
            // Random suffix instead of a timestamp hash: a time-derived name
            // is guessable and can collide between concurrent runs, and this
            // directory is later removed recursively.
            $suffix = bin2hex(random_bytes(16));
            $this->setTempDirectory(sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'sitemap' . $suffix);
        }

        return $this->sitemapTempDirectory;
    }
688
689
    /**
     * Set the path of the temporary working directory.
     *
     * The value stored is whatever Directory::checkDirectory() returns for
     * the given path.
     *
     * @param string $tempDirectory
     *
     * @throws \Exception
     *
     * @return $this
     */
    public function setTempDirectory(string $tempDirectory): self
    {
        $checkedDirectory = Directory::checkDirectory($tempDirectory);
        $this->sitemapTempDirectory = $checkedDirectory;

        return $this;
    }
704
705
    /**
     * Return the temporary counterpart of the sitemaps directory.
     *
     * The sitemaps directory path with the public directory path stripped
     * out is appended to the temporary directory, and the result is passed
     * through Directory::checkDirectory().
     *
     * @throws \Exception
     *
     * @return string
     */
    public function getSitepamsTempDirectory(): string
    {
        $relativeDirectory = str_replace($this->getPublicDirectory(), '', $this->getSitemapsDirectory());
        $tempSitemapsDirectory = $this->getTempDirectory() . DIRECTORY_SEPARATOR . $relativeDirectory;

        return Directory::checkDirectory($tempSitemapsDirectory);
    }
718
719
    /**
     * Set the separator placed between the name and the number in filenames.
     *
     * @param string $separator
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSeparator(string $separator): self
    {
        $this->separator = $separator;

        return $this;
    }
732
733
    /**
     * Get the separator placed between the name and the number in filenames.
     *
     * @return string
     */
    public function getSeparator(): string
    {
        return $this->separator;
    }
742
743
    /**
     * Set whether sitemap files should be gzipped.
     *
     * Compression is only switched on when the zlib extension is loaded;
     * requesting it without zlib leaves compression disabled. Passing false
     * always switches compression off.
     *
     * @param bool $useCompression
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setUseCompression(bool $useCompression): self
    {
        // Assign unconditionally so that a previous "true" can be reverted.
        $this->useCompression = $useCompression && extension_loaded('zlib');

        return $this;
    }
760
761
    /**
     * Check if sitemap files should be gzipped.
     *
     * @return bool
     */
    public function isUseCompression(): bool
    {
        return $this->useCompression;
    }
772
773
    /**
     * Change the filename used for items added without a usable group name.
     *
     * @param string $defaultFilename
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDefaultFilename(string $defaultFilename): self
    {
        $this->defaultFilename = $defaultFilename;

        return $this;
    }
786
787
    /**
     * Return the filename used for items added without a usable group name.
     *
     * @return string
     */
    public function getDefaultFilename(): string
    {
        return $this->defaultFilename;
    }
796
797
    /**
     * Return the file extension of generated sitemaps: GZ_EXT when
     * compression is in use, EXT otherwise.
     *
     * @return string
     */
    private function getExt(): string
    {
        return $this->isUseCompression() ? self::GZ_EXT : self::EXT;
    }
810
}
811