Sitemap::compressFiles()   B
last analyzed

Complexity

Conditions 7
Paths 7

Size

Total Lines 38
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 56

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 7
eloc 22
c 2
b 0
f 0
nc 7
nop 2
dl 0
loc 38
ccs 0
cts 23
cp 0
crap 56
rs 8.6346
1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * This file is part of Wszetko Sitemap.
7
 *
8
 * (c) Paweł Kłopotek-Główczewski <[email protected]>
9
 *
10
 * This source file is subject to the MIT license that is bundled
11
 * with this source code in the file LICENSE.
12
 */
13
14
namespace Wszetko\Sitemap;
15
16
use Exception;
17
use InvalidArgumentException;
18
use RecursiveDirectoryIterator;
19
use RecursiveIteratorIterator;
20
use RegexIterator;
21
use Wszetko\Sitemap\Drivers\DataCollectors\AbstractDataCollector;
22
use Wszetko\Sitemap\Drivers\Output\OutputXMLWriter;
23
use Wszetko\Sitemap\Helpers\Directory;
24
use Wszetko\Sitemap\Interfaces\DataCollector;
25
use Wszetko\Sitemap\Interfaces\XML;
26
use Wszetko\Sitemap\Traits\Domain;
27
28
/**
29
 * Sitemap
30
 * This class is used for generating Google Sitemap files.
31
 *
32
 * @package    Sitemap
33
 *
34
 * @author     Paweł Kłopotek-Główczewski <[email protected]>
35
 * @copyright  2019 Paweł Kłopotek-Głowczewski (https://pawelkg.com/)
36
 * @license    https://opensource.org/licenses/MIT MIT License
37
 *
38
 * @see       https://github.com/wszetko/sitemap
39
 */
40
class Sitemap
41
{
42
    use Domain;
43
44
    /**
45
     * Available values for changefreq tag.
46
     *
47
     * @var array
48
     */
49
    public const CHANGEFREQ = [
50
        'always',
51
        'hourly',
52
        'daily',
53
        'weekly',
54
        'monthly',
55
        'yearly',
56
        'never',
57
    ];
58
59
    /**
60
     * Extension for sitemap file.
61
     *
62
     * @var string
63
     */
64
    public const EXT = '.xml';
65
66
    /**
67
     * Extension for gzipped sitemap file.
68
     *
69
     * @var string
70
     */
71
    public const GZ_EXT = '.xml.gz';
72
73
    /**
74
     * URL to Sitemap Schema.
75
     *
76
     * @var string
77
     */
78
    public const SCHEMA = 'http://www.sitemaps.org/schemas/sitemap/0.9';
79
80
    /**
81
     * Limit of items in Sitemap files.
82
     *
83
     * @var int
84
     */
85
    public const ITEM_PER_SITEMAP = 50000;
86
87
    /**
88
     * Limit of Sitemaps in SitemapsIndex.
89
     *
90
     * @var int
91
     */
92
    public const SITEMAP_PER_SITEMAPINDEX = 1000;
93
94
    /**
95
     * Limit of single files size.
96
     *
97
     * @var int
98
     */
99
    public const SITEMAP_MAX_SIZE = 52000000; // ~49,6MB - to have some limit to close file
100
101
    /**
102
     * Path on disk to public directory.
103
     *
104
     * @var string
105
     */
106
    private $publicDirectory = '';
107
108
    /**
109
     * Path related to public directory to dir where sitemaps will be.
110
     *
111
     * @var string
112
     */
113
    private $sitemapsDirectory = '';
114
115
    /**
116
     * Path to temporary directory.
117
     *
118
     * @var string
119
     */
120
    private $sitemapTempDirectory = '';
121
122
    /**
123
     * Default filename for sitemap file.
124
     *
125
     * @var string
126
     */
127
    private $defaultFilename = 'sitemap';
128
129
    /**
130
     * Name of index file.
131
     *
132
     * @var string
133
     */
134
    private $indexFilename = 'index';
135
136
    /**
137
     * DataCollector instance.
138
     *
139
     * @var null|DataCollector
140
     */
141
    private $dataCollector = null;
142
143
    /**
144
     * Use compression.
145
     *
146
     * @var bool
147
     */
148
    private $useCompression = false;
149
150
    /**
151
     * XML Writer object.
152
     *
153
     * @var null|XML
154
     */
155
    private $xml = null;
156
157
    /**
158
     * Separator to be used in Sitemap filenames.
159
     *
160
     * @var string
161
     */
162
    private $separator = '-';
163
164
    /**
     * Class constructor.
     *
     * @param null|string $domain Domain the sitemaps are generated for. When null, no domain
     *                            is set here; generate() refuses to run with an empty domain.
     *
     * @throws \InvalidArgumentException
     */
    public function __construct(?string $domain = null)
    {
        // Explicit nullable type: implicit nullability via "= null" is deprecated since PHP 8.4.
        if (null !== $domain) {
            $this->setDomain($domain);
        }
    }
177
178
    /**
     * Register a single URL in the data collector under the given group.
     *
     * The group name is reduced to word characters and lower-cased; it is later used
     * as the prefix of the generated sitemap file names. When no usable group name
     * remains, the default filename is used as the group.
     *
     * @param Items\Url   $item  URL item; the sitemap's domain is assigned to it
     * @param null|string $group Optional group name
     *
     * @throws \Exception When no data collector has been set
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function addItem(Items\Url $item, ?string $group = null): self
    {
        // Strip everything that is not a word character; preg_replace() may yield null on error.
        $name = is_string($group) ? preg_replace('/\W+/', '', $group) : $group;

        if (null === $name || '' === $name) {
            $name = $this->getDefaultFilename();
        }

        $name = mb_strtolower($name);
        $item->setDomain($this->getDomain());
        $this->getDataCollector()->add($item, $name);

        return $this;
    }
204
205
    /**
     * Register several URLs at once, all under the same group.
     *
     * Each element is passed to addItem() in iteration order.
     *
     * @param array       $items List of Items\Url instances
     * @param null|string $group Optional group name shared by all items
     *
     * @throws \Exception When no data collector has been set
     *
     * @return $this
     */
    public function addItems(array $items, ?string $group = null): self
    {
        foreach ($items as $url) {
            $this->addItem($url, $group);
        }

        return $this;
    }
223
224
    /**
     * Return the DataCollector object.
     *
     * @throws \Exception When no data collector has been set yet
     *
     * @return DataCollector
     */
    public function getDataCollector(): DataCollector
    {
        if (null !== $this->dataCollector) {
            return $this->dataCollector;
        }

        throw new Exception('DataCollector is not set.');
    }
239
240
    /**
     * Set DataCollector driver with configuration.
     *
     * The driver must be the name of an existing class extending AbstractDataCollector.
     * It is validated before being instantiated, so unrelated classes are never
     * constructed, and an invalid driver is always rejected - even when a collector
     * was already configured by an earlier call (the previous collector is kept).
     *
     * @param string $driver Fully qualified class name of the data collector
     * @param array  $config Configuration passed to the driver's constructor
     *
     * @throws \InvalidArgumentException When the driver is not a valid data collector class
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDataCollector(string $driver, $config = []): self
    {
        if (!class_exists($driver) || !is_subclass_of($driver, AbstractDataCollector::class)) {
            throw new InvalidArgumentException($driver . ' data collector does not exists.');
        }

        $this->dataCollector = new $driver($config);

        return $this;
    }
266
267
    /**
     * Return the XML driver object.
     *
     * @throws \Exception When no XML driver has been set yet
     *
     * @return XML
     */
    public function getXml(): XML
    {
        if (null !== $this->xml) {
            return $this->xml;
        }

        throw new Exception('XML writer class is not set.');
    }
282
283
    /**
     * Set XML driver with configuration.
     *
     * The driver must be the name of an existing class implementing the XML interface.
     * It is validated before being instantiated, and an invalid driver is always
     * rejected - even when a driver was already configured by an earlier call
     * (the previous driver is kept). When the configuration has no 'domain' entry,
     * the sitemap's own domain is injected.
     *
     * @param string $driver Fully qualified class name of the XML driver
     * @param array  $config Configuration passed to the driver's constructor
     *
     * @throws \Exception When the driver is not a valid XML driver class
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setXml(string $driver, array $config = []): self
    {
        if (!class_exists($driver) || !is_subclass_of($driver, XML::class)) {
            throw new Exception('XML writer class is not set.');
        }

        if (!isset($config['domain'])) {
            $config['domain'] = $this->getDomain();
        }

        $this->xml = new $driver($config);

        return $this;
    }
313
314
    /**
     * Build all sitemap files and the sitemap index, then publish them.
     *
     * Falls back to the OutputXMLWriter driver when no XML driver was configured.
     * Sitemaps are written to the temporary sitemaps directory and the index to the
     * temporary directory itself, before everything is moved by publishSitemap().
     *
     * @throws Exception When the domain is empty or any generation step fails
     */
    public function generate(): void
    {
        if ('' === $this->getDomain()) {
            throw new Exception('Domain is not set.');
        }

        if (null === $this->xml) {
            $this->setXml(OutputXMLWriter::class, ['domain' => $this->getDomain()]);
        }

        // Start from a clean temporary directory.
        $tempDirectory = $this->getTempDirectory();
        Directory::removeDir($tempDirectory);

        $writer = $this->getXml();
        $writer->setWorkDir($this->getSitepamsTempDirectory());
        $sitemaps = $this->generateSitemaps();

        $writer->setWorkDir($tempDirectory);
        $this->generateSitemapsIndex($sitemaps);

        $this->publishSitemap();
    }
336
337
    /**
     * Generates sitemaps based on collected data.
     *
     * For every group with at least one item, elements are fetched from the data
     * collector and written to files named "<group><separator><n>.xml". A new file
     * is started whenever the current one reaches ITEM_PER_SITEMAP entries or gets
     * within 20 bytes of SITEMAP_MAX_SIZE.
     *
     * Files are opened lazily - only when there is an element to write - so a sitemap
     * is closed exactly once and nothing is written to an already closed file.
     *
     * @throws Exception When the data collector or the XML driver is not set
     *
     * @return array Map of generated file name => newest "lastmod" value seen in that file
     *               (null when no element had one). With compression enabled the keys are
     *               the names of the gzipped files.
     */
    public function generateSitemaps(): array
    {
        $collector = $this->getDataCollector();

        if (0 == $collector->getCount()) {
            return [];
        }

        $files = [];

        foreach ($collector->getGroups() as $group) {
            if (!($collector->getGroupCount($group) > 0)) {
                continue;
            }

            $xml = $this->getXml();
            $groupNo = 0;
            $itemsInFile = 0;
            $file = null; // name of the currently open sitemap, null when none is open

            while ($element = $collector->fetch($group)) {
                if (null === $file) {
                    $file = $group . $this->getSeparator() . $groupNo . self::EXT;
                    $xml->openSitemap($file, $collector->getExtensions());
                    $files[$file] = null;
                    $itemsInFile = 0;
                }

                $xml->addUrl($element);
                ++$itemsInFile;

                // Track the most recent modification date among the file's elements.
                if (
                    isset($element['lastmod'])
                    && (!$files[$file] || strtotime($element['lastmod']) > strtotime($files[$file]))
                ) {
                    $files[$file] = $element['lastmod'];
                }

                // self::SITEMAP_MAX_SIZE - 20 for buffer for close tag
                if (
                    $itemsInFile >= self::ITEM_PER_SITEMAP
                    || $xml->getSitemapSize() >= (self::SITEMAP_MAX_SIZE - 20)
                ) {
                    $xml->closeSitemap();
                    $file = null;
                    ++$groupNo;
                }
            }

            if (null !== $file) {
                $xml->closeSitemap();
            }
        }

        if ($this->isUseCompression() && [] !== $files) {
            // compressFiles() replaces the keys of $files with the gzipped file names.
            $this->compressFiles($this->getSitepamsTempDirectory(), $files);
        }

        return $files;
    }
412
413
    /**
     * Generates sitemap index for generated sitemaps.
     *
     * Each sitemap is referenced as "<domain>/<sitemaps dir relative to public dir>/<file>".
     * The relative directory is normalised to forward slashes and left out entirely when
     * the sitemaps directory is the public directory, so URLs never contain "//" or
     * OS-specific separators. A new index file ("<index><separator><n>.xml") is started
     * after every SITEMAP_PER_SITEMAPINDEX entries; each index file is closed exactly once.
     *
     * @param array $sitemaps Map of sitemap file name => lastmod, as returned by generateSitemaps()
     *
     * @throws Exception When the XML driver or the public directory is not set
     *
     * @return array Map of generated index file name => null. With compression enabled
     *               the keys are the names of the gzipped files.
     */
    public function generateSitemapsIndex(array $sitemaps): array
    {
        if ([] === $sitemaps) {
            return [];
        }

        // The URL prefix is the same for every entry, so build it once.
        $relativeDir = str_replace($this->getPublicDirectory(), '', $this->getSitemapsDirectory());
        $relativeDir = trim(str_replace(DIRECTORY_SEPARATOR, '/', $relativeDir), '/');
        $baseUrl = (string) $this->getDomain() . '/' . ('' === $relativeDir ? '' : $relativeDir . '/');

        $xml = $this->getXml();
        $files = [];
        $counter = 0;
        $isOpen = false;

        foreach ($sitemaps as $sitemap => $lastmod) {
            if (!$isOpen) {
                // First index file has no numeric suffix; following ones are numbered from 1.
                $file = [] === $files
                    ? $this->getIndexFilename() . self::EXT
                    : $this->getIndexFilename() . $this->getSeparator() . count($files) . self::EXT;
                $files[$file] = null;
                $xml->openSitemapIndex($file);
                $isOpen = true;
                $counter = 0;
            }

            $xml->addSitemap($baseUrl . $sitemap, $lastmod);
            ++$counter;

            if ($counter >= self::SITEMAP_PER_SITEMAPINDEX) {
                $xml->closeSitemapIndex();
                $isOpen = false;
            }
        }

        if ($isOpen) {
            $xml->closeSitemapIndex();
        }

        if (true === $this->isUseCompression()) {
            // compressFiles() replaces the keys of $files with the gzipped file names.
            $this->compressFiles($this->getTempDirectory(), $files);
        }

        return $files;
    }
463
464
    /**
     * Gzip sitemap files in place and rename the entries of $files accordingly.
     *
     * Every "<name>.xml" in $dir is compressed to "<name>.xml.gz" and the original
     * file is removed. Both stream handles are always released, also on failure.
     *
     * @param string $dir   Directory containing the files
     * @param array  $files Map of file name => lastmod; replaced (by reference) with
     *                      a map of gzipped file name => lastmod
     *
     * @throws Exception When zlib is missing or a file cannot be opened, read or written
     *
     * @return void
     */
    private function compressFiles(string $dir, array &$files): void
    {
        if (!extension_loaded('zlib')) {
            throw new Exception('Extension zlib is not loaded.');
        }

        $compressed = [];

        foreach ($files as $file => $lastmod) {
            $file = (string) $file;
            $source = $dir . DIRECTORY_SEPARATOR . $file;
            // Swap the plain extension for the gzipped one.
            $gzFile = mb_substr($file, 0, mb_strlen($file) - mb_strlen(self::EXT)) . self::GZ_EXT;
            $output = $dir . DIRECTORY_SEPARATOR . $gzFile;

            // Open the input first so a missing source does not leave an empty archive behind.
            $in = fopen($source, 'rb');

            if (false === $in) {
                throw new Exception('Can\'t open xml file.');
            }

            $out = gzopen($output, 'wb9');

            if (false === $out) {
                fclose($in);

                throw new Exception('Can\'t create GZip archive.');
            }

            try {
                while (!feof($in)) {
                    $content = fread($in, 524288);

                    // A failed read would otherwise never reach EOF and loop forever.
                    if (false === $content) {
                        throw new Exception('Can\'t read xml file.');
                    }

                    if (gzwrite($out, $content) !== strlen($content)) {
                        throw new Exception('Can\'t write GZip archive.');
                    }
                }
            } finally {
                fclose($in);
                gzclose($out);
            }

            unlink($source);
            $compressed[$gzFile] = $lastmod;
        }

        $files = $compressed;
    }
513
514
    /**
     * Move generated sitemaps from the temporary directory to their destination.
     *
     * Previously published sitemaps are removed first. Every file below the temporary
     * directory whose path ends with the active extension (see getExt()) is moved to
     * the same relative location below the public directory. The temporary directory
     * is removed afterwards, also when publishing fails.
     *
     * @throws \Exception When a generated file is missing or cannot be moved
     *
     * @return void
     */
    private function publishSitemap(): void
    {
        $this->clearPreviousSitemaps();
        Directory::checkDirectory($this->getSitemapsDirectory());

        $tempDir = $this->getTempDirectory();
        $publicDir = $this->getPublicDirectory();
        // Escape the extension so its dots match literally instead of "any character".
        $ext = preg_quote($this->getExt(), '/');

        $matches = new RegexIterator(
            new RecursiveIteratorIterator(new RecursiveDirectoryIterator($tempDir)),
            "/^(?'path'(([a-zA-Z]:)|((\\\\|\\/){1,2}\\w+)?)((\\\\|\\/)(\\w[\\w ]*.*))+({$ext}){1})$/",
            RegexIterator::GET_MATCH
        );

        // Collect the paths first; files are only moved once directory iteration is done.
        $fileList = [];

        foreach ($matches as $match) {
            if (isset($match['path'])) {
                $fileList[] = $match['path'];
            }
        }

        foreach ($fileList as $file) {
            if (!file_exists($file)) {
                Directory::removeDir($tempDir);

                throw new Exception('Couldn\'t find generated sitemap file.');
            }

            $destination = str_replace($tempDir, $publicDir, $file);

            if (!rename($file, $destination)) {
                Directory::removeDir($tempDir);

                throw new Exception('Couldn\'t publish generated sitemap file.');
            }
        }

        Directory::removeDir($tempDir);
    }
555
556
    /**
     * Remove previous sitemap files.
     *
     * When the sitemaps directory differs from the public directory, the whole sitemaps
     * directory is removed. Afterwards every index file in the public directory is
     * deleted, i.e. files named "<index>" optionally followed by "<separator><number>",
     * with either the plain or the gzipped extension. The pattern is built from the
     * configured index filename and separator, both escaped for use in a regex.
     *
     * @throws \Exception When the public directory is not set
     *
     * @return void
     */
    private function clearPreviousSitemaps(): void
    {
        $publicDirectory = $this->getPublicDirectory();
        $sitemapDir = str_replace($publicDirectory, '', $this->getSitemapsDirectory());

        if ('' !== $sitemapDir) {
            Directory::removeDir($this->getSitemapsDirectory());
        }

        $entries = scandir($publicDirectory);

        if (!is_array($entries)) {
            return;
        }

        $pattern = '/^(' . preg_quote($this->getIndexFilename(), '/') . ')'
            . '(' . preg_quote($this->getSeparator(), '/') . '\d+)?'
            . '(' . preg_quote(self::EXT, '/') . '|' . preg_quote(self::GZ_EXT, '/') . ')$/';

        foreach ($entries as $entry) {
            if (1 === preg_match($pattern, $entry)) {
                unlink($publicDirectory . DIRECTORY_SEPARATOR . $entry);
            }
        }
    }
586
587
    /**
     * Return the base name (without extension) used for sitemap index files.
     *
     * @return string
     */
    public function getIndexFilename(): string
    {
        return $this->indexFilename;
    }
596
597
    /**
     * Change the base name (without extension) used for sitemap index files.
     *
     * @param string $indexFilename New base name
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setIndexFilename(string $indexFilename): self
    {
        $this->indexFilename = $indexFilename;

        return $this;
    }
610
611
    /**
     * Return the path of the public directory.
     *
     * @throws \Exception When the public directory has not been set
     *
     * @return string
     */
    public function getPublicDirectory(): string
    {
        if ('' !== $this->publicDirectory) {
            return $this->publicDirectory;
        }

        throw new Exception('Public directory is not set.');
    }
626
627
    /**
     * Set the path of the public directory.
     *
     * The stored value is whatever Directory::checkDirectory() returns for the given path.
     *
     * @param string $publicDirectory Path on disk to the public directory
     *
     * @throws Exception
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setPublicDirectory(string $publicDirectory): self
    {
        $checkedDirectory = Directory::checkDirectory($publicDirectory);
        $this->publicDirectory = $checkedDirectory;

        return $this;
    }
642
643
644
645
    /**
     * Return the path of the directory the sitemaps are published to.
     *
     * When none was configured, it is initialised through setSitemapsDirectory('')
     * and therefore derived from the public directory.
     *
     * @throws \Exception When the public directory is not set
     *
     * @return string
     */
    public function getSitemapsDirectory(): string
    {
        $isUnset = '' === $this->sitemapsDirectory;

        if ($isUnset) {
            $this->setSitemapsDirectory('');
        }

        return $this->sitemapsDirectory;
    }
660
661
    /**
     * Set the sitemaps directory, given relative to the public directory.
     *
     * The stored value is whatever Directory::checkDirectory() returns for
     * "<public directory><DIRECTORY_SEPARATOR><sitemapsDirectory>".
     *
     * @param string $sitemapsDirectory Path relative to the public directory
     *
     * @throws \Exception When the public directory is not set
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSitemapsDirectory(string $sitemapsDirectory): self
    {
        $fullPath = $this->getPublicDirectory() . DIRECTORY_SEPARATOR . $sitemapsDirectory;
        $this->sitemapsDirectory = Directory::checkDirectory($fullPath);

        return $this;
    }
677
678
    /**
     * Get temporary directory path.
     *
     * When none was configured, a directory named "sitemap<32 hex chars>" inside the
     * system temp directory is registered via setTempDirectory(). The suffix comes from
     * random_bytes() so the name is neither predictable nor prone to collisions between
     * processes started within the same microsecond.
     *
     * @throws \Exception When no source of randomness is available or the directory check fails
     *
     * @return string
     */
    public function getTempDirectory(): string
    {
        if ('' === $this->sitemapTempDirectory) {
            $suffix = bin2hex(random_bytes(16));
            $this->setTempDirectory(sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'sitemap' . $suffix);
        }

        return $this->sitemapTempDirectory;
    }
694
695
    /**
     * Set the temporary directory used while generating sitemaps.
     *
     * The stored value is whatever Directory::checkDirectory() returns for the given path.
     *
     * @param string $tempDirectory Path to the temporary directory
     *
     * @throws \Exception
     *
     * @return $this
     */
    public function setTempDirectory(string $tempDirectory): self
    {
        $checkedDirectory = Directory::checkDirectory($tempDirectory);
        $this->sitemapTempDirectory = $checkedDirectory;

        return $this;
    }
710
711
    /**
     * Return the temporary counterpart of the sitemaps directory.
     *
     * The sitemaps directory path with the public directory removed from it is
     * appended to the temporary directory, and the result is passed through
     * Directory::checkDirectory().
     *
     * @throws \Exception When the public directory is not set
     *
     * @return string
     */
    public function getSitepamsTempDirectory(): string
    {
        $relativePath = str_replace($this->getPublicDirectory(), '', $this->getSitemapsDirectory());
        $tempPath = $this->getTempDirectory() . DIRECTORY_SEPARATOR . $relativePath;

        return Directory::checkDirectory($tempPath);
    }
724
725
    /**
     * Set separator for filenames.
     *
     * The separator is placed between a file's base name and its sequence number.
     *
     * @param string $separator New separator
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSeparator(string $separator): self
    {
        $this->separator = $separator;

        return $this;
    }
738
739
    /**
     * Get separator for filenames.
     *
     * @return string
     */
    public function getSeparator(): string
    {
        return $this->separator;
    }
748
749
    /**
     * Set whether sitemap files should be gzipped.
     *
     * Compression is only enabled when it is requested and the zlib extension is
     * loaded; in every other case it is disabled, so passing false reliably turns
     * compression off again.
     *
     * @param bool $useCompression True to request gzipped output
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setUseCompression(bool $useCompression): self
    {
        $this->useCompression = $useCompression && extension_loaded('zlib');

        return $this;
    }
766
767
    /**
     * Check if sitemap files should be gzipped.
     *
     * @return bool True when compression is used
     */
    public function isUseCompression(): bool
    {
        return $this->useCompression;
    }
778
779
    /**
     * Change the default sitemap filename.
     *
     * addItem() uses this name as the group when no usable group name is given.
     *
     * @param string $defaultFilename New default filename
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDefaultFilename(string $defaultFilename): self
    {
        $this->defaultFilename = $defaultFilename;

        return $this;
    }
792
793
    /**
     * Return the default sitemap filename.
     *
     * @return string
     */
    public function getDefaultFilename(): string
    {
        return $this->defaultFilename;
    }
802
803
    /**
     * Return the extension of the published sitemap files.
     *
     * @return string self::GZ_EXT when compression is used, self::EXT otherwise
     */
    private function getExt(): string
    {
        return $this->isUseCompression() ? self::GZ_EXT : self::EXT;
    }
816
}
817