Completed
Push — master ( 70c415...6060ab )
by Paweł
02:58
created

Sitemap::setDataCollector()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 15
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 3

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
eloc 9
c 1
b 0
f 0
nc 3
nop 2
dl 0
loc 15
ccs 8
cts 8
cp 1
crap 3
rs 9.9666
1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * This file is part of Wszetko Sitemap.
7
 *
8
 * (c) Paweł Kłopotek-Główczewski <[email protected]>
9
 *
10
 * This source file is subject to the MIT license that is bundled
11
 * with this source code in the file LICENSE.
12
 */
13
14
namespace Wszetko\Sitemap;
15
16
use Exception;
17
use InvalidArgumentException;
18
use RecursiveDirectoryIterator;
19
use RecursiveIteratorIterator;
20
use RegexIterator;
21
use Wszetko\Sitemap\Drivers\DataCollectors\AbstractDataCollector;
22
use Wszetko\Sitemap\Drivers\Output\OutputXMLWriter;
23
use Wszetko\Sitemap\Interfaces\DataCollector;
24
use Wszetko\Sitemap\Interfaces\XML;
25
use Wszetko\Sitemap\Traits\Domain;
26
27
/**
28
 * Sitemap
29
 * This class is used for generating Google Sitemap files.
30
 *
31
 * @package    Sitemap
32
 *
33
 * @author     Paweł Kłopotek-Główczewski <[email protected]>
34
 * @copyright  2019 Paweł Kłopotek-Głowczewski (https://pawelkg.com/)
35
 * @license    https://opensource.org/licenses/MIT MIT License
36
 *
37
 * @see       https://github.com/wszetko/sitemap
38
 */
39
class Sitemap
40
{
41
    use Domain;
42
43
    /**
44
     * Available values for the changefreq tag.
45
     *
46
     * @var array
47
     */
48
    public const CHANGEFREQ = [
49
        'always',
50
        'hourly',
51
        'daily',
52
        'weekly',
53
        'monthly',
54
        'yearly',
55
        'never',
56
    ];
57
58
    /**
59
     * Extension for sitemap file.
60
     *
61
     * @var string
62
     */
63
    public const EXT = '.xml';
64
65
    /**
66
     * Extension for gzipped sitemap file.
67
     *
68
     * @var string
69
     */
70
    public const GZ_EXT = '.xml.gz';
71
72
    /**
73
     * URL to Sitemap Schema.
74
     *
75
     * @var string
76
     */
77
    public const SCHEMA = 'http://www.sitemaps.org/schemas/sitemap/0.9';
78
79
    /**
80
     * Limit of items in Sitemap files.
81
     *
82
     * @var int
83
     */
84
    public const ITEM_PER_SITEMAP = 50000;
85
86
    /**
87
     * Limit of sitemaps in a single sitemap index file.
88
     *
89
     * @var int
90
     */
91
    public const SITEMAP_PER_SITEMAPINDEX = 1000;
92
93
    /**
94
     * Size limit of a single sitemap file, in bytes (~49.6 MB, leaving headroom to close the file).
95
     *
96
     * @var int
97
     */
98
    public const SITEMAP_MAX_SIZE = 52000000;
99
100
    /**
101
     * Path on disk to public directory.
102
     *
103
     * @var string
104
     */
105
    private $publicDirectory = '';
106
107
    /**
108
     * Path related to public directory to dir where sitemaps will be.
109
     *
110
     * @var string
111
     */
112
    private $sitepamsDirectory = '';
113
114
    /**
115
     * Path to temporary directory.
116
     *
117
     * @var string
118
     */
119
    private $sitemapTempDirectory = '';
120
121
    /**
122
     * Default filename for sitemap file.
123
     *
124
     * @var string
125
     */
126
    private $defaultFilename = 'sitemap';
127
128
    /**
129
     * Name of index file.
130
     *
131
     * @var string
132
     */
133
    private $indexFilename = 'index';
134
135
    /**
136
     * DataCollector instance.
137
     *
138
     * @var DataCollector
139
     */
140
    private $dataCollector;
141
142
    /**
143
     * Use compression.
144
     *
145
     * @var bool
146
     */
147
    private $useCompression = false;
148
149
    /**
150
     * XML Writer object.
151
     *
152
     * @var XML
153
     */
154
    private $xml;
155
156
    /**
157
     * Separator to be used in Sitemap filenames.
158
     *
159
     * @var string
160
     */
161
    private $separator = '-'; // ~49,6MB - to have some limit to close file
162
163
    /**
     * Constructor.
     *
     * @param null|string $domain Domain the sitemaps are generated for. When null, no
     *                            domain is set here; generate() refuses to run until
     *                            one has been provided.
     *
     * @throws \InvalidArgumentException Presumably raised by setDomain() for an invalid
     *                                   domain (the trait is not visible in this file)
     */
    public function __construct(?string $domain = null)
    {
        // Explicit nullable type: relying on "string $domain = null" for implicit
        // nullability is deprecated as of PHP 8.4.
        if (null !== $domain) {
            $this->setDomain($domain);
        }
    }
176
177
    /**
     * Add a single URL item to the data collector under the given group.
     *
     * The group name is reduced to word characters and lower-cased, because it
     * is later used as the base of the generated sitemap filename. When no
     * usable group name remains, the default filename is used as the group.
     *
     * @param Items\Url   $item  Item to add; its domain is set to this sitemap's domain
     * @param null|string $group Optional group name
     *
     * @throws \Exception When no data collector has been set
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function addItem(Items\Url $item, ?string $group = null): self
    {
        if (is_string($group)) {
            // Strip everything that is not a word character.
            $group = preg_replace('/\W+/', '', $group);
        }

        // Explicit comparison instead of empty(): empty('0') is true, which would
        // silently move the items of a group named "0" into the default group.
        // preg_replace() returns null on failure, which is covered here as well.
        if (null === $group || '' === $group) {
            $group = $this->getDefaultFilename();
        }

        $group = mb_strtolower($group);
        $item->setDomain($this->getDomain());
        $this->getDataCollector()->add($item, $group);

        return $this;
    }
201
202
    /**
     * Add several URL items at once, all under the same group.
     *
     * Each element is handed to addItem() in iteration order.
     *
     * @param array       $items Items to add (each one must be accepted by addItem())
     * @param null|string $group Optional group name shared by all items
     *
     * @throws \Exception
     *
     * @return $this
     */
    public function addItems(array $items, ?string $group = null): self
    {
        foreach ($items as $url) {
            $this->addItem($url, $group);
        }

        return $this;
    }
218
219
    /**
     * Return the default base filename used for sitemap files.
     *
     * addItem() also falls back to this value when no group name is given.
     *
     * @return string
     */
    public function getDefaultFilename(): string
    {
        return $this->defaultFilename;
    }
228
229
    /**
     * Change the default base filename used for sitemap files.
     *
     * @param string $defaultFilename New default filename (stored as given)
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDefaultFilename(string $defaultFilename): self
    {
        $this->defaultFilename = $defaultFilename;

        return $this;
    }
242
243
    /**
     * Return the configured DataCollector instance.
     *
     * @return DataCollector
     *
     * @throws \Exception When no data collector has been set yet
     */
    public function getDataCollector(): DataCollector
    {
        if (null !== $this->dataCollector) {
            return $this->dataCollector;
        }

        throw new Exception('DataCollector is not set.');
    }
257
258
    /**
     * Instantiate and register the data collector driver.
     *
     * @param string $driver Fully qualified class name of the driver; it must extend
     *                       AbstractDataCollector
     * @param array  $config Configuration passed to the driver's constructor
     *
     * @throws \InvalidArgumentException When the class does not exist or is not a
     *                                   data collector
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDataCollector(string $driver, $config = []): self
    {
        if (!class_exists($driver)) {
            throw new InvalidArgumentException($driver . ' data collector does not exists.');
        }

        // Check the type before instantiating, so the constructor of an unrelated
        // class is never executed just to be rejected afterwards.
        if (!is_subclass_of($driver, AbstractDataCollector::class)) {
            throw new InvalidArgumentException($driver . ' is not a valid data collector.');
        }

        $this->dataCollector = new $driver($config);

        return $this;
    }
282
283
    /**
     * Run the whole generation: build sitemaps and their index in a temporary
     * directory, then publish them to the public directory.
     *
     * When no XML writer was configured, OutputXMLWriter is used.
     *
     * @throws Exception When the public directory or the domain is not set, or
     *                   when any of the generation steps fails
     */
    public function generate(): void
    {
        if ('' === $this->getPublicDirectory()) {
            throw new Exception('Public directory is not set.');
        }

        if ('' === $this->getDomain()) {
            throw new Exception('Domain is not set.');
        }

        if (null === $this->xml) {
            $this->setXml(OutputXMLWriter::class, ['domain' => $this->getDomain()]);
        }

        // Start from a clean temporary directory.
        $tempDirectory = $this->getTempDirectory();
        $this->removeDir($tempDirectory);

        // Sitemap files are written to the sitemaps sub-directory of the temp dir.
        $writer = $this->getXml();
        $writer->setWorkDir($this->getSitepamsTempDirectory());
        $sitemaps = $this->generateSitemaps();

        // The index file(s) are written to the root of the temp dir.
        $writer->setWorkDir($tempDirectory);
        $this->generateSitemapsIndex($sitemaps);

        $this->publishSitemap();
    }
307
308
    /**
     * Return the path on disk to the public directory.
     *
     * @return string Empty string when no public directory has been set
     */
    public function getPublicDirectory(): string
    {
        return $this->publicDirectory;
    }
315
316
    /**
     * Set the public directory. The path is resolved with realpath() and stored
     * in its canonical form.
     *
     * @param string $publicDirectory Path to an existing directory
     *
     * @throws Exception When the path does not exist or is not a directory
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setPublicDirectory(string $publicDirectory): self
    {
        $resolved = realpath($publicDirectory);

        // realpath() also succeeds for regular files, so check the type explicitly.
        if (false === $resolved || !is_dir($resolved)) {
            throw new Exception('Sitemap directory does not exists.');
        }

        $this->publicDirectory = $resolved;

        return $this;
    }
333
334
    /**
     * Return the configured XML writer.
     *
     * @return XML
     *
     * @throws \Exception When no XML writer has been set
     */
    public function getXml(): XML
    {
        if (null !== $this->xml) {
            return $this->xml;
        }

        throw new Exception('XML writer class is not set.');
    }
347
348
    /**
     * Instantiate and register the XML writer driver.
     *
     * When the config has no 'domain' entry, this sitemap's domain is used.
     * A driver class that does not exist, or that does not implement the XML
     * interface, is silently ignored and the current writer is left untouched.
     *
     * @param string $driver Fully qualified class name of the writer
     * @param array  $config Configuration passed to the writer's constructor
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setXml(string $driver, array $config = []): self
    {
        if (!class_exists($driver)) {
            return $this;
        }

        $config['domain'] = $config['domain'] ?? $this->getDomain();
        $writer = new $driver($config);

        if ($writer instanceof XML) {
            $this->xml = $writer;
        }

        return $this;
    }
370
371
    /**
     * Return the temporary working directory, creating it on first use.
     *
     * The directory is created inside the system temp directory and its
     * canonical path is cached for subsequent calls.
     *
     * @throws \Exception When the directory cannot be created or resolved, or
     *                    when no source of randomness is available
     *
     * @return string
     */
    public function getTempDirectory(): string
    {
        if ('' === $this->sitemapTempDirectory) {
            // random_bytes() rather than md5(microtime()): a time-based name is
            // predictable and two runs started at the same moment would share
            // (and delete) the same directory. The name keeps its 32 hex chars.
            $path = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'sitemap' . bin2hex(random_bytes(16));

            if (!is_dir($path)) {
                mkdir($path);
            }

            // A failed mkdir() is detected here: realpath() returns false.
            $tempDir = realpath($path);

            if (!$tempDir) {
                throw new Exception('Can\'t get temporary directory.');
            }

            $this->sitemapTempDirectory = $tempDir;
        }

        return $this->sitemapTempDirectory;
    }
394
395
    /**
     * Return the sitemaps sub-directory inside the temporary directory,
     * creating it (recursively, mode 0777) when it does not exist yet.
     *
     * @throws \Exception When the directory cannot be created or resolved
     *
     * @return string Canonical path of the directory
     */
    public function getSitepamsTempDirectory(): string
    {
        $path = $this->getTempDirectory() . DIRECTORY_SEPARATOR . $this->sitepamsDirectory;
        $directory = realpath($path);

        if (!$directory) {
            mkdir($path, 0777, true);
            $directory = realpath($path);
        }

        if (!$directory) {
            throw new Exception('Can\'t get temporary directory.');
        }

        return $directory;
    }
417
418
    /**
     * Write all collected items into sitemap files, one series of files per group.
     *
     * Files are named "<group><separator><number>.xml". A new file is started
     * whenever the current one reaches ITEM_PER_SITEMAP items or comes within
     * 20 bytes of SITEMAP_MAX_SIZE. When compression is enabled the files are
     * gzipped afterwards and the returned names carry the gzip extension.
     *
     * @throws Exception
     *
     * @return array Map of generated filename => newest "lastmod" value found
     *               in that file (null when no item had one)
     */
    public function generateSitemaps(): array
    {
        $collector = $this->getDataCollector();

        if (0 == $collector->getCount()) {
            return [];
        }

        $files = [];

        foreach ($collector->getGroups() as $group) {
            if (!($collector->getGroupCount($group) > 0)) {
                continue;
            }

            $xml = $this->getXml();
            $fileNo = 0;
            $itemsInFile = 0;
            $filename = $group . $this->getSeparator() . $fileNo . self::EXT;
            $xml->openSitemap($filename, $collector->getExtensions());
            $files[$filename] = null;
            // Tracks whether a sitemap is currently open, so it is closed exactly once.
            $isOpen = true;

            while ($element = $collector->fetch($group)) {
                $xml->addUrl($element);
                ++$itemsInFile;

                // Remember the most recent lastmod of the current file.
                if (
                    isset($element['lastmod']) &&
                    (
                        !$files[$filename] ||
                        strtotime($element['lastmod']) > strtotime($files[$filename])
                    )
                ) {
                    $files[$filename] = $element['lastmod'];
                }

                // self::SITEMAP_MAX_SIZE - 20 for buffer for close tag
                if (
                    $itemsInFile >= self::ITEM_PER_SITEMAP ||
                    $xml->getSitemapSize() >= (self::SITEMAP_MAX_SIZE - 20)
                ) {
                    $xml->closeSitemap();
                    $isOpen = false;

                    // Only start another file when the group has more items.
                    if (!$collector->isLast($group)) {
                        ++$fileNo;
                        $itemsInFile = 0;
                        $filename = $group . $this->getSeparator() . $fileNo . self::EXT;
                        $xml->openSitemap($filename, $collector->getExtensions());
                        $files[$filename] = null;
                        $isOpen = true;
                    }
                }
            }

            // Previously closeSitemap() ran a second time here when the last
            // item of the group had already triggered the close above.
            if ($isOpen) {
                $xml->closeSitemap();
            }
        }

        if ($this->isUseCompression() && !empty($files)) {
            $this->compressFiles($this->getSitepamsTempDirectory(), $files);
        }

        return $files;
    }
491
492
    /**
     * Return the separator placed between the base name and the running
     * number in generated filenames.
     *
     * @return string
     */
    public function getSeparator(): string
    {
        return $this->separator;
    }
499
500
    /**
     * Change the separator used in generated filenames.
     *
     * @param string $separator New separator (stored as given)
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSeparator(string $separator): self
    {
        $this->separator = $separator;

        return $this;
    }
511
512
    /**
     * Tell whether generated files are going to be gzip-compressed.
     *
     * @return bool
     */
    public function isUseCompression(): bool
    {
        return $this->useCompression;
    }
521
522
    /**
     * Set whether to use compression or not.
     *
     * Compression is only enabled when the zlib extension is loaded; a request
     * to enable it without zlib leaves compression off.
     *
     * @param bool $useCompression True to enable, false to disable
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setUseCompression(bool $useCompression): self
    {
        // Always assign: the previous guarded assignment only ever stored true,
        // so compression could not be switched off again once enabled.
        $this->useCompression = $useCompression && extension_loaded('zlib');

        return $this;
    }
537
538
    /**
     * Write the sitemap index file(s) referencing the given sitemaps.
     *
     * The first index is named "<index>.xml"; every further one, started after
     * SITEMAP_PER_SITEMAPINDEX entries, is named "<index><separator><n>.xml".
     * When compression is enabled the index files are gzipped afterwards and
     * the returned names carry the gzip extension.
     *
     * @param array $sitemaps Map of sitemap filename => lastmod, as returned
     *                        by generateSitemaps()
     *
     * @throws Exception
     *
     * @return array Map of generated index filename => null
     */
    public function generateSitemapsIndex(array $sitemaps): array
    {
        if (0 === count($sitemaps)) {
            return [];
        }

        $xml = $this->getXml();
        $file = $this->getIndexFilename() . self::EXT;
        $files = [$file => null];
        $xml->openSitemapIndex($file);
        // Tracks whether an index is currently open, so it is closed exactly once.
        $isOpen = true;
        $counter = 0;
        $lastItem = array_key_last($sitemaps);

        // The URL prefix is the same for every sitemap, so build it once.
        // Directory separators are normalised to "/" for use in a URL, and an
        // empty relative directory must not produce a double slash.
        $relativeDir = trim(
            str_replace(
                DIRECTORY_SEPARATOR,
                '/',
                str_replace($this->getPublicDirectory(), '', $this->getSitepamsDirectory())
            ),
            '/'
        );
        $baseUrl = rtrim((string) $this->getDomain(), '/') . '/';

        if ('' !== $relativeDir) {
            $baseUrl .= $relativeDir . '/';
        }

        foreach ($sitemaps as $sitemap => $lastmod) {
            $xml->addSitemap($baseUrl . $sitemap, $lastmod);
            ++$counter;

            if ($counter >= self::SITEMAP_PER_SITEMAPINDEX) {
                $xml->closeSitemapIndex();
                $isOpen = false;
                $counter = 0;

                // Only start another index file when sitemaps remain.
                if ($sitemap !== $lastItem) {
                    $file = $this->getIndexFilename() . $this->getSeparator() . count($files) . self::EXT;
                    $files[$file] = null;
                    $xml->openSitemapIndex($file);
                    $isOpen = true;
                }
            }
        }

        // Previously closeSitemapIndex() ran a second time here when the last
        // sitemap had already triggered the close above.
        if ($isOpen) {
            $xml->closeSitemapIndex();
        }

        if ($this->isUseCompression() && !empty($files)) {
            $this->compressFiles($this->getTempDirectory(), $files);
        }

        return $files;
    }
586
587
    /**
     * Return the base filename (without extension) of the sitemap index file.
     *
     * @return string
     */
    public function getIndexFilename(): string
    {
        return $this->indexFilename;
    }
596
597
    /**
     * Change the base filename (without extension) of the sitemap index file.
     *
     * @param string $indexFilename New index filename (stored as given)
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setIndexFilename(string $indexFilename): self
    {
        $this->indexFilename = $indexFilename;

        return $this;
    }
610
611
    /**
     * Return the sitemaps directory inside the public directory, creating it
     * (recursively, mode 0777) when it does not exist yet.
     *
     * @throws \Exception When the directory cannot be created or resolved
     *
     * @return string Canonical path of the directory
     */
    public function getSitepamsDirectory(): string
    {
        $path = $this->getPublicDirectory() . DIRECTORY_SEPARATOR . $this->sitepamsDirectory;
        $directory = realpath($path);

        if (!$directory) {
            mkdir($path, 0777, true);
            $directory = realpath($path);
        }

        if (!$directory) {
            throw new Exception('Can\'t get sitemap directory.');
        }

        return $directory;
    }
629
630
    /**
     * Set the directory, relative to the public directory, in which the
     * sitemap files are placed.
     *
     * @param string $sitepamsDirectory Relative path (stored as given)
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSitepamsDirectory(string $sitepamsDirectory): self
    {
        $this->sitepamsDirectory = $sitepamsDirectory;

        return $this;
    }
641
642
    /**
     * Recursively delete a directory together with everything inside it.
     *
     * Nothing happens when the path is not a directory or cannot be listed.
     *
     * @param string $dir Directory to delete
     *
     * @return void
     */
    private function removeDir($dir): void
    {
        if (!is_dir($dir)) {
            return;
        }

        $entries = scandir($dir);

        if (!$entries) {
            return;
        }

        foreach ($entries as $entry) {
            if ('.' == $entry || '..' == $entry) {
                continue;
            }

            $path = $dir . '/' . $entry;

            // Descend into real sub-directories; everything else is unlinked.
            if ('dir' == filetype($path)) {
                $this->removeDir($path);
            } else {
                unlink($path);
            }
        }

        rmdir($dir);
    }
663
664
    /**
     * Gzip every listed file inside the given directory and delete the originals.
     *
     * The list is rewritten in place: each filename has its last four
     * characters (the ".xml" extension) replaced by GZ_EXT, keeping its
     * lastmod value.
     *
     * @param string $dir   Directory containing the files
     * @param array  $files Map of filename => lastmod; replaced by the map of
     *                      compressed filename => lastmod
     *
     * @throws Exception When zlib is missing, or a file cannot be opened,
     *                   read or created
     *
     * @return void
     */
    private function compressFiles(string $dir, array &$files): void
    {
        if (!extension_loaded('zlib')) {
            throw new Exception('Extension zlib is not loaded.');
        }

        $newFiles = [];

        foreach ($files as $file => $lastmod) {
            $source = $dir . DIRECTORY_SEPARATOR . $file;
            $gzFile = mb_substr($file, 0, mb_strlen($file) - 4) . self::GZ_EXT;
            $output = $dir . DIRECTORY_SEPARATOR . $gzFile;

            // Open the source first, so no empty archive is left behind when
            // the source is missing.
            $in = fopen($source, 'rb');

            if (!$in) {
                throw new Exception('Can\'t open xml file.');
            }

            $out = gzopen($output, 'wb9');

            if (!$out) {
                fclose($in);

                throw new Exception('Can\'t create GZip archive.');
            }

            try {
                while (!feof($in)) {
                    $content = fread($in, 524288);

                    // Stop on a failed read instead of looping forever and
                    // deleting the source of a truncated archive.
                    if (false === $content) {
                        throw new Exception('Can\'t read xml file.');
                    }

                    // Compare against '' explicitly: a chunk consisting of
                    // just "0" is falsy but still has to be written.
                    if ('' !== $content) {
                        gzwrite($out, $content);
                    }
                }
            } finally {
                // Release both handles on success and on failure.
                fclose($in);
                gzclose($out);
            }

            unlink($source);
            $newFiles[$gzFile] = $lastmod;
        }

        $files = $newFiles;
    }
709
710
    /**
     * Replace the published sitemaps with the freshly generated ones.
     *
     * Steps: remove the old sitemaps directory, remove old index files from
     * the public directory, move every generated file from the temporary
     * directory to the same relative location under the public directory,
     * and finally delete the temporary directory.
     *
     * @throws \Exception
     *
     * @return void
     */
    private function publishSitemap(): void
    {
        $publicDirectory = $this->getPublicDirectory();
        $sitemapsDirectory = $this->getSitepamsDirectory();

        // Clear previous sitemaps. With an empty (default) sitemaps directory
        // the resolved path IS the public directory; removing it would wipe
        // the whole public directory, so it is skipped in that case.
        if ($sitemapsDirectory !== $publicDirectory) {
            $this->removeDir($sitemapsDirectory);
        }

        $extension = $this->getExt();

        // Dynamic parts are quoted so they are matched literally, and the
        // configured separator is used, matching how index files are named.
        $indexPattern = '/^(' . preg_quote($this->getIndexFilename(), '/') . ')(('
            . preg_quote($this->getSeparator(), '/') . ')[\d]+)?('
            . preg_quote($extension, '/') . ')$/';

        if ($publicDir = scandir($publicDirectory)) {
            foreach ($publicDir as $file) {
                if (1 === preg_match($indexPattern, $file)) {
                    unlink($publicDirectory . DIRECTORY_SEPARATOR . $file);
                }
            }
        }

        $this->getSitepamsDirectory(); //To create sitemaps directory

        $tempDirectory = $this->getTempDirectory();
        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($tempDirectory, RecursiveDirectoryIterator::SKIP_DOTS)
        );

        // Collect first, move afterwards: renaming while iterating would
        // modify the directory tree under the iterator.
        $fileList = [];
        $extensionLength = strlen($extension);

        foreach ($iterator as $entry) {
            $path = $entry->getPathname();

            if ($entry->isFile() && substr($path, -$extensionLength) === $extension) {
                $fileList[] = $path;
            }
        }

        foreach ($fileList as $file) {
            $destination = str_replace($tempDirectory, $publicDirectory, $file);
            rename($file, $destination);
        }

        $this->removeDir($tempDirectory);
    }
759
760
    /**
     * Return the extension of the generated files: GZ_EXT when compression
     * is enabled, EXT otherwise.
     *
     * @return string
     */
    private function getExt(): string
    {
        return $this->isUseCompression() ? self::GZ_EXT : self::EXT;
    }
771
}
772