Completed
Push — master ( a0493e...82ab0a )
by Paweł
06:50 queued 04:21
created

Sitemap::removeDir()   B

Complexity

Conditions 7
Paths 6

Size

Total Lines 20
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 56

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 7
eloc 11
c 2
b 0
f 0
nc 6
nop 1
dl 0
loc 20
ccs 0
cts 11
cp 0
crap 56
rs 8.8333
1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * This file is part of Wszetko Sitemap.
7
 *
8
 * (c) Paweł Kłopotek-Główczewski <[email protected]>
9
 *
10
 * This source file is subject to the MIT license that is bundled
11
 * with this source code in the file LICENSE.
12
 */
13
14
namespace Wszetko\Sitemap;
15
16
use Exception;
17
use InvalidArgumentException;
18
use RecursiveDirectoryIterator;
19
use RecursiveIteratorIterator;
20
use RegexIterator;
21
use Wszetko\Sitemap\Drivers\DataCollectors\AbstractDataCollector;
22
use Wszetko\Sitemap\Drivers\Output\OutputXMLWriter;
23
use Wszetko\Sitemap\Interfaces\DataCollector;
24
use Wszetko\Sitemap\Interfaces\XML;
25
use Wszetko\Sitemap\Traits\Domain;
26
27
/**
28
 * Sitemap
29
 * This class is used for generating Google Sitemap files.
30
 *
31
 * @package    Sitemap
32
 *
33
 * @author     Paweł Kłopotek-Główczewski <[email protected]>
34
 * @copyright  2019 Paweł Kłopotek-Głowczewski (https://pawelkg.com/)
35
 * @license    https://opensource.org/licenses/MIT MIT License
36
 *
37
 * @see       https://github.com/wszetko/sitemap
38
 */
39
class Sitemap
40
{
41
    use Domain;
42
43
    /**
44
     * Available values for the changefreq tag.
45
     *
46
     * @var array
47
     */
48
    public const CHANGEFREQ = [
49
        'always',
50
        'hourly',
51
        'daily',
52
        'weekly',
53
        'monthly',
54
        'yearly',
55
        'never',
56
    ];
57
58
    /**
59
     * Extension for sitemap file.
60
     *
61
     * @var string
62
     */
63
    public const EXT = '.xml';
64
65
    /**
66
     * Extension for gzipped sitemap file.
67
     *
68
     * @var string
69
     */
70
    public const GZ_EXT = '.xml.gz';
71
72
    /**
73
     * URL to Sitemap Schema.
74
     *
75
     * @var string
76
     */
77
    public const SCHEMA = 'http://www.sitemaps.org/schemas/sitemap/0.9';
78
79
    /**
80
     * Limit of items in Sitemap files.
81
     *
82
     * @var int
83
     */
84
    public const ITEM_PER_SITEMAP = 50000;
85
86
    /**
87
     * Limit of sitemaps in a single sitemap index file.
88
     *
89
     * @var int
90
     */
91
    public const SITEMAP_PER_SITEMAPINDEX = 1000;
92
93
    /**
94
     * Size limit of a single sitemap file (52000000, roughly 49.6 MB, kept as a threshold for closing the file).
95
     *
96
     * @var int
97
     */
98
    public const SITEMAP_MAX_SIZE = 52000000;
99
100
    /**
101
     * Path on disk to public directory.
102
     *
103
     * @var string
104
     */
105
    private $publicDirectory = '';
106
107
    /**
108
     * Path related to public directory to dir where sitemaps will be.
109
     *
110
     * @var string
111
     */
112
    private $sitepamsDirectory = '';
113
114
    /**
115
     * Path to temporary directory.
116
     *
117
     * @var string
118
     */
119
    private $sitemapTempDirectory = '';
120
121
    /**
122
     * Default filename for sitemap file.
123
     *
124
     * @var string
125
     */
126
    private $defaultFilename = 'sitemap';
127
128
    /**
129
     * Name of index file.
130
     *
131
     * @var string
132
     */
133
    private $indexFilename = 'index';
134
135
    /**
136
     * DataCollector instance.
137
     *
138
     * @var DataCollector
139
     */
140
    private $dataCollector;
141
142
    /**
143
     * Use compression.
144
     *
145
     * @var bool
146
     */
147
    private $useCompression = false;
148
149
    /**
150
     * XML Writer object.
151
     *
152
     * @var XML
153
     */
154
    private $xml;
155
156
    /**
157
     * Separator to be used in Sitemap filenames.
158
     *
159
     * @var string
160
     */
161
    private $separator = '-';
162
163
    /**
     * Constructor.
     *
     * @param null|string $domain Domain handed to setDomain(); when null, setDomain() is not called
     *
     * @throws \InvalidArgumentException presumably raised by setDomain() (defined in the Domain trait,
     *                                   not visible here) - confirm against the trait
     */
    public function __construct(?string $domain = null)
    {
        // Explicit nullable type: relying on "= null" for implicit nullability is deprecated.
        if (null !== $domain) {
            $this->setDomain($domain);
        }
    }
176
177
    /**
     * Register a single URL item in the data collector.
     *
     * The group name is stripped of all non-word characters and lower-cased; when it is
     * missing or becomes empty, the default filename is used as the group instead.
     *
     * @param Items\Url   $item
     * @param null|string $group
     *
     * @throws \Exception when no data collector has been set
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function addItem(Items\Url $item, ?string $group = null): self
    {
        $name = is_string($group) ? preg_replace('/\W+/', '', $group) : $group;

        if (null === $name || '' === $name) {
            $name = $this->getDefaultFilename();
        }

        $name = mb_strtolower($name);
        $item->setDomain($this->getDomain());
        $this->getDataCollector()->add($item, $name);

        return $this;
    }
201
202
    /**
     * Register several URL items at once; each one is passed to addItem() with the same group.
     *
     * @param array       $items
     * @param null|string $group
     *
     * @throws \Exception when addItem() throws
     *
     * @return $this
     */
    public function addItems(array $items, ?string $group = null): self
    {
        foreach ($items as $url) {
            $this->addItem($url, $group);
        }

        return $this;
    }
218
219
    /**
     * Return the filename used for sitemap files when no group name is given.
     *
     * @return string
     */
    public function getDefaultFilename(): string
    {
        return $this->defaultFilename;
    }
228
229
    /**
     * Change the filename used for sitemap files when no group name is given.
     *
     * @param string $defaultFilename
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDefaultFilename(string $defaultFilename): self
    {
        $this->defaultFilename = $defaultFilename;

        return $this;
    }
242
243
    /**
     * Get the DataCollector object.
     *
     * @throws \Exception when no data collector has been set
     *
     * @return DataCollector
     */
    public function getDataCollector(): DataCollector
    {
        if (null !== $this->dataCollector) {
            return $this->dataCollector;
        }

        throw new Exception('DataCollector is not set.');
    }
257
258
    /**
     * Instantiate and store the data collector driver.
     *
     * The driver class must exist and extend AbstractDataCollector. The type is verified
     * before instantiation so that unrelated classes are never constructed.
     *
     * @param string $driver Class name of the data collector
     * @param array  $config Passed as the only argument to the driver's constructor
     *
     * @throws \InvalidArgumentException when the class does not exist or is not a data collector
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDataCollector(string $driver, $config = []): self
    {
        if (!class_exists($driver)) {
            throw new InvalidArgumentException($driver . ' data collector does not exists.');
        }

        if (!is_subclass_of($driver, AbstractDataCollector::class)) {
            throw new InvalidArgumentException($driver . ' is not a valid data collector.');
        }

        $this->dataCollector = new $driver($config);

        return $this;
    }
282
283
    /**
     * Generate the sitemap files and the sitemap index, then publish them.
     *
     * When no XML writer has been configured, OutputXMLWriter is set up with the current domain.
     *
     * @throws Exception when the public directory or the domain is an empty string,
     *                   or when one of the invoked steps throws
     */
    public function generate(): void
    {
        if ('' === $this->getPublicDirectory()) {
            throw new Exception('Public directory is not set.');
        }

        if ('' === $this->getDomain()) {
            throw new Exception('Domain is not set.');
        }

        if (null === $this->xml) {
            $this->setXml(OutputXMLWriter::class, ['domain' => $this->getDomain()]);
        }

        // The temporary directory goes through removeDir() before anything is written into it.
        $tempDirectory = $this->getTempDirectory();
        $this->removeDir($tempDirectory);

        // Sitemap files are written into the sitemaps sub-directory of the temporary directory...
        $this->getXml()->setWorkDir($this->getSitepamsTempDirectory());
        $sitemaps = $this->generateSitemaps();

        // ...while the index files are written directly into the temporary directory.
        $this->getXml()->setWorkDir($tempDirectory);
        $this->generateSitemapsIndex($sitemaps);

        $this->publishSitemap();
    }
307
308
    /**
     * Return the path of the public directory (an empty string until one is set).
     *
     * @return string
     */
    public function getPublicDirectory(): string
    {
        return $this->publicDirectory;
    }
315
316
    /**
     * Set the public directory; the path is stored in the form returned by realpath().
     *
     * @param string $publicDirectory
     *
     * @throws Exception when realpath() cannot resolve the given path
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setPublicDirectory(string $publicDirectory): self
    {
        $resolved = realpath($publicDirectory);

        if (false === $resolved) {
            throw new Exception('Sitemap directory does not exists.');
        }

        $this->publicDirectory = $resolved;

        return $this;
    }
335
336
    /**
     * Return the configured XML writer.
     *
     * @throws \Exception when no XML writer has been set
     *
     * @return XML
     */
    public function getXml(): XML
    {
        if (null !== $this->xml) {
            return $this->xml;
        }

        throw new Exception('XML writer class is not set.');
    }
349
350
    /**
     * Instantiate the XML writer driver and store it when it implements the XML interface.
     *
     * A missing class, or an instance that does not implement XML, is silently ignored and
     * leaves the current writer untouched.
     *
     * @param string $driver Class name of the XML writer
     * @param array  $config Passed to the driver's constructor; 'domain' defaults to the current domain
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setXml(string $driver, array $config = []): self
    {
        if (!class_exists($driver)) {
            return $this;
        }

        $config['domain'] = $config['domain'] ?? $this->getDomain();
        $writer = new $driver($config);

        if ($writer instanceof XML) {
            $this->xml = $writer;
        }

        return $this;
    }
372
373
    /**
     * Return the temporary working directory, creating one on first use.
     *
     * The directory is created below sys_get_temp_dir() and named "sitemap" followed by
     * an md5 hash of microtime(); its resolved path is remembered for later calls.
     *
     * @throws \Exception when the created directory cannot be resolved
     *
     * @return string
     */
    public function getTempDirectory(): string
    {
        if (null !== $this->sitemapTempDirectory && '' != $this->sitemapTempDirectory) {
            return $this->sitemapTempDirectory;
        }

        $candidate = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'sitemap' . md5(microtime());

        if (!is_dir($candidate)) {
            mkdir($candidate);
        }

        $resolved = realpath($candidate);

        if (false === $resolved) {
            // @codeCoverageIgnoreStart
            throw new Exception('Can\'t get temporary directory.');
            // @codeCoverageIgnoreEnd
        }

        $this->sitemapTempDirectory = $resolved;

        return $this->sitemapTempDirectory;
    }
400
401
    /**
     * Return the sitemaps directory inside the temporary directory, creating it when missing.
     *
     * @throws \Exception when the directory cannot be resolved even after trying to create it
     *
     * @return string
     */
    public function getSitepamsTempDirectory(): string
    {
        $path = $this->getTempDirectory() . DIRECTORY_SEPARATOR . $this->sitepamsDirectory;
        $directory = realpath($path);

        if (false === $directory) {
            // Not there yet: create it (recursively) and resolve it again.
            mkdir($path, 0777, true);
            $directory = realpath($path);
        }

        if (false === $directory) {
            // @codeCoverageIgnoreStart
            throw new Exception('Can\'t get temporary directory.');
            // @codeCoverageIgnoreEnd
        }

        return $directory;
    }
427
428
    /**
     * Write the sitemap files for every group held by the data collector.
     *
     * Files are named "<group><separator><number>.xml". A new file is started whenever the
     * current one reaches ITEM_PER_SITEMAP items or its size gets within 20 of
     * SITEMAP_MAX_SIZE (room for the closing tag). When compression is enabled the files
     * are gzipped and the returned keys carry the gzip extension.
     *
     * @throws Exception when the data collector or XML writer is not set, or compression fails
     *
     * @return array Map of generated filename => newest 'lastmod' among its items (null when none)
     */
    public function generateSitemaps(): array
    {
        $collector = $this->getDataCollector();

        if (0 == $collector->getCount()) {
            return [];
        }

        $files = [];

        foreach ($collector->getGroups() as $group) {
            if (!($collector->getGroupCount($group) > 0)) {
                continue;
            }

            $xml = $this->getXml();
            $groupNo = 0;
            $itemsInFile = 0;
            $filename = $group . $this->getSeparator() . $groupNo . self::EXT;

            $xml->openSitemap($filename, $collector->getExtensions());
            $files[$filename] = null;
            // Tracks whether a sitemap is currently open, so it is never closed twice.
            $isOpen = true;

            while ($element = $collector->fetch($group)) {
                $xml->addUrl($element);
                ++$itemsInFile;

                if (isset($element['lastmod'])) {
                    $newest = $files[$filename];

                    // Keep the most recent modification date seen in this file.
                    if (!$newest || strtotime($element['lastmod']) > strtotime($newest)) {
                        $files[$filename] = $element['lastmod'];
                    }
                }

                // self::SITEMAP_MAX_SIZE - 20 for buffer for close tag
                $limitReached = $itemsInFile >= self::ITEM_PER_SITEMAP
                    || $xml->getSitemapSize() >= (self::SITEMAP_MAX_SIZE - 20);

                if (!$limitReached) {
                    continue;
                }

                $xml->closeSitemap();
                $isOpen = false;

                if (!$collector->isLast($group)) {
                    ++$groupNo;
                    $itemsInFile = 0;
                    $filename = $group . $this->getSeparator() . $groupNo . self::EXT;

                    $xml->openSitemap($filename, $collector->getExtensions());
                    $files[$filename] = null;
                    $isOpen = true;
                }
            }

            // Previously closeSitemap() ran unconditionally here, i.e. a second time when
            // the limit was hit on the group's last element.
            if ($isOpen) {
                $xml->closeSitemap();
            }
        }

        if ($this->isUseCompression() && [] !== $files) {
            $this->compressFiles($this->getSitepamsTempDirectory(), $files);
        }

        return $files;
    }
501
502
    /**
     * Return the separator placed between the name and the number in generated filenames.
     *
     * @return string
     */
    public function getSeparator(): string
    {
        return $this->separator;
    }
509
510
    /**
     * Change the separator placed between the name and the number in generated filenames.
     *
     * @param string $separator
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSeparator(string $separator): self
    {
        $this->separator = $separator;

        return $this;
    }
521
522
    /**
     * Tell whether generated files are going to be gzip-compressed.
     *
     * @return bool
     */
    public function isUseCompression(): bool
    {
        return $this->useCompression;
    }
531
532
    /**
     * Set whether to use compression or not.
     *
     * Compression is only enabled when the zlib extension is loaded. Passing false always
     * disables it (previously a false argument was ignored, so compression could not be
     * switched off once enabled).
     *
     * @param bool $useCompression
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setUseCompression(bool $useCompression): self
    {
        $this->useCompression = $useCompression && extension_loaded('zlib');

        return $this;
    }
547
548
    /**
     * Write the sitemap index file(s) referencing the given sitemaps.
     *
     * The first index is named "<indexFilename>.xml"; once SITEMAP_PER_SITEMAPINDEX entries
     * have been written, a further index "<indexFilename><separator><n>.xml" is started.
     * When compression is enabled the files are gzipped and the returned keys carry the
     * gzip extension.
     *
     * @param array $sitemaps Map of sitemap filename => lastmod, as returned by generateSitemaps()
     *
     * @throws Exception when the XML writer is not set or compression fails
     *
     * @return array Map of generated index filename => null
     */
    public function generateSitemapsIndex(array $sitemaps): array
    {
        if ([] === $sitemaps) {
            return [];
        }

        $counter = 0;
        $file = $this->getIndexFilename() . self::EXT;
        $files = [$file => null];
        $this->getXml()->openSitemapIndex($file);
        // Tracks whether an index is currently open, so it is never closed twice.
        $isOpen = true;
        $lastItem = array_key_last($sitemaps);

        // The URL prefix is identical for every entry, so build it once. Directory separators
        // become URL slashes, and an empty relative path no longer produces "//".
        $relativeDir = str_replace($this->getPublicDirectory(), '', $this->getSitepamsDirectory());
        $relativeDir = trim(str_replace(DIRECTORY_SEPARATOR, '/', $relativeDir), '/');
        $baseUrl = (string) $this->getDomain() . '/' . ('' === $relativeDir ? '' : $relativeDir . '/');

        foreach ($sitemaps as $sitemap => $lastmod) {
            $this->getXml()->addSitemap($baseUrl . $sitemap, $lastmod);
            ++$counter;

            if ($counter < self::SITEMAP_PER_SITEMAPINDEX) {
                continue;
            }

            $this->getXml()->closeSitemapIndex();
            $isOpen = false;
            $counter = 0;

            // Strict comparison: loosely compared keys can be equal without being the same key.
            if ($sitemap !== $lastItem) {
                $file = $this->getIndexFilename() . $this->getSeparator() . count($files) . self::EXT;
                $files[$file] = null;
                $this->getXml()->openSitemapIndex($file);
                $isOpen = true;
            }
        }

        if ($isOpen) {
            $this->getXml()->closeSitemapIndex();
        }

        if ($this->isUseCompression()) {
            $this->compressFiles($this->getTempDirectory(), $files);
        }

        return $files;
    }
596
597
    /**
     * Return the base name (without extension) of the sitemap index file.
     *
     * @return string
     */
    public function getIndexFilename(): string
    {
        return $this->indexFilename;
    }
606
607
    /**
     * Change the base name (without extension) of the sitemap index file.
     *
     * @param string $indexFilename
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setIndexFilename(string $indexFilename): self
    {
        $this->indexFilename = $indexFilename;

        return $this;
    }
620
621
    /**
     * Return the stored sitemaps directory (an empty string until one is set).
     *
     * This is a plain getter: it neither creates the directory nor throws.
     *
     * @return string
     */
    public function getSitepamsDirectory(): string
    {
        return $this->sitepamsDirectory;
    }
630
631
    /**
     * Set the sitemaps directory, given relative to the public directory.
     *
     * The directory is created (recursively) when it does not exist yet, and its
     * resolved path is what gets stored.
     *
     * @param string $sitepamsDirectory
     *
     * @throws \Exception when the directory cannot be resolved even after trying to create it
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSitepamsDirectory(string $sitepamsDirectory): self
    {
        $path = $this->getPublicDirectory() . DIRECTORY_SEPARATOR . $sitepamsDirectory;
        $resolved = realpath($path);

        if (false === $resolved) {
            mkdir($path, 0777, true);
            $resolved = realpath($path);
        }

        if (false === $resolved) {
            throw new Exception('Can\'t get sitemap directory.');
        }

        $this->sitepamsDirectory = $resolved;

        return $this;
    }
655
656
    /**
     * Recursively delete a directory together with everything inside it.
     *
     * Does nothing when the path is not a directory. Entries that filetype() does not
     * report as 'dir' (including symbolic links) are unlinked rather than descended into.
     *
     * @param string $dir
     *
     * @return void
     */
    private function removeDir($dir): void
    {
        // The guard used to be inverted: it returned for real directories and went on
        // to scandir() paths that were not directories.
        if (!is_dir($dir)) {
            return;
        }

        $objects = scandir($dir);

        if (false === $objects) {
            return;
        }

        foreach ($objects as $object) {
            if ('.' === $object || '..' === $object) {
                continue;
            }

            $path = $dir . '/' . $object;

            if ('dir' === filetype($path)) {
                $this->removeDir($path);
            } else {
                unlink($path);
            }
        }

        rmdir($dir);
    }
683
684
    /**
     * Gzip every listed file inside $dir and delete the uncompressed originals.
     *
     * $files is rewritten in place: each key has its EXT suffix replaced by GZ_EXT,
     * the values are carried over unchanged.
     *
     * @param string $dir   Directory containing the files
     * @param array  $files Map of filename => lastmod, passed by reference
     *
     * @throws Exception when zlib is not loaded, or a file cannot be opened, read or created
     *
     * @return void
     */
    private function compressFiles(string $dir, array &$files): void
    {
        if (!extension_loaded('zlib')) {
            throw new Exception('Extension zlib is not loaded.');
        }

        $newFiles = [];

        foreach ($files as $file => $lastmod) {
            $file = (string) $file;
            $source = $dir . DIRECTORY_SEPARATOR . $file;
            $gzFile = mb_substr($file, 0, mb_strlen($file) - mb_strlen(self::EXT)) . self::GZ_EXT;
            $output = $dir . DIRECTORY_SEPARATOR . $gzFile;

            // Open the input first so an unreadable source does not leave an empty archive behind.
            $in = fopen($source, 'rb');

            if (false === $in) {
                throw new Exception('Can\'t open xml file.');
            }

            $out = gzopen($output, 'wb9');

            if (false === $out) {
                fclose($in);

                throw new Exception('Can\'t create GZip archive.');
            }

            try {
                while (!feof($in)) {
                    $content = fread($in, 524288);

                    // A failed read used to be skipped silently, which could loop forever.
                    if (false === $content) {
                        throw new Exception('Can\'t read xml file.');
                    }

                    gzwrite($out, $content);
                }
            } finally {
                // Release both handles on success and on failure alike.
                fclose($in);
                gzclose($out);
            }

            unlink($source);
            $newFiles[$gzFile] = $lastmod;
        }

        $files = $newFiles;
    }
731
732
    /**
     * Move the generated files from the temporary directory into the public directory.
     *
     * Steps: pass the current sitemaps directory to removeDir(), delete previous index files
     * from the public directory, move every generated file with the active extension to the
     * same relative location below the public directory, then remove the temporary directory.
     *
     * NOTE(review): the destination is derived by swapping the temporary directory prefix for
     * the public directory prefix; verify the resulting layout against getSitepamsTempDirectory().
     *
     * @throws \Exception when the temporary directory cannot be obtained
     *
     * @return void
     */
    private function publishSitemap(): void
    {
        // Clear previous sitemaps
        $this->removeDir($this->getSitepamsDirectory());

        // Quote every dynamic part: unquoted, the "." of the extension matched any character,
        // and the separator was hard-coded as "-" regardless of the configured one.
        $extPattern = preg_quote($this->getExt(), '/');
        $indexPattern = '/^(' . preg_quote($this->getIndexFilename(), '/') . ')(('
            . preg_quote($this->getSeparator(), '/') . ')[\d]+)?(' . $extPattern . ')$/';

        $publicDir = scandir($this->getPublicDirectory());

        if (is_array($publicDir)) {
            foreach ($publicDir as $file) {
                if (1 === preg_match($indexPattern, $file)) {
                    unlink($this->getPublicDirectory() . DIRECTORY_SEPARATOR . $file);
                }
            }
        }

        $dir = new RecursiveDirectoryIterator($this->getTempDirectory());
        $iterator = new RecursiveIteratorIterator($dir);
        $files = new RegexIterator(
            $iterator,
            "/^(?'path'(([a-zA-Z]:)|((\\\\|\\/){1,2}\\w+)?)((\\\\|\\/)(\\w[\\w ]*.*))+({$extPattern}){1})$/",
            RegexIterator::GET_MATCH
        );

        // Collect the paths first so that no file is moved while the iterator is still in use.
        $fileList = [];

        foreach ($files as $file) {
            if (isset($file['path'])) {
                $fileList[] = $file['path'];
            }
        }

        foreach ($fileList as $file) {
            $destination = str_replace($this->getTempDirectory(), $this->getPublicDirectory(), $file);
            $destinationDir = dirname($destination);

            // The target directory may have just been removed above; rename() does not create it.
            if (!is_dir($destinationDir)) {
                mkdir($destinationDir, 0777, true);
            }

            rename($file, $destination);
        }

        $this->removeDir($this->getTempDirectory());
    }
782
783
    /**
     * Return the file extension in effect: GZ_EXT when compression is used, EXT otherwise.
     *
     * @return string
     */
    private function getExt(): string
    {
        return $this->isUseCompression() ? self::GZ_EXT : self::EXT;
    }
794
}
795