Completed
Push — master ( 9ef25d...153ee0 )
by Paweł
03:23
created

Sitemap::compressFiles()   B

Complexity

Conditions 7
Paths 7

Size

Total Lines 36
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 56

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 7
eloc 21
c 2
b 0
f 0
nc 7
nop 2
dl 0
loc 36
ccs 0
cts 22
cp 0
crap 56
rs 8.6506
1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * This file is part of Wszetko Sitemap.
7
 *
8
 * (c) Paweł Kłopotek-Główczewski <[email protected]>
9
 *
10
 * This source file is subject to the MIT license that is bundled
11
 * with this source code in the file LICENSE.
12
 */
13
14
namespace Wszetko\Sitemap;
15
16
use Exception;
17
use RecursiveDirectoryIterator;
18
use RecursiveIteratorIterator;
19
use RegexIterator;
20
use Wszetko\Sitemap\Drivers\XML\XMLWriter;
21
use Wszetko\Sitemap\Interfaces\DataCollector;
22
use Wszetko\Sitemap\Interfaces\XML;
23
use Wszetko\Sitemap\Traits\Domain;
24
25
/**
26
 * Sitemap
27
 * This class is used for generating Google Sitemap files.
28
 *
29
 * @package    Sitemap
30
 *
31
 * @author     Paweł Kłopotek-Główczewski <[email protected]>
32
 * @copyright  2019 Paweł Kłopotek-Głowczewski (https://pawelkg.com/)
33
 * @license    https://opensource.org/licenses/MIT MIT License
34
 *
35
 * @see       https://github.com/wszetko/sitemap
36
 */
37
class Sitemap
38
{
39
    use Domain;
40
41
    /**
42
     * Available values for changefreq tag.
43
     *
44
     * @var array
45
     */
46
    public const CHANGEFREQ = [
47
        'always',
48
        'hourly',
49
        'daily',
50
        'weekly',
51
        'monthly',
52
        'yearly',
53
        'never',
54
    ];
55
56
    /**
57
     * Extension for sitemap file.
58
     *
59
     * @var string
60
     */
61
    public const EXT = '.xml';
62
63
    /**
64
     * Extension for gzipped sitemap file.
65
     *
66
     * @var string
67
     */
68
    public const GZ_EXT = '.xml.gz';
69
70
    /**
71
     * URL to Sitemap Schema.
72
     *
73
     * @var string
74
     */
75
    public const SCHEMA = 'http://www.sitemaps.org/schemas/sitemap/0.9';
76
77
    /**
78
     * Limit of items in Sitemap files.
79
     *
80
     * @var int
81
     */
82
    public const ITEM_PER_SITEMAP = 50000;
83
84
    /**
85
     * Limit of Sitemaps in a single Sitemap index file.
86
     *
87
     * @var int
88
     */
89
    public const SITEMAP_PER_SITEMAPINDEX = 1000;
90
91
    /**
92
     * Maximum size of a single sitemap file in bytes (~49.6 MB), leaving some room to still close the file.
93
     *
94
     * @var int
95
     */
96
    public const SITEMAP_MAX_SIZE = 52000000;
97
98
    /**
99
     * Path on disk to public directory.
100
     *
101
     * @var string
102
     */
103
    private $publicDirectory = '';
104
105
    /**
106
     * Path related to public directory to dir where sitemaps will be.
107
     *
108
     * @var string
109
     */
110
    private $sitepamsDirectory = '';
111
112
    /**
113
     * Path to temporary directory.
114
     *
115
     * @var string
116
     */
117
    private $sitemapTempDirectory = '';
118
119
    /**
120
     * Default filename for sitemap file.
121
     *
122
     * @var string
123
     */
124
    private $defaultFilename = 'sitemap';
125
126
    /**
127
     * Name of index file.
128
     *
129
     * @var string
130
     */
131
    private $indexFilename = 'index';
132
133
    /**
134
     * DataCollector instance.
135
     *
136
     * @var DataCollector
137
     */
138
    private $dataCollector;
139
140
    /**
141
     * Use compression.
142
     *
143
     * @var bool
144
     */
145
    private $useCompression = false;
146
147
    /**
148
     * XML Writer object.
149
     *
150
     * @var XML
151
     */
152
    private $xml;
153
154
    /**
155
     * Separator to be used in Sitemap filenames.
156
     *
157
     * @var string
158
     */
159
    private $separator = '-'; // ~49,6MB - to have some limit to close file
160
161
    /**
     * Constructor.
     *
     * @param null|string $domain domain passed to setDomain(); when null no
     *                            domain is set during construction
     */
    public function __construct(?string $domain = null)
    {
        // Explicit nullable type: relying on "= null" to make the parameter
        // nullable is deprecated in recent PHP versions.
        if (null !== $domain) {
            $this->setDomain($domain);
        }
    }
172
173
    /**
     * Register a single URL item in the data collector.
     *
     * The group name is stripped of all non-word characters and lower-cased;
     * when nothing is left (or no group was given) the default filename is
     * used as the group. The item receives this sitemap's domain before it is
     * handed to the collector.
     *
     * @param Items\Url   $item
     * @param null|string $group
     *
     * @throws \Exception when no DataCollector has been set
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function addItem(Items\Url $item, ?string $group = null): self
    {
        $groupName = null === $group ? null : preg_replace('/\W+/', '', $group);

        if (empty($groupName)) {
            $groupName = $this->getDefaultFilename();
        }

        $groupName = mb_strtolower($groupName);
        $item->setDomain($this->getDomain());

        $collector = $this->getDataCollector();

        if (null === $collector) {
            throw new \Exception('DataCollector is not set.');
        }

        $collector->add($item, $groupName);

        return $this;
    }
202
203
    /**
     * Register several URL items at once, all under the same group.
     *
     * Every element is forwarded to addItem(), so each one has to be an
     * Items\Url instance.
     *
     * @param array       $items
     * @param null|string $group
     *
     * @throws \Exception when addItem() fails, e.g. no DataCollector is set
     *
     * @return $this
     */
    public function addItems(array $items, ?string $group = null): self
    {
        foreach ($items as $url) {
            $this->addItem($url, $group);
        }

        return $this;
    }
219
220
    /**
     * Return the base filename used for sitemap files.
     *
     * addItem() also uses this value as the group name when no usable group
     * is supplied.
     *
     * @return string
     */
    public function getDefaultFilename(): string
    {
        return $this->defaultFilename;
    }
229
230
    /**
     * Change the base filename used for sitemap files.
     *
     * @param string $defaultFilename new base filename, stored as given
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDefaultFilename(string $defaultFilename): self
    {
        $this->defaultFilename = $defaultFilename;

        return $this;
    }
243
244
    /**
     * Get the DataCollector object.
     *
     * @return null|DataCollector the configured collector, or null when
     *                            setDataCollector() has not been called yet
     */
    public function getDataCollector(): ?DataCollector
    {
        return $this->dataCollector;
    }
253
254
    /**
     * Instantiate and register the data collector driver.
     *
     * The short driver name is resolved inside the
     * \Wszetko\Sitemap\Drivers\DataCollectors namespace and the resulting
     * class is constructed with $config as its only argument.
     *
     * @param string $driver short class name of the driver
     * @param array  $config configuration passed to the driver constructor
     *
     * @throws \InvalidArgumentException when the driver class does not exist
     *                                   or does not implement DataCollector
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDataCollector(string $driver, $config = []): self
    {
        $driver = '\Wszetko\Sitemap\Drivers\DataCollectors\\' . $driver;

        if (!class_exists($driver)) {
            throw new \InvalidArgumentException($driver . ' data collector does not exists.');
        }

        // Reject unrelated classes up front; otherwise the object would be
        // stored and getDataCollector() would fail later on its return type.
        if (!is_subclass_of($driver, DataCollector::class)) {
            throw new \InvalidArgumentException($driver . ' does not implement the DataCollector interface.');
        }

        $this->dataCollector = new $driver($config);

        return $this;
    }
274
275
    /**
     * Run the whole generation pipeline.
     *
     * Validates the configuration, falls back to the XMLWriter driver when no
     * XML driver was set, clears the temporary directory, writes the sitemap
     * files and the sitemap index into it and finally publishes the result.
     *
     * @throws Exception when the public directory, the domain, the
     *                   DataCollector or the XML driver is missing
     */
    public function generate(): void
    {
        if ('' === $this->getPublicDirectory()) {
            throw new Exception('Public directory is not set.');
        }

        if ('' === $this->getDomain()) {
            throw new Exception('Domain is not set.');
        }

        if (null === $this->getDataCollector()) {
            throw new Exception('DataCollector is not set.');
        }

        if (null === $this->getXml()) {
            $this->setXml(XMLWriter::class, ['domain' => $this->getDomain()]);
        }

        $writer = $this->getXml();

        // setXml() silently ignores unusable drivers, so check again.
        if (null === $writer) {
            throw new Exception('XML Driver is not set.');
        }

        $this->removeDir($this->getTempDirectory());

        // Sitemap files go into the sitemaps sub-directory of the temp dir ...
        $writer->setWorkDir($this->getSitepamsTempDirectory());
        $generated = $this->generateSitemaps();

        // ... while the index file(s) go into the temp dir itself.
        $writer->setWorkDir($this->getTempDirectory());
        $this->generateSitemapsIndex($generated);

        $this->publishSitemap();
    }
307
308
    /**
     * Return the path of the public directory.
     *
     * @return string the stored path, or an empty string when it was never set
     */
    public function getPublicDirectory(): string
    {
        return $this->publicDirectory;
    }
315
316
    /**
     * Set the public directory; the path is stored in its realpath() form.
     *
     * @param string $publicDirectory path to an existing directory
     *
     * @throws Exception when realpath() cannot resolve the given path
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setPublicDirectory(string $publicDirectory): self
    {
        $resolved = realpath($publicDirectory);

        if (!$resolved) {
            throw new Exception('Sitemap directory does not exists.');
        }

        $this->publicDirectory = $resolved;

        return $this;
    }
333
334
    /**
     * Return the XML driver.
     *
     * @return null|XML the configured driver, or null when none has been set
     */
    public function getXml(): ?XML
    {
        return $this->xml;
    }
341
342
    /**
     * Instantiate and register the XML driver.
     *
     * Nothing is changed when the class does not exist or when the created
     * object is not an XML implementation. If $config has no "domain" entry,
     * this sitemap's domain is injected before the driver is constructed.
     *
     * @param string $driver fully qualified class name of the driver
     * @param array  $config configuration passed to the driver constructor
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setXml(string $driver, array $config = []): self
    {
        if (!class_exists($driver)) {
            return $this;
        }

        if (!isset($config['domain'])) {
            $config['domain'] = $this->getDomain();
        }

        $instance = new $driver($config);

        if ($instance instanceof XML) {
            $this->xml = $instance;
        }

        return $this;
    }
364
365
    /**
     * Return the temporary working directory, creating it on first use.
     *
     * The directory is placed in the system temp dir and named "sitemap"
     * followed by an md5 hash of microtime(). The resolved path is remembered
     * and returned on every later call.
     *
     * @throws \Exception when the directory cannot be resolved
     *
     * @return string
     */
    public function getTempDirectory(): string
    {
        if (!empty($this->sitemapTempDirectory)) {
            return $this->sitemapTempDirectory;
        }

        $candidate = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'sitemap' . md5(microtime());

        if (!is_dir($candidate)) {
            mkdir($candidate);
        }

        $resolved = realpath($candidate);

        if (!$resolved) {
            throw new Exception('Can\'t get temporary directory.');
        }

        $this->sitemapTempDirectory = $resolved;

        return $this->sitemapTempDirectory;
    }
388
389
    /**
     * Return the sitemaps sub-directory inside the temporary directory.
     *
     * The directory is created (recursively, mode 0777) when it cannot be
     * resolved yet.
     *
     * @throws \Exception when the directory still cannot be resolved
     *
     * @return string
     */
    public function getSitepamsTempDirectory(): string
    {
        $target = $this->getTempDirectory() . DIRECTORY_SEPARATOR . $this->sitepamsDirectory;
        $resolved = realpath($target);

        if (!$resolved) {
            mkdir($target, 0777, true);
            $resolved = realpath($target);
        }

        if (!$resolved) {
            throw new Exception('Can\'t get temporary directory.');
        }

        return $resolved;
    }
411
412
    /**
     * Write every collected item into sitemap files.
     *
     * Items are processed group by group. Files are named
     * "<group><separator><number>.xml"; a new file is started whenever the
     * current one reaches ITEM_PER_SITEMAP entries or its size comes within
     * 20 characters of SITEMAP_MAX_SIZE. When compression is enabled the
     * files are gzipped afterwards and the returned names are the gzipped
     * ones.
     *
     * @throws Exception when the DataCollector or the XML driver is not set
     *
     * @return array generated filenames mapped to the newest "lastmod" value
     *               written to that file (null when no item provided one)
     */
    public function generateSitemaps(): array
    {
        $collector = $this->getDataCollector();

        if (null === $collector) {
            throw new Exception('DataCollector is not set.');
        }

        $xml = $this->getXml();

        if (null === $xml) {
            throw new Exception('XML Driver is not set.');
        }

        if (0 == $collector->getCount()) {
            return [];
        }

        $separator = $this->getSeparator();
        $files = [];

        foreach ($collector->getGroups() as $group) {
            if ($collector->getGroupCount($group) <= 0) {
                continue;
            }

            $groupNo = 0;
            $itemsInFile = 0;
            $fileName = $group . $separator . $groupNo . self::EXT;

            $xml->openSitemap($fileName, $collector->getExtensions());
            $files[$fileName] = null;
            // Tracks whether a sitemap is still open so it is closed exactly once.
            $isOpen = true;

            while ($element = $collector->fetch($group)) {
                $xml->addUrl($element);
                ++$itemsInFile;

                // Remember the most recent lastmod seen for the current file.
                if (isset($element['lastmod'])) {
                    $known = $files[$fileName];

                    if (!$known || strtotime($element['lastmod']) > strtotime($known)) {
                        $files[$fileName] = $element['lastmod'];
                    }
                }

                $limitReached = $itemsInFile >= self::ITEM_PER_SITEMAP
                    || $xml->getSitemapSize() >= (self::SITEMAP_MAX_SIZE - 20); // 20 chars buffer for close tag

                if (!$limitReached) {
                    continue;
                }

                $xml->closeSitemap();
                $isOpen = false;

                // Only roll over to a new file when more items are waiting.
                if (!$collector->isLast($group)) {
                    ++$groupNo;
                    $itemsInFile = 0;
                    $fileName = $group . $separator . $groupNo . self::EXT;

                    $xml->openSitemap($fileName, $collector->getExtensions());
                    $files[$fileName] = null;
                    $isOpen = true;
                }
            }

            // Skip the close when the limit check above already closed the last file.
            if ($isOpen) {
                $xml->closeSitemap();
            }
        }

        if ($this->isUseCompression() && !empty($files)) {
            $this->compressFiles($this->getSitepamsTempDirectory(), $files);
        }

        return $files;
    }
489
490
    /**
     * Return the separator placed between a base filename and its number.
     *
     * @return string
     */
    public function getSeparator(): string
    {
        return $this->separator;
    }
497
498
    /**
     * Change the separator placed between a base filename and its number.
     *
     * @param string $separator new separator, stored as given
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSeparator(string $separator): self
    {
        $this->separator = $separator;

        return $this;
    }
509
510
    /**
     * Tell whether generated files are gzip-compressed.
     *
     * @return bool
     */
    public function isUseCompression(): bool
    {
        return $this->useCompression;
    }
519
520
    /**
     * Set whether to use compression or not.
     *
     * Compression is only switched on when the zlib extension is loaded;
     * without it the flag stays false. Passing false always switches
     * compression off again.
     *
     * @param bool $useCompression
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setUseCompression(bool $useCompression): self
    {
        // Assign unconditionally so that a previous "true" can be reverted.
        $this->useCompression = $useCompression && extension_loaded('zlib');

        return $this;
    }
535
536
    /**
     * Write the sitemap index file(s) referencing the given sitemaps.
     *
     * The first index file is named "<indexFilename>.xml". After every
     * SITEMAP_PER_SITEMAPINDEX entries a further file named
     * "<indexFilename><separator><number>.xml" is started, as long as more
     * sitemaps remain. When compression is enabled the index files are
     * gzipped afterwards and the returned names are the gzipped ones.
     *
     * @param array $sitemaps sitemap filenames mapped to their lastmod value
     *
     * @throws Exception when the XML driver is not set
     *
     * @return array index filenames as keys, each mapped to null
     */
    public function generateSitemapsIndex(array $sitemaps): array
    {
        if (0 === count($sitemaps)) {
            return [];
        }

        $xml = $this->getXml();

        if (null === $xml) {
            throw new Exception('XML Driver is not set.');
        }

        // The URL prefix is identical for every entry, so build it once.
        // Directory separators are turned into "/" and an empty relative
        // directory no longer produces a doubled slash in the URL.
        $relativeDir = str_replace($this->getPublicDirectory(), '', $this->getSitepamsDirectory());
        $relativeDir = trim(str_replace(DIRECTORY_SEPARATOR, '/', $relativeDir), '/');
        $baseUrl = $this->getDomain() . '/' . ('' === $relativeDir ? '' : $relativeDir . '/');

        $counter = 0;
        $file = $this->getIndexFilename() . self::EXT;
        $files = [$file => null];
        $xml->openSitemapIndex($file);
        // Tracks whether an index is still open so it is closed exactly once.
        $isOpen = true;
        $lastItem = array_key_last($sitemaps);

        foreach ($sitemaps as $sitemap => $lastmod) {
            $xml->addSitemap($baseUrl . $sitemap, $lastmod);
            ++$counter;

            if ($counter < self::SITEMAP_PER_SITEMAPINDEX) {
                continue;
            }

            $xml->closeSitemapIndex();
            $isOpen = false;
            $counter = 0;

            // Only start another index file when more sitemaps remain.
            if ($sitemap !== $lastItem) {
                $file = $this->getIndexFilename() . $this->getSeparator() . count($files) . self::EXT;
                $files[$file] = null;
                $xml->openSitemapIndex($file);
                $isOpen = true;
            }
        }

        if ($isOpen) {
            $xml->closeSitemapIndex();
        }

        if ($this->isUseCompression() && !empty($files)) {
            $this->compressFiles($this->getTempDirectory(), $files);
        }

        return $files;
    }
588
589
    /**
     * Return the base filename (without extension) of the sitemap index file.
     *
     * @return string
     */
    public function getIndexFilename(): string
    {
        return $this->indexFilename;
    }
598
599
    /**
     * Change the base filename (without extension) of the sitemap index file.
     *
     * @param string $indexFilename new base filename, stored as given
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setIndexFilename(string $indexFilename): self
    {
        $this->indexFilename = $indexFilename;

        return $this;
    }
612
613
    /**
     * Return the sitemaps directory inside the public directory.
     *
     * The directory is created (recursively, mode 0777) when it cannot be
     * resolved yet.
     *
     * @throws \Exception when the directory still cannot be resolved
     *
     * @return string
     */
    public function getSitepamsDirectory(): string
    {
        $target = $this->getPublicDirectory() . DIRECTORY_SEPARATOR . $this->sitepamsDirectory;
        $resolved = realpath($target);

        if (!$resolved) {
            mkdir($target, 0777, true);
            $resolved = realpath($target);
        }

        if (!$resolved) {
            throw new Exception('Can\'t get sitemap directory.');
        }

        return $resolved;
    }
631
632
    /**
     * Set the sitemaps directory, given relative to the public directory.
     *
     * The same relative path is also used below the temporary directory
     * while the files are being generated.
     *
     * @param string $sitepamsDirectory relative path, stored as given
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSitepamsDirectory(string $sitepamsDirectory): self
    {
        $this->sitepamsDirectory = $sitepamsDirectory;

        return $this;
    }
643
644
    /**
     * Delete a directory together with everything inside it.
     *
     * Does nothing when the path is not a directory or cannot be listed.
     *
     * @param string $dir
     *
     * @return void
     */
    private function removeDir($dir): void
    {
        if (!is_dir($dir)) {
            return;
        }

        $entries = scandir($dir);

        if (!$entries) {
            return;
        }

        foreach ($entries as $entry) {
            if (in_array($entry, ['.', '..'], true)) {
                continue;
            }

            $path = $dir . '/' . $entry;

            // filetype() does not follow symlinks, so links are unlinked
            // instead of being descended into.
            if ('dir' == filetype($path)) {
                $this->removeDir($path);
            } else {
                unlink($path);
            }
        }

        rmdir($dir);
    }
665
666
    /**
     * Gzip every listed file inside $dir and delete the uncompressed original.
     *
     * Each "<name>.xml" file is written to "<name>.xml.gz" at the highest
     * compression level. On success $files is replaced, by reference, with
     * the compressed filenames mapped to the same values as before.
     *
     * @param string $dir   directory containing the files
     * @param array  $files filenames (keys) mapped to their lastmod value;
     *                      replaced with the gzipped filenames
     *
     * @throws Exception when zlib is missing or a file cannot be opened,
     *                   read or written
     *
     * @return void
     */
    private function compressFiles(string $dir, array &$files): void
    {
        if (!extension_loaded('zlib')) {
            throw new Exception('Extension zlib is not loaded.');
        }

        $newFiles = [];

        foreach ($files as $file => $lastmod) {
            $file = (string) $file;
            $source = $dir . DIRECTORY_SEPARATOR . $file;
            // Strip the plain extension by its real length instead of a magic 4.
            $gzFile = mb_substr($file, 0, mb_strlen($file) - mb_strlen(self::EXT)) . self::GZ_EXT;
            $output = $dir . DIRECTORY_SEPARATOR . $gzFile;

            // Open the source first so a missing input never leaves an empty
            // archive (or an open archive handle) behind.
            $in = fopen($source, 'rb');

            if (!$in) {
                throw new Exception('Can\'t open xml file.');
            }

            $out = gzopen($output, 'wb9');

            if (!$out) {
                fclose($in);

                throw new Exception('Can\'t create GZip archive.');
            }

            $completed = false;

            try {
                while (!feof($in)) {
                    $content = fread($in, 524288);

                    if (false === $content) {
                        throw new Exception('Can\'t read xml file.');
                    }

                    // Compare against '' explicitly: a chunk equal to "0" is
                    // falsy but still has to be written.
                    if ('' !== $content && !gzwrite($out, $content)) {
                        throw new Exception('Can\'t write GZip archive.');
                    }
                }

                $completed = true;
            } finally {
                fclose($in);
                gzclose($out);

                // Do not leave a truncated archive next to the intact source.
                if (!$completed && is_file($output)) {
                    unlink($output);
                }
            }

            unlink($source);
            $newFiles[$gzFile] = $lastmod;
        }

        $files = $newFiles;
    }
711
712
    /**
     * Replace the published sitemaps with the freshly generated ones.
     *
     * Removes the public sitemaps directory and the old index files from the
     * public directory, moves every generated file with the current
     * extension from the temporary directory to the matching location below
     * the public directory and finally removes the temporary directory.
     *
     * @throws \Exception when a directory cannot be resolved or a generated
     *                    file cannot be moved
     *
     * @return void
     */
    private function publishSitemap(): void
    {
        // Clear previous sitemaps
        $this->removeDir($this->getSitepamsDirectory());

        $publicDirectory = $this->getPublicDirectory();

        // Quote every dynamic part: the extension contains "." and the
        // filename/separator are user supplied. The separator is the
        // configured one, matching how index files are named on creation.
        $ext = preg_quote($this->getExt(), '/');
        $indexPattern = '/^(' . preg_quote($this->getIndexFilename(), '/') . ')('
            . preg_quote($this->getSeparator(), '/') . '\d+)?(' . $ext . ')$/';

        if ($publicDir = scandir($publicDirectory)) {
            foreach ($publicDir as $file) {
                if (1 === preg_match($indexPattern, $file)) {
                    unlink($publicDirectory . DIRECTORY_SEPARATOR . $file);
                }
            }
        }

        $this->getSitepamsDirectory(); //To create sitemaps directory

        $tempDirectory = $this->getTempDirectory();
        $iterator = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($tempDirectory));
        $files = new RegexIterator(
            $iterator,
            "/^(?'path'(([a-zA-Z]:)|((\\\\|\\/){1,2}\\w+)?)((\\\\|\\/)(\\w[\\w ]*.*))+({$ext}){1})$/",
            RegexIterator::GET_MATCH
        );

        // Collect the paths first so the tree is not modified while iterating.
        $fileList = [];

        foreach ($files as $file) {
            if (isset($file['path'])) {
                $fileList[] = $file['path'];
            }
        }

        foreach ($fileList as $file) {
            $destination = str_replace($tempDirectory, $publicDirectory, $file);

            // Stop before the temp directory (the only copy) gets deleted.
            if (!rename($file, $destination)) {
                throw new Exception('Can\'t move sitemap file to public directory.');
            }
        }

        $this->removeDir($tempDirectory);
    }
759
760
    /**
     * Return the file extension matching the current compression setting.
     *
     * @return string GZ_EXT when compression is enabled, EXT otherwise
     */
    private function getExt(): string
    {
        return $this->isUseCompression() ? self::GZ_EXT : self::EXT;
    }
771
}
772