Completed
Push — master ( 3be2b4...9bcc1d )
by Paweł
02:33
created

Sitemap   F

Complexity

Total Complexity 91

Size/Duplication

Total Lines 748
Duplicated Lines 0 %

Test Coverage

Coverage 36.29%

Importance

Changes 6
Bugs 1 Features 0
Metric Value
eloc 248
c 6
b 1
f 0
dl 0
loc 748
ccs 90
cts 248
cp 0.3629
rs 2
wmc 91

28 Methods

Rating   Name   Duplication   Size   Complexity  
A getPublicDirectory() 0 3 1
A getDefaultFilename() 0 3 1
A setDefaultFilename() 0 5 1
A __construct() 0 4 2
A setDataCollector() 0 15 3
A generate() 0 20 4
A addItems() 0 7 2
A getDataCollector() 0 6 2
C generateSitemaps() 0 67 13
A getExt() 0 7 2
A getSitepamsTempDirectory() 0 18 3
A isUseCompression() 0 3 1
B publishSitemap() 0 44 7
A setIndexFilename() 0 5 1
A setXml() 0 15 4
A getSeparator() 0 3 1
A setUseCompression() 0 7 3
A getIndexFilename() 0 3 1
A setPublicDirectory() 0 11 2
B removeDir() 0 20 7
A getTempDirectory() 0 19 5
B compressFiles() 0 38 7
A setSitepamsDirectory() 0 5 1
A setSeparator() 0 5 1
A getXml() 0 7 2
A getSitepamsDirectory() 0 14 3
A addItem() 0 15 4
B generateSitemapsIndex() 0 40 7

How to fix   Complexity   

Complex Class

Complex classes like Sitemap often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use Sitemap, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * This file is part of Wszetko Sitemap.
7
 *
8
 * (c) Paweł Kłopotek-Główczewski <[email protected]>
9
 *
10
 * This source file is subject to the MIT license that is bundled
11
 * with this source code in the file LICENSE.
12
 */
13
14
namespace Wszetko\Sitemap;
15
16
use Exception;
17
use InvalidArgumentException;
18
use RecursiveDirectoryIterator;
19
use RecursiveIteratorIterator;
20
use RegexIterator;
21
use Wszetko\Sitemap\Drivers\DataCollectors\AbstractDataCollector;
22
use Wszetko\Sitemap\Drivers\Output\OutputXMLWriter;
23
use Wszetko\Sitemap\Interfaces\DataCollector;
24
use Wszetko\Sitemap\Interfaces\XML;
25
use Wszetko\Sitemap\Traits\Domain;
26
27
/**
28
 * Sitemap
29
 * This class used for generating Google Sitemap files.
30
 *
31
 * @package    Sitemap
32
 *
33
 * @author     Paweł Kłopotek-Główczewski <[email protected]>
34
 * @copyright  2019 Paweł Kłopotek-Głowczewski (https://pawelkg.com/)
35
 * @license    https://opensource.org/licenses/MIT MIT License
36
 *
37
 * @see       https://github.com/wszetko/sitemap
38
 */
39
class Sitemap
40
{
41
    use Domain;
42
43
    /**
44
     * Available values for the changefreq tag.
45
     *
46
     * @var array
47
     */
48
    public const CHANGEFREQ = [
49
        'always',
50
        'hourly',
51
        'daily',
52
        'weekly',
53
        'monthly',
54
        'yearly',
55
        'never',
56
    ];
57
58
    /**
59
     * Extension for sitemap file.
60
     *
61
     * @var string
62
     */
63
    public const EXT = '.xml';
64
65
    /**
66
     * Extension for gzipped sitemap file.
67
     *
68
     * @var string
69
     */
70
    public const GZ_EXT = '.xml.gz';
71
72
    /**
73
     * URL to Sitemap Schema.
74
     *
75
     * @var string
76
     */
77
    public const SCHEMA = 'http://www.sitemaps.org/schemas/sitemap/0.9';
78
79
    /**
80
     * Limit of items in Sitemap files.
81
     *
82
     * @var int
83
     */
84
    public const ITEM_PER_SITEMAP = 50000;
85
86
    /**
87
     * Limit of Sitemaps in a SitemapsIndex.
88
     *
89
     * @var int
90
     */
91
    public const SITEMAP_PER_SITEMAPINDEX = 1000;
92
93
    /**
94
     * Size limit of a single sitemap file (~49.6 MB, leaving room to close the file).
95
     *
96
     * @var int
97
     */
98
    public const SITEMAP_MAX_SIZE = 52000000;
99
100
    /**
101
     * Path on disk to public directory.
102
     *
103
     * @var string
104
     */
105
    private $publicDirectory = '';
106
107
    /**
108
     * Path related to public directory to dir where sitemaps will be.
109
     *
110
     * @var string
111
     */
112
    private $sitepamsDirectory = '';
113
114
    /**
115
     * Path to temporary directory.
116
     *
117
     * @var string
118
     */
119
    private $sitemapTempDirectory = '';
120
121
    /**
122
     * Default filename for sitemap file.
123
     *
124
     * @var string
125
     */
126
    private $defaultFilename = 'sitemap';
127
128
    /**
129
     * Name of index file.
130
     *
131
     * @var string
132
     */
133
    private $indexFilename = 'index';
134
135
    /**
136
     * DataCollector instance.
137
     *
138
     * @var DataCollector
139
     */
140
    private $dataCollector;
141
142
    /**
143
     * Use compression.
144
     *
145
     * @var bool
146
     */
147
    private $useCompression = false;
148
149
    /**
150
     * XML Writer object.
151
     *
152
     * @var XML
153
     */
154
    private $xml;
155
156
    /**
157
     * Separator to be used in Sitemap filenames.
158
     *
159
     * @var string
160
     */
161
    private $separator = '-'; // ~49,6MB - to have some limit to close file
162
163
    /**
     * Constructor.
     *
     * When a domain is given it is forwarded to setDomain() (provided by the
     * Domain trait); when it is null no domain is set.
     *
     * @param null|string $domain
     *
     * @throws \InvalidArgumentException
     */
    public function __construct(?string $domain = null)
    {
        // Explicit nullable type: the implicit "string $domain = null" form is deprecated.
        if (null !== $domain) {
            $this->setDomain($domain);
        }
    }
176
177
    /**
     * Add a single URL item to the data collector under a group.
     *
     * Non-word characters are stripped from the group name. When no group is
     * given, or nothing remains after stripping, the default filename is used.
     * The group name is lower-cased and the item receives this sitemap's domain.
     *
     * @param Items\Url   $item
     * @param null|string $group
     *
     * @throws \Exception
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function addItem(Items\Url $item, ?string $group = null): self
    {
        $name = is_string($group) ? preg_replace('/\W+/', '', $group) : $group;

        // preg_replace() yields null on failure, so both cases fall back to the default.
        if (null === $name || '' === $name) {
            $name = $this->getDefaultFilename();
        }

        $name = mb_strtolower($name);
        $item->setDomain($this->getDomain());
        $this->getDataCollector()->add($item, $name);

        return $this;
    }
201
202
    /**
     * Add several items to the same group by calling addItem() for each one.
     *
     * @param array       $items
     * @param null|string $group
     *
     * @throws \Exception
     *
     * @return $this
     */
    public function addItems(array $items, ?string $group = null): self
    {
        foreach ($items as $url) {
            $this->addItem($url, $group);
        }

        return $this;
    }
218
219
    /**
     * Return the filename used for sitemap files when no group name applies.
     *
     * @return string
     */
    public function getDefaultFilename(): string
    {
        return $this->defaultFilename;
    }
228
229
    /**
     * Store the filename used for sitemap files when no group name applies.
     *
     * @param string $defaultFilename
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDefaultFilename(string $defaultFilename): self
    {
        $this->defaultFilename = $defaultFilename;

        return $this;
    }
242
243
    /**
     * Get the DataCollector object.
     *
     * @return DataCollector
     *
     * @throws \Exception when no data collector has been set
     */
    public function getDataCollector(): DataCollector
    {
        if (null !== $this->dataCollector) {
            return $this->dataCollector;
        }

        throw new Exception('DataCollector is not set.');
    }
257
258
    /**
     * Instantiate the given data collector driver and store it.
     *
     * The driver class is constructed with $config as its only argument and
     * must be an AbstractDataCollector.
     *
     * @param string $driver
     * @param array  $config
     *
     * @throws \InvalidArgumentException when the class does not exist or is
     *                                   not an AbstractDataCollector
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDataCollector(string $driver, $config = []): self
    {
        // Both failure modes report the same message.
        $message = $driver . ' data collector does not exists.';

        if (!class_exists($driver)) {
            throw new InvalidArgumentException($message);
        }

        $collector = new $driver($config);

        if (!($collector instanceof AbstractDataCollector)) {
            throw new InvalidArgumentException($message);
        }

        $this->dataCollector = $collector;

        return $this;
    }
282
283
    /**
     * Run the whole pipeline: write sitemaps and their index into the
     * temporary directory, then publish them.
     *
     * Falls back to OutputXMLWriter when no XML writer has been set.
     *
     * @throws Exception when the public directory or the domain is not set
     */
    public function generate(): void
    {
        if ('' === $this->getPublicDirectory()) {
            throw new Exception('Public directory is not set.');
        }

        if ('' === $this->getDomain()) {
            throw new Exception('Domain is not set.');
        }

        if (null === $this->xml) {
            $this->setXml(OutputXMLWriter::class, ['domain' => $this->getDomain()]);
        }

        // Start from a clean temporary directory.
        $this->removeDir($this->getTempDirectory());

        $writer = $this->getXml();

        // Sitemaps go to the sitemaps sub-directory, the index to the temp root.
        $writer->setWorkDir($this->getSitepamsTempDirectory());
        $sitemaps = $this->generateSitemaps();

        $writer->setWorkDir($this->getTempDirectory());
        $this->generateSitemapsIndex($sitemaps);

        $this->publishSitemap();
    }
307
308
    /**
     * Return the stored path to the public directory (empty string when unset).
     *
     * @return string
     */
    public function getPublicDirectory(): string
    {
        return $this->publicDirectory;
    }
315
316
    /**
     * Resolve the given path with realpath() and store it as the public directory.
     *
     * @param string $publicDirectory
     *
     * @throws Exception when the path cannot be resolved
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setPublicDirectory(string $publicDirectory): self
    {
        $resolved = realpath($publicDirectory);

        if (false !== $resolved) {
            $this->publicDirectory = $resolved;

            return $this;
        }

        throw new Exception('Sitemap directory does not exists.');
    }
335
336
    /**
     * Return the XML writer object.
     *
     * @return XML
     *
     * @throws \Exception when no XML writer has been set
     */
    public function getXml(): XML
    {
        if (null !== $this->xml) {
            return $this->xml;
        }

        throw new Exception('XML writer class is not set.');
    }
349
350
    /**
     * Instantiate the given XML writer driver and store it.
     *
     * The driver is constructed with $config; a missing 'domain' entry is
     * filled in from getDomain(). A driver class that does not exist, or that
     * does not implement XML, is silently ignored and the current writer is kept.
     *
     * @param string $driver
     * @param array  $config
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setXml(string $driver, array $config = []): self
    {
        if (!class_exists($driver)) {
            return $this;
        }

        if (!isset($config['domain'])) {
            $config['domain'] = $this->getDomain();
        }

        $writer = new $driver($config);

        if ($writer instanceof XML) {
            $this->xml = $writer;
        }

        return $this;
    }
372
373
    /**
     * Return the temporary working directory, creating it on first use.
     *
     * The directory lives under sys_get_temp_dir() and is named "sitemap"
     * followed by an md5 hash of microtime(). The resolved path is cached.
     *
     * @throws \Exception when the directory cannot be resolved
     *
     * @return string
     */
    public function getTempDirectory(): string
    {
        // Reuse the cached path when one has already been determined.
        if (null !== $this->sitemapTempDirectory && '' != $this->sitemapTempDirectory) {
            return $this->sitemapTempDirectory;
        }

        $path = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'sitemap' . md5(microtime());

        if (!is_dir($path)) {
            mkdir($path);
        }

        $resolved = realpath($path);

        if (false === $resolved) {
            throw new Exception('Can\'t get temporary directory.');
        }

        $this->sitemapTempDirectory = $resolved;

        return $this->sitemapTempDirectory;
    }
398
399
    /**
     * Return the sitemaps sub-directory inside the temporary directory,
     * creating it (recursively) when it does not exist yet.
     *
     * @throws \Exception when the directory cannot be resolved
     *
     * @return string
     */
    public function getSitepamsTempDirectory(): string
    {
        $path = $this->getTempDirectory() . DIRECTORY_SEPARATOR . $this->sitepamsDirectory;
        $resolved = realpath($path);

        if (false === $resolved) {
            mkdir($path, 0777, true);
            $resolved = realpath($path);
        }

        if (false === $resolved) {
            throw new Exception('Can\'t get temporary directory.');
        }

        return $resolved;
    }
423
424
    /**
     * Write every collected item into sitemap files, one series per group.
     *
     * Files are named "<group><separator><n>.xml". A new file is started when
     * the current one reaches ITEM_PER_SITEMAP items or gets within 20 units of
     * SITEMAP_MAX_SIZE (room for the closing tag). When compression is enabled
     * the files are gzipped afterwards and the returned keys carry the gzip extension.
     *
     * @throws Exception
     *
     * @return array map of generated filename => newest "lastmod" value seen
     *               in that file, or null when no item carried one
     */
    public function generateSitemaps(): array
    {
        $collector = $this->getDataCollector();

        if (0 == $collector->getCount()) {
            return [];
        }

        $files = [];

        foreach ($collector->getGroups() as $group) {
            if ($collector->getGroupCount($group) <= 0) {
                continue;
            }

            $xml = $this->getXml();
            $groupNo = 0;
            $itemsInFile = 0;
            $filename = $group . $this->getSeparator() . $groupNo . self::EXT;

            $xml->openSitemap($filename, $collector->getExtensions());
            $files[$filename] = null;
            // Tracks whether a sitemap is currently open so it is closed exactly once.
            $isOpen = true;

            while ($element = $collector->fetch($group)) {
                $xml->addUrl($element);
                ++$itemsInFile;

                if (isset($element['lastmod'])) {
                    $newest = $files[$filename];

                    // Keep the most recent lastmod of the file.
                    if (!$newest || strtotime($element['lastmod']) > strtotime($newest)) {
                        $files[$filename] = $element['lastmod'];
                    }
                }

                // self::SITEMAP_MAX_SIZE - 20 for buffer for close tag
                if (
                    $itemsInFile >= self::ITEM_PER_SITEMAP
                    || $xml->getSitemapSize() >= (self::SITEMAP_MAX_SIZE - 20)
                ) {
                    $xml->closeSitemap();
                    $isOpen = false;

                    if (!$collector->isLast($group)) {
                        ++$groupNo;
                        $itemsInFile = 0;
                        $filename = $group . $this->getSeparator() . $groupNo . self::EXT;

                        $xml->openSitemap($filename, $collector->getExtensions());
                        $files[$filename] = null;
                        $isOpen = true;
                    }
                }
            }

            // Skip the final close when the last item already closed the file.
            if ($isOpen) {
                $xml->closeSitemap();
            }
        }

        if ($this->isUseCompression() && [] !== $files) {
            $this->compressFiles($this->getSitepamsTempDirectory(), $files);
        }

        return $files;
    }
497
498
    /**
     * Return the separator placed between the name and the number in filenames.
     *
     * @return string
     */
    public function getSeparator(): string
    {
        return $this->separator;
    }
505
506
    /**
     * Store the separator placed between the name and the number in filenames.
     *
     * @param string $separator
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSeparator(string $separator): self
    {
        $this->separator = $separator;

        return $this;
    }
517
518
    /**
     * Tell whether generated files are going to be gzip-compressed.
     *
     * @return bool
     */
    public function isUseCompression(): bool
    {
        return $this->useCompression;
    }
527
528
    /**
     * Set whether to use compression or not.
     *
     * Compression is only enabled when the zlib extension is loaded; asking
     * for it without zlib leaves compression off. Passing false always
     * disables compression, including after it was previously enabled.
     *
     * @param bool $useCompression
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setUseCompression(bool $useCompression): self
    {
        $this->useCompression = $useCompression && extension_loaded('zlib');

        return $this;
    }
543
544
    /**
     * Write sitemap index file(s) that reference the given sitemaps.
     *
     * The first index is "<indexFilename>.xml"; once SITEMAP_PER_SITEMAPINDEX
     * entries have been written a further index named
     * "<indexFilename><separator><n>.xml" is started. Each entry URL is
     * "<domain>/<sitemaps dir relative to the public dir>/<file>", built with
     * forward slashes and without an empty path segment when the sitemaps
     * directory is the public directory itself. When compression is enabled
     * the index files are gzipped and the returned keys carry the gzip extension.
     *
     * @param array $sitemaps map of sitemap filename => lastmod, as returned
     *                        by generateSitemaps()
     *
     * @throws Exception
     *
     * @return array map of index filename => null
     */
    public function generateSitemapsIndex(array $sitemaps): array
    {
        if ([] === $sitemaps) {
            return [];
        }

        // The URL prefix is identical for every entry, so compute it once.
        $publicDirectory = $this->getPublicDirectory();
        $relative = $this->getSitepamsDirectory();

        if ('' !== $publicDirectory && 0 === strpos($relative, $publicDirectory)) {
            $relative = substr($relative, strlen($publicDirectory));
        }

        // Filesystem separators are not valid URL separators on every platform.
        $relative = trim(str_replace('\\', '/', $relative), '/');
        $baseUrl = (string) $this->getDomain() . '/' . ('' === $relative ? '' : $relative . '/');

        $xml = $this->getXml();
        $counter = 0;
        $file = $this->getIndexFilename() . self::EXT;
        $files = [$file => null];
        $xml->openSitemapIndex($file);
        // Tracks whether an index is currently open so it is closed exactly once.
        $isOpen = true;
        $lastItem = array_key_last($sitemaps);

        foreach ($sitemaps as $sitemap => $lastmod) {
            $xml->addSitemap($baseUrl . $sitemap, $lastmod);
            ++$counter;

            if ($counter < self::SITEMAP_PER_SITEMAPINDEX) {
                continue;
            }

            $xml->closeSitemapIndex();
            $isOpen = false;
            $counter = 0;

            if ($sitemap !== $lastItem) {
                $file = $this->getIndexFilename() . $this->getSeparator() . count($files) . self::EXT;
                $files[$file] = null;
                $xml->openSitemapIndex($file);
                $isOpen = true;
            }
        }

        if ($isOpen) {
            $xml->closeSitemapIndex();
        }

        if ($this->isUseCompression() && [] !== $files) {
            $this->compressFiles($this->getTempDirectory(), $files);
        }

        return $files;
    }
592
593
    /**
     * Return the base filename (without extension) of the sitemap index file.
     *
     * @return string
     */
    public function getIndexFilename(): string
    {
        return $this->indexFilename;
    }
602
603
    /**
     * Store the base filename (without extension) of the sitemap index file.
     *
     * @param string $indexFilename
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setIndexFilename(string $indexFilename): self
    {
        $this->indexFilename = $indexFilename;

        return $this;
    }
616
617
    /**
     * Return the resolved sitemaps directory inside the public directory,
     * creating it (recursively) when it does not exist yet.
     *
     * @throws \Exception when the directory cannot be resolved
     *
     * @return string
     */
    public function getSitepamsDirectory(): string
    {
        $path = $this->getPublicDirectory() . DIRECTORY_SEPARATOR . $this->sitepamsDirectory;
        $resolved = realpath($path);

        if (false === $resolved) {
            mkdir($path, 0777, true);
            $resolved = realpath($path);
        }

        if (false === $resolved) {
            throw new Exception('Can\'t get sitemap directory.');
        }

        return $resolved;
    }
637
638
    /**
     * Store the sitemaps directory, given relative to the public directory.
     *
     * @param string $sitepamsDirectory
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSitepamsDirectory(string $sitepamsDirectory): self
    {
        $this->sitepamsDirectory = $sitepamsDirectory;

        return $this;
    }
649
650
    /**
     * Recursively delete a directory together with everything inside it.
     *
     * Does nothing when $dir is not an existing directory. When the directory
     * cannot be listed, nothing is deleted.
     *
     * @param string $dir
     *
     * @return void
     */
    private function removeDir($dir): void
    {
        // Only existing directories can be scanned and removed.
        if (!is_dir($dir)) {
            return;
        }

        $objects = scandir($dir);

        if (false === $objects) {
            return;
        }

        foreach ($objects as $object) {
            if ('.' === $object || '..' === $object) {
                continue;
            }

            $path = $dir . '/' . $object;

            // filetype() reports symlinks as "link", so they are unlinked, not followed.
            if ('dir' === filetype($path)) {
                $this->removeDir($path);
            } else {
                unlink($path);
            }
        }

        rmdir($dir);
    }
677
678
    /**
     * Gzip every listed file inside $dir and delete the uncompressed original.
     *
     * The last four characters of each filename (the ".xml" extension) are
     * replaced with GZ_EXT. On return $files holds the compressed filenames
     * mapped to the same lastmod values.
     *
     * @param string $dir
     * @param array  $files map of filename => lastmod; replaced in place
     *
     * @throws Exception when zlib is missing, or a file cannot be opened,
     *                   read or created
     *
     * @return void
     */
    private function compressFiles(string $dir, array &$files): void
    {
        if (!extension_loaded('zlib')) {
            throw new Exception('Extension zlib is not loaded.');
        }

        $newFiles = [];

        foreach ($files as $file => $lastmod) {
            $source = $dir . DIRECTORY_SEPARATOR . $file;
            $gzFile = mb_substr($file, 0, mb_strlen($file) - 4) . self::GZ_EXT;

            // Open the source first so a missing file does not leave an empty archive behind.
            $in = fopen($source, 'rb');

            if (false === $in) {
                throw new Exception('Can\'t open xml file.');
            }

            $out = gzopen($dir . DIRECTORY_SEPARATOR . $gzFile, 'wb9');

            if (false === $out) {
                fclose($in);

                throw new Exception('Can\'t create GZip archive.');
            }

            try {
                while (!feof($in)) {
                    $content = fread($in, 524288);

                    // A failing read would otherwise spin forever without reaching EOF.
                    if (false === $content) {
                        throw new Exception('Can\'t read xml file.');
                    }

                    gzwrite($out, $content);
                }
            } finally {
                // Release both handles on success and on failure alike.
                fclose($in);
                gzclose($out);
            }

            unlink($source);
            $newFiles[$gzFile] = $lastmod;
        }

        $files = $newFiles;
    }
725
726
    /**
     * Replace the published sitemaps with the freshly generated ones.
     *
     * Steps: remove the previous sitemaps directory, delete previous index
     * files from the public directory, move every generated file with the
     * current extension from the temporary directory to the matching location
     * under the public directory, then delete the temporary directory.
     *
     * @throws \Exception when a generated file cannot be moved
     *
     * @return void
     */
    private function publishSitemap(): void
    {
        $publicDirectory = $this->getPublicDirectory();
        $sitemapsDirectory = $this->getSitepamsDirectory();

        // Clear previous sitemaps. When sitemaps are stored directly in the
        // public directory, removing it would wipe the whole public directory.
        if ($sitemapsDirectory !== $publicDirectory) {
            $this->removeDir($sitemapsDirectory);
        }

        $publicDir = scandir($publicDirectory);

        if (is_array($publicDir)) {
            // Quote the dynamic parts so "." and other metacharacters match literally.
            $pattern = '/^' . preg_quote($this->getIndexFilename(), '/')
                . '(' . preg_quote($this->getSeparator(), '/') . '\d+)?'
                . preg_quote($this->getExt(), '/') . '$/';

            foreach ($publicDir as $file) {
                if (1 === preg_match($pattern, $file)) {
                    unlink($publicDirectory . DIRECTORY_SEPARATOR . $file);
                }
            }
        }

        $this->getSitepamsDirectory(); //To create sitemaps directory

        $tempDirectory = $this->getTempDirectory();
        $ext = $this->getExt();
        $extLength = strlen($ext);
        $fileList = [];

        // Collect first, move afterwards, so the tree is not changed while iterating.
        $iterator = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($tempDirectory));

        foreach ($iterator as $fileInfo) {
            if (!$fileInfo->isFile()) {
                continue;
            }

            $path = $fileInfo->getPathname();

            if (substr($path, -$extLength) === $ext) {
                $fileList[] = $path;
            }
        }

        foreach ($fileList as $file) {
            // Every collected path starts with the temporary directory.
            $destination = $publicDirectory . substr($file, strlen($tempDirectory));

            // Fail loudly: the temporary copy is deleted right after this loop.
            if (!rename($file, $destination)) {
                throw new Exception('Can\'t publish sitemap file.');
            }
        }

        $this->removeDir($tempDirectory);
    }
776
777
    /**
     * Return the file extension in effect: GZ_EXT with compression, EXT without.
     *
     * @return string
     */
    private function getExt(): string
    {
        return $this->isUseCompression() ? self::GZ_EXT : self::EXT;
    }
788
}
789