Completed
Push — master ( 7fe6b0...d8a5ea )
by Paweł
02:19
created

Sitemap::generateSitemaps()   C

Complexity

Conditions 13
Paths 31

Size

Total Lines 63
Code Lines 38

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 182

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 13
eloc 38
c 3
b 0
f 0
nc 31
nop 0
dl 0
loc 63
ccs 0
cts 38
cp 0
crap 182
rs 6.6166

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * This file is part of Wszetko Sitemap.
7
 *
8
 * (c) Paweł Kłopotek-Główczewski <[email protected]>
9
 *
10
 * This source file is subject to the MIT license that is bundled
11
 * with this source code in the file LICENSE.
12
 */
13
14
namespace Wszetko\Sitemap;
15
16
use Exception;
17
use RecursiveDirectoryIterator;
18
use RecursiveIteratorIterator;
19
use RegexIterator;
20
use Wszetko\Sitemap\Drivers\XML\XMLWriter;
21
use Wszetko\Sitemap\Interfaces\DataCollector;
22
use Wszetko\Sitemap\Interfaces\XML;
23
use Wszetko\Sitemap\Traits\Domain;
24
25
/**
26
 * Sitemap
27
 * This class is used for generating Google Sitemap files.
28
 *
29
 * @package    Sitemap
30
 *
31
 * @author     Paweł Kłopotek-Główczewski <[email protected]>
32
 * @copyright  2019 Paweł Kłopotek-Głowczewski (https://pawelkg.com/)
33
 * @license    https://opensource.org/licenses/MIT MIT License
34
 *
35
 * @see       https://github.com/wszetko/sitemap
36
 */
37
class Sitemap
38
{
39
    use Domain;
40
41
    /**
42
     * Available values for changefreq tag.
43
     *
44
     * @var array
45
     */
46
    public const CHANGEFREQ = [
47
        'always',
48
        'hourly',
49
        'daily',
50
        'weekly',
51
        'monthly',
52
        'yearly',
53
        'never',
54
    ];
55
56
    /**
57
     * Extension for sitemap file.
58
     *
59
     * @var string
60
     */
61
    public const EXT = '.xml';
62
63
    /**
64
     * Extension for gzipped sitemap file.
65
     *
66
     * @var string
67
     */
68
    public const GZ_EXT = '.xml.gz';
69
70
    /**
71
     * URL to Sitemap Schema.
72
     *
73
     * @var string
74
     */
75
    public const SCHEMA = 'http://www.sitemaps.org/schemas/sitemap/0.9';
76
77
    /**
78
     * Limit of items in Sitemap files.
79
     *
80
     * @var int
81
     */
82
    public const ITEM_PER_SITEMAP = 50000;
83
84
    /**
85
     * Limit of Sitemaps in a Sitemap index.
86
     *
87
     * @var int
88
     */
89
    public const SITEMAP_PER_SITEMAPINDEX = 1000;
90
91
    /**
92
     * Size limit of a single file in bytes (~49.6 MB, leaving some headroom to close the file).
93
     *
94
     * @var int
95
     */
96
    public const SITEMAP_MAX_SIZE = 52000000;
97
98
    /**
99
     * Path on disk to public directory.
100
     *
101
     * @var string
102
     */
103
    private $publicDirectory = '';
104
105
    /**
106
     * Path related to public directory to dir where sitemaps will be.
107
     *
108
     * @var string
109
     */
110
    private $sitepamsDirectory = '';
111
112
    /**
113
     * Path to temporary directory.
114
     *
115
     * @var string
116
     */
117
    private $sitemapTempDirectory = '';
118
119
    /**
120
     * Default filename for sitemap file.
121
     *
122
     * @var string
123
     */
124
    private $defaultFilename = 'sitemap';
125
126
    /**
127
     * Name of index file.
128
     *
129
     * @var string
130
     */
131
    private $indexFilename = 'index';
132
133
    /**
134
     * DataCollector instance.
135
     *
136
     * @var DataCollector
137
     */
138
    private $dataCollector;
139
140
    /**
141
     * Use compression.
142
     *
143
     * @var bool
144
     */
145
    private $useCompression = false;
146
147
    /**
148
     * XML Writer object.
149
     *
150
     * @var XML
151
     */
152
    private $xml;
153
154
    /**
155
     * Separator to be used in Sitemap filenames.
156
     *
157
     * @var string
158
     */
159
    private $separator = '-'; // ~49,6MB - to have some limit to close file
160
161
    /**
     * Constructor.
     *
     * @param null|string $domain domain passed to setDomain(); nothing is set when null
     */
    public function __construct(?string $domain = null)
    {
        // Explicit nullable type: relying on "= null" to make the type nullable is deprecated.
        if (null !== $domain) {
            $this->setDomain($domain);
        }
    }
172
173
    /**
     * Hand a single URL item over to the data collector.
     *
     * The group defaults to the default filename; every run of non-word characters is
     * stripped from it and the result is lower-cased. The item is given this sitemap's
     * domain before the collector is checked.
     *
     * @param Items\Url   $item
     * @param null|string $group
     *
     * @throws \Exception when no DataCollector is set
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function addItem(Items\Url $item, ?string $group = null): self
    {
        $groupName = $group ?? $this->getDefaultFilename();
        $groupName = mb_strtolower(preg_replace('/\W+/', '', $groupName));

        $item->setDomain($this->getDomain());

        $collector = $this->getDataCollector();

        if (!$collector) {
            throw new \Exception('DataCollector is not set.');
        }

        $collector->add($item, $groupName);

        return $this;
    }
198
199
    /**
     * Add several items to the same group by calling addItem() for each of them in order.
     *
     * @param array       $items
     * @param null|string $group
     *
     * @throws \Exception
     *
     * @return $this
     */
    public function addItems(array $items, ?string $group = null): self
    {
        foreach ($items as $url) {
            $this->addItem($url, $group);
        }

        return $this;
    }
215
216
    /**
     * Default filename for sitemap files; addItem() uses it as the group when none is given.
     *
     * @return string
     */
    public function getDefaultFilename(): string
    {
        return $this->defaultFilename;
    }
225
226
    /**
     * Replace the default filename for sitemap files.
     *
     * @param string $defaultFilename
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDefaultFilename(string $defaultFilename): self
    {
        $this->defaultFilename = $defaultFilename;

        return $this;
    }
239
240
    /**
     * Get the DataCollector object.
     *
     * @return null|DataCollector null until setDataCollector() has stored one
     */
    public function getDataCollector(): ?DataCollector
    {
        return $this->dataCollector;
    }
249
250
    /**
     * Instantiate and store a data collector driver.
     *
     * The driver name is resolved inside the \Wszetko\Sitemap\Drivers\DataCollectors namespace.
     *
     * @param string     $driver short class name of the driver
     * @param null|mixed $config passed as-is to the driver's constructor
     *
     * @throws \InvalidArgumentException when the class does not exist or is not a DataCollector
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDataCollector(string $driver, $config = null): self
    {
        $driver = '\Wszetko\Sitemap\Drivers\DataCollectors\\' . $driver;

        if (!class_exists($driver)) {
            throw new \InvalidArgumentException($driver . ' data collector does not exists.');
        }

        // Reject foreign classes up front; otherwise getDataCollector() would fail later
        // on its ?DataCollector return type.
        if (!is_subclass_of($driver, DataCollector::class)) {
            throw new \InvalidArgumentException($driver . ' does not implement the DataCollector interface.');
        }

        $this->dataCollector = new $driver($config);

        return $this;
    }
270
271
    /**
     * Run the whole generation: build the sitemap files and the sitemap index in the
     * temporary directory, then publish them to the public directory.
     *
     * When no XML writer has been set, XMLWriter is used with the current domain.
     *
     * @throws Exception when the public directory, domain, DataCollector or XML writer is missing
     *
     * @return void
     */
    public function generate(): void
    {
        if ('' === $this->getPublicDirectory()) {
            throw new Exception('Public directory is not set.');
        }

        if ('' === $this->getDomain()) {
            throw new Exception('Domain is not set.');
        }

        if (null === $this->getDataCollector()) {
            throw new Exception('DataCollector is not set.');
        }

        if (null === $this->getXml()) {
            $this->setXml(XMLWriter::class, ['domain' => $this->getDomain()]);
        }

        $xml = $this->getXml();

        // setXml() silently ignores unusable drivers, so the writer may still be missing.
        if (null === $xml) {
            throw new Exception('XML writer is not set.');
        }

        // Start from a clean temporary directory; getSitepamsTempDirectory() recreates it.
        $this->removeDir($this->getTempDirectory());
        $xml->setWorkDir($this->getSitepamsTempDirectory());
        $sitemaps = $this->generateSitemaps();
        $xml->setWorkDir($this->getTempDirectory());
        $this->generateSitemapsIndex($sitemaps);
        $this->publishSitemap();
    }
301
302
    /**
     * Path on disk of the public directory; an empty string until setPublicDirectory() is called.
     *
     * @return string
     */
    public function getPublicDirectory(): string
    {
        return $this->publicDirectory;
    }
309
310
    /**
     * Set the public directory; the path is stored in its realpath() form.
     *
     * @param string $publicDirectory
     *
     * @throws Exception when the path does not exist or is not a directory
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setPublicDirectory(string $publicDirectory): self
    {
        $resolved = realpath($publicDirectory);

        // realpath() also succeeds for regular files, so the directory check is explicit.
        if (false === $resolved || !is_dir($resolved)) {
            throw new Exception('Sitemap directory does not exists.');
        }

        $this->publicDirectory = $resolved;

        return $this;
    }
327
328
    /**
     * XML writer object currently in use.
     *
     * @return null|XML null until setXml() has stored a writer
     */
    public function getXml(): ?XML
    {
        return $this->xml;
    }
335
336
    /**
     * Instantiate an XML writer driver and keep it when it implements XML.
     *
     * Unknown classes and classes that are not an XML implementation are ignored without
     * an error. A missing 'domain' entry in the config is filled with this sitemap's domain.
     *
     * @param string $driver fully qualified class name of the driver
     * @param array  $config passed to the driver's constructor
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setXml(string $driver, array $config = []): self
    {
        if (!class_exists($driver)) {
            return $this;
        }

        $config['domain'] = $config['domain'] ?? $this->getDomain();
        $writer = new $driver($config);

        if ($writer instanceof XML) {
            $this->xml = $writer;
        }

        return $this;
    }
358
359
    /**
     * Get the temporary working directory, creating it on first use.
     *
     * The directory is placed in the system temp dir and named "sitemap" followed by
     * 32 random hex characters. Its resolved path is cached for later calls.
     *
     * @throws \Exception        when no source of randomness is available
     * @throws \RuntimeException when the directory cannot be created or resolved
     *
     * @return string
     */
    public function getTempDirectory(): string
    {
        if (empty($this->sitemapTempDirectory)) {
            // Random suffix instead of a timestamp hash so the name cannot be guessed.
            $path = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'sitemap' . bin2hex(random_bytes(16));

            // The second is_dir() covers another process creating the directory in between.
            if (!is_dir($path) && !mkdir($path) && !is_dir($path)) {
                throw new \RuntimeException('Can\'t create temporary directory: ' . $path);
            }

            $resolved = realpath($path);

            if (false === $resolved) {
                throw new \RuntimeException('Can\'t resolve temporary directory: ' . $path);
            }

            $this->sitemapTempDirectory = $resolved;
        }

        return $this->sitemapTempDirectory;
    }
375
376
    /**
     * Get the sitemaps directory inside the temporary directory, creating it
     * (recursively) when it does not exist yet.
     *
     * @throws \RuntimeException when the directory cannot be created or resolved
     *
     * @return string
     */
    public function getSitepamsTempDirectory(): string
    {
        $path = $this->getTempDirectory() . DIRECTORY_SEPARATOR . $this->sitepamsDirectory;
        $directory = realpath($path);

        if (false === $directory) {
            if (!mkdir($path, 0777, true) && !is_dir($path)) {
                throw new \RuntimeException('Can\'t create temporary sitemaps directory: ' . $path);
            }

            $directory = realpath($path);

            if (false === $directory) {
                throw new \RuntimeException('Can\'t resolve temporary sitemaps directory: ' . $path);
            }
        }

        return $directory;
    }
392
393
    /**
     * Write all collected items into sitemap files through the XML writer.
     *
     * Every group with at least one item gets files named "<group><separator><n>.xml",
     * n starting at 0. A new file is started once the current one holds ITEM_PER_SITEMAP
     * items or its size reaches SITEMAP_MAX_SIZE minus a 20 character buffer for the
     * closing tag. With compression enabled the files are gzipped afterwards.
     *
     * @throws Exception when the DataCollector or XML writer is not set, or compression fails
     *
     * @return array generated file name => newest "lastmod" value added to it (null when none)
     */
    public function generateSitemaps(): array
    {
        $collector = $this->getDataCollector();

        if (null === $collector) {
            throw new Exception('DataCollector is not set.');
        }

        if (0 == $collector->getCount()) {
            return [];
        }

        $xml = $this->getXml();

        if (null === $xml) {
            throw new Exception('XML writer is not set.');
        }

        $files = [];

        foreach ($collector->getGroups() as $group) {
            if (!($collector->getGroupCount($group) > 0)) {
                continue;
            }

            $groupNo = 0;
            $itemsInFile = 0;
            $file = $this->sitemapFilename($group, $groupNo);
            $xml->openSitemap($file, $collector->getExtensions());
            $files[$file] = null;
            $isOpen = true;

            while ($element = $collector->fetch($group)) {
                $xml->addUrl($element);
                ++$itemsInFile;

                if (isset($element['lastmod'])) {
                    $files[$file] = $this->newerLastmod($files[$file], $element['lastmod']);
                }

                $isFull = $itemsInFile >= self::ITEM_PER_SITEMAP
                    || $xml->getSitemapSize() >= (self::SITEMAP_MAX_SIZE - 20); // 20 chars buffer for close tag

                if (!$isFull) {
                    continue;
                }

                $xml->closeSitemap();
                $isOpen = false;

                if (!$collector->isLast($group)) {
                    ++$groupNo;
                    $itemsInFile = 0;
                    $file = $this->sitemapFilename($group, $groupNo);
                    $xml->openSitemap($file, $collector->getExtensions());
                    $files[$file] = null;
                    $isOpen = true;
                }
            }

            // Skip the final close when the limit check above already closed the last file.
            if ($isOpen) {
                $xml->closeSitemap();
            }
        }

        if ($this->isUseCompression() && !empty($files)) {
            $this->compressFiles($this->getSitepamsTempDirectory(), $files);
        }

        return $files;
    }

    /**
     * Build the name of the n-th sitemap file of a group.
     *
     * @param mixed $group  group name as returned by the DataCollector
     * @param int   $number file number within the group
     *
     * @return string
     */
    private function sitemapFilename($group, int $number): string
    {
        return $group . $this->getSeparator() . $number . self::EXT;
    }

    /**
     * Pick the later of two "lastmod" values, compared through strtotime().
     *
     * @param mixed $current   value stored so far; any falsy value counts as "none yet"
     * @param mixed $candidate value of the item just added
     *
     * @return mixed the candidate when nothing is stored yet or it is newer, otherwise the current value
     */
    private function newerLastmod($current, $candidate)
    {
        if (!$current) {
            return $candidate;
        }

        return strtotime($candidate) > strtotime($current) ? $candidate : $current;
    }
462
463
    /**
     * Separator placed between the name and the number in generated filenames.
     *
     * @return string
     */
    public function getSeparator(): string
    {
        return $this->separator;
    }
470
471
    /**
     * Replace the separator used in generated filenames.
     *
     * @param string $separator
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSeparator(string $separator): self
    {
        $this->separator = $separator;

        return $this;
    }
482
483
    /**
     * Tell whether generated files are going to be gzipped.
     *
     * @return bool
     */
    public function isUseCompression(): bool
    {
        return $this->useCompression;
    }
492
493
    /**
     * Set whether to use compression or not.
     *
     * Compression is only turned on when the zlib extension is loaded; passing false
     * always turns it off.
     *
     * @param bool $useCompression
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setUseCompression(bool $useCompression): self
    {
        // Assign unconditionally so that false really disables a previously enabled flag.
        $this->useCompression = $useCompression && extension_loaded('zlib');

        return $this;
    }
508
509
    /**
     * Write sitemap index file(s) that list the given sitemap files.
     *
     * The first index is named "<indexFilename>.xml"; after every SITEMAP_PER_SITEMAPINDEX
     * entries a further index "<indexFilename><separator><n>.xml" is started. Entry URLs
     * are the domain, the sitemaps directory relative to the public directory and the
     * file name, joined with forward slashes. With compression enabled the index files
     * are gzipped afterwards.
     *
     * @param array $sitemaps sitemap file name => lastmod, as returned by generateSitemaps()
     *
     * @throws Exception when the XML writer is not set or compression fails
     *
     * @return array generated index file names as keys (values are null)
     */
    public function generateSitemapsIndex(array $sitemaps): array
    {
        if (0 === count($sitemaps)) {
            return [];
        }

        $xml = $this->getXml();

        if (null === $xml) {
            throw new Exception('XML writer is not set.');
        }

        // Resolved once: getSitepamsDirectory() touches the filesystem on every call.
        $relativePath = trim(
            str_replace(
                DIRECTORY_SEPARATOR,
                '/',
                str_replace($this->getPublicDirectory(), '', $this->getSitepamsDirectory())
            ),
            '/'
        );
        // No extra slash when sitemaps live directly in the public directory.
        $baseUrl = $this->getDomain() . '/' . ('' === $relativePath ? '' : $relativePath . '/');

        $indexFile = $this->getIndexFilename() . self::EXT;
        $files = [$indexFile => null];
        $xml->openSitemapIndex($indexFile);
        $isOpen = true;
        $counter = 0;
        $lastItem = array_key_last($sitemaps);

        foreach ($sitemaps as $sitemap => $lastmod) {
            $xml->addSitemap($baseUrl . $sitemap, $lastmod);
            ++$counter;

            if ($counter < self::SITEMAP_PER_SITEMAPINDEX) {
                continue;
            }

            $xml->closeSitemapIndex();
            $isOpen = false;
            $counter = 0;

            if ($sitemap !== $lastItem) {
                $indexFile = $this->getIndexFilename() . $this->getSeparator() . count($files) . self::EXT;
                $files[$indexFile] = null;
                $xml->openSitemapIndex($indexFile);
                $isOpen = true;
            }
        }

        // Skip the final close when the limit check above already closed the last index.
        if ($isOpen) {
            $xml->closeSitemapIndex();
        }

        if ($this->isUseCompression() && !empty($files)) {
            $this->compressFiles($this->getTempDirectory(), $files);
        }

        return $files;
    }
555
556
    /**
     * Filename (without extension) of the sitemap index file.
     *
     * @return string
     */
    public function getIndexFilename(): string
    {
        return $this->indexFilename;
    }
565
566
    /**
     * Replace the filename (without extension) of the sitemap index file.
     *
     * @param string $indexFilename
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setIndexFilename(string $indexFilename): self
    {
        $this->indexFilename = $indexFilename;

        return $this;
    }
579
580
    /**
     * Get the sitemaps directory inside the public directory, creating it
     * (recursively) when it does not exist yet.
     *
     * @throws \RuntimeException when the directory cannot be created or resolved
     *
     * @return string
     */
    public function getSitepamsDirectory(): string
    {
        $path = $this->getPublicDirectory() . DIRECTORY_SEPARATOR . $this->sitepamsDirectory;
        $directory = realpath($path);

        if (false === $directory) {
            if (!mkdir($path, 0777, true) && !is_dir($path)) {
                throw new \RuntimeException('Can\'t create sitemaps directory: ' . $path);
            }

            $directory = realpath($path);

            if (false === $directory) {
                throw new \RuntimeException('Can\'t resolve sitemaps directory: ' . $path);
            }
        }

        return $directory;
    }
592
593
    /**
     * Set the directory, relative to the public directory, where sitemaps will be placed.
     *
     * @param string $sitepamsDirectory
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSitepamsDirectory(string $sitepamsDirectory): self
    {
        $this->sitepamsDirectory = $sitepamsDirectory;

        return $this;
    }
604
605
    /**
     * Recursively delete a directory and everything in it.
     *
     * Does nothing when the path is not a directory or its content cannot be listed.
     *
     * @param string $dir
     *
     * @return void
     */
    private function removeDir($dir): void
    {
        if (!is_dir($dir)) {
            return;
        }

        $objects = scandir($dir);

        // scandir() returns false on failure; iterating over that would only raise a warning.
        if (false === $objects) {
            return;
        }

        foreach ($objects as $object) {
            if ('.' === $object || '..' === $object) {
                continue;
            }

            $path = $dir . '/' . $object;

            if ('dir' === filetype($path)) {
                $this->removeDir($path);
            } else {
                unlink($path);
            }
        }

        rmdir($dir);
    }
628
629
    /**
     * Gzip every listed file inside $dir and delete the uncompressed original.
     *
     * The last four characters of each file name (the ".xml" extension) are replaced
     * with GZ_EXT. $files is passed by reference and is replaced with the compressed
     * file names, keeping each file's lastmod value.
     *
     * @param string $dir   directory that contains the files
     * @param array  $files file name => lastmod
     *
     * @throws Exception when a source file cannot be opened or read, or an archive cannot be created
     *
     * @return void
     */
    private function compressFiles(string $dir, array &$files): void
    {
        $newFiles = [];

        foreach ($files as $file => $lastmod) {
            $source = $dir . DIRECTORY_SEPARATOR . $file;
            $gzFile = mb_substr($file, 0, mb_strlen($file) - 4) . self::GZ_EXT;
            $output = $dir . DIRECTORY_SEPARATOR . $gzFile;

            // Open the source first so a missing source does not leave an empty archive behind.
            $in = fopen($source, 'rb');

            if (!$in) {
                throw new Exception('Can\'t open xml file.');
            }

            $out = gzopen($output, 'wb9');

            if (!$out) {
                fclose($in);

                throw new Exception('Can\'t create GZip archive.');
            }

            try {
                while (!feof($in)) {
                    $chunk = fread($in, 524288);

                    if (false === $chunk) {
                        throw new Exception('Can\'t read xml file.');
                    }

                    gzwrite($out, $chunk);
                }
            } finally {
                // Both handles are released even when copying fails.
                fclose($in);
                gzclose($out);
            }

            unlink($source);
            $newFiles[$gzFile] = $lastmod;
        }

        $files = $newFiles;
    }
668
669
    /**
     * Replace the published sitemaps with the freshly generated ones.
     *
     * Steps: remove the previous sitemaps directory, delete previous index files from
     * the public directory, move every generated file with the current extension from
     * the temporary directory to the same relative location under the public directory,
     * then remove the temporary directory.
     *
     * @return void
     */
    private function publishSitemap(): void
    {
        $publicDirectory = $this->getPublicDirectory();
        $sitemapsDirectory = $this->getSitepamsDirectory();

        // Clear previous sitemaps. When the sitemaps directory is the public directory
        // itself (empty relative path) it must not be wiped: that would delete the whole
        // public directory. Old sitemap files are then only overwritten by name.
        if ($sitemapsDirectory !== $publicDirectory) {
            $this->removeDir($sitemapsDirectory);
        }

        // Index files are named with getSeparator(), so the cleanup pattern uses it too.
        $indexPattern = '/^' . preg_quote($this->getIndexFilename(), '/')
            . '(' . preg_quote($this->getSeparator(), '/') . '\d+)?'
            . preg_quote($this->getExt(), '/') . '$/';
        $entries = scandir($publicDirectory);

        if (false === $entries) {
            $entries = [];
        }

        foreach ($entries as $entry) {
            if (1 === preg_match($indexPattern, $entry)) {
                unlink($publicDirectory . DIRECTORY_SEPARATOR . $entry);
            }
        }

        $this->getSitepamsDirectory(); // To create sitemaps directory

        $tempDirectory = $this->getTempDirectory();
        $extension = $this->getExt();
        $extensionLength = strlen($extension);
        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($tempDirectory, RecursiveDirectoryIterator::SKIP_DOTS)
        );
        $fileList = [];

        // Collect first, move afterwards, so the tree is not changed while it is iterated.
        foreach ($iterator as $fileInfo) {
            $path = $fileInfo->getPathname();

            if ($fileInfo->isFile() && substr($path, -$extensionLength) === $extension) {
                $fileList[] = $path;
            }
        }

        foreach ($fileList as $path) {
            // Swap only the leading temporary directory for the public one.
            rename($path, $publicDirectory . substr($path, strlen($tempDirectory)));
        }

        $this->removeDir($tempDirectory);
    }
713
714
    /**
     * Extension of the published files: GZ_EXT when compression is used, EXT otherwise.
     *
     * @return string
     */
    private function getExt(): string
    {
        return $this->isUseCompression() ? self::GZ_EXT : self::EXT;
    }
725
}
726