Completed
Push — master ( 82ab0a...209992 )
by Paweł
09:02 queued 06:16
created

Sitemap::setTempDirectory()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
c 0
b 0
f 0
nc 1
nop 1
dl 0
loc 5
ccs 3
cts 3
cp 1
crap 1
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * This file is part of Wszetko Sitemap.
7
 *
8
 * (c) Paweł Kłopotek-Główczewski <[email protected]>
9
 *
10
 * This source file is subject to the MIT license that is bundled
11
 * with this source code in the file LICENSE.
12
 */
13
14
namespace Wszetko\Sitemap;
15
16
use Exception;
17
use InvalidArgumentException;
18
use RecursiveDirectoryIterator;
19
use RecursiveIteratorIterator;
20
use RegexIterator;
21
use Wszetko\Sitemap\Drivers\DataCollectors\AbstractDataCollector;
22
use Wszetko\Sitemap\Drivers\Output\OutputXMLWriter;
23
use Wszetko\Sitemap\Helpers\Directory;
24
use Wszetko\Sitemap\Interfaces\DataCollector;
25
use Wszetko\Sitemap\Interfaces\XML;
26
use Wszetko\Sitemap\Traits\Domain;
27
28
/**
29
 * Sitemap
30
 * This class is used for generating Google Sitemap files.
31
 *
32
 * @package    Sitemap
33
 *
34
 * @author     Paweł Kłopotek-Główczewski <[email protected]>
35
 * @copyright  2019 Paweł Kłopotek-Głowczewski (https://pawelkg.com/)
36
 * @license    https://opensource.org/licenses/MIT MIT License
37
 *
38
 * @see       https://github.com/wszetko/sitemap
39
 */
40
class Sitemap
41
{
42
    use Domain;
43
44
    /**
45
     * Available values for changefreq tag.
46
     *
47
     * @var array
48
     */
49
    public const CHANGEFREQ = [
50
        'always',
51
        'hourly',
52
        'daily',
53
        'weekly',
54
        'monthly',
55
        'yearly',
56
        'never',
57
    ];
58
59
    /**
60
     * Extension for sitemap file.
61
     *
62
     * @var string
63
     */
64
    public const EXT = '.xml';
65
66
    /**
67
     * Extension for gzipped sitemap file.
68
     *
69
     * @var string
70
     */
71
    public const GZ_EXT = '.xml.gz';
72
73
    /**
74
     * URL to Sitemap Schema.
75
     *
76
     * @var string
77
     */
78
    public const SCHEMA = 'http://www.sitemaps.org/schemas/sitemap/0.9';
79
80
    /**
81
     * Limit of items in Sitemap files.
82
     *
83
     * @var int
84
     */
85
    public const ITEM_PER_SITEMAP = 50000;
86
87
    /**
88
     * Limit of Sitemaps in a Sitemap index.
89
     *
90
     * @var int
91
     */
92
    public const SITEMAP_PER_SITEMAPINDEX = 1000;
93
94
    /**
95
     * Limit of single files size.
96
     *
97
     * @var int
98
     */
99
    public const SITEMAP_MAX_SIZE = 52000000;
100
101
    /**
102
     * Path on disk to public directory.
103
     *
104
     * @var string
105
     */
106
    private $publicDirectory = '';
107
108
    /**
109
     * Path related to public directory to dir where sitemaps will be.
110
     *
111
     * @var string
112
     */
113
    private $sitemapsDirectory = '';
114
115
    /**
116
     * Path to temporary directory.
117
     *
118
     * @var string
119
     */
120
    private $sitemapTempDirectory = '';
121
122
    /**
123
     * Default filename for sitemap file.
124
     *
125
     * @var string
126
     */
127
    private $defaultFilename = 'sitemap';
128
129
    /**
130
     * Name of index file.
131
     *
132
     * @var string
133
     */
134
    private $indexFilename = 'index';
135
136
    /**
137
     * DataCollector instance.
138
     *
139
     * @var DataCollector
140
     */
141
    private $dataCollector;
142
143
    /**
144
     * Use compression.
145
     *
146
     * @var bool
147
     */
148
    private $useCompression = false;
149
150
    /**
151
     * XML Writer object.
152
     *
153
     * @var XML
154
     */
155
    private $xml;
156
157
    /**
158
     * Separator to be used in Sitemap filenames.
159
     *
160
     * @var string
161
     */
162
    private $separator = '-'; // ~49,6MB - to have some limit to close file
163
164
    /**
     * Constructor.
     *
     * When a domain is given it is forwarded to setDomain(); when it is null
     * the instance is created without a domain.
     *
     * @param null|string $domain domain to set, or null to leave it unset
     *
     * @throws \InvalidArgumentException presumably raised by setDomain() (defined outside this class body)
     */
    public function __construct(?string $domain = null)
    {
        // The nullable type is explicit: implicit nullability via "= null" is deprecated.
        if (null !== $domain) {
            $this->setDomain($domain);
        }
    }
177
178
    /**
     * Adds a single URL item to the data collector under the given group.
     *
     * All non-word characters are stripped from the group name and the result
     * is lower-cased. When no group is given, or nothing is left after the
     * stripping, the default filename is used as the group name.
     *
     * @param Items\Url   $item  item to add; the sitemap's domain is assigned to it first
     * @param null|string $group optional group name
     *
     * @throws \Exception when no data collector has been set
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function addItem(Items\Url $item, ?string $group = null): self
    {
        $name = null === $group ? null : preg_replace('/\W+/', '', $group);

        // preg_replace() may yield null on error, which is treated like "no group".
        if (null === $name || '' === $name) {
            $name = $this->getDefaultFilename();
        }

        $name = mb_strtolower($name);

        $item->setDomain($this->getDomain());
        $this->getDataCollector()->add($item, $name);

        return $this;
    }
202
203
    /**
     * Adds several items at once by passing each one to addItem().
     *
     * @param array       $items items to add; each element is handed to addItem() unchanged
     * @param null|string $group optional group name applied to every item
     *
     * @throws \Exception
     *
     * @return $this
     */
    public function addItems(array $items, ?string $group = null): self
    {
        foreach ($items as $url) {
            $this->addItem($url, $group);
        }

        return $this;
    }
219
220
    /**
     * Get the DataCollector object.
     *
     * @throws \Exception when no data collector has been set
     *
     * @return DataCollector
     */
    public function getDataCollector(): DataCollector
    {
        if (null !== $this->dataCollector) {
            return $this->dataCollector;
        }

        throw new Exception('DataCollector is not set.');
    }
234
235
    /**
     * Creates the data collector from a driver class name and stores it.
     *
     * The driver must be an existing class that extends AbstractDataCollector.
     * Its type is verified before it is instantiated, so the constructor of an
     * unrelated class is never executed.
     *
     * @param string $driver fully qualified class name of the data collector
     * @param array  $config configuration passed as the only constructor argument
     *
     * @throws \InvalidArgumentException when the class does not exist or is not a data collector
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDataCollector(string $driver, $config = []): self
    {
        if (!class_exists($driver)) {
            throw new InvalidArgumentException($driver . ' data collector does not exists.');
        }

        // Check the class hierarchy on the name itself, before any object is created.
        if (!is_subclass_of($driver, AbstractDataCollector::class)) {
            throw new InvalidArgumentException(
                $driver . ' must extend ' . AbstractDataCollector::class . '.'
            );
        }

        $this->dataCollector = new $driver($config);

        return $this;
    }
259
260
    /**
     * Get the XML writer object.
     *
     * @throws \Exception when no XML writer has been set
     *
     * @return XML
     */
    public function getXml(): XML
    {
        if (null !== $this->xml) {
            return $this->xml;
        }

        throw new Exception('XML writer class is not set.');
    }
273
274
    /**
     * Creates the XML writer from a driver class name and stores it.
     *
     * When the config has no 'domain' entry, the sitemap's own domain is
     * injected. A driver class that does not exist, or whose instance does not
     * implement XML, is silently ignored and the current writer is left as is.
     *
     * @param string $driver fully qualified class name of the XML writer
     * @param array  $config configuration passed as the only constructor argument
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setXml(string $driver, array $config = []): self
    {
        if (!class_exists($driver)) {
            return $this;
        }

        $config['domain'] = $config['domain'] ?? $this->getDomain();

        $writer = new $driver($config);

        if ($writer instanceof XML) {
            $this->xml = $writer;
        }

        return $this;
    }
296
297
    /**
     * Runs the whole generation: sitemap files, sitemap index, then publishing.
     *
     * When no XML writer has been set, OutputXMLWriter is configured with the
     * current domain. The temporary directory is removed before anything is
     * written.
     *
     * @throws Exception when the domain is empty, or when any step fails
     */
    public function generate(): void
    {
        if ('' === $this->getDomain()) {
            throw new Exception('Domain is not set.');
        }

        if (null === $this->xml) {
            $this->setXml(OutputXMLWriter::class, ['domain' => $this->getDomain()]);
        }

        Directory::removeDir($this->getTempDirectory());

        $writer = $this->getXml();

        // Sitemap files are written below the temporary sitemaps directory...
        $writer->setWorkDir($this->getSitepamsTempDirectory());
        $sitemaps = $this->generateSitemaps();

        // ...while the index is written into the temporary directory itself.
        $writer->setWorkDir($this->getTempDirectory());
        $this->generateSitemapsIndex($sitemaps);

        $this->publishSitemap();
    }
317
318
    /**
     * Writes the sitemap files for every group held by the data collector.
     *
     * Each group is written to files named "<group><separator><number>.xml",
     * with the number starting at 0. A new file is started once the current
     * one reaches ITEM_PER_SITEMAP items or its size gets within 20 bytes of
     * SITEMAP_MAX_SIZE (the margin is a buffer for the closing tag). When
     * compression is enabled, the written files are gzipped afterwards.
     *
     * @throws Exception when the data collector or the XML writer is not set
     *
     * @return array map of file name => latest 'lastmod' value seen in that file (null when none)
     */
    public function generateSitemaps(): array
    {
        $collector = $this->getDataCollector();

        if (0 == $collector->getCount()) {
            return [];
        }

        $files = [];

        foreach ($collector->getGroups() as $group) {
            if ($collector->getGroupCount($group) <= 0) {
                continue;
            }

            $xml = $this->getXml();
            $groupNo = 0;
            $itemsInFile = 0;
            $filename = $group . $this->getSeparator() . $groupNo . self::EXT;

            $xml->openSitemap($filename, $collector->getExtensions());
            $files[$filename] = null;
            // Tracks whether a sitemap is open, so that it is never closed twice.
            $isOpen = true;

            while ($element = $collector->fetch($group)) {
                $xml->addUrl($element);
                ++$itemsInFile;

                if (isset($element['lastmod'])) {
                    $latest = $files[$filename];

                    // Keep the most recent modification date of the file.
                    if (!$latest || strtotime($element['lastmod']) > strtotime($latest)) {
                        $files[$filename] = $element['lastmod'];
                    }
                }

                // self::SITEMAP_MAX_SIZE - 20 for buffer for close tag
                if (
                    $itemsInFile >= self::ITEM_PER_SITEMAP ||
                    $xml->getSitemapSize() >= (self::SITEMAP_MAX_SIZE - 20)
                ) {
                    $xml->closeSitemap();
                    $isOpen = false;

                    if (!$collector->isLast($group)) {
                        ++$groupNo;
                        $itemsInFile = 0;
                        $filename = $group . $this->getSeparator() . $groupNo . self::EXT;

                        $xml->openSitemap($filename, $collector->getExtensions());
                        $files[$filename] = null;
                        $isOpen = true;
                    }
                }
            }

            // The last file may already be closed when its final item hit a limit.
            if ($isOpen) {
                $xml->closeSitemap();
            }
        }

        if ($this->isUseCompression() && [] !== $files) {
            $this->compressFiles($this->getSitepamsTempDirectory(), $files);
        }

        return $files;
    }
391
392
    /**
     * Writes the sitemap index file(s) that list the given sitemap files.
     *
     * The first index is named "<indexFilename>.xml". After every
     * SITEMAP_PER_SITEMAPINDEX entries a further index named
     * "<indexFilename><separator><number>.xml" is started, unless the entry
     * just written was the last one. When compression is enabled, the written
     * index files are gzipped afterwards.
     *
     * @param array $sitemaps map of sitemap file name => lastmod value
     *
     * @throws Exception when the XML writer or the public directory is not set
     *
     * @return array map of index file name => null
     */
    public function generateSitemapsIndex(array $sitemaps): array
    {
        if ([] === $sitemaps) {
            return [];
        }

        // The URL prefix is the same for every entry, so it is built once and
        // before any file is opened. Directory separators are turned into
        // URL slashes, and an empty relative directory does not produce "//".
        $relativeDir = trim(
            str_replace(
                DIRECTORY_SEPARATOR,
                '/',
                str_replace($this->getPublicDirectory(), '', $this->getSitemapsDirectory())
            ),
            '/'
        );
        $baseUrl = (string) $this->getDomain() . '/' . ('' === $relativeDir ? '' : $relativeDir . '/');

        $xml = $this->getXml();
        $file = $this->getIndexFilename() . self::EXT;
        $files = [$file => null];
        $xml->openSitemapIndex($file);
        // Tracks whether an index is open, so that it is never closed twice.
        $isOpen = true;

        $counter = 0;
        $lastItem = array_key_last($sitemaps);

        foreach ($sitemaps as $sitemap => $lastmod) {
            $xml->addSitemap($baseUrl . $sitemap, $lastmod);
            ++$counter;

            if ($counter < self::SITEMAP_PER_SITEMAPINDEX) {
                continue;
            }

            $xml->closeSitemapIndex();
            $isOpen = false;
            $counter = 0;

            if ($sitemap !== $lastItem) {
                $file = $this->getIndexFilename() . $this->getSeparator() . count($files) . self::EXT;
                $files[$file] = null;
                $xml->openSitemapIndex($file);
                $isOpen = true;
            }
        }

        if ($isOpen) {
            $xml->closeSitemapIndex();
        }

        if ($this->isUseCompression()) {
            $this->compressFiles($this->getTempDirectory(), $files);
        }

        return $files;
    }
440
441
    /**
     * Gzips every listed file inside $dir and removes the uncompressed source.
     *
     * On success $files is replaced by a map keyed by the new ".xml.gz" names,
     * keeping the original values. When an exception is thrown, $files is left
     * untouched.
     *
     * @param string $dir   directory that contains the files
     * @param array  $files map of file name (ending in ".xml") => lastmod value; rewritten in place
     *
     * @throws Exception when zlib is missing or a file cannot be opened, read or written
     *
     * @return void
     */
    private function compressFiles(string $dir, array &$files): void
    {
        if (!extension_loaded('zlib')) {
            throw new Exception('Extension zlib is not loaded.');
        }

        $newFiles = [];

        foreach ($files as $file => $lastmod) {
            $file = (string) $file;
            $source = $dir . DIRECTORY_SEPARATOR . $file;
            // Swap the trailing ".xml" for ".xml.gz".
            $gzFile = mb_substr($file, 0, mb_strlen($file) - mb_strlen(self::EXT)) . self::GZ_EXT;
            $output = $dir . DIRECTORY_SEPARATOR . $gzFile;

            // Open the source first, so no empty archive is created for a missing file.
            $in = fopen($source, 'rb');

            if (false === $in) {
                throw new Exception('Can\'t open xml file.');
            }

            $out = gzopen($output, 'wb9');

            if (false === $out) {
                fclose($in);

                throw new Exception('Can\'t create GZip archive.');
            }

            try {
                while (!feof($in)) {
                    $content = fread($in, 524288);

                    // A failed read would otherwise never reach EOF and loop forever.
                    if (false === $content) {
                        throw new Exception('Can\'t read xml file.');
                    }

                    if ('' !== $content && !gzwrite($out, $content)) {
                        throw new Exception('Can\'t write GZip archive.');
                    }
                }
            } finally {
                // Both handles are released on success and on failure.
                fclose($in);
                gzclose($out);
            }

            unlink($source);
            $newFiles[$gzFile] = $lastmod;
        }

        $files = $newFiles;
    }
488
489
    /**
     * Moves the generated files from the temporary directory to the public one.
     *
     * Steps: remove the sitemaps directory, delete old index files from the
     * public directory, move every file with the current extension from the
     * temporary directory to the same relative path below the public
     * directory, and finally remove the temporary directory.
     *
     * @throws \Exception when a directory is not set or a file cannot be moved
     *
     * @return void
     */
    private function publishSitemap(): void
    {
        // Clear previous sitemaps
        Directory::removeDir($this->getSitemapsDirectory());

        $publicDirectory = $this->getPublicDirectory();
        // Quoted so that "." and other special characters are matched literally.
        $ext = preg_quote($this->getExt(), '/');
        // Index files are named "<index><ext>" or "<index><separator><number><ext>".
        $indexPattern = '/^(' . preg_quote($this->getIndexFilename(), '/') . ')('
            . preg_quote($this->getSeparator(), '/') . '\d+)?(' . $ext . ')$/';

        $entries = scandir($publicDirectory);

        if (is_array($entries)) {
            foreach ($entries as $entry) {
                if (1 === preg_match($indexPattern, $entry)) {
                    unlink($publicDirectory . DIRECTORY_SEPARATOR . $entry);
                }
            }
        }

        $tempDirectory = $this->getTempDirectory();
        $matches = new RegexIterator(
            new RecursiveIteratorIterator(new RecursiveDirectoryIterator($tempDirectory)),
            "/^(?'path'(([a-zA-Z]:)|((\\\\|\\/){1,2}\\w+)?)((\\\\|\\/)(\\w[\\w ]*.*))+({$ext}){1})$/",
            RegexIterator::GET_MATCH
        );

        // Paths are collected first and moved afterwards, so that files are not
        // renamed while the directory is still being iterated.
        $paths = [];

        foreach ($matches as $match) {
            if (isset($match['path'])) {
                $paths[] = $match['path'];
            }
        }

        foreach ($paths as $path) {
            $destination = str_replace($tempDirectory, $publicDirectory, $path);

            // A failed move must not go unnoticed: the temporary copy is deleted below.
            if (!rename($path, $destination)) {
                throw new Exception('Can\'t move ' . $path . ' to ' . $destination . '.');
            }
        }

        Directory::removeDir($tempDirectory);
    }
538
539
    /**
     * Returns the base name (without extension) used for the sitemap index file.
     *
     * @return string
     */
    public function getIndexFilename(): string
    {
        return $this->indexFilename;
    }
548
549
    /**
     * Sets the base name (without extension) used for the sitemap index file.
     *
     * @param string $indexFilename new base name; stored as given
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setIndexFilename(string $indexFilename): self
    {
        $this->indexFilename = $indexFilename;

        return $this;
    }
562
563
    /**
     * Returns the path of the public directory.
     *
     * @throws \Exception when the public directory has not been set
     *
     * @return string
     */
    public function getPublicDirectory(): string
    {
        if ('' !== $this->publicDirectory) {
            return $this->publicDirectory;
        }

        throw new Exception('Public directory is not set.');
    }
576
577
    /**
     * Sets the public directory to the result of Directory::checkDirectory().
     *
     * @param string $publicDirectory path on disk to the public directory
     *
     * @throws Exception presumably raised by Directory::checkDirectory() (not visible here)
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setPublicDirectory(string $publicDirectory): self
    {
        $checked = Directory::checkDirectory($publicDirectory);
        $this->publicDirectory = $checked;

        return $this;
    }
590
591
592
593
    /**
     * Returns the directory the sitemap files are published to.
     *
     * When none has been set yet, it is initialised through
     * setSitemapsDirectory('') first.
     *
     * @throws \Exception when the public directory has not been set
     *
     * @return string
     */
    public function getSitemapsDirectory(): string
    {
        if ('' !== $this->sitemapsDirectory) {
            return $this->sitemapsDirectory;
        }

        $this->setSitemapsDirectory('');

        return $this->sitemapsDirectory;
    }
606
607
    /**
     * Sets the sitemaps directory, given relative to the public directory.
     *
     * The stored value is the result of Directory::checkDirectory() applied to
     * "<public directory><DIRECTORY_SEPARATOR><given path>".
     *
     * @param string $sitemapsDirectory path relative to the public directory
     *
     * @throws \Exception when the public directory has not been set
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSitemapsDirectory(string $sitemapsDirectory): self
    {
        $path = $this->getPublicDirectory() . DIRECTORY_SEPARATOR . $sitemapsDirectory;
        $this->sitemapsDirectory = Directory::checkDirectory($path);

        return $this;
    }
621
622
    /**
     * Sets the temporary directory to the result of Directory::checkDirectory().
     *
     * @param string $tempDirectory path to the temporary directory
     *
     * @throws \Exception presumably raised by Directory::checkDirectory() (not visible here)
     *
     * @return $this
     */
    public function setTempDirectory(string $tempDirectory): self
    {
        $checked = Directory::checkDirectory($tempDirectory);
        $this->sitemapTempDirectory = $checked;

        return $this;
    }
635
636
    /**
     * Returns the temporary directory, creating a default one on first use.
     *
     * When no temporary directory has been set, a path of the form
     * "<system temp dir>/sitemap<32 hex characters>" is registered through
     * setTempDirectory(). The suffix comes from random_bytes(), so it is not
     * predictable and does not collide between concurrent processes the way a
     * timestamp-based name can.
     *
     * @throws \Exception when no random bytes can be obtained or the directory check fails
     *
     * @return string
     */
    public function getTempDirectory(): string
    {
        if (null === $this->sitemapTempDirectory || '' === $this->sitemapTempDirectory) {
            $suffix = bin2hex(random_bytes(16));
            $this->setTempDirectory(sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'sitemap' . $suffix);
        }

        return $this->sitemapTempDirectory;
    }
650
651
    /**
     * Returns the location of the sitemaps directory inside the temporary directory.
     *
     * The public directory path is removed from the sitemaps directory path and
     * the remainder is appended to the temporary directory; the result is
     * passed through Directory::checkDirectory().
     *
     * @throws \Exception when the public directory has not been set
     *
     * @return string
     */
    public function getSitepamsTempDirectory(): string
    {
        $relative = str_replace($this->getPublicDirectory(), '', $this->getSitemapsDirectory());
        $path = $this->getTempDirectory() . DIRECTORY_SEPARATOR . $relative;

        return Directory::checkDirectory($path);
    }
662
663
    /**
     * Returns the separator placed between the name and the number in file names.
     *
     * @return string
     */
    public function getSeparator(): string
    {
        return $this->separator;
    }
670
671
    /**
     * Sets the separator placed between the name and the number in file names.
     *
     * @param string $separator new separator; stored as given
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setSeparator(string $separator): self
    {
        $this->separator = $separator;

        return $this;
    }
682
683
    /**
     * Tells whether generated files are gzipped.
     *
     * @return bool
     */
    public function isUseCompression(): bool
    {
        return $this->useCompression;
    }
692
693
    /**
     * Set whether to use compression or not.
     *
     * Compression is only switched on when the zlib extension is loaded.
     * Passing false always switches it off, including after it was enabled
     * by an earlier call.
     *
     * @param bool $useCompression true to request gzipped output, false to disable it
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setUseCompression(bool $useCompression): self
    {
        $this->useCompression = $useCompression && extension_loaded('zlib');

        return $this;
    }
708
709
    /**
     * Returns the file name used for items that are added without a group.
     *
     * @return string
     */
    public function getDefaultFilename(): string
    {
        return $this->defaultFilename;
    }
718
719
    /**
     * Sets the file name used for items that are added without a group.
     *
     * @param string $defaultFilename new default name; stored as given
     *
     * @return \Wszetko\Sitemap\Sitemap
     */
    public function setDefaultFilename(string $defaultFilename): self
    {
        $this->defaultFilename = $defaultFilename;

        return $this;
    }
732
733
    /**
     * Returns the extension of the generated files: GZ_EXT when compression
     * is enabled, EXT otherwise.
     *
     * @return string
     */
    private function getExt(): string
    {
        return $this->isUseCompression() ? self::GZ_EXT : self::EXT;
    }
744
}
745