Passed
Push — master ( 793471...608ba9 )
by Paweł
01:59
created

Sitemap::getPath()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 0
dl 0
loc 4
ccs 0
cts 2
cp 0
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
declare(strict_types=1);
3
4
namespace Wszetko\Sitemap;
5
6
use Exception;
7
use RecursiveDirectoryIterator;
8
use RecursiveIteratorIterator;
9
use RegexIterator;
10
use Wszetko\Sitemap\Drivers\XML\XMLWriter;
11
use Wszetko\Sitemap\Interfaces\DataCollector;
12
use Wszetko\Sitemap\Interfaces\XML;
13
use Wszetko\Sitemap\Traits\Domain;
14
15
/**
16
 * Sitemap
17
 * This class is used for generating Google Sitemap files
18
 *
19
 * @package    Sitemap
20
 * @author     Paweł Kłopotek-Główczewski <[email protected]>
21
 * @copyright  2019 Paweł Kłopotek-Głowczewski (https://pawelkg.com/)
22
 * @license    https://opensource.org/licenses/MIT MIT License
23
 * @link       https://github.com/wszetko/sitemap
24
 */
25
class Sitemap
26
{
27
    use Domain;
28
29
    /**
30
     * Available values for changefreq tag
31
     *
32
     * @var array
33
     */
34
    const CHANGEFREQ = [
35
        'always',
36
        'hourly',
37
        'daily',
38
        'weekly',
39
        'monthly',
40
        'yearly',
41
        'never'
42
    ];
43
44
    /**
45
     * Extension for sitemap file
46
     *
47
     * @var string
48
     */
49
    const EXT = '.xml';
50
51
    /**
52
     * Extension for gzipped sitemap file
53
     *
54
     * @var string
55
     */
56
    const GZ_EXT = '.xml.gz';
57
58
    /**
59
     * URL to Sitemap Schema
60
     *
61
     * @var string
62
     */
63
    const SCHEMA = 'http://www.sitemaps.org/schemas/sitemap/0.9';
64
65
    /**
66
     * Limit of items in Sitemap files
67
     *
68
     * @var int
69
     */
70
    const ITEM_PER_SITEMAP = 50000;
71
72
    /**
73
     * Limit of Sitemaps in SitemapsIndex
74
     *
75
     * @var int
76
     */
77
    const SITEMAP_PER_SITEMAPINDEX = 1000;
78
79
    /**
80
     * Limit of a single file's size (~49.6 MB), used as the threshold at which a file gets closed
81
     *
82
     * @var int
83
     */
84
    const SITEMAP_MAX_SIZE = 52000000;
85
86
    /**
87
     * Path on disk to public directory.
88
     *
89
     * @var string
90
     */
91
    private $publicDirectory = '';
92
93
    /**
94
     * Path, relative to the public directory, of the directory where sitemaps will be stored.
95
     *
96
     * @var string
97
     */
98
    private $sitepamsDirectory = '';
99
100
    /**
101
     * Path to temporary directory.
102
     *
103
     * @var string
104
     */
105
    private $sitemapTempDirectory = '';
106
107
    /**
108
     * Default filename for sitemap file
109
     *
110
     * @var string
111
     */
112
    private $defaultFilename = 'sitemap';
113
114
    /**
115
     * Name of index file
116
     *
117
     * @var string
118
     */
119
    private $indexFilename = 'index';
120
121
    /**
122
     * DataCollector instance
123
     *
124
     * @var DataCollector|null
125
     */
126
    private $dataCollector = null;
127
128
    /**
129
     * Use compression
130
     *
131
     * @var bool
132
     */
133
    private $useCompression = false;
134
135
    /**
136
     * XML Writer object
137
     *
138
     * @var XML|null
139
     */
140
    private $xml;
141
142
    /**
143
     * Separator to be used in Sitemap filenames
144
     *
145
     * @var string
146
     */
147
    private $separator = '-'; // ~49,6MB - to have some limit to close file
148
149
    /**
     * Constructor.
     *
     * @param string|null $domain Domain handed to setDomain(); when it is null
     *                            or an empty string no domain is assigned
     */
    public function __construct(?string $domain = null)
    {
        // Explicit comparisons instead of a loose truthiness test, so the
        // outcome no longer depends on PHP's string-to-bool juggling.
        if ($domain !== null && $domain !== '') {
            $this->setDomain($domain);
        }
    }
160
161
    /**
     * Adds a URL item to the data collector under the given group.
     *
     * When no group is given, the default filename is used as the group name.
     * The group name is stripped of non-word characters and lower-cased, and
     * the item receives this sitemap's domain before it is stored.
     *
     * @param Items\Url   $item
     * @param string|null $group
     *
     * @throws Exception When no DataCollector has been set.
     */
    public function addItem(Items\Url $item, ?string $group = null): void
    {
        $collector = $this->getDataCollector();

        // Fail with a clear message instead of a "member function on null" error.
        if ($collector === null) {
            throw new Exception('DataCollector is not set.');
        }

        if ($group === null) {
            $group = $this->getDefaultFilename();
        }

        // preg_replace() returns null on a PCRE error, which strtolower()
        // rejects under strict_types; fall back to an empty group name.
        $group = strtolower(preg_replace('/\W+/', '', $group) ?? '');
        $item->setDomain($this->getDomain());
        $collector->add($item, $group);
    }
175
176
    /**
     * Returns the filename used for sitemap files when no group is given.
     *
     * @return string
     */
    public function getDefaultFilename(): string
    {
        return $this->defaultFilename;
    }
185
186
    /**
     * Stores the filename to use for sitemap files when no group is given.
     *
     * @param string $defaultFilename
     */
    public function setDefaultFilename(string $defaultFilename): void
    {
        $this->defaultFilename = $defaultFilename;
    }
195
196
    /**
     * Returns the configured DataCollector, or null when none has been set.
     *
     * @return DataCollector|null
     */
    public function getDataCollector(): ?DataCollector
    {
        return $this->dataCollector;
    }
205
206
    /**
     * Instantiates and stores the data collector driver with the given name.
     *
     * The name is resolved inside the \Wszetko\Sitemap\Drivers\DataCollectors
     * namespace. If that class does not exist, or the created object does not
     * implement DataCollector, the current collector is left untouched.
     *
     * @param string $driver Class name relative to the DataCollectors namespace
     * @param mixed  $config Value passed to the driver's constructor
     */
    public function setDataCollector(string $driver, $config = null): void
    {
        $driver = '\Wszetko\Sitemap\Drivers\DataCollectors\\' . $driver;

        if (!class_exists($driver)) {
            return;
        }

        $collector = new $driver($config);

        // Same guard as setXml(): only keep objects honouring the interface,
        // so getDataCollector() can never violate its declared return type.
        if ($collector instanceof DataCollector) {
            $this->dataCollector = $collector;
        }
    }
218
219
    /**
     * Runs the whole generation pipeline: validates the configuration, writes
     * the sitemaps and their index into the temporary directory, then
     * publishes the result.
     *
     * @throws Exception When the public directory, domain or DataCollector is not set.
     */
    public function generate()
    {
        if ('' === $this->getPublicDirectory()) {
            throw new Exception('Public directory is not set.');
        }

        if ('' === $this->getDomain()) {
            throw new Exception('Domain is not set.');
        }

        if (null === $this->getDataCollector()) {
            throw new Exception('DataCollector is not set.');
        }

        // Fall back to the XMLWriter driver when no XML driver was configured.
        if (!$this->getXml()) {
            $this->setXml(XMLWriter::class, ['domain' => $this->getDomain()]);
        }

        $xml = $this->getXml();

        // Start from a clean temporary directory.
        $this->removeDir($this->getTempDirectory());

        // Sitemaps are written into the sitemaps sub-directory ...
        $xml->setWorkDir($this->getSitepamsTempDirectory());
        $sitemaps = $this->generateSitemaps();

        // ... while the index is written into the temporary root.
        $xml->setWorkDir($this->getTempDirectory());
        $this->generateSitemapsIndex($sitemaps);

        $this->publishSitemap();
    }
247
248
    /**
     * Returns the path of the public directory (empty string when not set).
     *
     * @return string
     */
    public function getPublicDirectory(): string
    {
        return $this->publicDirectory;
    }
255
256
    /**
     * Sets the public directory to the resolved (realpath) form of the given path.
     *
     * @param string $publicDirectory Path to an existing directory
     *
     * @throws Exception When the path does not exist or is not a directory.
     */
    public function setPublicDirectory(string $publicDirectory): void
    {
        $resolved = realpath($publicDirectory);

        // realpath() also succeeds for regular files, so additionally make
        // sure the resolved path really is a directory.
        if ($resolved === false || !is_dir($resolved)) {
            throw new Exception('Sitemap directory does not exists.');
        }

        $this->publicDirectory = $resolved;
    }
269
270
    /**
     * Returns the configured XML driver, or null when none has been set.
     *
     * @return XML|null
     */
    public function getXml(): ?XML
    {
        return $this->xml;
    }
277
278
    /**
     * Instantiates the given XML driver class and stores it.
     *
     * Nothing happens when the class does not exist or the created object
     * does not implement the XML interface.
     *
     * @param string $driver Fully qualified class name of the driver
     * @param array  $config Driver configuration; 'domain' defaults to this
     *                       sitemap's domain when it is not set
     */
    public function setXml(string $driver, array $config = []): void
    {
        if (!class_exists($driver)) {
            return;
        }

        $config['domain'] = $config['domain'] ?? $this->getDomain();
        $instance = new $driver($config);

        if (!($instance instanceof XML)) {
            return;
        }

        $this->xml = $instance;
    }
296
297
    /**
     * Recursively deletes a directory together with everything inside it.
     *
     * Does nothing when the path is not a directory or cannot be read.
     *
     * @param string $dir
     */
    private function removeDir(string $dir): void
    {
        if (!is_dir($dir)) {
            return;
        }

        // scandir() returns false when the directory cannot be read.
        $objects = scandir($dir);

        if ($objects === false) {
            return;
        }

        foreach ($objects as $object) {
            if ($object === '.' || $object === '..') {
                continue;
            }

            $path = $dir . '/' . $object;

            // Only entries reported as "dir" are descended into; everything
            // else is removed with unlink().
            if (filetype($path) === 'dir') {
                $this->removeDir($path);
            } else {
                unlink($path);
            }
        }

        rmdir($dir);
    }
318
319
    /**
     * Returns the temporary working directory, creating it on first use.
     *
     * The directory is created inside sys_get_temp_dir() under a random name
     * and its resolved path is cached for subsequent calls.
     *
     * @return string
     *
     * @throws Exception When no random name can be generated or the directory
     *                   cannot be created or resolved.
     */
    public function getTempDirectory(): string
    {
        if (empty($this->sitemapTempDirectory)) {
            // random_bytes() yields an unpredictable name; a hash of
            // microtime() can be guessed and may repeat between processes.
            $path = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'sitemap' . bin2hex(random_bytes(16));

            // The trailing is_dir() covers a concurrent creation of the same path.
            if (!is_dir($path) && !mkdir($path) && !is_dir($path)) {
                throw new Exception('Can\'t create temporary directory.');
            }

            $resolved = realpath($path);

            if ($resolved === false) {
                throw new Exception('Can\'t resolve temporary directory.');
            }

            $this->sitemapTempDirectory = $resolved;
        }

        return $this->sitemapTempDirectory;
    }
335
336
    /**
     * Returns the resolved path of the sitemaps directory inside the temporary
     * directory, creating it (recursively) when it does not exist yet.
     *
     * @return string
     *
     * @throws Exception When the directory cannot be created or resolved.
     */
    public function getSitepamsTempDirectory(): string
    {
        $path = $this->getTempDirectory() . DIRECTORY_SEPARATOR . $this->sitepamsDirectory;
        $directory = realpath($path);

        if ($directory === false) {
            // The trailing is_dir() covers a concurrent creation of the same path.
            if (!mkdir($path, 0777, true) && !is_dir($path)) {
                throw new Exception('Can\'t create temporary sitemaps directory.');
            }

            $directory = realpath($path);

            if ($directory === false) {
                throw new Exception('Can\'t resolve temporary sitemaps directory.');
            }
        }

        return $directory;
    }
349
350
    /**
     * Writes all collected items into sitemap files, one series of files per group.
     *
     * Within a group a new file is started whenever the current one reaches
     * ITEM_PER_SITEMAP items or its size gets within 20 characters of
     * SITEMAP_MAX_SIZE. When compression is enabled, the written files are
     * gzipped afterwards.
     *
     * @return array Map of sitemap filename => most recent "lastmod" value
     *               seen in that file (null when no item carried one)
     *
     * @throws Exception When the DataCollector or the XML driver is not set.
     */
    public function generateSitemaps(): array
    {
        $collector = $this->getDataCollector();

        if ($collector === null) {
            throw new Exception('DataCollector is not set.');
        }

        if ((int) $collector->getCount() === 0) {
            return [];
        }

        $xml = $this->getXml();

        if ($xml === null) {
            throw new Exception('XML writer is not set.');
        }

        $files = [];

        foreach ($collector->getGroups() as $group) {
            if ($collector->getGroupCount($group) <= 0) {
                continue;
            }

            $groupNo = 0;
            $itemsInFile = 0;
            $filename = $group . $this->getSeparator() . $groupNo . self::EXT;
            $xml->openSitemap($filename, $collector->getExtensions());
            $files[$filename] = null;
            // Tracks whether a sitemap is currently open, so it is closed exactly once.
            $isOpen = true;

            while ($element = $collector->fetch($group)) {
                $xml->addUrl($element);
                $itemsInFile++;

                // Remember the newest lastmod of the current file.
                if (isset($element['lastmod'])) {
                    $known = $files[$filename];

                    if (!$known || strtotime($element['lastmod']) > strtotime($known)) {
                        $files[$filename] = $element['lastmod'];
                    }
                }

                $limitReached = $itemsInFile >= self::ITEM_PER_SITEMAP
                    || $xml->getSitemapSize() >= (self::SITEMAP_MAX_SIZE - 20); // 20 chars buffer for close tag

                if (!$limitReached) {
                    continue;
                }

                $xml->closeSitemap();
                $isOpen = false;

                // Only start another file when the group still has items left.
                if (!$collector->isLast($group)) {
                    $groupNo++;
                    $itemsInFile = 0;
                    $filename = $group . $this->getSeparator() . $groupNo . self::EXT;
                    $xml->openSitemap($filename, $collector->getExtensions());
                    $files[$filename] = null;
                    $isOpen = true;
                }
            }

            if ($isOpen) {
                $xml->closeSitemap();
            }
        }

        if ($this->isUseCompression() && !empty($files)) {
            $this->compressFiles($this->getSitepamsTempDirectory(), $files);
        }

        return $files;
    }
413
414
    /**
     * Returns the separator placed between name and number in sitemap filenames.
     *
     * @return string
     */
    public function getSeparator(): string
    {
        return $this->separator;
    }
421
422
    /**
     * Stores the separator placed between name and number in sitemap filenames.
     *
     * @param string $separator
     */
    public function setSeparator(string $separator): void
    {
        $this->separator = $separator;
    }
429
430
    /**
     * Tells whether generated files are going to be gzipped.
     *
     * @return bool
     */
    public function isUseCompression(): bool
    {
        return $this->useCompression;
    }
439
440
    /**
     * Switches compression on or off.
     *
     * Enabling compression is silently ignored when the zlib extension is not
     * loaded; disabling it always takes effect.
     *
     * @param bool $useCompression
     */
    public function setUseCompression(bool $useCompression): void
    {
        $canApply = !$useCompression || extension_loaded('zlib');

        if ($canApply) {
            $this->useCompression = $useCompression;
        }
    }
452
453
    /**
     * Gzips every listed file inside $dir and deletes the uncompressed source.
     *
     * On return $files is re-keyed by the compressed filenames; the lastmod
     * values are carried over unchanged.
     *
     * @param string $dir   Directory containing the files
     * @param array  $files Map of filename => lastmod, replaced in place
     *
     * @throws Exception When a source file cannot be opened or read, or an
     *                   archive cannot be created.
     */
    private function compressFiles(string $dir, array &$files): void
    {
        $newFiles = [];

        foreach ($files as $file => $lastmod) {
            $source = $dir . DIRECTORY_SEPARATOR . $file;
            $gzFile = substr($file, 0, -strlen(self::EXT)) . self::GZ_EXT;
            $output = $dir . DIRECTORY_SEPARATOR . $gzFile;

            // Open the source first so a missing file does not leave an
            // empty archive behind.
            $in = fopen($source, 'rb');

            if (!$in) {
                throw new Exception('Can\'t open xml file.');
            }

            $out = gzopen($output, 'wb9');

            if (!$out) {
                fclose($in);

                throw new Exception('Can\'t create GZip archive.');
            }

            try {
                while (!feof($in)) {
                    $chunk = fread($in, 524288);

                    // A failed read would otherwise loop forever, because
                    // feof() never turns true.
                    if ($chunk === false) {
                        throw new Exception('Can\'t read xml file.');
                    }

                    gzwrite($out, $chunk);
                }
            } finally {
                // Release both handles on success and on failure alike.
                fclose($in);
                gzclose($out);
            }

            unlink($source);
            $newFiles[$gzFile] = $lastmod;
        }

        $files = $newFiles;
    }
489
490
    /**
     * Writes sitemap index file(s) referencing the given sitemaps.
     *
     * A new index file is started after every SITEMAP_PER_SITEMAPINDEX
     * entries. When compression is enabled, the index files are gzipped
     * afterwards.
     *
     * @param array $sitemaps Map of sitemap filename => lastmod
     *
     * @return array Map of index filename => null; empty when $sitemaps is empty
     *
     * @throws Exception When the XML driver is not set.
     */
    public function generateSitemapsIndex(array $sitemaps): array
    {
        if (count($sitemaps) === 0) {
            return [];
        }

        $xml = $this->getXml();

        if ($xml === null) {
            throw new Exception('XML writer is not set.');
        }

        // The URL prefix is identical for every sitemap, so resolve it once
        // instead of hitting the filesystem on each iteration.
        $publicDirectory = $this->getPublicDirectory();
        $relative = $this->getSitepamsDirectory();

        if ($publicDirectory !== '' && strpos($relative, $publicDirectory) === 0) {
            $relative = (string) substr($relative, strlen($publicDirectory));
        }

        // URLs always use forward slashes, whatever the platform separator is.
        $relative = trim(str_replace(DIRECTORY_SEPARATOR, '/', $relative), '/');
        // Skip the path segment entirely when sitemaps live directly in the
        // public directory; otherwise the URL would contain "//".
        $baseUrl = $this->getDomain() . '/' . ($relative === '' ? '' : $relative . '/');

        $counter = 0;
        $indexName = $this->getIndexFilename() . self::EXT;
        $files = [$indexName => null];
        $xml->openSitemapIndex($indexName);
        // Tracks whether an index is currently open, so it is closed exactly once.
        $isOpen = true;
        $lastItem = array_key_last($sitemaps);

        foreach ($sitemaps as $sitemap => $lastmod) {
            $xml->addSitemap($baseUrl . $sitemap, $lastmod);
            $counter++;

            if ($counter < self::SITEMAP_PER_SITEMAPINDEX) {
                continue;
            }

            $xml->closeSitemapIndex();
            $isOpen = false;
            $counter = 0;

            // Only start another index file when sitemaps are left.
            if ($sitemap !== $lastItem) {
                $indexName = $this->getIndexFilename() . $this->getSeparator() . count($files) . self::EXT;
                $files[$indexName] = null;
                $xml->openSitemapIndex($indexName);
                $isOpen = true;
            }
        }

        if ($isOpen) {
            $xml->closeSitemapIndex();
        }

        if ($this->isUseCompression()) {
            $this->compressFiles($this->getTempDirectory(), $files);
        }

        return $files;
    }
532
533
    /**
     * Returns the base filename (without extension) of the sitemap index file.
     *
     * @return string
     */
    public function getIndexFilename(): string
    {
        return $this->indexFilename;
    }
542
543
    /**
     * Stores the base filename (without extension) of the sitemap index file.
     *
     * @param string $indexFilename
     */
    public function setIndexFilename(string $indexFilename): void
    {
        $this->indexFilename = $indexFilename;
    }
552
553
    /**
     * Returns the resolved path of the sitemaps directory inside the public
     * directory, creating it (recursively) when it does not exist yet.
     *
     * @return string
     *
     * @throws Exception When the directory cannot be created or resolved.
     */
    public function getSitepamsDirectory(): string
    {
        $path = $this->getPublicDirectory() . DIRECTORY_SEPARATOR . $this->sitepamsDirectory;
        $directory = realpath($path);

        if ($directory === false) {
            // The trailing is_dir() covers a concurrent creation of the same path.
            if (!mkdir($path, 0777, true) && !is_dir($path)) {
                throw new Exception('Can\'t create sitemaps directory.');
            }

            $directory = realpath($path);

            if ($directory === false) {
                throw new Exception('Can\'t resolve sitemaps directory.');
            }
        }

        return $directory;
    }
565
566
    /**
     * Stores the sitemaps directory; the getters resolve it against the
     * public and temporary directories.
     *
     * @param string $sitepamsDirectory
     */
    public function setSitepamsDirectory(string $sitepamsDirectory): void
    {
        $this->sitepamsDirectory = $sitepamsDirectory;
    }
573
574
    /**
     * Moves the generated files from the temporary directory into the public
     * directory, replacing previously published sitemaps, and removes the
     * temporary directory afterwards.
     */
    private function publishSitemap(): void
    {
        $publicDirectory = $this->getPublicDirectory();
        $tempDirectory = $this->getTempDirectory();
        $extension = $this->getExt();

        // Clear previous sitemaps. The sitemaps directory is only wiped when
        // it lies strictly inside the public directory: with an empty
        // sitemaps directory it resolves to the public directory itself,
        // which must never be deleted as a whole.
        $sitemapsDirectory = $this->getSitepamsDirectory();

        if (strpos($sitemapsDirectory, $publicDirectory . DIRECTORY_SEPARATOR) === 0) {
            $this->removeDir($sitemapsDirectory);
        }

        // Remove old index files. Every dynamic part is quoted, and the
        // configured separator is used because index filenames are built with it.
        $indexPattern = '/^' . preg_quote($this->getIndexFilename(), '/')
            . '(' . preg_quote($this->getSeparator(), '/') . '\d+)?'
            . preg_quote($extension, '/') . '$/';
        $entries = scandir($publicDirectory);

        foreach ($entries === false ? [] : $entries as $entry) {
            $entryPath = $publicDirectory . DIRECTORY_SEPARATOR . $entry;

            if (preg_match($indexPattern, $entry) === 1 && is_file($entryPath)) {
                unlink($entryPath);
            }
        }

        $this->getSitepamsDirectory(); // To (re)create the sitemaps directory

        // Collect the files first; moving them while iterating would modify
        // the directory tree under the iterator.
        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($tempDirectory, RecursiveDirectoryIterator::SKIP_DOTS)
        );
        $extensionLength = strlen($extension);
        $fileList = [];

        foreach ($iterator as $fileInfo) {
            $path = $fileInfo->getPathname();

            if ($fileInfo->isFile() && substr($path, -$extensionLength) === $extension) {
                $fileList[] = $path;
            }
        }

        foreach ($fileList as $file) {
            // Swap only the leading temporary-directory prefix for the public one.
            $destination = $publicDirectory . substr($file, strlen($tempDirectory));
            rename($file, $destination);
        }

        $this->removeDir($tempDirectory);
    }
611
612
    /**
     * Returns the file extension matching the current compression setting:
     * GZ_EXT when compression is used, EXT otherwise.
     *
     * @return string
     */
    private function getExt()
    {
        return $this->isUseCompression() ? self::GZ_EXT : self::EXT;
    }
623
}
624