SitemapUrlProvider::getLocEntries()   A
last analyzed

Complexity

Conditions 3
Paths 4

Size

Total Lines 17
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 3

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 17
ccs 11
cts 11
cp 1
rs 9.4285
cc 3
eloc 9
nc 4
nop 1
crap 3
1
<?php
2
3
namespace Zenstruck\CacheBundle\Url;
4
5
use Http\Client\HttpClient;
6
use Http\Message\MessageFactory;
7
use Symfony\Component\DomCrawler\Crawler as DomCrawler;
8
9
/**
 * Provides URLs collected from one or more XML sitemaps fetched over HTTP.
 *
 * Each configured entry is either the URL of a sitemap document or a bare
 * host (no path, or a path of "/"). For a bare host the default locations
 * "sitemap_index.xml" and then "sitemap.xml" are tried. Sitemap index
 * documents are followed recursively.
 *
 * @author Kevin Bond <[email protected]>
 */
class SitemapUrlProvider implements UrlProvider
{
    /**
     * @var array Sitemap URLs or hosts, as passed to the constructor
     */
    private $sitemaps;

    /**
     * @var HttpClient
     */
    private $httpClient;

    /**
     * @var MessageFactory
     */
    private $messageFactory;

    /**
     * @var array|null Memoized result of getUrls(); null until the first call
     */
    private $urls;

    /**
     * @param array          $sitemaps       Sitemap URLs or hosts to read URLs from
     * @param HttpClient     $httpClient     Client used to fetch the sitemap documents
     * @param MessageFactory $messageFactory Factory used to build the GET requests
     *
     * @throws \RuntimeException If the Symfony DomCrawler class is not available
     */
    public function __construct(array $sitemaps, HttpClient $httpClient, MessageFactory $messageFactory)
    {
        if (!class_exists('Symfony\\Component\\DomCrawler\\Crawler')) {
            throw new \RuntimeException('symfony/dom-crawler and symfony/css-selector must be installed to use SitemapUrlProvider.');
        }

        $this->sitemaps = $sitemaps;
        $this->httpClient = $httpClient;
        $this->messageFactory = $messageFactory;
    }

    /**
     * {@inheritdoc}
     */
    public function count()
    {
        return count($this->getUrls());
    }

    /**
     * {@inheritdoc}
     */
    public function getUrls()
    {
        // The sitemaps are fetched only once per instance; later calls reuse the result.
        if (null !== $this->urls) {
            return $this->urls;
        }

        $urls = [];

        foreach ($this->sitemaps as $sitemap) {
            $urls = array_merge($urls, $this->getUrlsForSitemapUrl($sitemap));
        }

        return $this->urls = $urls;
    }

    /**
     * Resolves a configured sitemap entry (or an entry found in a sitemap
     * index) to the list of URLs it contains.
     *
     * @param string $sitemap   Sitemap URL or bare host
     * @param array  $ancestors Sitemap index URLs currently being expanded (URL => true)
     *
     * @return array
     */
    private function getUrlsForSitemapUrl($sitemap, array $ancestors = [])
    {
        $path = parse_url($sitemap, PHP_URL_PATH);

        // No path (or just "/") means only a host was given: probe the default locations.
        if (null === $path || '/' === trim($path)) {
            return $this->tryDefaultSitemapUrls($sitemap, $ancestors);
        }

        return $this->parseUrl($sitemap, $ancestors);
    }

    /**
     * Tries "sitemap_index.xml" and, when that yields nothing, "sitemap.xml"
     * on the given host.
     *
     * @param string $host
     * @param array  $ancestors Sitemap index URLs currently being expanded (URL => true)
     *
     * @return array
     */
    private function tryDefaultSitemapUrls($host, array $ancestors = [])
    {
        // try default sitemap_index.xml
        $urls = $this->parseUrl($this->addPathToHost('sitemap_index.xml', $host), $ancestors);

        if (empty($urls)) {
            // try default sitemap.xml
            $urls = $this->parseUrl($this->addPathToHost('sitemap.xml', $host), $ancestors);
        }

        return $urls;
    }

    /**
     * Fetches a sitemap document and returns the URLs it lists. A sitemap
     * index is expanded recursively; any non-200 response yields an empty
     * list.
     *
     * @param string $url
     * @param array  $ancestors Sitemap index URLs currently being expanded (URL => true)
     *
     * @return array
     */
    private function parseUrl($url, array $ancestors = [])
    {
        // A sitemap index that (directly or indirectly) lists itself would
        // otherwise recurse forever; skip URLs already being expanded.
        if (isset($ancestors[$url])) {
            return [];
        }

        $response = $this->httpClient->sendRequest($this->messageFactory->createRequest('GET', $url));

        if (200 !== $response->getStatusCode()) {
            return [];
        }

        $body = (string) $response->getBody();

        if (false !== strpos($body, '<sitemapindex')) {
            $ancestors[$url] = true;

            return $this->parseSitemapIndex($body, $ancestors);
        }

        return $this->getLocEntries($body);
    }

    /**
     * Expands every sitemap referenced by a sitemap index document.
     *
     * @param string $body      Raw XML of the sitemap index
     * @param array  $ancestors Sitemap index URLs currently being expanded (URL => true)
     *
     * @return array
     */
    private function parseSitemapIndex($body, array $ancestors = [])
    {
        $urls = [];

        foreach ($this->getLocEntries($body) as $entry) {
            $urls = array_merge($urls, $this->getUrlsForSitemapUrl($entry, $ancestors));
        }

        return $urls;
    }

    /**
     * Extracts the text of every <loc> element in a sitemap document.
     *
     * Values are trimmed, because pretty-printed sitemaps often wrap the URL
     * in whitespace or newlines, and entries that are empty after trimming
     * are skipped.
     *
     * @param string $body Raw XML of a sitemap or sitemap index
     *
     * @return array
     */
    private function getLocEntries($body)
    {
        $crawler = new DomCrawler($body);

        // When the document declares prefixed namespaces, <loc> has to be
        // addressed through the crawler's "default" namespace prefix.
        // NOTE(review): this mirrors DomCrawler dropping the default namespace
        // only when no "xmlns:" declaration is present - confirm against the
        // installed DomCrawler version.
        $filter = preg_match('/xmlns:/', $body) ? 'default|loc' : 'loc';

        $entries = [];

        foreach ($crawler->filter($filter) as $node) {
            $loc = trim($node->nodeValue);

            if ('' !== $loc) {
                $entries[] = $loc;
            }
        }

        return $entries;
    }

    /**
     * Joins a host and a path with exactly one slash between them.
     *
     * @param string $path
     * @param string $host
     *
     * @return string
     */
    private function addPathToHost($path, $host)
    {
        return sprintf('%s/%s', trim($host, '/'), ltrim($path, '/'));
    }
}
166