Completed
Push — master ( dd732a...da0794 )
by Carlos C
01:21
created

AbstractRetriever::recursiveRetrieve()   B

Complexity

Conditions 5
Paths 5

Size

Total Lines 29
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 5

Importance

Changes 0
Metric Value
dl 0
loc 29
ccs 17
cts 17
cp 1
rs 8.439
c 0
b 0
f 0
cc 5
eloc 23
nc 5
nop 5
crap 5
1
<?php
2
namespace XmlResourceRetriever;
3
4
use DOMDocument;
5
use XmlResourceRetriever\Downloader\DownloaderInterface;
6
use XmlResourceRetriever\Downloader\PhpDownloader;
7
8
/**
 * Base retriever: downloads an XML resource into a local tree below the base path
 * and recursively downloads the resources referenced by it, rewriting every
 * reference so that it points to the local copy using a relative path.
 *
 * Concrete classes declare what to look for by implementing
 * searchNamespace() and searchElements().
 */
abstract class AbstractRetriever implements RetrieverInterface
{
    /** @var string Local directory used as root for every stored resource */
    private $basePath;

    /** @var DownloaderInterface Object used to transfer a remote resource to a local file */
    private $downloader;

    /**
     * Retrieved resources (url => local file name) of the current retrieve() call,
     * it is also what prevents infinite recursion on circular references
     *
     * @var array
     */
    private $history = [];

    /**
     * Must return a string with the namespace to search for
     *
     * @return string
     */
    abstract protected function searchNamespace(): string;

    /**
     * Must return a table with rows (array of array)
     * every row must contain the keys element and attribute
     * "element" is the tag name to search for
     * "attribute" is the attribute name that contains the url
     *
     * @return array
     */
    abstract protected function searchElements(): array;

    /**
     * Retriever constructor.
     *
     * @param string $basePath
     * @param DownloaderInterface $downloader if omitted a PhpDownloader is created
     */
    public function __construct($basePath, DownloaderInterface $downloader = null)
    {
        $this->basePath = $basePath;
        $this->setDownloader($downloader ?? new PhpDownloader());
    }

    /**
     * @return string the base path received on construction
     */
    public function getBasePath(): string
    {
        return $this->basePath;
    }

    /**
     * @return DownloaderInterface the downloader currently in use
     */
    public function getDownloader(): DownloaderInterface
    {
        return $this->downloader;
    }

    /**
     * Replace the downloader used by download()
     *
     * @param DownloaderInterface $downloader
     */
    public function setDownloader(DownloaderInterface $downloader)
    {
        $this->downloader = $downloader;
    }

    /**
     * Build the local path of an url as "<base path>/<host>/<path>"
     *
     * @param string $url
     * @return string
     * @throws \InvalidArgumentException when the url is not valid
     */
    public function buildPath(string $url): string
    {
        $parts = $this->urlParts($url);
        if (false === $parts) {
            throw new \InvalidArgumentException("Invalid URL: $url");
        }
        return $this->basePath . '/' . $parts['host'] . '/' . ltrim($parts['path'], '/');
    }

    /**
     * Download a single resource to its local path (see buildPath)
     * and verify that the stored file is detected as xml
     *
     * @param string $resource url of the resource
     * @return string local path where the resource was stored
     * @throws \UnexpectedValueException when the resource is an empty string
     * @throws \InvalidArgumentException when the resource is not a valid url
     * @throws \RuntimeException when the directory cannot be created or the file is not xml
     */
    public function download(string $resource): string
    {
        // validate resource
        if ('' === $resource) {
            throw new \UnexpectedValueException('The argument to download is empty');
        }

        // set destination
        $localPath = $this->buildPath($resource);

        // create local path, the last is_dir() tolerates that the directory
        // was created by someone else between the first check and mkdir()
        $dirname = dirname($localPath);
        if (! is_dir($dirname) && ! @mkdir($dirname, 0777, true) && ! is_dir($dirname)) {
            throw new \RuntimeException("Unable to create directory $dirname");
        }

        // download the file into its final destination
        $this->downloader->downloadTo($resource, $localPath);

        // check content is xml, remove the file if it is not
        $mimetype = (new \finfo())->file($localPath, FILEINFO_MIME_TYPE);
        if (! in_array($mimetype, ['text/xml', 'application/xml'], true)) {
            unlink($localPath);
            throw new \RuntimeException("The source $resource ($mimetype) is not an xml file");
        }

        return $localPath;
    }

    /**
     * Retrieve a resource and all the resources it references,
     * the history is restarted on every call
     *
     * @param string $resource url of the resource
     * @return string local path of the resource
     */
    public function retrieve(string $resource): string
    {
        $this->history = [];
        return $this->doRetrieve($resource);
    }

    /**
     * @return array resources retrieved by the last retrieve() call (url => local file name)
     */
    public function retrieveHistory(): array
    {
        return $this->history;
    }

    /**
     * Download the resource, load it as xml, retrieve its references and,
     * if any reference was rewritten, save the modified document
     *
     * @param string $resource
     * @return string local path of the resource
     * @throws \RuntimeException when the downloaded file cannot be loaded as xml
     */
    private function doRetrieve(string $resource): string
    {
        $localFilename = $this->download($resource);

        $document = new DOMDocument();
        // this error silenced call is intentional,
        // don't need to change the value of libxml_use_internal_errors for this
        if (false === @$document->load($localFilename)) {
            unlink($localFilename);
            throw new \RuntimeException("The source $resource contains errors");
        }

        // register only resources that remain on disk; this must happen before
        // the recursive calls since it is what stops circular references
        $this->history[$resource] = $localFilename;

        // retrieve recursively the resources referenced by each one of the search elements
        $changed = false;
        foreach ($this->searchElements() as $search) {
            $recursiveRetrieve = $this->recursiveRetrieve(
                $document,
                $search['element'],
                $search['attribute'],
                $resource,
                $localFilename
            );
            if ($recursiveRetrieve) {
                $changed = true;
            }
        }

        if ($changed) {
            $document->save($localFilename);
        }
        return $localFilename;
    }

    /**
     * Retrieve the resources referenced by the attribute of every matching element
     * and change the attribute value to the relative path of the local copy
     *
     * @param DOMDocument $document document to inspect and modify
     * @param string $tagName local name of the elements to search (inside searchNamespace)
     * @param string $attributeName attribute that contains the location
     * @param string $currentUrl url of the document, used to resolve relative locations
     * @param string $currentFile local file of the document, used to build relative paths
     * @return bool true if at least one attribute was rewritten
     */
    private function recursiveRetrieve(
        DOMDocument $document,
        string $tagName,
        string $attributeName,
        string $currentUrl,
        string $currentFile
    ): bool {
        $modified = false;
        $elements = $document->getElementsByTagNameNS($this->searchNamespace(), $tagName);
        foreach ($elements as $element) {
            /** @var \DOMElement $element */
            if (! $element->hasAttribute($attributeName)) {
                continue;
            }
            $location = $element->getAttribute($attributeName);
            if ('' === $location) {
                continue;
            }
            $location = $this->relativeToAbsoluteUrl($location, $currentUrl);
            // a location already retrieved (repeated or circular reference) is not
            // retrieved again, but the reference must still point to the local copy
            if (array_key_exists($location, $this->history)) {
                $downloadedChild = $this->history[$location];
            } else {
                $downloadedChild = $this->doRetrieve($location);
            }
            $relative = Utils::relativePath($currentFile, $downloadedChild);
            $element->setAttribute($attributeName, $relative);
            $modified = true;
        }
        return $modified;
    }

    /**
     * Validate the url (a path is required) and return its components
     *
     * @param string $url
     * @return array|false components as returned by parse_url or false if the url is not valid
     */
    private function urlParts(string $url)
    {
        // scheme and host are always required by FILTER_VALIDATE_URL, the flags
        // FILTER_FLAG_SCHEME_REQUIRED and FILTER_FLAG_HOST_REQUIRED no longer exist on PHP 8
        if (false === filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_PATH_REQUIRED)) {
            return false;
        }
        return parse_url($url);
    }

    /**
     * Return the url unchanged when it is a valid url by itself, otherwise
     * build it using scheme, host, port and directory of the current url
     *
     * @param string $url location as found in the document
     * @param string $currentUrl url of the document that contains the location
     * @return string
     */
    private function relativeToAbsoluteUrl(string $url, string $currentUrl): string
    {
        if (false !== $this->urlParts($url)) {
            return $url;
        }
        $currentParts = $this->urlParts($currentUrl);
        $currentParts['port'] = (isset($currentParts['port'])) ? ':' . $currentParts['port'] : '';
        return implode('', [
            $currentParts['scheme'],
            '://',
            $currentParts['host'],
            $currentParts['port'],
            implode('/', Utils::simplifyPath(dirname($currentParts['path']) . '/' . $url)),
        ]);
    }
}
207