Passed
Push — master ( a46cb4...8ff499 )
by Michael
27:12 queued 15:45
created

PublicSuffixList::readPSL()   C

Complexity

Conditions 12
Paths 30

Size

Total Lines 29
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 12
eloc 19
c 0
b 0
f 0
nc 30
nop 0
dl 0
loc 29
rs 6.9666

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive fragment of the long method into its own, well-named method.

1
<?php declare(strict_types=1);
2
3
namespace Xoops\RegDom;
4
5
/**
 * Manage the Public Suffix List (PSL) data. This includes downloading the list, converting it to
 * an array tree structure for access in PHP, and caching the results.
 *
 * The tree is keyed from the top-level label downwards: the rule "co.uk" becomes
 * ['uk' => ['co' => []]]. An exception rule such as "!www.ck" is stored as
 * ['ck' => ['www' => ['!' => '']]].
 *
 * @package   Xoops\RegDom
 * @author    Florian Sager, 06.08.2008, <[email protected]>
 * @author    Marcus Bointon (https://github.com/Synchro/regdom-php)
 * @author    Richard Griffith <[email protected]>
 * @license   Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 */
class PublicSuffixList
{
    /** Default remote location of the Public Suffix List. */
    private string $sourceURL = 'https://publicsuffix.org/list/public_suffix_list.dat';

    /** Path of the local PSL copy, relative to __DIR__ (recomputed by setLocalPSLName()). */
    private string $localPSL = 'public_suffix_list.dat';

    /** File name prefix used for serialized tree caches in the data directory. */
    private string $cachedPrefix = 'cached_';

    /** Parsed PSL tree, or null while nothing has been loaded successfully. */
    private ?array $tree = null;

    /** Source currently in use: a remote URL, a path relative to __DIR__, or null for "default". */
    private ?string $url = null;

    private string $dataDir = '/../data/'; // relative to __DIR__

    /**
     * PublicSuffixList constructor.
     *
     * @param string|null $url URL for the PSL or null to use default
     */
    public function __construct(?string $url = null)
    {
        $this->setURL($url);
    }

    /**
     * Set the URL, and clear any existing tree
     *
     * @param string|null $url URL for the PSL or null to use default
     * @return void
     */
    public function setURL(?string $url): void
    {
        $this->url = $url;
        $this->tree = null;
    }

    /**
     * Set a fallback (default) for the URL. If we have a locally saved version, prefer it, but use a
     * remote URL if there is no local source.
     *
     * @return void
     */
    private function setFallbackURL(): void
    {
        $this->setLocalPSLName($this->url);
        if (null !== $this->url) {
            return;
        }

        $this->url = \file_exists(__DIR__ . $this->localPSL) ? $this->localPSL : $this->sourceURL;
    }

    /**
     * Load the PSL tree, automatically handling caches
     *
     * The tree is only assigned once a source has been read successfully, so a failed load leaves
     * $this->tree null and the next getTree() call tries again instead of returning an empty tree.
     *
     * @return void (results in $this->tree)
     * @throws \RuntimeException if neither a cache nor the source could be read
     */
    private function loadTree(): void
    {
        $this->setFallbackURL();
        $url = (string) $this->url;

        $cached = $this->readCachedPSL($url);
        if (null !== $cached) {
            $this->tree = $cached;
            return;
        }

        $list = $this->readPSL();
        if (false === $list) {
            throw new \RuntimeException('Cannot read ' . $url);
        }

        $this->tree = [];
        $this->parsePSL($list);
        // Caching is best effort: a failed write only costs a re-parse on the next request.
        $this->cachePSL($url);
    }

    /**
     * Parse the PSL data
     *
     * Blank lines and lines starting with "//" are skipped. Following the PSL format, each
     * remaining line is only read up to the first whitespace.
     *
     * @param string $fileData the PSL data
     * @return void (results in $this->tree)
     */
    private function parsePSL(string $fileData): void
    {
        if (null === $this->tree) {
            $this->tree = [];
        }

        foreach (\explode("\n", $fileData) as $line) {
            $rule = $this->extractRule($line);
            if (null === $rule) {
                continue;
            }

            $this->buildSubDomain($this->tree, \explode('.', $rule));
        }
    }

    /**
     * Extract the rule from a single PSL line.
     *
     * @param string $line raw line, possibly with a trailing "\r" or surrounding whitespace
     * @return string|null the rule text, or null for blank lines and comments
     */
    private function extractRule(string $line): ?string
    {
        $line = \trim($line);
        if ('' === $line || $this->startsWith($line, '//')) {
            return null;
        }

        // Keep only the text before the first whitespace character.
        return \substr($line, 0, \strcspn($line, " \t\r\n\0\x0B"));
    }

    /**
     * Does $search start with $startString?
     *
     * @param string $search the string to test
     * @param string $startString the starting string to match
     * @return bool
     */
    private function startsWith(string $search, string $startString): bool
    {
        // Compares the prefix only, rather than searching the whole string for the needle.
        return 0 === \strncmp($search, $startString, \strlen($startString));
    }

    /**
     * Add domains to tree
     *
     * Consumes $tldParts from the right (the top-level label first), creating one nested level
     * per label. A label prefixed with "!" is an exception rule: it is stored without the prefix,
     * marked with a '!' key, and ends the recursion.
     *
     * @param array $node tree array by reference
     * @param string[] $tldParts array of domain parts
     * @return void - changes made to $node by reference
     */
    private function buildSubDomain(array &$node, array $tldParts): void
    {
        if ([] === $tldParts) {
            return; // nothing to add; also avoids passing null to trim() under strict types
        }

        $dom = \trim((string) \array_pop($tldParts));

        $isNotDomain = $this->startsWith($dom, '!');
        if ($isNotDomain) {
            $dom = \substr($dom, 1);
        }

        if (!\array_key_exists($dom, $node)) {
            $node[$dom] = $isNotDomain ? ['!' => ''] : [];
        }

        if (!$isNotDomain && [] !== $tldParts) {
            $this->buildSubDomain($node[$dom], $tldParts);
        }
    }

    /**
     * Return the current tree, loading it if needed
     *
     * @return array the PSL tree
     * @throws \RuntimeException if PSL cannot be loaded
     */
    public function getTree(): array
    {
        if (null === $this->tree) {
            $this->loadTree();
        }

        return $this->tree ?? [];
    }

    /**
     * Read PSL from the URL or file specified in $this->url.
     * If we process a remote URL, save a local copy.
     *
     * Local sources are resolved relative to __DIR__. Remote sources are fetched with
     * file_get_contents() first and with cURL as a fallback.
     *
     * @return string|false PSL file contents or false on error
     */
    private function readPSL()
    {
        $url = (string) $this->url;

        if (!$this->isRemote($url)) {
            return $this->readLocalFile(__DIR__ . $url);
        }

        $contents = \file_get_contents($url);
        if (false === $contents) {
            $contents = $this->fetchWithCurl($url);
        }

        if (false !== $contents) {
            $this->saveLocalPSL($contents);
        }

        return $contents;
    }

    /**
     * Decide whether a source names a remote resource (it has a scheme or a host component).
     *
     * @param string $url URL/filename of source PSL
     * @return bool true if the source is remote, false if it is a local path
     */
    private function isRemote(string $url): bool
    {
        $parts = \parse_url($url);

        return \is_array($parts) && (isset($parts['scheme']) || isset($parts['host']));
    }

    /**
     * Read a local file, reporting a missing or unreadable file as false.
     *
     * @param string $path absolute path of the file
     * @return string|false file contents or false on error
     */
    private function readLocalFile(string $path)
    {
        if (!\is_file($path) || !\is_readable($path)) {
            return false;
        }

        return \file_get_contents($path);
    }

    /**
     * Fetch a remote resource with the cURL extension, if it is available.
     *
     * @param string $url remote URL
     * @return string|false response body or false on error
     */
    private function fetchWithCurl(string $url)
    {
        if (!\function_exists('curl_init')) {
            return false;
        }

        $curlHandle = \curl_init();
        if (false === $curlHandle) {
            return false;
        }

        \curl_setopt($curlHandle, \CURLOPT_URL, $url);
        \curl_setopt($curlHandle, \CURLOPT_FAILONERROR, true);
        \curl_setopt($curlHandle, \CURLOPT_RETURNTRANSFER, 1);
        \curl_setopt($curlHandle, \CURLOPT_CONNECTTIMEOUT, 5);
        $response = \curl_exec($curlHandle);
        \curl_close($curlHandle);

        return \is_string($response) ? $response : false;
    }

    /**
     * Determine cache file name for a specified source
     *
     * @param string $url URL/filename of source PSL
     * @return string cache file name for given resource
     */
    private function getCacheFileName(string $url): string
    {
        return __DIR__ . $this->dataDir . $this->cachedPrefix . \md5($url);
    }

    /**
     * Attempt to load a cached Public Suffix List tree for a given source
     *
     * A missing, unreadable or corrupt cache file is reported as null so the caller rebuilds the
     * tree from the source.
     *
     * @param string $url URL/filename of source PSL
     * @return array|null PSL tree, or null if no usable cache exists
     */
    private function readCachedPSL(string $url): ?array
    {
        $cacheFile = $this->getCacheFileName($url);
        if (!\is_file($cacheFile) || !\is_readable($cacheFile)) {
            return null;
        }

        $serialized = \file_get_contents($cacheFile);
        if (false === $serialized) {
            return null;
        }

        // unserialize() returns false for corrupt data; anything but an array is unusable.
        $tree = \unserialize($serialized, ['allowed_classes' => false]);

        return \is_array($tree) ? $tree : null;
    }

    /**
     * Cache the current Public Suffix List tree and associate with the specified source
     *
     * @param string $url URL/filename of source PSL
     * @return bool|int the number of bytes that were written to the file, or false on failure
     */
    private function cachePSL(string $url)
    {
        return \file_put_contents($this->getCacheFileName($url), \serialize($this->tree));
    }

    /**
     * Save a local copy of a retrieved Public Suffix List
     *
     * @param string $fileContents the PSL data to write to the local copy
     * @return bool|int the number of bytes that were written to the file, or false on failure
     */
    private function saveLocalPSL(string $fileContents)
    {
        return \file_put_contents(__DIR__ . $this->localPSL, $fileContents);
    }

    /**
     * Set localPSL name based on URL
     *
     * The local file is named after the last path segment of the URL. If the URL cannot be
     * parsed or has no usable path segment, the file name of the default source is used.
     *
     * @param string|null $url the URL for the PSL
     * @return void (sets $this->localPSL)
     */
    private function setLocalPSLName(?string $url): void
    {
        $path = \parse_url($url ?? $this->sourceURL, \PHP_URL_PATH);
        $fileName = \is_string($path) ? \basename($path) : '';

        if ('' === $fileName) {
            $fileName = \basename((string) \parse_url($this->sourceURL, \PHP_URL_PATH));
        }

        $this->localPSL = $this->dataDir . $fileName;
    }

    /**
     * Delete files in the data directory
     *
     * Only regular files are removed; directories and symbolic links are left alone.
     *
     * @param bool $cacheOnly true to limit clearing to cached serialized PSLs, false to clear all
     * @return void
     */
    public function clearDataDirectory(bool $cacheOnly = false): void
    {
        $dir = __DIR__ . $this->dataDir;
        if (!\is_dir($dir)) {
            return;
        }

        $dirHandle = \opendir($dir);
        if (false === $dirHandle) {
            return;
        }

        while (false !== ($file = \readdir($dirHandle))) {
            if ($cacheOnly && !$this->startsWith($file, $this->cachedPrefix)) {
                continue;
            }

            $path = $dir . $file;
            if (\is_file($path) && !\is_link($path)) {
                \unlink($path);
            }
        }

        \closedir($dirHandle);
    }
}
289