Issues (3083)

vendor/geekwright/regdom/src/PublicSuffixList.php (2 issues)

1
<?php
2
namespace Geekwright\RegDom;
3
4
/**
5
 * Manage the Public Suffix List (PSL) data. This includes downloading, converting to an array tree
6
 * structure for access in PHP, and caching the results.
7
 *
8
 * @package   Geekwright\RegDom
9
 * @author    Florian Sager, 06.08.2008, <[email protected]>
10
 * @author    Marcus Bointon (https://github.com/Synchro/regdom-php)
11
 * @author    Richard Griffith <[email protected]>
12
 * @license   Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
13
 */
14
class PublicSuffixList
15
{
16
    /** @var string remote location used when no URL is given and no local copy exists */
    protected $sourceURL = 'https://publicsuffix.org/list/public_suffix_list.dat';

    /** @var string local PSL copy; setLocalPSLName() rewrites it as $dataDir plus a file name */
    protected $localPSL = 'public_suffix_list.dat';

    /** @var string file name prefix for serialized tree caches in the data directory */
    protected $cachedPrefix = 'cached_';

    /** @var array|null the PSL tree; null means it has not been loaded yet */
    protected $tree;

    /** @var string|null URL or file name of the PSL source; null until a fallback is chosen */
    protected $url;

    /** @var string data directory, appended to __DIR__ wherever it is used */
    protected $dataDir = '/../data/'; // relative to __DIR__
23
24
    /**
     * Create a list manager for the given PSL source.
     *
     * @param string|null $url URL for the PSL, or null to have a default chosen when the tree is loaded
     */
    public function __construct($url = null)
    {
        // Delegate to setURL() so construction and later re-targeting share one code path.
        $this->setURL($url);
    }
32
33
    /**
     * Point this instance at a PSL source and discard any tree already held.
     *
     * @param string|null $url URL for the PSL or null to use default
     *
     * @return void
     */
    public function setURL($url)
    {
        // A null tree makes the next getTree() call load from the new source.
        $this->tree = null;
        $this->url = $url;
    }
45
46
    /**
     * Choose a default source when no URL was supplied: the locally saved copy if it
     * exists, otherwise the remote source URL. Also refreshes the local PSL name.
     *
     * @return void
     */
    protected function setFallbackURL()
    {
        // Must run first: it derives $this->localPSL, which the check below relies on.
        $this->setLocalPSLName($this->url);

        if (null !== $this->url) {
            return;
        }

        $localCopy = __DIR__ . $this->localPSL;
        if (file_exists($localCopy)) {
            $this->url = $this->localPSL;
        } else {
            $this->url = $this->sourceURL;
        }
    }
59
60
    /**
     * Load the PSL tree, automatically handling caches.
     *
     * A cached tree for the current source is used when available; otherwise the
     * list is read, parsed and cached.
     *
     * @return void (results in $this->tree)
     *
     * @throws \RuntimeException if the PSL cannot be read; the tree is left unloaded
     */
    protected function loadTree()
    {
        $this->setFallbackURL();

        $cachedTree = $this->readCachedPSL($this->url);
        if (false !== $cachedTree) {
            $this->tree = $cachedTree;
            return;
        }

        $list = $this->readPSL();
        if (false === $list) {
            // Keep the tree unloaded so a later getTree() retries (or throws again)
            // instead of handing out an empty tree as though it were a valid list.
            $this->tree = null;
            throw new \RuntimeException('Cannot read ' . $this->url);
        }

        $this->tree = array();
        $this->parsePSL($list);
        $this->cachePSL($this->url);
    }
86
87
    /**
     * Parse the PSL data into $this->tree.
     *
     * Blank lines and lines starting with "//" are skipped. Surrounding whitespace
     * (including a trailing "\r" from CRLF files) is ignored, and a rule is read only
     * up to the first whitespace on its line.
     *
     * @param string $fileData the PSL data
     *
     * @return void (results in $this->tree)
     */
    protected function parsePSL($fileData)
    {
        $lines = explode("\n", $fileData);

        foreach ($lines as $line) {
            // Without trimming, a whitespace-only line would be stored as an empty-key node.
            $line = trim($line);
            if ('' === $line || $this->startsWith($line, "//")) {
                continue;
            }

            // After trim() the line starts with a non-blank character, so the span is never empty.
            $rule = substr($line, 0, strcspn($line, " \t"));

            // this line should be a TLD
            $tldParts = explode('.', $rule);

            $this->buildSubDomain($this->tree, $tldParts);
        }
    }
109
110
    /**
     * Report whether $search begins with $startString.
     *
     * @param string $search      the string to test
     * @param string $startString the starting string to match
     *
     * @return bool
     */
    protected function startsWith($search, $startString)
    {
        $position = strpos($search, $startString);

        // Strict comparison: a "not found" false must not be mistaken for offset 0.
        return $position === 0;
    }
122
123
    /**
     * Insert one rule into the tree, consuming its labels from right to left.
     *
     * A label prefixed with "!" is stored without the prefix, with a "!" marker entry
     * when the node is newly created, and ends the descent.
     *
     * @param array    $node     tree array by reference
     * @param string[] $tldParts array of domain parts
     *
     * @return void - changes made to $node by reference
     */
    protected function buildSubDomain(&$node, $tldParts)
    {
        $label = trim(array_pop($tldParts));

        $isException = $this->startsWith($label, "!");
        if ($isException) {
            $label = substr($label, 1);
        }

        // Existing nodes are left untouched; only a missing one is created.
        if (!array_key_exists($label, $node)) {
            $node[$label] = $isException ? array("!" => "") : array();
        }

        if ($isException || count($tldParts) === 0) {
            return;
        }

        $this->buildSubDomain($node[$label], $tldParts);
    }
153
154
    /**
     * Return the PSL tree, loading it first when none is held yet.
     *
     * @return array the PSL tree
     * @throws \RuntimeException if PSL cannot be loaded
     */
    public function getTree()
    {
        if (null !== $this->tree) {
            return $this->tree;
        }

        $this->loadTree();

        return $this->tree;
    }
167
168
    /**
     * Read PSL from the URL or file specified in $this->url.
     * If we process a remote URL, save a local copy.
     *
     * A source with a scheme or host is treated as remote; anything else is read
     * relative to __DIR__. If file_get_contents() fails, curl is tried when available.
     *
     * @return string|false PSL file contents or false on error
     */
    protected function readPSL()
    {
        $parts = parse_url($this->url);
        $remote = isset($parts['scheme']) || isset($parts['host']);

        // try to read with file_get_contents
        $newPSL = file_get_contents(($remote ? '' : __DIR__) . $this->url);
        if (false !== $newPSL) {
            if ($remote) {
                $this->saveLocalPSL($newPSL);
            }
            return $newPSL;
        }

        // try again with curl if file_get_contents failed
        if (!function_exists('curl_init')) {
            return false;
        }
        $curlHandle = curl_init();
        if (false === $curlHandle) {
            return false;
        }

        curl_setopt($curlHandle, CURLOPT_URL, $this->url);
        curl_setopt($curlHandle, CURLOPT_FAILONERROR, true);
        curl_setopt($curlHandle, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curlHandle, CURLOPT_CONNECTTIMEOUT, 5);
        $curlReturn = curl_exec($curlHandle);
        curl_close($curlHandle);

        // curl_exec() is typed string|bool; only a string is PSL data, so anything
        // else (false, or a bare true) is reported as a failed read.
        if (!is_string($curlReturn)) {
            return false;
        }

        if ($remote) {
            $this->saveLocalPSL($curlReturn);
        }

        return $curlReturn;
    }
204
205
    /**
     * Build the path of the cache file that belongs to a given source.
     *
     * @param string $url URL/filename of source PSL
     *
     * @return string cache file name for given resource
     */
    protected function getCacheFileName($url)
    {
        // One cache file per source: the prefix followed by the MD5 of the URL/filename.
        $cacheName = $this->cachedPrefix . md5($url);

        return __DIR__ . $this->dataDir . $cacheName;
    }
216
217
    /**
     * Attempt to load a cached Public Suffix List tree for a given source.
     *
     * Returns false when there is no cache file, when it cannot be read, or when its
     * contents do not unserialize to an array, so the caller rebuilds the tree.
     *
     * @param string $url URL/filename of source PSL
     *
     * @return false|array PSL tree
     */
    protected function readCachedPSL($url)
    {
        $cacheFile = $this->getCacheFileName($url);
        if (!file_exists($cacheFile)) {
            return false;
        }

        $cachedTree = file_get_contents($cacheFile);
        if (false === $cachedTree) {
            return false;
        }

        if (PHP_VERSION_ID < 70000) {
            // NOTE(review): before PHP 7 unserialize() has no 'allowed_classes' option,
            // so objects in a tampered cache file would be instantiated here.
            $tree = unserialize($cachedTree);
        } else {
            $tree = unserialize($cachedTree, array('allowed_classes' => false));
        }

        // A corrupt or foreign cache file must not be handed back as the tree.
        return is_array($tree) ? $tree : false;
    }
236
237
    /**
     * Serialize the current tree into the cache file belonging to the given source.
     *
     * @param string $url URL/filename of source PSL
     *
     * @return bool|int the number of bytes that were written to the file, or false on failure
     */
    protected function cachePSL($url)
    {
        $cacheFile = $this->getCacheFileName($url);
        $payload = serialize($this->tree);

        return file_put_contents($cacheFile, $payload);
    }
248
249
    /**
     * Write retrieved Public Suffix List data to the local copy ($this->localPSL,
     * resolved against __DIR__).
     *
     * @param string $fileContents the PSL data to store
     *
     * @return bool|int the number of bytes that were written to the file, or false on failure
     */
    protected function saveLocalPSL($fileContents)
    {
        $localFile = __DIR__ . $this->localPSL;

        return file_put_contents($localFile, $fileContents);
    }
260
261
    /**
     * Set localPSL name based on URL.
     *
     * The name is the data directory plus the base name of the URL's path. When the
     * URL has no usable path (for example "https://example.com", or a malformed URL),
     * the base name of the default source URL is used instead.
     *
     * @param null|string $url the URL for the PSL
     *
     * @return void (sets $this->localPSL)
     */
    protected function setLocalPSLName($url)
    {
        if (null === $url) {
            $url = $this->sourceURL;
        }

        // parse_url() yields null for a missing component and false for a malformed URL.
        $path = parse_url($url, PHP_URL_PATH);
        $fileName = is_string($path) ? basename($path) : '';

        if ('' === $fileName) {
            // Otherwise localPSL would name the data directory itself.
            $fileName = basename((string) parse_url($this->sourceURL, PHP_URL_PATH));
        }

        $this->localPSL = $this->dataDir . $fileName;
    }
277
278
    /**
     * Remove regular files from the data directory.
     *
     * @param bool $cacheOnly true to limit clearing to cached serialized PSLs, false to clear all
     *
     * @return void
     */
    public function clearDataDirectory($cacheOnly = false)
    {
        $dir = __DIR__ . $this->dataDir;
        if (!is_dir($dir)) {
            return;
        }

        $dirHandle = opendir($dir);
        if (!$dirHandle) {
            return;
        }

        while (false !== ($file = readdir($dirHandle))) {
            // Only entries whose filetype() is 'file' are candidates for removal.
            if (filetype($dir . $file) !== 'file') {
                continue;
            }
            if (false === $cacheOnly || $this->startsWith($file, $this->cachedPrefix)) {
                unlink($dir . $file);
            }
        }

        closedir($dirHandle);
    }
300
}
301