Passed
Push — master ( b5dddf...91d417 )
by Richard
09:12
created

PublicSuffixList::startsWith()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 2
1
<?php
2
namespace Geekwright\RegDom;
3
4
/**
 * Manage the Public Suffix List (PSL) data. This includes downloading, converting to an array tree
 * structure for access in PHP, and caching the results.
 *
 * @package   Geekwright\RegDom
 * @author    Florian Sager, 06.08.2008, <[email protected]>
 * @author    Marcus Bointon (https://github.com/Synchro/regdom-php)
 * @author    Richard Griffith <[email protected]>
 * @license   Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 */
class PublicSuffixList
{
    /** @var string default remote source of the PSL */
    protected $sourceURL = 'https://publicsuffix.org/list/public_suffix_list.dat';
    /** @var string local copy of the PSL; setLocalPSLName() rewrites this as a path relative to __DIR__ */
    protected $localPSL = 'public_suffix_list.dat';
    /** @var string file name prefix used for serialized tree caches in the data directory */
    protected $cachedPrefix = 'cached_';

    /** @var array|null the PSL tree, or null when it has not been loaded yet */
    protected $tree;
    /** @var string|null URL or file name of the PSL source, null to use the default */
    protected $url;
    protected $dataDir = '/../data/'; // relative to __DIR__

    /**
     * PublicSuffixList constructor.
     * @param string|null $url URL for the PSL or null to use default
     */
    public function __construct($url = null)
    {
        $this->setURL($url);
    }

    /**
     * Set the URL, and clear any existing tree
     *
     * @param string|null $url URL for the PSL or null to use default
     *
     * @return void
     */
    public function setURL($url)
    {
        $this->url = $url;
        $this->tree = null;
    }

    /**
     * Set a fallback (default) for the URL. If we have a locally saved version, prefer it, but use a
     * remote URL if there is no local source.
     *
     * @return void
     */
    protected function setFallbackURL()
    {
        $this->setLocalPSLName($this->url);
        if (null === $this->url) {
            $this->url = file_exists(__DIR__ . $this->localPSL) ? $this->localPSL : $this->sourceURL;
        }
    }

    /**
     * load the PSL tree, automatically handling caches
     *
     * @return void (results in $this->tree)
     *
     * @throws \RuntimeException if the PSL cannot be read from cache, file or URL
     */
    protected function loadTree()
    {
        $this->setFallbackURL();

        $this->tree = $this->readCachedPSL($this->url);
        if (false !== $this->tree) {
            return;
        }

        $list = $this->readPSL();
        if (!is_string($list)) {
            // Leave the tree unloaded so the next getTree() retries instead of
            // silently handing back an empty tree.
            $this->tree = null;
            throw new \RuntimeException('Cannot read ' . $this->url);
        }

        $this->tree = array();
        $this->parsePSL($list);
        $this->cachePSL($this->url);
    }

    /**
     * Parse the PSL data
     *
     * Blank lines and lines starting with "//" are skipped. Each remaining line is read only up
     * to its first space or tab, and surrounding whitespace (including the "\r" of CRLF line
     * endings) is ignored.
     *
     * @param string $fileData the PSL data
     *
     * @return void (results in $this->tree)
     */
    protected function parsePSL($fileData)
    {
        if (!is_array($this->tree)) {
            $this->tree = array();
        }

        foreach (explode("\n", $fileData) as $line) {
            $line = trim($line);
            if ('' === $line || $this->startsWith($line, '//')) {
                continue;
            }

            // the rule ends at the first whitespace; anything after it is not part of the rule
            $rule = substr($line, 0, strcspn($line, " \t"));

            // this line should be a TLD
            $this->buildSubDomain($this->tree, explode('.', $rule));
        }
    }

    /**
     * Does $search start with $startString?
     *
     * The comparison is byte-wise; strings that merely look like equal numbers do not match.
     *
     * @param string $search      the string to test
     * @param string $startString the starting string to match
     *
     * @return bool
     */
    protected function startsWith($search, $startString)
    {
        return 0 === strncmp($search, $startString, strlen($startString));
    }

    /**
     * Add domains to tree
     *
     * Consumes $tldParts from the right (the TLD end), creating one nested array level per label.
     * A label prefixed with "!" is stored without the prefix as array("!" => "") and ends the
     * descent.
     *
     * @param array    $node     tree array by reference
     * @param string[] $tldParts array of domain parts
     *
     * @return void - changes made to $node by reference
     */
    protected function buildSubDomain(&$node, $tldParts)
    {
        $dom = trim(array_pop($tldParts));

        $isNotDomain = false;
        if ($this->startsWith($dom, "!")) {
            $dom = substr($dom, 1);
            $isNotDomain = true;
        }

        if (!array_key_exists($dom, $node)) {
            if ($isNotDomain) {
                $node[$dom] = array("!" => "");
            } else {
                $node[$dom] = array();
            }
        }

        if (!$isNotDomain && count($tldParts) > 0) {
            $this->buildSubDomain($node[$dom], $tldParts);
        }
    }

    /**
     * Return the current tree, loading it if needed
     *
     * @return array the PSL tree
     *
     * @throws \RuntimeException if the tree has to be loaded and the PSL cannot be read
     */
    public function getTree()
    {
        if (null === $this->tree) {
            $this->loadTree();
        }
        return $this->tree;
    }

    /**
     * Read PSL from the URL or file specified in $this->url.
     * If we process a remote URL, save a local copy.
     *
     * A source with neither scheme nor host is treated as a file name relative to __DIR__.
     *
     * @return bool|string PSL file contents or false on error
     */
    protected function readPSL()
    {
        $parts = parse_url($this->url);
        $remote = isset($parts['scheme']) || isset($parts['host']);

        // try to read with file_get_contents
        $newPSL = file_get_contents(($remote ? '' : __DIR__) . $this->url);

        // try again with curl if file_get_contents failed
        if (false === $newPSL && function_exists('curl_init') && false !== ($curlHandle = curl_init())) {
            curl_setopt($curlHandle, CURLOPT_URL, $this->url);
            curl_setopt($curlHandle, CURLOPT_FAILONERROR, true);
            curl_setopt($curlHandle, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($curlHandle, CURLOPT_CONNECTTIMEOUT, 5);
            $curlReturn = curl_exec($curlHandle);
            curl_close($curlHandle);
            if (false !== $curlReturn) {
                $newPSL = $curlReturn;
            }
        }

        if (false === $newPSL) {
            return false;
        }

        if ($remote) {
            $this->saveLocalPSL($newPSL);
        }
        return $newPSL;
    }

    /**
     * Determine cache file name for a specified source
     *
     * @param string $url URL/filename of source PSL
     *
     * @return string cache file name for given resource
     */
    protected function getCacheFileName($url)
    {
        return __DIR__ . $this->dataDir . $this->cachedPrefix . md5($url);
    }

    /**
     * Attempt to load a cached Public Suffix List tree for a given source
     *
     * A cache file that is missing, unreadable, or does not unserialize to an array is treated
     * as a cache miss.
     *
     * @param string $url URL/filename of source PSL
     *
     * @return bool|string[] PSL tree, or false if no usable cache exists
     */
    protected function readCachedPSL($url)
    {
        $cacheFile = $this->getCacheFileName($url);
        if (!file_exists($cacheFile)) {
            return false;
        }

        $cachedTree = file_get_contents($cacheFile);
        if (false === $cachedTree) {
            return false;
        }

        // NOTE(review): unserialize() is only safe while the data directory holds files written
        // by cachePSL(); do not point this at untrusted files.
        $tree = unserialize($cachedTree);

        return is_array($tree) ? $tree : false;
    }

    /**
     * Cache the current Public Suffix List tree and associate with the specified source
     *
     * @param string $url URL/filename of source PSL
     *
     * @return bool|int the number of bytes that were written to the file, or false on failure
     */
    protected function cachePSL($url)
    {
        return file_put_contents($this->getCacheFileName($url), serialize($this->tree));
    }

    /**
     * Save a local copy of a retrieved Public Suffix List
     *
     * @param string $fileContents contents of the retrieved PSL
     *
     * @return bool|int the number of bytes that were written to the file, or false on failure
     */
    protected function saveLocalPSL($fileContents)
    {
        return file_put_contents(__DIR__ . $this->localPSL, $fileContents);
    }

    /**
     * Set localPSL name based on URL
     *
     * The name is the last path segment of the URL. When the URL has no usable path segment
     * (for example "https://example.com"), a name derived from the URL itself is used so that
     * localPSL never points at the data directory.
     *
     * @param null|string $url the URL for the PSL
     *
     * @return void (sets $this->localPSL)
     */
    protected function setLocalPSLName($url)
    {
        if (null === $url) {
            $url = $this->sourceURL;
        }
        $parts = parse_url($url);
        $path = (is_array($parts) && isset($parts['path'])) ? $parts['path'] : '';
        $fileName = basename($path);
        if ('' === $fileName) {
            $fileName = 'psl_' . md5($url) . '.dat';
        }
        $this->localPSL = $this->dataDir . $fileName;
    }

    /**
     * Delete files in the data directory
     *
     * Only entries whose filetype() is 'file' are removed; directories and links are left alone.
     *
     * @param bool $cacheOnly true to limit clearing to cached serialized PSLs, false to clear all
     *
     * @return void
     */
    public function clearDataDirectory($cacheOnly = false)
    {
        $dir = __DIR__ . $this->dataDir;
        if (is_dir($dir)) {
            if ($dirHandle = opendir($dir)) {
                while (($file = readdir($dirHandle)) !== false) {
                    if (filetype($dir . $file) === 'file'
                        && (false === $cacheOnly || $this->startsWith($file, $this->cachedPrefix))
                    ) {
                        unlink($dir . $file);
                    }
                }
                closedir($dirHandle);
            }
        }
    }
}
299