1 | <?php |
||
21 | class PublicSuffixListManager |
||
22 | { |
||
23 | const PDP_PSL_TEXT_FILE = 'public-suffix-list.txt'; |
||
24 | const PDP_PSL_PHP_FILE = 'public-suffix-list.php'; |
||
25 | |||
26 | /** |
||
27 | * @var string Public Suffix List URL |
||
28 | */ |
||
29 | protected $publicSuffixListUrl = 'https://publicsuffix.org/list/effective_tld_names.dat'; |
||
30 | |||
31 | /** |
||
32 | * @var string Directory where text and php versions of list will be cached |
||
33 | */ |
||
34 | protected $cacheDir; |
||
35 | |||
36 | /** |
||
37 | * @var PublicSuffixList Public Suffix List |
||
38 | */ |
||
39 | protected $list; |
||
40 | |||
41 | /** |
||
42 | * @var \Pdp\HttpAdapter\HttpAdapterInterface Http adapter |
||
43 | */ |
||
44 | protected $httpAdapter; |
||
45 | |||
/**
 * Public constructor.
 *
 * When no directory is given, the cache directory defaults to the "data"
 * directory located two levels above the directory containing this file.
 *
 * @param string $cacheDir Optional cache directory
 */
public function __construct($cacheDir = null)
{
    if (null === $cacheDir) {
        $defaultDir = dirname(dirname(__DIR__)) . DIRECTORY_SEPARATOR . 'data';
        $resolvedDir = realpath($defaultDir);

        // realpath() returns false when the directory does not exist. Keep the
        // unresolved path in that case so that later read/write errors name the
        // intended location instead of building paths from a false value.
        $cacheDir = (false !== $resolvedDir) ? $resolvedDir : $defaultDir;
    }

    $this->cacheDir = $cacheDir;
}
|
61 | |||
/**
 * Rebuilds both cache files from the online source.
 *
 * The list is fetched and stored as text, the stored text file is parsed into
 * an array, and that array is written out as the PHP cache. Existing cache
 * files are overwritten.
 */
public function refreshPublicSuffixList()
{
    $this->fetchListFromSource();

    $textFile = $this->cacheDir . '/' . self::PDP_PSL_TEXT_FILE;
    $parsedList = $this->parseListToArray($textFile);

    $this->writePhpCache($parsedList);
}
|
74 | |||
75 | /** |
||
76 | * Obtain Public Suffix List from its online source and write to cache dir. |
||
77 | * |
||
78 | * @return int Number of bytes that were written to the file |
||
79 | */ |
||
80 | 2 | public function fetchListFromSource() |
|
86 | |||
/**
 * Parses text representation of list to associative, multidimensional array.
 *
 * The file is read while a shared lock is held on it. Every line containing
 * '//' is discarded, and each remaining line is split on '.' and merged into
 * the result through buildArray().
 *
 * This method is based heavily on the code found in generateEffectiveTLDs.php
 *
 * @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
 * A copy of the Apache License, Version 2.0, is provided with this
 * distribution
 *
 * @param string $textFile Public Suffix List text filename
 *
 * @return array Associative, multidimensional array representation of the
 * public suffix list
 *
 * @throws \Exception Throws \Exception if unable to read file
 */
public function parseListToArray($textFile)
{
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($textFile, 'r');
    if (!$fp) {
        throw new \Exception("Cannot read '$textFile'");
    }

    if (!flock($fp, LOCK_SH)) {
        // The handle was opened successfully, so it must be closed before
        // bailing out; otherwise it leaks.
        fclose($fp);
        throw new \Exception("Cannot read '$textFile'");
    }

    $data = file(
        $textFile,
        FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES
    );

    flock($fp, LOCK_UN);
    fclose($fp);

    // file() returns false on failure; report it through the documented
    // exception instead of handing a non-array to array_filter().
    if (false === $data) {
        throw new \Exception("Cannot read '$textFile'");
    }

    // Skip every line containing '//', the comment marker of the list format.
    $data = array_filter(
        $data,
        function ($line) {
            return false === strpos($line, '//');
        }
    );

    $publicSuffixListArray = array();

    foreach ($data as $line) {
        $ruleParts = explode('.', $line);
        $this->buildArray($publicSuffixListArray, $ruleParts);
    }

    return $publicSuffixListArray;
}
||
135 | |||
/**
 * Recursively folds one rule into the array representation of the list.
 *
 * The right-most remaining label of the rule is consumed on each call and
 * used as a key of the current array level; the rest of the rule is handled
 * by recursing into that key. A label starting with '!' is stored without
 * the '!' as array('!' => '') and ends the recursion for that rule.
 *
 * This method is based heavily on the code found in generateEffectiveTLDs.php
 *
 * @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
 * A copy of the Apache License, Version 2.0, is provided with this
 * distribution
 *
 * @param array $publicSuffixListArray Initially an empty array, this eventually
 * becomes the array representation of the Public Suffix List
 * @param array $ruleParts One line (rule) from the Public Suffix List
 * exploded on '.', or the remaining portion of that array during recursion
 */
public function buildArray(array &$publicSuffixListArray, array $ruleParts)
{
    // Canonicalization rule from the "Formal Algorithm" section of
    // https://publicsuffix.org/list/:
    // "The domain and all rules must be canonicalized in the normal way
    // for hostnames - lower-case, Punycode (RFC 3492)."
    $encoder = new Punycode();
    $label = $encoder->encode(array_pop($ruleParts));

    $isException = (0 === strpos($label, '!'));
    if ($isException) {
        $label = substr($label, 1);
    }

    // Only create the entry when it is not there yet; existing entries are
    // left untouched.
    if (!isset($publicSuffixListArray[$label])) {
        $publicSuffixListArray[$label] = $isException
            ? array('!' => '')
            : array();
    }

    // Nothing left to descend into for '!' labels or fully consumed rules.
    if ($isException || 0 === count($ruleParts)) {
        return;
    }

    $this->buildArray($publicSuffixListArray[$label], $ruleParts);
}
|
180 | |||
181 | /** |
||
182 | * Writes php array representation of the Public Suffix List to disk. |
||
183 | * |
||
184 | * @param array $publicSuffixList Array representation of the Public Suffix List |
||
185 | * |
||
186 | * @return int Number of bytes that were written to the file |
||
187 | */ |
||
188 | 3 | public function writePhpCache(array $publicSuffixList) |
|
194 | |||
/**
 * Gets Public Suffix List.
 *
 * The list is loaded from the PHP cache file in the cache directory. When
 * that file does not exist, the caches are refreshed first. The loaded list
 * is also stored on this instance.
 *
 * @return PublicSuffixList Instance of Public Suffix List
 *
 * @throws \Exception Throws \Exception if unable to read file
 */
public function getList()
{
    $cachedPhpFile = $this->cacheDir . '/' . self::PDP_PSL_PHP_FILE;

    if (false === file_exists($cachedPhpFile)) {
        $this->refreshPublicSuffixList();
    }

    return $this->list = $this->getListFromFile($cachedPhpFile);
}
||
214 | |||
215 | /** |
||
216 | * Retrieves public suffix list from file after obtaining a shared lock. |
||
217 | * |
||
218 | * @return PublicSuffixList Instance of Public Suffix List |
||
219 | * |
||
220 | * @throws \Exception Throws \Exception if unable to read file |
||
221 | */ |
||
222 | 4 | public function getListFromFile($phpFile) |
|
240 | |||
/**
 * Writes to file after obtaining an exclusive lock.
 *
 * The file is opened in 'c' mode so that it is not truncated on open; it is
 * truncated only once the exclusive lock has been obtained.
 *
 * @param string $filename Filename in cache dir where data will be written
 * @param mixed  $data     Data to write
 *
 * @return bool True when the data was written and flushed; every failure is
 * reported by exception instead
 *
 * @throws \Exception Throws \Exception if unable to write file
 */
protected function write($filename, $data)
{
    $filePath = $this->cacheDir . '/' . $filename;

    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $handle = @fopen($filePath, 'c');
    if (!$handle) {
        throw new \Exception("Cannot write to '$filePath'");
    }

    // Each step runs only if the previous one succeeded.
    $written = flock($handle, LOCK_EX)
        && ftruncate($handle, 0)
        && false !== fwrite($handle, $data)
        && fflush($handle);

    if (!$written) {
        fclose($handle);
        throw new \Exception("Cannot write to '$filePath'");
    }

    flock($handle, LOCK_UN);
    fclose($handle);

    return true;
}
||
274 | |||
/**
 * Returns http adapter, creating a default one on first use.
 *
 * When no adapter has been set, a CurlHttpAdapter is created if the curl
 * extension is loaded, otherwise a PhpHttpAdapter. The created adapter is
 * kept for subsequent calls.
 *
 * @return \Pdp\HttpAdapter\HttpAdapterInterface Http adapter
 */
public function getHttpAdapter()
{
    if (null !== $this->httpAdapter) {
        return $this->httpAdapter;
    }

    $this->httpAdapter = extension_loaded('curl')
        ? new HttpAdapter\CurlHttpAdapter()
        : new HttpAdapter\PhpHttpAdapter();

    return $this->httpAdapter;
}
||
292 | |||
293 | /** |
||
294 | * Sets http adapter. |
||
295 | * |
||
296 | * @param \Pdp\HttpAdapter\HttpAdapterInterface $httpAdapter |
||
297 | */ |
||
298 | 11 | public function setHttpAdapter(HttpAdapter\HttpAdapterInterface $httpAdapter) |
|
302 | } |
||
303 |