Completed
Pull Request — develop (#3)
by Lars
03:12
created

PublicSuffixListManager   A

Complexity

Total Complexity 35

Size/Duplication

Total Lines 299
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 5

Test Coverage

Coverage 96.4%

Importance

Changes 0
Metric Value
dl 0
loc 299
ccs 107
cts 111
cp 0.964
rs 9
c 0
b 0
f 0
wmc 35
lcom 1
cbo 5

11 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 10 2
C write() 0 28 8
A refreshPublicSuffixList() 0 16 3
A fetchListFromSource() 0 10 2
B parseListToArray() 0 32 4
B buildArray() 0 30 6
A writePhpCache() 0 6 1
A getList() 0 12 2
A getListFromFile() 0 18 3
A getHttpAdapter() 0 12 3
A setHttpAdapter() 0 4 1
1
<?php
2
3
/**
4
 * PHP Domain Parser: Public Suffix List based URL parsing.
5
 *
6
 * @link      http://github.com/jeremykendall/php-domain-parser for the canonical source repository
7
 *
8
 * @copyright Copyright (c) 2014 Jeremy Kendall (http://about.me/jeremykendall)
9
 * @license   http://github.com/jeremykendall/php-domain-parser/blob/master/LICENSE MIT License
10
 */
11
namespace Pdp;
12
13
use Pdp\HttpAdapter\HttpAdapterInterface;
14
15
/**
16
 * Public Suffix List Manager.
17
 *
18
 * This class obtains, writes, caches, and returns text and PHP representations
19
 * of the Public Suffix List.
20
 */
21
class PublicSuffixListManager
22
{
23
  const PDP_PSL_TEXT_FILE = 'public-suffix-list.txt';
24
  const PDP_PSL_PHP_FILE  = 'public-suffix-list.php';
25
26
  /**
27
   * @var string Public Suffix List URL
28
   */
29
  protected $publicSuffixListUrl = 'https://publicsuffix.org/list/effective_tld_names.dat';
30
31
  /**
32
   * @var string Directory where text and php versions of list will be cached
33
   */
34
  protected $cacheDir;
35
36
  /**
37
   * @var PublicSuffixList Public Suffix List
38
   */
39
  protected $list;
40
41
  /**
42
   * @var \Pdp\HttpAdapter\HttpAdapterInterface Http adapter
43
   */
44
  protected $httpAdapter;
45
46
  /**
   * Public constructor.
   *
   * When no cache directory is supplied, the "data" directory located two
   * levels above this file's directory is used.
   *
   * @param string|null $cacheDir Optional cache directory
   */
  public function __construct($cacheDir = null)
  {
    if (null === $cacheDir) {
      $defaultDir = dirname(dirname(__DIR__)) . DIRECTORY_SEPARATOR . 'data';
      $resolvedDir = realpath($defaultDir);

      // realpath() returns false when the directory does not exist. Keep the
      // unresolved path in that case so later read/write errors name the
      // expected location instead of silently targeting "/<filename>".
      $cacheDir = (false !== $resolvedDir) ? $resolvedDir : $defaultDir;
    }

    $this->cacheDir = $cacheDir;
  }
61
62
  /**
   * Rebuilds both cache files: fetches the Public Suffix List, parses the
   * cached text file and writes the PHP cache from the parsed result.
   *
   * The result of the fetch is not inspected; the text file in the cache
   * directory is parsed either way. The PHP cache is only written when the
   * parsed result is a non-empty array.
   */
  public function refreshPublicSuffixList()
  {
    $this->fetchListFromSource();

    $textFile = $this->cacheDir . '/' . self::PDP_PSL_TEXT_FILE;
    $parsedRules = $this->parseListToArray($textFile);

    // do not empty existing PHP cache file if source TXT is empty
    if (!is_array($parsedRules) || empty($parsedRules)) {
      return;
    }

    $this->writePhpCache($parsedRules);
  }
82
83
  /**
   * Downloads the Public Suffix List through the HTTP adapter and stores it
   * as the text cache file in the cache directory.
   *
   * @return int|bool 0 when the HTTP adapter returned false for the download;
   *                  otherwise the value returned by write()
   */
  public function fetchListFromSource()
  {
    $adapter = $this->getHttpAdapter();
    $content = $adapter->getContent($this->publicSuffixListUrl);

    // A failed download is reported as 0 and leaves the cache file untouched.
    return (false === $content)
        ? 0
        : $this->write(self::PDP_PSL_TEXT_FILE, $content);
  }
98
99
  /**
   * Parses text representation of list to associative, multidimensional array.
   *
   * The file is read while holding a shared lock. Following the Public Suffix
   * List format, each line is only read up to the first whitespace; lines
   * that are empty up to that point or that begin with "//" are skipped.
   *
   * This method is based heavily on the code found in generateEffectiveTLDs.php
   *
   * @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
   * A copy of the Apache License, Version 2.0, is provided with this
   * distribution
   *
   * @param string $textFile Public Suffix List text filename
   *
   * @return array Associative, multidimensional array representation of the
   *               public suffix list
   *
   * @throws \Exception Throws \Exception if unable to read file
   */
  public function parseListToArray($textFile)
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($textFile, 'r');
    if (!$fp) {
      throw new \Exception("Cannot read '$textFile'");
    }

    if (!flock($fp, LOCK_SH)) {
      // do not keep the handle open when the lock cannot be obtained
      fclose($fp);
      throw new \Exception("Cannot read '$textFile'");
    }

    $lines = file(
        $textFile,
        FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES
    );

    flock($fp, LOCK_UN);
    fclose($fp);

    // file() reports failure with false, which must not reach the loop below
    if (false === $lines) {
      throw new \Exception("Cannot read '$textFile'");
    }

    $publicSuffixListArray = array();

    foreach ($lines as $line) {
      // a rule ends at the first whitespace character (this also drops a
      // trailing "\r" left over from CRLF line endings)
      $ruleLength = strcspn($line, " \t\n\r\x0B\f");
      if (0 === $ruleLength) {
        continue;
      }

      $rule = substr($line, 0, $ruleLength);

      // comment lines begin with "//"
      if (0 === strpos($rule, '//')) {
        continue;
      }

      $this->buildArray($publicSuffixListArray, explode('.', $rule));
    }

    return $publicSuffixListArray;
  }
147
148
  /**
   * Recursive method to build the array representation of the Public Suffix List.
   *
   * Each call consumes the last element of $ruleParts, stores it as a key of
   * $publicSuffixListArray and, unless that element carried a leading "!",
   * recurses into the stored entry with the remaining elements.
   *
   * This method is based heavily on the code found in generateEffectiveTLDs.php
   *
   * @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
   * A copy of the Apache License, Version 2.0, is provided with this
   * distribution
   *
   * @param array $publicSuffixListArray Initially an empty array, this eventually
   *                                     becomes the array representation of the Public Suffix List
   * @param array $ruleParts             One line (rule) from the Public Suffix List
   *                                     exploded on '.', or the remaining portion of that array during recursion
   */
  public function buildArray(array &$publicSuffixListArray, array $ruleParts)
  {
    $label = array_pop($ruleParts);

    // Adheres to canonicalization rule from the "Formal Algorithm" section
    // of https://publicsuffix.org/list/
    // "The domain and all rules must be canonicalized in the normal way
    // for hostnames - lower-case, Punycode (RFC 3492)."
    $encoder = new PunycodeWrapper();
    $label = $encoder->encode($label);

    // a leading "!" is stripped from the key and recorded via a "!" marker entry
    $hasBang = (strpos($label, '!') === 0);
    if ($hasBang) {
      $label = substr($label, 1);
    }

    if (!isset($publicSuffixListArray[$label])) {
      $publicSuffixListArray[$label] = $hasBang ? array('!' => '') : array();
    }

    // "!" entries are never descended into; otherwise stop once all parts are used
    if ($hasBang || count($ruleParts) === 0) {
      return;
    }

    $this->buildArray($publicSuffixListArray[$label], $ruleParts);
  }
192
193
  /**
   * Serializes the array representation of the Public Suffix List into a PHP
   * file that returns the array when included, and writes it to the cache dir.
   *
   * @param array $publicSuffixList Array representation of the Public Suffix List
   *
   * @return int|bool Value returned by write() for the PHP cache file
   */
  public function writePhpCache(array $publicSuffixList)
  {
    $exported = var_export($publicSuffixList, true);

    // generated file: static array, handed back by reference through $result
    $phpSource = '<?php' . PHP_EOL
        . 'static $data = ' . $exported
        . '; $result =& $data; unset($data); return $result;';

    return $this->write(self::PDP_PSL_PHP_FILE, $phpSource);
  }
206
207
  /**
   * Gets Public Suffix List.
   *
   * Loads the list from the PHP cache file in the cache directory. When that
   * file does not exist yet, refreshPublicSuffixList() is called first. The
   * loaded list is also stored on this instance.
   *
   * @return PublicSuffixList Instance of Public Suffix List
   *
   * @throws \Exception Throws \Exception if unable to read file
   */
  public function getList()
  {
    $cacheFile = $this->cacheDir . '/' . self::PDP_PSL_PHP_FILE;

    // build the caches on first use
    if (false === file_exists($cacheFile)) {
      $this->refreshPublicSuffixList();
    }

    return $this->list = $this->getListFromFile($cacheFile);
  }
226
227
  /**
   * Retrieves public suffix list from file after obtaining a shared lock.
   *
   * The file is included with "require" and its return value is passed to the
   * PublicSuffixList constructor; the lock is released afterwards.
   *
   * @param string $phpFile Path of the PHP cache file to load
   *
   * @return PublicSuffixList Instance of Public Suffix List
   *
   * @throws \Exception Throws \Exception if unable to read file
   */
  public function getListFromFile($phpFile)
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $handle = @fopen($phpFile, 'r');
    $locked = $handle && flock($handle, LOCK_SH);

    if (!$locked) {
      throw new \Exception("Cannot read '$phpFile'");
    }

    /** @noinspection PhpIncludeInspection */
    $rules = require $phpFile;
    $list = new PublicSuffixList($rules);

    flock($handle, LOCK_UN);
    fclose($handle);

    return $list;
  }
252
253
  /**
   * Writes to file after obtaining an exclusive lock.
   *
   * The data is trimmed first; empty data is rejected so that an existing
   * file is never replaced by an empty one. The file is opened without
   * truncation and only truncated once the lock is held.
   *
   * @param string $filename Filename in cache dir where data will be written
   * @param mixed  $data     Data to write
   *
   * @return int Number of bytes that were written to the file
   *
   * @throws \Exception Throws \Exception if there is no data or if unable to write file
   */
  protected function write($filename, $data)
  {
    $data = trim($data);
    $filePath = $this->cacheDir . '/' . $filename;

    if (empty($data)) {
      throw new \Exception("No data to write into '{$filePath}'");
    }

    // open with 'c' and truncate file only after obtaining a lock
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($filePath, 'c');

    // keep the fwrite() result so the documented byte count can be returned
    // instead of the boolean outcome of the whole chain
    $bytesWritten = false;
    $succeeded = $fp
                 && flock($fp, LOCK_EX)
                 && ftruncate($fp, 0)
                 && ($bytesWritten = fwrite($fp, $data)) !== false
                 && fflush($fp);

    if (!$succeeded) {
      $fp && fclose($fp);
      throw new \Exception("Cannot write to '$filePath'");
    }

    flock($fp, LOCK_UN);
    fclose($fp);

    return $bytesWritten;
  }
291
292
  /**
   * Returns http adapter. Returns default http adapter if one is not set.
   *
   * The default is created on first use and kept on this instance: the curl
   * adapter when the "curl" extension is loaded, the PHP adapter otherwise.
   *
   * @return \Pdp\HttpAdapter\HttpAdapterInterface Http adapter
   */
  public function getHttpAdapter()
  {
    if ($this->httpAdapter instanceof HttpAdapterInterface) {
      return $this->httpAdapter;
    }

    $this->httpAdapter = extension_loaded('curl')
        ? new HttpAdapter\CurlHttpAdapter()
        : new HttpAdapter\PhpHttpAdapter();

    return $this->httpAdapter;
  }
309
310
  /**
   * Sets http adapter.
   *
   * The given adapter replaces any adapter previously stored on this instance.
   *
   * @param \Pdp\HttpAdapter\HttpAdapterInterface $httpAdapter Adapter to store
   */
  public function setHttpAdapter(HttpAdapterInterface $httpAdapter)
  {
    $this->httpAdapter = $httpAdapter;
  }
319
}
320