Completed
Push — develop ( f810d8...9feba0 )
by Lars
06:45
created

PublicSuffixListManager::refreshPublicSuffixList()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 16
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 12
CRAP Score 4.0072

Importance

Changes 0
Metric Value
cc 4
eloc 10
nc 3
nop 0
dl 0
loc 16
ccs 12
cts 13
cp 0.9231
crap 4.0072
rs 9.2
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * PHP Domain Parser: Public Suffix List based URL parsing.
5
 *
6
 * @link      http://github.com/jeremykendall/php-domain-parser for the canonical source repository
7
 *
8
 * @copyright Copyright (c) 2014 Jeremy Kendall (http://about.me/jeremykendall)
9
 * @license   http://github.com/jeremykendall/php-domain-parser/blob/master/LICENSE MIT License
10
 */
11
namespace Pdp;
12
13
use Pdp\HttpAdapter\HttpAdapterInterface;
14
15
/**
16
 * Public Suffix List Manager.
17
 *
18
 * This class obtains, writes, caches, and returns text and PHP representations
19
 * of the Public Suffix List
20
 */
21
class PublicSuffixListManager
22
{
23
  const ALL_DOMAINS = 'ALL';
24
25
  const PDP_PSL_TEXT_FILE = 'public-suffix-list.txt';
26
  const PDP_PSL_PHP_FILE  = 'public-suffix-list.php';
27
28
  const ICANN_DOMAINS      = 'ICANN';
29
  const ICANN_PSL_PHP_FILE = 'icann-public-suffix-list.php';
30
31
  const PRIVATE_DOMAINS      = 'PRIVATE';
32
  const PRIVATE_PSL_PHP_FILE = 'private-public-suffix-list.php';
33
34
  /**
35
   * @var string Public Suffix List URL
36
   */
37
  protected $publicSuffixListUrl = 'https://publicsuffix.org/list/effective_tld_names.dat';
38
39
  /**
40
   * @var string Directory where text and php versions of list will be cached
41
   */
42
  protected $cacheDir;
43
44
  /**
45
   * @var array Map of list type (ALL, ICANN, PRIVATE) to the basename of its PHP cache file
46
   */
47
  protected static $domainList = array(
48
      self::ALL_DOMAINS     => self::PDP_PSL_PHP_FILE,
49
      self::ICANN_DOMAINS   => self::ICANN_PSL_PHP_FILE,
50
      self::PRIVATE_DOMAINS => self::PRIVATE_PSL_PHP_FILE,
51
  );
52
53
  /**
54
   * @var \Pdp\HttpAdapter\HttpAdapterInterface Http adapter
55
   */
56
  protected $httpAdapter;
57
58
  /**
   * Creates the manager and decides where the cached list files live.
   *
   * @param string $cacheDir Optional cache directory; when null, the "data"
   *                         directory located two levels above this file's
   *                         directory is resolved with realpath() and used
   */
  public function __construct($cacheDir = null)
  {
    $defaultDir = dirname(dirname(__DIR__)) . DIRECTORY_SEPARATOR . 'data';

    // realpath() is only evaluated when the caller did not supply a directory
    $this->cacheDir = (null === $cacheDir) ? realpath($defaultDir) : $cacheDir;
  }
73
74
  /**
   * Re-downloads the Public Suffix List, then regenerates the PHP cache files
   * (ALL, ICANN, PRIVATE) from the text cache. Existing files are overwritten.
   */
  public function refreshPublicSuffixList()
  {
    $this->fetchListFromSource();

    $lists = $this->convertListToArray($this->cacheDir . '/' . self::PDP_PSL_TEXT_FILE);

    foreach ($lists as $listType => $rules) {
      // do not empty existing PHP cache file if source TXT is empty
      if (!is_array($rules) || empty($rules)) {
        continue;
      }

      $this->varExportToFile(self::$domainList[$listType], $rules);
    }
  }
94
95
  /**
   * Downloads the Public Suffix List through the HTTP adapter and stores it
   * as the text cache file in the cache directory.
   *
   * @return int|bool 0 when the HTTP adapter returned false; otherwise the
   *                  value returned by write() for the text cache file
   */
  public function fetchListFromSource()
  {
    $content = $this->getHttpAdapter()->getContent($this->publicSuffixListUrl);

    return (false === $content)
        ? 0
        : $this->write(self::PDP_PSL_TEXT_FILE, $content);
  }
110
111
  /**
   * Parses text representation of list to associative, multidimensional array.
   *
   * The file is read while holding a shared lock. Every line that contains
   * "//" (comments and section markers) is ignored; each remaining line is
   * split on "." and merged into the result through buildArray().
   *
   * This method is based heavily on the code found in generateEffectiveTLDs.php
   *
   * @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
   * A copy of the Apache License, Version 2.0, is provided with this
   * distribution
   *
   * @param string $textFile Public Suffix List text filename
   *
   * @return array Associative, multidimensional array representation of the
   *               public suffix list
   *
   * @throws \Exception Throws \Exception if unable to read file
   */
  public function parseListToArray($textFile)
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($textFile, 'r');
    if (!$fp) {
      throw new \Exception("Cannot read '$textFile'");
    }

    if (!flock($fp, LOCK_SH)) {
      // the handle was opened successfully, so release it before bailing out
      fclose($fp);
      throw new \Exception("Cannot read '$textFile'");
    }

    $data = file(
        $textFile,
        FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES
    );

    flock($fp, LOCK_UN);
    fclose($fp);

    // file() reports failure with false, which must not reach the loop below
    if (false === $data) {
      throw new \Exception("Cannot read '$textFile'");
    }

    $publicSuffixListArray = array();

    foreach ($data as $line) {
      // skip comments and section markers
      if (false !== strpos($line, '//')) {
        continue;
      }

      $ruleParts = explode('.', $line);
      $this->buildArray($publicSuffixListArray, $ruleParts);
    }

    return $publicSuffixListArray;
  }
159
160
  /**
   * Recursively inserts one rule into the nested array representation of the
   * Public Suffix List, consuming the rule labels from right to left.
   *
   * A label starting with "!" is stored without the "!" as
   * array('!' => '') and ends the recursion for that rule.
   *
   * This method is based heavily on the code found in generateEffectiveTLDs.php
   *
   * @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
   * A copy of the Apache License, Version 2.0, is provided with this
   * distribution
   *
   * @param array $publicSuffixListArray Initially an empty array, this eventually
   *                                     becomes the array representation of the Public Suffix List
   * @param array $ruleParts             One line (rule) from the Public Suffix List
   *                                     exploded on '.', or the remaining portion of that array during recursion
   */
  public function buildArray(array &$publicSuffixListArray, array $ruleParts)
  {
    $label = array_pop($ruleParts);

    // Adheres to canonicalization rule from the "Formal Algorithm" section
    // of https://publicsuffix.org/list/
    // "The domain and all rules must be canonicalized in the normal way
    // for hostnames - lower-case, Punycode (RFC 3492)."
    $encoder = new PunycodeWrapper();
    $label = $encoder->encode($label);

    $isException = (0 === strpos($label, '!'));
    if ($isException) {
      $label = substr($label, 1);
    }

    if (!isset($publicSuffixListArray[$label])) {
      $publicSuffixListArray[$label] = $isException ? array('!' => '') : array();
    }

    if ($isException || count($ruleParts) <= 0) {
      return;
    }

    $this->buildArray($publicSuffixListArray[$label], $ruleParts);
  }
204
205
  /**
   * Writes php array representation of the Public Suffix List to disk.
   *
   * The array is exported to the default PHP cache file (PDP_PSL_PHP_FILE)
   * through varExportToFile(), so the file layout is defined in one place.
   *
   * @param array $publicSuffixList Array representation of the Public Suffix List
   *
   * @return mixed The value returned by varExportToFile()
   *
   * @throws \Exception Throws \Exception if unable to write file
   */
  public function writePhpCache(array $publicSuffixList)
  {
    return $this->varExportToFile(self::PDP_PSL_PHP_FILE, $publicSuffixList);
  }
218
219
  /**
   * Exports an array as a PHP source file inside the cache directory.
   *
   * The generated file holds the exported array in a static variable and
   * returns it, so it can be loaded with include/require.
   *
   * @param string $basename Name of the target file inside the cache directory
   * @param array  $input    Data to export
   *
   * @return mixed The value returned by write()
   */
  protected function varExportToFile($basename, array $input)
  {
    $exported = var_export($input, true);

    $php = '<?php' . PHP_EOL;
    $php .= 'static $data = ' . $exported;
    $php .= '; $result =& $data; unset($data); return $result;';

    return $this->write($basename, $php);
  }
233
234
  /**
   * Gets Public Suffix List.
   *
   * Instances are memoized per cache file in a function-level static array.
   * When the cache file does not exist yet, refreshPublicSuffixList() is
   * called first to create it.
   *
   * @param string $list            the Public Suffix List type (one of the *_DOMAINS constants);
   *                                unknown values fall back to the complete list
   * @param bool   $withStaticCache true to reuse an instance created earlier for the same cache
   *                                file; any other value forces a new instance to be built
   *                                (the new instance then replaces the memoized one)
   *
   * @return PublicSuffixList Instance of Public Suffix List
   *
   * @throws \Exception Throws \Exception if unable to read file
   */
  public function getList($list = self::ALL_DOMAINS, $withStaticCache = true)
  {
    // init
    static $LIST_STATIC = array();

    $cacheBasename = isset(self::$domainList[$list]) ? self::$domainList[$list] : self::PDP_PSL_PHP_FILE;
    $cacheFile = $this->cacheDir . '/' . $cacheBasename;
    $cacheKey = md5($cacheFile);

    if ($withStaticCache === true && isset($LIST_STATIC[$cacheKey])) {
      return $LIST_STATIC[$cacheKey];
    }

    if (!file_exists($cacheFile)) {
      $this->refreshPublicSuffixList();
    }

    // Reaching this point means either nothing is memoized for this file or
    // the caller asked to bypass the static cache. In both cases a fresh
    // instance is built; previously a bypass request still returned the stale
    // memoized instance.
    $LIST_STATIC[$cacheKey] = new PublicSuffixList($cacheFile);

    return $LIST_STATIC[$cacheKey];
  }
267
268
  /**
   * Retrieves public suffix list from file after obtaining a shared lock.
   *
   * The file is loaded with require and its return value is handed to the
   * PublicSuffixList constructor. The lock and the file handle are released
   * on every exit path.
   *
   * @param string $phpFile Path of the PHP cache file to load
   *
   * @return PublicSuffixList Instance of Public Suffix List
   *
   * @throws \Exception Throws \Exception if unable to read file
   */
  public function getListFromFile($phpFile)
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($phpFile, 'r');
    if (!$fp) {
      throw new \Exception("Cannot read '$phpFile'");
    }

    if (!flock($fp, LOCK_SH)) {
      // the handle was opened successfully, so release it before bailing out
      fclose($fp);
      throw new \Exception("Cannot read '$phpFile'");
    }

    try {
      /** @noinspection PhpIncludeInspection */
      $list = new PublicSuffixList(
          require $phpFile
      );
    } catch (\Exception $e) {
      // release the lock before propagating the failure
      flock($fp, LOCK_UN);
      fclose($fp);

      throw $e;
    }

    flock($fp, LOCK_UN);
    fclose($fp);

    return $list;
  }
295
296
  /**
   * Parses text representation of list to associative, multidimensional array.
   *
   * The result has three entries keyed by ALL_DOMAINS, ICANN_DOMAINS and
   * PRIVATE_DOMAINS. Every rule is added to the ALL entry and, in addition,
   * to each section entry that is open at that point of the file. Sections
   * are opened and closed by the "// ===BEGIN ..." / "// ===END ..." marker
   * lines, which is why the section state is updated before lines containing
   * "//" are skipped.
   *
   * @param string $textFile Public Suffix List text filename
   *
   * @return array Associative, multidimensional array representation of the
   *               public suffix list
   *
   * @throws \RuntimeException Thrown by \SplFileObject if the file cannot be opened
   */
  protected function convertListToArray($textFile)
  {
    $addDomain = array(
        self::ICANN_DOMAINS   => false,
        self::PRIVATE_DOMAINS => false,
    );

    $publicSuffixListArray = array(
        self::ALL_DOMAINS     => array(),
        self::ICANN_DOMAINS   => array(),
        self::PRIVATE_DOMAINS => array(),
    );

    $data = new \SplFileObject($textFile);
    $data->setFlags(\SplFileObject::DROP_NEW_LINE | \SplFileObject::READ_AHEAD | \SplFileObject::SKIP_EMPTY);
    foreach ($data as $line) {
      // iteration over SplFileObject is typed string|array; the helpers
      // below only handle strings
      if (!is_string($line)) {
        continue;
      }

      $addDomain = $this->validateDomainAddition($line, $addDomain);

      // comments and section markers carry no rule
      if (false !== strpos($line, '//')) {
        continue;
      }

      $publicSuffixListArray = $this->convertLineToArray($line, $publicSuffixListArray, $addDomain);
    }

    return $publicSuffixListArray;
  }
329
330
  /**
   * Adds a single rule line to the complete list and to every section whose
   * flag in $addDomain is currently truthy.
   *
   * @param string $textLine              Public Suffix List text line
   * @param array  $publicSuffixListArray Associative, multidimensional array representation of the
   *                                      public suffix list
   * @param array  $addDomain             Section name => whether the section is currently open
   *
   * @return array The updated array representation of the public suffix list
   */
  protected function convertLineToArray($textLine, array $publicSuffixListArray, array $addDomain)
  {
    $labels = explode('.', $textLine);

    $this->buildArray($publicSuffixListArray[self::ALL_DOMAINS], $labels);

    foreach ($addDomain as $sectionName => $isOpen) {
      if ($isOpen) {
        $this->buildArray($publicSuffixListArray[$sectionName], $labels);
      }
    }

    return $publicSuffixListArray;
  }
352
353
  /**
   * Recomputes, for the given line, the open/closed status of every section
   * listed in $addDomain (ICANN and PRIVATE).
   *
   * @param string $line      the current file line
   * @param array  $addDomain section name => current status
   *
   * @return array section name => status after taking $line into account
   */
  protected function validateDomainAddition($line, array $addDomain)
  {
    $updated = array();

    foreach ($addDomain as $section => $status) {
      $updated[$section] = $this->isValidSection($status, $line, $section);
    }

    return $updated;
  }
369
370
  /**
   * Tell whether the line can be converted for a given domain.
   *
   * A closed section is opened by a line starting with its BEGIN marker and
   * an open section is closed by a line starting with its END marker; any
   * other line leaves the status untouched.
   *
   * @param bool   $previousStatus the previous status
   * @param string $line           the current file line
   * @param string $section        the section to be considered
   *
   * @return bool
   */
  protected function isValidSection($previousStatus, $line, $section)
  {
    // only the marker that would flip the current status is relevant
    $prefix = $previousStatus ? '// ===END ' : '// ===BEGIN ';

    if (0 === strpos($line, $prefix . $section . ' DOMAINS===')) {
      return !$previousStatus;
    }

    return $previousStatus;
  }
391
392
  /**
   * Writes to file after obtaining an exclusive lock.
   *
   * The data is trimmed first. The target is opened in 'c' mode and only
   * truncated once the exclusive lock is held, so a concurrent reader never
   * sees a file emptied by a writer that is still waiting for the lock.
   *
   * @param string $filename Filename in cache dir where data will be written
   * @param mixed  $data     Data to write
   *
   * @return int Number of bytes that were written to the file
   *
   * @throws \Exception Throws \Exception if there is nothing to write or if unable to write file
   */
  protected function write($filename, $data)
  {
    $data = trim($data);
    $filePath = $this->cacheDir . '/' . $filename;

    // strict comparison on purpose: empty() would also reject the string "0"
    if ('' === $data) {
      throw new \Exception("No data to write into '{$filePath}'");
    }

    // open with 'c' and truncate file only after obtaining a lock
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($filePath, 'c');
    if (!$fp) {
      throw new \Exception("Cannot write to '$filePath'");
    }

    $bytesWritten = false;

    if (flock($fp, LOCK_EX)) {
      if (ftruncate($fp, 0)) {
        $bytesWritten = fwrite($fp, $data);

        // a failed flush means the data may not have reached the file
        if (false !== $bytesWritten && !fflush($fp)) {
          $bytesWritten = false;
        }
      }

      flock($fp, LOCK_UN);
    }

    fclose($fp);

    if (false === $bytesWritten) {
      throw new \Exception("Cannot write to '$filePath'");
    }

    return $bytesWritten;
  }
430
431
  /**
   * Returns the http adapter, lazily creating a default one when none is set:
   * the curl adapter if the curl extension is loaded, the PHP adapter otherwise.
   *
   * @return \Pdp\HttpAdapter\HttpAdapterInterface Http adapter
   */
  public function getHttpAdapter()
  {
    if ($this->httpAdapter instanceof HttpAdapterInterface) {
      return $this->httpAdapter;
    }

    $this->httpAdapter = extension_loaded('curl')
        ? new HttpAdapter\CurlHttpAdapter()
        : new HttpAdapter\PhpHttpAdapter();

    return $this->httpAdapter;
  }
448
449
  /**
   * Injects the http adapter that getHttpAdapter() will return from now on.
   *
   * @param \Pdp\HttpAdapter\HttpAdapterInterface $httpAdapter Adapter to use
   */
  public function setHttpAdapter(HttpAdapter\HttpAdapterInterface $httpAdapter)
  {
    $this->httpAdapter = $httpAdapter;
  }
458
}
459