PublicSuffixListManager::getListFromFile()   A
last analyzed

Complexity

Conditions 3
Paths 2

Size

Total Lines 18

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 4.5435

Importance

Changes 0
Metric Value
cc 3
nc 2
nop 1
dl 0
loc 18
ccs 4
cts 9
cp 0.4444
crap 4.5435
rs 9.6666
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
/**
 * PHP Domain Parser: Public Suffix List based URL parsing.
 *
 * @link      http://github.com/jeremykendall/php-domain-parser for the canonical source repository
 *
 * @copyright Copyright (c) 2014 Jeremy Kendall (http://about.me/jeremykendall)
 * @license   http://github.com/jeremykendall/php-domain-parser/blob/master/LICENSE MIT License
 */
13
14
namespace Pdp;
15
16
use Pdp\HttpAdapter\HttpAdapterInterface;
17
18
/**
 * Public Suffix List Manager.
 *
 * This class obtains, writes, caches, and returns text and PHP representations
 * of the Public Suffix List.
 */
class PublicSuffixListManager
{
  const ALL_DOMAINS = 'ALL';

  const PDP_PSL_TEXT_FILE = 'public-suffix-list.txt';
  const PDP_PSL_PHP_FILE  = 'public-suffix-list.php';

  const ICANN_DOMAINS      = 'ICANN';
  const ICANN_PSL_PHP_FILE = 'icann-public-suffix-list.php';

  const PRIVATE_DOMAINS      = 'PRIVATE';
  const PRIVATE_PSL_PHP_FILE = 'private-public-suffix-list.php';

  /**
   * @var string Public Suffix List URL
   */
  protected $publicSuffixListUrl = 'https://publicsuffix.org/list/effective_tld_names.dat';

  /**
   * @var string Directory where text and php versions of list will be cached
   */
  protected $cacheDir;

  /**
   * @var string[] Map of list type (ALL, ICANN, PRIVATE) to the basename of its PHP cache file
   */
  protected static $domainList = [
      self::ALL_DOMAINS     => self::PDP_PSL_PHP_FILE,
      self::ICANN_DOMAINS   => self::ICANN_PSL_PHP_FILE,
      self::PRIVATE_DOMAINS => self::PRIVATE_PSL_PHP_FILE,
  ];

  /**
   * @var \Pdp\HttpAdapter\HttpAdapterInterface Http adapter
   */
  protected $httpAdapter;

  /**
   * Public constructor.
   *
   * @param string|null $cacheDir Optional cache directory; when omitted the "data"
   *                              directory two levels above this file is used
   */
  public function __construct($cacheDir = null)
  {
    if (null === $cacheDir) {
      $defaultDir = dirname(__DIR__, 2) . DIRECTORY_SEPARATOR . 'data';
      $resolvedDir = realpath($defaultDir);

      // realpath() returns false when the directory does not exist; keep the
      // unresolved path in that case so later file paths are not built from "false"
      $cacheDir = false !== $resolvedDir ? $resolvedDir : $defaultDir;
    }

    $this->cacheDir = $cacheDir;
  }

  /**
   * Downloads Public Suffix List and writes text cache and PHP cache. If these files
   * already exist, they will be overwritten.
   *
   * @throws \Exception Throws \Exception if a cache file cannot be written
   */
  public function refreshPublicSuffixList()
  {
    $this->fetchListFromSource();
    $cacheFile = $this->cacheDir . '/' . self::PDP_PSL_TEXT_FILE;
    $publicSuffixListArray = $this->convertListToArray($cacheFile);

    foreach ($publicSuffixListArray as $domain => $data) {
      // do not empty existing PHP cache file if source TXT is empty
      if (\is_array($data) && !empty($data)) {
        $this->varExportToFile(self::$domainList[$domain], $data);
      }
    }
  }

  /**
   * Obtain Public Suffix List from its online source and write to cache dir.
   *
   * @return int Number of bytes that were written to the file, or 0 when the
   *             download did not yield any content
   *
   * @throws \Exception Throws \Exception if the downloaded list is empty or cannot be written
   */
  public function fetchListFromSource()
  {
    $publicSuffixList = $this->getHttpAdapter()->getContent($this->publicSuffixListUrl);

    // anything that is not a string (e.g. false on failure) cannot be written
    if (!\is_string($publicSuffixList)) {
      return 0;
    }

    return $this->write(self::PDP_PSL_TEXT_FILE, $publicSuffixList);
  }

  /**
   * Parses text representation of list to associative, multidimensional array.
   *
   * This method is based heavily on the code found in generateEffectiveTLDs.php
   *
   * @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
   * A copy of the Apache License, Version 2.0, is provided with this
   * distribution
   *
   * @param string $textFile Public Suffix List text filename
   *
   * @return array Associative, multidimensional array representation of the
   *               public suffix list
   *
   * @throws \Exception Throws \Exception if unable to read file
   */
  public function parseListToArray($textFile): array
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($textFile, 'rb');
    if (false === $fp) {
      throw new \Exception("Cannot read '$textFile'");
    }

    try {
      if (!flock($fp, LOCK_SH)) {
        throw new \Exception("Cannot read '$textFile'");
      }

      $data = file(
          $textFile,
          FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES
      );

      flock($fp, LOCK_UN);
    } finally {
      // always release the handle, including when locking fails
      fclose($fp);
    }

    if (false === $data) {
      throw new \Exception("Cannot read '$textFile'");
    }

    $publicSuffixListArray = [];

    foreach ($data as $line) {
      // every line containing "//" is treated as a comment and skipped
      if (false !== strpos($line, '//')) {
        continue;
      }

      $this->buildArray($publicSuffixListArray, explode('.', $line));
    }

    return $publicSuffixListArray;
  }

  /**
   * Recursive method to build the array representation of the Public Suffix List.
   *
   * This method is based heavily on the code found in generateEffectiveTLDs.php
   *
   * @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
   * A copy of the Apache License, Version 2.0, is provided with this
   * distribution
   *
   * @param array $publicSuffixListArray Initially an empty array, this eventually
   *                                     becomes the array representation of the Public Suffix List
   * @param array $ruleParts             One line (rule) from the Public Suffix List
   *                                     exploded on '.', or the remaining portion of that array during recursion
   */
  public function buildArray(array &$publicSuffixListArray, array $ruleParts)
  {
    // nothing to add for an empty rule
    if ([] === $ruleParts) {
      return;
    }

    $isDomain = true;

    // rules are processed from the right-most label to the left-most one
    $part = array_pop($ruleParts);

    // Adheres to canonicalization rule from the "Formal Algorithm" section
    // of https://publicsuffix.org/list/
    // "The domain and all rules must be canonicalized in the normal way
    // for hostnames - lower-case, Punycode (RFC 3492)."
    $punycode = new PunycodeWrapper();
    $part = $punycode->encode($part);

    // a leading "!" marks an exception rule
    if (strpos($part, '!') === 0) {
      $part = substr($part, 1);
      $isDomain = false;
    }

    if (!isset($publicSuffixListArray[$part])) {
      $publicSuffixListArray[$part] = $isDomain ? [] : ['!' => ''];
    }

    // exception rules end the recursion; regular rules descend into the remaining labels
    if ($isDomain && \count($ruleParts) > 0) {
      $this->buildArray($publicSuffixListArray[$part], $ruleParts);
    }
  }

  /**
   * Writes php array representation of the Public Suffix List to disk.
   *
   * @param array $publicSuffixList Array representation of the Public Suffix List
   *
   * @return int Number of bytes that were written to the file
   *
   * @throws \Exception Throws \Exception if unable to write file
   */
  public function writePhpCache(array $publicSuffixList): int
  {
    return $this->varExportToFile(self::PDP_PSL_PHP_FILE, $publicSuffixList);
  }

  /**
   * Writes php array representation to disk.
   *
   * The generated file is a PHP script that returns the exported array.
   *
   * @param string $basename file name inside the cache directory
   * @param array  $input    input data
   *
   * @return int Number of bytes that were written to the file
   *
   * @throws \Exception Throws \Exception if unable to write file
   */
  protected function varExportToFile($basename, array $input): int
  {
    $data = '<?php' . PHP_EOL . 'static $data = ' . var_export($input, true) . '; $result =& $data; unset($data); return $result;';

    return $this->write($basename, $data);
  }

  /**
   * Gets Public Suffix List.
   *
   * The PHP cache is regenerated via refreshPublicSuffixList() when the cache
   * file for the requested list does not exist.
   *
   * @param string $list            the Public Suffix List type; unknown types fall back to the full list
   * @param bool   $withStaticCache whether an instance created by an earlier call may be reused;
   *                                when false a new instance is always created
   *
   * @return PublicSuffixList Instance of Public Suffix List
   *
   * @throws \Exception Throws \Exception if unable to read file
   */
  public function getList($list = self::ALL_DOMAINS, bool $withStaticCache = true): PublicSuffixList
  {
    // instances created so far, keyed by the hash of their cache file path
    static $LIST_STATIC = [];

    $cacheBasename = self::$domainList[$list] ?? self::PDP_PSL_PHP_FILE;
    $cacheFile = $this->cacheDir . '/' . $cacheBasename;
    $cacheKey = md5($cacheFile);

    if ($withStaticCache === true && isset($LIST_STATIC[$cacheKey])) {
      return $LIST_STATIC[$cacheKey];
    }

    if (!file_exists($cacheFile)) {
      $this->refreshPublicSuffixList();
    }

    // Reaching this point means either nothing is cached yet or the caller asked
    // to bypass the static cache, so a stale instance must not be handed out.
    $LIST_STATIC[$cacheKey] = new PublicSuffixList($cacheFile);

    return $LIST_STATIC[$cacheKey];
  }

  /**
   * Retrieves public suffix list from file after obtaining a shared lock.
   *
   * @param string $phpFile PHP cache file whose return value is handed to PublicSuffixList
   *
   * @return PublicSuffixList Instance of Public Suffix List
   *
   * @throws \Exception Throws \Exception if unable to read file
   */
  public function getListFromFile($phpFile): PublicSuffixList
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($phpFile, 'rb');
    if (false === $fp) {
      throw new \Exception("Cannot read '$phpFile'");
    }

    try {
      if (!flock($fp, LOCK_SH)) {
        throw new \Exception("Cannot read '$phpFile'");
      }

      /** @noinspection PhpIncludeInspection */
      $list = new PublicSuffixList(
          require $phpFile
      );

      flock($fp, LOCK_UN);
    } finally {
      // closing the handle also drops the lock if loading the file failed
      fclose($fp);
    }

    return $list;
  }

  /**
   * Parses text representation of list to associative, multidimensional arrays,
   * one for all rules and one for each of the ICANN and PRIVATE sections.
   *
   * @param string $textFile Public Suffix List text filename
   *
   * @return array Associative, multidimensional array representation of the
   *               public suffix list, keyed by list type (ALL, ICANN, PRIVATE)
   */
  protected function convertListToArray($textFile): array
  {
    // tracks whether the current line lies inside the ICANN / PRIVATE section
    $addDomain = [
        self::ICANN_DOMAINS   => false,
        self::PRIVATE_DOMAINS => false,
    ];

    $publicSuffixListArray = [
        self::ALL_DOMAINS     => [],
        self::ICANN_DOMAINS   => [],
        self::PRIVATE_DOMAINS => [],
    ];

    $data = new \SplFileObject($textFile);
    $data->setFlags(\SplFileObject::DROP_NEW_LINE | \SplFileObject::READ_AHEAD | \SplFileObject::SKIP_EMPTY);

    foreach ($data as $line) {
      // the helpers below only handle string lines
      if (!\is_string($line)) {
        continue;
      }

      // section markers are comments themselves, so update the section state
      // before comment lines are skipped
      $addDomain = $this->validateDomainAddition($line, $addDomain);

      if (false !== strpos($line, '//')) {
        continue;
      }

      $publicSuffixListArray = $this->convertLineToArray($line, $publicSuffixListArray, $addDomain);
    }

    return $publicSuffixListArray;
  }

  /**
   * Convert a line from the Public Suffix list.
   *
   * The rule is always added to the ALL list and additionally to every section
   * list whose status in $addDomain is true.
   *
   * @param string $textLine              Public Suffix List text line
   * @param array  $publicSuffixListArray Associative, multidimensional array representation of the
   *                                      public suffix list
   * @param array  $addDomain             Tell which section should be converted
   *
   * @return array Associative, multidimensional array representation of the
   *               public suffix list
   */
  protected function convertLineToArray($textLine, array $publicSuffixListArray, array $addDomain): array
  {
    $ruleParts = explode('.', $textLine);
    $this->buildArray($publicSuffixListArray[self::ALL_DOMAINS], $ruleParts);

    foreach (array_keys(array_filter($addDomain)) as $domainName) {
      $this->buildArray($publicSuffixListArray[$domainName], $ruleParts);
    }

    return $publicSuffixListArray;
  }

  /**
   * Update the addition status for a given line against the domain list (ICANN and PRIVATE).
   *
   * @param string $line      the current file line
   * @param array  $addDomain the domain addition status
   *
   * @return array the updated domain addition status
   */
  protected function validateDomainAddition($line, array $addDomain): array
  {
    foreach ($addDomain as $section => $status) {
      $addDomain[$section] = $this->isValidSection($status, $line, $section);
    }

    return $addDomain;
  }

  /**
   * Tell whether the line can be converted for a given domain.
   *
   * @param bool   $previousStatus the previous status
   * @param string $line           the current file line
   * @param string $section        the section to be considered
   *
   * @return bool true once the section's BEGIN marker has been seen and until
   *              its END marker is seen
   */
  protected function isValidSection($previousStatus, $line, $section): bool
  {
    if (!$previousStatus && 0 === strpos($line, '// ===BEGIN ' . $section . ' DOMAINS===')) {
      return true;
    }

    if ($previousStatus && 0 === strpos($line, '// ===END ' . $section . ' DOMAINS===')) {
      return false;
    }

    return $previousStatus;
  }

  /**
   * Writes to file after obtaining an exclusive lock.
   *
   * @param string $filename Filename in cache dir where data will be written
   * @param mixed  $data     Data to write; surrounding whitespace is trimmed first
   *
   * @return int Number of bytes that were written to the file
   *
   * @throws \Exception <p>Throws \Exception if there is no data or the file cannot be written.</p>
   */
  protected function write($filename, $data): int
  {
    $data = trim($data);
    $filePath = $this->cacheDir . '/' . $filename;

    // compare against the empty string: empty() would also reject the payload "0"
    if ('' === $data) {
      throw new \Exception("No data to write into '{$filePath}'");
    }

    // open with 'c' and truncate file only after obtaining a lock
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($filePath, 'cb');
    if (false === $fp) {
      throw new \Exception("Cannot write to '$filePath'");
    }

    try {
      if (!flock($fp, LOCK_EX) || !ftruncate($fp, 0)) {
        throw new \Exception("Cannot write to '$filePath'");
      }

      $bytesWritten = fwrite($fp, $data);
      if (false === $bytesWritten || !fflush($fp)) {
        throw new \Exception("Cannot write to '$filePath'");
      }

      flock($fp, LOCK_UN);
    } finally {
      fclose($fp);
    }

    return $bytesWritten;
  }

  /**
   * Returns http adapter. Returns default http adapter if one is not set.
   *
   * The default is the curl adapter when the curl extension is loaded and the
   * plain PHP adapter otherwise.
   *
   * @return \Pdp\HttpAdapter\HttpAdapterInterface Http adapter
   */
  public function getHttpAdapter(): HttpAdapterInterface
  {
    if (!$this->httpAdapter instanceof HttpAdapterInterface) {
      $this->httpAdapter = \extension_loaded('curl')
          ? new HttpAdapter\CurlHttpAdapter()
          : new HttpAdapter\PhpHttpAdapter();
    }

    return $this->httpAdapter;
  }

  /**
   * Sets http adapter.
   *
   * @param \Pdp\HttpAdapter\HttpAdapterInterface $httpAdapter
   */
  public function setHttpAdapter(HttpAdapter\HttpAdapterInterface $httpAdapter)
  {
    $this->httpAdapter = $httpAdapter;
  }
}
468