Completed
Push — develop ( 71033d...a88aab )
by Lars
03:02
created

PublicSuffixListManager::getHttpAdapter()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 12
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 3.0416

Importance

Changes 0
Metric Value
cc 3
eloc 7
nc 3
nop 0
dl 0
loc 12
ccs 5
cts 6
cp 0.8333
crap 3.0416
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * PHP Domain Parser: Public Suffix List based URL parsing.
5
 *
6
 * @link      http://github.com/jeremykendall/php-domain-parser for the canonical source repository
7
 *
8
 * @copyright Copyright (c) 2014 Jeremy Kendall (http://about.me/jeremykendall)
9
 * @license   http://github.com/jeremykendall/php-domain-parser/blob/master/LICENSE MIT License
10
 */
11
12
namespace Pdp;
13
14
use Pdp\HttpAdapter\HttpAdapterInterface;
15
16
/**
17
 * Public Suffix List Manager.
18
 *
19
 * This class obtains, writes, caches, and returns text and PHP representations
20
 * of the Public Suffix List
21
 */
22
class PublicSuffixListManager
23
{
24
  const ALL_DOMAINS = 'ALL';
25
26
  const PDP_PSL_TEXT_FILE = 'public-suffix-list.txt';
27
  const PDP_PSL_PHP_FILE  = 'public-suffix-list.php';
28
29
  const ICANN_DOMAINS      = 'ICANN';
30
  const ICANN_PSL_PHP_FILE = 'icann-public-suffix-list.php';
31
32
  const PRIVATE_DOMAINS      = 'PRIVATE';
33
  const PRIVATE_PSL_PHP_FILE = 'private-public-suffix-list.php';
34
35
  /**
36
   * @var string Public Suffix List URL
37
   */
38
  protected $publicSuffixListUrl = 'https://publicsuffix.org/list/effective_tld_names.dat';
39
40
  /**
41
   * @var string Directory where text and php versions of list will be cached
42
   */
43
  protected $cacheDir;
44
45
  /**
46
   * @var array Map of list type (ALL, ICANN, PRIVATE) to the file name of its PHP cache
47
   */
48
  protected static $domainList = [
49
      self::ALL_DOMAINS     => self::PDP_PSL_PHP_FILE,
50
      self::ICANN_DOMAINS   => self::ICANN_PSL_PHP_FILE,
51
      self::PRIVATE_DOMAINS => self::PRIVATE_PSL_PHP_FILE,
52
  ];
53
54
  /**
55
   * @var \Pdp\HttpAdapter\HttpAdapterInterface Http adapter
56
   */
57
  protected $httpAdapter;
58
59
  /**
60
   * Public constructor.
61
   *
62
   * @param string $cacheDir Optional cache directory
63
   */
64 10
  public function __construct($cacheDir = null)
  {
    // Without an explicit directory, fall back to the "data" directory located
    // two levels above the directory of this file.
    $this->cacheDir = $cacheDir ?? realpath(
        \dirname(\dirname(__DIR__)) . DIRECTORY_SEPARATOR . 'data'
    );
  }
74
75
  /**
76
   * Downloads Public Suffix List and writes text cache and PHP cache. If these files
77
   * already exist, they will be overwritten.
78
   */
79 1
  public function refreshPublicSuffixList()
  {
    // Download the list into the text cache, then parse that same text file.
    $this->fetchListFromSource();

    $parsedLists = $this->convertListToArray($this->cacheDir . '/' . self::PDP_PSL_TEXT_FILE);

    foreach ($parsedLists as $listType => $rules) {
      // Never overwrite an existing PHP cache file with an empty rule set.
      if (!\is_array($rules) || empty($rules)) {
        continue;
      }

      $this->varExportToFile(self::$domainList[$listType], $rules);
    }
  }
95
96
  /**
97
   * Obtain Public Suffix List from its online source and write to cache dir.
98
   *
99
   * @return int Result of writing the list to the text cache, or 0 when the download failed
100
   */
101 1
  public function fetchListFromSource()
  {
    $content = $this->getHttpAdapter()->getContent($this->publicSuffixListUrl);

    // A strict false from the adapter means the download failed: nothing is written.
    return false === $content
        ? 0
        : $this->write(self::PDP_PSL_TEXT_FILE, $content);
  }
111
112
  /**
113
   * Parses text representation of list to associative, multidimensional array.
114
   *
115
   * This method is based heavily on the code found in generateEffectiveTLDs.php
116
   *
117
   * @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
118
   * A copy of the Apache License, Version 2.0, is provided with this
119
   * distribution
120
   *
121
   * @param string $textFile Public Suffix List text filename
122
   *
123
   * @return array Associative, multidimensional array representation of the
124
   *               public suffix list
125
   *
126
   * @throws \Exception Throws \Exception if unable to read file
127
   */
128 2
  public function parseListToArray($textFile): array
  {
    // The handle is only used to hold a shared lock while file() reads the content.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($textFile, 'rb');
    if (!$fp) {
      throw new \Exception("Cannot read '$textFile'");
    }

    $locked = false;
    try {
      $locked = flock($fp, LOCK_SH);
      if (!$locked) {
        throw new \Exception("Cannot read '$textFile'");
      }

      $data = file(
          $textFile,
          FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES
      );
    } finally {
      // Always release the lock and the handle, including when locking failed.
      if ($locked) {
        flock($fp, LOCK_UN);
      }
      fclose($fp);
    }

    // file() returns false on failure; report it through the documented exception.
    if (false === $data) {
      throw new \Exception("Cannot read '$textFile'");
    }

    $publicSuffixListArray = [];

    foreach ($data as $line) {
      // Skip every line that contains a comment marker.
      if (false !== strpos($line, '//')) {
        continue;
      }

      $this->buildArray($publicSuffixListArray, explode('.', $line));
    }

    return $publicSuffixListArray;
  }
160
161
  /**
162
   * Recursive method to build the array representation of the Public Suffix List.
163
   *
164
   * This method is based heavily on the code found in generateEffectiveTLDs.php
165
   *
166
   * @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
167
   * A copy of the Apache License, Version 2.0, is provided with this
168
   * distribution
169
   *
170
   * @param array $publicSuffixListArray Initially an empty array, this eventually
171
   *                                     becomes the array representation of the Public Suffix List
172
   * @param array $ruleParts             One line (rule) from the Public Suffix List
173
   *                                     exploded on '.', or the remaining portion of that array during recursion
174
   */
175 2
  public function buildArray(array &$publicSuffixListArray, array $ruleParts)
  {
    // Rules are consumed from the right-most label towards the left.
    $label = array_pop($ruleParts);

    // Adheres to canonicalization rule from the "Formal Algorithm" section
    // of https://publicsuffix.org/list/
    // "The domain and all rules must be canonicalized in the normal way
    // for hostnames - lower-case, Punycode (RFC 3492)."
    $label = (new PunycodeWrapper())->encode($label);

    // A leading "!" marks an exception rule; the marker is not part of the key.
    $isException = 0 === strpos($label, '!');
    if ($isException) {
      $label = substr($label, 1);
    }

    if (!isset($publicSuffixListArray[$label])) {
      $publicSuffixListArray[$label] = $isException ? ['!' => ''] : [];
    }

    // Exception rules end the descent; otherwise stop once no labels remain.
    if ($isException || [] === $ruleParts) {
      return;
    }

    $this->buildArray($publicSuffixListArray[$label], $ruleParts);
  }
205
206
  /**
207
   * Writes php array representation of the Public Suffix List to disk.
208
   *
209
   * @param array $publicSuffixList Array representation of the Public Suffix List
210
   *
211
   * @return int Number of bytes that were written to the file
212
   */
213 1
  public function writePhpCache(array $publicSuffixList): int
  {
    // Delegate to the shared exporter so the generated PHP template exists in
    // one place only; the target is the cache file of the complete list.
    return $this->varExportToFile(self::PDP_PSL_PHP_FILE, $publicSuffixList);
  }
219
220
  /**
221
   * Writes php array representation to disk.
222
   *
223
   * @param string $basename file path
224
   * @param array  $input    input data
225
   *
226
   * @return int Number of bytes that were written to the file
227
   */
228 1
  protected function varExportToFile($basename, array $input): int
  {
    $exported = var_export($input, true);

    // The generated file returns the exported array when it is included.
    $php = '<?php' . PHP_EOL
        . 'static $data = ' . $exported
        . '; $result =& $data; unset($data); return $result;';

    return $this->write($basename, $php);
  }
234
235
  /**
236
   * Gets Public Suffix List.
237
   *
238
   * @param string $list the Public Suffix List type
239
   * @param bool   $withStaticCache
240
   *
241
   * @return PublicSuffixList Instance of Public Suffix List
242
   *
243
   * @throws \Exception Throws \Exception if unable to read file
244
   */
245 4
  public function getList($list = self::ALL_DOMAINS, bool $withStaticCache = true): PublicSuffixList
  {
    // Static memo of already loaded lists, keyed by a hash of the cache file path.
    static $LIST_STATIC = [];

    // Unknown list types fall back to the cache file of the complete list.
    $cacheBasename = self::$domainList[$list] ?? self::PDP_PSL_PHP_FILE;
    $cacheFile = $this->cacheDir . '/' . $cacheBasename;
    $cacheKey = md5($cacheFile);

    if ($withStaticCache === true && isset($LIST_STATIC[$cacheKey])) {
      return $LIST_STATIC[$cacheKey];
    }

    if (!file_exists($cacheFile)) {
      $this->refreshPublicSuffixList();
    }

    // Reached on a memo miss, or when the caller opted out of the static cache.
    // In both cases a fresh instance is built (and memoized), so that
    // $withStaticCache = false never hands back a previously memoized instance.
    $LIST_STATIC[$cacheKey] = new PublicSuffixList($cacheFile);

    return $LIST_STATIC[$cacheKey];
  }
268
269
  /**
270
   * Retrieves public suffix list from file after obtaining a shared lock.
271
   *
272
   * @param string $phpFile
273
   *
274
   * @return PublicSuffixList Instance of Public Suffix List
275
   *
276
   * @throws \Exception Throws \Exception if unable to read file
277
   */
278 1
  public function getListFromFile($phpFile): PublicSuffixList
  {
    // The handle is only used to hold a shared lock while the file is included.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($phpFile, 'rb');
    if (!$fp) {
      throw new \Exception("Cannot read '$phpFile'");
    }

    $locked = false;
    try {
      $locked = flock($fp, LOCK_SH);
      if (!$locked) {
        throw new \Exception("Cannot read '$phpFile'");
      }

      /** @noinspection PhpIncludeInspection */
      return new PublicSuffixList(
          require $phpFile
      );
    } finally {
      // Release the lock and the handle on every path: success, lock failure,
      // or an error raised while including the file / building the list.
      if ($locked) {
        flock($fp, LOCK_UN);
      }
      fclose($fp);
    }
  }
296
297
  /**
298
   * Parses text representation of list to associative, multidimensional array.
299
   *
300
   * @param string $textFile Public Suffix List text filename
301
   *
302
   * @return array Associative, multidimensional array representation of the
303
   *               public suffix list
304
   */
305 1
  protected function convertListToArray($textFile): array
  {
    // Tracks, per section, whether the reader is currently between that
    // section's BEGIN and END marker lines.
    $sectionState = [
        self::ICANN_DOMAINS   => false,
        self::PRIVATE_DOMAINS => false,
    ];

    $lists = [
        self::ALL_DOMAINS     => [],
        self::ICANN_DOMAINS   => [],
        self::PRIVATE_DOMAINS => [],
    ];

    $file = new \SplFileObject($textFile);
    $file->setFlags(\SplFileObject::DROP_NEW_LINE | \SplFileObject::READ_AHEAD | \SplFileObject::SKIP_EMPTY);

    foreach ($file as $line) {
      // Section markers are comment lines, so the state is updated before
      // comment lines are filtered out.
      $sectionState = $this->validateDomainAddition($line, $sectionState);

      if (false === strpos($line, '//')) {
        $lists = $this->convertLineToArray($line, $lists, $sectionState);
      }
    }

    return $lists;
  }
330
331
  /**
332
   * Convert a line from the Public Suffix list.
333
   *
334
   * @param string $textLine              Public Suffix List text line
335
   * @param array  $publicSuffixListArray Associative, multidimensional array representation of the
336
   *                                      public suffix list
337
   * @param array  $addDomain             Tell which section should be converted
338
   *
339
   * @return array Associative, multidimensional array representation of the
340
   *               public suffix list
341
   */
342 1
  protected function convertLineToArray($textLine, array $publicSuffixListArray, array $addDomain): array
  {
    $labels = explode('.', $textLine);

    // Every rule is part of the complete list.
    $this->buildArray($publicSuffixListArray[self::ALL_DOMAINS], $labels);

    // The rule is additionally added to each section whose status is truthy.
    foreach ($addDomain as $section => $enabled) {
      if ($enabled) {
        $this->buildArray($publicSuffixListArray[$section], $labels);
      }
    }

    return $publicSuffixListArray;
  }
353
354
  /**
355
   * Update the addition status for a given line against the domain list (ICANN and PRIVATE).
356
   *
357
   * @param string $line      the current file line
358
   * @param array  $addDomain the domain addition status
359
   *
360
   * @return array
361
   */
362 1
  protected function validateDomainAddition($line, array $addDomain): array
  {
    // Recompute the status of every section for the current line, keeping the
    // sections in their original order.
    $updated = [];
    foreach ($addDomain as $section => $status) {
      $updated[$section] = $this->isValidSection($status, $line, $section);
    }

    return $updated;
  }
370
371
  /**
372
   * Tell whether the line can be converted for a given domain.
373
   *
374
   * @param bool   $previousStatus the previous status
375
   * @param string $line           the current file line
376
   * @param string $section        the section to be considered
377
   *
378
   * @return bool
379
   */
380 1
  protected function isValidSection($previousStatus, $line, $section): bool
  {
    // Only one marker can flip the status: END while inside the section,
    // BEGIN while outside of it.
    $marker = ($previousStatus ? '// ===END ' : '// ===BEGIN ') . $section . ' DOMAINS===';

    if (0 === strpos($line, $marker)) {
      return !$previousStatus;
    }

    // Any other line leaves the status untouched.
    return $previousStatus;
  }
392
393
  /**
394
   * Writes to file after obtaining an exclusive lock.
395
   *
396
   * @param string $filename Filename in cache dir where data will be written
397
   * @param mixed  $data     Data to write
398
   *
399
   * @return int Number of bytes that were written to the file
400
   *
401
   * @throws \Exception Throws \Exception if unable to write file
402
   */
403 2
  protected function write($filename, $data): int
  {
    $data = trim($data);
    $filePath = $this->cacheDir . '/' . $filename;

    if (empty($data)) {
      throw new \Exception("No data to write into '{$filePath}'");
    }

    // Open with 'c' (create, no truncation) so the existing content is only
    // discarded after the exclusive lock has been obtained.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($filePath, 'cb');
    if (!$fp) {
      throw new \Exception("Cannot write to '$filePath'");
    }

    // Stays false unless lock, truncate, write and flush all succeed.
    $bytesWritten = false;
    if (flock($fp, LOCK_EX)) {
      if (ftruncate($fp, 0)) {
        $written = fwrite($fp, $data);
        if (false !== $written && fflush($fp)) {
          $bytesWritten = $written;
        }
      }
      flock($fp, LOCK_UN);
    }
    fclose($fp);

    if (false === $bytesWritten) {
      throw new \Exception("Cannot write to '$filePath'");
    }

    // Return the real byte count reported by fwrite() rather than a boolean
    // success flag coerced to 1.
    return $bytesWritten;
  }
431
432
  /**
433
   * Returns http adapter. Returns default http adapter if one is not set.
434
   *
435
   * @return \Pdp\HttpAdapter\HttpAdapterInterface Http adapter
436
   */
437 2
  public function getHttpAdapter(): HttpAdapterInterface
  {
    if ($this->httpAdapter instanceof HttpAdapterInterface) {
      return $this->httpAdapter;
    }

    // No adapter configured yet: use the cURL adapter when the extension is
    // loaded, the plain PHP adapter otherwise, and keep it for later calls.
    $this->httpAdapter = \extension_loaded('curl')
        ? new HttpAdapter\CurlHttpAdapter()
        : new HttpAdapter\PhpHttpAdapter();

    return $this->httpAdapter;
  }
449
450
  /**
451
   * Sets http adapter.
452
   *
453
   * @param \Pdp\HttpAdapter\HttpAdapterInterface $httpAdapter
454
   */
455 10
  public function setHttpAdapter(HttpAdapter\HttpAdapterInterface $httpAdapter)
  {
    // Replaces any adapter set earlier, including a lazily created default.
    $this->httpAdapter = $httpAdapter;
  }
459
}
460