Completed
Push — develop ( 1f8417...7020e4 )
by Lars
03:18
created

PublicSuffixListManager::parseListToArray()   B

Complexity

Conditions 4
Paths 3

Size

Total Lines 32
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 21
CRAP Score 4

Importance

Changes 6
Bugs 3 Features 1
Metric Value
cc 4
eloc 18
c 6
b 3
f 1
nc 3
nop 1
dl 0
loc 32
ccs 21
cts 21
cp 1
crap 4
rs 8.5806
1
<?php
2
3
/**
 * PHP Domain Parser: Public Suffix List based URL parsing.
 *
 * @link      http://github.com/jeremykendall/php-domain-parser for the canonical source repository
 *
 * @copyright Copyright (c) 2014 Jeremy Kendall (http://about.me/jeremykendall)
 * @license   http://github.com/jeremykendall/php-domain-parser/blob/master/LICENSE MIT License
 */
11
namespace Pdp;
12
13
use Pdp\HttpAdapter\HttpAdapterInterface;
14
15
/**
 * Public Suffix List Manager.
 *
 * This class obtains, writes, caches, and returns text and PHP representations
 * of the Public Suffix List.
 */
class PublicSuffixListManager
{
  const PDP_PSL_TEXT_FILE = 'public-suffix-list.txt';
  const PDP_PSL_PHP_FILE  = 'public-suffix-list.php';

  /**
   * @var string Public Suffix List URL
   */
  protected $publicSuffixListUrl = 'https://publicsuffix.org/list/effective_tld_names.dat';

  /**
   * @var string Directory where text and php versions of list will be cached
   */
  protected $cacheDir;

  /**
   * @var PublicSuffixList Public Suffix List
   */
  protected $list;

  /**
   * @var \Pdp\HttpAdapter\HttpAdapterInterface Http adapter
   */
  protected $httpAdapter;

  /**
   * Public constructor.
   *
   * @param string|null $cacheDir Optional cache directory; when omitted, the
   *                              "data" directory two levels above this file
   *                              is used
   */
  public function __construct($cacheDir = null)
  {
    if (null === $cacheDir) {
      $defaultDir = dirname(dirname(__DIR__)) . DIRECTORY_SEPARATOR . 'data';
      $resolvedDir = realpath($defaultDir);

      // realpath() yields false for a missing directory; keep the unresolved
      // path in that case so later error messages still name a real location.
      $cacheDir = (false !== $resolvedDir) ? $resolvedDir : $defaultDir;
    }

    $this->cacheDir = $cacheDir;
  }

  /**
   * Downloads Public Suffix List and writes text cache and PHP cache. If these files
   * already exist, they will be overwritten.
   *
   * @throws \Exception Throws \Exception if a cache file cannot be read or written
   */
  public function refreshPublicSuffixList()
  {
    $this->fetchListFromSource();
    $publicSuffixListArray = $this->parseListToArray(
        $this->cacheDir . '/' . self::PDP_PSL_TEXT_FILE
    );
    $this->writePhpCache($publicSuffixListArray);
  }

  /**
   * Obtain Public Suffix List from its online source and write to cache dir.
   *
   * Nothing is written when the download fails or yields no content, so an
   * existing text cache is never replaced by an empty file.
   *
   * @return int Number of bytes that were written to the file, 0 if nothing
   *             was downloaded
   *
   * @throws \Exception Throws \Exception if unable to write file
   */
  public function fetchListFromSource()
  {
    $publicSuffixList = $this->getHttpAdapter()->getContent($this->publicSuffixListUrl);

    if (!is_string($publicSuffixList) || '' === $publicSuffixList) {
      return 0;
    }

    return $this->write(self::PDP_PSL_TEXT_FILE, $publicSuffixList);
  }

  /**
   * Parses text representation of list to associative, multidimensional array.
   *
   * This method is based heavily on the code found in generateEffectiveTLDs.php
   *
   * @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
   * A copy of the Apache License, Version 2.0, is provided with this
   * distribution
   *
   * @param string $textFile Public Suffix List text filename
   *
   * @return array Associative, multidimensional array representation of the
   *               public suffix list
   *
   * @throws \Exception Throws \Exception if unable to read file
   */
  public function parseListToArray($textFile)
  {
    $fp = $this->openWithSharedLock($textFile);

    $publicSuffixListArray = array();

    try {
      // Read through the locked handle itself instead of re-opening the file.
      while (false !== ($line = fgets($fp))) {
        $rule = $this->extractRule($line);

        if (null !== $rule) {
          $this->buildArray($publicSuffixListArray, explode('.', $rule));
        }
      }
    } catch (\Exception $e) {
      // Do not leave the lock behind when building the array fails.
      $this->closeLockedHandle($fp);

      throw $e;
    }

    $this->closeLockedHandle($fp);

    return $publicSuffixListArray;
  }

  /**
   * Recursive method to build the array representation of the Public Suffix List.
   *
   * This method is based heavily on the code found in generateEffectiveTLDs.php
   *
   * @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
   * A copy of the Apache License, Version 2.0, is provided with this
   * distribution
   *
   * @param array $publicSuffixListArray Initially an empty array, this eventually
   *                                     becomes the array representation of the Public Suffix List
   * @param array $ruleParts             One line (rule) from the Public Suffix List
   *                                     exploded on '.', or the remaining portion of that array during recursion
   */
  public function buildArray(array &$publicSuffixListArray, array $ruleParts)
  {
    if (0 === count($ruleParts)) {
      return;
    }

    // Rules are processed right to left, i.e. starting at the TLD.
    $part = array_pop($ruleParts);

    // A leading "!" marks an exception rule. The marker is removed before
    // canonicalization so that only the label itself is passed to the encoder.
    $isDomain = true;
    if (strpos($part, '!') === 0) {
      $part = substr($part, 1);
      $isDomain = false;
    }

    // Adheres to canonicalization rule from the "Formal Algorithm" section
    // of https://publicsuffix.org/list/
    // "The domain and all rules must be canonicalized in the normal way
    // for hostnames - lower-case, Punycode (RFC 3492)."
    $punycode = new PunycodeWrapper();
    $part = $punycode->encode($part);

    if (!isset($publicSuffixListArray[$part])) {
      if ($isDomain) {
        $publicSuffixListArray[$part] = array();
      } else {
        $publicSuffixListArray[$part] = array('!' => '');
      }
    }

    if ($isDomain && count($ruleParts) > 0) {
      $this->buildArray($publicSuffixListArray[$part], $ruleParts);
    }
  }

  /**
   * Writes php array representation of the Public Suffix List to disk.
   *
   * @param array $publicSuffixList Array representation of the Public Suffix List
   *
   * @return int Number of bytes that were written to the file
   *
   * @throws \Exception Throws \Exception if unable to write file
   */
  public function writePhpCache(array $publicSuffixList)
  {
    $data = '<?php' . PHP_EOL . 'static $data = ' . var_export($publicSuffixList, true) . '; $result =& $data; unset($data); return $result;';

    return $this->write(self::PDP_PSL_PHP_FILE, $data);
  }

  /**
   * Gets Public Suffix List.
   *
   * The PHP cache is generated first when it does not exist yet.
   *
   * @return PublicSuffixList Instance of Public Suffix List
   *
   * @throws \Exception Throws \Exception if unable to read file
   */
  public function getList()
  {
    $phpFile = $this->cacheDir . '/' . self::PDP_PSL_PHP_FILE;

    if (!file_exists($phpFile)) {
      $this->refreshPublicSuffixList();
    }

    $this->list = $this->getListFromFile($phpFile);

    return $this->list;
  }

  /**
   * Retrieves public suffix list from file after obtaining a shared lock.
   *
   * @param string $phpFile Path of the PHP cache file to load
   *
   * @return PublicSuffixList Instance of Public Suffix List
   *
   * @throws \Exception Throws \Exception if unable to read file
   */
  public function getListFromFile($phpFile)
  {
    $fp = $this->openWithSharedLock($phpFile);

    try {
      /** @noinspection PhpIncludeInspection */
      $list = new PublicSuffixList(
          require $phpFile
      );
    } catch (\Exception $e) {
      // Do not leave the lock behind when the list cannot be built.
      $this->closeLockedHandle($fp);

      throw $e;
    }

    $this->closeLockedHandle($fp);

    return $list;
  }

  /**
   * Writes to file after obtaining an exclusive lock.
   *
   * @param string $filename Filename in cache dir where data will be written
   * @param mixed  $data     Data to write
   *
   * @return int Number of bytes that were written to the file
   *
   * @throws \Exception Throws \Exception if unable to write file
   */
  protected function write($filename, $data)
  {
    $filePath = $this->cacheDir . '/' . $filename;

    // open with 'c' and truncate file only after obtaining a lock
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($filePath, 'c');
    if (!$fp) {
      throw new \Exception("Cannot write to '$filePath'");
    }

    $bytesWritten = false;

    if (flock($fp, LOCK_EX)) {
      if (ftruncate($fp, 0)) {
        $bytesWritten = fwrite($fp, $data);

        // A failed flush means the data may not have reached the file.
        if (false !== $bytesWritten && !fflush($fp)) {
          $bytesWritten = false;
        }
      }

      flock($fp, LOCK_UN);
    }

    fclose($fp);

    if (false === $bytesWritten) {
      throw new \Exception("Cannot write to '$filePath'");
    }

    return $bytesWritten;
  }

  /**
   * Returns http adapter. Returns default http adapter if one is not set.
   *
   * The curl based adapter is the default when the curl extension is loaded.
   *
   * @return \Pdp\HttpAdapter\HttpAdapterInterface Http adapter
   */
  public function getHttpAdapter()
  {
    if (!$this->httpAdapter instanceof HttpAdapterInterface) {
      if (extension_loaded('curl')) {
        $this->httpAdapter = new HttpAdapter\CurlHttpAdapter();
      } else {
        $this->httpAdapter = new HttpAdapter\PhpHttpAdapter();
      }
    }

    return $this->httpAdapter;
  }

  /**
   * Sets http adapter.
   *
   * @param \Pdp\HttpAdapter\HttpAdapterInterface $httpAdapter
   */
  public function setHttpAdapter(HttpAdapter\HttpAdapterInterface $httpAdapter)
  {
    $this->httpAdapter = $httpAdapter;
  }

  /**
   * Opens a file for reading and obtains a shared lock on it.
   *
   * @param string $file Path of the file to open
   *
   * @return resource Open, locked file handle
   *
   * @throws \Exception Throws \Exception if the file cannot be opened or locked
   */
  private function openWithSharedLock($file)
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $fp = @fopen($file, 'r');
    if (!$fp) {
      throw new \Exception("Cannot read '$file'");
    }

    if (!flock($fp, LOCK_SH)) {
      // The handle was opened successfully, so it must be closed again.
      fclose($fp);

      throw new \Exception("Cannot read '$file'");
    }

    return $fp;
  }

  /**
   * Releases the lock held on a file handle and closes the handle.
   *
   * @param resource $fp Handle returned by openWithSharedLock()
   */
  private function closeLockedHandle($fp)
  {
    flock($fp, LOCK_UN);
    fclose($fp);
  }

  /**
   * Extracts the rule from one line of the Public Suffix List text file.
   *
   * A line is only read up to the first whitespace. Blank lines and anything
   * containing "//" (comments) carry no rule.
   *
   * @param string $line Raw line, possibly with a trailing line break
   *
   * @return string|null The rule, or null if the line does not contain one
   */
  private function extractRule($line)
  {
    $line = trim($line);
    if ('' === $line) {
      return null;
    }

    $fields = preg_split('/\s+/', $line, 2);
    $rule = $fields[0];

    if (false !== strpos($rule, '//')) {
      return null;
    }

    return $rule;
  }
}
307