| Total Complexity | 44 |
| Total Lines | 281 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like PublicSuffixList often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use PublicSuffixList, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 14 | class PublicSuffixList |
||
| 15 | { |
||
| 16 | protected $sourceURL = 'https://publicsuffix.org/list/public_suffix_list.dat'; |
||
| 17 | protected $localPSL = 'public_suffix_list.dat'; |
||
| 18 | protected $cachedPrefix = 'cached_'; |
||
| 19 | |||
| 20 | protected $tree; |
||
| 21 | protected $url; |
||
| 22 | protected $dataDir = '/../data/'; // relative to __DIR__ |
||
| 23 | |||
| 24 | /** |
||
| 25 | * PublicSuffixList constructor. |
||
| 26 | * @param string|null $url URL for the PSL or null to use default |
||
| 27 | */ |
||
| 28 | public function __construct($url = null) |
||
| 31 | } |
||
| 32 | |||
/**
 * Point this instance at a PSL source and discard any tree held in memory.
 *
 * Because the tree is reset to null, the next getTree() call will load it again.
 *
 * @param string|null $url URL for the PSL or null to use default
 *
 * @return void
 */
public function setURL($url)
{
    // The in-memory tree belongs to the previous source, so forget it.
    $this->tree = null;
    $this->url = $url;
}
||
| 45 | |||
/**
 * Choose a default source when no URL has been configured.
 *
 * The local PSL name is always refreshed from the current URL first. If the URL
 * is null afterwards, a locally saved copy is used when that file exists;
 * otherwise the remote source URL is used.
 *
 * @return void
 */
protected function setFallbackURL()
{
    $this->setLocalPSLName($this->url);

    // An explicitly configured URL always wins; nothing to fall back to.
    if (null !== $this->url) {
        return;
    }

    if (file_exists(__DIR__ . $this->localPSL)) {
        $this->url = $this->localPSL;
    } else {
        $this->url = $this->sourceURL;
    }
}
||
| 59 | |||
/**
 * Load the PSL tree, automatically handling caches.
 *
 * A cached tree for the current URL is used when available. Otherwise the raw
 * list is read, parsed into $this->tree, and the result is cached.
 *
 * On a read failure the tree is reset to null before the exception is thrown,
 * so a later getTree() call retries the load instead of returning an empty tree.
 *
 * @return void (results in $this->tree)
 *
 * @throws \RuntimeException if the PSL cannot be read from $this->url
 */
protected function loadTree()
{
    $this->setFallbackURL();

    // Keep the cache result in a local so $this->tree never holds `false`.
    $cachedTree = $this->readCachedPSL($this->url);
    if (false !== $cachedTree) {
        $this->tree = $cachedTree;
        return;
    }

    $list = $this->readPSL();

    if (false === $list) {
        // Leave the instance in its "not loaded" state so the next call retries.
        $this->tree = null;
        throw new \RuntimeException('Cannot read ' . $this->url);
    }

    // Start from an empty tree before parsing, as the original code did.
    $this->tree = array();
    $this->parsePSL($list);

    // Caching is best effort; the return value is ignored, as in the original.
    $this->cachePSL($this->url);
}
||
| 87 | |||
| 88 | /** |
||
| 89 | * Parse the PSL data |
||
| 90 | * |
||
| 91 | * @param string $fileData the PSL data |
||
| 92 | * |
||
| 93 | * @return void (results in $this->tree) |
||
| 94 | */ |
||
| 95 | protected function parsePSL($fileData) |
||
| 108 | } |
||
| 109 | } |
||
| 110 | |||
/**
 * Does $search start with $startString?
 *
 * The comparison is byte-wise. The previous loose `==` comparison treated two
 * numeric-looking strings as numbers (for example "10" == "1e1"), which could
 * report a prefix match that does not exist.
 *
 * @param string $search      the string to test
 * @param string $startString the starting string to match
 *
 * @return bool true when the leading bytes of $search equal $startString
 *              (an empty $startString always matches)
 */
protected function startsWith($search, $startString)
{
    return 0 === strncmp($search, $startString, strlen($startString));
}
||
| 123 | |||
/**
 * Insert one rule into the tree, consuming its labels from right to left.
 *
 * The last element of $tldParts is taken as the label for this level. A label
 * prefixed with "!" has the prefix stripped and, when newly created, is stored
 * with a "!" marker entry; recursion stops at such a label.
 *
 * @param array    $node     tree array by reference
 * @param string[] $tldParts array of domain parts
 *
 * @return void - changes made to $node by reference
 */
protected function buildSubDomain(&$node, $tldParts)
{
    $label = trim(array_pop($tldParts));

    $isNegated = $this->startsWith($label, "!");
    if ($isNegated) {
        $label = substr($label, 1);
    }

    // Only create the entry once; an existing entry is left untouched.
    if (!array_key_exists($label, $node)) {
        $node[$label] = $isNegated ? array("!" => "") : array();
    }

    // Stop at "!" labels or when no labels remain.
    if ($isNegated || count($tldParts) <= 0) {
        return;
    }

    $this->buildSubDomain($node[$label], $tldParts);
}
||
| 154 | |||
/**
 * Return the PSL tree, loading it first when none is held in memory.
 *
 * @return array the PSL tree
 */
public function getTree()
{
    // Already loaded: hand it back without touching the cache or the source.
    if (null !== $this->tree) {
        return $this->tree;
    }

    $this->loadTree();

    return $this->tree;
}
||
| 167 | |||
/**
 * Fetch the raw PSL from the URL or file named by $this->url.
 *
 * A source whose parsed URL has a scheme or host is treated as remote; any
 * other source is read relative to __DIR__. file_get_contents is tried first,
 * then curl (when available) using $this->url. Content obtained from a remote
 * source is also saved as the local copy.
 *
 * @return bool|string PSL file contents or false on error
 */
protected function readPSL()
{
    $urlParts = parse_url($this->url);
    $isRemote = isset($urlParts['scheme']) || isset($urlParts['host']);
    $source = $isRemote ? $this->url : __DIR__ . $this->url;

    // first attempt: file_get_contents
    $contents = file_get_contents($source);

    if (false === $contents) {
        // second attempt: curl, when the extension is present and a handle can be made
        if (!function_exists('curl_init')) {
            return false;
        }
        $handle = curl_init();
        if (false === $handle) {
            return false;
        }
        curl_setopt($handle, CURLOPT_URL, $this->url);
        curl_setopt($handle, CURLOPT_FAILONERROR, true);
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
        $contents = curl_exec($handle);
        curl_close($handle);

        if (false === $contents) {
            return false;
        }
    }

    // Keep a local copy of anything fetched from a remote source.
    if ($isRemote) {
        $this->saveLocalPSL($contents);
    }

    return $contents;
}
||
| 204 | |||
/**
 * Build the path of the cache file associated with a PSL source.
 *
 * The path is __DIR__ plus the data directory, followed by the cache prefix
 * and the MD5 hash of the source name.
 *
 * @param string $url URL/filename of source PSL
 *
 * @return string cache file name for given resource
 */
protected function getCacheFileName($url)
{
    $directory = __DIR__ . $this->dataDir;
    $baseName = $this->cachedPrefix . md5($url);

    return $directory . $baseName;
}
||
| 216 | |||
| 217 | /** |
||
| 218 | * Attempt to load a cached Public Suffix List tree for a given source |
||
| 219 | * |
||
| 220 | * @param string $url URL/filename of source PSL |
||
| 221 | * |
||
| 222 | * @return bool|string[] PSL tree |
||
| 223 | */ |
||
| 224 | protected function readCachedPSL($url) |
||
| 232 | } |
||
| 233 | |||
/**
 * Serialize the current tree and write it to the cache file for the given source.
 *
 * @param string $url URL/filename of source PSL
 *
 * @return bool|int the number of bytes that were written to the file, or false on failure
 */
protected function cachePSL($url)
{
    $cacheFile = $this->getCacheFileName($url);
    $payload = serialize($this->tree);

    return file_put_contents($cacheFile, $payload);
}
||
| 245 | |||
/**
 * Write retrieved Public Suffix List data to the local PSL file.
 *
 * The target is __DIR__ followed by the current value of $this->localPSL.
 *
 * @param string $fileContents the PSL data to store
 *
 * @return bool|int the number of bytes that were written to the file, or false on failure
 */
protected function saveLocalPSL($fileContents)
{
    $target = __DIR__ . $this->localPSL;

    return file_put_contents($target, $fileContents);
}
||
| 257 | |||
| 258 | /** |
||
| 259 | * Set localPSL name based on URL |
||
| 260 | * |
||
| 261 | * @param null|string $url the URL for the PSL |
||
| 262 | * |
||
| 263 | * @return void (sets $this->localPSL) |
||
| 264 | */ |
||
| 265 | protected function setLocalPSLName($url) |
||
| 273 | } |
||
| 274 | |||
| 275 | /** |
||
| 276 | * Delete files in the data directory |
||
| 277 | * |
||
| 278 | * @param bool $cacheOnly true to limit clearing to cached serialized PSLs, false to clear all |
||
| 279 | * |
||
| 280 | * @return void |
||
| 281 | */ |
||
| 282 | public function clearDataDirectory($cacheOnly = false) |
||
| 295 | } |
||
| 296 | } |
||
| 297 | } |
||
| 298 | } |
||
| 299 |