@@ -72,7 +72,7 @@ |
||
| 72 | 72 | |
| 73 | 73 | /** |
| 74 | 74 | * @param string $option |
| 75 | - * @param mixed $value |
|
| 75 | + * @param string $value |
|
| 76 | 76 | */ |
| 77 | 77 | public function set($option, $value) { |
| 78 | 78 | $this->options[$option] = $value; |
@@ -347,7 +347,7 @@ discard block |
||
| 347 | 347 | return false; |
| 348 | 348 | } |
| 349 | 349 | |
| 350 | - $regex = '@' . implode('|', $this->badFileNames) . '@i'; |
|
| 350 | + $regex = '@'.implode('|', $this->badFileNames).'@i'; |
|
| 351 | 351 | |
| 352 | 352 | if (preg_match($regex, $imgSrc)) { |
| 353 | 353 | return false; |
@@ -545,10 +545,10 @@ discard block |
||
| 545 | 545 | $knownImage = null; |
| 546 | 546 | |
| 547 | 547 | foreach ($knownImgDomNames as $knownName) { |
| 548 | - $known = $this->article()->getRawDoc()->find('#' . $knownName); |
|
| 548 | + $known = $this->article()->getRawDoc()->find('#'.$knownName); |
|
| 549 | 549 | |
| 550 | 550 | if (!$known->count()) { |
| 551 | - $known = $this->article()->getRawDoc()->find('.' . $knownName); |
|
| 551 | + $known = $this->article()->getRawDoc()->find('.'.$knownName); |
|
| 552 | 552 | } |
| 553 | 553 | |
| 554 | 554 | if ($known->count()) { |
@@ -602,7 +602,7 @@ discard block |
||
| 602 | 602 | */ |
| 603 | 603 | private function customSiteMapping() { |
| 604 | 604 | if (empty(self::$CUSTOM_SITE_MAPPING)) { |
| 605 | - $file = __DIR__ . '/../../../resources/images/known-image-css.txt'; |
|
| 605 | + $file = __DIR__.'/../../../resources/images/known-image-css.txt'; |
|
| 606 | 606 | |
| 607 | 607 | $lines = explode("\n", str_replace(["\r\n", "\r"], "\n", file_get_contents($file))); |
| 608 | 608 | |
@@ -30,7 +30,7 @@ |
||
| 30 | 30 | private function getDateFromURL() { |
| 31 | 31 | // Determine date based on URL |
| 32 | 32 | if (preg_match('@(?:[\d]{4})(?<delimiter>[/-])(?:[\d]{2})\k<delimiter>(?:[\d]{2})@U', $this->article()->getFinalUrl(), $matches)) { |
| 33 | - $dt = \DateTime::createFromFormat('Y' . $matches['delimiter'] . 'm' . $matches['delimiter'] . 'd', $matches[0]); |
|
| 33 | + $dt = \DateTime::createFromFormat('Y'.$matches['delimiter'].'m'.$matches['delimiter'].'d', $matches[0]); |
|
| 34 | 34 | $dt->setTime(0, 0, 0); |
| 35 | 35 | |
| 36 | 36 | if ($dt === false) { |
@@ -92,8 +92,7 @@ discard block |
||
| 92 | 92 | $dt = new \DateTime($node->getAttribute('content')); |
| 93 | 93 | break; |
| 94 | 94 | } |
| 95 | - } |
|
| 96 | - catch (\Exception $e) { |
|
| 95 | + } catch (\Exception $e) { |
|
| 97 | 96 | // Do nothing here in case the node has unrecognizable date information. |
| 98 | 97 | } |
| 99 | 98 | } |
@@ -113,8 +112,7 @@ discard block |
||
| 113 | 112 | $dt = new \DateTime($json->datePublished); |
| 114 | 113 | break; |
| 115 | 114 | } |
| 116 | - } |
|
| 117 | - catch (\Exception $e) { |
|
| 115 | + } catch (\Exception $e) { |
|
| 118 | 116 | // Do nothing here in case the node has unrecognizable date information. |
| 119 | 117 | } |
| 120 | 118 | } |
@@ -141,8 +139,7 @@ discard block |
||
| 141 | 139 | $dt = new \DateTime($node->getAttribute('content')); |
| 142 | 140 | break; |
| 143 | 141 | } |
| 144 | - } |
|
| 145 | - catch (\Exception $e) { |
|
| 142 | + } catch (\Exception $e) { |
|
| 146 | 143 | // Do nothing here in case the node has unrecognizable date information. |
| 147 | 144 | } |
| 148 | 145 | } |
@@ -174,8 +171,7 @@ discard block |
||
| 174 | 171 | if (is_null($dt) && isset($og_data['pubdate'])) { |
| 175 | 172 | $dt = new \DateTime($og_data['pubdate']); |
| 176 | 173 | } |
| 177 | - } |
|
| 178 | - catch (\Exception $e) { |
|
| 174 | + } catch (\Exception $e) { |
|
| 179 | 175 | // Do nothing here in case the node has unrecognizable date information. |
| 180 | 176 | } |
| 181 | 177 | |
@@ -207,8 +203,7 @@ discard block |
||
| 207 | 203 | $dt = new \DateTime($json->dateCreated); |
| 208 | 204 | break; |
| 209 | 205 | } |
| 210 | - } |
|
| 211 | - catch (\Exception $e) { |
|
| 206 | + } catch (\Exception $e) { |
|
| 212 | 207 | // Do nothing here in case the node has unrecognizable date information. |
| 213 | 208 | } |
| 214 | 209 | } |
@@ -227,8 +222,7 @@ discard block |
||
| 227 | 222 | $dt = new \DateTime($node->getAttribute('content')); |
| 228 | 223 | break; |
| 229 | 224 | } |
| 230 | - } |
|
| 231 | - catch (\Exception $e) { |
|
| 225 | + } catch (\Exception $e) { |
|
| 232 | 226 | // Do nothing here in case the node has unrecognizable date information. |
| 233 | 227 | } |
| 234 | 228 | } |
@@ -250,8 +244,7 @@ discard block |
||
| 250 | 244 | break; |
| 251 | 245 | } |
| 252 | 246 | } |
| 253 | - } |
|
| 254 | - catch (\Exception $e) { |
|
| 247 | + } catch (\Exception $e) { |
|
| 255 | 248 | // Do nothing here in case the node has unrecognizable date information. |
| 256 | 249 | } |
| 257 | 250 | } |
@@ -165,7 +165,7 @@ discard block |
||
| 165 | 165 | ]; |
| 166 | 166 | |
| 167 | 167 | $exceptions = array_map(function($value) { |
| 168 | - return ':not(' . $value . ')'; |
|
| 168 | + return ':not('.$value.')'; |
|
| 169 | 169 | }, $this->exceptionSelectors); |
| 170 | 170 | |
| 171 | 171 | $exceptions = implode('', $exceptions); |
@@ -173,7 +173,7 @@ discard block |
||
| 173 | 173 | foreach ($lists as $expr => $list) { |
| 174 | 174 | foreach ($list as $value) { |
| 175 | 175 | foreach ($attrs as $attr) { |
| 176 | - $selector = sprintf($expr, $attr, $value) . $exceptions; |
|
| 176 | + $selector = sprintf($expr, $attr, $value).$exceptions; |
|
| 177 | 177 | |
| 178 | 178 | foreach ($this->document()->find($selector) as $node) { |
| 179 | 179 | $node->remove(); |
@@ -14,7 +14,7 @@ |
||
| 14 | 14 | */ |
| 15 | 15 | public function __construct($options = []) { |
| 16 | 16 | foreach ($options as $key => $value) { |
| 17 | - $method = 'set' . ucfirst($key); |
|
| 17 | + $method = 'set'.ucfirst($key); |
|
| 18 | 18 | |
| 19 | 19 | if (method_exists($this, $method)) { |
| 20 | 20 | call_user_func([$this, $method], $value); |
@@ -22,12 +22,12 @@ |
||
| 22 | 22 | $parts = parse_url($urlToCrawl); |
| 23 | 23 | |
| 24 | 24 | if ($parts === false) { |
| 25 | - throw new MalformedURLException($urlToCrawl . ' - is a malformed URL and cannot be processed'); |
|
| 25 | + throw new MalformedURLException($urlToCrawl.' - is a malformed URL and cannot be processed'); |
|
| 26 | 26 | } |
| 27 | 27 | |
| 28 | 28 | $prefix = isset($parts['query']) && $parts['query'] ? '&' : '?'; |
| 29 | 29 | |
| 30 | - $finalUrl = str_replace('#!', $prefix . '_escaped_fragment_=', $urlToCrawl); |
|
| 30 | + $finalUrl = str_replace('#!', $prefix.'_escaped_fragment_=', $urlToCrawl); |
|
| 31 | 31 | |
| 32 | 32 | return (object)[ |
| 33 | 33 | 'url' => $urlToCrawl, |
@@ -66,7 +66,7 @@ |
||
| 66 | 66 | */ |
| 67 | 67 | public function getWordList() { |
| 68 | 68 | if (empty($this->cached)) { |
| 69 | - $file = sprintf(__DIR__ . '/../../resources/text/stopwords-%s.txt', $this->getLanguage()); |
|
| 69 | + $file = sprintf(__DIR__.'/../../resources/text/stopwords-%s.txt', $this->getLanguage()); |
|
| 70 | 70 | |
| 71 | 71 | $this->cached = explode("\n", str_replace(["\r\n", "\r"], "\n", file_get_contents($file))); |
| 72 | 72 | } |
@@ -84,7 +84,7 @@ discard block |
||
| 84 | 84 | $srcHost = parse_url($src, PHP_URL_HOST); |
| 85 | 85 | $srcScheme = parse_url($src, PHP_URL_SCHEME); |
| 86 | 86 | |
| 87 | - return $match || preg_match('@' . $domain . '$@i', $srcHost) && in_array($srcScheme, ['http', 'https']); |
|
| 87 | + return $match || preg_match('@'.$domain.'$@i', $srcHost) && in_array($srcScheme, ['http', 'https']); |
|
| 88 | 88 | }); |
| 89 | 89 | |
| 90 | 90 | if ($match) { |
@@ -126,10 +126,10 @@ discard block |
||
| 126 | 126 | $stopWords = $this->config()->getStopWords()->getCurrentStopWords(); |
| 127 | 127 | |
| 128 | 128 | $text = $this->article()->getTitle(); |
| 129 | - $text .= ' ' . $this->article()->getMetaDescription(); |
|
| 129 | + $text .= ' '.$this->article()->getMetaDescription(); |
|
| 130 | 130 | |
| 131 | 131 | if ($this->article()->getTopNode()) { |
| 132 | - $text .= ' ' . $this->article()->getCleanedArticleText(); |
|
| 132 | + $text .= ' '.$this->article()->getCleanedArticleText(); |
|
| 133 | 133 | } |
| 134 | 134 | |
| 135 | 135 | // Decode and split words by white-space |
@@ -60,7 +60,7 @@ |
||
| 60 | 60 | |
| 61 | 61 | // Additionally retrieve type values based on provided og:type (http://ogp.me/#types) |
| 62 | 62 | if (isset($results['type'])) { |
| 63 | - $nodes = $this->article()->getDoc()->find('meta[property^="' . $results['type'] .':"]'); |
|
| 63 | + $nodes = $this->article()->getDoc()->find('meta[property^="'.$results['type'].':"]'); |
|
| 64 | 64 | |
| 65 | 65 | foreach ($nodes as $node) { |
| 66 | 66 | $property = explode(':', $node->attr('property')); |