@@ -42,10 +42,10 @@ discard block |
||
| 42 | 42 | $link = pathinfo($baseUrl, PATHINFO_DIRNAME).'/'.$link; |
| 43 | 43 | } elseif (preg_match('@^http(s?)://.*$@', $link) === 0) { //is not absolute |
| 44 | 44 | $urlParts = parse_url($baseUrl); |
| 45 | - $scheme = isset($urlParts['scheme'])===true?$urlParts['scheme']:'http'; |
|
| 46 | - $host = isset($urlParts['host'])===true?$urlParts['host']:''; |
|
| 45 | + $scheme = isset($urlParts['scheme']) === true ? $urlParts['scheme'] : 'http'; |
|
| 46 | + $host = isset($urlParts['host']) === true ? $urlParts['host'] : ''; |
|
| 47 | 47 | if (strpos($link, '//') === 0) { //begins with // |
| 48 | - $link = $scheme . ':' . $link; |
|
| 48 | + $link = $scheme.':'.$link; |
|
| 49 | 49 | } elseif (strpos($link, '/') === 0) { //begins with / |
| 50 | 50 | $link = $scheme.'://'.$host.$link; |
| 51 | 51 | } else { |
@@ -71,7 +71,7 @@ discard block |
||
| 71 | 71 | return $raw_html; |
| 72 | 72 | } |
| 73 | 73 | |
| 74 | - $disallowed_tags = ['script', 'style', 'meta','form','aside']; |
|
| 74 | + $disallowed_tags = ['script', 'style', 'meta', 'form', 'aside']; |
|
| 75 | 75 | |
| 76 | 76 | $xmlDoc = new \DOMDocument(); |
| 77 | 77 | libxml_use_internal_errors(true); |
@@ -100,7 +100,7 @@ discard block |
||
| 100 | 100 | */ |
| 101 | 101 | public function normalizeBodyLinks($html) |
| 102 | 102 | { |
| 103 | - if (empty($html)===true) { //if html is empty, do nothing |
|
| 103 | + if (empty($html) === true) { //if html is empty, do nothing |
|
| 104 | 104 | return $html; |
| 105 | 105 | } |
| 106 | 106 | |
@@ -159,7 +159,7 @@ discard block |
||
| 159 | 159 | |
| 160 | 160 | $ret = ''; |
| 161 | 161 | $html_crawler->filter('body')->each( |
| 162 | - function (Crawler $node) use (&$ret) { |
|
| 162 | + function(Crawler $node) use (&$ret) { |
|
| 163 | 163 | $ret = $node->html(); |
| 164 | 164 | } |
| 165 | 165 | ); |
@@ -177,7 +177,7 @@ discard block |
||
| 177 | 177 | protected function getSrcByImgSelector(Crawler $crawler, $selector) |
| 178 | 178 | { |
| 179 | 179 | $ret = null; |
| 180 | - $imgExtractClosure = function (Crawler $node) use (&$ret) { |
|
| 180 | + $imgExtractClosure = function(Crawler $node) use (&$ret) { |
|
| 181 | 181 | $ret = $node->attr('src'); |
| 182 | 182 | }; |
| 183 | 183 | if (Selector::isXPath($selector)) { |
@@ -158,7 +158,7 @@ |
||
| 158 | 158 | |
| 159 | 159 | $ret = null; |
| 160 | 160 | if ($extractClosure === null) { |
| 161 | - $extractClosure = function (Crawler $node) use (&$ret) { |
|
| 161 | + $extractClosure = function(Crawler $node) use (&$ret) { |
|
| 162 | 162 | $ret = $node->html(); |
| 163 | 163 | }; |
| 164 | 164 | } |
@@ -22,7 +22,7 @@ discard block |
||
| 22 | 22 | |
| 23 | 23 | $crawler->filterXPath('//head/title') |
| 24 | 24 | ->each( |
| 25 | - function (Crawler $node) use (&$ret) { |
|
| 25 | + function(Crawler $node) use (&$ret) { |
|
| 26 | 26 | $ret = $node->text(); |
| 27 | 27 | } |
| 28 | 28 | ); |
@@ -42,7 +42,7 @@ discard block |
||
| 42 | 42 | |
| 43 | 43 | $crawler->filterXPath('//img') |
| 44 | 44 | ->each( |
| 45 | - function (Crawler $node) use (&$ret, $theAdapter) { |
|
| 45 | + function(Crawler $node) use (&$ret, $theAdapter) { |
|
| 46 | 46 | $img_src = $theAdapter->normalizeLink($node->attr('src')); |
| 47 | 47 | $width_org = $height_org = 0; |
| 48 | 48 | |
@@ -77,7 +77,7 @@ discard block |
||
| 77 | 77 | |
| 78 | 78 | $crawler->filterXPath("//head/meta[@name='description']") |
| 79 | 79 | ->each( |
| 80 | - function (Crawler $node) use (&$ret) { |
|
| 80 | + function(Crawler $node) use (&$ret) { |
|
| 81 | 81 | $ret = $node->attr('content'); |
| 82 | 82 | } |
| 83 | 83 | ); |
@@ -96,7 +96,7 @@ discard block |
||
| 96 | 96 | |
| 97 | 97 | $crawler->filterXPath("//head/meta[@name='keywords']") |
| 98 | 98 | ->each( |
| 99 | - function (Crawler $node) use (&$ret) { |
|
| 99 | + function(Crawler $node) use (&$ret) { |
|
| 100 | 100 | $node_txt = trim($node->attr('content')); |
| 101 | 101 | if (!empty($node_txt)) { |
| 102 | 102 | $ret = explode(',', $node_txt); |
@@ -119,7 +119,7 @@ discard block |
||
| 119 | 119 | |
| 120 | 120 | $crawler->filterXPath("//article") |
| 121 | 121 | ->each( |
| 122 | - function (Crawler $node) use (&$ret) { |
|
| 122 | + function(Crawler $node) use (&$ret) { |
|
| 123 | 123 | |
| 124 | 124 | $node_txt = $node->text(); |
| 125 | 125 | if (strlen($node_txt) > strlen($ret)) { |
@@ -142,7 +142,7 @@ discard block |
||
| 142 | 142 | |
| 143 | 143 | $crawler->filterXPath("//meta[@name='pubdate']") |
| 144 | 144 | ->each( |
| 145 | - function (Crawler $node) use (&$date_str) { |
|
| 145 | + function(Crawler $node) use (&$date_str) { |
|
| 146 | 146 | if (empty($date_str) === true) { |
| 147 | 147 | $date_str = $node->attr('content'); |
| 148 | 148 | } |
@@ -172,7 +172,7 @@ discard block |
||
| 172 | 172 | $ret = null; |
| 173 | 173 | $crawler->filterXPath("//head/meta[@name='author']") |
| 174 | 174 | ->each( |
| 175 | - function (Crawler $node) use (&$ret) { |
|
| 175 | + function(Crawler $node) use (&$ret) { |
|
| 176 | 176 | $ret = $node->attr('content'); |
| 177 | 177 | } |
| 178 | 178 | ); |
@@ -17,7 +17,7 @@ discard block |
||
| 17 | 17 | |
| 18 | 18 | $crawler->filter('.hentry .entry-title') |
| 19 | 19 | ->each( |
| 20 | - function (Crawler $node) use (&$ret) { |
|
| 20 | + function(Crawler $node) use (&$ret) { |
|
| 21 | 21 | $ret = $node->text(); |
| 22 | 22 | } |
| 23 | 23 | ); |
@@ -38,7 +38,7 @@ discard block |
||
| 38 | 38 | |
| 39 | 39 | $crawler->filter('.hentry .entry-summary') |
| 40 | 40 | ->each( |
| 41 | - function (Crawler $node) use (&$ret) { |
|
| 41 | + function(Crawler $node) use (&$ret) { |
|
| 42 | 42 | $ret = $node->text(); |
| 43 | 43 | } |
| 44 | 44 | ); |
@@ -52,7 +52,7 @@ discard block |
||
| 52 | 52 | |
| 53 | 53 | $crawler->filter('.hentry a[rel="tag"]') |
| 54 | 54 | ->each( |
| 55 | - function (Crawler $node) use (&$ret) { |
|
| 55 | + function(Crawler $node) use (&$ret) { |
|
| 56 | 56 | $ret[] = $node->text(); |
| 57 | 57 | } |
| 58 | 58 | ); |
@@ -65,7 +65,7 @@ discard block |
||
| 65 | 65 | $ret = null; |
| 66 | 66 | $crawler->filter(".hentry .entry-content") |
| 67 | 67 | ->each( |
| 68 | - function (Crawler $node) use (&$ret) { |
|
| 68 | + function(Crawler $node) use (&$ret) { |
|
| 69 | 69 | $ret = $this->normalizeHtml($node->html()); |
| 70 | 70 | } |
| 71 | 71 | ); |
@@ -79,7 +79,7 @@ discard block |
||
| 79 | 79 | |
| 80 | 80 | $crawler->filter('time.published, .hentry .entry-date') |
| 81 | 81 | ->each( |
| 82 | - function (Crawler $node) use (&$date_str) { |
|
| 82 | + function(Crawler $node) use (&$date_str) { |
|
| 83 | 83 | $date_str = $node->attr('datetime'); |
| 84 | 84 | } |
| 85 | 85 | ); |
@@ -95,7 +95,7 @@ discard block |
||
| 95 | 95 | $ret = null; |
| 96 | 96 | $crawler->filter('.hentry .author.vcard') |
| 97 | 97 | ->each( |
| 98 | - function (Crawler $node) use (&$ret) { |
|
| 98 | + function(Crawler $node) use (&$ret) { |
|
| 99 | 99 | $ret = $node->text(); |
| 100 | 100 | } |
| 101 | 101 | ); |
@@ -17,7 +17,7 @@ discard block |
||
| 17 | 17 | public function extractTitle(Crawler $crawler) |
| 18 | 18 | { |
| 19 | 19 | $article_data = $this->getJsonData($crawler); |
| 20 | - $ret = isset($article_data['headline'])?$article_data['headline']:null; |
|
| 20 | + $ret = isset($article_data['headline']) ? $article_data['headline'] : null; |
|
| 21 | 21 | |
| 22 | 22 | return $ret; |
| 23 | 23 | } |
@@ -39,7 +39,7 @@ discard block |
||
| 39 | 39 | public function extractDescription(Crawler $crawler) |
| 40 | 40 | { |
| 41 | 41 | $article_data = $this->getJsonData($crawler); |
| 42 | - $ret = isset($article_data['description'])?$article_data['description']:null; |
|
| 42 | + $ret = isset($article_data['description']) ? $article_data['description'] : null; |
|
| 43 | 43 | |
| 44 | 44 | return $ret; |
| 45 | 45 | } |
@@ -47,7 +47,7 @@ discard block |
||
| 47 | 47 | public function extractKeywords(Crawler $crawler) |
| 48 | 48 | { |
| 49 | 49 | $article_data = $this->getJsonData($crawler); |
| 50 | - $ret = isset($article_data['keywords'])?$article_data['keywords']:array(); |
|
| 50 | + $ret = isset($article_data['keywords']) ? $article_data['keywords'] : array(); |
|
| 51 | 51 | |
| 52 | 52 | if (!is_array($ret)) { |
| 53 | 53 | $ret = explode(',', $ret); |
@@ -137,7 +137,7 @@ discard block |
||
| 137 | 137 | |
| 138 | 138 | $ret = array(); |
| 139 | 139 | $crawler->filterXPath('//script[@type="application/ld+json"]') |
| 140 | - ->each(function (Crawler $node) use (&$ret) { |
|
| 140 | + ->each(function(Crawler $node) use (&$ret) { |
|
| 141 | 141 | $json_content = trim($node->text()); |
| 142 | 142 | if (empty($json_content) === true && $node->attr('src')) { |
| 143 | 143 | $script_path = $this->normalizeLink($node->attr('src')); |
@@ -171,7 +171,7 @@ discard block |
||
| 171 | 171 | 'APIReference']; |
| 172 | 172 | |
| 173 | 173 | if (isset($article_data['@context']) && |
| 174 | - $article_data['@context']=='http://schema.org' && |
|
| 174 | + $article_data['@context'] == 'http://schema.org' && |
|
| 175 | 175 | isset($article_data['@type']) && |
| 176 | 176 | in_array($article_data['@type'], $article_types)) { |
| 177 | 177 | return true; |
@@ -22,7 +22,7 @@ discard block |
||
| 22 | 22 | |
| 23 | 23 | $crawler->filterXPath('//*[@itemprop="headline"]') |
| 24 | 24 | ->each( |
| 25 | - function (Crawler $node) use (&$ret) { |
|
| 25 | + function(Crawler $node) use (&$ret) { |
|
| 26 | 26 | $ret = trim($node->text()); |
| 27 | 27 | } |
| 28 | 28 | ); |
@@ -44,7 +44,7 @@ discard block |
||
| 44 | 44 | |
| 45 | 45 | $crawler->filterXPath('//*[@itemprop="description"]') |
| 46 | 46 | ->each( |
| 47 | - function (Crawler $node) use (&$ret) { |
|
| 47 | + function(Crawler $node) use (&$ret) { |
|
| 48 | 48 | if ($node->nodeName() === 'meta') { |
| 49 | 49 | $ret = trim($node->attr('content')); |
| 50 | 50 | } else { |
@@ -67,7 +67,7 @@ discard block |
||
| 67 | 67 | |
| 68 | 68 | $crawler->filterXPath('//*[@itemprop="keywords"]') |
| 69 | 69 | ->each( |
| 70 | - function (Crawler $node) use (&$ret) { |
|
| 70 | + function(Crawler $node) use (&$ret) { |
|
| 71 | 71 | if ($node->nodeName() === 'meta') { |
| 72 | 72 | $keyword_txt = trim($node->attr('content')); |
| 73 | 73 | } else { |
@@ -89,7 +89,7 @@ discard block |
||
| 89 | 89 | |
| 90 | 90 | $crawler->filterXPath('//*[@itemprop="articleBody"]') |
| 91 | 91 | ->each( |
| 92 | - function (Crawler $node) use (&$ret) { |
|
| 92 | + function(Crawler $node) use (&$ret) { |
|
| 93 | 93 | $ret .= $node->html(); |
| 94 | 94 | } |
| 95 | 95 | ); |
@@ -106,7 +106,7 @@ discard block |
||
| 106 | 106 | "//*[@itemtype='http://schema.org/$article_type']" |
| 107 | 107 | ) |
| 108 | 108 | ->each( |
| 109 | - function (Crawler $node) use (&$ret) { |
|
| 109 | + function(Crawler $node) use (&$ret) { |
|
| 110 | 110 | $ret .= $node->html(); |
| 111 | 111 | } |
| 112 | 112 | ); |
@@ -128,7 +128,7 @@ discard block |
||
| 128 | 128 | |
| 129 | 129 | $crawler->filterXPath('//*[@itemprop="datePublished"]') |
| 130 | 130 | ->each( |
| 131 | - function (Crawler $node) use (&$date_str) { |
|
| 131 | + function(Crawler $node) use (&$date_str) { |
|
| 132 | 132 | if ($node->nodeName() === 'meta') { |
| 133 | 133 | $date_str = $node->attr('content'); |
| 134 | 134 | } elseif ($node->attr('datetime')) { |
@@ -157,7 +157,7 @@ discard block |
||
| 157 | 157 | 'and @itemtype="http://schema.org/Person"]//*[@itemprop="name"]' |
| 158 | 158 | ) |
| 159 | 159 | ->each( |
| 160 | - function (Crawler $node) use (&$ret) { |
|
| 160 | + function(Crawler $node) use (&$ret) { |
|
| 161 | 161 | $ret = $node->text(); |
| 162 | 162 | } |
| 163 | 163 | ); |
@@ -165,7 +165,7 @@ discard block |
||
| 165 | 165 | if (is_null($ret)) { |
| 166 | 166 | $crawler->filterXPath('//*[@itemprop="author"]') |
| 167 | 167 | ->each( |
| 168 | - function (Crawler $node) use (&$ret) { |
|
| 168 | + function(Crawler $node) use (&$ret) { |
|
| 169 | 169 | if ($node->nodeName() === 'meta') { |
| 170 | 170 | $ret = $node->attr('content'); |
| 171 | 171 | } else { |
@@ -22,7 +22,7 @@ discard block |
||
| 22 | 22 | |
| 23 | 23 | $crawler->filterXPath("//head/meta[@property='og:title']") |
| 24 | 24 | ->each( |
| 25 | - function (Crawler $node) use (&$ret) { |
|
| 25 | + function(Crawler $node) use (&$ret) { |
|
| 26 | 26 | $ret = $node->attr('content'); |
| 27 | 27 | } |
| 28 | 28 | ); |
@@ -31,7 +31,7 @@ discard block |
||
| 31 | 31 | if (empty($ret) === true) { |
| 32 | 32 | $crawler->filterXPath('//h1') |
| 33 | 33 | ->each( |
| 34 | - function (Crawler $node) use (&$ret) { |
|
| 34 | + function(Crawler $node) use (&$ret) { |
|
| 35 | 35 | $ret = $node->text(); |
| 36 | 36 | } |
| 37 | 37 | ); |
@@ -40,7 +40,7 @@ discard block |
||
| 40 | 40 | if (empty($ret) === true) { |
| 41 | 41 | $crawler->filterXPath('//head/title') |
| 42 | 42 | ->each( |
| 43 | - function (Crawler $node) use (&$ret) { |
|
| 43 | + function(Crawler $node) use (&$ret) { |
|
| 44 | 44 | $ret = $node->text(); |
| 45 | 45 | } |
| 46 | 46 | ); |
@@ -61,7 +61,7 @@ discard block |
||
| 61 | 61 | |
| 62 | 62 | $crawler->filterXPath("//head/meta[@property='og:image']") |
| 63 | 63 | ->each( |
| 64 | - function (Crawler $node) use (&$ret) { |
|
| 64 | + function(Crawler $node) use (&$ret) { |
|
| 65 | 65 | $ret = $node->attr('content'); |
| 66 | 66 | } |
| 67 | 67 | ); |
@@ -69,7 +69,7 @@ discard block |
||
| 69 | 69 | if (empty($ret) === true) { |
| 70 | 70 | $crawler->filterXPath('//img') |
| 71 | 71 | ->each( |
| 72 | - function (Crawler $node) use (&$ret, $theAdapter) { |
|
| 72 | + function(Crawler $node) use (&$ret, $theAdapter) { |
|
| 73 | 73 | $img_src = $theAdapter->normalizeLink($node->attr('src')); |
| 74 | 74 | $width_org = $height_org = 0; |
| 75 | 75 | |
@@ -106,7 +106,7 @@ discard block |
||
| 106 | 106 | |
| 107 | 107 | $crawler->filterXPath("//head/meta[@property='og:description']") |
| 108 | 108 | ->each( |
| 109 | - function (Crawler $node) use (&$ret) { |
|
| 109 | + function(Crawler $node) use (&$ret) { |
|
| 110 | 110 | $ret = $node->attr('content'); |
| 111 | 111 | } |
| 112 | 112 | ); |
@@ -125,7 +125,7 @@ discard block |
||
| 125 | 125 | |
| 126 | 126 | $crawler->filterXPath("//head/meta[@property='og:keywords']") |
| 127 | 127 | ->each( |
| 128 | - function (Crawler $node) use (&$ret) { |
|
| 128 | + function(Crawler $node) use (&$ret) { |
|
| 129 | 129 | |
| 130 | 130 | $node_txt = trim($node->attr('content')); |
| 131 | 131 | if (!empty($node_txt)) { |
@@ -150,7 +150,7 @@ discard block |
||
| 150 | 150 | |
| 151 | 151 | $crawler->filterXPath("//head/meta[@property='article:published_time']") |
| 152 | 152 | ->each( |
| 153 | - function (Crawler $node) use (&$date_str) { |
|
| 153 | + function(Crawler $node) use (&$date_str) { |
|
| 154 | 154 | $date_str = $node->attr('content'); |
| 155 | 155 | } |
| 156 | 156 | ); |
@@ -168,7 +168,7 @@ discard block |
||
| 168 | 168 | $ret = null; |
| 169 | 169 | $crawler->filterXPath("//head/meta[@property='article:author']") |
| 170 | 170 | ->each( |
| 171 | - function (Crawler $node) use (&$ret) { |
|
| 171 | + function(Crawler $node) use (&$ret) { |
|
| 172 | 172 | $ret = $node->attr('content'); |
| 173 | 173 | } |
| 174 | 174 | ); |
@@ -18,7 +18,7 @@ discard block |
||
| 18 | 18 | |
| 19 | 19 | $crawler->filterXPath('//meta[@name="parsely-title"]') |
| 20 | 20 | ->each( |
| 21 | - function (Crawler $node) use (&$ret) { |
|
| 21 | + function(Crawler $node) use (&$ret) { |
|
| 22 | 22 | $ret = $node->attr('content'); |
| 23 | 23 | } |
| 24 | 24 | ); |
@@ -33,7 +33,7 @@ discard block |
||
| 33 | 33 | |
| 34 | 34 | $crawler->filterXPath('//meta[@name="parsely-image-url"]') |
| 35 | 35 | ->each( |
| 36 | - function (Crawler $node) use (&$ret) { |
|
| 36 | + function(Crawler $node) use (&$ret) { |
|
| 37 | 37 | $ret = $node->attr('content'); |
| 38 | 38 | } |
| 39 | 39 | ); |
@@ -55,7 +55,7 @@ discard block |
||
| 55 | 55 | |
| 56 | 56 | $crawler->filterXPath('//meta[@name="parsely-tags"]') |
| 57 | 57 | ->each( |
| 58 | - function (Crawler $node) use (&$ret) { |
|
| 58 | + function(Crawler $node) use (&$ret) { |
|
| 59 | 59 | $ret = explode(',', $node->attr('content')); |
| 60 | 60 | } |
| 61 | 61 | ); |
@@ -74,7 +74,7 @@ discard block |
||
| 74 | 74 | |
| 75 | 75 | $crawler->filterXPath('//meta[@name="parsely-pub-date"]') |
| 76 | 76 | ->each( |
| 77 | - function (Crawler $node) use (&$date_str) { |
|
| 77 | + function(Crawler $node) use (&$date_str) { |
|
| 78 | 78 | $date_str = $node->attr('content'); |
| 79 | 79 | } |
| 80 | 80 | ); |
@@ -90,7 +90,7 @@ discard block |
||
| 90 | 90 | $ret = null; |
| 91 | 91 | $crawler->filterXPath('//meta[@name="parsely-author"]') |
| 92 | 92 | ->each( |
| 93 | - function (Crawler $node) use (&$ret) { |
|
| 93 | + function(Crawler $node) use (&$ret) { |
|
| 94 | 94 | $ret = $node->attr('content'); |
| 95 | 95 | } |
| 96 | 96 | ); |
@@ -32,12 +32,12 @@ discard block |
||
| 32 | 32 | |
| 33 | 33 | $this->scrapClient->followRedirects(); |
| 34 | 34 | $this->scrapClient->getClient()->setDefaultOption( |
| 35 | - 'config/curl/' . |
|
| 35 | + 'config/curl/'. |
|
| 36 | 36 | CURLOPT_SSL_VERIFYHOST, |
| 37 | 37 | false |
| 38 | 38 | ); |
| 39 | 39 | $this->scrapClient->getClient()->setDefaultOption( |
| 40 | - 'config/curl/' . |
|
| 40 | + 'config/curl/'. |
|
| 41 | 41 | CURLOPT_SSL_VERIFYPEER, |
| 42 | 42 | false |
| 43 | 43 | ); |
@@ -61,7 +61,7 @@ discard block |
||
| 61 | 61 | */ |
| 62 | 62 | public function setAdapter($adapter_name) |
| 63 | 63 | { |
| 64 | - $adapterClass = "\Zrashwani\NewsScrapper\Adapters\\" . $adapter_name . "Adapter"; |
|
| 64 | + $adapterClass = "\Zrashwani\NewsScrapper\Adapters\\".$adapter_name."Adapter"; |
|
| 65 | 65 | if (class_exists($adapterClass)) { |
| 66 | 66 | $this->adapter = new $adapterClass(); |
| 67 | 67 | } else { |
@@ -88,11 +88,11 @@ discard block |
||
| 88 | 88 | $theAdapter->currentUrl = $baseUrl; |
| 89 | 89 | |
| 90 | 90 | $isXpath = Selector::isXPath($linkSelector); |
| 91 | - $method = ($isXpath ===false)?'filter':'filterXPath'; |
|
| 91 | + $method = ($isXpath === false) ? 'filter' : 'filterXPath'; |
|
| 92 | 92 | |
| 93 | 93 | $crawler->$method($linkSelector) |
| 94 | 94 | ->each( |
| 95 | - function (Crawler $link_node) use (&$scrap_result, $theAdapter, &$limit) { |
|
| 95 | + function(Crawler $link_node) use (&$scrap_result, $theAdapter, &$limit) { |
|
| 96 | 96 | if (!is_null($limit) && count($scrap_result) >= $limit) { |
| 97 | 97 | return; |
| 98 | 98 | } |
@@ -147,21 +147,21 @@ discard block |
||
| 147 | 147 | ) { |
| 148 | 148 | $adapter->currentUrl = $article_info->url; //associate link url to adapter |
| 149 | 149 | |
| 150 | - $article_info->title = empty($article_info->title) === true? |
|
| 151 | - $adapter->extractTitle($pageCrawler):$article_info->title; |
|
| 152 | - $article_info->image = empty($article_info->image) === true? |
|
| 153 | - $adapter->extractImage($pageCrawler, $article_info->url):$article_info->image; |
|
| 154 | - $article_info->description = empty($article_info->description) === true? |
|
| 155 | - $adapter->extractDescription($pageCrawler):$article_info->description; |
|
| 156 | - $article_info->keywords = !isset($article_info->keywords) || count($article_info->keywords) === 0? |
|
| 157 | - $adapter->extractKeywords($pageCrawler):$article_info->keywords; |
|
| 150 | + $article_info->title = empty($article_info->title) === true ? |
|
| 151 | + $adapter->extractTitle($pageCrawler) : $article_info->title; |
|
| 152 | + $article_info->image = empty($article_info->image) === true ? |
|
| 153 | + $adapter->extractImage($pageCrawler, $article_info->url) : $article_info->image; |
|
| 154 | + $article_info->description = empty($article_info->description) === true ? |
|
| 155 | + $adapter->extractDescription($pageCrawler) : $article_info->description; |
|
| 156 | + $article_info->keywords = !isset($article_info->keywords) || count($article_info->keywords) === 0 ? |
|
| 157 | + $adapter->extractKeywords($pageCrawler) : $article_info->keywords; |
|
| 158 | 158 | |
| 159 | - $article_info->author = empty($article_info->author) === true? |
|
| 160 | - $adapter->extractAuthor($pageCrawler):$article_info->author; |
|
| 161 | - $article_info->publishDate = empty($article_info->publishDate) === true? |
|
| 162 | - $adapter->extractPublishDate($pageCrawler):$article_info->publishDate; |
|
| 163 | - $article_info->body = empty($article_info->body) === true? |
|
| 164 | - $adapter->extractBody($pageCrawler):$article_info->body; |
|
| 159 | + $article_info->author = empty($article_info->author) === true ? |
|
| 160 | + $adapter->extractAuthor($pageCrawler) : $article_info->author; |
|
| 161 | + $article_info->publishDate = empty($article_info->publishDate) === true ? |
|
| 162 | + $adapter->extractPublishDate($pageCrawler) : $article_info->publishDate; |
|
| 163 | + $article_info->body = empty($article_info->body) === true ? |
|
| 164 | + $adapter->extractBody($pageCrawler) : $article_info->body; |
|
| 165 | 165 | |
| 166 | 166 | } |
| 167 | 167 | } |