pilipinews /
cnn
| 1 | <?php |
||
| 2 | |||
| 3 | namespace Pilipinews\Website\Cnn; |
||
| 4 | |||
| 5 | use Pilipinews\Common\Article; |
||
| 6 | use Pilipinews\Common\Client; |
||
| 7 | use Pilipinews\Common\Crawler as DomCrawler; |
||
| 8 | use Pilipinews\Common\Interfaces\ScraperInterface; |
||
| 9 | use Pilipinews\Common\Scraper as AbstractScraper; |
||
| 10 | |||
| 11 | /** |
||
| 12 | * CNN Philippines Scraper |
||
| 13 | * |
||
| 14 | * @package Pilipinews |
||
| 15 | * @author Rougin Gutib <[email protected]> |
||
| 16 | */ |
||
class Scraper extends AbstractScraper implements ScraperInterface
{
    /**
     * CSS selectors of elements to drop from an article.
     *
     * NOTE(review): not referenced in this class; presumably consumed by
     * the parent scraper - confirm against AbstractScraper.
     *
     * @var string[]
     */
    protected $removables = array('p > script', '.flourish-credit');

    /**
     * "Refresh for updates" boilerplate phrases. Passed to html() as its
     * second argument in scrape(); presumably stripped from the output
     * there - confirm against the parent implementation.
     *
     * @var string[]
     */
    protected $reload = array(
        'Please click the source link below for more updates.',
        'Please refresh for updates.',
        'Please refresh the page for updates.',
        'Please refresh this page for updates.',
        'Refresh this page for more updates.',
    );

    /**
     * Returns the contents of an article.
     *
     * Loads the link, reads the title from ".title" and the body from
     * ".article-maincontent-p", rewrites images, tweets and videos into
     * plain text markers, and wraps the result in an Article.
     *
     * @param  string $link
     * @return \Pilipinews\Common\Article
     */
    public function scrape($link)
    {
        $this->prepare((string) $link);

        $title = $this->title('.title');

        $body = $this->body('.article-maincontent-p');

        $body = $this->image($body);

        $body = $this->video($this->tweet($body));

        $html = $this->html($body, $this->reload);

        // Puts a space between a pic.twitter.com link and the "- CNN" text
        // that follows it. The dots are escaped so that they only match a
        // literal "." and not any character.
        $search = '/pic\.twitter\.com\/(.*)- CNN/i';

        $replace = 'pic.twitter.com/$1 - CNN';

        $html = preg_replace($search, $replace, $html);

        return new Article($title, $html, $link);
    }

    /**
     * Converts image elements into a readable string.
     *
     * Each ".img-container.picture" element is replaced by a paragraph in
     * the form "PHOTO: <absolute url>[ - <caption>]".
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function image(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $crawler)
        {
            $base = 'https://cnnphilippines.com';

            $link = $crawler->filter('img')->attr('src');

            $caption = $crawler->filter('.picture-caption');

            // Reading text() from an empty node list throws in Symfony's
            // DomCrawler, so images without a caption are handled first.
            // NOTE(review): assumes the crawler is countable like
            // Symfony's DomCrawler - confirm for Pilipinews\Common\Crawler.
            $text = '';

            if ($caption->count() > 0)
            {
                $text = (string) $caption->first()->text();
            }

            // Only prefix the separator when there is caption text.
            if ($text !== '')
            {
                $text = ' - ' . $text;
            }

            return '<p>PHOTO: ' . $base . $link . $text . '</p>';
        };

        return $this->replace($crawler, '.img-container.picture', $callback);
    }

    /**
     * Converts video elements to readable string.
     *
     * Each "p > iframe" element is replaced by a paragraph labelled
     * "EMBED" when its source contains "embed", and "VIDEO" otherwise.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function video(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $crawler)
        {
            // Cast so that a missing "src" attribute is treated as an
            // empty string instead of passing null to strpos().
            $link = (string) $crawler->attr('src');

            $type = strpos($link, 'embed') !== false ? 'EMBED' : 'VIDEO';

            return '<p>' . $type . ': ' . $link . '</p><br><br><br>';
        };

        return $this->replace($crawler, 'p > iframe', $callback);
    }
}
||
| 111 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.