1 | <?php |
||
2 | |||
3 | namespace Pilipinews\Website\Cnn; |
||
4 | |||
5 | use Pilipinews\Common\Article; |
||
6 | use Pilipinews\Common\Client; |
||
7 | use Pilipinews\Common\Crawler as DomCrawler; |
||
8 | use Pilipinews\Common\Interfaces\ScraperInterface; |
||
9 | use Pilipinews\Common\Scraper as AbstractScraper; |
||
10 | |||
11 | /** |
||
12 | * CNN Philippines Scraper |
||
13 | * |
||
14 | * @package Pilipinews |
||
15 | * @author Rougin Gutib <[email protected]> |
||
16 | */ |
||
class Scraper extends AbstractScraper implements ScraperInterface
{
    /**
     * CSS selectors of elements that should not appear in the article.
     * NOTE(review): not referenced in this class; presumably consumed by
     * the parent scraper - confirm against AbstractScraper.
     *
     * @var string[]
     */
    protected $removables = array('p > script', '.flourish-credit');

    /**
     * "Refresh for updates" notices, handed to html() by scrape().
     * NOTE(review): presumably stripped from the output - confirm
     * against AbstractScraper::html().
     *
     * @var string[]
     */
    protected $reload = array(
        'Please click the source link below for more updates.',
        'Please refresh for updates.',
        'Please refresh the page for updates.',
        'Please refresh this page for updates.',
        'Refresh this page for more updates.',
    );

    /**
     * Returns the contents of an article.
     *
     * @param  string $link
     * @return \Pilipinews\Common\Article
     */
    public function scrape($link)
    {
        $this->prepare((string) $link);

        $title = $this->title('.title');

        $body = $this->body('.article-maincontent-p');

        $body = $this->image($body);

        $body = $this->video($this->tweet($body));

        $html = $this->html($body, $this->reload);

        // Puts a space between a "pic.twitter.com/..." link and a trailing
        // "- CNN" credit. The dots are escaped so that only the literal
        // host name matches instead of any three-part look-alike.
        $search = '/pic\.twitter\.com\/(.*)- CNN/i';

        $replace = 'pic.twitter.com/$1 - CNN';

        $result = preg_replace($search, $replace, $html);

        // preg_replace() returns null on a PCRE error; keep the
        // unmodified text in that case rather than passing null on.
        if ($result !== null)
        {
            $html = $result;
        }

        return new Article($title, $html, $link);
    }

    /**
     * Converts image elements into a readable string.
     *
     * Each ".img-container.picture" element is replaced by a paragraph
     * of the form "PHOTO: <url> - <caption>".
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function image(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $crawler)
        {
            $base = 'https://cnnphilippines.com';

            $link = (string) $crawler->filter('img')->attr('src');

            if (strpos($link, '//') === 0)
            {
                // Protocol-relative source: only the scheme is missing.
                $link = 'https:' . $link;
            }
            elseif (! preg_match('/^[a-z][a-z0-9+.\-]*:/i', $link))
            {
                // No scheme at all: the source is relative to the site.
                $link = $base . $link;
            }

            $caption = $crawler->filter('.picture-caption');

            // Calling text() on an empty node list throws in Symfony's
            // DomCrawler, so pictures without a caption are checked first.
            // NOTE(review): assumes the project Crawler is countable like
            // Symfony's Crawler - confirm.
            $text = '';

            if ($caption->count() > 0)
            {
                $text = (string) $caption->first()->text();
            }

            if ($text !== '')
            {
                $text = ' - ' . $text;
            }

            return '<p>PHOTO: ' . $link . $text . '</p>';
        };

        return $this->replace($crawler, '.img-container.picture', $callback);
    }

    /**
     * Converts video elements to readable string.
     *
     * Each "p > iframe" element is replaced by a paragraph labelled
     * "EMBED" when its source contains "embed", or "VIDEO" otherwise.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function video(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $crawler)
        {
            // An iframe without "src" yields null; cast it so strpos()
            // always receives a string.
            $link = (string) $crawler->attr('src');

            $type = strpos($link, 'embed') !== false ? 'EMBED' : 'VIDEO';

            return '<p>' . $type . ': ' . $link . '</p><br><br><br>';
        };

        return $this->replace($crawler, 'p > iframe', $callback);
    }
}
||
111 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.