Issues (2)

src/Scraper.php (2 issues)

1
<?php
2
3
namespace Pilipinews\Website\Abscbn;
4
5
use Pilipinews\Common\Article;
6
use Pilipinews\Common\Crawler as DomCrawler;
7
use Pilipinews\Common\Interfaces\ScraperInterface;
8
use Pilipinews\Common\Scraper as AbstractScraper;
9
10
/**
11
 * ABS-CBN News Scraper
12
 *
13
 * @package Pilipinews
14
 * @author  Rougin Gutib <[email protected]>
15
 */
16
class Scraper extends AbstractScraper implements ScraperInterface
17
{
18
    /**
     * CSS selectors handed to remove() in scrape() before the article body
     * is read.
     *
     * @var string[]
     */
    protected $removables = array(
        '.patrolbox',
        '.op-related-articles',
        'script',
        '.iwantbar',
    );
22
23
    /**
     * "Refresh for updates" notices, in Filipino and English, that scrape()
     * passes to html() together with the article body.
     *
     * NOTE(review): presumably html() strips these phrases from the output;
     * confirm against the parent scraper class.
     *
     * @var string[]
     */
    protected $texts = array(
        'I-refresh ang pahinang ito para sa updates.',
        'I-refresh ang page na ito para sa updates.',
        'Refresh this link for more details.',
        'I-refresh ang web page na ito para sa mga pinakahuling update.',
        'Please refresh this page for updates.',
    );
33
34
    /**
     * Scrapes the article found at the given link.
     *
     * The link is lowercased before it is handed to prepare(), while the
     * returned article keeps the link exactly as it was passed in.
     *
     * @param  string $link
     * @return \Pilipinews\Common\Article
     */
    public function scrape($link)
    {
        $this->prepare(mb_strtolower($link));

        $title = $this->title('h1.news-title');

        // Unwanted elements are dropped before the body is selected.
        $this->remove((array) $this->removables);

        $content = $this->body('.article-content');

        // The converters below always run in this fixed order.
        $content = $this->album($content);

        $content = $this->embedly($content);

        $content = $this->image($content);

        $content = $this->tweet($content);

        $content = $this->video($content);

        $content = $this->post($content);

        $html = htmlspecialchars_decode($this->html($content, $this->texts));

        return new Article($title, $html, $link);
    }
68
69
    /**
     * Converts album elements into a readable string.
     *
     * Each ".media-content" element is replaced with one "PHOTO: <src>"
     * paragraph per slider image, followed by " - <caption>" when a caption
     * is available for that image.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function album(DomCrawler $crawler)
    {
        // Only the node is needed here, so the callback does not declare the
        // second ($html) argument, same as the embedly() and video() callbacks.
        $callback = function (DomCrawler $crawler)
        {
            $results = array();

            $items = $crawler->filter('.slider-for > div > img');

            $texts = $crawler->filter('.slider-desc > .item-desc > p');

            // Counted once instead of on every loop iteration.
            $total = $items->count();

            $captions = $texts->count();

            for ($i = 0; $i < $total; $i++)
            {
                $link = 'PHOTO: ' . $items->eq($i)->attr('src');

                $text = '';

                // Captions are skipped when there is exactly one of them
                // (original behaviour; presumably a caption shared by the
                // whole album). The bounds check covers albums with no
                // captions or fewer captions than images, where asking an
                // empty node list for its text fails.
                if ($captions !== 1 && $i < $captions)
                {
                    $text = $texts->eq($i)->text();
                }

                $result = '<p>' . $link . ' - ' . $text . '</p>';

                // Drops the dangling separator when there is no caption.
                $results[] = str_replace(' - </', '</', $result);
            }

            return implode("\n\n", (array) $results);
        };

        return $this->replace($crawler, '.media-content', $callback);
    }
110
111
    /**
     * Converts embedly elements into a readable string.
     *
     * Each ".embedly-card" element is replaced with "EMBED: " followed by
     * the "href" of the first link found inside it.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function embedly(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $node)
        {
            $href = $node->filter('a')->first()->attr('href');

            return 'EMBED: ' . $href;
        };

        return $this->replace($crawler, '.embedly-card', $callback);
    }
128
129
    /**
     * Converts image elements into a readable string.
     *
     * Each ".embed-wrap" element is replaced with a "PHOTO: <src> - <text>"
     * paragraph. When the element's HTML contains an "<em>" tag, the text of
     * its first "em" element is wrapped in parentheses.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function image(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $node, $html)
        {
            $source = $node->filter('img')->attr('src');

            // The "?ext=.jpg" suffix is removed from the image link.
            $photo = str_replace('?ext=.jpg', '', 'PHOTO: ' . $source);

            $result = '<p>' . $photo . ' - ' . $node->text() . '</p>';

            $emphasized = strpos($html, '<em>') !== false;

            if ($emphasized)
            {
                $em = $node->filter('em')->first()->text();

                $result = str_replace($em, '(' . $em . ')', $result);
            }

            // Removes the separator when it is followed only by three spaces.
            // NOTE(review): presumably the whitespace text() yields for an
            // element without a caption; confirm against a live page.
            return str_replace(' -   </', '</', (string) $result);
        };

        return $this->replace($crawler, '.embed-wrap', $callback);
    }
157
158
    /**
     * Converts post elements into a readable string.
     *
     * Each ".fb-post" element is replaced with a "POST: <link>" paragraph,
     * where the link is read from the element's "data-href" attribute.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function post(DomCrawler $crawler)
    {
        // Only the node is needed here, so the callback does not declare the
        // second ($html) argument, same as the embedly() and video() callbacks.
        $callback = function (DomCrawler $node)
        {
            return '<p>POST: ' . $node->attr('data-href') . '</p>';
        };

        return $this->replace($crawler, '.fb-post', $callback);
    }
173
174
    /**
     * Converts video elements into a readable string.
     *
     * Each ".op-interactive" element is replaced with a "VIDEO: <src>"
     * paragraph, where the source is read from the "iframe" inside it.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function video(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $node)
        {
            $source = $node->filter('iframe')->attr('src');

            return '<p>VIDEO: ' . $source . '</p>';
        };

        return $this->replace($crawler, '.op-interactive', $callback);
    }
193
}
194