Passed
Push — master ( 23a320...acc738 )
by Rougin
02:39
created

Scraper::video()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 8
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 3
dl 0
loc 8
ccs 4
cts 4
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
crap 1
1
<?php
2
3
namespace Pilipinews\Website\Rappler;
4
5
use Pilipinews\Common\Article;
6
use Pilipinews\Common\Interfaces\ScraperInterface;
7
use Pilipinews\Common\Scraper as AbstractScraper;
8
use Pilipinews\Common\Crawler as DomCrawler;
9
10
/**
11
 * Rappler News Scraper
12
 *
13
 * @package Pilipinews
14
 * @author  Rougin Gutib <[email protected]>
15
 */
16
class Scraper extends AbstractScraper implements ScraperInterface
17
{
18
    /**
19
     * @var string[]
20
     */
21
    protected $removables = array('.author-box');
22
23
    /**
24
     * @var string[]
25
     */
26
    protected $texts = array(
27
        "What's the weather like in your area? Report the situation through Rappler's Agos (http://agos.rappler.com/) or tweet us at @rapplerdotcom (https://twitter.com/rapplerdotcom).",
28
        "Not on the list? Help us crowdsource class suspensions by posting in the comments section or tweeting @rapplerdotcom (https://twitter.com/rapplerdotcom).\n\nFor more information:  (https://www.facebook.com/gov.abet/posts/10152811185356858)When are classes cancelled or suspended? (https://www.rappler.com/move-ph/31299-classes-cancelled-suspended)",
29
        "\n\nPlease refresh this page for updates."
30
    );
31
32
    /**
     * Builds an Article from the page found at the given link.
     *
     * @param  string $link
     * @return \Pilipinews\Common\Article
     */
    public function scrape($link)
    {
        // prepare() receives the lower-cased link; the Article keeps the link as given.
        $this->prepare(mb_strtolower($link));

        $headline = $this->title('.select-headline');

        // Removal happens after the title is read and before the body is selected.
        $this->remove((array) $this->removables);

        $content = $this->body('.storypage-divider');

        // Converters run innermost first: image, scribd, video, then tweet.
        $content = $this->tweet(
            $this->video(
                $this->scribd(
                    $this->image($content)
                )
            )
        );

        $markup = $this->html($content, $this->texts);

        return new Article($headline, $markup, $link);
    }
60
61
    /**
     * Converts captioned image elements to a readable "PHOTO: ..." paragraph.
     *
     * Every "p.caption" element is handed to replace() together with a callback
     * that reads the image address from the element preceding the caption,
     * detaches that element from the document and returns the replacement markup.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function image(DomCrawler $crawler)
    {
        // $html belongs to the callback signature used with replace() but is not needed here.
        $callback = function (DomCrawler $crawler, $html)
        {
            // First of the nodes returned by previousAll() for the caption paragraph.
            $image = $crawler->previousAll()->first();

            $photo = $image->filter('img')->attr('data-original');

            $node = $image->getNode(0);

            // getNode() may yield null and a detached node has no parent;
            // only remove the image holder when both are available.
            if ($node !== null && $node->parentNode !== null)
            {
                $node->parentNode->removeChild($node);
            }

            $text = ' - ' . $crawler->first()->text();

            // A caption made of a single space adds nothing after the address.
            $text = $text === ' -  ' ? '' : $text;

            return '<p>PHOTO: ' . $photo . $text . '</p>';
        };

        return $this->replace($crawler, 'p.caption', $callback);
    }
88
89
    /**
     * Turns embedded Scribd frames into a "title (source)" paragraph.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function scribd(DomCrawler $crawler)
    {
        // The second argument is accepted to match the callback signature but is unused.
        $toParagraph = function (DomCrawler $embed, $html)
        {
            return sprintf(
                '<p>%s (%s)</p>',
                (string) $embed->attr('title'),
                (string) $embed->attr('src')
            );
        };

        return $this->replace($crawler, '.scribd_iframe_embed', $toParagraph);
    }
110
111
    /**
     * Turns iframe elements into a "VIDEO: source" paragraph.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function video(DomCrawler $crawler)
    {
        // The second argument is accepted to match the callback signature but is unused.
        $toParagraph = function (DomCrawler $frame, $html)
        {
            $source = $frame->attr('src');

            return '<p>VIDEO: ' . $source . '</p>';
        };

        return $this->replace($crawler, 'iframe', $toParagraph);
    }
126
}
127