Scraper::scribd()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 14
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1.0787

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 6
c 1
b 0
f 0
dl 0
loc 14
ccs 4
cts 7
cp 0.5714
rs 10
cc 1
nc 1
nop 1
crap 1.0787
1
<?php
2
3
namespace Pilipinews\Website\Rappler;
4
5
use Pilipinews\Common\Article;
6
use Pilipinews\Common\Interfaces\ScraperInterface;
7
use Pilipinews\Common\Scraper as AbstractScraper;
8
use Pilipinews\Common\Crawler as DomCrawler;
9
10
/**
11
 * Rappler News Scraper
12
 *
13
 * @package Pilipinews
14
 * @author  Rougin Gutib <[email protected]>
15
 */
16
class Scraper extends AbstractScraper implements ScraperInterface
{
    /**
     * Selectors handed to remove() before the article body is read.
     *
     * @var string[]
     */
    protected $removables = array('.author-box');

    /**
     * Boilerplate passages handed to html() together with the article body.
     * NOTE(review): presumably html() strips these from the output - confirm.
     *
     * @var string[]
     */
    protected $texts = array(
        "What's the weather like in your area? Report the situation through Rappler's Agos (http://agos.rappler.com/) or tweet us at @rapplerdotcom (https://twitter.com/rapplerdotcom).",
        "Not on the list? Help us crowdsource class suspensions by posting in the comments section or tweeting @rapplerdotcom (https://twitter.com/rapplerdotcom).\n\nFor more information:  (https://www.facebook.com/gov.abet/posts/10152811185356858)When are classes cancelled or suspended? (https://www.rappler.com/move-ph/31299-classes-cancelled-suspended)",
        "\n\nPlease refresh this page for updates."
    );

    /**
     * Scrapes a single article and wraps the result in an Article object.
     *
     * @param  string $link
     * @return \Pilipinews\Common\Article
     */
    public function scrape($link)
    {
        // prepare() receives the lower-cased link, while the returned
        // Article keeps the link exactly as it was passed in.
        $this->prepare(mb_strtolower($link));

        $title = $this->title('h1');

        $this->remove((array) $this->removables);

        $content = $this->body('.ArticleWrapper__ArticleBodyWrapper-sc-36pn73-0');

        // Embedded elements are rewritten in a fixed order:
        // images, Scribd documents, videos, then tweets.
        $content = $this->image($content);
        $content = $this->scribd($content);
        $content = $this->video($content);
        $content = $this->tweet($content);

        $markup = $this->html($content, $this->texts);

        return new Article($title, htmlspecialchars_decode($markup), $link);
    }

    /**
     * Turns every captioned image into a "PHOTO: <url> - <caption>" paragraph.
     *
     * For each "p.caption" element, the first node returned by previousAll()
     * is treated as the image holder: the "data-original" attribute of its
     * "img" is read, and the holder is then detached from the document.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function image(DomCrawler $crawler)
    {
        $selector = 'p.caption';

        $callback = function (DomCrawler $caption, /** @scrutinizer ignore-unused */ $html)
        {
            $holder = $caption->previousAll()->first();

            $source = $holder->filter('img')->attr('data-original');

            // NOTE(review): assumes every caption has a preceding sibling that
            // contains an "img"; getNode() would presumably yield null otherwise
            // and the removal below would fail - confirm against real pages.
            $element = $holder->getNode(0);

            $element->parentNode->removeChild($element);

            $label = trim($caption->first()->text());

            // Only a truthy caption gets the " - " separator.
            $suffix = $label ? ' - ' . $label : $label;

            return '<p>PHOTO: ' . $source . $suffix . '</p>';
        };

        return $this->replace($crawler, $selector, $callback);
    }

    /**
     * Turns every embedded Scribd frame into a "<title> (<src>)" paragraph.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function scribd(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $embed, /** @scrutinizer ignore-unused */ $html)
        {
            $name = (string) $embed->attr('title');

            $source = (string) $embed->attr('src');

            return '<p>' . $name . ' (' . $source . ')</p>';
        };

        return $this->replace($crawler, '.scribd_iframe_embed', $callback);
    }

    /**
     * Turns every iframe element into a "VIDEO: <src>" paragraph.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function video(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $frame, /** @scrutinizer ignore-unused */ $html)
        {
            $source = $frame->attr('src');

            return '<p>VIDEO: ' . $source . '</p>';
        };

        return $this->replace($crawler, 'iframe', $callback);
    }
}
130