Passed
Push — master ( 88138d...af6eb6 )
by Rougin
01:54
created

Scraper::video()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 5
nc 1
nop 1
dl 0
loc 11
ccs 6
cts 6
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Pilipinews\Website\Sunstar;
4
5
use Pilipinews\Common\Article;
6
use Pilipinews\Common\Crawler as DomCrawler;
7
use Pilipinews\Common\Interfaces\ScraperInterface;
8
use Pilipinews\Common\Scraper as AbstractScraper;
9
10
/**
11
 * Sunstar News Scraper
12
 *
13
 * @package Pilipinews
14
 * @author  Rougin Royce Gutib <[email protected]>
15
 */
16
class Scraper extends AbstractScraper implements ScraperInterface
{
    /**
     * CSS selectors whose HTML is extracted, in order, to build the article body.
     *
     * @var string[]
     */
    protected $elements = array('.article-header', '.articleBody');

    /**
     * CSS selectors passed to remove() before the body is extracted.
     *
     * @var string[]
     */
    protected $removables = array('.subSection', '.titleArticle', '.pagingWrap', 'script', '#fb-root');

    /**
     * Text fragments passed to html() together with the body.
     * NOTE(review): presumably these are stripped from the output - confirm
     * against the parent html() implementation.
     *
     * @var string[]
     */
    protected $texts = array("PHOTO: https://www.sunstar.com.ph/\n", 'Please refresh page for updates.');

    /**
     * Returns the contents of an article.
     *
     * The link is lower-cased before it is requested. The page title is read
     * first, then the removable elements and carousels are processed on the
     * full document, and finally the body is extracted and its images and
     * videos are converted to plain "PHOTO:" / "VIDEO:" markers.
     *
     * @param  string $link
     * @return \Pilipinews\Common\Article
     */
    public function scrape($link)
    {
        $this->prepare(strtolower((string) $link));

        // title(), remove() and html() are not defined in this class;
        // presumably they are inherited from the parent scraper.
        $title = $this->title('title', ' - SUNSTAR');

        $this->remove((array) $this->removables);

        // Carousels are converted on the whole document, before the body
        // elements are picked out of it.
        $this->crawler = $this->carousel($this->crawler);

        $body = $this->body((array) $this->elements);

        $body = $this->video($this->image($body));

        $html = $this->html($body, $this->texts);

        return new Article($title, (string) $html);
    }

    /**
     * Returns the article content based on the given element(s).
     *
     * For every selector, the HTML of the last matching node is taken, its
     * whitespace runs are collapsed into single spaces, and the results are
     * joined with three line breaks into a new crawler instance.
     *
     * @param  string|string[] $elements
     * @return \Pilipinews\Common\Crawler
     */
    protected function body($elements)
    {
        // Accept a single selector as well as a list of selectors.
        is_string($elements) && $elements = array($elements);

        foreach ((array) $elements as $key => $element) {
            $body = $this->crawler->filter($element)->last()->html();

            $body = (string) trim(preg_replace('/\s+/', ' ', $body));

            // Each selector is replaced in place by its cleaned-up HTML.
            $elements[$key] = str_replace('  ', ' ', (string) $body);
        }

        return new DomCrawler(implode('<br><br><br>', $elements));
    }

    /**
     * Converts carousel elements to readable string.
     *
     * Every ".owl-carousel" match is handed to a callback that turns each
     * "img" inside it into a "<p>PHOTO: {src} - {caption}</p>" entry, where
     * the caption is the ".img-caption" node at the same position.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function carousel(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $crawler) {
            $texts = $crawler->filter('.img-caption');

            $function = function ($result, $index) use ($texts) {
                // Images and captions are paired by their position.
                // NOTE(review): assumes there is a caption for every image.
                $text = $texts->eq($index)->text();

                $image = $result->attr('src') . ' - ' . $text;

                return '<p>PHOTO: ' . $image . '</p>';
            };

            $items = $crawler->filter('img');

            $image = $items->each($function);

            return implode("<br><br>", $image);
        };

        return $this->replace($crawler, '.owl-carousel', $callback);
    }

    /**
     * Converts image elements to readable string.
     *
     * Every ".imgArticle" match is handed to a callback that returns
     * "PHOTO: {src}" for the first "img" inside it, wrapped in line breaks.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function image(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $crawler) {
            $break = '<br><br><br>';

            $result = $crawler->filter('img')->first();

            $image = $result->attr('src') . $break;

            return $break . 'PHOTO: ' . $image;
        };

        return $this->replace($crawler, '.imgArticle', $callback);
    }

    /**
     * Initializes the crawler instance.
     *
     * The response of Client::request() for the link is wrapped in a new
     * crawler and stored in $this->crawler.
     *
     * @param  string $link
     * @return void
     */
    protected function prepare($link)
    {
        // "Client" is not imported, so it resolves inside this namespace.
        $this->crawler = new DomCrawler(Client::request($link));
    }

    /**
     * Converts video elements to readable string.
     *
     * Every ".fb-video" match is handed to a callback that returns
     * "VIDEO: {data-href}" wrapped in line breaks.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function video(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $crawler) {
            // NOTE(review): attr() presumably yields null when the attribute
            // is missing - confirm against the Crawler class. The cast keeps
            // trim() from receiving null, which PHP 8.1+ reports as deprecated.
            $link = trim((string) $crawler->attr('data-href'));

            $break = '<br><br><br>';

            return $break . 'VIDEO: ' . $link . $break;
        };

        return $this->replace($crawler, '.fb-video', $callback);
    }
}
158