Passed
Push — master ( f25201...109ec6 )
by Rougin
01:46
created

Scraper::prepare()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 1
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Pilipinews\Website\Sunstar;
4
5
use Pilipinews\Common\Article;
6
use Pilipinews\Common\Crawler as DomCrawler;
7
use Pilipinews\Common\Interfaces\ScraperInterface;
8
use Pilipinews\Common\Scraper as AbstractScraper;
9
10
/**
11
 * Sunstar News Scraper
12
 *
13
 * @package Pilipinews
14
 * @author  Rougin Royce Gutib <[email protected]>
15
 */
16
class Scraper extends AbstractScraper implements ScraperInterface
{
    /**
     * CSS selectors whose HTML is extracted and joined to form the article body.
     *
     * @var string[]
     */
    protected $elements = array('.article-header', '.articleBody');

    /**
     * CSS selectors handed to remove() before the body is extracted.
     *
     * @var string[]
     */
    protected $removables = array('.subSection', '.titleArticle', '.pagingWrap', 'script', '#fb-root');

    /**
     * Text fragments handed to html() together with the article body
     * (presumably stripped from the final output; confirm against the parent class).
     *
     * @var string[]
     */
    protected $texts = array("PHOTO: https://www.sunstar.com.ph/\n", 'Please refresh page for updates.');

    /**
     * Returns the contents of an article.
     *
     * The link is lower-cased before it is requested. Carousels are converted
     * on the full page first; images and videos are converted on the extracted
     * body afterwards.
     *
     * @param  string $link
     * @return \Pilipinews\Common\Article
     */
    public function scrape($link)
    {
        $this->prepare((string) strtolower($link));

        $title = $this->title('title', ' - SUNSTAR');

        $this->remove((array) $this->removables);

        $this->crawler = $this->carousel($this->crawler);

        $body = $this->body((array) $this->elements);

        $body = $this->video($this->image($body));

        $html = $this->html($body, $this->texts);

        return new Article($title, (string) $html);
    }

    /**
     * Returns the article content based on the given element(s).
     *
     * For every selector the HTML of the last matching node is taken, its
     * whitespace is collapsed, and the pieces are joined with three line breaks.
     *
     * @param  string|string[] $elements
     * @return \Pilipinews\Common\Crawler
     */
    protected function body($elements)
    {
        $contents = array();

        // The array cast also wraps a single selector string into a one-item list.
        foreach ((array) $elements as $key => $element) {
            $html = $this->crawler->filter($element)->last()->html();

            // preg_replace() yields null on a PCRE error; the cast keeps trim()
            // working on a string. Collapsing every whitespace run to one space
            // already guarantees that no consecutive spaces remain.
            $contents[$key] = trim((string) preg_replace('/\s+/', ' ', $html));
        }

        return new DomCrawler(implode('<br><br><br>', $contents));
    }

    /**
     * Converts carousel elements to readable string.
     *
     * Each image inside an ".owl-carousel" element becomes a
     * "<p>PHOTO: {src} - {caption}</p>" entry, where the caption is the
     * ".img-caption" node found at the same position as the image.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function carousel(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $carousel) {
            $captions = $carousel->filter('.img-caption');

            $total = $captions->count();

            $format = function ($image, $index) use ($captions, $total) {
                $photo = $image->attr('src');

                // A carousel may hold more images than captions; reading the
                // text of a missing caption would abort the whole scrape, so
                // such images are listed with their source only.
                if ($index < $total) {
                    $photo .= ' - ' . $captions->eq($index)->text();
                }

                return '<p>PHOTO: ' . $photo . '</p>';
            };

            $photos = $carousel->filter('img')->each($format);

            return implode('<br><br>', $photos);
        };

        return $this->replace($crawler, '.owl-carousel', $callback);
    }

    /**
     * Converts image elements to readable string.
     *
     * Each ".imgArticle" element is turned into "PHOTO: {src}" using the
     * source of its first image, surrounded by three line breaks on each side.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function image(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $wrapper) {
            $break = '<br><br><br>';

            $source = $wrapper->filter('img')->first()->attr('src');

            return $break . 'PHOTO: ' . $source . $break;
        };

        return $this->replace($crawler, '.imgArticle', $callback);
    }

    /**
     * Initializes the crawler instance.
     *
     * The response of Client::request() for the link is wrapped in a new crawler.
     *
     * @param  string $link
     * @return void
     */
    protected function prepare($link)
    {
        $response = Client::request($link);

        $this->crawler = new DomCrawler($response);
    }

    /**
     * Converts video elements to readable string.
     *
     * Each ".fb-video" element is turned into "VIDEO: {data-href}",
     * surrounded by three line breaks on each side.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function video(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $video) {
            $break = '<br><br><br>';

            // attr() may return null when the attribute is absent; cast before trimming.
            $link = trim((string) $video->attr('data-href'));

            return $break . 'VIDEO: ' . $link . $break;
        };

        return $this->replace($crawler, '.fb-video', $callback);
    }
}
158