Passed
Push — master ( c7b0bf...abc1fb )
by Rougin
03:54
created

Scraper::slidenav()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 21
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1.2963

Importance

Changes 0
Metric Value
eloc 9
c 0
b 0
f 0
dl 0
loc 21
ccs 4
cts 12
cp 0.3333
rs 9.9666
cc 1
nc 1
nop 1
crap 1.2963
1
<?php
2
3
namespace Pilipinews\Website\Bulletin;
4
5
use Pilipinews\Common\Article;
6
use Pilipinews\Common\Client;
7
use Pilipinews\Common\Crawler as DomCrawler;
8
use Pilipinews\Common\Interfaces\ScraperInterface;
9
use Pilipinews\Common\Scraper as AbstractScraper;
10
11
/**
12
 * Manila Bulletin Scraper
13
 *
14
 * @package Pilipinews
15
 * @author  Rougin Gutib <[email protected]>
16
 */
17
class Scraper extends AbstractScraper implements ScraperInterface
{
    /**
     * CSS selectors handed to remove() in scrape() before the article
     * body is extracted.
     *
     * @var string[]
     */
    protected $removables = array(
        '.uk-article-title',
        '.share-container',
        '.uk-grid.uk-grid-large.uk-margin-bottom',
        '.uk-visible-small.uk-margin-top.uk-margin-bottom',
        '#related_post',
        '#disqus_thread',
        'script',
    );

    /**
     * Returns the contents of an article.
     *
     * Loads the page, reads the title from "h2.title", drops the elements
     * listed in $removables, then rewrites image blocks and slide navigation
     * elements found in ".article-content" before converting the body through
     * the inherited html() helper.
     *
     * @param  string $link
     * @return \Pilipinews\Common\Article
     */
    public function scrape($link)
    {
        $this->prepare((string) $link);

        // The title is read first because ".uk-article-title" is part of
        // the removables and is gone after the remove() call below.
        $title = $this->title('h2.title');

        $this->remove($this->removables);

        $body = $this->body('.article-content');

        $body = $this->image($body);

        $body = $this->slidenav($body);

        $html = $this->html($body);

        return new Article($title, $html, $link);
    }

    /**
     * Converts image elements to readable string.
     *
     * Every ".wp-block-image" element is replaced by a paragraph in the form
     * "PHOTO: <src> - <caption>". The caption part is omitted when the block
     * has no "figcaption" element, and the source is left empty when the
     * block has no "img" element.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function image(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $crawler)
        {
            $images = $crawler->filter('img');

            $captions = $crawler->filter('figcaption');

            $message = '';

            // NOTE(review): assumes the crawler follows Symfony DomCrawler
            // semantics, where attr() and html() fail on an empty node list.
            // The count() guards keep such blocks from aborting the scrape.
            if ($images->count() > 0)
            {
                $message = (string) $images->first()->attr('src');
            }

            if ($captions->count() > 0)
            {
                $message .= ' - ' . $captions->first()->html();
            }

            // Line breaks inside the caption are flattened to spaces so that
            // the whole photo note stays on a single line.
            $message = str_replace('<br>', ' ', $message);

            return '<p>PHOTO: ' . $message . '</p>';
        };

        return $this->replace($crawler, '.wp-block-image', $callback);
    }

    /**
     * Initializes the crawler instance.
     *
     * Requests the page, strips any "<p>Tags: ...</p>" paragraph from the
     * response and stores a new crawler built from the result.
     *
     * @param  string $link
     * @return void
     */
    protected function prepare($link)
    {
        $response = Client::request((string) $link);

        $regex = '/<p>Tags:(.*?)<\/p>/i';

        $html = preg_replace($regex, '', $response);

        // preg_replace() yields null when the PCRE engine fails. Falling back
        // to the unfiltered response avoids silently crawling an empty page.
        if ($html === null)
        {
            $html = $response;
        }

        $this->crawler = new DomCrawler((string) $html);
    }

    /**
     * Converts a slidenav element into a readable string.
     *
     * Every ".uk-slidenav-position" element is replaced by one
     * "PHOTO: <url>" paragraph per image it contains, separated by blank
     * lines. Sources without a scheme are prefixed with the site host;
     * sources that are already absolute are kept untouched.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function slidenav(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $crawler)
        {
            $items = $crawler->filter('img')->each(function ($crawler)
            {
                $source = (string) $crawler->attr('src');

                // Only sources without a scheme need a prefix. Adding the
                // host to an absolute URL would produce a broken link.
                if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $source) !== 1)
                {
                    // Protocol-relative sources ("//host/path") only lack
                    // the scheme; anything else is resolved against the site.
                    $relative = strpos($source, '//') === 0;

                    $prefix = $relative ? 'https:' : 'https://mb.com.ph';

                    $source = $prefix . $source;
                }

                return '<p>PHOTO: ' . $source . '</p>';
            });

            return implode("\n\n", (array) $items);
        };

        $class = '.uk-slidenav-position';

        return $this->replace($crawler, $class, $callback);
    }
}
129