Passed
Push — master ( 48eaaa...d014fd )
by Rougin
09:32
created

Scraper   A

Complexity

Total Complexity 4

Size/Duplication

Total Lines 90
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
eloc 31
dl 0
loc 90
ccs 27
cts 27
cp 1
rs 10
c 0
b 0
f 0
wmc 4

3 Methods

Rating   Name   Duplication   Size   Complexity  
A prepare() 0 19 1
A scrape() 0 19 1
A video() 0 12 2
1
<?php
2
3
namespace Pilipinews\Website\Cnn;
4
5
use Pilipinews\Common\Article;
6
use Pilipinews\Common\Client;
7
use Pilipinews\Common\Crawler as DomCrawler;
8
use Pilipinews\Common\Interfaces\ScraperInterface;
9
use Pilipinews\Common\Scraper as AbstractScraper;
10
11
/**
12
 * CNN Philippines Scraper
13
 *
14
 * @package Pilipinews
15
 * @author  Rougin Gutib <[email protected]>
16
 */
17
class Scraper extends AbstractScraper implements ScraperInterface
{
    /**
     * CSS selectors handed to remove() right after the crawler is created.
     *
     * @var string[]
     */
    protected $removables = array('p > script', '.flourish-credit');

    /**
     * "Refresh for updates" phrases handed to html() together with the body.
     * NOTE(review): presumably html() strips these from the output; confirm
     * against the parent scraper.
     *
     * @var string[]
     */
    protected $reload = array(
        'Please click the source link below for more updates.',
        'Please refresh for updates.',
        'Please refresh the page for updates.',
        'Please refresh this page for updates.',
        'Refresh this page for more updates.',
    );

    /**
     * Returns the contents of an article.
     *
     * Loads the page, reads its title and "#content-body" element, rewrites
     * tweets and video iframes, renders the result through html() and wraps
     * everything in an Article.
     *
     * @param  string $link
     * @return \Pilipinews\Common\Article
     */
    public function scrape($link)
    {
        $this->prepare((string) $link);

        $title = $this->title('title', ' - CNN Philippines');

        $body = $this->body('#content-body');

        $body = $this->video($this->tweet($body));

        $html = $this->html($body, $this->reload);

        // Puts a space between a pic.twitter.com link and a trailing
        // "- CNN". The dots are escaped so that only the literal host name
        // is matched instead of any three characters in those positions.
        $search = '/pic\.twitter\.com\/(.*)- CNN/i';

        $replace = 'pic.twitter.com/$1 - CNN';

        $fixed = preg_replace($search, $replace, $html);

        // preg_replace() yields null on a PCRE error; keep the previous
        // HTML in that case rather than handing null to the Article.
        if ($fixed !== null) {
            $html = $fixed;
        }

        return new Article($title, $html, $link);
    }

    /**
     * Initializes the crawler instance.
     *
     * Downloads the page, applies a few whitespace clean-ups around inline
     * tags, renames the numbered "content-body-N-N" identifier to plain
     * "content-body" (the selector used by scrape()), builds the crawler and
     * removes the elements listed in $removables.
     *
     * @param  string $link
     * @return void
     */
    protected function prepare($link)
    {
        $pattern = '/content-body-[0-9]+(-[0-9]+)+/i';

        $html = (string) Client::request((string) $link);

        $html = str_replace(' </em> ', '</em> ', $html);

        // Only rewrite the identifier when the page actually has one. Reading
        // $matches[0] without a match raises an "undefined offset" warning
        // and passes null to str_replace().
        if (preg_match($pattern, $html, $matches) === 1) {
            $html = str_replace($matches[0], 'content-body', $html);
        }

        $html = str_replace(' </a>', '</a> ', $html);

        $html = str_replace('<strong> </strong>', ' ', $html);

        $this->crawler = new DomCrawler($html);

        $this->remove((array) $this->removables);
    }

    /**
     * Converts video elements to readable string.
     *
     * Every "p > iframe" element is replaced with a paragraph of the form
     * "EMBED: <src>" when the source contains "embed", "VIDEO: <src>"
     * otherwise.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function video(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $node)
        {
            // Cast so an iframe without a "src" attribute does not pass null
            // to strpos() (assumes attr() may return null, TODO confirm).
            $source = (string) $node->attr('src');

            $type = strpos($source, 'embed') !== false ? 'EMBED' : 'VIDEO';

            return '<p>' . $type . ': ' . $source . '</p><br><br><br>';
        };

        return $this->replace($crawler, 'p > iframe', $callback);
    }
}
109