Scraper   A
last analyzed

Complexity

Total Complexity 5

Size/Duplication

Total Lines 92
Duplicated Lines 0 %

Test Coverage

Coverage 89.66%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 31
c 1
b 0
f 0
dl 0
loc 92
ccs 26
cts 29
cp 0.8966
rs 10
wmc 5

3 Methods

Rating   Name   Duplication   Size   Complexity  
A video() 0 12 2
A image() 0 19 2
A scrape() 0 21 1
1
<?php
2
3
namespace Pilipinews\Website\Cnn;
4
5
use Pilipinews\Common\Article;
6
use Pilipinews\Common\Client;
7
use Pilipinews\Common\Crawler as DomCrawler;
8
use Pilipinews\Common\Interfaces\ScraperInterface;
9
use Pilipinews\Common\Scraper as AbstractScraper;
10
11
/**
12
 * CNN Philippines Scraper
13
 *
14
 * @package Pilipinews
15
 * @author  Rougin Gutib <[email protected]>
16
 */
17
class Scraper extends AbstractScraper implements ScraperInterface
18
{
19
    /**
20
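     * CSS selectors; not read anywhere in this class, so presumably consumed
     * by the parent scraper (TODO confirm).
     *
     * @var string[]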
21
     */
22
    protected $removables = array('p > script', '.flourish-credit');
23
24
    /**
25
     * Update-prompt phrases handed to html() together with the article body.
     *
     * @var string[]
26
     */
27
    protected $reload = array(
28
        'Please click the source link below for more updates.',
29
        'Please refresh for updates.',
30
        'Please refresh the page for updates.',
31
        'Please refresh this page for updates.',
32
        'Refresh this page for more updates.',
33
    );
34
35
    /**
36
     * Returns the contents of an article.
37
     *
38
     * @param  string $link
39
     * @return \Pilipinews\Common\Article
40
     */
41 27
    public function scrape($link)
    {
        $this->prepare((string) $link);

        $title = $this->title('.title');

        // Rewrite the body step by step: images first, then tweets, then videos.
        $body = $this->body('.article-maincontent-p');
        $body = $this->image($body);
        $body = $this->tweet($body);
        $body = $this->video($body);

        $html = $this->html($body, $this->reload);

        // Inserts the missing space between a "pic.twitter.com/..." link and a
        // directly following "- CNN". The dots are escaped so that they only
        // match the literal dots of the host name.
        $search = '/pic\.twitter\.com\/(.*)- CNN/i';

        $replace = 'pic.twitter.com/$1 - CNN';

        $fixed = preg_replace($search, $replace, $html);

        // preg_replace() returns null when the regex engine fails; keep the
        // unmodified text in that case instead of handing null to the article.
        if ($fixed !== null)
        {
            $html = $fixed;
        }

        return new Article($title, $html, $link);
    }
63
64
    /**
65
     * Converts image elements into a readable string.
66
     *
67
     * @param  \Pilipinews\Common\Crawler $crawler
68
     * @return \Pilipinews\Common\Crawler
69
     */
70 9
    protected function image(DomCrawler $crawler)
    {
        // Only the matched node is used, so the callback declares a single
        // parameter, just like the callback in video().
        $callback = function (DomCrawler $crawler)
        {
            $base = 'https://cnnphilippines.com';

            $link = $crawler->filter('img')->attr('src');

            $caption = $crawler->filter('.picture-caption');

            // Symfony's DomCrawler throws when text() is read from an empty
            // node list (assuming this crawler behaves the same), so a picture
            // without a caption element is emitted without a caption.
            $text = $caption->count() > 0 ? $caption->first()->text() : '';

            // Same truthiness check as before: an empty caption adds nothing.
            if ($text)
            {
                $text = ' - ' . $text;
            }

            return '<p>PHOTO: ' . $base . $link . $text . '</p>';
        };

        return $this->replace($crawler, '.img-container.picture', $callback);
    }
90
91
    /**
92
     * Converts video elements to readable string.
93
     *
94
     * @param  \Pilipinews\Common\Crawler $crawler
95
     * @return \Pilipinews\Common\Crawler
96
     */
97
    protected function video(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $frame)
        {
            $source = $frame->attr('src');

            // A source containing "embed" is labelled EMBED, anything else VIDEO.
            if (strpos($source, 'embed') === false)
            {
                $label = 'VIDEO';
            }
            else
            {
                $label = 'EMBED';
            }

            return '<p>' . $label . ': ' . $source . '</p><br><br><br>';
        };

        return $this->replace($crawler, 'p > iframe', $callback);
    }
110
}
111