Scraper - Code Metrics - Inspection of "Update ScraperTest" - pilipinews/cnn - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 48eaaa...d014fd )

by Rougin

created 2019-06-19 15:41 UTC

Scraper A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	90
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes

Metric	Value
eloc	31
dl	0
loc	90
ccs	27
cts	27
cp	1
rs	10
c	0
b	0
f	0
wmc	4

3 Methods

Rating	Name	Size	Complexity
A	prepare()	19	1
A	scrape()	19	1
A	video()	12	2

<?php

namespace Pilipinews\Website\Cnn;

use Pilipinews\Common\Article;
use Pilipinews\Common\Client;
use Pilipinews\Common\Crawler as DomCrawler;
use Pilipinews\Common\Interfaces\ScraperInterface;
use Pilipinews\Common\Scraper as AbstractScraper;

/**
 * CNN Philippines Scraper
 *
 * @package Pilipinews
 * @author  Rougin Gutib <[email protected]>
 */
class Scraper extends AbstractScraper implements ScraperInterface
{
    /**
     * CSS selectors handed to remove() once the crawler has been created.
     *
     * @var string[]
     */
    protected $removables = array('p > script', '.flourish-credit');

    /**
     * "Refresh for updates" notices handed to html() together with the body.
     * NOTE(review): presumably stripped from the final text by the parent
     * scraper - confirm against \Pilipinews\Common\Scraper::html().
     *
     * @var string[]
     */
    protected $reload = array(
        'Please click the source link below for more updates.',
        'Please refresh for updates.',
        'Please refresh the page for updates.',
        'Please refresh this page for updates.',
        'Refresh this page for more updates.',
    );

    /**
     * Returns the contents of an article.
     *
     * Loads the page, reads its title and "#content-body" element, converts
     * tweets and video iframes into text, and wraps the result in an Article.
     *
     * @param  string $link
     * @return \Pilipinews\Common\Article
     */
    public function scrape($link)
    {
        $this->prepare((string) $link);

        $title = $this->title('title', ' - CNN Philippines');

        $body = $this->body('#content-body');

        $body = $this->video($this->tweet($body));

        $html = $this->html($body, $this->reload);

        // Puts a space between a "pic.twitter.com/..." link and a trailing
        // "- CNN". The dots are escaped so that only the literal host name
        // matches instead of any character in those positions.
        $search = '/pic\.twitter\.com\/(.*)- CNN/i';

        $replace = (string) 'pic.twitter.com/$1 - CNN';

        $html = preg_replace($search, $replace, $html);

        return new Article($title, $html, $link);
    }

    /**
     * Initializes the crawler instance.
     *
     * Downloads the page, tidies a few whitespace quirks in the markup, and
     * renames the numerically suffixed "content-body-N-N" identifier to plain
     * "content-body" so that scrape() can select it as "#content-body".
     *
     * @param  string $link
     * @return void
     */
    protected function prepare($link)
    {
        $pattern = '/content-body-[0-9]+(-[0-9]+)+/i';

        $html = Client::request((string) $link);

        $html = str_replace(' </em> ', '</em> ', $html);

        // Only rewrite when the suffixed identifier is actually present.
        // Reading $matches[0] after a failed match would raise an undefined
        // offset notice and pass null to str_replace().
        if (preg_match($pattern, (string) $html, $matches) === 1)
        {
            $html = str_replace($matches[0], 'content-body', $html);
        }

        // Moves a space found just inside a closing anchor to the outside.
        $html = str_replace(' </a>', '</a> ', $html);

        // Collapses a bold element holding only a space into that space.
        $html = str_replace('<strong> </strong>', ' ', $html);

        $this->crawler = new DomCrawler((string) $html);

        $this->remove((array) $this->removables);
    }

    /**
     * Converts video elements to readable string.
     *
     * Every "p > iframe" element is replaced by a paragraph that reads
     * "EMBED: <src>" when the source contains "embed", or "VIDEO: <src>"
     * otherwise.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function video(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $crawler)
        {
            // Cast guards strpos() against a non-string value.
            // NOTE(review): attr() presumably returns null when the iframe
            // has no "src" attribute - confirm against the Crawler class.
            $link = (string) $crawler->attr('src');

            $embed = strpos($link, 'embed');

            $type = $embed !== false ? 'EMBED' : 'VIDEO';

            return '<p>' . $type . ': ' . $link . '</p><br><br><br>';
        };

        return $this->replace($crawler, 'p > iframe', $callback);
    }
}


1		<?php
2
3		namespace Pilipinews\Website\Cnn;
4
5		use Pilipinews\Common\Article;
6		use Pilipinews\Common\Client;
7		use Pilipinews\Common\Crawler as DomCrawler;
8		use Pilipinews\Common\Interfaces\ScraperInterface;
9		use Pilipinews\Common\Scraper as AbstractScraper;
10
11		/**
12		* CNN Philippines Scraper
13		*
14		* @package Pilipinews
15		* @author Rougin Gutib <[email protected]>
16		*/
17		class Scraper extends AbstractScraper implements ScraperInterface
18		{
19		/**
20		* @var string[]
21		*/
22		protected $removables = array('p > script', '.flourish-credit');
23
24		/**
25		* @var string[]
26		*/
27		protected $reload = array(
28		'Please click the source link below for more updates.',
29		'Please refresh for updates.',
30		'Please refresh the page for updates.',
31		'Please refresh this page for updates.',
32		'Refresh this page for more updates.',
33		);
34
35		/**
36		* Returns the contents of an article.
37		*
38		* @param string $link
39		* @return \Pilipinews\Common\Article
40		*/
41	30	public function scrape($link)
42		{
43	30	$this->prepare((string) $link);
44
45	30	$title = $this->title('title', ' - CNN Philippines');
46
47	30	$body = $this->body('#content-body');
48
49	30	$body = $this->video($this->tweet($body));
50
51	30	$html = $this->html($body, $this->reload);
52
53	30	$search = '/pic.twitter.com\/(.*)- CNN/i';
54
55	30	$replace = (string) 'pic.twitter.com/$1 - CNN';
56
57	30	$html = preg_replace($search, $replace, $html);
58
59	30	return new Article($title, $html, $link);
60		}
61
62		/**
63		* Initializes the crawler instance.
64		*
65		* @param string $link
66		* @return void
67		*/
68	30	protected function prepare($link)
69		{
70	30	$pattern = '/content-body-[0-9]+(-[0-9]+)+/i';
71
72	30	$html = Client::request((string) $link);
73
74	30	$html = str_replace(' </em> ', '</em> ', $html);
75
76	30	preg_match($pattern, (string) $html, $matches);
77
78	30	$html = str_replace($matches[0], 'content-body', $html);
79
80	30	$html = str_replace(' </a>', '</a> ', $html);
81
82	30	$html = str_replace('<strong> </strong>', ' ', $html);
83
84	30	$this->crawler = new DomCrawler((string) $html);
85
86	30	$this->remove((array) $this->removables);
87	30	}
88
89		/**
90		* Converts video elements to readable string.
91		*
92		* @param \Pilipinews\Common\Crawler $crawler
93		* @return \Pilipinews\Common\Crawler
94		*/
95		protected function video(DomCrawler $crawler)
96		{
97	30	$callback = function (DomCrawler $crawler)
98		{
99	9	$embed = strpos($link = $crawler->attr('src'), 'embed');
100
101	9	$type = $embed !== false ? 'EMBED' : 'VIDEO';
102
103	9	return '<p>' . $type . ': ' . $link . '</p><br><br><br>';
104	30	};
105
106	30	return $this->replace($crawler, 'p > iframe', $callback);
107		}
108		}
109

pilipinews / cnn

Push — master ( 48eaaa...d014fd )

Scraper A

Complexity

Size/Duplication

Test Coverage

Importance

3 Methods

Duplication Side-by-Side

Filter issues like