Scraper::fbpost() - Code Metrics - Inspection of "Replace Scraper::video with Scraper::fbpost" - pilipinews/inquirer - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( b47649...99e771 )

by Rougin

created 2018-11-23 16:22 UTC

Scraper::fbpost() A

↳ Parent: Scraper

Complexity

Conditions	1
Paths	1

Size

Total Lines	14
Code Lines	6

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	4
CRAP Score	1.125

Importance

Changes

Metric	Value
cc	1
eloc	6
nc	1
nop	1
dl	0
loc	14
ccs	4
cts	8
cp	0.5
crap	1.125
rs	10
c	0
b	0
f	0

<?php

namespace Pilipinews\Website\Inquirer;

use Pilipinews\Common\Article;
use Pilipinews\Common\Client;
use Pilipinews\Common\Crawler as DomCrawler;
use Pilipinews\Common\Interfaces\ScraperInterface;
use Pilipinews\Common\Scraper as AbstractScraper;

/**
 * Inquirer News Scraper
 *
 * @package Pilipinews
 * @author  Rougin Royce Gutib <[email protected]>
 */
class Scraper extends AbstractScraper implements ScraperInterface
{
    /**
     * Promotional footer text that is stripped from the scraped article body.
     */
    const TEXT_FOOTER = 'Subscribe to INQUIRER PLUS (http://www.inquirer.net/plus) to get access to The Philippine Daily Inquirer & other 70+ titles, share up to 5 gadgets, listen to the news, download as early as 4am & share articles on social media. Call 896 6000.';

    /**
     * Texts handed to the inherited html() helper when the body is finalized.
     * NOTE(review): presumably these are removed from the output; confirm
     * against the parent scraper.
     *
     * @var string[]
     */
    protected $refresh = array('Refresh this page for updates.');

    /**
     * Selectors passed to the inherited remove() helper before the body
     * is extracted.
     *
     * @var string[]
     */
    protected $removables = array(
        'script',
        '#billboard_article',
        '.ventuno-vid',
        '#article_disclaimer',
        '.OUTBRAIN',
        '#ch-follow-us',
        '.view-comments',
        '#article_tags',
        '.adsbygoogle',
        '#article-new-featured',
        '#read-next-2018',
        '#rn-lbl',
        '#fb-root',
    );

    /**
     * Returns the contents of an article.
     *
     * The link is lower-cased before it is requested.
     *
     * @param  string $link
     * @return \Pilipinews\Common\Article
     */
    public function scrape($link)
    {
        $this->prepare((string) mb_strtolower($link));

        $title = $this->title('.entry-title');

        $this->remove((array) $this->removables);

        $body = $this->body('#article_content');

        // Rewrite embedded widgets into plain paragraphs, one kind at a time.
        $body = $this->caption($body);

        $body = $this->fbvideo($body);

        $body = $this->fbpost($body)->html();

        // Drops a "-<digits>x<digits>" suffix that precedes ".jpg". The dot
        // is escaped so that only a literal ".jpg" extension is matched.
        $pattern = '/-(\d+)x(\d+)\.jpg/i';

        $body = preg_replace($pattern, '.jpg', $body);

        $body = $this->html(new DomCrawler($body), $this->refresh);

        $body = str_replace(self::TEXT_FOOTER, '', trim($body));

        return new Article($title, (string) trim($body));
    }

    /**
     * Converts caption elements to readable string.
     *
     * Each ".wp-caption" element is replaced by a "PHOTO: <src>" paragraph
     * followed by a paragraph holding the HTML of its ".wp-caption-text".
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function caption(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $crawler)
        {
            $image = $crawler->filter('img')->first()->attr('src');

            $text = $crawler->filter('.wp-caption-text')->first();

            return sprintf('<p>PHOTO: %s</p><p>%s</p>', $image, $text->html());
        };

        return $this->replace($crawler, '.wp-caption', $callback);
    }

    /**
     * Converts Facebook embedded posts to readable string.
     *
     * Each ".fb-xfbml-parse-ignore" element is replaced by a "POST: <cite>"
     * paragraph followed by a paragraph with the text of its first "p > a".
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function fbpost(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $crawler)
        {
            // Read the "cite" attribute once and reuse it.
            $link = $crawler->attr('cite');

            $message = $crawler->filter('p > a')->first();

            return '<p>POST: ' . $link . '</p><p>' . $message->text() . '</p>';
        };

        return $this->replace($crawler, '.fb-xfbml-parse-ignore', $callback);
    }

    /**
     * Converts fbvideo elements to readable string.
     *
     * Each ".fb-video" element is replaced by a "VIDEO: <data-href>" paragraph.
     *
     * @param  \Pilipinews\Common\Crawler $crawler
     * @return \Pilipinews\Common\Crawler
     */
    protected function fbvideo(DomCrawler $crawler)
    {
        $callback = function (DomCrawler $crawler)
        {
            return '<p>VIDEO: ' . $crawler->attr('data-href') . '</p>';
        };

        return $this->replace($crawler, '.fb-video', $callback);
    }

    /**
     * Initializes the crawler instance.
     *
     * Requests the link, strips known boilerplate paragraphs from the raw
     * response, then stores a new crawler built from the cleaned response.
     *
     * @param  string $link
     * @return void
     */
    protected function prepare($link)
    {
        $response = Client::request((string) $link);

        // Two spellings of the same weather notice are removed; the first
        // one carries a stray double quote before the closing tag.
        $notices = array(
            '<p>Click <a href="https://www.inquirer.net/philippine-typhoon-news">here</a> for more weather related news."</p>',
            '<p>Click <a href="https://www.inquirer.net/philippine-typhoon-news">here</a> for more weather related news.</p>',
        );

        $response = str_replace($notices, '', $response);

        // A "strong" element holding a single space becomes a plain space.
        $response = str_replace('<strong> </strong>', ' ', $response);

        $this->crawler = new DomCrawler($response);
    }
}


1		<?php
2
3		namespace Pilipinews\Website\Inquirer;
4
5		use Pilipinews\Common\Article;
6		use Pilipinews\Common\Client;
7		use Pilipinews\Common\Crawler as DomCrawler;
8		use Pilipinews\Common\Interfaces\ScraperInterface;
9		use Pilipinews\Common\Scraper as AbstractScraper;
10
11		/**
12		* Inquirer News Scraper
13		*
14		* @package Pilipinews
15		* @author Rougin Royce Gutib <[email protected]>
16		*/
17		class Scraper extends AbstractScraper implements ScraperInterface
18		{
19		const TEXT_FOOTER = 'Subscribe to INQUIRER PLUS (http://www.inquirer.net/plus) to get access to The Philippine Daily Inquirer & other 70+ titles, share up to 5 gadgets, listen to the news, download as early as 4am & share articles on social media. Call 896 6000.';
20
21		/**
22		* @var string[]
23		*/
24		protected $refresh = array('Refresh this page for updates.');
25
26		/**
27		* @var string[]
28		*/
29		protected $removables = array(
30		'script',
31		'#billboard_article',
32		'.ventuno-vid',
33		'#article_disclaimer',
34		'.OUTBRAIN',
35		'#ch-follow-us',
36		'.view-comments',
37		'#article_tags',
38		'.adsbygoogle',
39		'#article-new-featured',
40		'#read-next-2018',
41		'#rn-lbl',
42		'#fb-root',
43		);
44
45		/**
46		* Returns the contents of an article.
47		*
48		* @param string $link
49		* @return \Pilipinews\Common\Article
50		*/
51	30	public function scrape($link)
52		{
53	30	$this->prepare((string) mb_strtolower($link));
54
55	30	$title = $this->title('.entry-title');
56
57	30	$pattern = '/-(\d+)x(\d+).jpg/i';
58
59	30	$this->remove((array) $this->removables);
60
61	30	$body = $this->body('#article_content');
62
63	30	$body = $this->caption($body);
64
65	30	$body = $this->fbvideo($body);
66
67	30	$body = $this->fbpost($body)->html();
68
69	30	$body = preg_replace($pattern, '.jpg', $body);
70
71	30	$body = $this->html(new DomCrawler($body), $this->refresh);
72
73	30	$body = str_replace(self::TEXT_FOOTER, '', trim($body));
74
75	30	return new Article($title, (string) trim($body));
76		}
77
78		/**
79		* Converts caption elements to readable string.
80		*
81		* @param \Pilipinews\Common\Crawler $crawler
82		* @return \Pilipinews\Common\Crawler
83		*/
84	20	protected function caption(DomCrawler $crawler)
85		{
86	10	$callback = function (DomCrawler $crawler)
87		{
88	12	$image = $crawler->filter('img')->first()->attr('src');
89
90	12	$format = (string) '<p>PHOTO: %s</p><p>%s</p>';
91
92	12	$text = $crawler->filter('.wp-caption-text')->first();
93
94	12	return sprintf($format, $image, $text->html());
95	30	};
96
97	30	return $this->replace($crawler, '.wp-caption', $callback);
98		}
99
100		/**
101		* Converts Facebook embedded posts to readable string.
102		*
103		* @param \Pilipinews\Common\Crawler $crawler
104		* @return \Pilipinews\Common\Crawler
105		*/
106	20	protected function fbpost(DomCrawler $crawler)
107		{
108	10	$callback = function (DomCrawler $crawler)
109		{
110		$link = $crawler->attr('cite');
		Issue (Unused Code, introduced 2018-11-23 16:24 UTC): The assignment to `$link` is dead and can be removed.
111
112		$text = '<p>POST: ' . $crawler->attr('cite') . '</p>';
113
114		$message = $crawler->filter('p > a')->first();
115
116		return $text . '<p>' . $message->text() . '</p>';
117	30	};
118
119	30	return $this->replace($crawler, '.fb-xfbml-parse-ignore', $callback);
120		}
121
122		/**
123		* Converts fbvideo elements to readable string.
124		*
125		* @param \Pilipinews\Common\Crawler $crawler
126		* @return \Pilipinews\Common\Crawler
127		*/
128		protected function fbvideo(DomCrawler $crawler)
129		{
130	30	$callback = function (DomCrawler $crawler)
131		{
132	3	$link = $crawler->attr('data-href');
133
134	3	return '<p>VIDEO: ' . $link . '</p>';
135	30	};
136
137	30	return $this->replace($crawler, '.fb-video', $callback);
138		}
139
140		/**
141		* Initializes the crawler instance.
142		*
143		* @param string $link
144		* @return void
145		*/
146	30	protected function prepare($link)
147		{
148	30	$response = Client::request((string) $link);
149
150	30	$response = str_replace('<p>Click <a href="https://www.inquirer.net/philippine-typhoon-news">here</a> for more weather related news."</p>', '', $response);
151
152	30	$response = str_replace('<p>Click <a href="https://www.inquirer.net/philippine-typhoon-news">here</a> for more weather related news.</p>', '', $response);
153
154	30	$response = str_replace('<strong> </strong>', ' ', $response);
155
156	30	$this->crawler = new DomCrawler($response);
157	30	}
158		}
159

pilipinews / inquirer

Push — master ( b47649...99e771 )

Scraper::fbpost() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like