1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Zrashwani\NewsScrapper\Adapters; |
4
|
|
|
|
5
|
|
|
use \Symfony\Component\DomCrawler\Crawler; |
6
|
|
|
|
7
|
|
|
/** |
8
|
|
|
 * Adapter to extract news based on open graph protocol specifications |
9
|
|
|
* @link http://ogp.me/ open graph meta data specifications |
10
|
|
|
* @author Zeid Rashwani <zrashwani.com> |
11
|
|
|
*/ |
12
|
|
|
class OpenGraphAdapter extends AbstractAdapter
{
    /**
     * Minimum width/height (in pixels) below which an image is considered
     * too small to be a representative article image.
     */
    const MIN_IMAGE_DIMENSION = 200;

    /**
     * extract title information from crawler object
     *
     * Prefers the og:title meta tag; falls back to the last <h1>, then to
     * the document <title>.
     *
     * @param Crawler $crawler
     * @return string|null null when no candidate title is found
     */
    public function extractTitle(Crawler $crawler)
    {
        $ret = $this->lastMatchAttribute($crawler, "//head/meta[@property='og:title']", 'content');

        //fallback in case document don't have og:title
        if (empty($ret) === true) {
            $ret = $this->lastMatchText($crawler, '//h1');
        }

        if (empty($ret) === true) {
            $ret = $this->lastMatchText($crawler, '//head/title');
        }

        return $ret;
    }

    /**
     * extract image url from crawler open graph
     *
     * Prefers an og:image meta tag that is not "small"; otherwise scans all
     * <img> tags and keeps the largest image exceeding the minimum size.
     *
     * @param Crawler $crawler
     * @return string|null normalized absolute image url, or null if none found
     */
    public function extractImage(Crawler $crawler)
    {
        $ret = null;

        $crawler->filterXPath("//head/meta[@property='og:image']")
            ->each(
                function (Crawler $node) use (&$ret) {
                    if ($this->getCheckSmallImage($node->attr('content')) === false) { //not small image size
                        $ret = $node->attr('content');
                    }
                }
            );

        if (empty($ret) === true) {
            $crawler->filterXPath('//img')
                ->each(
                    function (Crawler $node) use (&$ret) {
                        $img_src = $this->normalizeLink($node->attr('src'));
                        list($width, $height) = $this->getImageDimensions($img_src);

                        // dimensions of the current best candidate (0x0 when none yet)
                        $width_org = $height_org = 0;
                        if (empty($ret) === false) {
                            list($width_org, $height_org) = $this->getImageDimensions($ret);
                        }

                        if ($width > $width_org && $height > $height_org
                            && $width > self::MIN_IMAGE_DIMENSION
                            && $height > self::MIN_IMAGE_DIMENSION //min size of the image amended
                        ) {
                            $ret = $img_src;
                        }
                    }
                );
        }

        if (empty($ret) === false) {
            $ret = $this->normalizeLink($ret);
        }

        return $ret;
    }

    /**
     * extract description from og:description meta tag
     *
     * @param Crawler $crawler
     * @return string|null
     */
    public function extractDescription(Crawler $crawler)
    {
        return $this->lastMatchAttribute($crawler, "//head/meta[@property='og:description']", 'content');
    }

    /**
     * extract keywords out of crawler object
     *
     * @param Crawler $crawler
     * @return array list of keywords; empty array when none found
     */
    public function extractKeywords(Crawler $crawler)
    {
        $ret = array();

        $crawler->filterXPath("//head/meta[@property='og:keywords']")
            ->each(
                function (Crawler $node) use (&$ret) {
                    $node_txt = trim($node->attr('content'));
                    if (!empty($node_txt)) {
                        $ret = explode(',', $node_txt);
                    }
                }
            );

        return $ret;
    }

    /**
     * extract article body; open graph carries no body content
     *
     * @param Crawler $crawler
     * @return null
     */
    public function extractBody(Crawler $crawler)
    {
        //No body can be extracted from open graph protocol
        return null;
    }

    /**
     * extract publish date from article:published_time meta tag
     *
     * @param Crawler $crawler
     * @return string|null ISO8601-formatted date, or null when absent
     */
    public function extractPublishDate(Crawler $crawler)
    {
        $date_str = $this->lastMatchAttribute(
            $crawler,
            "//head/meta[@property='article:published_time']",
            'content'
        );

        if (is_null($date_str)) {
            return null;
        }

        $ret = new \DateTime($date_str);

        return $ret->format(\DateTime::ISO8601);
    }

    /**
     * extract author from article:author meta tag
     *
     * @param Crawler $crawler
     * @return string|null
     */
    public function extractAuthor(Crawler $crawler)
    {
        return $this->lastMatchAttribute($crawler, "//head/meta[@property='article:author']", 'content');
    }

    /**
     * check whether an image is smaller than the minimum accepted dimensions
     *
     * @param string $imageUrl
     * @return bool true when the image is too small (or unreadable)
     */
    public function getCheckSmallImage($imageUrl)
    {
        list($width_org, $height_org) = $this->getImageDimensions($imageUrl);

        return ($width_org < self::MIN_IMAGE_DIMENSION || $height_org < self::MIN_IMAGE_DIMENSION);
    }

    /**
     * fetch width/height of an image url, guarding against fetch failure
     *
     * getimagesize() returns false (and emits a warning) when the resource
     * cannot be read; in that case [0, 0] is returned so callers treat the
     * image as too small instead of triggering list() notices.
     *
     * @param string $imageUrl
     * @return array [width, height] in pixels; [0, 0] on failure
     */
    protected function getImageDimensions($imageUrl)
    {
        $url = pathinfo($imageUrl);
        // suppress the warning; the false return is handled explicitly below
        $size = @getimagesize($url['dirname'].'/'.urlencode($url['basename']));

        if ($size === false) {
            return array(0, 0);
        }

        return array($size[0], $size[1]);
    }

    /**
     * return the given attribute of the LAST node matching the xpath
     * (matches the original each()-overwrite behavior), or null if no match
     *
     * @param Crawler $crawler
     * @param string  $xpath
     * @param string  $attribute
     * @return string|null
     */
    private function lastMatchAttribute(Crawler $crawler, $xpath, $attribute)
    {
        $value = null;

        $crawler->filterXPath($xpath)
            ->each(
                function (Crawler $node) use (&$value, $attribute) {
                    $value = $node->attr($attribute);
                }
            );

        return $value;
    }

    /**
     * return the text of the LAST node matching the xpath, or null if no match
     *
     * @param Crawler $crawler
     * @param string  $xpath
     * @return string|null
     */
    private function lastMatchText(Crawler $crawler, $xpath)
    {
        $value = null;

        $crawler->filterXPath($xpath)
            ->each(
                function (Crawler $node) use (&$value) {
                    $value = $node->text();
                }
            );

        return $value;
    }
}
194
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.