DefaultFinder::find() - Code Metrics - Inspection of "Support redirection in cURL download callback" - ArthurHoaro/web-thumbnailer - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( e15538...7938c9 )

by Arthur

created 2018-05-01 14:50 UTC

DefaultFinder::find() C

↳ Parent: DefaultFinder

Complexity

Conditions	11
Paths	11

Size

Total Lines	32
Code Lines	19

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	11
eloc	19
nc	11
nop	0
dl	0
loc	32
rs	5.2653
c	0
b	0
f	0

How to fix Complexity

<?php

namespace WebThumbnailer\Finder;

use WebThumbnailer\Application\ConfigManager;
use WebThumbnailer\Application\WebAccess\WebAccess;
use WebThumbnailer\Application\WebAccess\WebAccessCUrl;
use WebThumbnailer\Application\WebAccess\WebAccessFactory;
use WebThumbnailer\Utils\ImageUtils;
use WebThumbnailer\Utils\UrlUtils;

/**
 * Class DefaultFinder
 *
 * This finder isn't linked to any domain.
 * It will return the resource if it is an image (by extension, or by content).
 * Otherwise, it'll try to retrieve an OpenGraph resource.
 *
 * @package WebThumbnailer\Finder
 */
class DefaultFinder extends FinderCommon
{
    /**
     * @var WebAccess Web access instance used to download the remote resource.
     */
    protected $webAccess;

    /**
     * Build the finder for the given URL.
     *
     * Only $domain and $url are used by this constructor: $rules and $options are
     * accepted but ignored, since this finder is not driven by domain rules.
     *
     * @inheritdoc
     */
    public function __construct($domain, $url, $rules, $options)
    {
        $this->webAccess = WebAccessFactory::getWebAccess($url);
        $this->url = $url;
        $this->domain = $domain;
    }

    /**
     * Generic finder.
     *
     * Resolution order:
     *   1. the URL itself, if its file extension is an image extension;
     *   2. the URL itself, if the downloaded content is an image;
     *   3. the thumbnail extracted by the cURL callback during the download;
     *   4. the OpenGraph image extracted from the downloaded content.
     *
     * Returns the thumbnail URL, or false if nothing could be found
     * (including when the first response header does not contain '200').
     *
     * @inheritdoc
     */
    public function find()
    {
        if (ImageUtils::isImageExtension(UrlUtils::getUrlFileExtension($this->url))) {
            return $this->url;
        }

        // Both variables are handed to the cURL callback by reference, which fills them during the download.
        $content = $thumbnail = null;
        $callback = $this->webAccess instanceof WebAccessCUrl
            ? $this->getCurlCallback($content, $thumbnail)
            : null;
        list($headers, $content) = $this->webAccess->getContent(
            $this->url,
            (int) ConfigManager::get('settings.default.timeout', 30),
            (int) ConfigManager::get('settings.default.max_img_dl', 16777216),
            $callback,
            $content
        );

        if (empty($thumbnail) && ImageUtils::isImageString($content)) {
            return $this->url;
        }

        // Nothing was extracted and the request was not successful: give up.
        if (empty($thumbnail) && ! empty($headers) && strpos($headers[0], '200') === false) {
            return false;
        }

        // With curl, the thumb is extracted during the download
        if ($this->webAccess instanceof WebAccessCUrl && ! empty($thumbnail)) {
            return $thumbnail;
        }

        return ! empty($content) ? self::extractMetaTag($content) : false;
    }

    /**
     * Get a callback for curl write function.
     *
     * @param string $content   A variable reference in which the downloaded content should be stored.
     * @param string $thumbnail A variable reference in which extracted thumb URL should be stored.
     *
     * @return \Closure CURLOPT_WRITEFUNCTION callback
     */
    protected function getCurlCallback(&$content, &$thumbnail)
    {
        $url = $this->url;
        $isRedirected = false;

        /**
         * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
         *
         * While downloading the remote page, we check that the HTTP code is 200 (redirections are let through)
         * and that the content type is either an image or 'text/html'.
         * If the resource is an image, its URL is the thumbnail. Otherwise we look for an OpenGraph image
         * in every received chunk, and stop the download as soon as one is found.
         *
         * Note that when using CURLOPT_WRITEFUNCTION, we have to manually handle the content retrieved,
         * hence the $content reference variable.
         *
         * The cURL handle is received by value: cURL does not pass it by reference, and declaring the
         * parameter as a reference triggers a warning on every call with PHP 8.
         *
         * @param resource $ch   cURL resource
         * @param string   $data chunk of data being downloaded
         *
         * @return int|bool length of $data or false if we need to stop the download
         */
        return function ($ch, $data) use ($url, &$content, &$thumbnail, &$isRedirected) {
            $content .= $data;
            $responseCode = curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
            // Every HTTP redirection status must let the download continue, not only 301/302:
            // otherwise a 303/307/308 hop would be handled as an error below and abort the transfer.
            if (in_array($responseCode, [301, 302, 303, 307, 308], true)) {
                $isRedirected = true;
                return strlen($data);
            }
            if (!empty($responseCode) && $responseCode !== 200) {
                return false;
            }
            // After a redirection, the content type will keep the previous request value
            // until it finds the next content-type header.
            $contentType = null;
            if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
                $contentType = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
            }
            // we look for image, and ignore application/octet-stream,
            // which is the default content type for any binary
            // @see https://developer.mozilla.org/fr/docs/Web/HTTP/Basics_of_HTTP/MIME_types
            if (!empty($contentType)
                && strpos($contentType, 'image/') !== false
                && strpos($contentType, 'application/octet-stream') === false
            ) {
                // The resource itself is an image: no need to download it any further.
                $thumbnail = $url;
                return false;
            } elseif (!empty($contentType)
                && strpos($contentType, 'text/html') === false
                && strpos($contentType, 'application/octet-stream') === false
            ) {
                // Neither an image nor an HTML page: nothing can be extracted from it.
                return false;
            }
            if (empty($thumbnail)) {
                $thumbnail = DefaultFinder::extractMetaTag($data);
            }
            // We got everything we want, stop the download.
            if (!empty($responseCode) && !empty($contentType) && !empty($thumbnail)) {
                return false;
            }

            return strlen($data);
        };
    }

    /**
     * Applies the regexp on the HTML $content to extract the thumb URL.
     *
     * The meta tag is matched on its `property`, `name` or `itemprop` attribute, with the `content`
     * attribute placed either after or before it. Matching is case-insensitive, because HTML tag
     * and attribute names are.
     *
     * @param string $content Downloaded HTML content
     *
     * @return string|bool Extracted thumb URL or false if not found.
     */
    public static function extractMetaTag($content)
    {
        $propertiesKey = ['property', 'name', 'itemprop'];
        $properties = implode('|', $propertiesKey);
        // Try to retrieve OpenGraph image.
        $ogRegex = '#<meta[^>]+(?:'. $properties .')=["\']?og:image["\'\s][^>]*content=["\']?(.*?)["\'\s>]#i';
        // If the attributes are not in the order property => content (e.g. Github)
        // New regex to keep this readable... more or less.
        $ogRegexReverse = '#<meta[^>]+content=["\']?([^"\'\s]+)[^>]+(?:'. $properties .')=["\']?og:image["\'\s/>]#i';

        if (preg_match($ogRegex, $content, $matches) > 0
            || preg_match($ogRegexReverse, $content, $matches) > 0
        ) {
            return $matches[1];
        }

        return false;
    }

    /**
     * Hotlinking is always allowed by this finder.
     *
     * @inheritdoc
     */
    public function isHotlinkAllowed()
    {
        return true;
    }

    /**
     * Nothing to check: this finder does not use rules.
     *
     * @inheritdoc
     */
    public function checkRules($rules)
    {
    }

    /**
     * Nothing to load: this finder does not use rules.
     *
     * @inheritdoc
     */
    public function loadRules($rules)
    {
    }

    /**
     * @inheritdoc
     */
    public function getName()
    {
        return 'default';
    }
}


1			<?php
2
3			namespace WebThumbnailer\Finder;
4
5			use WebThumbnailer\Application\ConfigManager;
6			use WebThumbnailer\Application\WebAccess\WebAccess;
7			use WebThumbnailer\Application\WebAccess\WebAccessCUrl;
8			use WebThumbnailer\Application\WebAccess\WebAccessFactory;
9			use WebThumbnailer\Utils\ImageUtils;
10			use WebThumbnailer\Utils\UrlUtils;
11
12			/**
13			* Class DefaultFinder
14			*
15			* This finder isn't linked to any domain.
16			* It will return the resource if it is an image (by extension, or by content).
17			* Otherwise, it'll try to retrieve an OpenGraph resource.
18			*
19			* @package WebThumbnailer\Finder
20			*/
21			class DefaultFinder extends FinderCommon
22			{
23			/**
24			* @var WebAccess instance.
25			*/
26			protected $webAccess;
27
28			/**
29			* @inheritdoc
30			*/
31			public function __construct($domain, $url, $rules, $options)
32			{
33			$this->webAccess = WebAccessFactory::getWebAccess($url);
34			$this->url = $url;
35			$this->domain = $domain;
36			}
37
38			/**
39			* Generic finder.
40			*
41			* @inheritdoc
42			*/
43			public function find()
44			{
45			if (ImageUtils::isImageExtension(UrlUtils::getUrlFileExtension($this->url))) {
46			return $this->url;
47			}
48
49			$content = $thumbnail = null;
50			$callback = $this->webAccess instanceof WebAccessCUrl
51			? $this->getCurlCallback($content, $thumbnail)
52			: null;
53			list($headers, $content) = $this->webAccess->getContent(
54			$this->url,
55			(int) ConfigManager::get('settings.default.timeout', 30),
56			(int) ConfigManager::get('settings.default.max_img_dl', 16777216),
57			$callback,
58			$content
59			);
60
61			if (empty($thumbnail) && ImageUtils::isImageString($content)) {
62			return $this->url;
63			}
64
65			if (empty($thumbnail) && ! empty($headers) && strpos($headers[0], '200') === false) {
66			return false;
67			}
68
69			// With curl, the thumb is extracted during the download
70			if ($this->webAccess instanceof WebAccessCUrl && ! empty($thumbnail)) {
71			return $thumbnail;
72			}
73
74			return ! empty($content) ? self::extractMetaTag($content) : false;
75			}
76
77			/**
78			* Get a callback for curl write function.
79			*
80			* @param string $content A variable reference in which the downloaded content should be stored.
81			* @param string $thumbnail A variable reference in which extracted thumb URL should be stored.
82			*
83			* @return \Closure CURLOPT_WRITEFUNCTION callback
84			*/
85			protected function getCurlCallback(&$content, &$thumbnail)
86			{
87			$url = $this->url;
88			$isRedirected = false;
89
90			/**
91			* cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
92			*
93			* While downloading the remote page, we check that the HTTP code is 200 and content type is 'html/text'
94			* Then we extract the title and the charset and stop the download when it's done.
95			*
96			* Note that when using CURLOPT_WRITEFUNCTION, we have to manually handle the content retrieved,
97			* hence the $content reference variable.
98			*
99			* @param resource $ch cURL resource
100			* @param string $data chunk of data being downloaded
101			*
102			* @return int|bool length of $data or false if we need to stop the download
103			*/
104			return function (&$ch, $data) use ($url, &$content, &$thumbnail, &$isRedirected) {
105			$content .= $data;
106			$responseCode = curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
107			if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
108			$isRedirected = true;
109			return strlen($data);
110			}
111			if (!empty($responseCode) && $responseCode !== 200) {
112			return false;
113			}
114			// After a redirection, the content type will keep the previous request value
115			// until it finds the next content-type header.
116			if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
117			$contentType = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
118			}
119			// we look for image, and ignore application/octet-stream,
120			// which is a the default content type for any binary
121			// @see https://developer.mozilla.org/fr/docs/Web/HTTP/Basics_of_HTTP/MIME_types
122			if (!empty($contentType)
123			&& strpos($contentType, 'image/') !== false
124			&& strpos($contentType, 'application/octet-stream') === false
125			) {
126			$thumbnail = $url;
127			return false;
128			} elseif (!empty($contentType)
129			&& strpos($contentType, 'text/html') === false
130			&& strpos($contentType, 'application/octet-stream') === false
131			) {
132			return false;
133			}
134			if (empty($thumbnail)) {
135			$thumbnail = DefaultFinder::extractMetaTag($data);
136			}
137			// We got everything we want, stop the download.
138			if (!empty($responseCode) && !empty($contentType) && !empty($thumbnail)) {
139			return false;
140			}
141
142			return strlen($data);
143			};
144			}
145
146			/**
147			* Applies the regexp on the HTML $content to extract the thumb URL.
148			*
149			* @param string $content Downloaded HTML content
150			*
151			* @return string|bool Extracted thumb URL or false if not found.
152			*/
153			public static function extractMetaTag($content)
154			{
155			$propertiesKey = ['property', 'name', 'itemprop'];
156			$properties = implode('|', $propertiesKey);
157			// Try to retrieve OpenGraph image.
158			$ogRegex = '#<meta[^>]+(?:'. $properties .')=["\']?og:image["\'\s][^>]*content=["\']?(.*?)["\'\s>]#';
159			// If the attributes are not in the order property => content (e.g. Github)
160			// New regex to keep this readable... more or less.
161			$ogRegexReverse = '#<meta[^>]+content=["\']?([^"\'\s]+)[^>]+(?:'. $properties .')=["\']?og:image["\'\s/>]#';
162
163			if (preg_match($ogRegex, $content, $matches) > 0
164			|| preg_match($ogRegexReverse, $content, $matches) > 0
165			) {
166			return $matches[1];
167			}
168
169			return false;
170			}
171
172			/**
173			* @inheritdoc
174			*/
175			public function isHotlinkAllowed()
176			{
177			return true;
178			}
179
180			/**
181			* @inheritdoc
182			*/
183			public function checkRules($rules)
184			{
185			}
186
187			/**
188			* @inheritdoc
189			*/
190			public function loadRules($rules)
191			{
192			}
193
194			/**
195			* @inheritdoc
196			*/
197			public function getName()
198			{
199			return 'default';
200			}
201			}
202

ArthurHoaro / web-thumbnailer

Push — master ( e15538...7938c9 )

DefaultFinder::find() C

Complexity

Size

Duplication

Importance

How to fix Complexity

Long Method

Duplication Side-by-Side

Filter issues like