Passed
Pull Request — master (#72)
by
unknown
02:41
created

OpenGraph   A

Complexity

Total Complexity 24

Size/Duplication

Total Lines 117
Duplicated Lines 0 %

Importance

Changes 24
Bugs 3 Features 0
Metric Value
eloc 68
c 24
b 3
f 0
dl 0
loc 117
rs 10
wmc 24

4 Methods

Rating   Name   Duplication   Size   Complexity  
A verify_image_url() 0 15 4
A curl_get_contents() 0 37 3
C fetch() 0 45 14
A get_meta_value() 0 11 3
1
<?php
2
3
namespace shweshi\OpenGraph;
4
5
use DOMDocument;
6
use shweshi\OpenGraph\Exceptions\FetchException;
7
8
/**
 * Fetches a web page and extracts its Open Graph (and optionally all other)
 * <meta> tags into an associative array.
 */
class OpenGraph
{
    /**
     * Download $url and return its meta tags as a key => value array.
     *
     * Open Graph properties lose their "og:" prefix and have "-" replaced by
     * "_" (e.g. "og:site-name" becomes "site_name"). When an "image" entry is
     * present it is checked with verify_image_url() and blanked out (set to
     * '') if that check fails.
     *
     * @param string      $url       Page to fetch.
     * @param bool|null   $allMeta   Truthy: return every meta tag that has a "property" or
     *                               "name"; falsy: only tags whose "property" starts with "og:".
     * @param string|null $lang      Optional value for the Accept-Language request header.
     * @param int         $options   libxml option bitmask passed to DOMDocument::loadHTML().
     * @param string      $userAgent Value for the User-Agent request header.
     *
     * @throws FetchException When the HTTP transfer fails.
     *
     * @return array<string, string>
     */
    public function fetch($url, $allMeta = null, $lang = null, $options = LIBXML_NOWARNING | LIBXML_NOERROR, $userAgent = 'Curl')
    {
        $html = $this->curl_get_contents($url, $lang, $userAgent);

        $doc = new DOMDocument();

        // Collect libxml errors internally instead of emitting PHP warnings
        // for empty or malformed HTML.
        $libxml_previous_state = libxml_use_internal_errors(true);

        try {
            // The XML prolog forces libxml to decode the markup as UTF-8.
            $doc->loadHTML('<?xml encoding="utf-8" ?>'.$html, $options);

            // Only report parser errors when the caller supplied options that
            // do not suppress them.
            if ($options > 0 && ($options & (LIBXML_NOWARNING | LIBXML_NOERROR)) == 0) {
                foreach (libxml_get_errors() as $error) {
                    // error_log() is used because no logger class is imported
                    // in this namespace; a bare Log::warning() call would be a
                    // fatal "class not found" error.
                    error_log(sprintf(
                        'OpenGraph: libxml error %d at line %d: %s',
                        $error->code,
                        $error->line,
                        trim($error->message)
                    ));
                }
            }
        } finally {
            // Always leave libxml's global error state as we found it.
            libxml_clear_errors();
            libxml_use_internal_errors($libxml_previous_state);
        }

        $metadata = [];

        foreach ($doc->getElementsByTagName('meta') as $tag) {
            $metaproperty = $tag->hasAttribute('property') ? $tag->getAttribute('property') : $tag->getAttribute('name');
            if (!$metaproperty) {
                continue;
            }

            if ($allMeta) {
                $key = (strpos($metaproperty, 'og:') === 0) ? strtr(substr($metaproperty, 3), '-', '_') : $metaproperty;
            } elseif (strpos($tag->getAttribute('property'), 'og:') === 0) {
                // Without $allMeta only the "property" attribute qualifies;
                // an "og:" prefix found in "name" is ignored.
                $key = strtr(substr($metaproperty, 3), '-', '_');
            } else {
                continue;
            }

            // $key is scoped to this iteration, so a tag that yields no key
            // can never re-use the key/value of a previous tag.
            if (!empty($key)) {
                $metadata[$key] = $this->get_meta_value($tag);
            }
        }

        // Verify the image once, after all tags were read. Doing this inside
        // the loop would issue one HTTP request per remaining meta tag.
        if (isset($metadata['image']) && !$this->verify_image_url($metadata['image'])) {
            $metadata['image'] = '';
        }

        return $metadata;
    }

    /**
     * Perform an HTTP GET request with cURL and return the response body.
     *
     * @param string      $url       Address to request.
     * @param string|null $lang      Optional value for the Accept-Language header.
     * @param string      $userAgent Value for the User-Agent header.
     *
     * @throws FetchException When the cURL session cannot be created or the transfer fails.
     *
     * @return string|bool Whatever curl_exec() returned.
     */
    protected function curl_get_contents($url, $lang, $userAgent)
    {
        $headers = [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Cache-Control: no-cache',
            'User-Agent: '.$userAgent,
        ];

        if ($lang) {
            $headers[] = 'Accept-Language: '.$lang;
        }

        $curl = curl_init();
        if ($curl === false) {
            // curl_init() reports failure by returning false; stop here
            // instead of handing false to the other curl_* functions.
            throw new FetchException('Unable to initialise a cURL session.', 0, null, []);
        }

        try {
            curl_setopt_array($curl, [
                CURLOPT_URL            => $url,
                CURLOPT_FAILONERROR    => false,
                CURLOPT_FOLLOWLOCATION => true,
                CURLOPT_RETURNTRANSFER => true,
                // NOTE(review): TLS host and peer verification are disabled,
                // so the response can be tampered with in transit. Kept as-is
                // because callers may rely on it - confirm whether it can be
                // enabled.
                CURLOPT_SSL_VERIFYHOST => false,
                CURLOPT_SSL_VERIFYPEER => false,
                // NOTE(review): this option sets the Accept-Encoding request
                // header; 'UTF-8' is a charset, not a content-coding. An empty
                // string would request every encoding cURL supports - confirm
                // before changing.
                CURLOPT_ENCODING       => 'UTF-8',
                CURLOPT_MAXREDIRS      => 10,
                CURLOPT_TIMEOUT        => 30,
                CURLOPT_HTTP_VERSION   => CURL_HTTP_VERSION_1_1,
                CURLOPT_CUSTOMREQUEST  => 'GET',
                CURLOPT_HTTPHEADER     => $headers,
            ]);

            $response = curl_exec($curl);

            if (curl_errno($curl) !== 0) {
                throw new FetchException(curl_error($curl), curl_errno($curl), null, curl_getinfo($curl));
            }

            return $response;
        } finally {
            // Release the handle on success and on failure alike.
            curl_close($curl);
        }
    }

    /**
     * Check that $url is a well-formed HTTP(S) URL that answers with status 200.
     *
     * The path is percent-encoded first so that URLs containing spaces or
     * non-ASCII characters pass validation. Redirects followed by
     * get_headers() are taken into account: the status of the last response
     * decides the outcome.
     *
     * @param string $url Candidate image URL.
     *
     * @return bool True when the final response status is 200, false otherwise.
     */
    protected function verify_image_url($url)
    {
        if (!is_string($url) || $url === '') {
            return false;
        }

        // parse_url() yields null when there is no path and false for a
        // seriously malformed URL; only a real path can be re-encoded.
        $path = parse_url($url, PHP_URL_PATH);
        if (is_string($path) && $path !== '') {
            $encoded_path = array_map(
                static function ($segment) {
                    // Decode first so existing escapes (e.g. %20) are not
                    // encoded a second time.
                    return rawurlencode(rawurldecode($segment));
                },
                explode('/', $path)
            );
            $url = str_replace($path, implode('/', $encoded_path), $url);
        }

        if (!filter_var($url, FILTER_VALIDATE_URL)) {
            return false;
        }

        // The URL comes from a remote page: only plain web schemes are probed.
        $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
        if ($scheme !== 'http' && $scheme !== 'https') {
            return false;
        }

        try {
            $headers = get_headers($url);
        } catch (\Exception $e) {
            // Reached when an error handler turns PHP warnings into exceptions.
            return false;
        }

        // Without such a handler get_headers() signals failure by returning false.
        if (!is_array($headers)) {
            return false;
        }

        // When redirects were followed the list holds one status line per
        // response; keep the last one.
        $status = null;
        foreach ($headers as $header) {
            if (is_string($header) && preg_match('#^HTTP/\S+\s+(\d{3})\b#i', $header, $match)) {
                $status = (int) $match[1];
            }
        }

        return $status === 200;
    }

    /**
     * Read the value of a meta tag.
     *
     * Uses the "content" attribute, falling back to "value" when "content"
     * is missing or an empty string. A literal "0" counts as a value.
     *
     * @param \DOMElement $tag Meta element to read.
     *
     * @return string The attribute value, or '' when neither attribute holds one.
     */
    protected function get_meta_value($tag)
    {
        foreach (['content', 'value'] as $attribute) {
            // getAttribute() returns '' for a missing attribute.
            $value = $tag->getAttribute($attribute);
            if ($value !== '') {
                return $value;
            }
        }

        return '';
    }
}
127