Test Setup Failed
Pull Request — master (#93)
by
unknown
10:09
created

OpenGraph::fetch()   C

Complexity

Conditions 14
Paths 146

Size

Total Lines 45
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 15
Bugs 2 Features 0
Metric Value
cc 14
eloc 25
c 15
b 2
f 0
nc 146
nop 5
dl 0
loc 45
rs 5.8833

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

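Commonly applied refactorings include Extract Method and, when the method has many parameters or temporary variables, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.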

1
<?php
2
3
namespace shweshi\OpenGraph;
4
5
use DOMDocument;
6
use Illuminate\Support\Facades\Log;
7
use shweshi\OpenGraph\Exceptions\FetchException;
8
9
class OpenGraph
10
{
11
12
    /**
     * User-Agent value used until userAgent() supplies a different one.
     */
    private const string DEFAULT_USER_AGENT = 'Curl';
0 ignored issues
show
Bug introduced by
A parse error occurred: Syntax error, unexpected T_STRING, expecting '=' on line 12 at column 25
Loading history...
13
14
    /**
     * User-Agent string sent when downloading a page; replaced through userAgent().
     */
    private string $userAgent = self::DEFAULT_USER_AGENT;
15
16
    /**
     * Download a page and collect its Open Graph (or all) <meta> tags.
     *
     * @param string      $url     Address of the page to download.
     * @param mixed       $allMeta When truthy, every <meta> tag carrying a `property` or
     *                             `name` attribute is returned; otherwise only tags whose
     *                             `property` attribute starts with `og:`.
     * @param string|null $lang    Optional value for the Accept-Language request header.
     * @param int         $options libxml option bitmask passed to DOMDocument::loadHTML().
     *
     * @throws FetchException
     *
     * @return array Map of tag key to tag value. For `og:` tags the prefix is removed
     *               and `-` is replaced by `_`. The `image` entry is set to '' when
     *               its URL does not pass verify_image_url().
     */
    public function fetch($url, $allMeta = null, $lang = null, $options = LIBXML_NOWARNING | LIBXML_NOERROR): array
    {
        $html = $this->curl_get_contents($url, $lang, $this->userAgent);

        $doc = new DOMDocument();

        // Collect libxml errors internally while parsing, then restore the caller's setting.
        $libxml_previous_state = libxml_use_internal_errors(true);
        $doc->loadHTML('<?xml encoding="utf-8" ?>'.$html, $options);
        // Log parse errors only when options were given and neither suppression flag is set.
        if ($options > 0 && ($options & (LIBXML_NOWARNING | LIBXML_NOERROR)) === 0) {
            Log::warning(libxml_get_errors());
        }
        libxml_clear_errors();
        libxml_use_internal_errors($libxml_previous_state);

        $metadata = [];
        foreach ($doc->getElementsByTagName('meta') as $tag) {
            // The key is resolved per tag so that a non-matching tag can never reuse
            // the key/value pair of the tag before it.
            $key = $this->resolve_meta_key($tag, (bool) $allMeta);
            // empty() also rejects the keys '' and '0'.
            if (!empty($key)) {
                $metadata[$key] = $this->get_meta_value($tag);
            }
        }

        // Verify the image once, after all tags are read, instead of issuing one
        // HTTP probe for every <meta> tag that follows the image tag.
        if (isset($metadata['image']) && !$this->verify_image_url($metadata['image'])) {
            $metadata['image'] = '';
        }

        return $metadata;
    }

    /**
     * Decide under which key a <meta> element is stored, or null when it is skipped.
     *
     * @param \DOMElement $tag     The <meta> element being inspected.
     * @param bool        $allMeta Whether non-Open-Graph tags are collected as well.
     *
     * @return string|null
     */
    private function resolve_meta_key(\DOMElement $tag, bool $allMeta): ?string
    {
        $hasProperty = $tag->hasAttribute('property');
        $metaProperty = $hasProperty ? $tag->getAttribute('property') : $tag->getAttribute('name');
        if (!$metaProperty) {
            return null;
        }

        if (str_starts_with($metaProperty, 'og:')) {
            // Outside all-meta mode an `og:` name only counts when it comes from the
            // `property` attribute, not from `name`.
            if (!$allMeta && !$hasProperty) {
                return null;
            }

            return strtr(substr($metaProperty, 3), '-', '_');
        }

        return $allMeta ? $metaProperty : null;
    }
65
66
    /**
     * Download the body of a URL with cURL.
     *
     * @param string      $url       Address to request.
     * @param string|null $lang      When truthy, sent as the Accept-Language header.
     * @param string      $userAgent Sent as the User-Agent header.
     *
     * @throws FetchException When cURL reports a non-zero error number.
     *
     * @return bool|string The value returned by curl_exec().
     */
    protected function curl_get_contents($url, $lang, $userAgent): bool|string
    {
        $headers = [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Cache-Control: no-cache',
            'User-Agent: '.$userAgent,
        ];

        if ($lang) {
            $headers[] = 'Accept-Language: '.$lang;
        }

        $curl = curl_init();

        curl_setopt_array($curl, [
            CURLOPT_URL            => $url,
            CURLOPT_FAILONERROR    => false,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_SSL_VERIFYPEER => true,
            // This option sets the Accept-Encoding header (gzip, deflate, ...), not a
            // character set. The empty string advertises every content encoding the
            // installed cURL can decode.
            CURLOPT_ENCODING       => '',
            CURLOPT_MAXREDIRS      => 10,
            CURLOPT_TIMEOUT        => 30,
            CURLOPT_HTTP_VERSION   => CURL_HTTP_VERSION_1_1,
            CURLOPT_CUSTOMREQUEST  => 'GET',
            CURLOPT_HTTPHEADER     => $headers,
        ]);

        $response = curl_exec($curl);

        $errorNumber = curl_errno(/** @scrutinizer ignore-type */ $curl);
        if ($errorNumber !== 0) {
            // Read the error details before closing so the handle is released on the
            // failure path as well.
            $errorMessage = curl_error(/** @scrutinizer ignore-type */ $curl);
            $transferInfo = curl_getinfo(/** @scrutinizer ignore-type */ $curl);
            curl_close(/** @scrutinizer ignore-type */ $curl);

            throw new FetchException($errorMessage, $errorNumber, null, $transferInfo);
        }

        curl_close(/** @scrutinizer ignore-type */ $curl);

        return $response;
    }
107
108
    /**
     * Check that an image URL is well-formed and that requesting it yields "200 OK".
     *
     * @param string $url Image URL taken from the page metadata.
     *
     * @return bool True when the URL validates and the first status line returned by
     *              get_headers() contains "200 OK"; false otherwise.
     */
    protected function verify_image_url($url)
    {
        $path = parse_url($url, PHP_URL_PATH);
        // parse_url() returns null when the URL has no path and false when it is
        // malformed; only a real path is re-encoded.
        if (is_string($path) && $path !== '') {
            // Decode each segment before encoding it so that segments which are already
            // percent-encoded are not encoded a second time; rawurlencode() writes a
            // space as %20, which is the correct form inside a path.
            $encoded_path = array_map(
                static fn (string $segment): string => rawurlencode(rawurldecode($segment)),
                explode('/', $path)
            );
            $url = str_replace($path, implode('/', $encoded_path), $url);
        }

        if (!filter_var($url, FILTER_VALIDATE_URL)) {
            return false;
        }

        try {
            // NOTE(review): certificate verification is switched off for this probe —
            // confirm that this is intentional.
            $contextHeaders = ['ssl' => [
                'verify_peer'      => false,
                'verify_peer_name' => false,
            ]];

            if ($this->userAgent !== self::DEFAULT_USER_AGENT) {
                $contextHeaders['http'] = [
                    'user_agent' => $this->userAgent,
                ];
            }

            $streamContext = stream_context_create($contextHeaders);
            $headers = get_headers($url, true, $streamContext);

            // get_headers() returns false when the request fails.
            if ($headers === false || !isset($headers[0])) {
                return false;
            }

            // NOTE(review): index 0 is the first status line, so a redirect that ends in
            // a 200 is still rejected — confirm whether that is wanted.
            return stripos($headers[0], '200 OK') !== false;
        } catch (\Exception $e) {
            Log::error($e->getMessage());

            return false;
        }
    }
138
139
    /**
     * Read the value of a <meta> element.
     *
     * The `content` attribute is preferred; `value` is the fallback. Attributes are
     * compared against the empty string rather than tested with empty(), so that a
     * legitimate value of "0" is kept.
     *
     * @param \DOMElement $tag The <meta> element to read.
     *
     * @return string The attribute value, or '' when neither attribute is filled.
     */
    protected function get_meta_value($tag)
    {
        foreach (['content', 'value'] as $attribute) {
            $value = $tag->getAttribute($attribute);
            if ((string) $value !== '') {
                return $value;
            }
        }

        return '';
    }
151
152
    /**
     * Replace the User-Agent string used by this instance.
     *
     * @param string $userAgent New User-Agent value.
     *
     * @return self The same instance, so calls can be chained.
     */
    public function userAgent(string $userAgent): self
    {
        $this->userAgent = $userAgent;

        return $this;
    }
158
}
159