UriClient::request()   B
last analyzed

Complexity

Conditions 5
Paths 2

Size

Total Lines 56

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 56
rs 8.6488
c 0
b 0
f 0
cc 5
nc 2
nop 1

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
/**
3
 * vipnytt/RobotsTxtParser
4
 *
5
 * @link https://github.com/VIPnytt/RobotsTxtParser
6
 * @license https://github.com/VIPnytt/RobotsTxtParser/blob/master/LICENSE The MIT License (MIT)
7
 */
8
9
namespace vipnytt\RobotsTxtParser;
10
11
use Composer\CaBundle\CaBundle;
12
use vipnytt\RobotsTxtParser\Parser\StatusCodeParser;
13
use vipnytt\RobotsTxtParser\Parser\UriParser;
14
15
/**
 * Class UriClient
 *
 * Fetches the robots.txt file of a base URI with cURL and hands the result
 * (status code, body, encoding, effective URI) over to the parent TxtClient.
 *
 * @see https://github.com/VIPnytt/RobotsTxtParser/blob/master/docs/methods/UriClient.md for documentation
 * @package vipnytt\RobotsTxtParser
 */
class UriClient extends TxtClient
{
    /**
     * User-agent sent with every request
     */
    const CURL_USER_AGENT = 'RobotsTxtParser-VIPnytt/2.1 (+https://github.com/VIPnytt/RobotsTxtParser/blob/master/README.md)';

    /**
     * Base uri
     * @var string
     */
    private $base;

    /**
     * Header parser, receives the response headers through the cURL header callback
     * @var Parser\HeaderParser
     */
    private $headerParser;

    /**
     * Request timestamp
     * @var int
     */
    private $time;

    /**
     * Status code, null when the request failed
     * @var int|null
     */
    private $rawStatusCode;

    /**
     * Effective uri (base of the URL cURL reports after following redirects)
     * @var string
     */
    private $effective;

    /**
     * Cache-Control max-age
     * @var int
     */
    private $rawMaxAge;

    /**
     * Robots.txt contents
     * @var string
     */
    private $rawContents;

    /**
     * Robots.txt character encoding
     * @var string
     */
    private $rawEncoding;

    /**
     * UriClient constructor.
     *
     * Performs the request immediately. When the transfer fails, the client
     * falls back to an empty body, a null status code, the default encoding
     * and the base uri as effective uri before delegating to the parent.
     *
     * @param string $baseUri
     * @param array $curlOptions Custom cURL options, applied on top of the defaults
     * @param int|null $byteLimit
     */
    public function __construct($baseUri, array $curlOptions = [], $byteLimit = self::BYTE_LIMIT)
    {
        $uriParser = new UriParser($baseUri);
        $this->base = $uriParser->base();
        if ($this->request($curlOptions) === false) {
            // Transfer failed: populate neutral values so the parent still gets consistent input
            $this->time = time();
            $this->effective = $this->base;
            $this->rawStatusCode = null;
            $this->rawContents = '';
            $this->rawEncoding = self::ENCODING;
            $this->rawMaxAge = 0;
        }
        parent::__construct($this->base, $this->rawStatusCode, $this->rawContents, $this->rawEncoding, $this->effective, $byteLimit);
    }

    /**
     * cURL request
     *
     * Options are applied in three layers: defaults, then the caller's custom
     * options, then a set of enforced options that always win. The cURL handle
     * is closed on every path, including when the transfer fails.
     *
     * @param array $options Custom cURL options
     * @return bool False if the transfer failed, true otherwise
     */
    private function request($options = [])
    {
        $curl = curl_init();
        // Set default cURL options
        curl_setopt_array($curl, $this->buildDefaultCurlOptions());
        // Apply custom cURL options
        curl_setopt_array($curl, $options);
        // Initialize the header parser
        $this->headerParser = new Parser\HeaderParser($curl);
        // Make sure these cURL options stays untouched
        curl_setopt_array($curl, $this->buildEnforcedCurlOptions());
        // Execute cURL request
        $contents = curl_exec($curl);
        if ($contents === false) {
            // Request failed; release the handle before bailing out
            curl_close($curl);
            return false;
        }
        $this->rawContents = $contents;
        $this->time = time();
        $this->rawStatusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); // also works with FTP status codes
        $effectiveUrl = curl_getinfo($curl, CURLINFO_EFFECTIVE_URL);
        // Everything needed from the handle has been read; close it before any further parsing
        curl_close($curl);
        $uriParser = new UriParser($effectiveUrl);
        $this->effective = $uriParser->base();
        $this->rawEncoding = $this->headerParser->getCharset();
        $this->rawMaxAge = $this->headerParser->getMaxAge();
        return true;
    }

    /**
     * Default cURL options, may be overridden by the caller's custom options
     *
     * @return array
     */
    private function buildDefaultCurlOptions()
    {
        $caPathOrFile = CaBundle::getSystemCaRootBundlePath();
        // The system CA bundle may be a directory (or a link to one) or a single file;
        // cURL expects a different option for each case
        $caIsDirectory = is_dir($caPathOrFile) ||
            (
                is_link($caPathOrFile) &&
                is_dir(readlink($caPathOrFile))
            );
        return [
            CURLOPT_AUTOREFERER => true,
            CURLOPT_CONNECTTIMEOUT => 30,
            CURLOPT_ENCODING => 'identity',
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_NONE,
            CURLOPT_IPRESOLVE => CURL_IPRESOLVE_WHATEVER,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_TIMEOUT => 120,
            CURLOPT_USERAGENT => self::CURL_USER_AGENT,
            ($caIsDirectory ? CURLOPT_CAPATH : CURLOPT_CAINFO) => $caPathOrFile,
        ];
    }

    /**
     * Enforced cURL options, applied last so custom options cannot change them
     *
     * Requires the header parser to be initialized, since it is registered as
     * the header callback.
     *
     * @return array
     */
    private function buildEnforcedCurlOptions()
    {
        $protocols = CURLPROTO_FTP | CURLPROTO_FTPS | CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_SFTP;
        return [
            CURLOPT_FAILONERROR => false,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_FTPSSLAUTH => CURLFTPAUTH_DEFAULT,
            CURLOPT_HEADER => false,
            CURLOPT_HEADERFUNCTION => [$this->headerParser, 'curlCallback'],
            CURLOPT_HTTPAUTH => CURLAUTH_BASIC,
            CURLOPT_MAXREDIRS => self::MAX_REDIRECTS,
            CURLOPT_NOBODY => false,
            CURLOPT_PROTOCOLS => $protocols,
            CURLOPT_REDIR_PROTOCOLS => $protocols,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_URL => $this->base . self::PATH,
            CURLOPT_USERPWD => 'anonymous:anonymous@',
        ];
    }

    /**
     * Base uri
     *
     * @return string
     */
    public function getBaseUri()
    {
        return $this->base;
    }

    /**
     * Effective uri
     *
     * @return string|null
     */
    public function getEffectiveUri()
    {
        return $this->effective;
    }

    /**
     * Status code
     *
     * @return int|null The raw status code when the status code parser accepts it
     *                  for the scheme of the effective uri, null otherwise
     */
    public function getStatusCode()
    {
        $statusCodeParser = new StatusCodeParser($this->rawStatusCode, parse_url($this->effective, PHP_URL_SCHEME));
        return $statusCodeParser->isValid() ? $this->rawStatusCode : null;
    }

    /**
     * Body content
     *
     * @return string
     */
    public function getContents()
    {
        return $this->rawContents;
    }

    /**
     * Encoding
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->rawEncoding;
    }

    /**
     * Next update timestamp
     *
     * For a 503 response on a base uri starting with "http", the Retry-After
     * value is honoured, capped at the regular cache time. In every other case
     * the regular cache time is used.
     *
     * @return int
     */
    public function nextUpdate()
    {
        if ($this->rawStatusCode === 503 &&
            strpos($this->base, 'http') === 0
        ) {
            return $this->time + min(self::CACHE_TIME, $this->headerParser->getRetryAfter($this->time));
        }
        return $this->time + self::CACHE_TIME;
    }

    /**
     * Valid until timestamp
     *
     * Uses whichever is larger: the regular cache time or the Cache-Control max-age.
     *
     * @return int
     */
    public function validUntil()
    {
        return $this->time + max(self::CACHE_TIME, $this->rawMaxAge);
    }
}
237