Completed
Push — master ( c07126...706ec6 )
by Jan-Petter
04:09
created

UriClient::request()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 50
Code Lines 39

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 2 Features 1
Metric Value
c 3
b 2
f 1
dl 0
loc 50
rs 9.3333
cc 2
eloc 39
nc 2
nop 1
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use Composer\CaBundle\CaBundle;
5
use vipnytt\RobotsTxtParser\Parser\StatusCodeParser;
6
use vipnytt\RobotsTxtParser\Parser\UriParser;
7
8
/**
 * Class UriClient
 *
 * Fetches `<base uri> . self::PATH` with cURL and passes the outcome of that
 * request (status code, body, character encoding and effective URI) on to the
 * parent TxtClient. A failed request is handled as an empty document with no
 * status code.
 *
 * @see https://github.com/VIPnytt/RobotsTxtParser/blob/master/docs/methods/UriClient.md for documentation
 * @package vipnytt\RobotsTxtParser
 */
class UriClient extends TxtClient
{
    /**
     * User-agent sent with every request
     */
    const CURL_USER_AGENT = 'RobotsTxtParser-VIPnytt/2.0 (+https://github.com/VIPnytt/RobotsTxtParser/blob/master/README.md)';

    /**
     * Base uri, as produced by UriParser::base() from the constructor argument
     * @var string
     */
    private $base;

    /**
     * Header parser, receives the response headers through the cURL header callback
     * @var Parser\HeaderParser
     */
    private $headerParser;

    /**
     * Request timestamp (unix time of the completed or failed request)
     * @var int
     */
    private $time;

    /**
     * Status code as reported by cURL, or null if the request failed
     * @var int|null
     */
    private $rawStatusCode;

    /**
     * Effective uri (base of the URL cURL ended up at after redirects)
     * @var string
     */
    private $effective;

    /**
     * Cache-Control max-age
     * @var int
     */
    private $rawMaxAge;

    /**
     * Robots.txt contents
     * @var string
     */
    private $rawContents;

    /**
     * Robots.txt character encoding
     * @var string
     */
    private $rawEncoding;

    /**
     * UriClient constructor.
     *
     * Performs the request immediately. If the request fails, the client falls
     * back to an empty body, a null status code, the default encoding, a max-age
     * of zero and the base uri as effective uri.
     *
     * @param string $baseUri
     * @param array $curlOptions Custom cURL options, applied on top of the defaults
     * @param int|null $byteLimit
     */
    public function __construct($baseUri, array $curlOptions = [], $byteLimit = self::BYTE_LIMIT)
    {
        $uriParser = new UriParser($baseUri);
        $this->base = $uriParser->base();
        if ($this->request($curlOptions) === false) {
            // Request failed: populate every field with a neutral default
            $this->time = time();
            $this->effective = $this->base;
            $this->rawStatusCode = null;
            $this->rawContents = '';
            $this->rawEncoding = self::ENCODING;
            $this->rawMaxAge = 0;
        }
        parent::__construct($this->base, $this->rawStatusCode, $this->rawContents, $this->rawEncoding, $this->effective, $byteLimit);
    }

    /**
     * cURL request
     *
     * Options are applied in three layers: defaults first, then the caller's
     * custom options, then a fixed set that custom options are not allowed to
     * override. Object state is only written when the transfer succeeded.
     *
     * @param array $options Custom cURL options
     * @return bool True on success, false if the handle could not be created or the transfer failed
     */
    private function request(array $options = [])
    {
        $curl = curl_init();
        if ($curl === false) {
            // No handle available, treat it like any other failed request
            return false;
        }
        // Set default cURL options
        curl_setopt_array($curl, [
            CURLOPT_AUTOREFERER => true,
            CURLOPT_CAINFO => CaBundle::getSystemCaRootBundlePath(),
            CURLOPT_CONNECTTIMEOUT => 30,
            CURLOPT_ENCODING => 'identity',
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_NONE,
            CURLOPT_IPRESOLVE => CURL_IPRESOLVE_WHATEVER,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_SSL_VERIFYPEER => true,
            //CURLOPT_SSL_VERIFYSTATUS => true, // PHP 7.0.7
            CURLOPT_TIMEOUT => 120,
            CURLOPT_USERAGENT => self::CURL_USER_AGENT,
        ]);
        // Apply custom cURL options
        curl_setopt_array($curl, $options);
        $this->headerParser = new Parser\HeaderParser($curl);
        // Make sure these cURL options stays untouched
        curl_setopt_array($curl, [
            CURLOPT_FAILONERROR => false,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_FTPSSLAUTH => CURLFTPAUTH_DEFAULT,
            CURLOPT_HEADER => false,
            CURLOPT_HEADERFUNCTION => [$this->headerParser, 'curlCallback'],
            CURLOPT_HTTPAUTH => CURLAUTH_BASIC,
            CURLOPT_MAXREDIRS => self::MAX_REDIRECTS,
            CURLOPT_NOBODY => false,
            CURLOPT_PROTOCOLS => CURLPROTO_FTP | CURLPROTO_FTPS | CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_SFTP,
            CURLOPT_REDIR_PROTOCOLS => CURLPROTO_FTP | CURLPROTO_FTPS | CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_SFTP,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_URL => $this->base . self::PATH,
            CURLOPT_USERPWD => 'anonymous:anonymous@',
        ]);
        // Execute cURL request
        $contents = curl_exec($curl);
        if ($contents === false) {
            // Request failed, release the handle before reporting the failure
            curl_close($curl);
            return false;
        }
        $time = time();
        $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); // also works with FTP status codes
        $effectiveUrl = curl_getinfo($curl, CURLINFO_EFFECTIVE_URL);
        // Everything needed from the handle has been read; close it before any
        // further parsing so it is released regardless of what the parsers do
        curl_close($curl);
        $uriParser = new UriParser($effectiveUrl);
        $this->rawContents = $contents;
        $this->time = $time;
        $this->rawStatusCode = $statusCode;
        $this->effective = $uriParser->base();
        $this->rawEncoding = $this->headerParser->getCharset();
        $this->rawMaxAge = $this->headerParser->getMaxAge();
        return true;
    }

    /**
     * Base uri
     *
     * @return string
     */
    public function getBaseUri()
    {
        return $this->base;
    }

    /**
     * Effective uri
     *
     * @return string|null
     */
    public function getEffectiveUri()
    {
        return $this->effective;
    }

    /**
     * Status code
     *
     * The raw status code is only returned if StatusCodeParser considers it
     * valid for the scheme of the effective uri.
     *
     * @return int|null
     */
    public function getStatusCode()
    {
        $statusCodeParser = new StatusCodeParser($this->rawStatusCode, parse_url($this->effective, PHP_URL_SCHEME));
        return $statusCodeParser->isValid() ? $this->rawStatusCode : null;
    }

    /**
     * Body content
     *
     * @return string
     */
    public function getContents()
    {
        return $this->rawContents;
    }

    /**
     * Encoding
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->rawEncoding;
    }

    /**
     * Next update timestamp
     *
     * Normally the request time plus self::CACHE_TIME. For a 503 response on an
     * http(s) base uri, the Retry-After value from the header parser is used
     * instead when it is smaller than self::CACHE_TIME.
     *
     * @return int
     */
    public function nextUpdate()
    {
        if (
            $this->rawStatusCode === 503 &&
            strpos($this->base, 'http') === 0
        ) {
            return $this->time + min(self::CACHE_TIME, $this->headerParser->getRetryAfter($this->time));
        }
        return $this->time + self::CACHE_TIME;
    }

    /**
     * Valid until timestamp
     *
     * Request time plus the larger of self::CACHE_TIME and the max-age value.
     *
     * @return int
     */
    public function validUntil()
    {
        return $this->time + max(self::CACHE_TIME, $this->rawMaxAge);
    }
}
225