Completed
Push — master ( b23c5f...0489e6 )
by Jan-Petter
02:23
created

UriClient::headerCharset()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 1 Features 0
Metric Value
c 1
b 1
f 0
dl 0
loc 7
rs 9.4285
cc 2
eloc 4
nc 2
nop 0
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use GuzzleHttp;
5
6
/**
 * Class UriClient
 *
 * Fetches self::PATH relative to a base URI with Guzzle, then passes the resulting status code, body and
 * character encoding on to the parent TxtClient. The response headers are additionally used to derive
 * cache-related timestamps (see nextUpdate() and validUntil()).
 *
 * @package vipnytt\RobotsTxtParser
 */
class UriClient extends TxtClient
{
    /**
     * ANSI C's asctime() format
     *
     * asctime() prints the hour on a 24-hour clock, hence `H` (not the 12-hour `h`).
     */
    const DATE_ASCTIME = 'D M j H:i:s Y';

    /**
     * HTTP date formats, tried in this order by parseHttpDate()
     */
    const DATE_HTTP = [
        DATE_RFC1123,
        DATE_RFC850,
        self::DATE_ASCTIME,
    ];

    /**
     * GuzzleHttp default config
     *
     * Caller-supplied options passed to the constructor override these defaults key by key.
     */
    const GUZZLE_HTTP_CONFIG = [
        'allow_redirects' => [
            'max' => self::MAX_REDIRECTS,
            'referer' => true,
            'strict' => true,
        ],
        'decode_content' => false,
        'headers' => [
            'accept' => 'text/plain;q=1.0, text/*;q=0.8, */*;q=0.1',
            'accept-charset' => 'utf-8;q=1.0, *;q=0.1',
            'accept-encoding' => 'identity;q=1.0, *;q=0.1',
            'user-agent' => 'RobotsTxtParser-VIPnytt/2.0 (+https://github.com/VIPnytt/RobotsTxtParser/blob/master/README.md)',
        ],
        'http_errors' => false,
        'verify' => true,
    ];

    /**
     * Base uri
     * @var string
     */
    private $base;

    /**
     * Request timestamp (set before the request, refreshed once the response has arrived)
     * @var int
     */
    private $time;

    /**
     * HTTP response, or null when the transfer failed
     * @var \Psr\Http\Message\ResponseInterface|null
     */
    private $response;

    /**
     * Cache-Control max-age
     * @var int
     */
    private $maxAge;

    /**
     * Robots.txt contents
     * @var string
     */
    private $contents;

    /**
     * Robots.txt character encoding
     * @var string
     */
    private $encoding;

    /**
     * UriClient constructor.
     *
     * Performs the HTTP request immediately. A Guzzle transfer failure is not propagated; instead the
     * status code is set to null, the contents to an empty string, the encoding to self::ENCODING and
     * max-age to 0 before the parent constructor is invoked.
     *
     * @param string $baseUri
     * @param array $guzzleConfig Guzzle request options overriding self::GUZZLE_HTTP_CONFIG
     * @param int|null $byteLimit
     */
    public function __construct($baseUri, array $guzzleConfig = [], $byteLimit = self::BYTE_LIMIT)
    {
        $this->base = $this->urlBase($this->urlEncode($baseUri));
        $this->time = time();
        try {
            $client = new GuzzleHttp\Client(
                // array_replace_recursive() lets a caller override a default option. The previously used
                // array_merge_recursive() turned every overridden scalar (e.g. 'verify' => false) into an
                // array holding both the default and the custom value.
                array_replace_recursive(
                    self::GUZZLE_HTTP_CONFIG,
                    $guzzleConfig,
                    [
                        // Always last, so the base uri cannot be overridden by $guzzleConfig
                        'base_uri' => $this->base,
                    ]
                )
            );
            $this->response = $client->request('GET', self::PATH);
            $this->time = time();
            $this->statusCode = $this->response->getStatusCode();
            $this->contents = $this->response->getBody()->getContents();
            $this->encoding = $this->headerCharset();
            $this->maxAge = $this->headerMaxAge();
        } catch (GuzzleHttp\Exception\TransferException $e) {
            $this->statusCode = null;
            $this->contents = '';
            $this->encoding = self::ENCODING;
            $this->maxAge = 0;
        }
        parent::__construct($this->base, $this->statusCode, $this->contents, $this->encoding, $byteLimit);
    }

    /**
     * Content-Type encoding HTTP header
     *
     * @link https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
     *
     * @return string The `charset` parameter of the Content-Type header, or self::ENCODING if absent
     */
    private function headerCharset()
    {
        if (($value = $this->parseHeader($this->response->getHeader('content-type'), 'charset', ';')) !== false) {
            return $value;
        }
        return self::ENCODING;
    }

    /**
     * Extract the value of a `name=value` part from a list of header values
     *
     * The part name is matched case-insensitively. Surrounding whitespace and quotes are stripped from
     * the value, so `charset="utf-8"` yields `utf-8`. Parts with an empty value are ignored.
     *
     * @param string[] $headers Header values, as returned by ResponseInterface::getHeader()
     * @param string $part Name of the part to look for, eg. `charset`
     * @param string $delimiter Literal (non-empty) string separating the parts of one header value
     * @return string|false The first matching non-empty value, false if there is none
     */
    private function parseHeader(array $headers, $part, $delimiter = ";")
    {
        foreach ($headers as $header) {
            // The delimiter is a literal string, so explode() is used rather than the regex based mb_split()
            foreach (explode($delimiter, $header) as $string) {
                $string = trim($string);
                if (mb_stripos($string, $part . '=') !== 0) {
                    continue;
                }
                // The `=` is known to be present at this point, index 1 therefore always exists
                $value = trim(explode('=', $string, 2)[1], " \t\"'");
                if ($value !== '') {
                    return $value;
                }
            }
        }
        return false;
    }

    /**
     * Cache-Control max-age HTTP header
     *
     * @link https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9.3
     *
     * @return int The `max-age` directive in seconds, 0 if absent
     */
    private function headerMaxAge()
    {
        if (($value = $this->parseHeader($this->response->getHeader('cache-control'), 'max-age', ',')) !== false) {
            return intval($value);
        }
        return 0;
    }

    /**
     * Base UriClient
     *
     * @return string
     */
    public function getBaseUri()
    {
        return $this->base;
    }

    /**
     * Status code
     *
     * @return int|null Null if the transfer failed
     */
    public function getStatusCode()
    {
        return $this->statusCode;
    }

    /**
     * URL content
     *
     * @return string
     */
    public function getContents()
    {
        return $this->contents;
    }

    /**
     * Encoding
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Next update timestamp
     *
     * Normally the request time plus self::CACHE_TIME. For a 503 response carrying a usable Retry-After
     * header, the earlier of that and the Retry-After time is returned.
     *
     * @return int
     */
    public function nextUpdate()
    {
        if (
            $this->statusCode === 503 &&
            ($retryTime = $this->headerRetryAfter()) !== false
        ) {
            return min($this->time + self::CACHE_TIME, $retryTime);
        }
        return $this->time + self::CACHE_TIME;
    }

    /**
     * Retry-After HTTP header
     *
     * Accepts both forms of the header: a number of seconds (relative to the request time) and a HTTP-date.
     *
     * @link https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.37
     *
     * @return int|false Unix timestamp, false if the header is absent or cannot be parsed
     */
    private function headerRetryAfter()
    {
        if ($this->response === null) {
            // No response available (the transfer failed), hence no header to read
            return false;
        }
        // getHeader() returns a list of strings; each entry is tried on its own. The comma-joined header
        // line is tried last, in case a HTTP-date ("Fri, 31 Dec ...") has been split at its comma.
        $candidates = $this->response->getHeader('retry-after');
        $candidates[] = $this->response->getHeaderLine('retry-after');
        foreach ($candidates as $candidate) {
            $value = trim($candidate);
            if (is_numeric($value)) {
                // Cast, so that the documented integer return type holds
                return $this->time + (int)$value;
            }
            $time = $this->parseHttpDate($value);
            if ($time !== false) {
                return $time;
            }
        }
        return false;
    }

    /**
     * Parse HTTP-date
     *
     * Tries each of the formats in self::DATE_HTTP until one matches.
     *
     * @param string $string
     * @return int|false Unix timestamp, false if no format matches
     */
    private function parseHttpDate($string)
    {
        $timeZone = new \DateTimeZone('GMT');
        foreach (self::DATE_HTTP as $format) {
            $dateTime = date_create_from_format($format, $string, $timeZone);
            if ($dateTime !== false) {
                return (int)date_format($dateTime, 'U');
            }
        }
        return false;
    }

    /**
     * Valid until timestamp
     *
     * The request time plus the larger of self::CACHE_TIME and the Cache-Control max-age.
     *
     * @return int
     */
    public function validUntil()
    {
        return $this->time + max(self::CACHE_TIME, $this->maxAge);
    }
}
271