Completed
Push — master ( 512542...4d85fd )
by Jan-Petter
02:22
created

UriClient::parseHeader()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 11
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 11
rs 9.2
cc 4
eloc 6
nc 4
nop 3
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use GuzzleHttp;
5
6
/**
 * Class UriClient
 *
 * Downloads the file at self::PATH (presumably the robots.txt path; the
 * constant is declared outside this class) relative to a base URI using
 * Guzzle, records the status code, body, character encoding and
 * Cache-Control max-age of the response, and hands the result to the
 * parent TxtClient.
 *
 * @package vipnytt\RobotsTxtParser
 */
class UriClient extends TxtClient
{
    /**
     * Default Guzzle request options.
     *
     * Options passed to the constructor override these key by key.
     */
    const GUZZLE_HTTP_CONFIG = [
        'allow_redirects' => [
            'max' => self::MAX_REDIRECTS,
            'referer' => true,
            'strict' => true,
        ],
        'decode_content' => true,
        'headers' => [
            'Accept' => 'text/plain;q=1.0, text/*;q=0.8, */*;q=0.1',
            'Accept-Charset' => 'utf-8;q=1.0, *;q=0.1',
            'Accept-Encoding' => 'identity;q=1.0, *;q=0.1',
            'User-Agent' => 'RobotsTxtParser-VIPnytt/2.0 (+https://github.com/VIPnytt/RobotsTxtParser/blob/master/README.md)',
        ],
        'http_errors' => false,
        'verify' => true,
    ];

    /**
     * Base uri
     * @var string
     */
    private $base;

    /**
     * Timestamp taken when the request completed (or failed)
     * @var int
     */
    private $time;

    /**
     * Cache-Control max-age, in seconds
     * @var int
     */
    private $maxAge;

    /**
     * Response body
     * @var string
     */
    private $contents;

    /**
     * Character encoding of the response body
     * @var string
     */
    private $encoding;

    /**
     * UriClient constructor.
     *
     * Performs the HTTP request immediately. A Guzzle transfer failure is not
     * propagated: it is recorded as status code 523 with empty contents, the
     * default encoding and a max-age of 0.
     *
     * @param string $baseUri
     * @param array $guzzleConfig Guzzle request options overriding self::GUZZLE_HTTP_CONFIG
     * @param int|null $byteLimit
     */
    public function __construct($baseUri, array $guzzleConfig = [], $byteLimit = self::BYTE_LIMIT)
    {
        $this->base = $this->urlBase($this->urlEncode($baseUri));
        try {
            // array_replace_recursive() makes later arrays override earlier
            // ones key by key. array_merge_recursive() would instead collect
            // both values of a scalar option (e.g. 'verify') into an array,
            // corrupting any default the caller tries to override.
            $options = array_replace_recursive(
                self::GUZZLE_HTTP_CONFIG,
                $guzzleConfig,
                [
                    // Always wins, so requests go to the normalized base URI.
                    'base_uri' => $this->base,
                ]
            );
            $client = new GuzzleHttp\Client($options);
            $response = $client->request('GET', self::PATH);
            $this->time = time();
            $this->statusCode = $response->getStatusCode();
            $this->contents = $response->getBody()->getContents();
            $this->encoding = $this->headerEncoding($response->getHeader('content-type'));
            $this->maxAge = $this->headerMaxAge($response->getHeader('cache-control'));
        } catch (GuzzleHttp\Exception\TransferException $e) {
            // The timestamp must also be set on failure; otherwise
            // nextUpdate() and validUntil() would be computed from null.
            $this->time = time();
            $this->statusCode = 523;
            $this->contents = '';
            $this->encoding = self::ENCODING;
            $this->maxAge = 0;
        }
        parent::__construct($this->base, $this->statusCode, $this->contents, $this->encoding, $byteLimit);
    }

    /**
     * Content-Type encoding HTTP header
     *
     * Returns the charset parameter of the given Content-Type header values,
     * or self::ENCODING when the parameter is absent or empty.
     *
     * @param string[] $headers
     * @return string
     */
    private function headerEncoding(array $headers)
    {
        $charset = $this->parseHeader($headers, 'charset', ';');
        if ($charset === false || $charset === '') {
            return self::ENCODING;
        }
        return $charset;
    }

    /**
     * Client header
     *
     * Splits every header value on the delimiter and returns the value of the
     * first "<part>=<value>" segment found. The part name is matched
     * case-insensitively; surrounding whitespace and quotes are stripped from
     * the value, so charset="utf-8" yields utf-8.
     *
     * @param string[] $headers
     * @param string $part
     * @param string $delimiter Literal separator between segments
     * @return string|false The value, or false when no segment matches
     */
    private function parseHeader(array $headers, $part, $delimiter = ";")
    {
        $prefix = $part . '=';
        foreach ($headers as $header) {
            // explode() treats the delimiter literally, not as a regex.
            foreach (explode($delimiter, $header) as $segment) {
                $segment = trim($segment);
                if (mb_stripos($segment, $prefix) !== 0) {
                    continue;
                }
                // Everything after the first '=' is the value.
                $value = explode('=', $segment, 2)[1];
                return trim($value, " \t\"'");
            }
        }
        return false;
    }

    /**
     * Cache-Control max-age HTTP header
     *
     * Returns the max-age directive of the given Cache-Control header values
     * as a non-negative integer, or 0 when the directive is absent.
     *
     * @param string[] $headers
     * @return int
     */
    private function headerMaxAge(array $headers)
    {
        $seconds = $this->parseHeader($headers, 'max-age', ',');
        if ($seconds === false) {
            return 0;
        }
        // Clamp so a malformed negative value is never stored.
        return max(0, intval($seconds));
    }

    /**
     * Base UriClient
     *
     * @return string
     */
    public function getBaseUri()
    {
        return $this->base;
    }

    /**
     * Status code
     *
     * @return int
     */
    public function getStatusCode()
    {
        return $this->statusCode;
    }

    /**
     * URL content
     *
     * @return string
     */
    public function getContents()
    {
        return $this->contents;
    }

    /**
     * Encoding
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Next update timestamp
     *
     * Request timestamp plus self::CACHE_TIME.
     *
     * @return int
     */
    public function nextUpdate()
    {
        return $this->time + self::CACHE_TIME;
    }

    /**
     * Valid until timestamp
     *
     * Request timestamp plus the larger of self::CACHE_TIME and the
     * Cache-Control max-age of the response.
     *
     * @return int
     */
    public function validUntil()
    {
        return $this->time + max(self::CACHE_TIME, $this->maxAge);
    }
}
203