Completed
Push — master ( 929853...4e0c1b )
by Jan-Petter
02:17
created

Download::headerMaxAge()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 13
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 13
rs 9.2
cc 4
eloc 8
nc 4
nop 1
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use DateTime;
5
use GuzzleHttp;
6
use vipnytt\RobotsTxtParser\Client;
7
use vipnytt\RobotsTxtParser\Parser\RobotsTxtInterface;
8
9
/**
 * Class Download
 *
 * Fetches `/robots.txt` relative to a base URI using Guzzle and keeps the
 * outcome of that single request: HTTP status code, body, character encoding
 * and the Cache-Control max-age. A parser client for the downloaded contents
 * is created lazily by parserClient().
 *
 * The constants referenced through `self::` (MAX_REDIRECTS, ENCODING,
 * BYTE_LIMIT and CACHE_TIME) are not declared in this class; presumably they
 * are provided by RobotsTxtInterface.
 *
 * @package vipnytt\RobotsTxtParser
 */
class Download implements RobotsTxtInterface
{
    /**
     * Base uri the robots.txt file is requested from
     * @var string
     */
    protected $baseUri;

    /**
     * Unix timestamp taken when the download finished (or failed)
     * @var int
     */
    protected $time;

    /**
     * Robots.txt max-age in seconds, taken from the Cache-Control header
     * @var int|null
     */
    protected $maxAge = null;

    /**
     * HTTP Status code
     * @var int
     */
    protected $statusCode;

    /**
     * Robots.txt contents
     * @var string
     */
    protected $contents;

    /**
     * Robots.txt character encoding
     * @var string
     */
    protected $encoding;

    /**
     * Parser client class, created on the first call to parserClient()
     * @var Client
     */
    protected $parserClient;

    /**
     * Download constructor.
     *
     * Performs the HTTP request immediately. Transfer failures are not
     * propagated: the instance is populated with status code 523, empty
     * contents, the default encoding and a max-age of 0 instead.
     *
     * @param string $baseUri
     * @param array $guzzleConfig Guzzle client options overriding the defaults below
     */
    public function __construct($baseUri, array $guzzleConfig = [])
    {
        $this->baseUri = $baseUri;
        try {
            $client = new GuzzleHttp\Client(
                // array_replace_recursive() lets caller supplied values replace
                // the defaults key by key. array_merge_recursive() would instead
                // combine same-key scalars into arrays (e.g. 'timeout' => [60, 10]),
                // which is not a valid Guzzle configuration.
                array_replace_recursive(
                    [
                        'allow_redirects' => [
                            'max' => self::MAX_REDIRECTS,
                            'referer' => true,
                            'strict' => true,
                            'track_redirects' => true,
                        ],
                        'base_uri' => $baseUri,
                        'headers' => [
                            'Accept' => 'text/plain;q=1.0, text/*;q=0.8, */*;q=0.1',
                            'Accept-Charset' => 'utf-8;q=1.0, *;q=0.1',
                            'Accept-Encoding' => 'identity;q=1.0, *;q=0.1',
                            'User-Agent' => 'RobotsTxtParser-VIPnytt/1.0 (+https://github.com/VIPnytt/RobotsTxtParser/blob/master/README.md)',
                        ],
                        // Error status codes are returned as responses, not thrown.
                        'http_errors' => false,
                        'timeout' => 60,
                        'verify' => true,
                    ],
                    $guzzleConfig
                )
            );
            $response = $client->request('GET', '/robots.txt');
            $this->time = time();
            $this->statusCode = $response->getStatusCode();
            // Contents must be set before headerEncoding(), which falls back
            // to detecting the encoding from the downloaded contents.
            $this->contents = $response->getBody()->getContents();
            $this->encoding = $this->headerEncoding($response->getHeader('content-type'));
            $this->maxAge = $this->headerMaxAge($response->getHeader('cache-control'));
        } catch (GuzzleHttp\Exception\TransferException $e) {
            // Record the failure time as well, so that nextUpdate() and
            // validUntil() are computed from "now" rather than from null.
            $this->time = time();
            $this->statusCode = 523;
            $this->contents = '';
            $this->encoding = self::ENCODING;
            $this->maxAge = 0;
        }
    }

    /**
     * Content-Type encoding HTTP header
     *
     * Returns the first `charset` parameter that names an encoding known to
     * mbstring. Surrounding quotes are stripped, since a parameter value may
     * be sent as a quoted-string. Falls back to detectEncoding() otherwise.
     *
     * @param array $headers Content-Type header values
     * @return string
     */
    protected function headerEncoding(array $headers)
    {
        // Loop invariant: lower-cased names of all encodings mbstring supports.
        $supported = array_map('mb_strtolower', mb_list_encodings());
        foreach ($headers as $header) {
            foreach (array_map('trim', explode(';', $header)) as $parameter) {
                if (mb_stripos($parameter, 'charset=') !== 0) {
                    continue;
                }
                $encoding = trim(explode('=', $parameter, 2)[1], " \t\"'");
                if (in_array(mb_strtolower($encoding), $supported, true)) {
                    return $encoding;
                }
            }
        }
        return $this->detectEncoding();
    }

    /**
     * Manually detect encoding
     *
     * Uses mb_detect_encoding() on the downloaded contents and falls back to
     * the default encoding constant when detection fails.
     *
     * @return string
     */
    protected function detectEncoding()
    {
        $encoding = mb_detect_encoding($this->getContents());
        if ($encoding !== false) {
            return $encoding;
        }
        return self::ENCODING;
    }

    /**
     * URL content
     *
     * @return string
     */
    public function getContents()
    {
        return $this->contents;
    }

    /**
     * Cache-Control max-age HTTP header
     *
     * Returns the value of the first `max-age` directive found, in seconds.
     * Quoted values are accepted, negative values are clamped to 0, and 0 is
     * returned when no such directive is present.
     *
     * @param array $headers Cache-Control header values
     * @return int
     */
    protected function headerMaxAge(array $headers)
    {
        foreach ($headers as $header) {
            foreach (array_map('trim', explode(',', $header)) as $directive) {
                if (mb_stripos($directive, 'max-age=') !== 0) {
                    continue;
                }
                $value = trim(explode('=', $directive, 2)[1], " \t\"'");
                return max(0, intval($value));
            }
        }
        return 0;
    }

    /**
     * Parser client
     *
     * The client is created on the first call and reused afterwards, so
     * $byteLimit only has an effect on that first call.
     *
     * @param int|null $byteLimit
     * @return Client
     */
    public function parserClient($byteLimit = self::BYTE_LIMIT)
    {
        if (!is_object($this->parserClient)) {
            $this->parserClient = new Client(
                $this->baseUri,
                $this->getStatusCode(),
                $this->getContents(),
                $this->getEncoding(),
                $byteLimit
            );
        }
        return $this->parserClient;
    }

    /**
     * Status code
     *
     * @return int
     */
    public function getStatusCode()
    {
        return $this->statusCode;
    }

    /**
     * Encoding
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Next update timestamp
     *
     * Download time plus the CACHE_TIME constant.
     *
     * @return DateTime
     */
    public function nextUpdate()
    {
        $dateTime = new DateTime;
        $dateTime->setTimestamp($this->time + self::CACHE_TIME);
        return $dateTime;
    }

    /**
     * Valid until timestamp
     *
     * Download time plus the larger of the CACHE_TIME constant and the
     * max-age reported by the server.
     *
     * @return DateTime
     */
    public function validUntil()
    {
        $maxAge = is_int($this->maxAge) ? $this->maxAge : 0;
        $dateTime = new DateTime;
        $dateTime->setTimestamp($this->time + max(self::CACHE_TIME, $maxAge));
        return $dateTime;
    }
}
228