Completed
Push — master ( a6d8dc...680f8e )
by Jan-Petter
03:57
created

Download::nextUpdate()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 1 Features 0
Metric Value
c 1
b 1
f 0
dl 0
loc 6
rs 9.4285
cc 1
eloc 4
nc 1
nop 0
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use DateTime;
5
use GuzzleHttp;
6
use vipnytt\RobotsTxtParser\Client;
7
use vipnytt\RobotsTxtParser\Parser\RobotsTxtInterface;
8
9
/**
 * Class Download
 *
 * Requests `/robots.txt` relative to a base URI through Guzzle and keeps the
 * resulting HTTP status code, body and character encoding, from which a
 * parser Client can be built.
 *
 * The constants referenced as self::MAX_REDIRECTS, self::ENCODING,
 * self::BYTE_LIMIT and self::CACHE_TIME are not declared in this class;
 * presumably they come from RobotsTxtInterface (confirm against that file).
 *
 * @package vipnytt\RobotsTxtParser
 */
class Download implements RobotsTxtInterface
{
    /**
     * Base uri
     * @var string
     */
    protected $baseUri;

    /**
     * Download time (unix timestamp). Set on success as well as on failure,
     * so that nextUpdate() and validUntil() always have a base to add to.
     * @var int
     */
    protected $time;

    /**
     * Robots.txt max-age
     *
     * NOTE(review): nothing in this class assigns this property, so
     * validUntil() falls back to self::CACHE_TIME unless a subclass sets it.
     * @var int|null
     */
    protected $maxAge = null;

    /**
     * HTTP Status code
     * @var int
     */
    protected $statusCode;

    /**
     * Robots.txt contents
     * @var string
     */
    protected $contents;

    /**
     * Robots.txt character encoding
     * @var string
     */
    protected $encoding;

    /**
     * Parser client class (created lazily by parserClient())
     * @var Client
     */
    protected $parserClient;

    /**
     * Download constructor.
     *
     * Performs the HTTP request immediately. If Guzzle throws a
     * TransferException, the object is still constructed, with status code
     * 523, empty contents and the default encoding.
     *
     * @param string $baseUri
     * @param array $guzzleConfig Guzzle request options; these override the defaults below
     */
    public function __construct($baseUri, array $guzzleConfig = [])
    {
        $this->baseUri = $baseUri;
        $defaults = [
            'allow_redirects' => [
                'max' => self::MAX_REDIRECTS,
                'referer' => true,
                'strict' => true,
                'track_redirects' => true,
            ],
            'base_uri' => $baseUri,
            'headers' => [
                'Accept' => 'text/plain;q=1.0, text/*;q=0.8, */*;q=0.1',
                'Accept-Charset' => 'utf-8;q=1.0, *;q=0.1',
                'Accept-Encoding' => 'identity;q=1.0, *;q=0.1',
                'User-Agent' => 'RobotsTxtParser-VIPnytt/1.0 (+https://github.com/VIPnytt/RobotsTxtParser/blob/master/README.md)',
            ],
            'http_errors' => false,
            'timeout' => 60,
            'verify' => true,
        ];
        try {
            // array_replace_recursive() lets the caller override a default.
            // array_merge_recursive() would instead combine both values into
            // an array (e.g. 'timeout' => [60, 30]), which is not a valid option value.
            $client = new GuzzleHttp\Client(array_replace_recursive($defaults, $guzzleConfig));
            $response = $client->request('GET', '/robots.txt');
            $this->time = time();
            $this->statusCode = $response->getStatusCode();
            // Contents must be stored before headerEncoding(), whose fallback reads them.
            $this->contents = $response->getBody()->getContents();
            $this->encoding = $this->headerEncoding($response->getHeader('content-type'));
        } catch (GuzzleHttp\Exception\TransferException $e) {
            // Record the time of the failed attempt too; otherwise the
            // timestamp getters would compute from an unset value.
            $this->time = time();
            $this->statusCode = 523;
            $this->contents = '';
            $this->encoding = self::ENCODING;
        }
    }

    /**
     * HTTP header encoding
     *
     * Looks for a `charset=` parameter in the given header values and returns
     * it if mbstring lists it as a supported encoding. Falls back to
     * detectEncoding() otherwise.
     *
     * @param array $headers Values of the Content-Type header
     * @return string
     */
    protected function headerEncoding(array $headers)
    {
        // Loop-invariant: lower-case the supported encodings once.
        $supported = array_map('mb_strtolower', mb_list_encodings());
        foreach ($headers as $header) {
            foreach (array_map('trim', explode(';', $header)) as $parameter) {
                if (stripos($parameter, 'charset=') !== 0) {
                    continue;
                }
                // The parameter value may be quoted, e.g. charset="utf-8".
                $encoding = trim(explode('=', $parameter, 2)[1], " \t\"'");
                if (in_array(mb_strtolower($encoding), $supported, true)) {
                    return $encoding;
                }
            }
        }
        return $this->detectEncoding();
    }

    /**
     * Manually detect encoding
     *
     * Asks mbstring to detect the encoding of the downloaded contents and
     * returns self::ENCODING when detection fails.
     *
     * @return string
     */
    protected function detectEncoding()
    {
        $encoding = mb_detect_encoding($this->getContents());
        return $encoding !== false ? $encoding : self::ENCODING;
    }

    /**
     * URL content
     *
     * @return string
     */
    public function getContents()
    {
        return $this->contents;
    }

    /**
     * Parser client
     *
     * Builds the parser Client on first use and returns the same instance on
     * later calls. As a consequence, $byteLimit only takes effect on the
     * call that creates the instance.
     *
     * @param int|null $byteLimit
     * @return Client
     */
    public function parserClient($byteLimit = self::BYTE_LIMIT)
    {
        // instanceof resolves `Client` against this namespace. A string class
        // name passed to is_a() is not namespace-resolved and would never match.
        if (!$this->parserClient instanceof Client) {
            $this->parserClient = new Client(
                $this->baseUri,
                $this->getStatusCode(),
                $this->getContents(),
                $this->getEncoding(),
                $byteLimit
            );
        }
        return $this->parserClient;
    }

    /**
     * Status code
     *
     * @return int
     */
    public function getStatusCode()
    {
        return $this->statusCode;
    }

    /**
     * Encoding
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Next update timestamp
     *
     * Download time plus self::CACHE_TIME.
     *
     * @return \DateTime
     */
    public function nextUpdate()
    {
        $dateTime = new DateTime();
        $dateTime->setTimestamp($this->time + self::CACHE_TIME);
        return $dateTime;
    }

    /**
     * Valid until timestamp
     *
     * Download time plus the larger of self::CACHE_TIME and the max-age
     * (when the latter is an integer).
     *
     * @return \DateTime
     */
    public function validUntil()
    {
        $maxAge = is_int($this->maxAge) ? $this->maxAge : 0;
        $dateTime = new DateTime();
        $dateTime->setTimestamp($this->time + max(self::CACHE_TIME, $maxAge));
        return $dateTime;
    }
}
209