Completed
Push — master ( 4e0c1b...178c14 )
by Jan-Petter
02:10
created

Download::headerEncoding()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 12
Code Lines 7

Duplication

Lines 12
Ratio 100 %

Importance

Changes 3
Bugs 2 Features 0
Metric Value
c 3
b 2
f 0
dl 12
loc 12
rs 9.2
cc 4
eloc 7
nc 4
nop 1
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use DateTime;
5
use GuzzleHttp;
6
use vipnytt\RobotsTxtParser\Client;
7
use vipnytt\RobotsTxtParser\Parser\RobotsTxtInterface;
8
9
/**
 * Class Download
 *
 * Fetches `/robots.txt` from a base URI with Guzzle at construction time and
 * keeps the HTTP status code, body, character encoding and Cache-Control
 * max-age so they can be handed to the parser client.
 *
 * NOTE(review): the constants MAX_REDIRECTS, ENCODING, BYTE_LIMIT and
 * CACHE_TIME are not declared in this class; presumably they come from
 * RobotsTxtInterface - confirm.
 *
 * @package vipnytt\RobotsTxtParser
 */
class Download implements RobotsTxtInterface
{
    /**
     * Base uri
     * @var string
     */
    protected $baseUri;

    /**
     * Download timestamp (Unix time)
     * @var int
     */
    protected $time;

    /**
     * Cache-Control max-age, in seconds
     * @var int
     */
    protected $maxAge;

    /**
     * HTTP Status code
     * @var int
     */
    protected $statusCode;

    /**
     * Robots.txt contents
     * @var string
     */
    protected $contents;

    /**
     * Robots.txt character encoding
     * @var string
     */
    protected $encoding;

    /**
     * Parser client class (created lazily by parserClient())
     * @var Client
     */
    protected $parserClient;

    /**
     * Download constructor.
     *
     * Performs the HTTP request immediately. Transfer failures are not
     * propagated: they are recorded as status code 523 with empty contents.
     *
     * @param string $baseUri
     * @param array $guzzleConfig Guzzle client options; values given here override the defaults below
     */
    public function __construct($baseUri, array $guzzleConfig = [])
    {
        $this->baseUri = $baseUri;
        $defaults = [
            'allow_redirects' => [
                'max' => self::MAX_REDIRECTS,
                'referer' => true,
                'strict' => true,
            ],
            'base_uri' => $baseUri,
            'decode_content' => true,
            'headers' => [
                'Accept' => 'text/plain;q=1.0, text/*;q=0.8, */*;q=0.1',
                'Accept-Charset' => 'utf-8;q=1.0, *;q=0.1',
                'Accept-Encoding' => 'identity;q=1.0, *;q=0.1',
                'User-Agent' => 'RobotsTxtParser-VIPnytt/1.0 (+https://github.com/VIPnytt/RobotsTxtParser/blob/master/README.md)',
            ],
            'http_errors' => false,
            'timeout' => 60,
            'verify' => true,
        ];
        try {
            // array_replace_recursive() lets caller-supplied options override the
            // defaults. array_merge_recursive() would instead combine colliding
            // scalar values into arrays (e.g. 'timeout' => [60, 30]), which is not
            // a valid value for those options.
            $client = new GuzzleHttp\Client(array_replace_recursive($defaults, $guzzleConfig));
            $response = $client->request('GET', '/robots.txt');
            $this->time = time();
            $this->statusCode = $response->getStatusCode();
            $this->contents = $response->getBody()->getContents();
            $this->encoding = $this->headerEncoding($response->getHeader('content-type'));
            $this->maxAge = $this->headerMaxAge($response->getHeader('cache-control'));
        } catch (GuzzleHttp\Exception\TransferException $e) {
            // The timestamp must be set on failure as well; otherwise nextUpdate()
            // and validUntil() would be computed relative to the Unix epoch.
            $this->time = time();
            $this->statusCode = 523;
            $this->contents = '';
            $this->encoding = self::ENCODING;
            $this->maxAge = 0;
        }
    }

    /**
     * Content-Type encoding HTTP header
     *
     * Returns the `charset` parameter of the first Content-Type header value
     * that carries one, with surrounding quotes removed. Falls back to
     * self::ENCODING when no charset is given or when it is empty.
     *
     * NOTE(review): static analysis flagged this method as duplicated elsewhere
     * in the project; consider sharing a single implementation.
     *
     * @param array $headers Content-Type header values
     * @return string
     */
    protected function headerEncoding(array $headers)
    {
        $charset = $this->headerDirective($headers, ';', 'charset');
        if ($charset === null || $charset === '') {
            return self::ENCODING;
        }
        return $charset;
    }

    /**
     * Cache-Control max-age HTTP header
     *
     * Returns the `max-age` directive of the first Cache-Control header value
     * that carries one, as a non-negative number of seconds. Quoted values are
     * accepted. Returns 0 when the directive is absent or not numeric.
     *
     * NOTE(review): static analysis flagged this method as duplicated elsewhere
     * in the project; consider sharing a single implementation.
     *
     * @param array $headers Cache-Control header values
     * @return int
     */
    protected function headerMaxAge(array $headers)
    {
        $seconds = $this->headerDirective($headers, ',', 'max-age');
        if ($seconds === null) {
            return 0;
        }
        return max(0, intval($seconds));
    }

    /**
     * Find the value of the first `name=value` element in a list of header values
     *
     * Each header value is split on the delimiter; an element matches when,
     * after trimming, it starts with `name=` (case-insensitive). The returned
     * value has surrounding whitespace and quote characters stripped.
     *
     * @param array $headers Raw header values
     * @param string $delimiter Element separator within one header value
     * @param string $name Directive / parameter name, without the `=`
     * @return string|null The value, or null when no element matches
     */
    private function headerDirective(array $headers, $delimiter, $name)
    {
        $prefix = $name . '=';
        foreach ($headers as $header) {
            foreach (explode($delimiter, $header) as $element) {
                $element = trim($element);
                if (stripos($element, $prefix) === 0) {
                    return trim(substr($element, strlen($prefix)), " \t\"'");
                }
            }
        }
        return null;
    }

    /**
     * Parser client
     *
     * The client is created on the first call and reused afterwards, so the
     * byte limit only has an effect on that first call.
     *
     * @param int|null $byteLimit
     * @return Client
     */
    public function parserClient($byteLimit = self::BYTE_LIMIT)
    {
        if (!is_object($this->parserClient)) {
            $this->parserClient = new Client(
                $this->baseUri,
                $this->getStatusCode(),
                $this->getContents(),
                $this->getEncoding(),
                $byteLimit
            );
        }
        return $this->parserClient;
    }

    /**
     * Status code
     *
     * @return int HTTP status code of the download, or 523 when the transfer failed
     */
    public function getStatusCode()
    {
        return $this->statusCode;
    }

    /**
     * URL content
     *
     * @return string Response body, or an empty string when the transfer failed
     */
    public function getContents()
    {
        return $this->contents;
    }

    /**
     * Encoding
     *
     * @return string Character encoding taken from the Content-Type header, or the default
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Next update timestamp
     *
     * Download time plus self::CACHE_TIME.
     *
     * @return \DateTime
     */
    public function nextUpdate()
    {
        $dateTime = new DateTime;
        $dateTime->setTimestamp($this->time + self::CACHE_TIME);
        return $dateTime;
    }

    /**
     * Valid until timestamp
     *
     * Download time plus the larger of self::CACHE_TIME and the max-age
     * announced by the server.
     *
     * @return \DateTime
     */
    public function validUntil()
    {
        $dateTime = new DateTime;
        $dateTime->setTimestamp($this->time + max(self::CACHE_TIME, $this->maxAge));
        return $dateTime;
    }
}
211