Completed
Push — master ( 657bbc...fb660e )
by Jan-Petter
01:57
created

Download::headerEncoding()   B

Complexity

Conditions 5
Paths 5

Size

Total Lines 15
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 1 Features 0
Metric Value
c 2
b 1
f 0
dl 0
loc 15
rs 8.8571
cc 5
eloc 9
nc 5
nop 1
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use GuzzleHttp;
5
use vipnytt\RobotsTxtParser\Client;
6
use vipnytt\RobotsTxtParser\Parser\RobotsTxtInterface;
7
8
/**
 * Class Download
 *
 * Requests `/robots.txt` relative to a base URI through Guzzle and keeps the
 * resulting HTTP status code, body and character encoding available through
 * getters, or hands them to a parser Client.
 *
 * NOTE(review): MAX_REDIRECTS, ENCODING and BYTE_LIMIT are referenced via
 * self:: and are presumably declared on RobotsTxtInterface - confirm.
 *
 * @package vipnytt\RobotsTxtParser
 */
class Download implements RobotsTxtInterface
{
    /**
     * Base uri
     * @var string
     */
    protected $baseUri;

    /**
     * HTTP Status code
     * @var int
     */
    protected $statusCode;

    /**
     * Robots.txt contents
     * @var string
     */
    protected $contents;

    /**
     * Robots.txt character encoding
     * @var string
     */
    protected $encoding;

    /**
     * Download constructor.
     *
     * Performs the HTTP request immediately. When Guzzle raises a
     * TransferException, the status code is set to 523, the contents to an
     * empty string and the encoding to the default (self::ENCODING).
     *
     * @param string $baseUri
     * @param array $guzzleConfig Guzzle client options; values given here override the defaults below
     */
    public function __construct($baseUri, $guzzleConfig = [])
    {
        $this->baseUri = $baseUri;
        $defaults = [
            'allow_redirects' => [
                'max' => self::MAX_REDIRECTS,
                'referer' => true,
                'strict' => true,
                'track_redirects' => true,
            ],
            'base_uri' => $baseUri,
            'headers' => [
                'Accept' => 'text/plain;q=1.0, text/*;q=0.8, */*;q=0.1',
                'Accept-Charset' => 'utf-8;q=1.0, *;q=0.1',
                'Accept-Encoding' => 'identity;q=1.0, *;q=0.1',
                'User-Agent' => 'RobotsTxtParser-VIPnytt/1.0 (+https://github.com/VIPnytt/RobotsTxtParser/blob/master/README.md)',
            ],
            'http_errors' => false,
            'timeout' => 60,
            'verify' => true,
        ];
        try {
            // array_replace_recursive() lets caller-supplied values override the
            // defaults key by key. array_merge_recursive() would instead combine
            // same-key scalars into arrays (e.g. 'timeout' => [60, 30]), which
            // is not a valid option value.
            $client = new GuzzleHttp\Client(array_replace_recursive($defaults, $guzzleConfig));
            $response = $client->request('GET', '/robots.txt');
            $this->statusCode = $response->getStatusCode();
            // Contents must be stored before resolving the encoding, because
            // headerEncoding() falls back to detecting it from the body.
            $this->contents = $response->getBody()->getContents();
            $this->encoding = $this->headerEncoding($response->getHeader('content-type'));
        } catch (GuzzleHttp\Exception\TransferException $e) {
            // Transfer failed: record sentinel values instead of propagating.
            $this->statusCode = 523;
            $this->contents = '';
            $this->encoding = self::ENCODING;
        }
    }

    /**
     * HTTP header encoding
     *
     * Looks for a `charset=` parameter in the given Content-Type header values
     * and returns it when mbstring lists it as a supported encoding. Otherwise
     * the encoding is detected from the downloaded contents.
     *
     * @param array $headers Content-Type header values
     * @return string
     */
    protected function headerEncoding($headers)
    {
        // Loop-invariant: build the lower-cased list of supported encodings once.
        $supported = array_map('mb_strtolower', mb_list_encodings());
        foreach ($headers as $header) {
            foreach (array_map('trim', explode(';', $header)) as $parameter) {
                if (mb_stripos($parameter, 'charset=') !== 0) {
                    continue;
                }
                // The value may arrive as a quoted string (charset="utf-8");
                // strip surrounding quotes and whitespace before matching.
                $encoding = trim(explode('=', $parameter, 2)[1], " \t\"'");
                if (in_array(mb_strtolower($encoding), $supported, true)) {
                    return $encoding;
                }
            }
        }
        return $this->detectEncoding();
    }

    /**
     * Manually detect encoding
     *
     * Uses mb_detect_encoding() on the downloaded contents and falls back to
     * the default encoding (self::ENCODING) when detection fails.
     *
     * @return string
     */
    protected function detectEncoding()
    {
        if (($encoding = mb_detect_encoding($this->getContents())) !== false) {
            return $encoding;
        }
        return self::ENCODING;
    }

    /**
     * URL content
     *
     * @return string
     */
    public function getContents()
    {
        return $this->contents;
    }

    /**
     * Parser client
     *
     * Builds a parser Client from the base uri, status code, contents and
     * encoding held by this instance.
     *
     * @param int|null $byteLimit
     * @return Client
     */
    public function parserClient($byteLimit = self::BYTE_LIMIT)
    {
        return new Client($this->baseUri, $this->getStatusCode(), $this->getContents(), $this->getEncoding(), $byteLimit);
    }

    /**
     * Status code
     *
     * @return int
     */
    public function getStatusCode()
    {
        return $this->statusCode;
    }

    /**
     * Encoding
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }
}
159