Completed
Push — master ( eeddbb...e54ce6 )
by Jan-Petter
02:30
created

DownloadHandler::detectEncoding()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 7
rs 9.4285
cc 2
eloc 4
nc 2
nop 0
1
<?php
2
namespace vipnytt\RobotsTxtParser\Client;
3
4
use GuzzleHttp;
5
use vipnytt\RobotsTxtParser\Client;
6
use vipnytt\RobotsTxtParser\Parser\RobotsTxtInterface;
7
8
/**
 * Class DownloadHandler
 *
 * Downloads `/robots.txt` relative to a base URI using Guzzle and keeps the
 * resulting HTTP status code, raw body and character encoding available
 * through getters, or wrapped in a parser Client via parserClient().
 *
 * NOTE(review): the constants MAX_REDIRECTS, ENCODING and BYTE_LIMIT are read
 * through `self::` and are presumably declared by RobotsTxtInterface — confirm.
 *
 * @package vipnytt\RobotsTxtParser\Client
 */
class DownloadHandler implements RobotsTxtInterface
{
    /**
     * Base uri
     * @var string
     */
    protected $baseUri;

    /**
     * HTTP Status code
     * @var int
     */
    protected $statusCode;

    /**
     * Robots.txt contents
     * @var string
     */
    protected $contents;

    /**
     * Robots.txt character encoding
     * @var string
     */
    protected $encoding;

    /**
     * DownloadHandler constructor.
     *
     * Performs the HTTP request immediately. When Guzzle reports a connection
     * failure the object is still constructed, but populated with fallback
     * values (see connectionIssue()). Other Guzzle exceptions are not caught
     * here and propagate to the caller.
     *
     * @param string $baseUri      Base URI the `/robots.txt` path is resolved against
     * @param array  $guzzleConfig Guzzle client options; values given here override the defaults below
     */
    public function __construct($baseUri, $guzzleConfig = [])
    {
        $this->baseUri = $baseUri;
        try {
            $client = new GuzzleHttp\Client(
                // array_replace_recursive() lets the caller override individual
                // options. array_merge_recursive() would instead collect both
                // values of a scalar option into a list (e.g. 'timeout' => [60, 30]),
                // which Guzzle cannot interpret.
                array_replace_recursive(
                    [
                        'allow_redirects' => [
                            'max' => self::MAX_REDIRECTS,
                            'referer' => true,
                            'strict' => true,
                            'track_redirects' => true,
                        ],
                        'base_uri' => $baseUri,
                        'headers' => [
                            'Accept' => 'text/plain;q=1.0, text/*;q=0.8, */*;q=0.1',
                            'Accept-Charset' => 'utf-8;q=1.0, *;q=0.1',
                            'Accept-Encoding' => 'identity;q=1.0, *;q=0.1',
                            'User-Agent' => 'RobotsTxtParser-VIPnytt/1.0 (+https://github.com/VIPnytt/RobotsTxtParser/blob/master/README.md)',
                        ],
                        // 4xx/5xx responses are returned, not thrown, so the status code can be stored
                        'http_errors' => false,
                        'timeout' => 60,
                        'verify' => true,
                    ],
                    $guzzleConfig
                )
            );
            $response = $client->request('GET', '/robots.txt');
            $this->statusCode = $response->getStatusCode();
            // Contents must be stored before the encoding is resolved:
            // headerEncoding() falls back to detectEncoding(), which reads them.
            $this->contents = $response->getBody()->getContents();
            // A response without a Content-Type header yields an empty list;
            // fall back to an empty header string so detection by content kicks in.
            $contentType = $response->getHeader('content-type');
            $this->encoding = $this->headerEncoding(isset($contentType[0]) ? $contentType[0] : '');
        } catch (GuzzleHttp\Exception\ConnectException $e) {
            $this->connectionIssue();
        }
    }

    /**
     * HTTP header encoding
     *
     * Extracts the `charset` parameter from a Content-Type header value and
     * returns it when mbstring lists it as a supported encoding. Otherwise
     * the encoding is detected from the downloaded contents.
     *
     * @param string $header Content-Type header value, may be empty
     * @return string
     */
    protected function headerEncoding($header)
    {
        $parameters = array_map('trim', mb_split(';', (string)$header));
        foreach ($parameters as $parameter) {
            if (mb_stripos($parameter, 'charset=') !== 0) {
                continue;
            }
            // The parameter value may be a quoted string (charset="utf-8");
            // strip surrounding quotes and whitespace before validating.
            $encoding = trim(mb_split('=', $parameter, 2)[1], " \t\"'");
            if (in_array(mb_strtolower($encoding), array_map('mb_strtolower', mb_list_encodings()), true)) {
                return $encoding;
            }
        }
        return $this->detectEncoding();
    }

    /**
     * Manually detect encoding
     *
     * Lets mbstring guess the encoding of the downloaded contents and falls
     * back to the default encoding constant when detection fails.
     *
     * @return string
     */
    protected function detectEncoding()
    {
        $encoding = mb_detect_encoding($this->getContents());
        if ($encoding !== false) {
            return $encoding;
        }
        return self::ENCODING;
    }

    /**
     * URL content
     *
     * @return string Raw robots.txt body, or an empty string after a connection failure
     */
    public function getContents()
    {
        return $this->contents;
    }

    /**
     * Connection issue
     *
     * Populates the object with fallback values when the host could not be
     * reached: status code 523, empty contents and the default encoding.
     * NOTE(review): 523 is not a registered HTTP status code; presumably used
     * as an "origin unreachable" marker — confirm how Client interprets it.
     *
     * @return void
     */
    private function connectionIssue()
    {
        $this->statusCode = 523;
        $this->contents = '';
        $this->encoding = self::ENCODING;
    }

    /**
     * Parser client
     *
     * Builds a parser Client from the base URI and the downloaded status
     * code, contents and encoding.
     *
     * @param int|null $byteLimit Byte limit handed to the Client
     * @return Client
     */
    public function parserClient($byteLimit = self::BYTE_LIMIT)
    {
        return new Client($this->baseUri, $this->getStatusCode(), $this->getContents(), $this->getEncoding(), $byteLimit);
    }

    /**
     * Status code
     *
     * @return int HTTP status code of the response, or 523 after a connection failure
     */
    public function getStatusCode()
    {
        return $this->statusCode;
    }

    /**
     * Encoding
     *
     * @return string Encoding taken from the Content-Type header, detected from the contents, or the default
     */
    public function getEncoding()
    {
        return $this->encoding;
    }
}
167