Completed
Push — master ( 2b63d8...25ece7 )
by Jan-Petter
02:14
created

Download::getContents()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 4
Ratio 100 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 4
loc 4
rs 10
cc 1
eloc 2
nc 1
nop 0
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use GuzzleHttp;
5
use vipnytt\RobotsTxtParser\Client;
6
use vipnytt\RobotsTxtParser\Parser\RobotsTxtInterface;
7
8
/**
 * Class Download
 *
 * Fetches `/robots.txt` relative to a base URI with Guzzle and stores the
 * resulting HTTP status code, body and character encoding so they can be
 * handed over to the parser client.
 *
 * The constants referenced through `self::` (MAX_REDIRECTS, ENCODING,
 * BYTE_LIMIT) are not declared in this class; presumably they come from
 * RobotsTxtInterface -- verify against that interface.
 *
 * @package vipnytt\RobotsTxtParser
 */
class Download implements RobotsTxtInterface
{
    /**
     * Base uri
     * @var string
     */
    protected $baseUri;

    /**
     * HTTP Status code
     * @var int
     */
    protected $statusCode;

    /**
     * Robots.txt contents
     * @var string
     */
    protected $contents;

    /**
     * Robots.txt character encoding
     * @var string
     */
    protected $encoding;

    /**
     * Download constructor.
     *
     * Performs the HTTP request immediately. If the connection cannot be
     * established, the object is populated through connectionIssue() instead
     * of letting the ConnectException propagate. Other Guzzle exceptions are
     * not caught here.
     *
     * @param string $baseUri Base URI that `/robots.txt` is resolved against
     * @param array $guzzleConfig Guzzle client options; values given here
     *                            override the defaults below key by key
     */
    public function __construct($baseUri, $guzzleConfig = [])
    {
        $this->baseUri = $baseUri;
        try {
            $client = new GuzzleHttp\Client(
                // array_replace_recursive() lets a caller-supplied value override
                // the matching default. array_merge_recursive() would combine
                // both values into an array instead (e.g. 'timeout' => [60, 30]),
                // which is not a valid Guzzle option value.
                array_replace_recursive(
                    [
                        'allow_redirects' => [
                            'max' => self::MAX_REDIRECTS,
                            'referer' => true,
                            'strict' => true,
                            'track_redirects' => true,
                        ],
                        'base_uri' => $baseUri,
                        'headers' => [
                            'Accept' => 'text/plain;q=1.0, text/*;q=0.8, */*;q=0.1',
                            'Accept-Charset' => 'utf-8;q=1.0, *;q=0.1',
                            'Accept-Encoding' => 'identity;q=1.0, *;q=0.1',
                            'User-Agent' => 'RobotsTxtParser-VIPnytt/1.0 (+https://github.com/VIPnytt/RobotsTxtParser/blob/master/README.md)',
                        ],
                        // Error status codes are recorded, not thrown as exceptions.
                        'http_errors' => false,
                        'timeout' => 60,
                        'verify' => true,
                    ],
                    $guzzleConfig
                )
            );
            $response = $client->request('GET', '/robots.txt');
            $this->statusCode = $response->getStatusCode();
            // Contents must be stored before the encoding is resolved, because
            // headerEncoding() may fall back to inspecting the body.
            $this->contents = $response->getBody()->getContents();
            // The Content-Type header is optional; use an empty value when it is
            // absent so the encoding is detected from the body instead.
            $contentType = $response->getHeader('content-type');
            $this->encoding = $this->headerEncoding(isset($contentType[0]) ? $contentType[0] : '');
        } catch (GuzzleHttp\Exception\ConnectException $e) {
            $this->connectionIssue();
        }
    }

    /**
     * HTTP header encoding
     *
     * Looks for a `charset=` parameter in a Content-Type header value and
     * returns it when mbstring lists it as a supported encoding. Surrounding
     * quotes and whitespace around the value are ignored. Falls back to
     * detectEncoding() when no usable charset parameter is present.
     *
     * @param string|null $header Content-Type header value
     * @return string
     */
    protected function headerEncoding($header)
    {
        $parameters = array_map('trim', mb_split(';', (string)$header));
        foreach ($parameters as $parameter) {
            if (mb_stripos($parameter, 'charset=') !== 0) {
                continue;
            }
            // The value may be a quoted string, e.g. charset="utf-8".
            $encoding = trim(mb_split('=', $parameter, 2)[1], " \t\"'");
            if (in_array(mb_strtolower($encoding), array_map('mb_strtolower', mb_list_encodings()), true)) {
                return $encoding;
            }
        }
        return $this->detectEncoding();
    }

    /**
     * Manually detect encoding
     *
     * Runs mb_detect_encoding() on the downloaded contents and returns
     * self::ENCODING when detection fails.
     *
     * @return string
     */
    protected function detectEncoding()
    {
        $detected = mb_detect_encoding($this->getContents());
        if ($detected !== false) {
            return $detected;
        }
        return self::ENCODING;
    }

    /**
     * URL content
     *
     * @return string Body of the robots.txt response, or an empty string
     *                when the connection failed
     */
    public function getContents()
    {
        return $this->contents;
    }

    /**
     * Connection issue
     *
     * Populates the object for a failed connection: status code 523,
     * empty contents and the default encoding.
     *
     * @return void
     */
    private function connectionIssue()
    {
        $this->statusCode = 523;
        $this->contents = '';
        $this->encoding = self::ENCODING;
    }

    /**
     * Parser client
     *
     * Builds a parser Client from the downloaded base URI, status code,
     * contents and encoding.
     *
     * @param int|null $byteLimit Passed through to the Client constructor
     * @return Client
     */
    public function parserClient($byteLimit = self::BYTE_LIMIT)
    {
        return new Client($this->baseUri, $this->getStatusCode(), $this->getContents(), $this->getEncoding(), $byteLimit);
    }

    /**
     * Status code
     *
     * @return int HTTP status code of the response, or 523 when the
     *             connection failed
     */
    public function getStatusCode()
    {
        return $this->statusCode;
    }

    /**
     * Encoding
     *
     * @return string Character encoding resolved for the contents
     */
    public function getEncoding()
    {
        return $this->encoding;
    }
}
167