Completed
Push — master ( 2d7c5c...16dc59 )
by Dev
09:53
created

Request::getResponse()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace PiedWeb\UrlHarvester;
4
5
use PiedWeb\Curl\Request as CurlRequest;
6
use PiedWeb\Curl\Response;
7
8
/**
9
 * Request a page and get it only if it's an html page.
10
 */
11
class Request
{
    /**
     * URL to request.
     *
     * @var string
     */
    private $url;

    /**
     * Value sent both as curl user agent and as `User-Agent` header.
     *
     * @var string
     */
    private $userAgent;

    /**
     * Value of the `Accept-Language` header.
     *
     * @var string
     */
    private $language;

    /**
     * Proxy handed to the curl request; no proxy is set when empty.
     *
     * @var string|null
     */
    private $proxy;

    /**
     * Whether to request the https version when the http one redirects to it.
     *
     * @var bool
     */
    private $tryHttps;

    /**
     * Download filter: the '200;html' preset, a callable, or a falsy value for no filter.
     *
     * @var string|callable|null
     */
    private $downloadOnly;

    /**
     * @var CurlRequest
     */
    private $request;

    /**
     * @var Response|int the response, or an int corresponding to the curl error
     */
    private $response;

    /**
     * Build a request, execute it immediately and return it.
     *
     * When $tryHttps is true and the http URL redirects to its https twin,
     * the returned instance is the one built for the https URL (provided
     * that second request ended without error).
     *
     * @param string               $url
     * @param string               $userAgent
     * @param string|callable|null $downloadOnly '200;html' (default filter), a callable
     *                                           given to setDownloadOnlyIf(), or a falsy
     *                                           value to set no filter
     * @param string               $language     value of the Accept-Language header
     * @param bool                 $tryHttps
     * @param string|null          $proxy
     *
     * @return self
     */
    public static function make(
        string  $url,
        string  $userAgent,
        $downloadOnly = '200;html',
        string  $language = 'en,en-US;q=0.5',
        bool    $tryHttps = false,
        ?string $proxy = null
    ) {
        $request = new self($url);

        $request->tryHttps = $tryHttps;
        $request->userAgent = $userAgent;
        $request->downloadOnly = $downloadOnly;
        $request->language = $language;
        $request->proxy = $proxy;

        // request() may hand back the https request instead of $request:
        // its return value must not be discarded.
        return $request->request();
    }

    /**
     * Instances are created through {@see Request::make()} only.
     *
     * @param string $url
     */
    private function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Prepare headers as a normal browser (same order, same content).
     *
     * @return array list of raw `Name: value` header lines
     */
    private function prepareHeadersForRequest()
    {
        return [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding: gzip, deflate',
            'Accept-Language: '.$this->language,
            'Connection: keep-alive',
            'Upgrade-Insecure-Requests: 1',
            'User-Agent: '.$this->userAgent,
        ];
    }

    /**
     * Configure the curl request, execute it and store the response.
     *
     * @return self the current instance, or the instance built for the https
     *              URL when the https retry was asked and succeeded
     */
    private function request()
    {
        $this->request = new CurlRequest($this->url);
        $this->request
            ->setReturnHeader()
            ->setEncodingGzip()
            ->setUserAgent($this->userAgent)
            ->setDefaultSpeedOptions()
            // Certificate checks are disabled for this request.
            ->setOpt(CURLOPT_SSL_VERIFYHOST, 0)
            ->setOpt(CURLOPT_SSL_VERIFYPEER, 0)
            ->setOpt(CURLOPT_MAXREDIRS, 1)
            ->setOpt(CURLOPT_COOKIE, false)
            ->setOpt(CURLOPT_CONNECTTIMEOUT, 20)
            ->setOpt(CURLOPT_TIMEOUT, 80);

        $this->setDownloadOnly();

        if ($this->proxy) {
            $this->request->setProxy($this->proxy);
        }

        $this->request->setOpt(CURLOPT_HTTPHEADER, $this->prepareHeadersForRequest());

        $this->response = $this->request->exec();

        if (true !== $this->tryHttps) {
            return $this;
        }

        // Recrawl https version if it's asked
        $httpsUrl = $this->amIRedirectToHttps();
        if (false === $httpsUrl) {
            return $this;
        }

        // Same settings as the current request (proxy included); tryHttps is
        // off for the second request so it cannot trigger a third one.
        $requestForHttps = self::make(
            $httpsUrl,
            $this->userAgent,
            $this->downloadOnly,
            $this->language,
            false,
            $this->proxy
        );

        // If no error, the https request replaces the current one.
        return $requestForHttps->get()->hasError() ? $this : $requestForHttps;
    }

    /**
     * Register the download filter on the curl request, if any was asked.
     */
    protected function setDownloadOnly()
    {
        if (!$this->downloadOnly) {
            return;
        }

        // Loose comparison kept on purpose: `true` selects the default filter too.
        if ('200;html' == $this->downloadOnly) {
            $download = new \PiedWeb\Curl\MultipleCheckInHeaders();
            $this->request->setDownloadOnlyIf([$download, 'check']);

            return;
        }

        if (is_callable($this->downloadOnly)) {
            $this->request->setDownloadOnlyIf($this->downloadOnly);
        }
    }

    /**
     * @return CurlRequest the underlying curl request
     */
    public function get()
    {
        return $this->request;
    }

    /**
     * @return Response|int corresponding to the curl error
     */
    public function getResponse()
    {
        return $this->response;
    }

    /**
     * Tell whether the response redirects to the https version of the requested URL.
     *
     * @return string|false the https URL when the `Location` header equals the
     *                      requested URL with `http://` replaced by `https://`,
     *                      false otherwise
     */
    private function amIRedirectToHttps()
    {
        // A failed request stores an int (curl error) instead of a Response:
        // there is no header to inspect.
        if (!$this->response instanceof Response) {
            return false;
        }

        // Only a plain http URL can be upgraded.
        if (0 !== strpos($this->url, 'http://')) {
            return false;
        }

        $headers = $this->response->getHeaders();
        $headers = array_change_key_case(null !== $headers ? $headers : []);
        if (!isset($headers['location'])) {
            return false;
        }

        $httpsUrl = preg_replace('#^http://#', 'https://', $this->url, 1);

        return $httpsUrl === $headers['location'] ? $httpsUrl : false;
    }
}
199