Passed
Push — master ( f60a2c...2d7c5c )
by Dev
10:34 queued 19s
created

Request::request()   B

Complexity

Conditions 6
Paths 12

Size

Total Lines 32
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 6.1893

Importance

Changes 0
Metric Value
cc 6
eloc 22
nc 12
nop 0
dl 0
loc 32
ccs 19
cts 23
cp 0.8261
crap 6.1893
rs 8.9457
c 0
b 0
f 0
1
<?php
2
3
namespace PiedWeb\UrlHarvester;
4
5
use PiedWeb\Curl\Request as CurlRequest;
6
use PiedWeb\Curl\Response;
7
8
/**
 * Request a page and get it only if its content type matches the expected one
 * (an html page by default).
 *
 * Instances are created through {@see Request::make()}, which configures the
 * underlying cURL request, executes it and, when asked, retries on the https
 * version of the URL if the server redirects to it.
 */
class Request
{
    /**
     * @var string URL to request
     */
    private $url;

    /**
     * @var string value sent in the User-Agent header
     */
    private $userAgent;

    /**
     * @var string value sent in the Accept-Language header
     */
    private $language;

    /**
     * @var string|null proxy handed to the cURL request, null to connect directly
     */
    private $proxy;

    /**
     * @var bool whether a redirect to the https version of the URL must be followed
     */
    private $tryHttps;

    /**
     * @var string content type handed to CurlRequest::setDownloadOnlyIf()
     *             (the misspelled name is kept: it is part of make()'s signature)
     */
    private $donwloadOnly;

    /**
     * @var CurlRequest
     */
    private $request;

    /**
     * @var Response|int the response, or an int corresponding to the curl error
     */
    private $response;

    /**
     * Build a request, execute it and return the request that was finally kept.
     *
     * When $tryHttps is true and the server redirects $url to its https twin,
     * the returned object is the request made on the https URL (provided that
     * request succeeded); otherwise it is the request made on $url.
     *
     * @param string      $url
     * @param string      $userAgent
     * @param string      $donwloadOnly content type to accept (empty string to disable the filter)
     * @param string      $language
     * @param bool        $tryHttps
     * @param string|null $proxy
     *
     * @return self
     */
    public static function make(
        string  $url,
        string  $userAgent,
        string  $donwloadOnly = 'text/html',
        string  $language = 'en,en-US;q=0.5',
        bool    $tryHttps = false,
        ?string $proxy = null
    ) {
        $request = new Request($url);

        $request->tryHttps = $tryHttps;
        $request->userAgent = $userAgent;
        $request->donwloadOnly = $donwloadOnly;
        $request->language = $language;
        $request->proxy = $proxy;

        // request() may hand back a different instance (the https one):
        // return its result instead of discarding it.
        return $request->request();
    }

    /**
     * Instantiation goes through make() only.
     *
     * @param string $url
     */
    private function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Prepare headers as a normal browser (same order, same content).
     *
     * No explicit Host or Referer header is added here.
     *
     * @return array list of raw "Name: value" header lines
     */
    private function prepareHeadersForRequest(): array
    {
        return [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding: gzip, deflate',
            'Accept-Language: '.$this->language,
            'Connection: keep-alive',
            'Upgrade-Insecure-Requests: 1',
            'User-Agent: '.$this->userAgent,
        ];
    }

    /**
     * Configure and execute the cURL request, then optionally retry on https.
     *
     * @return self the https request when it was asked for, offered by the
     *              server and error free; $this otherwise
     */
    private function request(): self
    {
        $this->request = new CurlRequest($this->url);
        $this->request
            ->setReturnHeader()
            ->setEncodingGzip()
            ->setDefaultSpeedOptions()
            // TLS certificate checks are switched off for this request.
            ->setOpt(CURLOPT_SSL_VERIFYHOST, 0)
            ->setOpt(CURLOPT_SSL_VERIFYPEER, 0)
            // Redirects are not followed: the Location header is inspected below.
            ->setOpt(CURLOPT_MAXREDIRS, 0)
            ->setOpt(CURLOPT_COOKIE, false)
            ->setOpt(CURLOPT_CONNECTTIMEOUT, 20)
            ->setOpt(CURLOPT_TIMEOUT, 80);

        if ($this->donwloadOnly) {
            $this->request->setDownloadOnlyIf($this->donwloadOnly);
        }

        if ($this->proxy) {
            $this->request->setProxy($this->proxy);
        }

        $this->request->setOpt(CURLOPT_HTTPHEADER, $this->prepareHeadersForRequest());

        $this->response = $this->request->exec();

        if (true !== $this->tryHttps) {
            return $this;
        }

        $httpsUrl = $this->amIRedirectToHttps();
        if (false === $httpsUrl) {
            return $this;
        }

        // Recrawl the https version with the same settings (proxy included).
        // $tryHttps is false for that second request: the URL is already https.
        $requestForHttps = self::make(
            $httpsUrl,
            $this->userAgent,
            $this->donwloadOnly,
            $this->language,
            false,
            $this->proxy
        );

        // Keep the https request only if it went through without error.
        return $requestForHttps->get()->hasError() ? $this : $requestForHttps;
    }

    /**
     * @return CurlRequest
     */
    public function get()
    {
        return $this->request;
    }

    /**
     * @return Response|int corresponding to the curl error
     */
    public function getResponse()
    {
        return $this->response;
    }

    /**
     * Tell whether the response redirects to the https version of the very same URL.
     *
     * @return string|false the https URL, or false when there is no usable
     *                      response or no such redirect
     */
    private function amIRedirectToHttps()
    {
        // On a curl error the response is an int, not a Response: nothing to inspect.
        if (!$this->response instanceof Response) {
            return false;
        }

        $headers = $this->response->getHeaders();
        // Header names are lower-cased so the lookup is case insensitive.
        $headers = array_change_key_case(null !== $headers ? $headers : []);

        $redirUrl = $headers['location'] ?? null;
        if (null === $redirUrl) {
            return false;
        }

        $httpsUrl = preg_replace('#^http://#', 'https://', $this->url, 1);

        // Strict comparison: a null from preg_replace never matches.
        return $httpsUrl === $redirUrl ? $httpsUrl : false;
    }
}
185