Completed
Push — master ( 7cb44c...7cbff6 )
by Dev
02:31
created

Request::setDownloadOnly()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 8
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 20

Importance

Changes 0
Metric Value
cc 4
eloc 6
nc 4
nop 0
dl 0
loc 8
ccs 0
cts 7
cp 0
crap 20
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace PiedWeb\UrlHarvester;
4
5
use PiedWeb\Curl\Request as CurlRequest;
6
use PiedWeb\Curl\Response;
7
8
/**
 * Request a page and get it only if it's an html page.
 *
 * Instances are only created through {@see Request::make()}, which configures
 * the request, executes it and returns the result of the underlying curl call.
 */
class Request
{
    /**
     * @var string
     */
    private $url;

    /**
     * @var string
     */
    private $userAgent;

    /**
     * @var string value sent in the Accept-Language header
     */
    private $language;

    /**
     * @var string|null proxy handed to the curl request, null when no proxy is used
     */
    private $proxy;

    /**
     * Either the literal '200;html' (built-in header check), a callable used as a
     * custom check, or a falsy value to disable the check.
     *
     * @var string|callable|null
     */
    private $downloadOnly;

    /**
     * @var CurlRequest
     */
    private $request;

    /**
     * @var Response|int
     */
    private $response;

    /**
     * Build a request for $url, execute it and return the result.
     *
     * @param string               $url
     * @param string               $userAgent    set on the curl handle and sent as a User-Agent header
     * @param string|callable|null $downloadOnly stored on the request; it is only consumed by
     *                                           setDownloadOnly(), which request() currently does not call
     * @param string               $language     value of the Accept-Language header
     * @param string|null          $proxy        proxy to use, null for none
     *
     * @return Response|int corresponding to the curl error
     */
    public static function make(
        string $url,
        string $userAgent,
        $downloadOnly = '200;html',
        string $language = 'en,en-US;q=0.5',
        ?string $proxy = null
    ) {
        $request = new self($url);

        $request->userAgent = $userAgent;
        $request->downloadOnly = $downloadOnly;
        $request->language = $language;
        $request->proxy = $proxy;

        return $request->request();
    }

    /**
     * The URL is stored as given; no validation is performed here.
     *
     * @param string $url
     */
    private function __construct(string $url)
    {
        $this->url = $url;
    }

    /**
     * Prepare headers as a normal browser (same order, same content).
     *
     * Host and Referer headers are not added here.
     *
     * @return string[] header lines in the order they must be sent
     */
    private function prepareHeadersForRequest(): array
    {
        return [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding: gzip, deflate',
            'Accept-Language: '.$this->language,
            'Connection: keep-alive',
            'Upgrade-Insecure-Requests: 1',
            'User-Agent: '.$this->userAgent,
        ];
    }

    /**
     * Configure the curl request, execute it and keep the result in $this->response.
     *
     * @return Response|int corresponding to the curl error
     */
    private function request()
    {
        $this->request = new CurlRequest($this->url);
        $this->request
            ->setReturnHeader()
            ->setEncodingGzip()
            ->setUserAgent($this->userAgent)
            ->setDefaultSpeedOptions()
            // NOTE(review): certificate and host verification are switched off,
            // so TLS peers are not authenticated. Confirm this is intended.
            ->setOpt(CURLOPT_SSL_VERIFYHOST, 0)
            ->setOpt(CURLOPT_SSL_VERIFYPEER, 0)
            // Redirects are never followed.
            ->setOpt(CURLOPT_MAXREDIRS, 0)
            ->setOpt(CURLOPT_FOLLOWLOCATION, false)
            ->setOpt(CURLOPT_COOKIE, false)
            ->setOpt(CURLOPT_CONNECTTIMEOUT, 20)
            ->setOpt(CURLOPT_TIMEOUT, 80)
            // NOTE(review): the previous comment said "2Mo", but 200000 is
            // presumably bytes (~200 KB). Confirm the intended limit.
            ->setAbortIfTooBig(200000);

        // setDownloadOnly() is deliberately not called: it was disabled as too slow.

        if ($this->proxy) {
            $this->request->setProxy($this->proxy);
        }

        $this->request->setOpt(CURLOPT_HTTPHEADER, $this->prepareHeadersForRequest());

        $this->response = $this->request->exec();

        return $this->response;
    }

    /**
     * Register the "download only if" check on the curl request.
     *
     * - '200;html' installs the check provided by MultipleCheckInHeaders;
     * - a callable is registered as is;
     * - a falsy or any other value registers nothing.
     *
     * The string is compared strictly so that values such as `true` or the
     * integer 200 (which loosely equals '200;html' on PHP 7 but not on PHP 8)
     * no longer select the built-in check by accident.
     */
    protected function setDownloadOnly()
    {
        if (!$this->downloadOnly) {
            return;
        }

        if ('200;html' === $this->downloadOnly) {
            $download = new \PiedWeb\Curl\MultipleCheckInHeaders();
            $this->request->setDownloadOnlyIf([$download, 'check']);

            return;
        }

        if (is_callable($this->downloadOnly)) {
            $this->request->setDownloadOnlyIf($this->downloadOnly);
        }
    }
}
153