|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types=1); |
|
4
|
|
|
|
|
5
|
|
|
namespace WebThumbnailer\Application\WebAccess; |
|
6
|
|
|
|
|
7
|
|
|
use WebThumbnailer\Application\ConfigManager; |
|
8
|
|
|
|
|
9
|
|
|
class WebAccessPHP implements WebAccess
{
    /** HTTP status codes whose Location header is followed by getRedirectedHeaders(). */
    private const REDIRECT_STATUS_CODES = [301, 302, 303, 307, 308];

    /**
     * Download content using PHP built-in functions.
     * Note that this method may fail more often than the cURL one.
     *
     * The URL is first probed with getRedirectedHeaders() to obtain the response headers and
     * the final URL; the body is then downloaded from that final URL with file_get_contents(),
     * limited to $maxBytes bytes.
     *
     * This implementation does not use $dlCallback nor $dlContent.
     * It also replaces the default stream context (stream_context_set_default()) and does not
     * restore the previous one.
     *
     * @inheritdoc
     *
     * @return mixed[] [headers, content]: `headers` is the get_headers() result (false if no
     *                 headers could be retrieved), `content` is the downloaded body as a string,
     *                 or false if the headers or the body could not be retrieved.
     */
    public function getContent(
        string $url,
        ?int $timeout = null,
        ?int $maxBytes = null,
        ?callable $dlCallback = null,
        ?string &$dlContent = null
    ): array {
        // null and 0 both fall back to the configured defaults.
        if (empty($timeout)) {
            $timeout = (int) ConfigManager::get('settings.default.timeout', 30);
        }

        if (empty($maxBytes)) {
            $maxBytes = (int) ConfigManager::get('settings.default.max_img_dl', 4194304);
        }

        $maxRedr = 3;
        $context = $this->getContext($timeout, false);
        stream_context_set_default($context);
        [$headers, $finalUrl] = $this->getRedirectedHeaders($url, $timeout, $maxRedr);
        if (! $headers || strpos($headers[0], '200 OK') === false) {
            // getRedirectedHeaders() installs its own default contexts, so this second probe
            // repeats the same requests; what changes is that the download below is then made
            // with request_fulluri enabled.
            $context = $this->getContext($timeout, true);
            stream_context_set_default($context);
            [$headers, $finalUrl] = $this->getRedirectedHeaders($url, $timeout, $maxRedr);
        }

        if (! $headers) {
            return [$headers, false];
        }

        $content = file_get_contents($finalUrl, false, stream_context_create($context), 0, $maxBytes);

        return [$headers, $content];
    }

    /**
     * Download URL HTTP headers and follow redirections (HTTP 30x) if necessary.
     *
     * Headers are requested with request_fulluri enabled first, then disabled if that request
     * failed. A redirection is followed when the first status line carries one of
     * REDIRECT_STATUS_CODES, a Location header is present (its name is matched
     * case-insensitively), and its last value is an absolute URL different from $url.
     * Relative Location values are not followed here: the current headers and URL are returned.
     *
     * @param string $url              URL to download.
     * @param int    $timeout          network timeout (in seconds)
     * @param int    $redirectionLimit Stop trying to follow redirections if this number is reached.
     *
     * @return mixed[] [headers, final URL]: `headers` is the get_headers() result for the last
     *                 requested URL (false on failure), `final URL` is that last requested URL.
     */
    protected function getRedirectedHeaders(string $url, int $timeout, int $redirectionLimit = 3): array
    {
        stream_context_set_default($this->getContext($timeout));

        $headers = $this->fetchHeaders($url);
        // Some hosts don't like fulluri request, some requires it...
        if ($headers === false) {
            stream_context_set_default($this->getContext($timeout, false));
            $headers = $this->fetchHeaders($url);
        }

        // Headers found, redirection found, and limit not reached.
        if ($redirectionLimit > 0 && !empty($headers)) {
            $statusLine = isset($headers[0]) && is_string($headers[0]) ? $headers[0] : '';
            $redirection = $this->isRedirection($statusLine) ? $this->getLastLocation($headers) : null;

            if (
                $redirection !== null
                && $redirection !== $url
                // get_headers() only works on absolute URLs: skip relative targets.
                && !empty(parse_url($redirection, PHP_URL_SCHEME))
            ) {
                return $this->getRedirectedHeaders($redirection, $timeout, $redirectionLimit - 1);
            }
        }

        return [$headers, $url];
    }

    /**
     * Create a valid context for PHP HTTP functions.
     *
     * @param int  $timeout network timeout (in seconds)
     * @param bool $fulluri value of the `request_fulluri` HTTP context option: this is required
     *                      by some hosts, rejected by others, so it is optional.
     *
     * @return mixed[] context options, usable with stream_context_create()
     *                 and stream_context_set_default().
     */
    protected function getContext(int $timeout, bool $fulluri = true): array
    {
        return [
            'http' => [
                'method' => 'GET',
                'timeout' => $timeout,
                'user_agent' => 'Mozilla/5.0 (X11; Linux x86_64; rv:45.0; WebThumbnailer) Gecko/20100101 Firefox/45.0',
                'request_fulluri' => $fulluri,
            ]
        ];
    }

    /**
     * Retrieve the headers of a URL as an associative array, using the default stream context.
     *
     * Warnings are deliberately silenced: failure is reported through the false return value.
     *
     * @param string $url URL to request.
     *
     * @return mixed[]|false get_headers() result, false on failure.
     */
    private function fetchHeaders(string $url)
    {
        // The 2nd parameter of get_headers() is an int before PHP 8.0 and a bool since then;
        // with strict_types enabled, passing the wrong scalar type throws a TypeError.
        $associative = PHP_VERSION_ID >= 80000 ? true : 1;

        return @get_headers($url, $associative);
    }

    /**
     * Tell whether an HTTP status line carries one of REDIRECT_STATUS_CODES.
     *
     * @param string $statusLine e.g. "HTTP/1.1 301 Moved Permanently".
     *
     * @return bool true if the status code is a followed redirection code.
     */
    private function isRedirection(string $statusLine): bool
    {
        if (preg_match('~^HTTP/\S+\s+(\d{3})(?!\d)~i', $statusLine, $matches) !== 1) {
            return false;
        }

        return in_array((int) $matches[1], self::REDIRECT_STATUS_CODES, true);
    }

    /**
     * Find the last non-empty Location header value, matching the header name case-insensitively.
     *
     * @param mixed[] $headers associative headers as returned by get_headers().
     *
     * @return string|null last Location value, or null if there is none.
     */
    private function getLastLocation(array $headers): ?string
    {
        $location = null;
        foreach ($headers as $name => $value) {
            // Numeric keys hold status lines, not named headers.
            if (!is_string($name) || strcasecmp($name, 'Location') !== 0 || empty($value)) {
                continue;
            }

            // The same header repeated across responses is exposed as an array of values.
            $location = is_array($value) ? end($value) : $value;
        }

        return is_string($location) && $location !== '' ? $location : null;
    }
}
|
108
|
|
|
|