Passed
Push — master ( 1fcd2e...458bc4 )
by Pauli
03:51
created

HttpUtil::loadFromUrl()   A

Complexity

Conditions 4
Paths 5

Size

Total Lines 30
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 4
eloc 19
c 2
b 0
f 0
nc 5
nop 3
dl 0
loc 30
rs 9.6333
1
<?php declare(strict_types=1);
2
3
/**
4
 * ownCloud - Music app
5
 *
6
 * This file is licensed under the Affero General Public License version 3 or
7
 * later. See the COPYING file.
8
 *
9
 * @author Pauli Järvinen <[email protected]>
10
 * @copyright Pauli Järvinen 2022 - 2025
11
 */
12
13
namespace OCA\Music\Utility;
14
15
use OCP\AppFramework\Http;
16
use OCP\AppFramework\Http\Response;
17
18
/**
19
 * Static utility functions to work with HTTP requests
20
 */
21
class HttpUtil {

	/**
	 * URL schemes accepted by loadFromUrl() and getUrlHeaders(). URLs with any other scheme are
	 * rejected before any network access is attempted.
	 */
	private const ALLOWED_SCHEMES = ['http', 'https', 'feed', 'podcast', 'pcast', 'podcasts', 'itms-pcast', 'itms-pcasts', 'itms-podcast', 'itms-podcasts'];

	/**
	 * Use HTTP GET to load the requested URL
	 *
	 * @param string $url The URL to load; its scheme must be one of self::ALLOWED_SCHEMES
	 * @param int|null $maxLength Maximum number of bytes to read, or null to read the whole response
	 * @param int|null $timeout_s Read timeout passed to the stream context, or null to use the PHP default
	 * @return array with four keys: ['content' => string|false, 'status_code' => int, 'message' => string, 'content_type' => string|null]
	 *               'status_code' is 0 when the scheme was rejected or no response headers were received.
	 */
	public static function loadFromUrl(string $url, ?int $maxLength=null, ?int $timeout_s=null) : array {
		$status_code = 0;
		$content_type = null;

		if (!self::isUrlSchemeOneOf($url, self::ALLOWED_SCHEMES)) {
			$content = false;
			$message = 'URL scheme must be one of ' . \json_encode(self::ALLOWED_SCHEMES);
		} else {
			$context = self::createContext($timeout_s);

			// The length parameter of file_get_contents isn't nullable prior to PHP8.0
			if ($maxLength === null) {
				$content = @\file_get_contents($url, false, $context);
			} else {
				$content = @\file_get_contents($url, false, $context, 0, $maxLength);
			}

			// It's some PHP magic that calling file_get_contents creates and populates also a local
			// variable array $http_response_header, provided that the server could be reached.
			// NOTE(review): newer PHP versions provide http_get_last_response_headers() as the replacement
			// for this magic variable - consider switching once the minimum supported PHP version allows.
			if (!empty($http_response_header)) {
				$parsedHeaders = self::parseHeaders($http_response_header, true);
				// The fallbacks cover a response which didn't contain any recognizable status line; without
				// them, such a response would raise "undefined index" warnings and yield null values.
				$status_code = $parsedHeaders['status_code'] ?? Http::STATUS_INTERNAL_SERVER_ERROR;
				$message = $parsedHeaders['status_msg'] ?? 'Bad response status header';
				$content_type = $parsedHeaders['content-type'] ?? null;
			} else {
				$message = 'The requested URL did not respond';
			}
		}

		return \compact('content', 'status_code', 'message', 'content_type');
	}

	/**
	 * Create a stream context for HTTP requests, based on the default options of this class
	 *
	 * @param int|null $timeout_s Value for the 'timeout' HTTP context option; null leaves the option unset
	 * @param array $extraHeaders Additional request headers as name => value pairs
	 * @param int|null $maxRedirects Value for the 'max_redirects' HTTP context option; null keeps the class default
	 * @return resource
	 */
	public static function createContext(?int $timeout_s = null, array $extraHeaders = [], ?int $maxRedirects = null) {
		$opts = self::contextOptions($extraHeaders);
		if ($timeout_s !== null) {
			$opts['http']['timeout'] = $timeout_s;
		}
		if ($maxRedirects !== null) {
			$opts['http']['max_redirects'] = $maxRedirects;
		}
		return \stream_context_create($opts);
	}

	/**
	 * @param resource $context
	 * @param bool $convertKeysToLower When true, the header names used as keys of the result array are
	 * 				converted to lower case. According to RFC 2616, HTTP headers are case-insensitive.
	 * @return array The headers from the URL, after any redirections. The header names will be array keys.
	 * 					In addition to the named headers from the server, the key 'status_code' will contain
	 * 					the status code number of the HTTP request (like 200, 302, 404) and 'status_msg'
	 * 					the textual status following the code (like 'OK' or 'Not Found').
	 * 					If the URL scheme is not allowed or the connection fails, a synthetic result with
	 * 					the keys 'status_code', 'status_msg', and 'content-length' is returned instead.
	 */
	public static function getUrlHeaders(string $url, $context, bool $convertKeysToLower=false) : array {
		if (!self::isUrlSchemeOneOf($url, self::ALLOWED_SCHEMES)) {
			return ['status_code' => Http::STATUS_FORBIDDEN, 'status_msg' => 'URL scheme not allowed', 'content-length' => '0'];
		}

		// Don't use the built-in associative mode of get_headers because it mixes up the headers from the redirection
		// responses with those of the last response after all the redirections, making it impossible to know,
		// what is the source of each header. Hence, we roll out our own parsing logic which discards all the
		// headers from the intermediate redirection responses.

		// the type of the second parameter of get_header has changed in PHP 8.0
		$associative = \version_compare(\phpversion(), '8.0', '<') ? 0 : false;
		$rawHeaders = @\get_headers($url, /** @scrutinizer ignore-type */ $associative, $context);

		if ($rawHeaders === false) {
			return ['status_code' => Http::STATUS_SERVICE_UNAVAILABLE, 'status_msg' => 'Error connecting the URL', 'content-length' => '0'];
		}

		return self::parseHeaders($rawHeaders, $convertKeysToLower);
	}

	/**
	 * Parse raw response header lines to an associative array
	 *
	 * Only the headers of the last response are returned: each encountered status line starts a new
	 * result, discarding anything collected from the preceding (redirection) responses.
	 *
	 * @param string[] $rawHeaders Header lines, one header per array item
	 * @param bool $convertKeysToLower When true, the header names are converted to lower case
	 * @return array Header names as keys plus the keys 'status_code' (int) and 'status_msg' (string);
	 *               the two latter ones are present only if the input contained a status line.
	 */
	private static function parseHeaders(array $rawHeaders, bool $convertKeysToLower) : array {
		$result = [];

		foreach ($rawHeaders as $row) {
			// The response usually starts with a header like "HTTP/1.1 200 OK". However, some shoutcast streams
			// may instead use "ICY 200 OK".
			if (Util::startsWith($row, 'HTTP/', /*ignoreCase=*/true) || Util::startsWith($row, 'ICY ', /*ignoreCase=*/true)) {
				// Start of new response. If we have already parsed some headers, then those are from some
				// intermediate redirect response and those should be discarded.
				$parts = \explode(' ', $row, 3);
				$partCount = \count($parts);
				if ($partCount === 3) {
					[, $status_code, $status_msg] = $parts;
				} elseif ($partCount === 2 && \ctype_digit($parts[1])) {
					// The reason phrase is optional and some servers omit it altogether (e.g. "HTTP/1.1 200").
					// Such a line still carries a valid status code and must not be reported as an error.
					$status_code = $parts[1];
					$status_msg = '';
				} else {
					$status_code = Http::STATUS_INTERNAL_SERVER_ERROR;
					$status_msg = 'Bad response status header';
				}
				$result = ['status_code' => (int)$status_code, 'status_msg' => $status_msg];
			} else {
				// All other lines besides the initial status line should have the format "key: value"
				$parts = \explode(':', $row, 2);
				if (\count($parts) === 2) {
					[$key, $value] = $parts;
					if ($convertKeysToLower) {
						$key = \mb_strtolower($key);
					}
					$result[\trim($key)] = \trim($value);
				}
			}
		}

		return $result;
	}

	/**
	 * @return string The complete User-Agent header line used on the requests made by this app
	 */
	public static function userAgentHeader() : string {
		return 'User-Agent: OCMusic/' . AppInfo::getVersion();
	}

	/**
	 * Build the default stream context options for HTTP requests
	 *
	 * @param array $extraHeaders Additional request headers as name => value pairs, appended after the User-Agent header
	 * @return array Options in the format expected by stream_context_create
	 */
	private static function contextOptions(array $extraHeaders = []) : array {
		$opts = [
			'http' => [
				'header' => self::userAgentHeader(),	// some servers don't allow requests without a user agent header
				'ignore_errors' => true,				// don't emit warnings for bad/unavailable URL, we handle errors manually
				'max_redirects' => 20
			]
		];

		// NOTE(review): the names and values are concatenated verbatim; this assumes that the callers never
		// pass untrusted data containing CR/LF characters - confirm against the callers.
		foreach ($extraHeaders as $key => $value) {
			$opts['http']['header'] .= "\r\n$key: $value";
		}

		return $opts;
	}

	/**
	 * Check if the URL starts with any of the given schemes followed by '://'. The URL is compared
	 * case-insensitively; the given schemes are expected to be in lower case.
	 *
	 * @param string[] $schemes
	 */
	private static function isUrlSchemeOneOf(string $url, array $schemes) : bool {
		$url = \mb_strtolower($url);

		foreach ($schemes as $scheme) {
			if (Util::startsWith($url, $scheme . '://')) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Instruct the client to cache the given response for the given number of days
	 */
	public static function setClientCachingDays(Response &$httpResponse, int $days) : void {
		$httpResponse->cacheFor($days * 24 * 60 * 60);
		$httpResponse->addHeader('Pragma', 'cache');
	}
}
177