1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Elgg\Http; |
4
|
|
|
|
5
|
|
|
/** |
6
|
|
|
* Create, sanitize and compare urls |
7
|
|
|
* |
8
|
|
|
* @since 4.3 |
9
|
|
|
* @internal |
10
|
|
|
*/ |
11
|
|
|
class Urls { |
12
|
|
|
|
13
|
|
|
/**
 * Sets elements in a URL's query string.
 *
 * Query parameters already present in the URL are kept unless they are overridden
 * (or removed) by $elements. The URL is rebuilt through buildUrl() without HTML encoding.
 *
 * @param string $url      The URL
 * @param array  $elements Key/value pairs to set in the URL. If the value is null, the
 *                         element is removed from the URL.
 *
 * @return string The new URL with the query strings added
 */
public function addQueryElementsToUrl(string $url, array $elements): string {
	// parse_url() returns false for seriously malformed URLs; fall back to an empty
	// component list so array keys are never written onto a boolean
	$url_array = parse_url($url) ?: [];

	$query = isset($url_array['query']) ? elgg_parse_str($url_array['query']) : [];

	foreach ($elements as $k => $v) {
		if ($v === null) {
			// null means "remove this element"
			unset($query[$k]);
		} else {
			$query[$k] = $v;
		}
	}

	// why check path? A: if no path, this may be a relative URL like "?foo=1". In this case,
	// the output "" would be interpreted the current URL, so in this case we *must* set
	// a query to make sure elements are removed.
	if ($query || empty($url_array['path'])) {
		$url_array['query'] = http_build_query($query);
	} else {
		unset($url_array['query']);
	}

	$string = $this->buildUrl($url_array, false);

	// Restore relative protocol to url if missing and is provided as part of the initial url (see #9874)
	if (!isset($url_array['scheme']) && str_starts_with($url, '//')) {
		$string = "//{$string}";
	}

	return $string;
}
57
|
|
|
|
58
|
|
|
/**
 * Adds action tokens to URL
 *
 * Use this function to append action tokens to a URL's GET parameters.
 * This will preserve any existing GET parameters.
 *
 * The URL is normalized first. If it already carries both '__elgg_ts' and '__elgg_token'
 * no new tokens are generated and the normalized URL is returned.
 *
 * @param string $url         Full action URL
 * @param bool   $html_encode HTML encode the url? (default: false)
 *
 * @return string URL with action tokens
 */
public function addActionTokensToUrl(string $url, bool $html_encode = false): string {
	$url = $this->normalizeUrl($url);

	// parse_url() returns false for seriously malformed URLs; fall back to an empty
	// component list so array keys are never written onto a boolean
	$components = parse_url($url) ?: [];

	$query = isset($components['query']) ? elgg_parse_str($components['query']) : [];

	if (isset($query['__elgg_ts'], $query['__elgg_token'])) {
		// tokens are already present: keep the URL untouched, but still honor $html_encode
		// (same encoding flags as buildUrl())
		return $html_encode ? htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false) : $url;
	}

	// append action tokens to the existing query
	// CSRF service is not DI injected because Urls is used by installer and CSRF requires DB installed
	$csrf = _elgg_services()->csrf;
	$query['__elgg_ts'] = $csrf->getCurrentTime()->getTimestamp();
	$query['__elgg_token'] = $csrf->generateActionToken($query['__elgg_ts']);
	$components['query'] = http_build_query($query);

	// rebuild the full url
	return $this->buildUrl($components, $html_encode);
}
92
|
|
|
|
93
|
|
|
/**
 * Builds a URL from a parts array like the one returned by {@link parse_url()}.
 *
 * @note If only partial information is passed, a partial URL will be returned.
 *
 * @param array $parts       Associative array of URL components like parse_url() returns
 *                           'user' and 'pass' parts are ignored because of security reasons
 * @param bool  $html_encode HTML Encode the url?
 *
 * @see https://github.com/Elgg/Elgg/pull/8146#issuecomment-91544585
 *
 * @return string Full URL
 */
public function buildUrl(array $parts, bool $html_encode = true): string {
	// assemble the URL piece by piece, only adding what was supplied
	$url = '';

	if (isset($parts['scheme'])) {
		$url .= $parts['scheme'] . '://';
	}

	if (isset($parts['host'])) {
		$url .= $parts['host'];
	}

	if (isset($parts['port'])) {
		$url .= ':' . $parts['port'];
	}

	if (isset($parts['path'])) {
		$url .= $parts['path'];
	}

	if (isset($parts['query'])) {
		$url .= '?' . $parts['query'];
	}

	if (isset($parts['fragment'])) {
		$url .= '#' . $parts['fragment'];
	}

	if (!$html_encode) {
		return $url;
	}

	// the last argument disables double encoding, so existing entities are left alone
	return htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false);
}
119
|
|
|
|
120
|
|
|
/**
 * Converts shorthand URLs to absolute URLs, unless the given URL is absolute, protocol-relative,
 * or starts with a protocol/fragment/query
 *
 * Spaces in the URL are always replaced by '%20' first. A URL that validates as a full URL is
 * returned as is, except that a leading http/https variant of the site URL is rewritten to the
 * site URL as returned by elgg_get_site_url().
 *
 * @example
 * elgg_normalize_url('');                   // 'http://my.site.com/'
 * elgg_normalize_url('dashboard');          // 'http://my.site.com/dashboard'
 * elgg_normalize_url('http://google.com/'); // no change
 * elgg_normalize_url('//google.com/');      // no change
 *
 * @param string $url The URL to normalize
 *
 * @return string The absolute URL
 */
public function normalizeUrl(string $url): string {
	$url = str_replace(' ', '%20', $url);

	if ($this->isValidMultiByteUrl($url)) {
		// fix invalid scheme in site url
		$site_url = elgg_get_site_url();
		$protocol_less_site_url = rtrim(preg_replace('/^https?:/i', ':', $site_url), '/');

		// escape every regex metacharacter (not just '/'), otherwise e.g. the dots in the
		// site host match any character and foreign hosts get rewritten to the site URL
		$pattern = '/^https?' . preg_quote($protocol_less_site_url, '/') . '\/?/i';

		// a callback is used so '$' or '\' in the site URL are never read as back-references
		return preg_replace_callback($pattern, static fn (): string => $site_url, $url) ?? $url;
	}

	$matches = [];
	if (preg_match('#^([a-z]+)\\:#', $url, $matches)) {
		// we don't let http/https: URLs fail filter_var(), but anything else starting with a protocol
		// is OK
		if ($matches[1] !== 'http' && $matches[1] !== 'https') {
			return $url;
		}
	}

	if (preg_match('#^(\\#|\\?|//)#', $url)) {
		// starts with '//' (protocol-relative link), query, or fragment
		return $url;
	}

	if (preg_match('#^[^/]*\\.php(\\?.*)?$#', $url)) {
		// root PHP scripts: 'install.php', 'install.php?step=step'. We don't want to confuse these
		// for domain names.
		return elgg_get_site_url() . $url;
	}

	if (preg_match('#^[^/?]*\\.#', $url)) {
		// URLs starting with domain: 'example.com', 'example.com/subpage'
		return "http://{$url}";
	}

	// 'page/handler', 'mod/plugin/file.php'
	// trim off any leading / because the site URL is stored
	// with a trailing /
	return elgg_get_site_url() . ltrim($url, '/');
}
176
|
|
|
|
177
|
|
|
/**
 * Test if two URLs are functionally identical.
 *
 * Both URLs are normalized first. Scheme, host and path (without leading/trailing slashes)
 * must match, and the GET params must be the same after removing $ignore_params.
 *
 * @tip If $ignore_params is used, neither the name nor its value will be considered when comparing.
 *
 * @tip The order of GET params doesn't matter.
 *
 * @param string $url1          First URL
 * @param string $url2          Second URL
 * @param array  $ignore_params GET params to ignore in the comparison
 *
 * @return bool
 */
public function isUrlIdentical(string $url1, string $url2, array $ignore_params): bool {
	$first = $this->normalizeUrl($url1);
	$second = $this->normalizeUrl($url2);

	if ($first === $second) {
		return true;
	}

	$first_info = parse_url($first);
	$second_info = parse_url($second);

	// surrounding slashes in the path are not significant
	if (isset($first_info['path'])) {
		$first_info['path'] = trim($first_info['path'], '/');
	}

	if (isset($second_info['path'])) {
		$second_info['path'] = trim($second_info['path'], '/');
	}

	// compare basic bits: a part must be present in both URLs with the same value,
	// or be absent from both
	// NOTE(review): port, user/pass and fragment are not part of this comparison - confirm this is intended
	foreach (['scheme', 'host', 'path'] as $part) {
		if (($first_info[$part] ?? null) !== ($second_info[$part] ?? null)) {
			return false;
		}
	}

	// quick compare of get params
	$first_query = $first_info['query'] ?? null;
	$second_query = $second_info['query'] ?? null;
	if ($first_query !== null && $first_query === $second_query) {
		return true;
	}

	// compare get params that might be out of order
	$first_params = [];
	if ($first_query !== null) {
		$first_query = html_entity_decode($first_query);
		if (!elgg_is_empty($first_query)) {
			$first_params = elgg_parse_str($first_query);
		}
	}

	$second_params = [];
	if ($second_query !== null) {
		$second_query = html_entity_decode($second_query);
		if (!elgg_is_empty($second_query)) {
			$second_params = elgg_parse_str($second_query);
		}
	}

	// drop ignored params
	foreach ($ignore_params as $param) {
		unset($first_params[$param]);
		unset($second_params[$param]);
	}

	// the recursive diff only reports items of the first array that are missing from
	// (or different in) the second one, so it has to be run in both directions
	$only_in_first = $this->arrayDiffAssocRecursive($first_params, $second_params);
	$only_in_second = $this->arrayDiffAssocRecursive($second_params, $first_params);

	return (count($only_in_first) + count($only_in_second)) === 0;
}
264
|
|
|
|
265
|
|
|
/**
 * Use a "fixed" filter_var() with FILTER_VALIDATE_URL that handles multi-byte chars.
 *
 * This function is static because it is used in \ElggInstaller.
 * During installation this service can't be constructed because the database is not yet available.
 *
 * @param string $url URL to validate
 *
 * @return bool
 * @internal
 */
public static function isValidMultiByteUrl(string $url): bool {
	// based on http://php.net/manual/en/function.filter-var.php#104160
	if (filter_var($url, FILTER_VALIDATE_URL) !== false) {
		return true;
	}

	// Check if it has unicode chars: when the elgg_strlen() length equals the byte length
	// there is nothing to substitute and the URL is simply invalid
	// (assumes elgg_strlen()/elgg_substr() are character based - TODO confirm)
	$char_count = elgg_strlen($url);
	if ($char_count === strlen($url)) {
		return false;
	}

	// Replace wide chars by X
	$substituted = '';
	$position = 0;
	while ($position < $char_count) {
		$char = elgg_substr($url, $position, 1);
		if (strlen($char) > 1) {
			$substituted .= 'X';
		} else {
			$substituted .= $char;
		}

		$position++;
	}

	// Re-check now.
	return (bool) filter_var($substituted, FILTER_VALIDATE_URL);
}
298
|
|
|
|
299
|
|
|
/**
 * Computes the difference of arrays with additional index check
 *
 * Works like array_diff_assoc(), but descends into nested arrays. The arrays are read
 * through func_get_args(): the first argument is the array to compare from, all following
 * arguments are the arrays to compare against. A key is only reported when its value is
 * missing from (or different in) every one of the other arrays. Scalars are compared strictly.
 *
 * @return array the (nested) entries of the first array that are not present in the other arrays
 *
 * @see array_diff_assoc()
 * @see https://github.com/Elgg/Elgg/issues/13016
 */
protected function arrayDiffAssocRecursive(): array {
	$others = func_get_args();
	$base = array_shift($others) ?? [];
	$total = count($others);
	$diff = [];

	foreach ($base as $key => $val) {
		if (is_array($val)) {
			// collect the nested arrays found under the same key, count the arrays without one
			$nested = [$val];
			$misses = 0;
			foreach ($others as $other) {
				// is_array() instead of empty(): an empty nested array is still a counterpart,
				// otherwise two identical empty arrays would be reported as different
				if (is_array($other[$key] ?? null)) {
					$nested[] = $other[$key];
				} else {
					$misses++;
				}
			}

			$nested_diff = $this->arrayDiffAssocRecursive(...$nested);
			if (!empty($nested_diff)) {
				$diff[$key] = $nested_diff;
			} elseif ($misses === $total) {
				// no other array has a nested array under this key
				$diff[$key] = $val;
			}

			continue;
		}

		$misses = 0;
		foreach ($others as $other) {
			if (!array_key_exists($key, $other) || $other[$key] !== $val) {
				$misses++;
			}
		}

		// without anything to compare against nothing is reported
		if ($total && $misses === $total) {
			$diff[$key] = $val;
		}
	}

	return $diff;
}
338
|
|
|
} |
339
|
|
|
|