Urls::isUrlIdentical()   F
last analyzed

Complexity

Conditions 19
Paths 309

Size

Total Lines 73
Code Lines 38

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 38
CRAP Score 19.006

Importance

Changes 0
Metric Value
cc 19
eloc 38
nc 309
nop 3
dl 0
loc 73
ccs 38
cts 39
cp 0.9744
crap 19.006
rs 2.3208
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace Elgg\Http;
4
5
/**
6
 * Create, sanitize and compare urls
7
 *
8
 * @since 4.3
9
 * @internal
10
 */
11
class Urls {
12
13
	/**
	 * Sets elements in a URL's query string.
	 *
	 * Query parameters already present in the URL are kept, unless they are overridden
	 * or removed by $elements.
	 *
	 * @param string $url      The URL
	 * @param array  $elements Key/value pairs to set in the URL. If the value is null, the
	 *                         element is removed from the URL.
	 *
	 * @return string The new URL with the query strings added
	 */
	public function addQueryElementsToUrl(string $url, array $elements): string {
		$url_array = parse_url($url);
		if ($url_array === false) {
			// parse_url() returns false on seriously malformed URLs. Continue with an empty parts
			// array so the query is still applied, without relying on the implicit conversion
			// of false to an array (deprecated since PHP 8.1).
			$url_array = [];
		}

		$query = isset($url_array['query']) ? elgg_parse_str($url_array['query']) : [];

		foreach ($elements as $k => $v) {
			if ($v === null) {
				unset($query[$k]);
			} else {
				$query[$k] = $v;
			}
		}

		// why check path? A: if no path, this may be a relative URL like "?foo=1". In this case,
		// the output "" would be interpreted as the current URL, so in this case we *must* set
		// a query to make sure elements are removed.
		if ($query || empty($url_array['path'])) {
			$url_array['query'] = http_build_query($query);
		} else {
			unset($url_array['query']);
		}

		$string = $this->buildUrl($url_array, false);

		// Restore relative protocol to url if missing and is provided as part of the initial url (see #9874)
		if (!isset($url_array['scheme']) && str_starts_with($url, '//')) {
			$string = "//{$string}";
		}

		return $string;
	}
57
58
	/**
	 * Appends the action tokens to the GET parameters of a URL.
	 *
	 * Existing GET parameters are preserved. When the URL already carries both the
	 * '__elgg_ts' and the '__elgg_token' parameter, the normalized URL is returned unchanged.
	 *
	 * @param string $url         Full action URL
	 * @param bool   $html_encode HTML encode the url? (default: false)
	 *
	 * @return string URL with action tokens
	 */
	public function addActionTokensToUrl(string $url, bool $html_encode = false): string {
		$url = $this->normalizeUrl($url);
		$url_parts = parse_url($url);

		$params = isset($url_parts['query']) ? elgg_parse_str($url_parts['query']) : [];

		$has_tokens = isset($params['__elgg_ts'], $params['__elgg_token']);
		if ($has_tokens) {
			// nothing to add, the URL is already signed
			return $url;
		}

		// CSRF service is not DI injected because Urls is used by installer and CSRF requires DB installed
		$timestamp = _elgg_services()->csrf->getCurrentTime()->getTimestamp();
		$params['__elgg_ts'] = $timestamp;
		$params['__elgg_token'] = _elgg_services()->csrf->generateActionToken($timestamp);

		// put the extended query back and rebuild the full url
		$url_parts['query'] = http_build_query($params);

		return $this->buildUrl($url_parts, $html_encode);
	}
92
	
93
	/**
	 * Builds a URL from a parts array like the one returned by {@link parse_url()}.
	 *
	 * @note If only partial information is passed, a partial URL will be returned.
	 *
	 * @param array $parts       Associative array of URL components like parse_url() returns
	 *                           'user' and 'pass' parts are ignored because of security reasons
	 * @param bool  $html_encode HTML Encode the url?
	 *
	 * @see https://github.com/Elgg/Elgg/pull/8146#issuecomment-91544585
	 *
	 * @return string Full URL
	 */
	public function buildUrl(array $parts, bool $html_encode = true): string {
		// supported components in output order, each with the text placed before and after its value
		$layout = [
			'scheme' => ['', '://'],
			'host' => ['', ''],
			'port' => [':', ''],
			'path' => ['', ''],
			'query' => ['?', ''],
			'fragment' => ['#', ''],
		];

		// build only what's given to us
		$url = '';
		foreach ($layout as $name => [$prefix, $suffix]) {
			if (isset($parts[$name])) {
				$url .= $prefix . $parts[$name] . $suffix;
			}
		}

		if (!$html_encode) {
			return $url;
		}

		// last argument: don't encode entities that are already present in the url
		return htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false);
	}
119
	
120
	/**
	 * Converts shorthand URLs to absolute URLs, unless the given URL is absolute, protocol-relative,
	 * or starts with a protocol/fragment/query
	 *
	 * Spaces in the URL are replaced by '%20'. A valid absolute URL that points to this site
	 * with the other http/https scheme is rewritten to start with the configured site URL.
	 *
	 * @example
	 * elgg_normalize_url('');                   // 'http://my.site.com/'
	 * elgg_normalize_url('dashboard');          // 'http://my.site.com/dashboard'
	 * elgg_normalize_url('http://google.com/'); // no change
	 * elgg_normalize_url('//google.com/');      // no change
	 *
	 * @param string $url The URL to normalize
	 *
	 * @return string The absolute URL
	 */
	public function normalizeUrl(string $url): string {
		$url = str_replace(' ', '%20', $url);

		if ($this->isValidMultiByteUrl($url)) {
			$site_url = elgg_get_site_url();

			// fix invalid scheme in site url
			$protocol_less_site_url = rtrim(preg_replace('/^https?:/i', ':', $site_url), '/');

			// The site url is data, not a pattern. Quote it so regex metacharacters (like the dots
			// in the hostname) only match themselves, otherwise a foreign host such as
			// 'myXsite.com' would be rewritten to the site url 'my.site.com'.
			$pattern = '/^https?' . preg_quote($protocol_less_site_url, '/') . '\/?/i';

			// preg_replace() returns null on failure, in that case keep the url as it is
			return preg_replace($pattern, $site_url, $url) ?? $url;
		}

		$matches = [];
		if (preg_match('#^([a-z]+)\\:#', $url, $matches)) {
			// we don't let http/https: URLs fail filter_var(), but anything else starting with a protocol
			// is OK
			if ($matches[1] !== 'http' && $matches[1] !== 'https') {
				return $url;
			}
		}

		if (preg_match('#^(\\#|\\?|//)#', $url)) {
			// starts with '//' (protocol-relative link), query, or fragment
			return $url;
		}

		if (preg_match('#^[^/]*\\.php(\\?.*)?$#', $url)) {
			// root PHP scripts: 'install.php', 'install.php?step=step'. We don't want to confuse these
			// for domain names.
			return elgg_get_site_url() . $url;
		}

		if (preg_match('#^[^/?]*\\.#', $url)) {
			// URLs starting with domain: 'example.com', 'example.com/subpage'
			return "http://{$url}";
		}

		// 'page/handler', 'mod/plugin/file.php'
		// trim off any leading / because the site URL is stored
		// with a trailing /
		return elgg_get_site_url() . ltrim($url, '/');
	}
176
	
177
	/**
	 * Test if two URLs are functionally identical.
	 *
	 * Both URLs are normalized first. After that the scheme, the host, the path (ignoring leading
	 * and trailing slashes) and the GET params are compared. Other URL components, like the port
	 * and the fragment, are not part of the comparison.
	 *
	 * @tip If $ignore_params is used, neither the name nor its value will be considered when comparing.
	 *
	 * @tip The order of GET params doesn't matter.
	 *
	 * @param string $url1          First URL
	 * @param string $url2          Second URL
	 * @param array  $ignore_params GET params to ignore in the comparison
	 *
	 * @return bool
	 */
	public function isUrlIdentical(string $url1, string $url2, array $ignore_params): bool {
		$url1 = $this->normalizeUrl($url1);
		$url2 = $this->normalizeUrl($url2);

		if ($url1 === $url2) {
			return true;
		}

		$url1_info = parse_url($url1);
		$url2_info = parse_url($url2);
		if ($url1_info === false || $url2_info === false) {
			// The URLs aren't textually identical and at least one of them can't be parsed, so
			// there is nothing to base a functional comparison on. Without this check two different
			// unparsable URLs would be reported as identical.
			return false;
		}

		// compare basic bits
		foreach (['scheme', 'host', 'path'] as $part) {
			if ($this->getComparableUrlPart($url1_info, $part) !== $this->getComparableUrlPart($url2_info, $part)) {
				return false;
			}
		}

		// quick compare of get params
		if (isset($url1_info['query'], $url2_info['query']) && $url1_info['query'] === $url2_info['query']) {
			return true;
		}

		// compare get params that might be out of order
		$url1_params = $this->getComparableQueryParams($url1_info, $ignore_params);
		$url2_params = $this->getComparableQueryParams($url2_info, $ignore_params);

		// the diff only reports the items of the first array that are missing from (or different in)
		// the second array, so the comparison has to be made in both directions
		return empty($this->arrayDiffAssocRecursive($url1_params, $url2_params))
			&& empty($this->arrayDiffAssocRecursive($url2_params, $url1_params));
	}

	/**
	 * Returns a URL component in the form used to compare two URLs.
	 *
	 * @param array  $url_info URL components as returned by parse_url()
	 * @param string $part     Name of the component ('scheme', 'host' or 'path')
	 *
	 * @return string|null The component, with leading and trailing slashes removed for the path.
	 *                     Null if the component isn't present
	 */
	protected function getComparableUrlPart(array $url_info, string $part): ?string {
		if (!isset($url_info[$part])) {
			return null;
		}

		return $part === 'path' ? trim($url_info[$part], '/') : $url_info[$part];
	}

	/**
	 * Returns the GET params of a parsed URL, without the params that should be ignored.
	 *
	 * @param array $url_info      URL components as returned by parse_url()
	 * @param array $ignore_params Names of the GET params to leave out
	 *
	 * @return array
	 */
	protected function getComparableQueryParams(array $url_info, array $ignore_params): array {
		if (!isset($url_info['query'])) {
			return [];
		}

		// the query could have been HTML encoded (eg. '&amp;' as separator)
		$query = html_entity_decode($url_info['query']);
		$params = elgg_is_empty($query) ? [] : elgg_parse_str($query);

		// drop ignored params
		foreach ($ignore_params as $param) {
			unset($params[$param]);
		}

		return $params;
	}
264
	
265
	/**
	 * Validates a URL with filter_var() and FILTER_VALIDATE_URL, with a workaround for
	 * URLs that contain multi-byte chars.
	 *
	 * A URL that fails the plain validation gets a second chance: every multi-byte char is
	 * replaced by an 'X' and the result is validated again.
	 *
	 * This function is static because it is used in \ElggInstaller.
	 * During installation this service can't be constructed because the database is not yet available.
	 *
	 * @param string $url URL to validate
	 *
	 * @return bool
	 * @internal
	 */
	public static function isValidMultiByteUrl(string $url): bool {
		// based on http://php.net/manual/en/function.filter-var.php#104160
		if (filter_var($url, FILTER_VALIDATE_URL) !== false) {
			return true;
		}

		// When the length reported by elgg_strlen() equals the byte count there are no
		// multi-byte chars to work around, so the failed validation is final.
		$char_count = elgg_strlen($url);
		if ($char_count === strlen($url)) {
			return false;
		}

		// Replace wide chars by X
		$ascii_url = '';
		$index = 0;
		while ($index < $char_count) {
			$char = elgg_substr($url, $index, 1);
			$ascii_url .= strlen($char) > 1 ? 'X' : $char;
			$index++;
		}

		// Re-check now.
		return (bool) filter_var($ascii_url, FILTER_VALIDATE_URL);
	}
298
	
299
	/**
	 * Computes the difference of arrays with additional index check, descending into nested arrays
	 *
	 * The arrays are passed as arguments: the first argument is the array to check, all
	 * following arguments are the arrays to compare it against. An item of the first array
	 * is reported when none of the other arrays holds an identical value under the same key.
	 *
	 * @return array The items of the first array that aren't matched by the other arrays
	 *
	 * @see array_diff_assoc()
	 * @see https://github.com/Elgg/Elgg/issues/13016
	 */
	protected function arrayDiffAssocRecursive(): array {
		$others = func_get_args();
		$base = array_shift($others);
		$other_count = count($others);

		$result = [];

		foreach ($base as $key => $value) {
			$value_is_array = is_array($value);

			// number of other arrays that have no counterpart for this item
			$mismatches = 0;
			// the nested array with its counterparts, used as arguments for the recursion
			$nested = [$value];

			foreach ($others as $other) {
				if ($value_is_array) {
					if (!empty($other[$key]) && is_array($other[$key])) {
						$nested[] = $other[$key];
					} else {
						$mismatches++;
					}

					continue;
				}

				if (!array_key_exists($key, $other) || $other[$key] !== $value) {
					$mismatches++;
				}
			}

			$missing_everywhere = ($mismatches === $other_count);

			if (!$value_is_array) {
				// scalars are only reported when there was something to compare against
				if ($missing_everywhere && $other_count > 0) {
					$result[$key] = $value;
				}

				continue;
			}

			$nested_diff = $this->arrayDiffAssocRecursive(...$nested);
			if (!empty($nested_diff)) {
				$result[$key] = $nested_diff;
			} elseif ($missing_everywhere) {
				$result[$key] = $value;
			}
		}

		return $result;
	}
338
}
339