Completed
Pull Request — master (#2282)
by ྅༻ Ǭɀħ
01:46
created

includes/functions-formatting.php (1 issue)

Labels
Severity

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/*
3
 * YOURLS
4
 * Function library for anything related to formatting / validating / sanitizing
5
 */
6
7
/**
 * Convert an integer (1337) to a string (3jk).
 *
 * The number is written in base strlen( $chars ), each digit being the character found
 * at that offset in $chars. Divisions are done with BCMath.
 *
 * @param int|string $num   Non-negative integer (or numeric string) to convert
 * @param string     $chars Optional digit alphabet, defaults to yourls_get_shorturl_charset()
 * @return string           Converted string, passed through the 'int2string' filter
 */
function yourls_int2string( $num, $chars = null ) {
	if( $chars == null )
		$chars = yourls_get_shorturl_charset();
	$string = '';
	$len = strlen( $chars );
	while( $num >= $len ) {
		// Operands are given as strings and the scale is pinned to 0: with a non-zero global
		// bcscale() the quotient / remainder could otherwise come back as "12.00"-like values,
		// which are neither valid string offsets nor valid loop operands
		$mod = (int) bcmod( (string) $num, (string) $len );
		$num = bcdiv( (string) $num, (string) $len, 0 );
		$string = $chars[ $mod ] . $string;
	}
	$string = $chars[ intval( $num ) ] . $string;

	// $num handed to the filter is what is left after the divisions above, as it has always been
	return yourls_apply_filter( 'int2string', $string, $num, $chars );
}
25
26
/**
 * Convert a string (3jk) to an integer (1337)
 *
 * Reverse operation of yourls_int2string(): every character of $string is looked up in
 * $chars and weighted by strlen( $chars ) raised to its position, using BCMath.
 *
 * @param string $string String to convert
 * @param string $chars  Optional digit alphabet, defaults to yourls_get_shorturl_charset()
 * @return int|string    Resulting number (BCMath numeric string, or 0 for an empty input),
 *                       passed through the 'string2int' filter
 */
function yourls_string2int( $string, $chars = null ) {
	if( $chars == null )
		$chars = yourls_get_shorturl_charset();
	$integer = 0;
	$string = strrev( $string  );
	$baselen = strlen( $chars );
	$inputlen = strlen( $string );
	for ($i = 0; $i < $inputlen; $i++) {
		// strpos() returns false for a character that is not in $chars. Cast it so it keeps
		// counting as 0 instead of reaching bcmul() as '' (a ValueError on PHP 8)
		$index = (int) strpos( $chars, $string[$i] );
		// Scale pinned to 0 so a non-zero global bcscale() cannot add decimals to the result
		$weight  = bcpow( (string) $baselen, (string) $i, 0 );
		$integer = bcadd( (string) $integer, bcmul( (string) $index, $weight, 0 ), 0 );
	}

	// $string handed to the filter is the reversed input, as it has always been
	return yourls_apply_filter( 'string2int', $integer, $string, $chars );
}
44
45
/**
 * Build a unique(ish) identifier from a string, usable as an HTML id
 *
 * The id is the letter 'y' followed by the absolute value of the CRC32 checksum of the string.
 *
 * @param string $string Source string
 * @return string        Identifier, passed through the 'string2htmlid' filter
 */
function yourls_string2htmlid( $string ) {
	$checksum = abs( crc32( $string ) );
	$html_id  = 'y' . $checksum;

	return yourls_apply_filter( 'string2htmlid', $html_id );
}
52
53
/**
 * Make sure a link keyword (ie "1fv" as in "http://sho.rt/1fv") is valid.
 *
 * Every character that is not part of the short URL charset is removed, and the result is
 * cut to 199 bytes at most.
 *
 * @param string $string Keyword to sanitize
 * @return string        Sanitized keyword, passed through the 'sanitize_string' filter
 */
function yourls_sanitize_string( $string ) {
	// Character class made of the short URL charset; anything outside of it gets dropped
	$charset_pattern = yourls_make_regexp_pattern( yourls_get_shorturl_charset() );
	$not_in_charset  = '![^'.$charset_pattern.']!';

	$stripped = preg_replace( $not_in_charset, '', $string );
	$valid    = substr( $stripped, 0, 199 );

	return yourls_apply_filter( 'sanitize_string', $valid, $string );
}
64
65
/**
 * Alias of yourls_sanitize_string(), kept because the name is easier to remember.
 *
 * @param string $keyword Keyword to sanitize
 * @return string         Whatever yourls_sanitize_string() returns for that keyword
 */
function yourls_sanitize_keyword( $keyword ) {
	$sanitized = yourls_sanitize_string( $keyword );

	return $sanitized;
}
72
73
/**
 * Sanitize a page title. No HTML per W3C http://www.w3.org/TR/html401/struct/global.html#h-7.4.2
 *
 * Tags are stripped, surrounding whitespace is trimmed and every run of whitespace is
 * collapsed into a single space.
 *
 * @since 1.5
 * @param string $unsafe_title  Title, potentially unsafe
 * @param string $fallback      Optional fallback if after sanitization nothing remains
 * @return string               Safe title, passed through the 'sanitize_title' filter
 */
function yourls_sanitize_title( $unsafe_title, $fallback = '' ) {
	$title = $unsafe_title;
	$title = strip_tags( $title );
	$title = preg_replace( "/\s+/", ' ', trim( $title ) );

	// preg_replace() signals an error with null, not false: check both so the fallback
	// is really used when nothing usable remains
	if ( '' === $title || null === $title || false === $title ) {
		$title = $fallback;
	}

	return yourls_apply_filter( 'sanitize_title', $title, $unsafe_title, $fallback );
}
93
94
/**
 * Run a few sanity checks on a URL meant for redirection or DB storage.
 *
 * Thin wrapper around yourls_esc_url() called with the 'redirection' context.
 * For redirection when you don't trust the URL ($_SERVER variable, query string), see yourls_sanitize_url_safe()
 * For display purpose, see yourls_esc_url()
 *
 * @param string $unsafe_url unsafe URL
 * @param array $protocols Optional allowed protocols, default to global $yourls_allowedprotocols
 * @return string Safe URL, passed through the 'sanitize_url' filter
 */
function yourls_sanitize_url( $unsafe_url, $protocols = array() ) {
	$context = 'redirection';
	$url     = yourls_esc_url( $unsafe_url, $context, $protocols );

	return yourls_apply_filter( 'sanitize_url', $url, $unsafe_url );
}
107
108
/**
 * Sanity checks on a URL, CRLF included, for redirections to a critical URL that cannot be trusted.
 *
 * Use when the URL comes from user input or an environment variable. It is sanitized like
 * yourls_sanitize_url() does, except that yourls_esc_url() is called with the 'safe' context,
 * which also removes %0A and %0D to prevent CRLF injection.
 * Some legit URLs do contain %0A or %0D (see issue 2056, and for extra fun 1694, 1707, 2030,
 * and maybe others), so this function is only meant for internal redirections whose target
 * location isn't hardcoded, to avoid XSS via CRLF.
 *
 * @since 1.7.2
 * @param string $unsafe_url unsafe URL
 * @param array $protocols Optional allowed protocols, default to global $yourls_allowedprotocols
 * @return string Safe URL, passed through the 'sanitize_url_safe' filter
 */
function yourls_sanitize_url_safe( $unsafe_url, $protocols = array() ) {
	$context = 'safe';
	$url     = yourls_esc_url( $unsafe_url, $context, $protocols );

	return yourls_apply_filter( 'sanitize_url_safe', $url, $unsafe_url );
}
126
127
/**
 * Perform a replacement while a string is found, eg $subject = '%0%0%0DDD', $search ='%0D' -> $result =''
 *
 * Removing a needle can glue the surrounding characters into a new occurrence of a needle,
 * hence the loops: removal goes on until no needle is left in $subject.
 *
 * Stolen from WP's _deep_replace
 *
 * @param string|array $search  Needle, or list of needles, to remove
 * @param string       $subject String to clean
 * @return string               $subject with no needle left
 */
function yourls_deep_replace( $search, $subject ){
	$found = true;
	while($found) {
		$found = false;
		foreach( (array) $search as $val ) {
			// An empty needle can never be removed: on PHP 8 strpos() "finds" it at offset 0
			// forever, so skip it instead of looping endlessly
			if( '' === (string) $val ) {
				continue;
			}
			while( strpos( $subject, $val ) !== false ) {
				$found = true;
				$subject = str_replace( $val, '', $subject );
			}
		}
	}

	return $subject;
}
147
148
/**
 * Make sure an integer is a valid integer (PHP's intval() limits to too small numbers)
 *
 * The value is handled as a string: everything but the digits 0-9 is removed and at most
 * the first 20 digits are kept.
 *
 * @param mixed $int Value to sanitize
 * @return string    Digits only, 20 characters at most
 */
function yourls_sanitize_int( $int ) {
	$digits_only = preg_replace( '/[^0-9]/', '', strval( $int ) );

	return substr( $digits_only, 0, 20 );
}
155
156
/**
 * Escape a string or an array of strings before DB usage. ALWAYS escape before using in a SQL query. Thanks.
 *
 * Arrays are walked recursively with their keys preserved; every non-array value ends up
 * in yourls_escape_real().
 *
 * @param string|array $data string or array of strings to be escaped
 * @return string|array escaped data
 */
function yourls_escape( $data ) {
	// Scalar: escape it right away
	if( !is_array( $data ) ) {
		return yourls_escape_real( $data );
	}

	// Array: apply this very function to each element (nested arrays recurse, other values get escaped)
	return array_map( 'yourls_escape', $data );
}
177
178
/**
 * "Real" escape. This function should NOT be called directly. Use yourls_escape() instead.
 *
 * When the global $ydb object is an ezSQLcore instance, escaping is delegated to its
 * escape() method. Otherwise the string goes through a "simple" addslashes.
 * If you're implementing a custom DB engine or a custom cache system, you can define an
 * escape function using filter 'custom_escape_real'
 *
 * @since 1.7
 * @param string $string string to be escaped
 * @return string escaped string
 */
function yourls_escape_real( $string ) {
	global $ydb;
	if( isset( $ydb ) && ( $ydb instanceof ezSQLcore ) ) {
		return $ydb->escape( $string );
	}

	// YOURLS DB classes have been bypassed by a custom DB engine or a custom cache layer
	return yourls_apply_filter( 'custom_escape_real', addslashes( $string ), $string );
}
198
199
/**
 * Sanitize an IP address
 *
 * Only hexadecimal digits, colons, dots, commas and spaces are kept. The format of the
 * address itself is not validated.
 *
 * @param string $ip IP address to sanitize
 * @return string    String without the characters that are not allowed
 */
function yourls_sanitize_ip( $ip ) {
	$not_allowed = '/[^0-9a-fA-F:., ]/';

	return preg_replace( $not_allowed, '', $ip );
}
206
207
/**
 * Make sure a date is m(m)/d(d)/yyyy, return false otherwise
 *
 * Only the shape of the string is checked (1-2 digits, 1-2 digits, 4 digits, separated
 * by slashes), not whether it is an actual calendar date.
 *
 * @param string $date Date to check
 * @return string|false The date, unchanged, or false if it does not have the expected shape
 */
function yourls_sanitize_date( $date ) {
	// The D modifier makes $ match at the very end only: without it, a date followed by
	// a newline would be accepted and returned with its trailing newline
	if( !preg_match( '!^\d{1,2}/\d{1,2}/\d{4}$!D' , $date ) ) {
		return false;
	}
	return $date;
}
217
218
/**
 * Sanitize a date for SQL search. Return false if malformed input.
 *
 * The date must first pass yourls_sanitize_date(). It is then converted to 'Y-m-d'.
 *
 * @param string $date Date, expected as m(m)/d(d)/yyyy
 * @return string|false Date formatted as 'Y-m-d', or false if the input is rejected
 */
function yourls_sanitize_date_for_sql( $date ) {
	if( !yourls_sanitize_date( $date ) )
		return false;

	// A well shaped string can still be unparsable (eg "99/99/2000"): strtotime() then
	// returns false, which date() would silently turn into 1970-01-01
	$timestamp = strtotime( $date );
	if( false === $timestamp )
		return false;

	return date( 'Y-m-d', $timestamp );
}
227
228
/**
 * Return trimmed string
 *
 * A string longer than $length characters is cut so that, once $append is added, it is
 * $length characters long. Lengths are counted in UTF-8 characters.
 *
 * @param string $string String to trim
 * @param int    $length Optional maximum length, default 60
 * @param string $append Optional marker added to a trimmed string, default '[...]'
 * @return string        Trimmed string, passed through the 'trim_long_string' filter
 */
function yourls_trim_long_string( $string, $length = 60, $append = '[...]' ) {
	$newstring = $string;
	// Same explicit encoding for every mb_* call, so measuring and cutting always agree
	if ( mb_strlen( $newstring, 'UTF-8' ) > $length ) {
		// Never go negative: a negative length would make mb_substr() cut from the end instead
		$keep = max( 0, $length - mb_strlen( $append, 'UTF-8' ) );
		$newstring = mb_substr( $newstring, 0, $keep, 'UTF-8' ) . $append;
	}
	return yourls_apply_filter( 'trim_long_string', $newstring, $string, $length, $append );
}
239
240
/**
 * Sanitize a version number (1.4.1-whatever-RC1 -> 1.4.1)
 *
 * Keeps the leading run of digits and dots, then removes dots at both ends of it.
 *
 * @since 1.4.1
 * @param string $ver Version number
 * @return string Sanitized version number, or an empty string if $ver does not start with a digit or a dot
 */
function yourls_sanitize_version( $ver ) {
	$found = array();
	preg_match( '/(^[0-9.]+).*$/', $ver, $found );

	if ( !isset( $found[1] ) ) {
		return '';
	}

	return trim( $found[1], '.' );
}
251
252
/**
 * Sanitize a filename (no Win32 stuff)
 *
 * Backslashes become forward slashes, then every run of slashes is reduced to a single one.
 *
 * @param string $file File name or path
 * @return string      Path using single forward slashes only
 */
function yourls_sanitize_filename( $file ) {
	// Win32 installs: use forward slashes everywhere
	$forward_slashed = str_replace( '\\', '/', $file );

	// No duplicate slash
	return preg_replace( '|/+|' ,'/', $forward_slashed );
}
261
262
/**
 * Check if a string seems to be UTF-8. Stolen from WP.
 *
 * Walks the string byte by byte: each lead byte announces how many continuation bytes
 * (10bbbbbb) must follow. Lead bytes announcing up to 5 continuation bytes are accepted.
 * Only this structure is checked.
 *
 * @param string $str String to check
 * @return bool       False on the first byte that breaks the structure, true otherwise
 */
function yourls_seems_utf8( $str ) {
	$length = strlen( $str );
	$pos    = 0;

	while ( $pos < $length ) {
		$lead = ord( $str[ $pos ] );

		// Number of continuation bytes announced by the lead byte
		if ( $lead < 0x80 ) {
			$expected = 0; // 0bbbbbbb
		} elseif ( ( $lead & 0xE0 ) == 0xC0 ) {
			$expected = 1; // 110bbbbb
		} elseif ( ( $lead & 0xF0 ) == 0xE0 ) {
			$expected = 2; // 1110bbbb
		} elseif ( ( $lead & 0xF8 ) == 0xF0 ) {
			$expected = 3; // 11110bbb
		} elseif ( ( $lead & 0xFC ) == 0xF8 ) {
			$expected = 4; // 111110bb
		} elseif ( ( $lead & 0xFE ) == 0xFC ) {
			$expected = 5; // 1111110b
		} else {
			return false; // Does not match any model
		}

		// Each announced byte must exist and look like 10bbbbbb
		while ( $expected > 0 ) {
			$pos++;
			if ( $pos == $length ) {
				return false;
			}
			if ( ( ord( $str[ $pos ] ) & 0xC0 ) != 0x80 ) {
				return false;
			}
			$expected--;
		}

		$pos++;
	}

	return true;
}
284
285
286
/**
 * Check for PCRE /u modifier support. Stolen from WP.
 *
 * Just in case "PCRE is not compiled with PCRE_UTF8" which seems to happen
 * on some distros even for PHP 5.3
 *
 * The check is made once: its result is kept in a static variable for the next calls.
 *
 * @since 1.7.1
 *
 * @return bool whether there's /u support or not
 */
function yourls_supports_pcre_u() {
	static $utf8_pcre = null;

	if( null === $utf8_pcre ) {
		// Errors are silenced on purpose: a failing match simply means "no support"
		$utf8_pcre = (bool) @preg_match( '/^./u', 'a' );
	}

	return $utf8_pcre;
}
303
304
/**
 * Checks for invalid UTF8 in a string. Stolen from WP
 *
 * A valid string is returned as is. An invalid one is either stripped of its bad bytes
 * (when $strip is true and iconv is available) or replaced with an empty string.
 * When PCRE has no UTF-8 support, nothing can be checked and the string is returned untouched.
 *
 * @since 1.6
 *
 * @param string $string The text which is to be checked.
 * @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false.
 * @return string The checked text.
 */
function yourls_check_invalid_utf8( $string, $strip = false ) {
	$string = (string) $string;

	if ( 0 === strlen( $string ) ) {
		return '';
	}

	// We can't demand utf8 in the PCRE installation, so just return the string in those cases
	if ( ! yourls_supports_pcre_u() ) {
		return $string;
	}

	// preg_match fails when it encounters invalid UTF8 in $string
	if ( 1 === @preg_match( '/^./us', $string ) ) {
		return $string;
	}

	// Attempt to strip the bad chars if requested (not recommended)
	if ( $strip && function_exists( 'iconv' ) ) {
		// Without //IGNORE, iconv() stops at the first illegal byte and returns false
		// instead of dropping it. Should it still fail, stick to the documented string return.
		$stripped = @iconv( 'utf-8', 'utf-8//IGNORE', $string );
		return ( false === $stripped ) ? '' : $stripped;
	}

	return '';
}
337
338
/**
 * Converts a number of special characters into their HTML entities. Stolen from WP.
 *
 * Specifically deals with: &, <, >, ", and '.
 *
 * $quote_style can be set to ENT_COMPAT to encode " to
 * &quot;, or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded.
 *
 * @since 1.6
 *
 * @param string $string The text which is to be encoded.
 * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES.
 * @param boolean $double_encode Optional. Whether to encode existing html entities. Default is false.
 * @return string The encoded text with HTML entities.
 */
function yourls_specialchars( $string, $quote_style = ENT_NOQUOTES, $double_encode = false ) {
	$string = (string) $string;

	if ( 0 === strlen( $string ) ) {
		return '';
	}

	// Nothing to encode: leave early and save some processing
	if ( ! preg_match( '/[&<>"\']/', $string ) ) {
		return $string;
	}

	// A $quote_style that is not an accepted value behaves like it used to
	if ( empty( $quote_style ) ) {
		$quote_style = ENT_NOQUOTES;
	} elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
		$quote_style = ENT_QUOTES;
	}

	$charset = 'UTF-8';

	// $legacy_style remembers what was asked for, while $quote_style becomes a flag
	// that htmlspecialchars() understands
	$legacy_style = $quote_style;
	if ( 'double' === $quote_style ) {
		$quote_style  = ENT_COMPAT;
		$legacy_style = ENT_COMPAT;
	} elseif ( 'single' === $quote_style ) {
		$quote_style = ENT_NOQUOTES;
	}

	if ( $double_encode ) {
		$string = @htmlspecialchars( $string, $quote_style, $charset );
	} else {
		// Double encoding is handled here: first decode &amp; into &
		$string = yourls_specialchars_decode( $string, $legacy_style );

		// Guarantee every &entity; is valid or re-encode the &
		$string = yourls_kses_normalize_entities( $string );

		// Split around entities: plain text lands on even indexes, captured entities on odd ones
		$chunks = preg_split( '/(&#?x?[0-9a-z]+;)/i', $string, -1, PREG_SPLIT_DELIM_CAPTURE );

		// Re-encode everything except &entity;
		foreach ( $chunks as $index => $chunk ) {
			if ( 0 === $index % 2 ) {
				$chunks[ $index ] = @htmlspecialchars( $chunk, $quote_style, $charset );
			}
		}

		$string = implode( '', $chunks );
	}

	// Backwards compatibility
	if ( 'single' === $legacy_style ) {
		$string = str_replace( "'", '&#039;', $string );
	}

	return $string;
}
405
406
/**
 * Converts a number of HTML entities into their special characters. Stolen from WP.
 *
 * Specifically deals with: &, <, >, ", and '.
 *
 * $quote_style can be set to ENT_COMPAT to decode " entities,
 * or ENT_QUOTES to do both " and '. Default is ENT_NOQUOTES where no quotes are decoded.
 *
 * @since 1.6
 *
 * @param string $string The text which is to be decoded.
 * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old _wp_specialchars() values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES.
 * @return string The decoded text without HTML entities.
 */
function yourls_specialchars_decode( $string, $quote_style = ENT_NOQUOTES ) {
	$string = (string) $string;

	if ( 0 === strlen( $string ) ) {
		return '';
	}

	// No ampersand means no entity: nothing to decode
	if ( false === strpos( $string, '&' ) ) {
		return $string;
	}

	// Match the previous behaviour of _wp_specialchars() when the $quote_style is not an accepted value
	if ( empty( $quote_style ) ) {
		$quote_style = ENT_NOQUOTES;
	} elseif ( !in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
		$quote_style = ENT_QUOTES;
	}

	// Which quotes are to be decoded, on top of the entities that always are
	$decode_single = ( ENT_QUOTES === $quote_style || 'single' === $quote_style );
	$decode_double = ( ENT_QUOTES === $quote_style || ENT_COMPAT === $quote_style || 'double' === $quote_style );

	// Translation tables are built in this order: single quotes, double quotes, others
	// (more complete than get_html_translation_table( HTML_SPECIALCHARS ))
	$translation      = array();
	$translation_preg = array();

	if ( $decode_single ) {
		$translation      = array_merge( $translation, array( '&#039;'  => '\'', '&#x27;' => '\'' ) );
		$translation_preg = array_merge( $translation_preg, array( '/&#0*39;/'  => '&#039;', '/&#x0*27;/i' => '&#x27;' ) );
	}

	if ( $decode_double ) {
		$translation      = array_merge( $translation, array( '&quot;' => '"', '&#034;'  => '"', '&#x22;' => '"' ) );
		$translation_preg = array_merge( $translation_preg, array( '/&#0*34;/'  => '&#034;', '/&#x0*22;/i' => '&#x22;' ) );
	}

	$translation      = array_merge( $translation, array( '&lt;'   => '<', '&#060;'  => '<', '&gt;'   => '>', '&#062;'  => '>', '&amp;'  => '&', '&#038;'  => '&', '&#x26;' => '&' ) );
	$translation_preg = array_merge( $translation_preg, array( '/&#0*60;/'  => '&#060;', '/&#0*62;/'  => '&#062;', '/&#0*38;/'  => '&#038;', '/&#x0*26;/i' => '&#x26;' ) );

	// Remove zero padding on numeric entities, so that they match the keys of $translation
	$unpadded = preg_replace( array_keys( $translation_preg ), array_values( $translation_preg ), $string );

	// Replace characters according to translation table
	return strtr( $unpadded, $translation );
}
467
468
469
/**
 * Escaping for HTML blocks. Stolen from WP
 *
 * The text is checked for invalid UTF-8, then its special characters (both quote types
 * included) are converted to entities.
 *
 * @since 1.6
 *
 * @param string $text Text to escape
 * @return string      Escaped text, passed through the 'esc_html' filter
 */
function yourls_esc_html( $text ) {
	$checked   = yourls_check_invalid_utf8( $text );
	$safe_text = yourls_specialchars( $checked, ENT_QUOTES );

	return yourls_apply_filter( 'esc_html', $safe_text, $text );
}
482
483
/**
 * Escaping for HTML attributes.  Stolen from WP
 *
 * The text is checked for invalid UTF-8, then its special characters (both quote types
 * included) are converted to entities.
 *
 * @since 1.6
 *
 * @param string $text Text to escape
 * @return string      Escaped text, passed through the 'esc_attr' filter
 */
function yourls_esc_attr( $text ) {
	$checked   = yourls_check_invalid_utf8( $text );
	$safe_text = yourls_specialchars( $checked, ENT_QUOTES );

	return yourls_apply_filter( 'esc_attr', $safe_text, $text );
}
496
497
/**
 * Checks and cleans a URL before printing it. Stolen from WP.
 *
 * A number of characters are removed from the URL. If the URL is for displaying
 * (the default behaviour) ampersands are also replaced.
 *
 * This function by default "escapes" URL for display purpose (param $context = 'display') but can
 * take extra steps in URL sanitization. See yourls_sanitize_url() and yourls_sanitize_url_safe()
 *
 * @since 1.6
 *
 * @param string $url The URL to be cleaned.
 * @param string $context 'display' or something else. Use yourls_sanitize_url() for database or redirection usage.
 * @param array $protocols Optional. Array of allowed protocols, defaults to global $yourls_allowedprotocols
 * @return string The cleaned $url, or an empty string if its protocol is not allowed
 */
function yourls_esc_url( $url, $context = 'display', $protocols = array() ) {
	// trim first -- see #1931
	$url = trim( $url );

	// A URL pasted right after the default 'http://' gets a doubled protocol: keep only one
	$doubled = array( 'http://http://', 'http://https://' );
	$single  = array( 'http://',        'https://'        );
	$url     = str_replace( $doubled, $single, $url );

	if ( '' == $url ) {
		return $url;
	}

	// No protocol? Then it's http://
	if ( ! yourls_get_protocol( $url ) ) {
		$url = 'http://'.$url;
	}

	// What filters will receive as the "original" URL
	$url_before_cleaning = $url;

	// force scheme and domain to lowercase - see issues 591 and 1630
	$url = yourls_lowercase_scheme_domain( $url );

	$url = preg_replace( '|[^a-z0-9-~+_.?#=!&;,/:%@$\|*\'()\[\]\\x80-\\xff]|i', '', $url );
	// Previous regexp in YOURLS was '|[^a-z0-9-~+_.?\[\]\^#=!&;,/:%@$\|*`\'<>"()\\x80-\\xff\{\}]|i'
	// TODO: check if that was it too destructive

	// 'safe' context: one more step to make sure no CRLF injection is possible.
	// To be used when $url can be forged by evil user (eg it's from a $_SERVER variable, a query string, etc..)
	if ( 'safe' == $context ) {
		$url = yourls_deep_replace( array( '%0d', '%0a', '%0D', '%0A' ), $url );
	}

	// 'display' context only: replace ampersands and single quotes
	if ( 'display' == $context ) {
		$url = yourls_kses_normalize_entities( $url );
		$url = str_replace( array( '&amp;', "'" ), array( '&#038;', '&#039;' ), $url );
	}

	// No usable list of protocols given: use the global one
	if ( ! ( is_array( $protocols ) && $protocols ) ) {
		global $yourls_allowedprotocols;
		// Note: $yourls_allowedprotocols is also globally filterable in functions-kses.php/yourls_kses_init()
		$protocols = yourls_apply_filter( 'esc_url_protocols', $yourls_allowedprotocols );
	}

	// KSES function kses_bad_protocol() isn't used here because of the way it works (returns //blah from illegal://blah)
	if ( !yourls_is_allowed_protocol( $url, $protocols ) ) {
		return '';
	}

	return yourls_apply_filter( 'esc_url', $url, $url_before_cleaning, $context );
}
567
568
569
/**
 * Lowercase scheme and domain of an URI - see issues 591, 1630, 1889
 *
 * This function is trickier than what seems to be needed at first
 *
 * First, we need to handle several URI types: http://example.com, mailto:user@example.com,
 * facetime:user@example.com, and so on, see yourls_kses_allowed_protocols() in functions-kses.php
 * The general rule is that the scheme ("stuff://" or "stuff:") is case insensitive and should be lowercase. But then, depending on the
 * scheme, parts of what follows the scheme may or may not be case sensitive.
 *
 * Second, simply using parse_url() and its opposite http_build_url() (see functions-compat.php) is a pretty unsafe process:
 *  - parse_url() can easily trip up on malformed or weird URLs
 *  - exploding a URL with parse_url(), lowercasing some stuff, and glueing things back with http_build_url() does not handle well
 *    "stuff:"-like URI [1] and can result in URLs ending modified [2][3]. We don't want to *validate* URI, we just want to lowercase
 *    what is supposed to be lowercased.
 *
 * So, to be conservative, this function:
 *  - lowercases the scheme
 *  - does not lowercase anything else on "stuff:" URI
 *  - tries to lowercase only scheme and domain of "stuff://" URI
 *
 * [1] http_build_url(parse_url("mailto:ozh")) == "mailto:///ozh"
 * [2] http_build_url(parse_url("http://blah#omg")) == "http://blah/#omg"
 * [3] http_build_url(parse_url("http://blah?#")) == "http://blah/"
 *
 * @since 1.7.1
 * @param string $url URL
 * @return string URL with lowercase scheme and domain
 */
function yourls_lowercase_scheme_domain( $url ) {
	$scheme = yourls_get_protocol( $url );

	// Scheme not found, malformed URL? Something else? Not sure: leave the URL alone
	if( '' == $scheme ) {
		return $url;
	}

	// Scheme like "stuff:" (eg "mailto:user@example.com" or "bitcoin:15p1o8vnWqNkJBJGgwafNgR1GCCd6EGtQR?amount=1&label=Ozh"):
	// only the scheme is lowercased, because depending on it, things after should or should not be lowercased
	$has_double_slash = ( substr( $scheme, -2, 2 ) == '//' );
	if( !$has_double_slash ) {
		return str_replace( $scheme, strtolower( $scheme ), $url );
	}

	// From here on: scheme like "stuff://" (eg "http://example.com/" or "ssh://user@example.com")
	$parts = parse_url( $url );

	// Most likely malformed stuff, could not parse : just lowercase the scheme and leave the rest untouched
	if( false == $parts ) {
		return str_replace( $scheme, strtolower( $scheme ), $url );
	}

	// URL seems parsable, let's do the best we can
	$lower = array( 'scheme' => strtolower( $parts['scheme'] ) );
	if( isset( $parts['host'] ) ) {
		$lower['host'] = strtolower( $parts['host'] );
	} else {
		$parts['host'] = '***';
	}

	// We're not going to glue back things that could be modified in the process
	unset( $parts['path'], $parts['query'], $parts['fragment'] );

	// Original beginning of the URL and its lowercase-where-needed counterpart. The / after the
	// domain is trimmed to avoid "http://example.com" being reconstructed as "http://example.com/"
	$beginning       = trim( http_build_url( $parts ), '/' );
	$lower_beginning = trim( http_build_url( $parts, $lower ), '/' );

	return str_replace( $beginning, $lower_beginning, $url );
}
652
653
654
/**
 * Escape single quotes, htmlspecialchar " < > &, and fix line endings. Stolen from WP.
 *
 * Escapes text strings for echoing in JS. It is intended to be used for inline JS
 * (in a tag attribute, for example onclick="..."). Note that the strings have to
 * be in single quotes. The filter 'esc_js' is applied to the result.
 *
 * @since 1.6
 *
 * @param string $text The text to be escaped.
 * @return string Escaped text.
 */
function yourls_esc_js( $text ) {
	$checked   = yourls_check_invalid_utf8( $text );
	$safe_text = yourls_specialchars( $checked, ENT_COMPAT );

	// Entities for the single quote (&#39; &#x27; and their zero padded forms) become a real quote again
	$unslashed = stripslashes( $safe_text );
	$safe_text = preg_replace( '/&#(x)?0*(?(1)27|39);?/i', "'", $unslashed );

	// Line endings: drop carriage returns, and write new lines as a literal \n
	$safe_text = str_replace( "\r", '', $safe_text );
	$slashed   = addslashes( $safe_text );
	$safe_text = str_replace( "\n", '\\n', $slashed );

	return yourls_apply_filter( 'esc_js', $safe_text, $text );
}
674
675
/**
 * Escaping for textarea values. Stolen from WP.
 *
 * Special characters, both quote types included, are converted to entities.
 *
 * @since 1.6
 *
 * @param string $text Text to escape
 * @return string      Escaped text, passed through the 'esc_textarea' filter
 */
function yourls_esc_textarea( $text ) {
	// Explicit charset, as in yourls_specialchars(), so the result does not depend on
	// the PHP version or on the default_charset setting
	$safe_text = htmlspecialchars( $text, ENT_QUOTES, 'UTF-8' );
	return yourls_apply_filter( 'esc_textarea', $safe_text, $text );
}
687
688
689
/**
 * PHP emulation of JS's encodeURI
 *
 * The URL is fully decoded first, then encoded once with rawurlencode(), except for
 * the characters listed below, which are put back unencoded.
 *
 * @link https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURI
 * @param string $url URL to encode
 * @return string     Encoded URL, passed through the 'encodeURI' filter
 */
function yourls_encodeURI( $url ) {
	// Characters that stay unencoded
	$unencoded = array (
		'%3B' => ';', '%2C' => ',', '%2F' => '/', '%3F' => '?', '%3A' => ':', '%40' => '@',
		'%26' => '&', '%3D' => '=', '%2B' => '+', '%24' => '$', '%21' => '!', '%2A' => '*',
		'%27' => '\'', '%28' => '(', '%29' => ')', '%23' => '#',
	);

	// Decode URL all the way
	$decoded = yourls_rawurldecode_while_encoded( $url );

	// Encode once
	$result = strtr( rawurlencode( $decoded ), $unencoded );

	// @TODO:
	// Known limit: this will most likely break IDN URLs such as http://www.académie-française.fr/
	// To fully support IDN URLs, advocate use of a plugin.
	return yourls_apply_filter( 'encodeURI', $result, $url );
}
710
711
/**
 * Adds backslashes before letters and before a number at the start of a string. Stolen from WP.
 *
 * A digit at the very start of the string gets two backslashes, then every letter
 * (a-z, any case) gets one.
 *
 * @since 1.6
 *
 * @param string $string Value to which backslashes will be added.
 * @return string String with backslashes inserted.
 */
function yourls_backslashit($string) {
	// Pairs are applied one after the other: leading digit first, letters next
	$patterns     = array( '/^([0-9])/', '/([a-z])/i' );
	$replacements = array( '\\\\\\\\\1', '\\\\\1' );

	return preg_replace( $patterns, $replacements, $string );
}
724
725
/**
 * Check if a string seems to be urlencoded
 *
 * A string is considered encoded when decoding it changes it.
 * We use rawurldecode instead of urldecode to avoid messing with '+'
 *
 * @since 1.7
 * @param string $string
 * @return bool
 */
function yourls_is_rawurlencoded( $string ) {
	$decoded   = rawurldecode( $string );
	$unchanged = ( $decoded == $string );

	return !$unchanged;
}
737
738
/**
 * rawurldecode a string till it's not encoded anymore
 *
 * Deals with multiple encoding (eg "%2521" => "%21" => "!").
 * See https://github.com/YOURLS/YOURLS/issues/1303
 *
 * @since 1.7
 * @param string $string
 * @return string
 */
function yourls_rawurldecode_while_encoded( $string ) {
	// Always decode once, then again for as long as the result still looks encoded
	do {
		$string = rawurldecode( $string );
	} while( yourls_is_rawurlencoded( $string ) );

	return $string;
}
755
756
/**
 * Converts readable Javascript code into a valid bookmarklet link
 *
 * The work is done by the crunch() method of \Ozh\Bookmarkletgen\Bookmarkletgen,
 * see https://github.com/ozh/bookmarkletgen
 *
 * @since 1.7.1
 * @param  string $code  Javascript code
 * @return string        Bookmarklet link
 */
function yourls_make_bookmarklet( $code ) {
	$generator   = new \Ozh\Bookmarkletgen\Bookmarkletgen;
	$bookmarklet = $generator->crunch( $code );

	return $bookmarklet;
}
769