Completed
Pull Request — master (#2066)
by
unknown
03:04
created

functions-formatting.php ➔ yourls_specialchars_decode()   C

Complexity

Conditions 10
Paths 17

Size

Total Lines 47
Code Lines 30

Duplication

Lines 5
Ratio 10.64 %
Metric Value
cc 10
eloc 30
nc 17
nop 2
dl 5
loc 47
rs 5.1578

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

<?php
/*
 * YOURLS
 * Function library for anything related to formatting / validating / sanitizing
 */
6
7
/**
 * Convert an integer (1337) to a string (3jk).
 *
 * The number is rewritten in base strlen( $chars ), each digit being the character found
 * at that offset in $chars. Division and modulo are done with BCMath functions.
 *
 * @param int|string $num   Number to convert
 * @param string     $chars Optional. Digit alphabet. When loosely equal to null, the charset
 *                          returned by yourls_get_shorturl_charset() is used instead.
 * @return mixed Converted string, passed through the 'int2string' filter
 */
function yourls_int2string( $num, $chars = null ) {
	if( $chars == null ) {
		$chars = yourls_get_shorturl_charset();
	}

	$string = '';
	$len = strlen( $chars );

	// Peel off the least significant digit until what remains fits in a single digit
	while( $num >= $len ) {
		$mod = bcmod( $num, $len );
		$num = bcdiv( $num, $len );
		$string = $chars[ $mod ] . $string;
	}

	// What remains is the most significant digit
	$string = $chars[ intval( $num ) ] . $string;

	// Note: at this point $num holds the value left after the loop, not the original input
	return yourls_apply_filter( 'int2string', $string, $num, $chars );
}
25
26
/**
 * Convert a string (3jk) to an integer (1337)
 *
 * Each character of $string is looked up in $chars to get its digit value, then weighted by
 * strlen( $chars ) raised to its position. Arithmetic is done with BCMath functions.
 *
 * @param string $string String to convert
 * @param string $chars  Optional. Digit alphabet. When loosely equal to null, the charset
 *                       returned by yourls_get_shorturl_charset() is used instead.
 * @return mixed Computed number, passed through the 'string2int' filter
 */
function yourls_string2int( $string, $chars = null ) {
	if( $chars == null ) {
		$chars = yourls_get_shorturl_charset();
	}

	$integer = 0;
	// Reverse the input so that offset $i is also the exponent of the digit at that offset
	$string = strrev( $string );
	$baselen = strlen( $chars );
	$inputlen = strlen( $string );

	for ( $i = 0; $i < $inputlen; $i++ ) {
		$index = strpos( $chars, $string[ $i ] );
		$integer = bcadd( $integer, bcmul( $index, bcpow( $baselen, $i ) ) );
	}

	// Note: the filter receives the reversed string, as $string was overwritten above
	return yourls_apply_filter( 'string2int', $integer, $string, $chars );
}
44
45
/**
 * Return a unique(ish) hash for a string to be used as a valid HTML id
 *
 * The id is the letter 'y' followed by the absolute value of the CRC32 checksum of the string.
 *
 * @param string $string String to hash
 * @return mixed HTML id, passed through the 'string2htmlid' filter
 */
function yourls_string2htmlid( $string ) {
	$htmlid = 'y' . abs( crc32( $string ) );

	return yourls_apply_filter( 'string2htmlid', $htmlid );
}
52
53
/**
 * Make sure a link keyword (ie "1fv" as in "http://sho.rt/1fv") is valid.
 *
 * Every character that is not part of the short URL charset is removed, and the result is
 * cut with substr() at 199.
 *
 * @param string $string Keyword to sanitize
 * @return mixed Sanitized keyword, passed through the 'sanitize_string' filter
 */
function yourls_sanitize_string( $string ) {
	// make a regexp pattern with the shorturl charset, and remove everything but this
	$pattern = yourls_make_regexp_pattern( yourls_get_shorturl_charset() );
	$stripped = preg_replace( '![^'.$pattern.']!', '', $string );
	$valid = substr( $stripped, 0, 199 );

	return yourls_apply_filter( 'sanitize_string', $valid, $string );
}
64
65
/**
 * Alias function. I was always getting it wrong.
 *
 * Simply forwards to yourls_sanitize_string().
 *
 * @param string $keyword Keyword to sanitize
 * @return mixed Whatever yourls_sanitize_string() returns for that keyword
 */
function yourls_sanitize_keyword( $keyword ) {
	return yourls_sanitize_string( $keyword );
}
72
73
/**
 * Sanitize a page title. No HTML per W3C http://www.w3.org/TR/html401/struct/global.html#h-7.4.2
 *
 * Tags are stripped, surrounding whitespace is trimmed and every run of whitespace is
 * collapsed into a single space.
 *
 * @since 1.5
 * @param string $unsafe_title  Title, potentially unsafe
 * @param string $fallback      Optional fallback if after sanitization nothing remains
 * @return string               Safe title, passed through the 'sanitize_title' filter
 */
function yourls_sanitize_title( $unsafe_title, $fallback = '' ) {
	$title = strip_tags( $unsafe_title );
	$title = preg_replace( "/\s+/", ' ', trim( $title ) );

	// Nothing left once cleaned up: use the fallback instead
	if ( '' === $title || false === $title ) {
		$title = $fallback;
	}

	return yourls_apply_filter( 'sanitize_title', $title, $unsafe_title, $fallback );
}
93
94
/**
 * A few sanity checks on the URL. Used for redirection or DB. For display purpose, see yourls_esc_url()
 *
 * Delegates the actual work to yourls_esc_url() with the 'redirection' context.
 *
 * @param string $unsafe_url unsafe URL
 * @param array $protocols Optional allowed protocols, default to global $yourls_allowedprotocols
 * @return string Safe URL, passed through the 'sanitize_url' filter
 */
function yourls_sanitize_url( $unsafe_url, $protocols = array() ) {
	$url = yourls_esc_url( $unsafe_url, 'redirection', $protocols );

	return yourls_apply_filter( 'sanitize_url', $url, $unsafe_url );
}
105
106
/**
 * Perform a replacement while a string is found, eg $subject = '%0%0%0DDD', $search ='%0D' -> $result =''
 *
 * Stolen from WP's _deep_replace
 *
 * Removal is repeated until none of the search strings can be found any more, so that
 * removing one occurrence cannot reveal another one.
 *
 * @param string|array $search  String, or array of strings, to remove
 * @param string       $subject String to clean
 * @return string Subject with every occurrence of the search strings removed
 */
function yourls_deep_replace( $search, $subject ){
	$found = true;
	while( $found ) {
		$found = false;
		foreach( (array) $search as $val ) {
			// An empty needle can never be removed: since PHP 8 strpos() reports it at offset 0,
			// which would make the loop below spin forever
			if( '' === (string) $val ) {
				continue;
			}
			while( strpos( $subject, $val ) !== false ) {
				$found = true;
				$subject = str_replace( $val, '', $subject );
			}
		}
	}

	return $subject;
}
126
127
/**
 * Make sure an integer is a valid integer (PHP's intval() limits to too small numbers)
 *
 * The input is converted to a string, everything but the digits 0-9 is removed, and the
 * result is cut at 20 characters.
 *
 * @param mixed $in Value to sanitize
 * @return string String of digits
 */
function yourls_sanitize_int( $in ) {
	$digits = preg_replace( '/[^0-9]/', '', strval( $in ) );

	return substr( $digits, 0, 20 );
}
134
135
/**
 * Escape a string or an array of strings before DB usage. ALWAYS escape before using in a SQL query. Thanks.
 *
 * Arrays are walked recursively and keep their keys; every non array value is escaped with
 * yourls_escape_real().
 *
 * @param string|array $data string or array of strings to be escaped
 * @return string|array escaped data
 */
function yourls_escape( $data ) {
	if( !is_array( $data ) ) {
		return yourls_escape_real( $data );
	}

	foreach( $data as $k => $v ) {
		$data[ $k ] = is_array( $v ) ? yourls_escape( $v ) : yourls_escape_real( $v );
	}

	return $data;
}
156
157
/**
 * "Real" escape. This function should NOT be called directly. Use yourls_escape() instead.
 *
 * This function uses a "real" escape if possible, using PDO, MySQL or MySQLi functions,
 * with a fallback to a "simple" addslashes
 * If you're implementing a custom DB engine or a custom cache system, you can define an
 * escape function using filter 'custom_escape_real'
 *
 * @since 1.7
 * @param string $string string to be escaped
 * @return string escaped string
 */
function yourls_escape_real( $string ) {
	global $ydb;

	// Regular case: the global DB object is an ezSQLcore instance and escapes on its own
	if( isset( $ydb ) && ( $ydb instanceof ezSQLcore ) ) {
		return $ydb->escape( $string );
	}

	// YOURLS DB classes have been bypassed by a custom DB engine or a custom cache layer
	return yourls_apply_filter( 'custom_escape_real', addslashes( $string ), $string );
}
177
178
/**
 * Sanitize an IP address
 *
 * Removes every character that is not a hexadecimal digit, a colon, a dot, a comma or a
 * space. This only filters characters, it does not check that the result is a valid address.
 *
 * @param string $ip IP address to sanitize
 * @return string Filtered string
 */
function yourls_sanitize_ip( $ip ) {
	return preg_replace( '/[^0-9a-fA-F:., ]/', '', $ip );
}
185
186
/**
 * Make sure a date is m(m)/d(d)/yyyy, return false otherwise
 *
 * Only the format is checked (1 or 2 digits, slash, 1 or 2 digits, slash, 4 digits), not
 * whether the date actually exists.
 *
 * @param string $date Date to check
 * @return string|false The unmodified date if it has the expected format, false otherwise
 */
function yourls_sanitize_date( $date ) {
	// The D modifier anchors $ at the very end: without it a trailing newline would be accepted
	if( !preg_match( '!^\d{1,2}/\d{1,2}/\d{4}$!D' , $date ) ) {
		return false;
	}

	return $date;
}
196
197
/**
 * Sanitize a date for SQL search. Return false if malformed input.
 *
 * The date must first be accepted by yourls_sanitize_date(); it is then reformatted as
 * 'Y-m-d' using strtotime() and date().
 *
 * @param string $date Date to sanitize
 * @return string|false Date formatted as 'Y-m-d', or false if the input was rejected
 */
function yourls_sanitize_date_for_sql( $date ) {
	if( !yourls_sanitize_date( $date ) ) {
		return false;
	}

	return date( 'Y-m-d', strtotime( $date ) );
}
206
207
/**
 * Return trimmed string
 *
 * A string longer than $length characters is shortened so that, once $append has been
 * added, it is $length characters long. Shorter strings are left untouched. Lengths are
 * counted in UTF-8 characters.
 *
 * @param string $string String to trim
 * @param int    $length Optional. Maximum length, in characters
 * @param string $append Optional. What to append to a string that had to be shortened
 * @return mixed Trimmed string, passed through the 'trim_long_string' filter
 */
function yourls_trim_long_string( $string, $length = 60, $append = '[...]' ) {
	$newstring = $string;

	// Encoding is given explicitly everywhere so that counting and cutting always agree
	if ( mb_strlen( $newstring, 'UTF-8' ) > $length ) {
		$keep = $length - mb_strlen( $append, 'UTF-8' );
		$newstring = mb_substr( $newstring, 0, $keep, 'UTF-8' ) . $append;
	}

	return yourls_apply_filter( 'trim_long_string', $newstring, $string, $length, $append );
}
218
219
/**
 * Sanitize a version number (1.4.1-whatever-RC1 -> 1.4.1)
 *
 * Keeps the leading run of digits and dots, then removes dots from both ends of it.
 *
 * @since 1.4.1
 * @param string $ver Version number
 * @return string Sanitized version number, or an empty string if $ver does not start with a digit or a dot
 */
function yourls_sanitize_version( $ver ) {
	preg_match( '/(^[0-9.]+).*$/', $ver, $matches );

	if( !isset( $matches[1] ) ) {
		return '';
	}

	return trim( $matches[1], '.' );
}
230
231
/**
 * Sanitize a filename (no Win32 stuff)
 *
 * Backslashes are turned into forward slashes, then every run of slashes is reduced to a
 * single one.
 *
 * @param string $file File name or path
 * @return string Sanitized file name or path
 */
function yourls_sanitize_filename( $file ) {
	$file = str_replace( '\\', '/', $file ); // sanitize for Win32 installs
	$file = preg_replace( '|/+|' ,'/', $file ); // remove any duplicate slash

	return $file;
}
240
241
/**
 * Check if a string seems to be UTF-8. Stolen from WP.
 *
 * Walks the string byte by byte: the bit pattern of each lead byte tells how many
 * continuation bytes (10bbbbbb) must follow, and the check fails as soon as one of them is
 * missing or does not have that form.
 *
 * @param string $str String to check
 * @return bool True if every byte sequence has a UTF-8 like structure, false otherwise
 */
function yourls_seems_utf8( $str ) {
	$length = strlen( $str );

	for ( $i = 0; $i < $length; $i++ ) {
		$c = ord( $str[ $i ] );

		// Number of continuation bytes announced by the lead byte
		if ( $c < 0x80 ) {
			$n = 0; // 0bbbbbbb
		} elseif ( ( $c & 0xE0 ) == 0xC0 ) {
			$n = 1; // 110bbbbb
		} elseif ( ( $c & 0xF0 ) == 0xE0 ) {
			$n = 2; // 1110bbbb
		} elseif ( ( $c & 0xF8 ) == 0xF0 ) {
			$n = 3; // 11110bbb
		} elseif ( ( $c & 0xFC ) == 0xF8 ) {
			$n = 4; // 111110bb
		} elseif ( ( $c & 0xFE ) == 0xFC ) {
			$n = 5; // 1111110b
		} else {
			return false; // Does not match any model
		}

		// n bytes matching 10bbbbbb follow ?
		for ( $j = 0; $j < $n; $j++ ) {
			if ( ( ++$i == $length ) || ( ( ord( $str[ $i ] ) & 0xC0 ) != 0x80 ) ) {
				return false;
			}
		}
	}

	return true;
}
263
264
265
/**
 * Check for PCRE /u modifier support. Stolen from WP.
 *
 * Just in case "PCRE is not compiled with PCRE_UTF8" which seems to happen
 * on some distros even for PHP 5.3
 *
 * The test is run once only: its result is kept in a static variable and reused by
 * subsequent calls.
 *
 * @since 1.7.1
 *
 * @return bool whether there's /u support or not
 */
function yourls_supports_pcre_u() {
	static $utf8_pcre;

	if( !isset( $utf8_pcre ) ) {
		// Errors are silenced on purpose: a failing match simply means "not supported"
		$utf8_pcre = (bool) @preg_match( '/^./u', 'a' );
	}

	return $utf8_pcre;
}
282
283
/**
 * Checks for invalid UTF8 in a string. Stolen from WP
 *
 * Valid strings are returned untouched. Invalid strings result in an empty string, unless
 * $strip is true and iconv() is available, in which case the string is run through iconv().
 * When PCRE has no /u support the string cannot be checked and is returned as is.
 *
 * @since 1.6
 *
 * @param string $string The text which is to be checked.
 * @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false.
 * @return string The checked text.
 */
function yourls_check_invalid_utf8( $string, $strip = false ) {
	$string = (string) $string;

	if ( 0 === strlen( $string ) ) {
		return '';
	}

	// We can't demand utf8 in the PCRE installation, so just return the string in those cases
	if ( ! yourls_supports_pcre_u() ) {
		return $string;
	}

	// preg_match fails when it encounters invalid UTF8 in $string
	if ( 1 === @preg_match( '/^./us', $string ) ) {
		return $string;
	}

	// Attempt to strip the bad chars if requested (not recommended)
	if ( $strip && function_exists( 'iconv' ) ) {
		$converted = iconv( 'utf-8', 'utf-8', $string );

		// iconv() returns false when it fails: stick to the documented string return type
		return false === $converted ? '' : $converted;
	}

	return '';
}
316
317
/**
 * Converts a number of special characters into their HTML entities. Stolen from WP.
 *
 * Specifically deals with: &, <, >, ", and '.
 *
 * $quote_style can be set to ENT_COMPAT to encode " to
 * &quot;, or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded.
 *
 * @since 1.6
 *
 * @param string $string The text which is to be encoded.
 * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES.
 * @param boolean $double_encode Optional. Whether to encode existing html entities. Default is false.
 * @return string The encoded text with HTML entities.
 */
function yourls_specialchars( $string, $quote_style = ENT_NOQUOTES, $double_encode = false ) {
	$string = (string) $string;

	if ( 0 === strlen( $string ) ) {
		return '';
	}

	// Don't bother if there are no specialchars - saves some processing
	if ( ! preg_match( '/[&<>"\']/', $string ) ) {
		return $string;
	}

	// Account for the previous behaviour of the function when the $quote_style is not an accepted value
	if ( empty( $quote_style ) ) {
		$quote_style = ENT_NOQUOTES;
	} elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
		$quote_style = ENT_QUOTES;
	}

	$charset = 'UTF-8';

	// $_quote_style remembers what was asked for, $quote_style is what htmlspecialchars() gets
	$_quote_style = $quote_style;

	if ( $quote_style === 'double' ) {
		$quote_style = ENT_COMPAT;
		$_quote_style = ENT_COMPAT;
	} elseif ( $quote_style === 'single' ) {
		// Single quotes are dealt with at the very end, see "Backwards compatibility" below
		$quote_style = ENT_NOQUOTES;
	}

	// Handle double encoding ourselves
	if ( $double_encode ) {
		$string = @htmlspecialchars( $string, $quote_style, $charset );
	} else {
		// Decode &amp; into &
		$string = yourls_specialchars_decode( $string, $_quote_style );

		// Guarantee every &entity; is valid or re-encode the &
		$string = yourls_kses_normalize_entities( $string );

		// Now re-encode everything except &entity;
		// The split keeps the entities: they sit at odd offsets, the text between them at even offsets
		$pieces = preg_split( '/(&#?x?[0-9a-z]+;)/i', $string, -1, PREG_SPLIT_DELIM_CAPTURE );

		for ( $i = 0, $count = count( $pieces ); $i < $count; $i += 2 ) {
			$pieces[$i] = @htmlspecialchars( $pieces[$i], $quote_style, $charset );
		}

		$string = implode( '', $pieces );
	}

	// Backwards compatibility
	if ( 'single' === $_quote_style ) {
		$string = str_replace( "'", '&#039;', $string );
	}

	return $string;
}
384
385
/**
 * Converts a number of HTML entities into their special characters. Stolen from WP.
 *
 * Specifically deals with: &, <, >, ", and '.
 *
 * $quote_style can be set to ENT_COMPAT to decode " entities,
 * or ENT_QUOTES to do both " and '. Default is ENT_NOQUOTES where no quotes are decoded.
 *
 * @since 1.6
 *
 * @param string $string The text which is to be decoded.
 * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old _wp_specialchars() values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES.
 * @return string The decoded text without HTML entities.
 */
function yourls_specialchars_decode( $string, $quote_style = ENT_NOQUOTES ) {
	$string = (string) $string;

	if ( 0 === strlen( $string ) ) {
		return '';
	}

	// Don't bother if there are no entities - saves a lot of processing
	if ( strpos( $string, '&' ) === false ) {
		return $string;
	}

	// Match the previous behaviour of _wp_specialchars() when the $quote_style is not an accepted value
	if ( empty( $quote_style ) ) {
		$quote_style = ENT_NOQUOTES;
	} elseif ( !in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
		$quote_style = ENT_QUOTES;
	}

	// More complete than get_html_translation_table( HTML_SPECIALCHARS )
	// The *_preg arrays map zero padded numeric entities to the canonical form used as key in the matching table
	$single = array( '&#039;'  => '\'', '&#x27;' => '\'' );
	$single_preg = array( '/&#0*39;/'  => '&#039;', '/&#x0*27;/i' => '&#x27;' );
	$double = array( '&quot;' => '"', '&#034;'  => '"', '&#x22;' => '"' );
	$double_preg = array( '/&#0*34;/'  => '&#034;', '/&#x0*22;/i' => '&#x22;' );
	$others = array( '&lt;'   => '<', '&#060;'  => '<', '&gt;'   => '>', '&#062;'  => '>', '&amp;'  => '&', '&#038;'  => '&', '&#x26;' => '&' );
	$others_preg = array( '/&#0*60;/'  => '&#060;', '/&#0*62;/'  => '&#062;', '/&#0*38;/'  => '&#038;', '/&#x0*26;/i' => '&#x26;' );

	if ( $quote_style === ENT_QUOTES ) {
		$translation = array_merge( $single, $double, $others );
		$translation_preg = array_merge( $single_preg, $double_preg, $others_preg );
	} elseif ( $quote_style === ENT_COMPAT || $quote_style === 'double' ) {
		$translation = array_merge( $double, $others );
		$translation_preg = array_merge( $double_preg, $others_preg );
	} elseif ( $quote_style === 'single' ) {
		$translation = array_merge( $single, $others );
		$translation_preg = array_merge( $single_preg, $others_preg );
	} else {
		// ENT_NOQUOTES, the only value left after the normalization above. Using a plain
		// "else" guarantees that both tables are defined whatever the execution path.
		$translation = $others;
		$translation_preg = $others_preg;
	}

	// Remove zero padding on numeric entities
	$string = preg_replace( array_keys( $translation_preg ), array_values( $translation_preg ), $string );

	// Replace characters according to translation table
	return strtr( $string, $translation );
}
446
447
448
/**
 * Escaping for HTML blocks. Stolen from WP
 *
 * The text is checked with yourls_check_invalid_utf8(), then encoded with
 * yourls_specialchars() using ENT_QUOTES.
 *
 * @since 1.6
 *
 * @param string $text Text to escape
 * @return string Escaped text, passed through the 'esc_html' filter
 */
function yourls_esc_html( $text ) {
	$safe_text = yourls_specialchars( yourls_check_invalid_utf8( $text ), ENT_QUOTES );

	return yourls_apply_filter( 'esc_html', $safe_text, $text );
}
461
462
/**
 * Escaping for HTML attributes.  Stolen from WP
 *
 * The text is checked with yourls_check_invalid_utf8(), then encoded with
 * yourls_specialchars() using ENT_QUOTES.
 *
 * @since 1.6
 *
 * @param string $text Text to escape
 * @return string Escaped text, passed through the 'esc_attr' filter
 */
function yourls_esc_attr( $text ) {
	$safe_text = yourls_specialchars( yourls_check_invalid_utf8( $text ), ENT_QUOTES );

	return yourls_apply_filter( 'esc_attr', $safe_text, $text );
}
475
476
/**
 * Checks and cleans a URL before printing it. Stolen from WP.
 *
 * A number of characters are removed from the URL. If the URL is for displaying
 * (the default behaviour) ampersands are also replaced.
 *
 * @since 1.6
 *
 * @param string $url The URL to be cleaned.
 * @param string $context 'display' or something else. Use yourls_sanitize_url() for database or redirection usage.
 * @param array $protocols Optional. Array of allowed protocols, defaults to global $yourls_allowedprotocols
 * @return string The cleaned $url, passed through the 'esc_url' filter. Empty string if the URL is empty or its protocol is not allowed.
 */
function yourls_esc_url( $url, $context = 'display', $protocols = array() ) {
	// trim first -- see #1931
	$url = trim( $url );

	// make sure there's only one 'http://' at the beginning (prevents pasting a URL right after the default 'http://')
	$url = str_replace(
		array( 'http://http://', 'http://https://' ),
		array( 'http://',        'https://'        ),
		$url
	);

	if ( '' === $url ) {
		return $url;
	}

	// make sure there's a protocol, add http:// if not
	if ( ! yourls_get_protocol( $url ) ) {
		$url = 'http://'.$url;
	}

	// What the 'esc_url' filter gets as the original URL: trimmed and with a protocol, but not cleaned up yet
	$original_url = $url;

	// force scheme and domain to lowercase - see issues 591 and 1630
	$url = yourls_lowercase_scheme_domain( $url );

	$url = preg_replace( '|[^a-z0-9-~+_.?#=!&;,/:%@$\|*\'()\[\]\\x80-\\xff]|i', '', $url );
	// Previous regexp in YOURLS was '|[^a-z0-9-~+_.?\[\]\^#=!&;,/:%@$\|*`\'<>"()\\x80-\\xff\{\}]|i'
	// TODO: check if that was it too destructive
	$strip = array( '%0d', '%0a', '%0D', '%0A' );
	$url = yourls_deep_replace( $strip, $url );
	$url = str_replace( ';//', '://', $url );

	// Replace ampersands and single quotes only when displaying.
	if ( 'display' == $context ) {
		$url = yourls_kses_normalize_entities( $url );
		$url = str_replace( '&amp;', '&#038;', $url );
		$url = str_replace( "'", '&#039;', $url );
	}

	// No usable list of protocols given: fall back to the global list
	if ( ! is_array( $protocols ) || ! $protocols ) {
		global $yourls_allowedprotocols;
		$protocols = yourls_apply_filter( 'esc_url_protocols', $yourls_allowedprotocols );
		// Note: $yourls_allowedprotocols is also globally filterable in functions-kses.php/yourls_kses_init()
	}

	if ( !yourls_is_allowed_protocol( $url, $protocols ) ) {
		return '';
	}

	// I didn't use KSES function kses_bad_protocol() because it doesn't work the way I liked (returns //blah from illegal://blah)

	return yourls_apply_filter( 'esc_url', $url, $original_url, $context );
}
539
540
541
/**
 * Lowercase scheme and domain of an URI - see issues 591, 1630, 1889
 *
 * This function is trickier than what seems to be needed at first
 *
 * First, we need to handle several URI types: http://example.com, mailto:jdoe@example.com, facetime:jdoe@example.com, and so on, see
 * yourls_kses_allowed_protocols() in functions-kses.php
 * The general rule is that the scheme ("stuff://" or "stuff:") is case insensitive and should be lowercase. But then, depending on the
 * scheme, parts of what follows the scheme may or may not be case sensitive.
 *
 * Second, simply using parse_url() and its opposite http_build_url() (see functions-compat.php) is a pretty unsafe process:
 *  - parse_url() can easily trip up on malformed or weird URLs
 *  - exploding a URL with parse_url(), lowercasing some stuff, and glueing things back with http_build_url() does not handle well
 *    "stuff:"-like URI [1] and can result in URLs ending modified [2][3]. We don't want to *validate* URI, we just want to lowercase
 *    what is supposed to be lowercased.
 *
 * So, to be conservative, this function:
 *  - lowercases the scheme
 *  - does not lowercase anything else on "stuff:" URI
 *  - tries to lowercase only scheme and domain of "stuff://" URI
 *
 * [1] http_build_url(parse_url("mailto:ozh")) == "mailto:///ozh"
 * [2] http_build_url(parse_url("http://blah#omg")) == "http://blah/#omg"
 * [3] http_build_url(parse_url("http://blah?#")) == "http://blah/"
 *
 * @since 1.7.1
 * @param string $url URL
 * @return string URL with lowercase scheme and domain
 */
function yourls_lowercase_scheme_domain( $url ) {
	$scheme = yourls_get_protocol( $url );

	if( '' == $scheme ) {
		// Scheme not found, malformed URL? Something else? Not sure.
		return $url;
	}

	// Case 2 : scheme like "stuff:" (eg "mailto:jdoe@example.com" or "bitcoin:15p1o8vnWqNkJBJGgwafNgR1GCCd6EGtQR?amount=1&label=Ozh")
	// In this case, we only lowercase the scheme, because depending on it, things after should or should not be lowercased
	if( substr( $scheme, -2, 2 ) != '//' ) {
		return str_replace( $scheme, strtolower( $scheme ), $url );
	}

	// Case 1 : scheme like "stuff://" (eg "http://example.com/" or "ssh://root@example.com")
	$parts = parse_url( $url );

	// Most likely malformed stuff, could not parse : we'll just lowercase the scheme and leave the rest untouched
	if( false == $parts ) {
		return str_replace( $scheme, strtolower( $scheme ), $url );
	}

	// URL seems parsable, let's do the best we can
	$lower = array();

	$lower['scheme'] = strtolower( $parts['scheme'] );

	if( isset( $parts['host'] ) ) {
		$lower['host'] = strtolower( $parts['host'] );
	} else {
		// NOTE(review): placeholder host, presumably so that the rebuilt beginning of the URL
		// cannot be found in $url and nothing gets replaced - confirm
		$parts['host'] = '***';
	}

	// We're not going to glue back things that could be modified in the process
	unset( $parts['path'] );
	unset( $parts['query'] );
	unset( $parts['fragment'] );

	// original beginning of the URL and its lowercase-where-needed counterpart
	// We trim the / after the domain to avoid "http://example.com" being reconstructed as "http://example.com/"
	$partial_original_url       = trim( http_build_url( $parts ), '/' );
	$partial_lower_original_url = trim( http_build_url( $parts, $lower ), '/' );

	return str_replace( $partial_original_url , $partial_lower_original_url, $url );
}
624
625
626
/**
 * Escape single quotes, htmlspecialchar " < > &, and fix line endings. Stolen from WP.
 *
 * Escapes text strings for echoing in JS. It is intended to be used for inline JS
 * (in a tag attribute, for example onclick="..."). Note that the strings have to
 * be in single quotes. The filter 'esc_js' is also applied here.
 *
 * @since 1.6
 *
 * @param string $text The text to be escaped.
 * @return string Escaped text.
 */
function yourls_esc_js( $text ) {
	$safe_text = yourls_check_invalid_utf8( $text );
	$safe_text = yourls_specialchars( $safe_text, ENT_COMPAT );

	// Turn single quote entities (&#039; or &#x27;, zero padded or not) back into real single quotes
	$safe_text = preg_replace( '/&#(x)?0*(?(1)27|39);?/i', "'", stripslashes( $safe_text ) );

	// Fix line endings: carriage returns are dropped, line feeds become a literal \n
	$safe_text = str_replace( "\r", '', $safe_text );
	$safe_text = str_replace( "\n", '\\n', addslashes( $safe_text ) );

	return yourls_apply_filter( 'esc_js', $safe_text, $text );
}
646
647
/**
 * Escaping for textarea values. Stolen from WP.
 *
 * Encodes the text with htmlspecialchars(), both single and double quotes included.
 *
 * @since 1.6
 *
 * @param string $text Text to escape
 * @return string Escaped text, passed through the 'esc_textarea' filter
 */
function yourls_esc_textarea( $text ) {
	// Charset is given explicitly, like everywhere else in this file, rather than left to PHP's default
	$safe_text = htmlspecialchars( $text, ENT_QUOTES, 'UTF-8' );

	return yourls_apply_filter( 'esc_textarea', $safe_text, $text );
}
659
660
661
/**
 * PHP emulation of JS's encodeURI
 *
 * The URL is first fully decoded, then encoded once with rawurlencode(), and finally the
 * characters listed in the table below are restored to their unencoded form.
 *
 * @link https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURI
 * @param string $url URL to encode
 * @return string Encoded URL, passed through the 'encodeURI' filter
 */
function yourls_encodeURI( $url ) {
	// Decode URL all the way
	$result = yourls_rawurldecode_while_encoded( $url );

	// Encode once
	$result = strtr( rawurlencode( $result ), array (
		'%3B' => ';', '%2C' => ',', '%2F' => '/', '%3F' => '?', '%3A' => ':', '%40' => '@',
		'%26' => '&', '%3D' => '=', '%2B' => '+', '%24' => '$', '%21' => '!', '%2A' => '*',
		'%27' => '\'', '%28' => '(', '%29' => ')', '%23' => '#',
	) );

	// @TODO:
	// Known limit: this will most likely break IDN URLs such as http://www.académie-française.fr/
	// To fully support IDN URLs, advocate use of a plugin.
	return yourls_apply_filter( 'encodeURI', $result, $url );
}
682
683
/**
 * Adds backslashes before letters and before a number at the start of a string. Stolen from WP.
 *
 * A digit at the very start of the string gets two backslashes, every letter (a-z, any
 * case) gets one.
 *
 * @since 1.6
 *
 * @param string $string Value to which backslashes will be added.
 * @return string String with backslashes inserted.
 */
function yourls_backslashit($string) {
	// Leading digit: prefixed with two backslashes
	$string = preg_replace('/^([0-9])/', '\\\\\\\\\1', $string);

	// Letters: prefixed with one backslash
	$string = preg_replace('/([a-z])/i', '\\\\\1', $string);

	return $string;
}
696
697
/**
 * Check if a string seems to be urlencoded
 *
 * We use rawurlencode instead of urlencode to avoid messing with '+'
 *
 * A string is considered encoded when decoding it with rawurldecode() changes it.
 *
 * @since 1.7
 * @param string $string
 * @return bool
 */
function yourls_is_rawurlencoded( $string ) {
	$decoded = rawurldecode( $string );

	// Loose comparison kept on purpose: a non string input (eg an integer) must still be
	// considered equal to its decoded string form
	return $decoded != $string;
}
709
710
/**
 * rawurldecode a string till it's not encoded anymore
 *
 * Deals with multiple encoding (eg "%2521" => "%21" => "!").
 * See https://github.com/YOURLS/YOURLS/issues/1303
 *
 * @since 1.7
 * @param string $string
 * @return string
 */
function yourls_rawurldecode_while_encoded( $string ) {
	$string = rawurldecode( $string );

	// Keep decoding as long as it changes something. A loop rather than recursion, so that
	// the number of encoding layers in the input cannot grow the call stack
	while( yourls_is_rawurlencoded( $string ) ) {
		$string = rawurldecode( $string );
	}

	return $string;
}
727
728
/**
 * Converts readable Javascript code into a valid bookmarklet link
 *
 * Uses https://github.com/ozh/bookmarkletgen
 *
 * The BookmarkletGen class is loaded from YOURLS_INC on first use, if not already defined.
 *
 * @since 1.7.1
 * @param  string $code  Javascript code
 * @return string        Bookmarklet link
 */
function yourls_make_bookmarklet( $code ) {
	// Second parameter is false: do not trigger autoloading, the file is required explicitly instead
	if ( !class_exists( 'BookmarkletGen', false ) ) {
		require_once YOURLS_INC . '/BookmarkletGen/BookmarkletGen.php';
	}

	$book = new BookmarkletGen;

	return $book->crunch( $code );
}
745