StringHelper - Code Metrics - gamernetwork/yolk-core - Measure and Improve Code Quality continuously with Scrutinizer

StringHelper A
last analyzed 2016-05-18 08:44 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	317
Duplicated Lines	0 %

Coupling/Cohesion

Components	0
Dependencies	0

Importance

Changes	4
Bugs	0	Features	2

Metric	Value
wmc	29
c	4
b	0
f	2
lcom	0
cbo	0
dl	0
loc	317
rs	10

14 Methods

Rating	Name	Size	Complexity
A	__construct()	1	1
A	randomHex()	3	1
A	slugify()	4	1
A	stripControlChars()	18	4
B	parseURL()	29	5
A	randomString()	8	2
A	uncamelise()	10	1
B	removeAccents()	38	1
A	latin1()	9	1
A	utf8()	11	1
A	ordinal()	8	3
A	sizeFormat()	8	2
B	xssClean()	55	5
A	normaliseLineEndings()	5	1

<?php
/*
 * This file is part of Yolk - Gamer Network's PHP Framework.
 *
 * Copyright (c) 2013 Gamer Network Ltd.
 *
 * Distributed under the MIT License, a copy of which is available in the
 * LICENSE file that was bundled with this package, or online at:
 * https://github.com/gamernetwork/yolk-core
 */

namespace yolk\helpers;

class StringHelper {

	/**
	 * Helpers cannot be instantiated.
	 */
	private function __construct() {}

	/**
	 * Parse a URL string into an array of components.
	 * Similar to the native parse_url except that the returned array will contain all components
	 * and the query component is replaced with an options component containing a decoded array.
	 * The fragment component is not returned.
	 *
	 * String URLs are split first and the individual components are url-decoded afterwards, so
	 * encoded delimiters (%26, %3D, %3F, %23, ...) cannot alter the structure of the URL and
	 * query values are decoded exactly once.
	 *
	 * @param  string|array  $url        either a URL string or a partial array of url components
	 * @param  array         $defaults   an array of default values for components
	 * @return array|boolean   Returns false if the URL could not be parsed
	 */
	public static function parseURL( $url, $defaults = array() ) {

		if( is_string($url) ) {

			$parts = \parse_url($url);

			// honour the documented contract for seriously malformed URLs
			if( $parts === false )
				return false;

			// decode per component; port is an integer and the query is decoded by parse_str() below
			foreach( array('scheme', 'host', 'user', 'pass', 'path') as $k ) {
				if( isset($parts[$k]) )
					$parts[$k] = urldecode($parts[$k]);
			}

		}
		else {
			// caller supplied the components directly - use them as they are
			$parts = $url;
		}

		// supplied component first, then the default, then an empty string
		$select = function( $k ) use ( $parts, $defaults ) {
			if( isset($parts[$k]) )
				return $parts[$k];
			elseif( isset($defaults[$k]) )
				return $defaults[$k];
			else
				return '';
		};

		$url = array(
			'scheme'  => $select('scheme'),
			'host'    => $select('host'),
			'port'    => $select('port'),
			'user'    => $select('user'),
			'pass'    => $select('pass'),
			'path'    => $select('path'),
			'options' => array(),
		);

		if( isset($parts['query']) )
			parse_str($parts['query'], $url['options']);

		return $url;

	}

	/**
	 * Returns a string of cryptographically strong random hex digits.
	 *
	 * @param  integer  $length   length of the desired hex string; odd lengths are supported,
	 *                            values below 1 yield an empty string
	 * @return string
	 * @throws \RuntimeException  if the random source fails to provide any bytes
	 */
	public static function randomHex( $length = 40 ) {

		$length = (int) $length;

		if( $length < 1 )
			return '';

		// each byte yields two hex digits; round up so odd lengths get enough digits
		$bytes = openssl_random_pseudo_bytes((int) ceil($length / 2));

		// never hand back an empty "random" token silently
		if( $bytes === false )
			throw new \RuntimeException('Unable to generate random bytes');

		return substr(bin2hex($bytes), 0, $length);

	}

	/**
	 * Returns a string of the specified length containing only the characters in the $allowed parameter.
	 * This function is not cryptographically strong.
	 * Characters are picked byte-wise, so $allowed should only contain single-byte characters.
	 *
	 * @param  integer $length    length of the desired string
	 * @param  string  $allowed   the characters allowed to appear in the output
	 * @return string  an empty string if $allowed is empty
	 */
	public static function randomString( $length, $allowed = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' ) {
		$out = '';
		$max = strlen($allowed) - 1;
		// nothing to pick from - avoid calling mt_rand() with an invalid range
		if( $max < 0 )
			return $out;
		for ($i = 0; $i < $length; $i++) {
			$out .= $allowed[mt_rand(0, $max)];
		}
		return $out;
	}

	/**
	 * Convert a camel-cased string to lower case with underscores.
	 * Word boundaries are detected between an acronym and the following word (HTMLParser => html_parser)
	 * and between a lower case letter or digit and the following upper case letter (fooBar => foo_bar).
	 * Any run of characters that are neither letters nor digits is collapsed to a single underscore.
	 *
	 * @param  string   $str
	 * @return string
	 */
	public static function uncamelise( $str ) {
		// split an acronym from the capitalised word following it
		$str = preg_replace('/([A-Z]+)([A-Z][a-z])/u', '$1_$2', $str);
		// split a lower case letter or digit from the upper case letter following it
		$str = preg_replace('/([a-z\d])([A-Z])/u', '$1_$2', $str);
		// collapse separators (spaces, punctuation, repeated underscores) to one underscore
		$str = preg_replace('/[^\p{L}\p{N}]+/u', '_', $str);
		return mb_strtolower($str);
	}

	/**
	 * Convert a string into a format safe for use in urls.
	 * Converts any accent characters to their equivalent normal characters,
	 * spells out a few common symbols (&, €, £, $), lower-cases the result
	 * and then replaces any sequence of one or more characters outside a-z and 0-9 with a dash.
	 * Leading and trailing dashes are removed.
	 *
	 * @param  string   $str   A string to convert to a slug
	 * @return string
	 */
	public static function slugify( $str ) {
		$chars = array('&' => '-and-', '€' => '-EUR-', '£' => '-GBP-', '$' => '-USD-');
		$str   = mb_strtolower(strtr(static::removeAccents($str), $chars));
		return trim(preg_replace('/([^a-z0-9]+)/u', '-', $str), '-');
	}

	/**
	 * Converts all accent characters to their ASCII counterparts.
	 * Ligatures are expanded to two letters (Æ => AE, ß => ss, ...).
	 *
	 * @param  string   $str   A string that might contain accent characters
	 * @return string
	 */
	public static function removeAccents( $str ) {
		// NOTE(review): 'ð' (eth) is mapped to 'o' here whereas it is usually transliterated
		// as 'd'; left unchanged because existing slugs may depend on it - confirm.
		$chars = array(
			'ª' => 'a', 'º' => 'o', 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A',
			'Ä' => 'A', 'Å' => 'A', 'Ā' => 'A', 'Ă' => 'A', 'Ą' => 'A', 'à' => 'a',
			'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'ā' => 'a',
			'ă' => 'a', 'ą' => 'a', 'Ç' => 'C', 'Ć' => 'C', 'Ĉ' => 'C', 'Ċ' => 'C',
			'Č' => 'C', 'ç' => 'c', 'ć' => 'c', 'ĉ' => 'c', 'ċ' => 'c', 'č' => 'c',
			'Đ' => 'D', 'Ď' => 'D', 'đ' => 'd', 'ď' => 'd', 'È' => 'E', 'É' => 'E',
			'Ê' => 'E', 'Ë' => 'E', 'Ē' => 'E', 'Ĕ' => 'E', 'Ė' => 'E', 'Ę' => 'E',
			'Ě' => 'E', 'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e', 'ē' => 'e',
			'ĕ' => 'e', 'ė' => 'e', 'ę' => 'e', 'ě' => 'e', 'ƒ' => 'f', 'Ĝ' => 'G',
			'Ğ' => 'G', 'Ġ' => 'G', 'Ģ' => 'G', 'ĝ' => 'g', 'ğ' => 'g', 'ġ' => 'g',
			'ģ' => 'g', 'Ĥ' => 'H', 'Ħ' => 'H', 'ĥ' => 'h', 'ħ' => 'h', 'Ì' => 'I',
			'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ĩ' => 'I', 'Ī' => 'I', 'Ĭ' => 'I',
			'Į' => 'I', 'İ' => 'I', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
			'ĩ' => 'i', 'ī' => 'i', 'ĭ' => 'i', 'į' => 'i', 'ı' => 'i', 'Ĵ' => 'J',
			'ĵ' => 'j', 'Ķ' => 'K', 'ķ' => 'k', 'ĸ' => 'k', 'Ĺ' => 'L', 'Ļ' => 'L',
			'Ľ' => 'L', 'Ŀ' => 'L', 'Ł' => 'L', 'ĺ' => 'l', 'ļ' => 'l', 'ľ' => 'l',
			'ŀ' => 'l', 'ł' => 'l', 'Ñ' => 'N', 'Ń' => 'N', 'Ņ' => 'N', 'Ň' => 'N',
			'Ŋ' => 'N', 'ñ' => 'n', 'ń' => 'n', 'ņ' => 'n', 'ň' => 'n', 'ŉ' => 'n',
			'ŋ' => 'n', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O',
			'Ø' => 'O', 'Ō' => 'O', 'Ŏ' => 'O', 'Ő' => 'O', 'ò' => 'o', 'ó' => 'o',
			'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o', 'ō' => 'o', 'ŏ' => 'o',
			'ő' => 'o', 'ð' => 'o', 'Ŕ' => 'R', 'Ŗ' => 'R', 'Ř' => 'R', 'ŕ' => 'r',
			'ŗ' => 'r', 'ř' => 'r', 'Ś' => 'S', 'Ŝ' => 'S', 'Ş' => 'S', 'Š' => 'S',
			'Ș' => 'S', 'ś' => 's', 'ŝ' => 's', 'ş' => 's', 'š' => 's', 'ș' => 's',
			'ſ' => 's', 'Ţ' => 'T', 'Ť' => 'T', 'Ŧ' => 'T', 'Ț' => 'T', 'ţ' => 't',
			'ť' => 't', 'ŧ' => 't', 'ț' => 't', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U',
			'Ü' => 'U', 'Ũ' => 'U', 'Ū' => 'U', 'Ŭ' => 'U', 'Ů' => 'U', 'Ű' => 'U',
			'Ų' => 'U', 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 'ũ' => 'u',
			'ū' => 'u', 'ŭ' => 'u', 'ů' => 'u', 'ű' => 'u', 'ų' => 'u', 'Ŵ' => 'W',
			'ŵ' => 'w', 'Ý' => 'Y', 'Ÿ' => 'Y', 'Ŷ' => 'Y', 'ý' => 'y', 'ÿ' => 'y',
			'ŷ' => 'y', 'Ź' => 'Z', 'Ż' => 'Z', 'Ž' => 'Z', 'ź' => 'z', 'ż' => 'z',
			'ž' => 'z', 'Æ' => 'AE', 'æ' => 'ae', 'Ĳ' => 'IJ', 'ĳ' => 'ij',
			'Œ' => 'OE', 'œ' => 'oe', 'ß' => 'ss', 'þ' => 'th', 'Þ' => 'TH',
		);
		return strtr($str, $chars);
	}

	/**
	 * Converts a UTF-8 string to Latin-1 with unsupported characters encoded as numeric entities.
	 * Example: I want to turn text like
	 * hello é β 水
	 * into
	 * hello é &#946; &#27700;
	 *
	 * @param  string   $str
	 * @return string   the converted string.
	 */
	public static function latin1( $str ) {
		// everything Latin-1 cannot represent (U+0100 and above, including characters
		// outside the Basic Multilingual Plane) becomes a decimal numeric entity
		$encoded = mb_encode_numericentity(
			(string) $str,
			array(0x0100, 0x10FFFF, 0, 0x1FFFFF),
			'UTF-8'
		);
		// only code points below U+0100 remain, which map directly onto Latin-1 bytes
		return mb_convert_encoding($encoded, 'ISO-8859-1', 'UTF-8');
	}

	/**
	 * Converts a Latin-1 string to UTF-8 and decodes entities.
	 * Quote entities are left encoded (ENT_NOQUOTES).
	 *
	 * @param  string   $str
	 * @return string   the converted string.
	 */
	public static function utf8( $str ) {
		$converted = mb_convert_encoding((string) $str, 'UTF-8', 'ISO-8859-1');
		return html_entity_decode($converted, ENT_NOQUOTES, 'UTF-8');
	}

	/**
	 * Return a number with its ordinal suffix (st, nd, rd, th) appended.
	 * Taken from: http://stackoverflow.com/questions/3109978/php-display-number-with-ordinal-suffix
	 *
	 * @param  integer   $n
	 * @return string    the number cast as a string with the ordinal suffixed.
	 */
	public static function ordinal( $n ) {
		$ends = array('th','st','nd','rd','th','th','th','th','th','th');
		// the suffix depends on the magnitude only; a negative remainder would not be a valid index
		$abs  = abs((int) $n);
		// numbers ending in 11, 12 or 13 always use th instead of the usual st/nd/rd
		if( ($abs % 100) >= 11 && ($abs % 100) <= 13 )
			return "{$n}th";
		else
			return "{$n}{$ends[$abs % 10]}";
	}

	/**
	 * Convert a number of bytes to a human-friendly string using the largest suitable unit.
	 * Units are powers of 1024, from B up to PB; negative values are treated as zero.
	 * Taken from: http://www.php.net/manual/de/function.filesize.php#91477
	 *
	 * @param  integer   $bytes       the number of bytes to format
	 * @param  integer   $precision   the number of decimal places to format the result to.
	 * @return string
	 */
	public static function sizeFormat( $bytes, $precision = 2 ) {
		$units = array('B', 'KB', 'MB', 'GB', 'TB', 'PB');
		$last  = count($units) - 1;
		$bytes = max($bytes, 0);
		$pow   = 0;
		// repeated division avoids log() rounding errors, bit-shift overflow on 32-bit
		// builds and negative exponents for values between 0 and 1
		while( $bytes >= 1024 && $pow < $last ) {
			$bytes /= 1024;
			$pow++;
		}
		return round($bytes, $precision). ' '. $units[$pow];
	}

	/**
	 * Remove XSS vulnerabilities from a string.
	 * Shamelessly ripped from Kohana v2 and then tweaked to remove control characters
	 * and replace the associated regex components with \s instead.
	 * Also added a couple of other tags to the really bad list.
	 * Handles most of the XSS vectors listed at http://ha.ckers.org/xss.html
	 * Arrays are cleaned recursively, using the same charset for every element.
	 * Empty values are returned unchanged.
	 *
	 * @param  string|array   $str
	 * @param  string         $charset   charset used when decoding HTML entities
	 * @return string|array
	 */
	public static function xssClean( $str, $charset = 'UTF-8' ) {

		if( !$str )
			return $str;

		if( is_array($str) ) {
			foreach( $str as $k => $item ) {
				$str[$k] = static::xssClean($item, $charset);
			}
			return $str;
		}

		// strip any raw control characters that might interfere with our cleaning
		$str = static::stripControlChars($str);

		// fix and decode entities (handles missing ; terminator)
		// &amp; &lt; and &gt; are double-encoded first so they survive the decode below
		$str = str_replace(array('&amp;','&lt;','&gt;'), array('&amp;amp;','&amp;lt;','&amp;gt;'), $str);
		$str = preg_replace('/(&#*\w+)\s+;/u', '$1;', $str);
		$str = preg_replace('/(&#x*[0-9A-F]+);*/iu', '$1;', $str);
		$str = html_entity_decode($str, ENT_COMPAT, $charset);

		// strip any control characters that were sneakily encoded as entities
		$str = static::stripControlChars($str);

		// normalise line endings
		$str = static::normaliseLineEndings($str);

		// remove any attribute starting with "on" or xmlns
		$str = preg_replace('#(?:on[a-z]+|xmlns)\s*=\s*[\'"\s]?[^\'>"]*[\'"\s]?\s?#iu', '', $str);

		// remove javascript: and vbscript: protocols and -moz-binding CSS property
		$str = preg_replace('#([a-z]*)\s*=\s*([`\'"]*)\s*j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*:#iu', '$1=$2nojavascript...', $str);
		$str = preg_replace('#([a-z]*)\s*=([\'"]*)\s*v\s*b\s*s\s*c\s*r\s*i\s*p\s*t\s*:#iu', '$1=$2novbscript...', $str);
		$str = preg_replace('#([a-z]*)\s*=([\'"]*)\s*-moz-binding\s*:#u', '$1=$2nomozbinding...', $str);

		// only works in IE: <span style="width: expression(alert('XSS!'));"></span>
		// NOTE(review): the second pattern looks for "behaviour(" whereas the IE CSS property
		// is spelled "behavior" and used as "behavior: url(...)" - confirm whether it ever matches.
		$str = preg_replace('#(<[^>]+?)style\s*=\s*[`\'"]*.*?expression\s*\([^>]*+>#isu', '$1>', $str);
		$str = preg_replace('#(<[^>]+?)style\s*=\s*[`\'"]*.*?behaviour\s*\([^>]*+>#isu', '$1>', $str);
		$str = preg_replace('#(<[^>]+?)style\s*=\s*[`\'"]*.*?s\s*c\s*r\s*i\s*p\s*t\s*:*[^>]*+>#isu', '$1>', $str);

		// remove namespaced elements (we do not need them)
		$str = preg_replace('#</*\w+:\w[^>]*+>#iu', '', $str);

		// remove data URIs
		$str = preg_replace("#data:[\w/]+;\w+,[\w\r\n+=/]*#iu", "data: not allowed", $str);

		// remove really unwanted tags; repeat until stable so that tags which only
		// appear once an inner tag has been removed are caught as well
		do {
			$old = $str;
			$str = preg_replace('#</*(?:applet|b(?:ase|gsound|link)|body|embed|frame(?:set)?|head|html|i(?:frame|layer)|l(?:ayer|ink)|meta|object|s(?:cript|tyle)|title|xml)[^>]*+>#iu', '', $str);
		}
		while ($old !== $str);

		return $str;
	}

	/**
	 * Remove every control character except newline (10/x0A) carriage return (13/x0D), and horizontal tab (09/x09)
	 * Arrays are processed recursively.
	 *
	 * @param  string|array   $str
	 * @return string|array
	 */
	public static function stripControlChars( $str ) {

		if( is_array($str) ) {
			foreach( $str as $k => $item ) {
				$str[$k] = static::stripControlChars($item);
			}
			return $str;
		}

		// Removes bytes 00-08, 11, 12, 14-31 and 127.
		// The pattern is matched byte-wise (no "u" modifier): these bytes never occur inside a
		// multi-byte UTF-8 sequence, so valid UTF-8 is handled identically while input containing
		// invalid UTF-8 is no longer discarded entirely (preg_replace returns null in UTF-8 mode).
		// A single pass removes every occurrence, so no loop is required.
		return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);

	}

	/**
	 * Ensures that a string has consistent line-endings.
	 * All line-endings (CRLF and CR) are converted to LF with a maximum of two consecutive.
	 *
	 * @param  string   $str
	 * @return string
	 */
	public static function normaliseLineEndings( $str ) {
		// CRLF must be handled before lone CR so it does not become two line feeds
		$str = str_replace("\r\n", "\n", $str);
		$str = str_replace("\r", "\n", $str);
		return preg_replace("/\n{2,}/", "\n\n", $str);
	}

}

// EOF

1			<?php
2			/*
3			* This file is part of Yolk - Gamer Network's PHP Framework.
4			*
5			* Copyright (c) 2013 Gamer Network Ltd.
6			*
7			* Distributed under the MIT License, a copy of which is available in the
8			* LICENSE file that was bundled with this package, or online at:
9			* https://github.com/gamernetwork/yolk-core
10			*/
11
12			namespace yolk\helpers;
13
14			class StringHelper {
15
16			/**
17			* Helpers cannot be instantiated.
18			*/
19			private function __construct() {}
20
21			/**
22			* Parse a URL string into an array of components.
23			* Similar to the native parse_url except that the returned array will contain all components
24			* and the query component is replaced with an options component containing a decoded array.
25			*
26			* @param string\|array $url either a string array or a partial list of url components
27			* @param array $defaults an array of default values for components
28			* @return array\|boolean Returns false if the URL could not be parsed
29			*/
30			public static function parseURL( $url, $defaults = array() ) {
31
32			$parts = is_string($url) ? \parse_url(urldecode($url)) : $url;
33
34			$select = function( $k ) use ( $parts, $defaults ) {
35			if( isset($parts[$k]) )
36			return $parts[$k];
37			elseif( isset($defaults[$k]) )
38			return $defaults[$k];
39			else
40			return '';
41			};
42
43			$url = array(
44			'scheme' => $select('scheme'),
45			'host' => $select('host'),
46			'port' => $select('port'),
47			'user' => $select('user'),
48			'pass' => $select('pass'),
49			'path' => $select('path'),
50			'options' => array(),
51			);
52
53			if( isset($parts['query']) )
54			parse_str($parts['query'], $url['options']);
55
56			return $url;
57
58			}
59
60			/**
61			* Returns a string of cryptographically strong random hex digits.
62			*
63			* @param integer $length length of the desired hex string
64			* @return string
65			*/
66			public static function randomHex( $length = 40 ) {
67			return bin2hex(openssl_random_pseudo_bytes($length / 2));
68			}
69
70			/**
71			* Returns a string of the specified length containing only the characters in the $allowed parameter.
72			* This function is not cryptographically strong.
73			*
74			* @param string $length length of the desired string
75			* @param string $allowed the characters allowed to appear in the output
76			* @return string
77			*/
78			public static function randomString( $length, $allowed = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' ) {
79			$out = '';
80			$max = strlen($allowed) - 1;
81			for ($i = 0; $i < $length; $i++) {
82			$out .= $allowed[mt_rand(0, $max)];
83			}
84			return $out;
85			}
86
87			/**
88			* Convert a camel-cased string to lower case with underscores
89			*/
90			public static function uncamelise( $str ) {
91			return mb_strtolower(
92			preg_replace(
93			'/^A-Z^a-z^0-9]+/', '_',
94			preg_replace('/([a-z\d])([A-Z])/u', '$1_$2',
95			preg_replace('/([A-Z+])([A-Z][a-z])/u', '$1_$2', $str)
96			)
97			)
98			);
99			}
100
101			/**
102			* Convert a string into a format safe for use in urls.
103			* Converts any accent characters to their equivalent normal characters
104			* and then any sequence of two or more non-alphanumeric characters to a dash.
105			*
106			* @param string $str A string to convert to a slug
107			* @return string
108			*/
109			public static function slugify( $str ) {
110			$chars = array('&' => '-and-', '€' => '-EUR-', '£' => '-GBP-', '$' => '-USD-');
111			return trim(preg_replace('/([^a-z0-9]+)/u', '-', mb_strtolower(strtr(static::removeAccents($str), $chars))), '-');
112			}
113
114			/**
115			* Converts all accent characters to their ASCII counterparts.
116			*
117			* @param string $str A string that might contain accent characters
118			* @return string
119			*/
120			public static function removeAccents( $str ) {
121			$chars = array(
122			'ª' => 'a', 'º' => 'o', 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A',
123			'Ä' => 'A', 'Å' => 'A', 'Ā' => 'A', 'Ă' => 'A', 'Ą' => 'A', 'à' => 'a',
124			'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'ā' => 'a',
125			'ă' => 'a', 'ą' => 'a', 'Ç' => 'C', 'Ć' => 'C', 'Ĉ' => 'C', 'Ċ' => 'C',
126			'Č' => 'C', 'ç' => 'c', 'ć' => 'c', 'ĉ' => 'c', 'ċ' => 'c', 'č' => 'c',
127			'Đ' => 'D', 'Ď' => 'D', 'đ' => 'd', 'ď' => 'd', 'È' => 'E', 'É' => 'E',
128			'Ê' => 'E', 'Ë' => 'E', 'Ē' => 'E', 'Ĕ' => 'E', 'Ė' => 'E', 'Ę' => 'E',
129			'Ě' => 'E', 'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e', 'ē' => 'e',
130			'ĕ' => 'e', 'ė' => 'e', 'ę' => 'e', 'ě' => 'e', 'ƒ' => 'f', 'Ĝ' => 'G',
131			'Ğ' => 'G', 'Ġ' => 'G', 'Ģ' => 'G', 'ĝ' => 'g', 'ğ' => 'g', 'ġ' => 'g',
132			'ģ' => 'g', 'Ĥ' => 'H', 'Ħ' => 'H', 'ĥ' => 'h', 'ħ' => 'h', 'Ì' => 'I',
133			'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ĩ' => 'I', 'Ī' => 'I', 'Ĭ' => 'I',
134			'Į' => 'I', 'İ' => 'I', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
135			'ĩ' => 'i', 'ī' => 'i', 'ĭ' => 'i', 'į' => 'i', 'ı' => 'i', 'Ĵ' => 'J',
136			'ĵ' => 'j', 'Ķ' => 'K', 'ķ' => 'k', 'ĸ' => 'k', 'Ĺ' => 'L', 'Ļ' => 'L',
137			'Ľ' => 'L', 'Ŀ' => 'L', 'Ł' => 'L', 'ĺ' => 'l', 'ļ' => 'l', 'ľ' => 'l',
138			'ŀ' => 'l', 'ł' => 'l', 'Ñ' => 'N', 'Ń' => 'N', 'Ņ' => 'N', 'Ň' => 'N',
139			'Ŋ' => 'N', 'ñ' => 'n', 'ń' => 'n', 'ņ' => 'n', 'ň' => 'n', 'ŉ' => 'n',
140			'ŋ' => 'n', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O',
141			'Ø' => 'O', 'Ō' => 'O', 'Ŏ' => 'O', 'Ő' => 'O', 'ò' => 'o', 'ó' => 'o',
142			'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o', 'ō' => 'o', 'ŏ' => 'o',
143			'ő' => 'o', 'ð' => 'o', 'Ŕ' => 'R', 'Ŗ' => 'R', 'Ř' => 'R', 'ŕ' => 'r',
144			'ŗ' => 'r', 'ř' => 'r', 'Ś' => 'S', 'Ŝ' => 'S', 'Ş' => 'S', 'Š' => 'S',
145			'Ș' => 'S', 'ś' => 's', 'ŝ' => 's', 'ş' => 's', 'š' => 's', 'ș' => 's',
146			'ſ' => 's', 'Ţ' => 'T', 'Ť' => 'T', 'Ŧ' => 'T', 'Ț' => 'T', 'ţ' => 't',
147			'ť' => 't', 'ŧ' => 't', 'ț' => 't', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U',
148			'Ü' => 'U', 'Ũ' => 'U', 'Ū' => 'U', 'Ŭ' => 'U', 'Ů' => 'U', 'Ű' => 'U',
149			'Ų' => 'U', 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 'ũ' => 'u',
150			'ū' => 'u', 'ŭ' => 'u', 'ů' => 'u', 'ű' => 'u', 'ų' => 'u', 'Ŵ' => 'W',
151			'ŵ' => 'w', 'Ý' => 'Y', 'Ÿ' => 'Y', 'Ŷ' => 'Y', 'ý' => 'y', 'ÿ' => 'y',
152			'ŷ' => 'y', 'Ź' => 'Z', 'Ż' => 'Z', 'Ž' => 'Z', 'ź' => 'z', 'ż' => 'z',
153			'ž' => 'z', 'Æ' => 'AE', 'æ' => 'ae', 'Ĳ' => 'IJ', 'ĳ' => 'ij',
154			'Œ' => 'OE', 'œ' => 'oe', 'ß' => 'ss', 'þ' => 'th', 'Þ' => 'th',
155			);
156			return strtr($str, $chars);
157			}
158
159			/**
160			* Converts a UTF-8 string to Latin-1 with unsupported characters encoded as numeric entities.
161			* Example: I want to turn text like
162			* hello é β 水
163			* into
164			* hello é &#946; &#27700;
165			*
166			* @param string $str
167			* @return string the converted string.
168			*/
169			public static function latin1( $str ) {
170			return utf8_decode(
171			mb_encode_numericentity(
172			(string) $str,
173			array(0x0100, 0xFFFF, 0, 0xFFFF),
174			'UTF-8'
175			)
176			);
177			}
178
179			/**
180			* Converts a Latin-1 string to UTF-8 and decodes entities.
181			*
182			* @param string $str
183			* @return string the converted string.
184			*/
185			public static function utf8( $str ) {
186			return html_entity_decode(
187			mb_convert_encoding(
188			(string) $str,
189			'UTF-8',
190			'ISO-8859-1'
191			),
192			ENT_NOQUOTES,
193			'UTF-8'
194			);
195			}
196
197			/**
198			* Return the ordinal suffix (st, nd, rd, th) of a number.
199			* Taken from: http://stackoverflow.com/questions/3109978/php-display-number-with-ordinal-suffix
200			*
201			* @param integer $n
202			* @return string the number cast as a string with the ordinal suffixed.
203			*/
204			public static function ordinal( $n ) {
205			$ends = array('th','st','nd','rd','th','th','th','th','th','th');
206			// if tens digit is 1, 2 or 3 then use th instead of usual ordinal
207			if( ($n % 100) >= 11 && ($n % 100) <= 13 )
208			return "{$n}th";
209			else
210			return "{$n}{$ends[$n % 10]}";
211			}
212
213			/**
214			* Convert a number of bytes to a human-friendly string using the largest suitable unit.
215			* Taken from: http://www.php.net/manual/de/function.filesize.php#91477
216			*
217			* @param integer $bytes the number of bytes to
218			* @param integer $precision the number of decimal places to format the result to.
219			* @return string
220			*/
221			public static function sizeFormat( $bytes, $precision ) {
222			$units = array('B', 'KB', 'MB', 'GB', 'TB', 'PB');
223			$bytes = max($bytes, 0);
224			$pow = floor(($bytes ? log($bytes) : 0) / log(1024));
225			$pow = min($pow, count($units) - 1);
226			$bytes /= (1 << (10 * $pow));
227			return round($bytes, $precision). ' '. $units[$pow];
228			}
229
230			/**
231			* Remove XSS vulnerabilities from a string.
232			* Shamelessly ripped from Kohana v2 and then tweaked to remove control characters
233			* and replace the associated regex components with \s instead.
234			* Also added a couple of other tags to the really bad list.
235			* Handles most of the XSS vectors listed at http://ha.ckers.org/xss.html
236			* @param string\|array str
237			* @return string\|array
238			*/
239			public static function xssClean( $str, $charset = 'UTF-8' ) {
240
241			if( !$str )
242			return $str;
243
244			if( is_array($str) ) {
245			foreach( $str as &$item ) {
246			$item = static::xssClean($item);
247			}
248			return $str;
249			}
250
251			// strip any raw control characters that might interfere with our cleaning
252			$str = static::stripControlChars($str);
253
254			// fix and decode entities (handles missing ; terminator)
255			$str = str_replace(array('&','<','>'), array('&amp;','&lt;','&gt;'), $str);
256			$str = preg_replace('/(&#*\w+)\s+;/u', '$1;', $str);
257			$str = preg_replace('/(&#x[0-9A-F]+);/iu', '$1;', $str);
258			$str = html_entity_decode($str, ENT_COMPAT, $charset);
259
260			// strip any control characters that were sneakily encoded as entities
261			$str = static::stripControlChars($str);
262
263			// normalise line endings
264			$str = static::normaliseLineEndings($str);
265
266			// remove any attribute starting with "on" or xmlns
267			$str = preg_replace('#(?:on[a-z]+\|xmlns)\s=\s[\'"\s]?[^\'>"]*[\'"\s]?\s?#iu', '', $str);
268
269			// remove javascript: and vbscript: protocols and -moz-binding CSS property
270			$str = preg_replace('#([a-z])\s=\s([`\'"])\sj\sa\sv\sa\ss\sc\sr\si\sp\st\s*:#iu', '$1=$2nojavascript...', $str);
271			$str = preg_replace('#([a-z])\s=([\'"])\sv\sb\ss\sc\sr\si\sp\st\s:#iu', '$1=$2novbscript...', $str);
272			$str = preg_replace('#([a-z])\s=([\'"])\s-moz-binding\s*:#u', '$1=$2nomozbinding...', $str);
273
274			// only works in IE: <span style="width: expression(alert('XSS!'));"></span>
275			$str = preg_replace('#(<[^>]+?)style\s=\s[`\'"].?expression\s\([^>]+>#isu', '$1>', $str);
276			$str = preg_replace('#(<[^>]+?)style\s=\s[`\'"].?behaviour\s\([^>]+>#isu', '$1>', $str);
277			$str = preg_replace('#(<[^>]+?)style\s=\s[`\'"].?s\sc\sr\si\sp\st\s:[^>]+>#isu', '$1>', $str);
278
279			// remove namespaced elements (we do not need them)
280			$str = preg_replace('#</\w+:\w[^>]+>#iu', '', $str);
281
282			// remove data URIs
283			$str = preg_replace("#data:[\w/]+;\w+,[\w\r\n+=/]*#iu", "data: not allowed", $str);
284
285			// remove really unwanted tags
286			do {
287			$old = $str;
288			$str = preg_replace('#</(?:applet\|b(?:ase\|gsound\|link)\|body\|embed\|frame(?:set)?\|head\|html\|i(?:frame\|layer)\|l(?:ayer\|ink)\|meta\|object\|s(?:cript\|tyle)\|title\|xml)[^>]+>#iu', '', $str);
289			}
290			while ($old !== $str);
291
292			return $str;
293			}
294
295			/**
296			* Remove every control character except newline (10/x0A) carriage return (13/x0D), and horizontal tab (09/x09)
297			* @param string\|array str
298			* @return string\|array
299			*/
300			public static function stripControlChars( $str ) {
301
302			if( is_array($str) ) {
303			foreach( $str as &$item ) {
304			$item = static::stripControlChars($item);
305			}
306			return $str;
307			}
308
309			do {
310			// 00-08, 11, 12, 14-31, 127
			0 ignored issues – show Unused Code Comprehensibility introduced 2016-04-19 10:04 UTC by Report Bug Copy Issue Report `62%` of this comment could be valid code. Did you maybe forget this after debugging? Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it. The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production. This check looks for comments that seem to be mostly valid code and reports them. Loading history...
311			$str = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/Su', '', $str, -1, $count);
312			}
313			while ($count);
314
315			return $str;
316
317			}
318
319			/**
320			* Ensures that a string has consistent line-endings.
321			* All line-ending are converted to LF with maximum of two consecutive.
322			* @return string
323			*/
324			public static function normaliseLineEndings( $str ) {
325			$str = str_replace("\r\n", "\n", $str);
326			$str = str_replace("\r", "\n", $str);
327			return preg_replace("/\n{2,}/", "\n\n", $str);
328			}
329
330			}
331
332			// EOF

gamernetwork / yolk-core

StringHelper A last analyzed 2016-05-18 08:44 UTC

Complexity

Size/Duplication

Coupling/Cohesion

Importance

14 Methods

Duplication Side-by-Side

Filter issues like

StringHelper A
last analyzed 2016-05-18 08:44 UTC