StringHelper::__construct()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 1
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 1
rs 10
nc 1
cc 1
eloc 1
nop 0
1
<?php
2
/*
3
 * This file is part of Yolk - Gamer Network's PHP Framework.
4
 *
5
 * Copyright (c) 2013 Gamer Network Ltd.
6
 *
7
 * Distributed under the MIT License, a copy of which is available in the
8
 * LICENSE file that was bundled with this package, or online at:
9
 * https://github.com/gamernetwork/yolk-core
10
 */
11
12
namespace yolk\helpers;
13
14
class StringHelper {
15
16
	/**
	 * Private constructor: this class only exposes static helper methods,
	 * so it cannot be instantiated from outside.
	 */
	private function __construct() {
	}
20
21
	/**
	 * Parse a URL string into an array of components.
	 *
	 * Similar to the native parse_url() except that the returned array always contains
	 * every component key (missing ones are taken from $defaults, or are an empty string)
	 * and the query component is replaced with an 'options' component containing the
	 * decoded query parameters. A fragment component is not returned.
	 *
	 * A string $url is split into components first and each component is percent-decoded
	 * afterwards, so encoded delimiters (e.g. %2F or %40 inside a password, %26 inside a
	 * query value) cannot change how the URL is split, and query values are decoded
	 * exactly once. Array input and $defaults are used exactly as given.
	 *
	 * @param  string|array  $url        either a URL string or a partial array of url components
	 * @param  array         $defaults   an array of default values for components
	 * @return array|boolean   the component array, or false if the URL string could not be parsed
	 */
	public static function parseURL( $url, $defaults = array() ) {

		if( is_string($url) ) {

			$parts = \parse_url($url);

			// honour the documented contract rather than returning an array of empty strings
			if( $parts === false )
				return false;

			// decode after splitting; the query stays raw because parse_str() decodes it below
			foreach( $parts as $k => $v ) {
				if( $k !== 'query' && is_string($v) )
					$parts[$k] = urldecode($v);
			}

		}
		else {
			$parts = $url;
		}

		// parsed value first, then the caller's default, then an empty string
		$select = function( $k ) use ( $parts, $defaults ) {
			if( isset($parts[$k]) )
				return $parts[$k];
			elseif( isset($defaults[$k]) )
				return $defaults[$k];
			else
				return '';
		};

		$url = array(
			'scheme'  => $select('scheme'),
			'host'    => $select('host'),
			'port'    => $select('port'),
			'user'    => $select('user'),
			'pass'    => $select('pass'),
			'path'    => $select('path'),
			'options' => array(),
		);

		if( isset($parts['query']) )
			parse_str($parts['query'], $url['options']);

		return $url;

	}
59
60
	/**
	 * Returns a string of random hex digits generated by openssl_random_pseudo_bytes().
	 *
	 * The result is exactly $length characters long, including for odd lengths.
	 * A length below 1 yields an empty string.
	 *
	 * @param  integer  $length   length of the desired hex string
	 * @return string
	 */
	public static function randomHex( $length = 40 ) {

		$length = (int) $length;

		if( $length < 1 )
			return '';

		// each random byte yields two hex digits, so round odd lengths up...
		$bytes = openssl_random_pseudo_bytes((int) ceil($length / 2));

		// ...and trim the surplus digit afterwards
		return (string) substr(bin2hex((string) $bytes), 0, $length);

	}
69
70
	/**
	 * Builds a string of the requested length by picking characters from $allowed
	 * with mt_rand(). The result is not cryptographically strong.
	 *
	 * Characters are picked by byte offset, so $allowed is treated as a string of
	 * single-byte characters.
	 *
	 * @param  integer  $length    length of the desired string
	 * @param  string   $allowed   the characters allowed to appear in the output
	 * @return string
	 */
	public static function randomString( $length, $allowed = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' ) {

		$top    = strlen($allowed) - 1;
		$picked = '';
		$done   = 0;

		while( $done < $length ) {
			$picked .= $allowed[mt_rand(0, $top)];
			$done++;
		}

		return $picked;

	}
86
87
	/**
	 * Convert a camel-cased string to lower case with underscores.
	 *
	 * Word boundaries are detected between an acronym and a following capitalised
	 * word ("HTMLParser" => "html_parser") and between a lower-case letter or digit
	 * and a following capital ("fooBar" => "foo_bar"). Each run of characters that
	 * are neither letters nor digits is collapsed into a single underscore.
	 *
	 * @param  string  $str   the camel-cased string
	 * @return string
	 */
	public static function uncamelise( $str ) {

		// split an acronym from a following capitalised word: "HTMLParser" => "HTML_Parser"
		$str = preg_replace('/([A-Z])([A-Z][a-z])/u', '$1_$2', $str);

		// split a lower-case letter or digit from a following capital: "fooBar" => "foo_Bar"
		$str = preg_replace('/([a-z\d])([A-Z])/u', '$1_$2', $str);

		// collapse separators; Unicode classes are used so non-ASCII letters are left intact
		$str = preg_replace('/[^\p{L}\p{N}]+/u', '_', $str);

		return mb_strtolower($str);

	}
100
101
	/**
	 * Convert a string into a format safe for use in urls.
	 *
	 * Accent characters are replaced with their plain equivalents, the symbols
	 * & € £ $ are spelt out, the result is lower-cased, and every run of one or more
	 * characters other than a-z and 0-9 becomes a single dash. Leading and trailing
	 * dashes are removed.
	 *
	 * @param  string   $str   A string to convert to a slug
	 * @return string
	 */
	public static function slugify( $str ) {

		// spell out symbols that carry meaning before they are stripped as punctuation
		$symbols = array('&' => '-and-', '€' => '-EUR-', '£' => '-GBP-', '$' => '-USD-');

		$plain = strtr(static::removeAccents($str), $symbols);
		$lower = mb_strtolower($plain);
		$slug  = preg_replace('/([^a-z0-9]+)/u', '-', $lower);

		return trim($slug, '-');

	}
113
114
	/**
	 * Converts the accent characters and ligatures listed below to their ASCII counterparts.
	 * Characters that are not in the table are left untouched.
	 *
	 * Upper-case ligatures map to upper-case output (including Thorn => 'TH'),
	 * lower-case ones to lower-case output.
	 *
	 * @param  string   $str   A string that might contain accent characters
	 * @return string
	 */
	public static function removeAccents( $str ) {
		$chars = array(
			'ª' => 'a', 'º' => 'o', 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A',
			'Ä' => 'A', 'Å' => 'A', 'Ā' => 'A', 'Ă' => 'A', 'Ą' => 'A', 'à' => 'a',
			'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'ā' => 'a',
			'ă' => 'a', 'ą' => 'a', 'Ç' => 'C', 'Ć' => 'C', 'Ĉ' => 'C', 'Ċ' => 'C',
			'Č' => 'C', 'ç' => 'c', 'ć' => 'c', 'ĉ' => 'c', 'ċ' => 'c', 'č' => 'c',
			'Đ' => 'D', 'Ď' => 'D', 'đ' => 'd', 'ď' => 'd', 'È' => 'E', 'É' => 'E',
			'Ê' => 'E', 'Ë' => 'E', 'Ē' => 'E', 'Ĕ' => 'E', 'Ė' => 'E', 'Ę' => 'E',
			'Ě' => 'E', 'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e', 'ē' => 'e',
			'ĕ' => 'e', 'ė' => 'e', 'ę' => 'e', 'ě' => 'e', 'ƒ' => 'f', 'Ĝ' => 'G',
			'Ğ' => 'G', 'Ġ' => 'G', 'Ģ' => 'G', 'ĝ' => 'g', 'ğ' => 'g', 'ġ' => 'g',
			'ģ' => 'g', 'Ĥ' => 'H', 'Ħ' => 'H', 'ĥ' => 'h', 'ħ' => 'h', 'Ì' => 'I',
			'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ĩ' => 'I', 'Ī' => 'I', 'Ĭ' => 'I',
			'Į' => 'I', 'İ' => 'I', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
			'ĩ' => 'i', 'ī' => 'i', 'ĭ' => 'i', 'į' => 'i', 'ı' => 'i', 'Ĵ' => 'J',
			'ĵ' => 'j', 'Ķ' => 'K', 'ķ' => 'k', 'ĸ' => 'k', 'Ĺ' => 'L', 'Ļ' => 'L',
			'Ľ' => 'L', 'Ŀ' => 'L', 'Ł' => 'L', 'ĺ' => 'l', 'ļ' => 'l', 'ľ' => 'l',
			'ŀ' => 'l', 'ł' => 'l', 'Ñ' => 'N', 'Ń' => 'N', 'Ņ' => 'N', 'Ň' => 'N',
			'Ŋ' => 'N', 'ñ' => 'n', 'ń' => 'n', 'ņ' => 'n', 'ň' => 'n', 'ʼn' => 'n',
			'ŋ' => 'n', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O',
			'Ø' => 'O', 'Ō' => 'O', 'Ŏ' => 'O', 'Ő' => 'O', 'ò' => 'o', 'ó' => 'o',
			'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o', 'ō' => 'o', 'ŏ' => 'o',
			'ő' => 'o', 'ð' => 'o', 'Ŕ' => 'R', 'Ŗ' => 'R', 'Ř' => 'R', 'ŕ' => 'r',
			'ŗ' => 'r', 'ř' => 'r', 'Ś' => 'S', 'Ŝ' => 'S', 'Ş' => 'S', 'Š' => 'S',
			'Ș' => 'S', 'ś' => 's', 'ŝ' => 's', 'ş' => 's', 'š' => 's', 'ș' => 's',
			'ſ' => 's', 'Ţ' => 'T', 'Ť' => 'T', 'Ŧ' => 'T', 'Ț' => 'T', 'ţ' => 't',
			'ť' => 't', 'ŧ' => 't', 'ț' => 't', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U',
			'Ü' => 'U', 'Ũ' => 'U', 'Ū' => 'U', 'Ŭ' => 'U', 'Ů' => 'U', 'Ű' => 'U',
			'Ų' => 'U', 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 'ũ' => 'u',
			'ū' => 'u', 'ŭ' => 'u', 'ů' => 'u', 'ű' => 'u', 'ų' => 'u', 'Ŵ' => 'W',
			'ŵ' => 'w', 'Ý' => 'Y', 'Ÿ' => 'Y', 'Ŷ' => 'Y', 'ý' => 'y', 'ÿ' => 'y',
			'ŷ' => 'y', 'Ź' => 'Z', 'Ż' => 'Z', 'Ž' => 'Z', 'ź' => 'z', 'ż' => 'z',
			'ž' => 'z', 'Æ' => 'AE', 'æ' => 'ae',
			'Œ' => 'OE', 'œ' => 'oe', 'ß' => 'ss', 'þ' => 'th', 'Þ' => 'TH',
			// single-code-point ligatures, written as UTF-8 byte escapes so that editors
			// or tooling cannot silently decompose the keys into plain letters
			"\xC4\xB2" => 'IJ',   // U+0132 LATIN CAPITAL LIGATURE IJ
			"\xC4\xB3" => 'ij',   // U+0133 LATIN SMALL LIGATURE IJ
			"\xC5\x89" => 'n',    // U+0149 LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
		);
		return strtr($str, $chars);
	}
158
159
	/**
	 * Converts a UTF-8 string to Latin-1 with unsupported characters encoded as numeric entities.
	 * Example: text like
	 * hello é β 水
	 * becomes
	 * hello é &#946; &#27700;
	 * (with é stored as the single Latin-1 byte 0xE9).
	 *
	 * @param  string   $str
	 * @return string   the converted string.
	 */
	public static function latin1( $str ) {

		// code points above U+00FF have no Latin-1 equivalent, so turn them into decimal
		// entities first; the map runs up to U+10FFFF so that characters outside the
		// Basic Multilingual Plane are encoded as well instead of being lost
		$encoded = mb_encode_numericentity(
			(string) $str,
			array(0x0100, 0x10FFFF, 0, 0x1FFFFF),
			'UTF-8'
		);

		// everything left is within U+0000-U+00FF and maps directly onto Latin-1
		// (mb_convert_encoding is used because utf8_decode() is deprecated as of PHP 8.2)
		return mb_convert_encoding($encoded, 'ISO-8859-1', 'UTF-8');

	}
178
179
	/**
	 * Converts a Latin-1 (ISO-8859-1) string to UTF-8 and then decodes HTML entities.
	 * Quote entities are left encoded (ENT_NOQUOTES).
	 *
	 * @param  string   $str
	 * @return string   the converted string.
	 */
	public static function utf8( $str ) {

		// re-encode the raw bytes first so the decoded entities end up in a UTF-8 string
		$converted = mb_convert_encoding((string) $str, 'UTF-8', 'ISO-8859-1');

		return html_entity_decode($converted, ENT_NOQUOTES, 'UTF-8');

	}
196
197
	/**
	 * Return a number with its ordinal suffix (st, nd, rd, th) appended.
	 * Taken from: http://stackoverflow.com/questions/3109978/php-display-number-with-ordinal-suffix
	 *
	 * The suffix is chosen from the magnitude of the number, so negative values
	 * get the same suffix as their positive counterpart (-1 => "-1st").
	 *
	 * @param  integer   $n
	 * @return string    the number cast as a string with the ordinal suffixed.
	 */
	public static function ordinal( $n ) {

		$ends = array('th','st','nd','rd','th','th','th','th','th','th');

		// work on the magnitude so a negative remainder can never index $ends
		$abs = abs((int) $n);
		$mod = $abs % 100;

		// numbers ending in 11, 12 or 13 take "th" instead of the usual st/nd/rd
		if( $mod >= 11 && $mod <= 13 )
			return "{$n}th";

		return "{$n}{$ends[$abs % 10]}";

	}
212
213
	/**
	 * Convert a number of bytes to a human-friendly string using the largest suitable unit.
	 * Based on: http://www.php.net/manual/de/function.filesize.php#91477
	 *
	 * Units are powers of 1024, from B up to PB. Negative input is treated as zero.
	 *
	 * @param  integer   $bytes       the number of bytes to format
	 * @param  integer   $precision   the number of decimal places to round the result to (default 2).
	 * @return string    the rounded value, a space and the unit, e.g. "1.5 KB"
	 */
	public static function sizeFormat( $bytes, $precision = 2 ) {

		$units = array('B', 'KB', 'MB', 'GB', 'TB', 'PB');
		$last  = count($units) - 1;

		$bytes = max($bytes, 0);

		// step up one unit at a time; this avoids floating point log() rounding, bit-shift
		// overflow on 32-bit integers, and a negative exponent for values between 0 and 1
		$pow = 0;
		while( $bytes >= 1024 && $pow < $last ) {
			$bytes /= 1024;
			$pow++;
		}

		return round($bytes, $precision). ' '. $units[$pow];

	}
229
230
	/**
	 * Remove XSS vulnerabilities from a string.
	 * Shamelessly ripped from Kohana v2 and then tweaked to remove control characters
	 * and replace the associated regex components with \s instead.
	 * Also added a couple of other tags to the really bad list.
	 * Handles most of the XSS vectors listed at http://ha.ckers.org/xss.html
	 *
	 * Arrays are cleaned item by item (recursively) using the same $charset.
	 * Empty values are returned unchanged.
	 *
	 * @param  string|array   $str       the value(s) to clean
	 * @param  string         $charset   character set passed to html_entity_decode()
	 * @return string|array
	 */
	public static function xssClean( $str, $charset = 'UTF-8' ) {

		if( !$str )
			return $str;

		if( is_array($str) ) {
			foreach( $str as $k => $item ) {
				// pass the charset on so nested items are decoded like top-level strings
				$str[$k] = static::xssClean($item, $charset);
			}
			return $str;
		}

		// strip any raw control characters that might interfere with our cleaning
		$str = static::stripControlChars($str);

		// fix and decode entities (handles missing ; terminator)
		$str = str_replace(array('&amp;','&lt;','&gt;'), array('&amp;amp;','&amp;lt;','&amp;gt;'), $str);
		$str = preg_replace('/(&#*\w+)\s+;/u', '$1;', $str);
		$str = preg_replace('/(&#x*[0-9A-F]+);*/iu', '$1;', $str);
		$str = html_entity_decode($str, ENT_COMPAT, $charset);

		// strip any control characters that were sneakily encoded as entities
		$str = static::stripControlChars($str);

		// normalise line endings
		$str = static::normaliseLineEndings($str);

		// remove any attribute starting with "on" or xmlns
		$str = preg_replace('#(?:on[a-z]+|xmlns)\s*=\s*[\'"\s]?[^\'>"]*[\'"\s]?\s?#iu', '', $str);

		// remove javascript: and vbscript: protocols and -moz-binding CSS property
		$str = preg_replace('#([a-z]*)\s*=\s*([`\'"]*)\s*j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*:#iu', '$1=$2nojavascript...', $str);
		$str = preg_replace('#([a-z]*)\s*=([\'"]*)\s*v\s*b\s*s\s*c\s*r\s*i\s*p\s*t\s*:#iu', '$1=$2novbscript...', $str);
		$str = preg_replace('#([a-z]*)\s*=([\'"]*)\s*-moz-binding\s*:#u', '$1=$2nomozbinding...', $str);

		// only works in IE: <span style="width: expression(alert('XSS!'));"></span>
		$str = preg_replace('#(<[^>]+?)style\s*=\s*[`\'"]*.*?expression\s*\([^>]*+>#isu', '$1>', $str);
		// NOTE(review): this matches the spelling "behaviour(" only; the IE CSS property is
		// believed to be spelt "behavior" - confirm whether that spelling needs covering too
		$str = preg_replace('#(<[^>]+?)style\s*=\s*[`\'"]*.*?behaviour\s*\([^>]*+>#isu', '$1>', $str);
		$str = preg_replace('#(<[^>]+?)style\s*=\s*[`\'"]*.*?s\s*c\s*r\s*i\s*p\s*t\s*:*[^>]*+>#isu', '$1>', $str);

		// remove namespaced elements (we do not need them)
		$str = preg_replace('#</*\w+:\w[^>]*+>#iu', '', $str);

		// remove data URIs
		$str = preg_replace("#data:[\w/]+;\w+,[\w\r\n+=/]*#iu", "data: not allowed", $str);

		// remove really unwanted tags; repeat until stable so that tags which only
		// appear once an inner tag has been removed are stripped as well
		do {
			$old = $str;
			$str = preg_replace('#</*(?:applet|b(?:ase|gsound|link)|body|embed|frame(?:set)?|head|html|i(?:frame|layer)|l(?:ayer|ink)|meta|object|s(?:cript|tyle)|title|xml)[^>]*+>#iu', '', $str);
		}
		while ($old !== $str);

		return $str;

	}
294
295
	/**
	 * Remove every control character except newline (10/x0A), carriage return (13/x0D)
	 * and horizontal tab (09/x09). Arrays are processed item by item, recursively.
	 *
	 * @param  string|array   $str
	 * @return string|array
	 */
	public static function stripControlChars( $str ) {

		if( is_array($str) ) {
			foreach( $str as $key => $value ) {
				$str[$key] = static::stripControlChars($value);
			}
			return $str;
		}

		// bytes 00-08, 11, 12, 14-31 and 127
		$pattern = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/Su';

		// keep going until a pass removes nothing
		$removed = 0;
		do {
			$str = preg_replace($pattern, '', $str, -1, $removed);
		}
		while ($removed);

		return $str;

	}
318
319
	/**
	 * Ensures that a string has consistent line-endings.
	 * CRLF and lone CR are converted to LF, then any run of LFs is capped at
	 * two consecutive (i.e. at most one blank line).
	 *
	 * @param  string   $str
	 * @return string
	 */
	public static function normaliseLineEndings( $str ) {

		// CRLF must be handled before lone CR so it does not become two LFs
		$unified = str_replace(array("\r\n", "\r"), "\n", $str);

		return preg_replace("/\n{2,}/", "\n\n", $unified);

	}
329
330
}
331
332
// EOF