1 | <?php |
||
8 | class Clean |
||
9 | { |
||
10 | /** |
||
11 | * Checks if a string contains 7bit ASCII only |
||
12 | * |
||
13 | * @author Andreas Haerter <[email protected]> |
||
14 | * |
||
15 | * @param string $str |
||
16 | * @return bool |
||
17 | */ |
||
18 | public static function isASCII($str) |
||
22 | |||
23 | /** |
||
24 | * Tries to detect if a string is in UTF-8 encoding |
||
25 | * |
||
26 | * @author <[email protected]> |
||
27 | * @link http://php.net/manual/en/function.utf8-encode.php |
||
28 | * |
||
29 | * @param string $str |
||
30 | * @return bool |
||
31 | */ |
||
32 | public static function isUtf8($str) |
||
52 | |||
53 | /** |
||
54 | * Strips all high byte chars |
||
55 | * |
||
56 | * Returns a pure ASCII7 string |
||
57 | * |
||
58 | * @author Andreas Gohr <[email protected]> |
||
59 | * |
||
60 | * @param string $str |
||
61 | * @return string |
||
62 | */ |
||
63 | public static function strip($str) |
||
74 | |||
75 | /** |
||
76 | * Removes special characters (nonalphanumeric) from a UTF-8 string |
||
77 | * |
||
78 | * This function adds the control characters 0x00 to 0x19 to the array of |
||
79 | * stripped chars (they are not included in $UTF8_SPECIAL_CHARS) |
||
80 | * |
||
81 | * @author Andreas Gohr <[email protected]> |
||
82 | * |
||
83 | * @param string $string The UTF8 string to strip of special chars |
||
84 | * @param string $repl Replace special with this string |
||
85 | * @param string $additional Additional chars to strip (used in regexp char class) |
||
86 | * @return string |
||
87 | */ |
||
88 | public static function stripspecials($string, $repl = '', $additional = '') |
||
97 | |||
98 | /** |
||
99 | * Replace bad bytes with an alternative character |
||
100 | * |
||
101 | * ASCII character is recommended for replacement char |
||
102 | * |
||
103 | * PCRE Pattern to locate bad bytes in a UTF-8 string |
||
104 | * Comes from W3 FAQ: Multilingual Forms |
||
105 | * Note: modified to include full ASCII range including control chars |
||
106 | * |
||
107 | * @author Harry Fuecks <[email protected]> |
||
108 | * @see http://www.w3.org/International/questions/qa-forms-utf-8 |
||
109 | * |
||
110 | * @param string $str to search |
||
111 | * @param string $replace to replace bad bytes with (defaults to an empty string) - use ASCII |
||
112 | * @return string |
||
113 | */ |
||
114 | public static function replaceBadBytes($str, $replace = '') |
||
137 | |||
138 | |||
139 | /** |
||
140 | * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents |
||
141 | * |
||
142 | * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1) |
||
143 | * letters. Default is to deaccent both cases ($case = 0) |
||
144 | * |
||
145 | * @author Andreas Gohr <[email protected]> |
||
146 | * |
||
147 | * @param string $string |
||
148 | * @param int $case |
||
149 | * @return string |
||
150 | */ |
||
151 | public static function deaccent($string, $case = 0) |
||
161 | |||
162 | /** |
||
163 | * Romanize a non-latin string |
||
164 | * |
||
165 | * @author Andreas Gohr <[email protected]> |
||
166 | * |
||
167 | * @param string $string |
||
168 | * @return string |
||
169 | */ |
||
170 | public static function romanize($string) |
||
176 | |||
177 | /** |
||
178 | * Adjust a byte index into a UTF-8 string to a UTF-8 character boundary |
||
179 | * |
||
180 | * @author chris smith <[email protected]> |
||
181 | * |
||
182 | * @param string $str utf8 character string |
||
183 | * @param int $i byte index into $str |
||
184 | * @param bool $next direction to search for boundary: false = up (current character), true = down (next character) |
||
185 | * @return int byte index into $str now pointing to a utf8 character boundary |
||
186 | */ |
||
187 | public static function correctIdx($str, $i, $next = false) |
||
203 | |||
204 | } |
||
205 |