Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 14 | class UTF8 |
||
| 15 | { |
||
| 16 | /** |
||
| 17 | * @var array |
||
| 18 | */ |
||
| 19 | protected static $win1252ToUtf8 = array( |
||
| 20 | 128 => "\xe2\x82\xac", // EURO SIGN |
||
| 21 | 130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK |
||
| 22 | 131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK |
||
| 23 | 132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK |
||
| 24 | 133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS |
||
| 25 | 134 => "\xe2\x80\xa0", // DAGGER |
||
| 26 | 135 => "\xe2\x80\xa1", // DOUBLE DAGGER |
||
| 27 | 136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT |
||
| 28 | 137 => "\xe2\x80\xb0", // PER MILLE SIGN |
||
| 29 | 138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON |
||
| 30 | 139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE |
||
| 31 | 140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE |
||
| 32 | 142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON |
||
| 33 | 145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK |
||
| 34 | 146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK |
||
| 35 | 147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK |
||
| 36 | 148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK |
||
| 37 | 149 => "\xe2\x80\xa2", // BULLET |
||
| 38 | 150 => "\xe2\x80\x93", // EN DASH |
||
| 39 | 151 => "\xe2\x80\x94", // EM DASH |
||
| 40 | 152 => "\xcb\x9c", // SMALL TILDE |
||
| 41 | 153 => "\xe2\x84\xa2", // TRADE MARK SIGN |
||
| 42 | 154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON |
||
| 43 | 155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE |
||
| 44 | 156 => "\xc5\x93", // LATIN SMALL LIGATURE OE |
||
| 45 | 158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON |
||
| 46 | 159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS |
||
| 47 | ); |
||
| 48 | |||
| 49 | /** |
||
| 50 | * @var array |
||
| 51 | */ |
||
| 52 | protected static $cp1252ToUtf8 = array( |
||
| 53 | '' => '€', |
||
| 54 | '' => '‚', |
||
| 55 | '' => 'ƒ', |
||
| 56 | '' => '„', |
||
| 57 | ' ' => '…', |
||
| 58 | '' => '†', |
||
| 59 | '' => '‡', |
||
| 60 | '' => 'ˆ', |
||
| 61 | '' => '‰', |
||
| 62 | '' => 'Š', |
||
| 63 | '' => '‹', |
||
| 64 | '' => 'Œ', |
||
| 65 | '' => 'Ž', |
||
| 66 | '' => '‘', |
||
| 67 | '' => '’', |
||
| 68 | '' => '“', |
||
| 69 | '' => '”', |
||
| 70 | '' => '•', |
||
| 71 | '' => '–', |
||
| 72 | '' => '—', |
||
| 73 | '' => '˜', |
||
| 74 | '' => '™', |
||
| 75 | '' => 'š', |
||
| 76 | '' => '›', |
||
| 77 | '' => 'œ', |
||
| 78 | '' => 'ž', |
||
| 79 | '' => 'Ÿ', |
||
| 80 | ); |
||
| 81 | |||
| 82 | /** |
||
| 83 | * Numeric code point => UTF-8 Character |
||
| 84 | * |
||
| 85 | * @var array |
||
| 86 | */ |
||
| 87 | protected static $whitespace = array( |
||
| 88 | 0 => "\x0", |
||
| 89 | //NUL Byte |
||
| 90 | 9 => "\x9", |
||
| 91 | //Tab |
||
| 92 | 10 => "\xa", |
||
| 93 | //New Line |
||
| 94 | 11 => "\xb", |
||
| 95 | //Vertical Tab |
||
| 96 | 13 => "\xd", |
||
| 97 | //Carriage Return |
||
| 98 | 32 => "\x20", |
||
| 99 | //Ordinary Space |
||
| 100 | 160 => "\xc2\xa0", |
||
| 101 | //NO-BREAK SPACE |
||
| 102 | 5760 => "\xe1\x9a\x80", |
||
| 103 | //OGHAM SPACE MARK |
||
| 104 | 6158 => "\xe1\xa0\x8e", |
||
| 105 | //MONGOLIAN VOWEL SEPARATOR |
||
| 106 | 8192 => "\xe2\x80\x80", |
||
| 107 | //EN QUAD |
||
| 108 | 8193 => "\xe2\x80\x81", |
||
| 109 | //EM QUAD |
||
| 110 | 8194 => "\xe2\x80\x82", |
||
| 111 | //EN SPACE |
||
| 112 | 8195 => "\xe2\x80\x83", |
||
| 113 | //EM SPACE |
||
| 114 | 8196 => "\xe2\x80\x84", |
||
| 115 | //THREE-PER-EM SPACE |
||
| 116 | 8197 => "\xe2\x80\x85", |
||
| 117 | //FOUR-PER-EM SPACE |
||
| 118 | 8198 => "\xe2\x80\x86", |
||
| 119 | //SIX-PER-EM SPACE |
||
| 120 | 8199 => "\xe2\x80\x87", |
||
| 121 | //FIGURE SPACE |
||
| 122 | 8200 => "\xe2\x80\x88", |
||
| 123 | //PUNCTUATION SPACE |
||
| 124 | 8201 => "\xe2\x80\x89", |
||
| 125 | //THIN SPACE |
||
| 126 | 8202 => "\xe2\x80\x8a", |
||
| 127 | //HAIR SPACE |
||
| 128 | 8232 => "\xe2\x80\xa8", |
||
| 129 | //LINE SEPARATOR |
||
| 130 | 8233 => "\xe2\x80\xa9", |
||
| 131 | //PARAGRAPH SEPARATOR |
||
| 132 | 8239 => "\xe2\x80\xaf", |
||
| 133 | //NARROW NO-BREAK SPACE |
||
| 134 | 8287 => "\xe2\x81\x9f", |
||
| 135 | //MEDIUM MATHEMATICAL SPACE |
||
| 136 | 12288 => "\xe3\x80\x80" |
||
| 137 | //IDEOGRAPHIC SPACE |
||
| 138 | ); |
||
| 139 | |||
| 140 | /** |
||
| 141 | * @var array |
||
| 142 | */ |
||
| 143 | protected static $whitespaceTable = array( |
||
| 144 | 'SPACE' => "\x20", |
||
| 145 | 'NO-BREAK SPACE' => "\xc2\xa0", |
||
| 146 | 'OGHAM SPACE MARK' => "\xe1\x9a\x80", |
||
| 147 | 'EN QUAD' => "\xe2\x80\x80", |
||
| 148 | 'EM QUAD' => "\xe2\x80\x81", |
||
| 149 | 'EN SPACE' => "\xe2\x80\x82", |
||
| 150 | 'EM SPACE' => "\xe2\x80\x83", |
||
| 151 | 'THREE-PER-EM SPACE' => "\xe2\x80\x84", |
||
| 152 | 'FOUR-PER-EM SPACE' => "\xe2\x80\x85", |
||
| 153 | 'SIX-PER-EM SPACE' => "\xe2\x80\x86", |
||
| 154 | 'FIGURE SPACE' => "\xe2\x80\x87", |
||
| 155 | 'PUNCTUATION SPACE' => "\xe2\x80\x88", |
||
| 156 | 'THIN SPACE' => "\xe2\x80\x89", |
||
| 157 | 'HAIR SPACE' => "\xe2\x80\x8a", |
||
| 158 | 'ZERO WIDTH SPACE' => "\xe2\x80\x8b", |
||
| 159 | 'NARROW NO-BREAK SPACE' => "\xe2\x80\xaf", |
||
| 160 | 'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f", |
||
| 161 | 'IDEOGRAPHIC SPACE' => "\xe3\x80\x80", |
||
| 162 | ); |
||
| 163 | |||
| 164 | /** |
||
| 165 | * @var array |
||
| 166 | */ |
||
| 167 | protected static $commonCaseFold = array( |
||
| 168 | 'ſ' => 's', |
||
| 169 | "\xCD\x85" => 'ι', |
||
| 170 | 'ς' => 'σ', |
||
| 171 | "\xCF\x90" => 'β', |
||
| 172 | "\xCF\x91" => 'θ', |
||
| 173 | "\xCF\x95" => 'φ', |
||
| 174 | "\xCF\x96" => 'π', |
||
| 175 | "\xCF\xB0" => 'κ', |
||
| 176 | "\xCF\xB1" => 'ρ', |
||
| 177 | "\xCF\xB5" => 'ε', |
||
| 178 | "\xE1\xBA\x9B" => "\xE1\xB9\xA1", |
||
| 179 | "\xE1\xBE\xBE" => 'ι', |
||
| 180 | ); |
||
| 181 | |||
| 182 | /** |
||
| 183 | * @var array |
||
| 184 | */ |
||
| 185 | protected static $brokenUtf8ToUtf8 = array( |
||
| 186 | "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN |
||
| 187 | "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK |
||
| 188 | "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK |
||
| 189 | "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK |
||
| 190 | "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS |
||
| 191 | "\xc2\x86" => "\xe2\x80\xa0", // DAGGER |
||
| 192 | "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER |
||
| 193 | "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT |
||
| 194 | "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN |
||
| 195 | "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON |
||
| 196 | "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE |
||
| 197 | "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE |
||
| 198 | "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON |
||
| 199 | "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK |
||
| 200 | "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK |
||
| 201 | "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK |
||
| 202 | "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK |
||
| 203 | "\xc2\x95" => "\xe2\x80\xa2", // BULLET |
||
| 204 | "\xc2\x96" => "\xe2\x80\x93", // EN DASH |
||
| 205 | "\xc2\x97" => "\xe2\x80\x94", // EM DASH |
||
| 206 | "\xc2\x98" => "\xcb\x9c", // SMALL TILDE |
||
| 207 | "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN |
||
| 208 | "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON |
||
| 209 | "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE |
||
| 210 | "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE |
||
| 211 | "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON |
||
| 212 | "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS |
||
| 213 | 'ü' => 'ü', |
||
| 214 | 'ä' => 'ä', |
||
| 215 | 'ö' => 'ö', |
||
| 216 | 'Ö' => 'Ö', |
||
| 217 | 'ß' => 'ß', |
||
| 218 | 'Ã ' => 'à', |
||
| 219 | 'á' => 'á', |
||
| 220 | 'â' => 'â', |
||
| 221 | 'ã' => 'ã', |
||
| 222 | 'ù' => 'ù', |
||
| 223 | 'ú' => 'ú', |
||
| 224 | 'û' => 'û', |
||
| 225 | 'Ù' => 'Ù', |
||
| 226 | 'Ú' => 'Ú', |
||
| 227 | 'Û' => 'Û', |
||
| 228 | 'Ü' => 'Ü', |
||
| 229 | 'ò' => 'ò', |
||
| 230 | 'ó' => 'ó', |
||
| 231 | 'ô' => 'ô', |
||
| 232 | 'è' => 'è', |
||
| 233 | 'é' => 'é', |
||
| 234 | 'ê' => 'ê', |
||
| 235 | 'ë' => 'ë', |
||
| 236 | 'À' => 'À', |
||
| 237 | 'Ã' => 'Á', |
||
| 238 | 'Â' => 'Â', |
||
| 239 | 'Ã' => 'Ã', |
||
| 240 | 'Ä' => 'Ä', |
||
| 241 | 'Ã…' => 'Å', |
||
| 242 | 'Ç' => 'Ç', |
||
| 243 | 'È' => 'È', |
||
| 244 | 'É' => 'É', |
||
| 245 | 'Ê' => 'Ê', |
||
| 246 | 'Ë' => 'Ë', |
||
| 247 | 'ÃŒ' => 'Ì', |
||
| 248 | 'Ã' => 'Í', |
||
| 249 | 'ÃŽ' => 'Î', |
||
| 250 | 'Ã' => 'Ï', |
||
| 251 | 'Ñ' => 'Ñ', |
||
| 252 | 'Ã’' => 'Ò', |
||
| 253 | 'Ó' => 'Ó', |
||
| 254 | 'Ô' => 'Ô', |
||
| 255 | 'Õ' => 'Õ', |
||
| 256 | 'Ø' => 'Ø', |
||
| 257 | 'Ã¥' => 'å', |
||
| 258 | 'æ' => 'æ', |
||
| 259 | 'ç' => 'ç', |
||
| 260 | 'ì' => 'ì', |
||
| 261 | 'Ã' => 'í', |
||
| 262 | 'î' => 'î', |
||
| 263 | 'ï' => 'ï', |
||
| 264 | 'ð' => 'ð', |
||
| 265 | 'ñ' => 'ñ', |
||
| 266 | 'õ' => 'õ', |
||
| 267 | 'ø' => 'ø', |
||
| 268 | 'ý' => 'ý', |
||
| 269 | 'ÿ' => 'ÿ', |
||
| 270 | '€' => '€', |
||
| 271 | ); |
||
| 272 | |||
| 273 | /** |
||
| 274 | * @var array |
||
| 275 | */ |
||
| 276 | protected static $utf8ToWin1252 = array( |
||
| 277 | "\xe2\x82\xac" => "\x80", // EURO SIGN |
||
| 278 | "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK |
||
| 279 | "\xc6\x92" => "\x83", // LATIN SMALL LETTER F WITH HOOK |
||
| 280 | "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK |
||
| 281 | "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS |
||
| 282 | "\xe2\x80\xa0" => "\x86", // DAGGER |
||
| 283 | "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER |
||
| 284 | "\xcb\x86" => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT |
||
| 285 | "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN |
||
| 286 | "\xc5\xa0" => "\x8a", // LATIN CAPITAL LETTER S WITH CARON |
||
| 287 | "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE |
||
| 288 | "\xc5\x92" => "\x8c", // LATIN CAPITAL LIGATURE OE |
||
| 289 | "\xc5\xbd" => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON |
||
| 290 | "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK |
||
| 291 | "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK |
||
| 292 | "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK |
||
| 293 | "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK |
||
| 294 | "\xe2\x80\xa2" => "\x95", // BULLET |
||
| 295 | "\xe2\x80\x93" => "\x96", // EN DASH |
||
| 296 | "\xe2\x80\x94" => "\x97", // EM DASH |
||
| 297 | "\xcb\x9c" => "\x98", // SMALL TILDE |
||
| 298 | "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN |
||
| 299 | "\xc5\xa1" => "\x9a", // LATIN SMALL LETTER S WITH CARON |
||
| 300 | "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE |
||
| 301 | "\xc5\x93" => "\x9c", // LATIN SMALL LIGATURE OE |
||
| 302 | "\xc5\xbe" => "\x9e", // LATIN SMALL LETTER Z WITH CARON |
||
| 303 | "\xc5\xb8" => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS |
||
| 304 | ); |
||
| 305 | |||
| 306 | /** |
||
| 307 | * @var array |
||
| 308 | */ |
||
| 309 | protected static $utf8MSWord = array( |
||
| 310 | "\xc2\xab" => '"', // « (U+00AB) in UTF-8 |
||
| 311 | "\xc2\xbb" => '"', // » (U+00BB) in UTF-8 |
||
| 312 | "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8 |
||
| 313 | "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8 |
||
| 314 | "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8 |
||
| 315 | "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8 |
||
| 316 | "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8 |
||
| 317 | "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8 |
||
| 318 | "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8 |
||
| 319 | "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8 |
||
| 320 | "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8 |
||
| 321 | "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8 |
||
| 322 | "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8 |
||
| 323 | "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8 |
||
| 324 | "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8 |
||
| 325 | ); |
||
| 326 | |||
| 327 | /** |
||
| 328 | * @var array |
||
| 329 | */ |
||
| 330 | private static $support = array(); |
||
| 331 | |||
| 332 | /** |
||
| 333 | * __construct() |
||
| 334 | */ |
||
| 335 | 1 | public function __construct() |
|
| 339 | |||
| 340 | /** |
||
| 341 | * check for UTF8-Support |
||
| 342 | */ |
||
| 343 | 151 | public static function checkForSupport() |
|
| 344 | { |
||
| 345 | 151 | if (!isset(self::$support['mbstring'])) { |
|
| 346 | |||
| 347 | 1 | self::$support['mbstring'] = self::mbstring_loaded(); |
|
| 348 | 1 | self::$support['iconv'] = self::iconv_loaded(); |
|
| 349 | 1 | self::$support['intl'] = self::intl_loaded(); |
|
| 350 | 1 | self::$support['pcre_utf8'] = self::pcre_utf8_support(); |
|
| 351 | |||
| 352 | 1 | Bootup::initAll(); // Enables the portablity layer and configures PHP for UTF-8 |
|
| 353 | 1 | Bootup::filterRequestUri(); // Redirects to an UTF-8 encoded URL if it's not already the case |
|
| 354 | 1 | Bootup::filterRequestInputs(); // Normalizes HTTP inputs to UTF-8 NFC |
|
| 355 | 1 | } |
|
| 356 | 151 | } |
|
| 357 | |||
| 358 | /** |
||
| 359 | * checks whether mbstring is available on the server |
||
| 360 | * |
||
| 361 | * @return bool True if available, False otherwise |
||
| 362 | */ |
||
| 363 | 2 | public static function mbstring_loaded() |
|
| 364 | { |
||
| 365 | 2 | $return = extension_loaded('mbstring'); |
|
| 366 | |||
| 367 | 2 | if ($return === true) { |
|
| 368 | 2 | mb_internal_encoding('UTF-8'); |
|
| 369 | 2 | } |
|
| 370 | |||
| 371 | 2 | return $return; |
|
| 372 | } |
||
| 373 | |||
| 374 | /** |
||
| 375 | * checks whether iconv is available on the server |
||
| 376 | * |
||
| 377 | * @return bool True if available, False otherwise |
||
| 378 | */ |
||
| 379 | 1 | public static function iconv_loaded() |
|
| 383 | |||
| 384 | /** |
||
| 385 | * checks whether intl is available on the server |
||
| 386 | * |
||
| 387 | * @return bool True if available, False otherwise |
||
| 388 | */ |
||
| 389 | 1 | public static function intl_loaded() |
|
| 393 | |||
| 394 | /** |
||
| 395 | * checks whether the "u" pattern modifier (PCRE_UTF8), which enables UTF-8 support in PCRE, is available. |
||
| 396 | * |
||
| 397 | * @return bool True if support is available, false otherwise |
||
| 398 | */ |
||
| 399 | 30 | public static function pcre_utf8_support() |
|
| 400 | { |
||
| 401 | /** @noinspection PhpUsageOfSilenceOperatorInspection */ |
||
| 402 | 30 | return (bool)@preg_match('//u', ''); |
|
| 403 | } |
||
| 404 | |||
| 405 | /** |
||
| 406 | * alias for "UTF8::to_ascii()" |
||
| 407 | * |
||
| 408 | * @param string $s The input string e.g. a UTF-8 String |
||
| 409 | * @param string $subst_chr |
||
| 410 | * |
||
| 411 | * @return string |
||
| 412 | */ |
||
| 413 | 6 | public static function toAscii($s, $subst_chr = '?') |
|
| 417 | |||
| 418 | /** |
||
| 419 | * convert to ASCII |
||
| 420 | * |
||
| 421 | * @param string $s The input string e.g. a UTF-8 String |
||
| 422 | * @param string $subst_chr |
||
| 423 | * |
||
| 424 | * @return string |
||
| 425 | */ |
||
| 426 | 7 | public static function to_ascii($s, $subst_chr = '?') |
|
| 497 | |||
| 498 | /** |
||
| 499 | * accepts a string and removes all non-UTF-8 characters from it. |
||
| 500 | * |
||
| 501 | * @param string $str The string to be sanitized. |
||
| 502 | * @param bool $remove_bom |
||
| 503 | * @param bool $normalize_whitespace |
||
| 504 | * @param bool $normalize_msword e.g.: "…" => "..." |
||
| 505 | * @param bool $keep_non_breaking_space set true, to keep non-breaking-spaces |
||
| 506 | * |
||
| 507 | * @return string Clean UTF-8 encoded string |
||
| 508 | */ |
||
| 509 | 24 | public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false) |
|
| 510 | { |
||
| 511 | // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string |
||
| 512 | // caused connection reset problem on larger strings |
||
| 513 | |||
| 514 | $regx = '/ |
||
| 515 | ( |
||
| 516 | (?: [\x00-\x7F] # single-byte sequences 0xxxxxxx |
||
| 517 | | [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx |
||
| 518 | | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 |
||
| 519 | | [\xE1-\xEC][\x80-\xBF]{2} |
||
| 520 | | \xED[\x80-\x9F][\x80-\xBF] |
||
| 521 | | [\xEE-\xEF][\x80-\xBF]{2} |
||
| 522 | ){1,50} # ...one or more times |
||
| 523 | ) |
||
| 524 | | . # anything else |
||
| 525 | 24 | /x'; |
|
| 526 | 24 | $str = preg_replace($regx, '$1', $str); |
|
| 527 | |||
| 528 | 24 | $str = self::replace_diamond_question_mark($str, ''); |
|
| 529 | 24 | $str = self::remove_invisible_characters($str); |
|
| 530 | |||
| 531 | 24 | if ($normalize_whitespace === true) { |
|
| 532 | 7 | $str = self::normalize_whitespace($str, $keep_non_breaking_space); |
|
| 533 | 7 | } |
|
| 534 | |||
| 535 | 24 | if ($normalize_msword === true) { |
|
| 536 | 1 | $str = self::normalize_msword($str); |
|
| 537 | 1 | } |
|
| 538 | |||
| 539 | 24 | if ($remove_bom === true) { |
|
| 540 | 6 | $str = self::removeBOM($str); |
|
| 541 | 6 | } |
|
| 542 | |||
| 543 | 24 | return $str; |
|
| 544 | } |
||
| 545 | |||
| 546 | /** |
||
| 547 | * replace diamond question mark (�) |
||
| 548 | * |
||
| 549 | * @param string $str |
||
| 550 | * @param string $unknown |
||
| 551 | * |
||
| 552 | * @return string |
||
| 553 | */ |
||
| 554 | 25 | public static function replace_diamond_question_mark($str, $unknown = '?') |
|
| 568 | |||
| 569 | /** |
||
| 570 | * Remove Invisible Characters |
||
| 571 | * |
||
| 572 | * This prevents sandwiching null characters |
||
| 573 | * between ascii characters, like Java\0script. |
||
| 574 | * |
||
| 575 | * copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php |
||
| 576 | * |
||
| 577 | * @param string $str |
||
| 578 | * @param bool $url_encoded |
||
| 579 | * |
||
| 580 | * @return string |
||
| 581 | */ |
||
| 582 | 25 | public static function remove_invisible_characters($str, $url_encoded = true) |
|
| 583 | { |
||
| 584 | // init |
||
| 585 | 25 | $non_displayables = array(); |
|
| 586 | |||
| 587 | // every control character except newline (dec 10), |
||
| 588 | // carriage return (dec 13) and horizontal tab (dec 09) |
||
|
|
|||
| 589 | 25 | if ($url_encoded) { |
|
| 590 | 25 | $non_displayables[] = '/%0[0-8bcef]/'; // url encoded 00-08, 11, 12, 14, 15 |
|
| 591 | 25 | $non_displayables[] = '/%1[0-9a-f]/'; // url encoded 16-31 |
|
| 592 | 25 | } |
|
| 593 | |||
| 594 | 25 | $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127 |
|
| 595 | |||
| 596 | do { |
||
| 597 | 25 | $str = preg_replace($non_displayables, '', $str, -1, $count); |
|
| 598 | 25 | } while ($count !== 0); |
|
| 599 | |||
| 600 | 25 | return $str; |
|
| 601 | } |
||
| 602 | |||
| 603 | /** |
||
| 604 | * Normalize the whitespace. |
||
| 605 | * |
||
| 606 | * @param string $str The string to be normalized. |
||
| 607 | * @param bool $keepNonBreakingSpace Set to true, to keep non-breaking-spaces. |
||
| 608 | * |
||
| 609 | * @return string |
||
| 610 | */ |
||
| 611 | 8 | public static function normalize_whitespace($str, $keepNonBreakingSpace = false) |
|
| 629 | |||
| 630 | /** |
||
| 631 | * Returns an array with all utf8 whitespace characters. |
||
| 632 | * |
||
| 633 | * @see : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html |
||
| 634 | * |
||
| 635 | * @author: Derek E. [email protected] |
||
| 636 | * |
||
| 637 | * @return array an array with all known whitespace characters as values and the type of whitespace as keys |
||
| 638 | * as defined in above URL |
||
| 639 | 1 | */ |
|
| 640 | public static function whitespace_table() |
||
| 644 | |||
| 645 | /** |
||
| 646 | * Normalize MS Word special characters. |
||
| 647 | * |
||
| 648 | * @param string $str The string to be normalized. |
||
| 649 | * |
||
| 650 | * @return string |
||
| 651 | 2 | */ |
|
| 652 | public static function normalize_msword($str) |
||
| 664 | |||
| 665 | /** |
||
| 666 | * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings. |
||
| 667 | * |
||
| 668 | * @param string $str |
||
| 669 | * |
||
| 670 | * @return string |
||
| 671 | 7 | */ |
|
| 672 | public static function removeBOM($str = '') |
||
| 673 | { |
||
| 674 | // UTF-32 (BE) |
||
| 675 | 7 | /** @noinspection PhpUsageOfSilenceOperatorInspection */ |
|
| 676 | 1 | /** @noinspection SubStrUsedAsStrPosInspection */ |
|
| 677 | 1 | View Code Duplication | if (substr($str, 0, 4) == @pack('CCCC', 0x00, 0x00, 0xfe, 0xff)) { |
| 678 | $str = substr($str, 4); |
||
| 679 | } |
||
| 680 | 7 | ||
| 681 | 1 | // UTF-32 (LE) |
|
| 682 | 1 | ||
| 683 | /** @noinspection PhpUsageOfSilenceOperatorInspection */ |
||
| 684 | /** @noinspection SubStrUsedAsStrPosInspection */ |
||
| 685 | 7 | View Code Duplication | if (substr($str, 0, 4) == @pack('CCCC', 0xff, 0xfe, 0x00, 0x00)) { |
| 686 | 2 | $str = substr($str, 4); |
|
| 687 | 2 | } |
|
| 688 | |||
| 689 | // UTF-8 |
||
| 690 | 7 | /** @noinspection PhpUsageOfSilenceOperatorInspection */ |
|
| 691 | 1 | /** @noinspection SubStrUsedAsStrPosInspection */ |
|
| 692 | 1 | View Code Duplication | if (substr($str, 0, 3) == @pack('CCC', 0xef, 0xbb, 0xbf)) { |
| 693 | $str = substr($str, 3); |
||
| 694 | } |
||
| 695 | 7 | ||
| 696 | 1 | // UTF-16 (BE) |
|
| 697 | 1 | /** @noinspection PhpUsageOfSilenceOperatorInspection */ |
|
| 698 | /** @noinspection SubStrUsedAsStrPosInspection */ |
||
| 699 | 7 | View Code Duplication | if (substr($str, 0, 2) == @pack('CC', 0xfe, 0xff)) { |
| 700 | $str = substr($str, 2); |
||
| 701 | } |
||
| 702 | |||
| 703 | // UTF-16 (LE) |
||
| 704 | /** @noinspection PhpUsageOfSilenceOperatorInspection */ |
||
| 705 | /** @noinspection SubStrUsedAsStrPosInspection */ |
||
| 706 | View Code Duplication | if (substr($str, 0, 2) == @pack('CC', 0xff, 0xfe)) { |
|
| 707 | $str = substr($str, 2); |
||
| 708 | } |
||
| 709 | 2 | ||
| 710 | return $str; |
||
| 711 | 2 | } |
|
| 712 | 2 | ||
| 713 | 2 | /** |
|
| 714 | * get data from "/data/*.ser" |
||
| 715 | * |
||
| 716 | * @param string $file |
||
| 717 | * |
||
| 718 | * @return bool|string|array|int false on error |
||
| 719 | */ |
||
| 720 | protected static function getData($file) |
||
| 729 | |||
| 730 | /** |
||
| 731 | * US-ASCII transliterations of Unicode text. |
||
| 732 | * |
||
| 733 | * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!) |
||
| 734 | * Warning: you should only pass this well formed UTF-8! |
||
| 735 | * Be aware it works by making a copy of the input string which it appends transliterated |
||
| 736 | 8 | * characters to - it uses a PHP output buffer to do this - it means, memory use will increase, |
|
| 737 | * requiring up to the same amount again as the input string |
||
| 738 | 8 | * |
|
| 739 | * @see http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm |
||
| 740 | 8 | * |
|
| 741 | * @author <[email protected]> |
||
| 742 | 8 | * |
|
| 743 | 2 | * @param string $str UTF-8 string to convert |
|
| 744 | * @param string $unknown Character to use if a character is unknown. (default is ?) |
||
| 745 | * |
||
| 746 | 7 | * @return string US-ASCII string |
|
| 747 | */ |
||
| 748 | 7 | public static function str_transliterate($str, $unknown = '?') |
|
| 840 | |||
| 841 | /** |
||
| 842 | * Echo native UTF8-Support libs, e.g. for debugging. |
||
| 843 | */ |
||
| 844 | public static function showSupport() |
||
| 850 | |||
| 851 | /** |
||
| 852 | * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities() |
||
| 853 | * |
||
| 854 | * @link http://php.net/manual/en/function.htmlentities.php |
||
| 855 | * |
||
| 856 | * @param string $str <p> |
||
| 857 | * The input string. |
||
| 858 | * </p> |
||
| 859 | * @param int $flags [optional] <p> |
||
| 860 | * A bitmask of one or more of the following flags, which specify how to handle quotes, |
||
| 861 | * invalid code unit sequences and the used document type. The default is |
||
| 862 | * ENT_COMPAT | ENT_HTML401. |
||
| 863 | * <table> |
||
| 864 | * Available <i>flags</i> constants |
||
| 865 | * <tr valign="top"> |
||
| 866 | * <td>Constant Name</td> |
||
| 867 | * <td>Description</td> |
||
| 868 | * </tr> |
||
| 869 | * <tr valign="top"> |
||
| 870 | * <td><b>ENT_COMPAT</b></td> |
||
| 871 | * <td>Will convert double-quotes and leave single-quotes alone.</td> |
||
| 872 | * </tr> |
||
| 873 | * <tr valign="top"> |
||
| 874 | * <td><b>ENT_QUOTES</b></td> |
||
| 875 | * <td>Will convert both double and single quotes.</td> |
||
| 876 | * </tr> |
||
| 877 | * <tr valign="top"> |
||
| 878 | * <td><b>ENT_NOQUOTES</b></td> |
||
| 879 | * <td>Will leave both double and single quotes unconverted.</td> |
||
| 880 | * </tr> |
||
| 881 | * <tr valign="top"> |
||
| 882 | * <td><b>ENT_IGNORE</b></td> |
||
| 883 | * <td> |
||
| 884 | * Silently discard invalid code unit sequences instead of returning |
||
| 885 | * an empty string. Using this flag is discouraged as it |
||
| 886 | * may have security implications. |
||
| 887 | * </td> |
||
| 888 | * </tr> |
||
| 889 | * <tr valign="top"> |
||
| 890 | * <td><b>ENT_SUBSTITUTE</b></td> |
||
| 891 | * <td> |
||
| 892 | * Replace invalid code unit sequences with a Unicode Replacement Character |
||
| 893 | * U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string. |
||
| 894 | * </td> |
||
| 895 | * </tr> |
||
| 896 | * <tr valign="top"> |
||
| 897 | * <td><b>ENT_DISALLOWED</b></td> |
||
| 898 | * <td> |
||
| 899 | * Replace invalid code points for the given document type with a |
||
| 900 | * Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD; |
||
| 901 | * (otherwise) instead of leaving them as is. This may be useful, for |
||
| 902 | * instance, to ensure the well-formedness of XML documents with |
||
| 903 | * embedded external content. |
||
| 904 | * </td> |
||
| 905 | * </tr> |
||
| 906 | * <tr valign="top"> |
||
| 907 | * <td><b>ENT_HTML401</b></td> |
||
| 908 | * <td> |
||
| 909 | * Handle code as HTML 4.01. |
||
| 910 | * </td> |
||
| 911 | * </tr> |
||
| 912 | * <tr valign="top"> |
||
| 913 | * <td><b>ENT_XML1</b></td> |
||
| 914 | * <td> |
||
| 915 | * Handle code as XML 1. |
||
| 916 | * </td> |
||
| 917 | * </tr> |
||
| 918 | * <tr valign="top"> |
||
| 919 | * <td><b>ENT_XHTML</b></td> |
||
| 920 | * <td> |
||
| 921 | * Handle code as XHTML. |
||
| 922 | * </td> |
||
| 923 | * </tr> |
||
| 924 | * <tr valign="top"> |
||
| 925 | * <td><b>ENT_HTML5</b></td> |
||
| 926 | * <td> |
||
| 927 | * Handle code as HTML 5. |
||
| 928 | * </td> |
||
| 929 | * </tr> |
||
| 930 | * </table> |
||
| 931 | * </p> |
||
| 932 | * @param string $encoding [optional] <p> |
||
| 933 | * Like <b>htmlspecialchars</b>, |
||
| 934 | * <b>htmlentities</b> takes an optional third argument |
||
| 935 | * <i>encoding</i> which defines encoding used in |
||
| 936 | * conversion. |
||
| 937 | * Although this argument is technically optional, you are highly |
||
| 938 | * encouraged to specify the correct value for your code. |
||
| 939 | * </p> |
||
| 940 | * @param bool $double_encode [optional] <p> |
||
| 941 | * When <i>double_encode</i> is turned off PHP will not |
||
| 942 | * encode existing html entities. The default is to convert everything. |
||
| 943 | * </p> |
||
| 944 | 2 | * |
|
| 945 | * |
||
| 946 | 2 | * @return string the encoded string. |
|
| 947 | * </p> |
||
| 948 | * <p> |
||
| 949 | * If the input <i>string</i> contains an invalid code unit |
||
| 950 | * sequence within the given <i>encoding</i> an empty string |
||
| 951 | * will be returned, unless either the <b>ENT_IGNORE</b> or |
||
| 952 | * <b>ENT_SUBSTITUTE</b> flags are set. |
||
| 953 | */ |
||
| 954 | public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true) |
||
| 958 | |||
| 959 | /** |
||
| 960 | * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars() |
||
| 961 | * |
||
| 962 | * @link http://php.net/manual/en/function.htmlspecialchars.php |
||
| 963 | * |
||
| 964 | * @param string $str <p> |
||
| 965 | * The string being converted. |
||
| 966 | * </p> |
||
| 967 | * @param int $flags [optional] <p> |
||
| 968 | * A bitmask of one or more of the following flags, which specify how to handle quotes, |
||
| 969 | * invalid code unit sequences and the used document type. The default is |
||
| 970 | * ENT_COMPAT | ENT_HTML401. |
||
| 971 | * <table> |
||
| 972 | * Available <i>flags</i> constants |
||
| 973 | * <tr valign="top"> |
||
| 974 | * <td>Constant Name</td> |
||
| 975 | * <td>Description</td> |
||
| 976 | * </tr> |
||
| 977 | * <tr valign="top"> |
||
| 978 | * <td><b>ENT_COMPAT</b></td> |
||
| 979 | * <td>Will convert double-quotes and leave single-quotes alone.</td> |
||
| 980 | * </tr> |
||
| 981 | * <tr valign="top"> |
||
| 982 | * <td><b>ENT_QUOTES</b></td> |
||
| 983 | * <td>Will convert both double and single quotes.</td> |
||
| 984 | * </tr> |
||
| 985 | * <tr valign="top"> |
||
| 986 | * <td><b>ENT_NOQUOTES</b></td> |
||
| 987 | * <td>Will leave both double and single quotes unconverted.</td> |
||
| 988 | * </tr> |
||
| 989 | * <tr valign="top"> |
||
| 990 | * <td><b>ENT_IGNORE</b></td> |
||
| 991 | * <td> |
||
| 992 | * Silently discard invalid code unit sequences instead of returning |
||
| 993 | * an empty string. Using this flag is discouraged as it |
||
| 994 | * may have security implications. |
||
| 995 | * </td> |
||
| 996 | * </tr> |
||
| 997 | * <tr valign="top"> |
||
| 998 | * <td><b>ENT_SUBSTITUTE</b></td> |
||
| 999 | * <td> |
||
| 1000 | * Replace invalid code unit sequences with a Unicode Replacement Character |
||
| 1001 | * U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string. |
||
| 1002 | * </td> |
||
| 1003 | * </tr> |
||
| 1004 | * <tr valign="top"> |
||
| 1005 | * <td><b>ENT_DISALLOWED</b></td> |
||
| 1006 | * <td> |
||
| 1007 | * Replace invalid code points for the given document type with a |
||
| 1008 | * Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD; |
||
| 1009 | * (otherwise) instead of leaving them as is. This may be useful, for |
||
| 1010 | * instance, to ensure the well-formedness of XML documents with |
||
| 1011 | * embedded external content. |
||
| 1012 | * </td> |
||
| 1013 | * </tr> |
||
| 1014 | * <tr valign="top"> |
||
| 1015 | * <td><b>ENT_HTML401</b></td> |
||
| 1016 | * <td> |
||
| 1017 | * Handle code as HTML 4.01. |
||
| 1018 | * </td> |
||
| 1019 | * </tr> |
||
| 1020 | * <tr valign="top"> |
||
| 1021 | * <td><b>ENT_XML1</b></td> |
||
| 1022 | * <td> |
||
| 1023 | * Handle code as XML 1. |
||
| 1024 | * </td> |
||
| 1025 | * </tr> |
||
| 1026 | * <tr valign="top"> |
||
| 1027 | * <td><b>ENT_XHTML</b></td> |
||
| 1028 | * <td> |
||
| 1029 | * Handle code as XHTML. |
||
| 1030 | * </td> |
||
| 1031 | * </tr> |
||
| 1032 | * <tr valign="top"> |
||
| 1033 | * <td><b>ENT_HTML5</b></td> |
||
| 1034 | * <td> |
||
| 1035 | * Handle code as HTML 5. |
||
| 1036 | * </td> |
||
| 1037 | * </tr> |
||
| 1038 | * </table> |
||
| 1039 | * </p> |
||
| 1040 | * @param string $encoding [optional] <p> |
||
| 1041 | * Defines encoding used in conversion. |
||
| 1042 | * </p> |
||
| 1043 | * <p> |
||
| 1044 | * For the purposes of this function, the encodings |
||
| 1045 | * ISO-8859-1, ISO-8859-15, |
||
| 1046 | * UTF-8, cp866, |
||
| 1047 | * cp1251, cp1252, and |
||
| 1048 | * KOI8-R are effectively equivalent, provided the |
||
| 1049 | * <i>string</i> itself is valid for the encoding, as |
||
| 1050 | * the characters affected by <b>htmlspecialchars</b> occupy |
||
| 1051 | * the same positions in all of these encodings. |
||
| 1052 | * </p> |
||
| 1053 | * @param bool $double_encode [optional] <p> |
||
| 1054 | * When <i>double_encode</i> is turned off PHP will not |
||
| 1055 | * encode existing html entities, the default is to convert everything. |
||
| 1056 | * </p> |
||
| 1057 | * |
||
| 1058 | 1 | * @return string <p>The converted string. |
|
| 1059 | * </p> |
||
| 1060 | 1 | * <p> |
|
| 1061 | * If the input <i>string</i> contains an invalid code unit |
||
| 1062 | * sequence within the given <i>encoding</i> an empty string |
||
| 1063 | * will be returned, unless either the <b>ENT_IGNORE</b> or |
||
| 1064 | * <b>ENT_SUBSTITUTE</b> flags are set.</p> |
||
| 1065 | */ |
||
| 1066 | public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true) |
||
| 1070 | 16 | ||
| 1071 | /** |
||
| 1072 | 16 | * alias for "UTF8::is_base64" |
|
| 1073 | * |
||
| 1074 | * @param string $str |
||
| 1075 | * |
||
| 1076 | * @return bool |
||
| 1077 | */ |
||
| 1078 | public static function isBase64($str) |
||
| 1082 | |||
| 1083 | /** |
||
| 1084 | * Returns true if the string is base64 encoded, false otherwise. |
||
| 1085 | * |
||
| 1086 | * @param string $str |
||
| 1087 | 31 | * |
|
| 1088 | * @return bool Whether or not $str is base64 encoded |
||
| 1089 | 31 | */ |
|
| 1090 | public static function is_base64($str) |
||
| 1104 | 29 | ||
| 1105 | 29 | /** |
|
| 1106 | 29 | * alias for "UTF8::is_utf8" |
|
| 1107 | 29 | * |
|
| 1108 | 29 | * @param string $str |
|
| 1109 | 29 | * |
|
| 1110 | * @return bool |
||
| 1111 | */ |
||
| 1112 | 29 | public static function isUtf8($str) |
|
| 1116 | |||
| 1117 | 25 | /** |
|
| 1118 | 25 | * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters. |
|
| 1119 | 25 | * |
|
| 1120 | 25 | * @see http://hsivonen.iki.fi/php-utf8/ |
|
| 1121 | 27 | * |
|
| 1122 | * @param string $str The string to be checked. |
||
| 1123 | 11 | * |
|
| 1124 | 11 | * @return bool |
|
| 1125 | 11 | */ |
|
| 1126 | 11 | public static function is_utf8($str) |
|
| 1250 | |||
| 1251 | 37 | /** |
|
| 1252 | * Finds the length of the initial segment of a string consisting entirely of characters contained within a given |
||
| 1253 | 37 | * mask. |
|
| 1254 | 9 | * |
|
| 1255 | * @param string $s |
||
| 1256 | * @param string $mask |
||
| 1257 | * @param int $start |
||
| 1258 | 35 | * @param int $len |
|
| 1259 | * |
||
| 1260 | 35 | * @return int|null |
|
| 1261 | */ |
||
| 1262 | public static function strspn($s, $mask, $start = 0, $len = 2147483647) |
||
| 1270 | 31 | ||
| 1271 | /** |
||
| 1272 | * Get part of a string. |
||
| 1273 | 35 | * |
|
| 1274 | * @link http://php.net/manual/en/function.mb-substr.php |
||
| 1275 | * |
||
| 1276 | 35 | * @param string $str <p> |
|
| 1277 | 1 | * The string being checked. |
|
| 1278 | 1 | * </p> |
|
| 1279 | * @param int $start <p> |
||
| 1280 | 35 | * The first position used in str. |
|
| 1281 | * </p> |
||
| 1282 | * @param int $length [optional] <p> |
||
| 1283 | * The maximum length of the returned string. |
||
| 1284 | * </p> |
||
| 1285 | * @param string $encoding |
||
| 1286 | * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string |
||
| 1287 | * |
||
| 1288 | * @return string mb_substr returns the portion of |
||
| 1289 | * str specified by the start and length parameters. |
||
| 1290 | */ |
||
| 1291 | public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false) |
||
| 1348 | |||
| 1349 | /** |
||
| 1350 | * Get the string length, not the byte-length! |
||
| 1351 | * |
||
| 1352 | * @link http://php.net/manual/en/function.mb-strlen.php |
||
| 1353 | 24 | * |
|
| 1354 | * @param string $str The string being checked for length. |
||
| 1355 | 24 | * @param string $encoding Set the charset for e.g. "mb_" function |
|
| 1356 | * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string |
||
| 1357 | 24 | * |
|
| 1358 | 5 | * @return int the number of characters in |
|
| 1359 | * string str having character encoding |
||
| 1360 | * encoding. A multi-byte character is |
||
| 1361 | * counted as 1. |
||
| 1362 | 23 | */ |
|
| 1363 | 23 | public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false) |
|
| 1385 | |||
| 1386 | /** |
||
| 1387 | * Convert a string to an array of Unicode characters. |
||
| 1388 | * |
||
| 1389 | * @param string $str The string to split into array. |
||
| 1390 | * @param int $length Max character length of each array element. |
||
| 1391 | * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string. |
||
| 1392 | * |
||
| 1393 | * @return array An array containing chunks of the string. |
||
| 1394 | */ |
||
| 1395 | public static function split($str, $length = 1, $cleanUtf8 = false) |
||
| 1464 | 16 | ||
| 1465 | /** |
||
| 1466 | * rxClass |
||
| 1467 | * |
||
| 1468 | * @param string $s |
||
| 1469 | * @param string $class |
||
| 1470 | * |
||
| 1471 | * @return string |
||
| 1472 | */ |
||
| 1473 | protected static function rxClass($s, $class = '') |
||
| 1509 | 1 | ||
| 1510 | 1 | /** |
|
| 1511 | * Convert a string to an array. |
||
| 1512 | 1 | * |
|
| 1513 | * @param string $str |
||
| 1514 | 1 | * @param int $len |
|
| 1515 | * |
||
| 1516 | * @return array |
||
| 1517 | */ |
||
| 1518 | public static function str_split($str, $len = 1) |
||
| 1559 | |||
| 1560 | /** |
||
| 1561 | * Return the width of a string. |
||
| 1562 | * |
||
| 1563 | * @param string $s |
||
| 1564 | * |
||
| 1565 | * @return int |
||
| 1566 | */ |
||
| 1567 | public static function strwidth($s) |
||
| 1574 | 1 | ||
| 1575 | /** |
||
| 1576 | * Limit the number of words in a string. |
||
| 1577 | * |
||
| 1578 | * @param string $str |
||
| 1579 | * @param int $words |
||
| 1580 | * @param string $strAddOn |
||
| 1581 | * |
||
| 1582 | * @return string |
||
| 1583 | */ |
||
| 1584 | public static function words_limit($str, $words = 100, $strAddOn = '...') |
||
| 1604 | |||
| 1605 | /** |
||
| 1606 | * Limit the number of characters in a string. |
||
| 1607 | 1 | * |
|
| 1608 | * @param string $str |
||
| 1609 | 1 | * @param int $length |
|
| 1610 | * @param string $strAddOn |
||
| 1611 | * |
||
| 1612 | * @return string |
||
| 1613 | */ |
||
| 1614 | public static function str_limit($str, $length = 100, $strAddOn = '...') |
||
| 1643 | |||
| 1644 | /** |
||
| 1645 | * Find length of initial segment not matching mask. |
||
| 1646 | * |
||
| 1647 | * @param string $str |
||
| 1648 | 10 | * @param string $charlist |
|
| 1649 | * @param int $start |
||
| 1650 | 10 | * @param int $len |
|
| 1651 | 10 | * |
|
| 1652 | 10 | * @return int|null |
|
| 1653 | */ |
||
| 1654 | 10 | public static function strcspn($str, $charlist, $start = 0, $len = 2147483647) |
|
| 1673 | |||
| 1674 | /** |
||
| 1675 | * Checks if the number of Unicode characters in a string is not |
||
| 1676 | * more than the specified integer. |
||
| 1677 | * |
||
| 1678 | * @param string $str The original string to be checked. |
||
| 1679 | * @param int $box_size The size in number of chars to be checked against string. |
||
| 1680 | * |
||
| 1681 | * @return bool true if string is less than or equal to $box_size, false otherwise. |
||
| 1682 | */ |
||
| 1683 | public static function fits_inside($str, $box_size) |
||
| 1687 | 19 | ||
| 1688 | /** |
||
| 1689 | 19 | * Returns all of haystack starting from and including the first occurrence of needle to the end. |
|
| 1690 | * |
||
| 1691 | 19 | * @param string $str |
|
| 1692 | 5 | * @param string $needle |
|
| 1693 | * @param bool $before_needle |
||
| 1694 | * |
||
| 1695 | * @return false|string |
||
| 1696 | 17 | */ |
|
| 1697 | public static function stristr($str, $needle, $before_needle = false) |
||
| 1708 | |||
| 1709 | /** |
||
| 1710 | 1 | * Case insensitive string comparisons using a "natural order" algorithm. |
|
| 1711 | * |
||
| 1712 | 1 | * @param string $str1 |
|
| 1713 | * @param string $str2 |
||
| 1714 | 1 | * |
|
| 1715 | 1 | * @return int Similar to other string comparison functions, this one returns < 0 if str1 is less than str2; > 0 if |
|
| 1716 | * str1 is greater than str2, and 0 if they are equal. |
||
| 1717 | */ |
||
| 1718 | 1 | public static function strnatcasecmp($str1, $str2) |
|
| 1722 | 1 | ||
| 1723 | 1 | /** |
|
| 1724 | 1 | * String comparisons using a "natural order" algorithm. |
|
| 1725 | 1 | * |
|
| 1726 | * @param string $str1 |
||
| 1727 | 1 | * @param string $str2 |
|
| 1728 | 1 | * |
|
| 1729 | 1 | * @return int Similar to other string comparison functions, this one returns < 0 if str1 is less than str2; > 0 if |
|
| 1730 | * str1 is greater than str2, and 0 if they are equal. |
||
| 1731 | 1 | */ |
|
| 1732 | public static function strnatcmp($str1, $str2) |
||
| 1736 | |||
| 1737 | /** |
||
| 1738 | * Generic case sensitive transformation for collation matching. |
||
| 1739 | * |
||
| 1740 | * @param string $s |
||
| 1741 | 8 | * |
|
| 1742 | * @return string |
||
| 1743 | 8 | */ |
|
| 1744 | 8 | protected static function strtonatfold($s) |
|
| 1748 | 8 | ||
| 1749 | 2 | /** |
|
| 1750 | * Unicode transformation for case-less matching. |
||
| 1751 | * |
||
| 1752 | 8 | * @link http://unicode.org/reports/tr21/tr21-5.html |
|
| 1753 | 1 | * |
|
| 1754 | 1 | * @param string $str |
|
| 1755 | 1 | * @param bool $full |
|
| 1756 | * |
||
| 1757 | 8 | * @return string |
|
| 1758 | */ |
||
| 1759 | public static function strtocasefold($str, $full = true) |
||
| 1784 | |||
| 1785 | /** |
||
| 1786 | * (PHP 4 >= 4.3.0, PHP 5)<br/> |
||
| 1787 | * Make a string lowercase. |
||
| 1788 | * |
||
| 1789 | * @link http://php.net/manual/en/function.mb-strtolower.php |
||
| 1790 | * |
||
| 1791 | * @param string $str <p> |
||
| 1792 | * The string being lowercased. |
||
| 1793 | * </p> |
||
| 1794 | * @param string $encoding |
||
| 1795 | * |
||
| 1796 | * @return string str with all alphabetic characters converted to lowercase. |
||
| 1797 | */ |
||
| 1798 | public static function strtolower($str, $encoding = 'UTF-8') |
||
| 1811 | |||
| 1812 | /** |
||
| 1813 | * Returns a case swapped version of the string. |
||
| 1814 | * |
||
| 1815 | * @param string $str |
||
| 1816 | * @param string $encoding |
||
| 1817 | * |
||
| 1818 | * @return string each character's case swapped |
||
| 1819 | */ |
||
| 1820 | public static function swapCase($str, $encoding = 'UTF-8') |
||
| 1846 | |||
| 1847 | /** |
||
| 1848 | 2 | * Multi decode html entity & fix urlencoded-win1252-chars. |
|
| 1849 | * |
||
| 1850 | * e.g: |
||
| 1851 | 14 | * 'D&#252;sseldorf' => 'Düsseldorf' |
|
| 1852 | * 'D%FCsseldorf' => 'Düsseldorf' |
||
| 1853 | * 'D&#xFC;sseldorf' => 'Düsseldorf' |
||
| 1854 | 14 | * 'D%26%23xFC%3Bsseldorf' => 'Düsseldorf' |
|
| 1855 | 14 | * 'DÃ¼sseldorf' => 'Düsseldorf' |
|
| 1856 | 14 | * 'D%C3%BCsseldorf' => 'Düsseldorf' |
|
| 1857 | * 'D%C3%83%C2%BCsseldorf' => 'Düsseldorf' |
||
| 1858 | 14 | * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf' |
|
| 1859 | 14 | * |
|
| 1860 | * @param string $str |
||
| 1861 | 14 | * |
|
| 1862 | * @return string |
||
| 1863 | */ |
||
| 1864 | public static function urldecode($str) |
||
| 1887 | |||
| 1888 | /** |
||
| 1889 | * Fixing a broken UTF-8 string. |
||
| 1890 | 20 | * |
|
| 1891 | * @param string $str |
||
| 1892 | 20 | * |
|
| 1893 | 2 | * @return string |
|
| 1894 | */ |
||
| 1895 | 2 | public static function fix_simple_utf8($str) |
|
| 1913 | 20 | ||
| 1914 | 20 | /** |
|
| 1915 | 20 | * UTF-8 version of html_entity_decode() |
|
| 1916 | 20 | * |
|
| 1917 | * The reason we are not using html_entity_decode() by itself is because |
||
| 1918 | 20 | * while it is not technically correct to leave out the semicolon |
|
| 1919 | * at the end of an entity most browsers will still interpret the entity |
||
| 1920 | 18 | * correctly. html_entity_decode() does not convert entities without |
|
| 1921 | 17 | * semicolons, so we are left with our own little solution here. Bummer. |
|
| 1922 | 17 | * |
|
| 1923 | 17 | * Convert all HTML entities to their applicable characters |
|
| 1924 | 5 | * |
|
| 1925 | 5 | * @link http://php.net/manual/en/function.html-entity-decode.php |
|
| 1926 | 5 | * |
|
| 1927 | * @param string $str <p> |
||
| 1928 | * The input string. |
||
| 1929 | 20 | * </p> |
|
| 1930 | * @param int $flags [optional] <p> |
||
| 1931 | 18 | * A bitmask of one or more of the following flags, which specify how to handle quotes and |
|
| 1932 | 14 | * which document type to use. The default is ENT_COMPAT | ENT_HTML401. |
|
| 1933 | 14 | * <table> |
|
| 1934 | 14 | * Available <i>flags</i> constants |
|
| 1935 | 8 | * <tr valign="top"> |
|
| 1936 | 8 | * <td>Constant Name</td> |
|
| 1937 | 8 | * <td>Description</td> |
|
| 1938 | * </tr> |
||
| 1939 | * <tr valign="top"> |
||
| 1940 | 19 | * <td><b>ENT_COMPAT</b></td> |
|
| 1941 | * <td>Will convert double-quotes and leave single-quotes alone.</td> |
||
| 1942 | 7 | * </tr> |
|
| 1943 | 1 | * <tr valign="top"> |
|
| 1944 | 1 | * <td><b>ENT_QUOTES</b></td> |
|
| 1945 | 1 | * <td>Will convert both double and single quotes.</td> |
|
| 1946 | 6 | * </tr> |
|
| 1947 | 6 | * <tr valign="top"> |
|
| 1948 | 6 | * <td><b>ENT_NOQUOTES</b></td> |
|
| 1949 | * <td>Will leave both double and single quotes unconverted.</td> |
||
| 1950 | * </tr> |
||
| 1951 | 7 | * <tr valign="top"> |
|
| 1952 | 6 | * <td><b>ENT_HTML401</b></td> |
|
| 1953 | 6 | * <td> |
|
| 1954 | 6 | * Handle code as HTML 4.01. |
|
| 1955 | * </td> |
||
| 1956 | * </tr> |
||
| 1957 | 20 | * <tr valign="top"> |
|
| 1958 | * <td><b>ENT_XML1</b></td> |
||
| 1959 | 2 | * <td> |
|
| 1960 | 2 | * Handle code as XML 1. |
|
| 1961 | * </td> |
||
| 1962 | * </tr> |
||
| 1963 | 2 | * <tr valign="top"> |
|
| 1964 | 2 | * <td><b>ENT_XHTML</b></td> |
|
| 1965 | 2 | * <td> |
|
| 1966 | * Handle code as XHTML. |
||
| 1967 | * </td> |
||
| 1968 | 2 | * </tr> |
|
| 1969 | 18 | * <tr valign="top"> |
|
| 1970 | * <td><b>ENT_HTML5</b></td> |
||
| 1971 | 20 | * <td> |
|
| 1972 | * Handle code as HTML 5. |
||
| 1973 | 20 | * </td> |
|
| 1974 | * </tr> |
||
| 1975 | * </table> |
||
| 1976 | 20 | * </p> |
|
| 1977 | 20 | * @param string $encoding [optional] <p> |
|
| 1978 | * Encoding to use. |
||
| 1979 | 3 | * </p> |
|
| 1980 | 20 | * |
|
| 1981 | * @return string the decoded string. |
||
| 1982 | 20 | */ |
|
| 1983 | public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8') |
||
| 2016 | |||
| 2017 | /** |
||
| 2018 | * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8. |
||
| 2019 | * |
||
| 2020 | * - It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1. |
||
| 2021 | * |
||
| 2022 | * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios: |
||
| 2023 | * |
||
| 2024 | * 1) when any of these characters: ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß |
||
| 2025 | * are followed by any of these: ("group B") |
||
| 2026 | * ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶•¸¹º»¼½¾¿ |
||
| 2027 | * For example: %ABREPRESENT%C9%BB. «REPRESENTÉ» |
||
| 2028 | * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB) |
||
| 2029 | * is also a valid unicode character, and will be left unchanged. |
||
| 2030 | * |
||
| 2031 | * 2) when any of these: àáâãäåæçèéêëìíîï are followed by TWO chars from group B, |
||
| 2032 | * 3) when any of these: ðñòó are followed by THREE chars from group B. |
||
| 2033 | * |
||
| 2034 | * @param string $str Any string or array. |
||
| 2035 | * |
||
| 2036 | * @return string The same string, but UTF8 encoded. |
||
| 2037 | */ |
||
| 2038 | public static function to_utf8($str) |
||
| 2144 | 2 | ||
| 2145 | 2 | /** |
|
| 2146 | * alias for "UTF8::to_utf8" |
||
| 2147 | 2 | * |
|
| 2148 | 2 | * @param string $str |
|
| 2149 | * |
||
| 2150 | * @return string |
||
| 2151 | */ |
||
| 2152 | 2 | public static function toUTF8($str) |
|
| 2156 | 2 | ||
| 2157 | /** |
||
| 2158 | 2 | * Try to check if a string is a json-string... |
|
| 2159 | 1 | * |
|
| 2160 | 1 | * @param $str |
|
| 2161 | 2 | * |
|
| 2162 | * @return bool |
||
| 2163 | * |
||
| 2164 | * @deprecated |
||
| 2165 | 2 | */ |
|
| 2166 | public static function isJson($str) |
||
| 2184 | |||
| 2185 | /** |
||
| 2186 | * Returns part of haystack string from the first occurrence of needle to the end of haystack. |
||
| 2187 | * |
||
| 2188 | * @link http://php.net/manual/en/function.grapheme-strstr.php |
||
| 2189 | * |
||
| 2190 | * @param string $haystack <p> |
||
| 2191 | 2 | * The input string. Must be valid UTF-8. |
|
| 2192 | * </p> |
||
| 2193 | * @param string $needle <p> |
||
| 2194 | 2 | * The string to look for. Must be valid UTF-8. |
|
| 2195 | * </p> |
||
| 2196 | * @param bool $before_needle [optional] <p> |
||
| 2197 | * If <b>TRUE</b>, grapheme_strstr() returns the part of the |
||
| 2198 | 2 | * haystack before the first occurrence of the needle (excluding the needle). |
|
| 2199 | * </p> |
||
| 2200 | * |
||
| 2201 | * @return string the portion of string, or FALSE if needle is not found. |
||
| 2202 | */ |
||
| 2203 | public static function strstr($haystack, $needle, $before_needle = false) |
||
| 2209 | |||
| 2210 | 2 | /** |
|
| 2211 | * Reads entire file into a string. |
||
| 2212 | * |
||
| 2213 | * WARNING: do not use UTF-8 Option for binary-files (e.g.: images) !!! |
||
| 2214 | * |
||
| 2215 | * @link http://php.net/manual/en/function.file-get-contents.php |
||
| 2216 | 2 | * |
|
| 2217 | * @param string $filename <p> |
||
| 2218 | * Name of the file to read. |
||
| 2219 | * </p> |
||
| 2220 | * @param int $flags [optional] <p> |
||
| 2221 | * Prior to PHP 6, this parameter is called |
||
| 2222 | 2 | * use_include_path and is a bool. |
|
| 2223 | * As of PHP 5 the FILE_USE_INCLUDE_PATH can be used |
||
| 2224 | * to trigger include path |
||
| 2225 | * search. |
||
| 2226 | 2 | * </p> |
|
| 2227 | 2 | * <p> |
|
| 2228 | * The value of flags can be any combination of |
||
| 2229 | * the following flags (with some restrictions), joined with the |
||
| 2230 | * binary OR (|) |
||
| 2231 | 2 | * operator. |
|
| 2232 | 2 | * </p> |
|
| 2233 | 2 | * <p> |
|
| 2234 | 2 | * <table> |
|
| 2235 | 2 | * Available flags |
|
| 2236 | 2 | * <tr valign="top"> |
|
| 2237 | * <td>Flag</td> |
||
| 2238 | 2 | * <td>Description</td> |
|
| 2239 | 1 | * </tr> |
|
| 2240 | 1 | * <tr valign="top"> |
|
| 2241 | 1 | * <td> |
|
| 2242 | 1 | * FILE_USE_INCLUDE_PATH |
|
| 2243 | 1 | * </td> |
|
| 2244 | * <td> |
||
| 2245 | 1 | * Search for filename in the include directory. |
|
| 2246 | * See include_path for more |
||
| 2247 | * information. |
||
| 2248 | 1 | * </td> |
|
| 2249 | * </tr> |
||
| 2250 | 2 | * <tr valign="top"> |
|
| 2251 | * <td> |
||
| 2252 | * FILE_TEXT |
||
| 2253 | * </td> |
||
| 2254 | 2 | * <td> |
|
| 2255 | * As of PHP 6, the default encoding of the read |
||
| 2256 | * data is UTF-8. You can specify a different encoding by creating a |
||
| 2257 | * custom context or by changing the default using |
||
| 2258 | * stream_default_encoding. This flag cannot be |
||
| 2259 | * used with FILE_BINARY. |
||
| 2260 | * </td> |
||
| 2261 | * </tr> |
||
| 2262 | * <tr valign="top"> |
||
| 2263 | * <td> |
||
| 2264 | 3 | * FILE_BINARY |
|
| 2265 | * </td> |
||
| 2266 | * <td> |
||
| 2267 | 3 | * With this flag, the file is read in binary mode. This is the default |
|
| 2268 | * setting and cannot be used with FILE_TEXT. |
||
| 2269 | * </td> |
||
| 2270 | 3 | * </tr> |
|
| 2271 | * </table> |
||
| 2272 | 3 | * </p> |
|
| 2273 | 3 | * @param resource $context [optional] <p> |
|
| 2274 | 3 | * A valid context resource created with |
|
| 2275 | 3 | * stream_context_create. If you don't need to use a |
|
| 2276 | 2 | * custom context, you can skip this parameter by passing null. |
|
| 2277 | * </p> |
||
| 2278 | 3 | * @param int $offset [optional] <p> |
|
| 2279 | * The offset where the reading starts. |
||
| 2280 | * </p> |
||
| 2281 | * @param int $maxlen [optional] <p> |
||
| 2282 | * Maximum length of data read. The default is to read until end |
||
| 2283 | * of file is reached. |
||
| 2284 | * </p> |
||
| 2285 | * @param int $timeout |
||
| 2286 | * |
||
| 2287 | * @param boolean $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used non |
||
| 2288 | * default utf-8 chars |
||
| 2289 | 1 | * |
|
| 2290 | * @return string The function returns the read data or false on failure. |
||
| 2291 | 1 | */ |
|
| 2292 | 1 | public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true) |
|
| 2334 | 1 | ||
| 2335 | /** |
||
| 2336 | * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32. |
||
| 2337 | * |
||
| 2338 | * @param string $str |
||
| 2339 | * |
||
| 2340 | * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br /> |
||
| 2341 | * otherwise it will return false. |
||
| 2342 | */ |
||
| 2343 | public static function str_detect_encoding($str) |
||
| 2413 | |||
| 2414 | /** |
||
| 2415 | * Check if the input is binary... (this looks like a hack) |
||
| 2416 | 5 | * |
|
| 2417 | * @param string $input |
||
| 2418 | 5 | * |
|
| 2419 | * @return bool |
||
| 2420 | 5 | */ |
|
| 2421 | public static function is_binary($input) |
||
| 2438 | |||
| 2439 | /** |
||
| 2440 | * Check if the string is UTF-16. |
||
| 2441 | * |
||
| 2442 | * @param string $str |
||
| 2443 | * |
||
| 2444 | * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE. |
||
| 2445 | */ |
||
| 2446 | View Code Duplication | public static function is_utf16($str) |
|
| 2493 | |||
| 2494 | /** |
||
| 2495 | * Returns count of characters used in a string. |
||
| 2496 | * |
||
| 2497 | * @param string $str The input string. |
||
| 2498 | * |
||
| 2499 | * @return array An associative array of Character as keys and |
||
| 2500 | * their count as values. |
||
| 2501 | */ |
||
| 2502 | public static function count_chars($str) // there is no $mode parameters |
||
| 2510 | |||
| 2511 | /** |
||
| 2512 | * Check if the string is UTF-32. |
||
| 2513 | * |
||
| 2514 | * @param string $str |
||
| 2515 | * |
||
| 2516 | * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE. |
||
| 2517 | */ |
||
| 2518 | View Code Duplication | public static function is_utf32($str) |
|
| 2565 | |||
| 2566 | 1 | /** |
|
| 2567 | 1 | * Clean up a string and show only printable UTF-8 chars at the end. |
|
| 2568 | 6 | * |
|
| 2569 | 1 | * @param string|false $str |
|
| 2570 | 1 | * |
|
| 2571 | 1 | * @return string |
|
| 2572 | 1 | */ |
|
| 2573 | 6 | public static function cleanup($str) |
|
| 2596 | 6 | ||
| 2597 | 6 | /** |
|
| 2598 | 6 | * Check if the file is binary. |
|
| 2599 | * |
||
| 2600 | 6 | * @param string $file |
|
| 2601 | * |
||
| 2602 | * @return boolean |
||
| 2603 | */ |
||
| 2604 | public static function is_binary_file($file) |
||
| 2616 | 11 | ||
| 2617 | /** |
||
| 2618 | 11 | * Finds the last occurrence of a character in a string within another. |
|
| 2619 | 11 | * |
|
| 2620 | * @link http://php.net/manual/en/function.mb-strrchr.php |
||
| 2621 | * |
||
| 2622 | 1 | * @param string $haystack <p> |
|
| 2623 | 1 | * The string from which to get the last occurrence |
|
| 2624 | * of needle |
||
| 2625 | * </p> |
||
| 2626 | * @param string $needle <p> |
||
| 2627 | * The string to find in haystack |
||
| 2628 | * </p> |
||
| 2629 | * @param bool $part [optional] <p> |
||
| 2630 | * Determines which portion of haystack |
||
| 2631 | * this function returns. |
||
| 2632 | * If set to true, it returns all of haystack |
||
| 2633 | * from the beginning to the last occurrence of needle. |
||
| 2634 | * If set to false, it returns all of haystack |
||
| 2635 | * from the last occurrence of needle to the end, |
||
| 2636 | 11 | * </p> |
|
| 2637 | * @param string $encoding [optional] <p> |
||
| 2638 | 11 | * Character encoding name to use. |
|
| 2639 | 11 | * If it is omitted, internal character encoding is used. |
|
| 2640 | * </p> |
||
| 2641 | 11 | * |
|
| 2642 | 11 | * @return string the portion of haystack. |
|
| 2643 | 11 | * or false if needle is not found. |
|
| 2644 | 11 | */ |
|
| 2645 | 11 | public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8') |
|
| 2651 | 11 | ||
| 2652 | /** |
||
| 2653 | * Finds the last occurrence of a character in a string within another, case insensitive. |
||
| 2654 | * |
||
| 2655 | 11 | * @link http://php.net/manual/en/function.mb-strrichr.php |
|
| 2656 | * |
||
| 2657 | * @param string $haystack <p> |
||
| 2658 | * The string from which to get the last occurrence |
||
| 2659 | * of needle |
||
| 2660 | * </p> |
||
| 2661 | * @param string $needle <p> |
||
| 2662 | * The string to find in haystack |
||
| 2663 | * </p> |
||
| 2664 | * @param bool $part [optional] <p> |
||
| 2665 | 2 | * Determines which portion of haystack |
|
| 2666 | * this function returns. |
||
| 2667 | 2 | * If set to true, it returns all of haystack |
|
| 2668 | * from the beginning to the last occurrence of needle. |
||
| 2669 | * If set to false, it returns all of haystack |
||
| 2670 | * from the last occurrence of needle to the end, |
||
| 2671 | * </p> |
||
| 2672 | * @param string $encoding [optional] <p> |
||
| 2673 | * Character encoding name to use. |
||
| 2674 | * If it is omitted, internal character encoding is used. |
||
| 2675 | * </p> |
||
| 2676 | * |
||
| 2677 | 2 | * @return string the portion of haystack. |
|
| 2678 | * or false if needle is not found. |
||
| 2679 | 2 | */ |
|
| 2680 | public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8') |
||
| 2686 | 1 | ||
| 2687 | 2 | /** |
|
| 2688 | 2 | * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from CP-1252 when needed. |
|
| 2689 | * |
||
| 2690 | * @param mixed $var |
||
| 2691 | * @param int $filter |
||
| 2692 | * @param mixed $option |
||
| 2693 | * |
||
| 2694 | * @return mixed |
||
| 2695 | */ |
||
| 2696 | View Code Duplication | public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null) |
|
| 2706 | 6 | ||
| 2707 | /** |
||
| 2708 | 6 | * Normalizes to UTF-8 NFC, converting from CP-1252 when needed. |
|
| 2709 | 5 | * |
|
| 2710 | * @param mixed $var |
||
| 2711 | * @param int $normalization_form |
||
| 2712 | * @param string $leading_combining |
||
| 2713 | 6 | * |
|
| 2714 | * @return mixed |
||
| 2715 | 6 | */ |
|
| 2716 | public static function filter($var, $normalization_form = 4, $leading_combining = '◌') |
||
| 2759 | |||
| 2760 | /** |
||
| 2761 | 5 | * Encode to UTF8 or LATIN1. |
|
| 2762 | 5 | * |
|
| 2763 | * INFO: The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding, |
||
| 2764 | 5 | * so you can also call this function on a UTF-8 string without messing up the string. |
|
| 2765 | 1 | * |
|
| 2766 | 1 | * @param string $encodingLabel ISO-8859-1 || UTF-8 |
|
| 2767 | 1 | * @param string $str |
|
| 2768 | * |
||
| 2769 | 5 | * @return false|string Will return false on error. |
|
| 2770 | */ |
||
| 2771 | public static function encode($encodingLabel, $str) |
||
| 2785 | |||
| 2786 | /** |
||
| 2787 | * Normalize the encoding-name input. |
||
| 2788 | * |
||
| 2789 | * @param string $encodingLabel e.g.: ISO, UTF8, ISO88591, WIN1252, etc. |
||
| 2790 | * |
||
| 2791 | * @return string |
||
| 2792 | */ |
||
| 2793 | protected static function normalizeEncoding($encodingLabel) |
||
| 2817 | |||
| 2818 | /** |
||
| 2819 | 1 | * alias for "UTF8::to_win1252()" |
|
| 2820 | * |
||
| 2821 | * @param string|array $str |
||
| 2822 | * |
||
| 2823 | * @return string|array |
||
| 2824 | */ |
||
| 2825 | public static function to_latin1($str) |
||
| 2829 | |||
| 2830 | /** |
||
| 2831 | * Convert a string into win1252. |
||
| 2832 | * |
||
| 2833 | * @param string|array $str |
||
| 2834 | * |
||
| 2835 | * @return string|array |
||
| 2836 | */ |
||
| 2837 | protected static function to_win1252($str) |
||
| 2853 | |||
| 2854 | /** |
||
| 2855 | * Decodes an UTF-8 string to ISO-8859-1. |
||
| 2856 | * |
||
| 2857 | * @param string $str |
||
| 2858 | 2 | * |
|
| 2859 | * @return string |
||
| 2860 | 2 | */ |
|
| 2861 | public static function utf8_decode($str) |
||
| 2884 | |||
| 2885 | /** |
||
| 2886 | * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from CP-1252 when needed. |
||
| 2887 | * |
||
| 2888 | * @param int $type |
||
| 2889 | * @param string $var |
||
| 2890 | * @param int $filter |
||
| 2891 | * @param mixed $option |
||
| 2892 | * |
||
| 2893 | * @return mixed |
||
| 2894 | */ |
||
| 2895 | View Code Duplication | public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null) |
|
| 2905 | |||
| 2906 | /** |
||
| 2907 | * Encodes an ISO-8859-1 string to UTF-8. |
||
| 2908 | * |
||
| 2909 | * @param string $str |
||
| 2910 | * |
||
| 2911 | * @return string |
||
| 2912 | */ |
||
| 2913 | public static function utf8_encode($str) |
||
| 2932 | |||
| 2933 | /** |
||
| 2934 | * (PHP 5 >= 5.2.0, PECL json >= 1.2.0)<br/> |
||
| 2935 | * Returns the JSON representation of a value |
||
| 2936 | * |
||
| 2937 | * @link http://php.net/manual/en/function.json-encode.php |
||
| 2938 | * |
||
| 2939 | * @param mixed $value <p> |
||
| 2940 | * The <i>value</i> being encoded. Can be any type except |
||
| 2941 | * a resource. |
||
| 2942 | * </p> |
||
| 2943 | * <p> |
||
| 2944 | 8 | * All string data must be UTF-8 encoded. |
|
| 2945 | * </p> |
||
| 2946 | 8 | * <p>PHP implements a superset of |
|
| 2947 | 5 | * JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard |
|
| 2948 | 5 | * only supports these values when they are nested inside an array or an object. |
|
| 2949 | 8 | * </p> |
|
| 2950 | * @param int $options [optional] <p> |
||
| 2951 | * Bitmask consisting of <b>JSON_HEX_QUOT</b>, |
||
| 2952 | * <b>JSON_HEX_TAG</b>, |
||
| 2953 | * <b>JSON_HEX_AMP</b>, |
||
| 2954 | * <b>JSON_HEX_APOS</b>, |
||
| 2955 | * <b>JSON_NUMERIC_CHECK</b>, |
||
| 2956 | * <b>JSON_PRETTY_PRINT</b>, |
||
| 2957 | * <b>JSON_UNESCAPED_SLASHES</b>, |
||
| 2958 | * <b>JSON_FORCE_OBJECT</b>, |
||
| 2959 | * <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these |
||
| 2960 | 1 | * constants is described on |
|
| 2961 | * the JSON constants page. |
||
| 2962 | 1 | * </p> |
|
| 2963 | 1 | * @param int $depth [optional] <p> |
|
| 2964 | 1 | * Set the maximum depth. Must be greater than zero. |
|
| 2965 | * </p> |
||
| 2966 | 1 | * |
|
| 2967 | * @return string a JSON encoded string on success or <b>FALSE</b> on failure. |
||
| 2968 | */ |
||
| 2969 | public static function json_encode($value, $options = 0, $depth = 512) |
||
| 2981 | |||
| 2982 | 2 | /** |
|
| 2983 | * (PHP 5 >= 5.2.0, PECL json >= 1.2.0)<br/> |
||
| 2984 | 2 | * Decodes a JSON string |
|
| 2985 | 2 | * |
|
| 2986 | * @link http://php.net/manual/en/function.json-decode.php |
||
| 2987 | * |
||
| 2988 | 2 | * @param string $json <p> |
|
| 2989 | * The <i>json</i> string being decoded. |
||
| 2990 | * </p> |
||
| 2991 | * <p> |
||
| 2992 | * This function only works with UTF-8 encoded strings. |
||
| 2993 | * </p> |
||
| 2994 | * <p>PHP implements a superset of |
||
| 2995 | * JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard |
||
| 2996 | * only supports these values when they are nested inside an array or an object. |
||
| 2997 | * </p> |
||
| 2998 | 2 | * @param bool $assoc [optional] <p> |
|
| 2999 | * When <b>TRUE</b>, returned objects will be converted into |
||
| 3000 | 2 | * associative arrays. |
|
| 3001 | 1 | * </p> |
|
| 3002 | * @param int $depth [optional] <p> |
||
| 3003 | * User specified recursion depth. |
||
| 3004 | 2 | * </p> |
|
| 3005 | * @param int $options [optional] <p> |
||
| 3006 | * Bitmask of JSON decode options. Currently only |
||
| 3007 | * <b>JSON_BIGINT_AS_STRING</b> |
||
| 3008 | * is supported (default is to cast large integers as floats) |
||
| 3009 | * </p> |
||
| 3010 | * |
||
| 3011 | * @return mixed the value encoded in <i>json</i> in appropriate |
||
| 3012 | * PHP type. Values true, false and |
||
| 3013 | * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> |
||
| 3014 | * and <b>NULL</b> respectively. <b>NULL</b> is returned if the |
||
| 3015 | 15 | * <i>json</i> cannot be decoded or if the encoded |
|
| 3016 | * data is deeper than the recursion limit. |
||
| 3017 | 15 | */ |
|
| 3018 | 2 | public static function json_decode($json, $assoc = false, $depth = 512, $options = 0) |
|
| 3030 | |||
| 3031 | /** |
||
| 3032 | 12 | * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from CP-1252 when needed. |
|
| 3033 | 8 | * |
|
| 3034 | * @param array $data |
||
| 3035 | * @param mixed $definition |
||
| 3036 | 10 | * @param bool $add_empty |
|
| 3037 | * |
||
| 3038 | * @return mixed |
||
| 3039 | */ |
||
| 3040 | View Code Duplication | public static function filter_var_array($data, $definition = null, $add_empty = true) |
|
| 3050 | 1 | ||
| 3051 | 1 | /** |
|
| 3052 | * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from CP-1252 when needed. |
||
| 3053 | 1 | * |
|
| 3054 | 1 | * @param int $type |
|
| 3055 | 1 | * @param mixed $definition |
|
| 3056 | 1 | * @param bool $add_empty |
|
| 3057 | 1 | * |
|
| 3058 | 1 | * @return mixed |
|
| 3059 | */ |
||
| 3060 | View Code Duplication | public static function filter_input_array($type, $definition = null, $add_empty = true) |
|
| 3070 | 1 | ||
| 3071 | /** |
||
| 3072 | * Search a string for any of a set of characters. |
||
| 3073 | * |
||
| 3074 | * @param string $s |
||
| 3075 | * @param string $charList |
||
| 3076 | * |
||
| 3077 | * @return string|false |
||
| 3078 | */ |
||
| 3079 | public static function strpbrk($s, $charList) |
||
| 3087 | |||
| 3088 | /** |
||
| 3089 | * Case-insensitive string comparison of the first n characters. |
||
| 3090 | * |
||
| 3091 | * @param string $str1 |
||
| 3092 | 2 | * @param string $str2 |
|
| 3093 | * @param int $len |
||
| 3094 | 2 | * |
|
| 3095 | * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal. |
||
| 3096 | */ |
||
| 3097 | public static function strncasecmp($str1, $str2, $len) |
||
| 3101 | |||
| 3102 | /** |
||
| 3103 | * Comparison of the first n characters. |
||
| 3104 | * |
||
| 3105 | * @param string $str1 |
||
| 3106 | * @param string $str2 |
||
| 3107 | * @param int $len |
||
| 3108 | * |
||
| 3109 | * @return int <strong>< 0</strong> if str1 is less than str2<br /> |
||
| 3110 | * <strong>> 0</strong> if str1 is greater than str2<br /> |
||
| 3111 | * <strong>0</strong> if they are equal |
||
| 3112 | */ |
||
| 3113 | public static function strncmp($str1, $str2, $len) |
||
| 3117 | |||
| 3118 | 1 | /** |
|
| 3119 | * String comparison. |
||
| 3120 | * |
||
| 3121 | * @param string $str1 |
||
| 3122 | * @param string $str2 |
||
| 3123 | * |
||
| 3124 | * @return int <strong>< 0</strong> if str1 is less than str2<br /> |
||
| 3125 | * <strong>> 0</strong> if str1 is greater than str2<br /> |
||
| 3126 | * <strong>0</strong> if they are equal. |
||
| 3127 | */ |
||
| 3128 | public static function strcmp($str1, $str2) |
||
| 3135 | |||
| 3136 | /** |
||
| 3137 | * Calculates and returns the maximum number of bytes taken by any |
||
| 3138 | * UTF-8 encoded character in the given string. |
||
| 3139 | * |
||
| 3140 | * @param string $str The original Unicode string. |
||
| 3141 | * |
||
| 3142 | * @return int The maximum number of bytes taken by any single character in the string. |
||
| 3143 | */ |
||
| 3144 | public static function max_chr_width($str) |
||
| 3153 | |||
| 3154 | /** |
||
| 3155 | * Generates an array of byte length of each character of a Unicode string. |
||
| 3156 | * |
||
| 3157 | * 1 byte => U+0000 - U+007F |
||
| 3158 | * 2 byte => U+0080 - U+07FF |
||
| 3159 | * 3 byte => U+0800 - U+FFFF |
||
| 3160 | * 4 byte => U+10000 - U+10FFFF |
||
| 3161 | * |
||
| 3162 | * @param string $str The original Unicode string. |
||
| 3163 | * |
||
| 3164 | * @return array An array of byte lengths of each character. |
||
| 3165 | 4 | */ |
|
| 3166 | public static function chr_size_list($str) |
||
| 3174 | |||
| 3175 | 4 | /** |
|
| 3176 | 4 | * Converts a UTF-8 character to an HTML numbered entity like "&#123;". |
|
| 3177 | 4 | * |
|
| 3178 | 4 | * @param string $chr The Unicode character to be encoded as numbered entity. |
|
| 3179 | * |
||
| 3180 | 4 | * @return string The HTML numbered entity. |
|
| 3181 | */ |
||
| 3182 | public static function single_chr_html_encode($chr) |
||
| 3190 | |||
| 3191 | 4 | /** |
|
| 3192 | 4 | * Calculates Unicode code point of the given UTF-8 encoded character. |
|
| 3193 | * |
||
| 3194 | 4 | * @param string $s The character of which to calculate code point. |
|
| 3195 | 4 | * |
|
| 3196 | 4 | * @return int Unicode code point of the given character,<br /> |
|
| 3197 | 4 | * 0 on invalid UTF-8 byte sequence. |
|
| 3198 | 4 | */ |
|
| 3199 | public static function ord($s) |
||
| 3222 | |||
| 3223 | /** |
||
| 3224 | * Converts a UTF-8 string to a series of HTML numbered entities. |
||
| 3225 | * |
||
| 3226 | * e.g.: &#123;&#39;&#1740; |
||
| 3227 | * |
||
| 3228 | * @param string $str The Unicode string to be encoded as numbered entities. |
||
| 3229 | * |
||
| 3230 | * @return string HTML numbered entities. |
||
| 3231 | */ |
||
| 3232 | public static function html_encode($str) |
||
| 3244 | |||
| 3245 | 11 | /** |
|
| 3246 | 11 | * Checks if a file starts with BOM character. |
|
| 3247 | * |
||
| 3248 | 11 | * @param string $file_path Path to a valid file. |
|
| 3249 | 2 | * |
|
| 3250 | * @return bool True if the file has BOM at the start, False otherwise. |
||
| 3251 | */ |
||
| 3252 | public static function file_has_bom($file_path) |
||
| 3256 | |||
| 3257 | /** |
||
| 3258 | 10 | * Checks if the given string is exactly "UTF8 - Byte Order Mark". |
|
| 3259 | * |
||
| 3260 | * WARNING: Use "UTF8::string_has_bom()" if you want to check whether a string starts with a BOM. |
||
| 3261 | * |
||
| 3262 | 10 | * @param string $utf8_chr The input string. |
|
| 3263 | * |
||
| 3264 | * @return bool True if the $utf8_chr is Byte Order Mark, False otherwise. |
||
| 3265 | */ |
||
| 3266 | 1 | public static function is_bom($utf8_chr) |
|
| 3270 | 10 | ||
| 3271 | /** |
||
| 3272 | * Returns the Byte Order Mark Character. |
||
| 3273 | 10 | * |
|
| 3274 | 1 | * @return string Byte Order Mark |
|
| 3275 | 1 | */ |
|
| 3276 | public static function bom() |
||
| 3280 | |||
| 3281 | /** |
||
| 3282 | * alias for "UTF8::is_bom" |
||
| 3283 | * |
||
| 3284 | * @param string $utf8_chr |
||
| 3285 | * |
||
| 3286 | * @return boolean |
||
| 3287 | */ |
||
| 3288 | public static function isBom($utf8_chr) |
||
| 3292 | |||
| 3293 | /** |
||
| 3294 | * Checks if string starts with "UTF-8 BOM" character. |
||
| 3295 | * |
||
| 3296 | * @param string $str The input string. |
||
| 3297 | * |
||
| 3298 | * @return bool True if the string has BOM at the start, False otherwise. |
||
| 3299 | */ |
||
| 3300 | public static function string_has_bom($str) |
||
| 3304 | |||
| 3305 | /** |
||
| 3306 | 8 | * Prepends BOM character to the string and returns the whole string. |
|
| 3307 | * |
||
| 3308 | 8 | * INFO: If BOM already existed there, the Input string is returned. |
|
| 3309 | * |
||
| 3310 | 8 | * @param string $str The input string |
|
| 3311 | * |
||
| 3312 | * @return string The output string that contains BOM |
||
| 3313 | */ |
||
| 3314 | public static function add_bom_to_string($str) |
||
| 3322 | |||
| 3323 | /** |
||
| 3324 | * Shuffles all the characters in the string. |
||
| 3325 | * |
||
| 3326 | * @param string $str The input string |
||
| 3327 | * |
||
| 3328 | * @return string The shuffled string. |
||
| 3329 | */ |
||
| 3330 | public static function str_shuffle($str) |
||
| 3338 | |||
| 3339 | /** |
||
| 3340 | * Wraps a string to a given number of characters. |
||
| 3341 | * |
||
| 3342 | * @param string $str |
||
| 3343 | * @param int $width |
||
| 3344 | * @param string $break |
||
| 3345 | 4 | * @param bool $cut |
|
| 3346 | * |
||
| 3347 | 4 | * @return false|string Returns the given string wrapped at the specified length. |
|
| 3348 | */ |
||
| 3349 | public static function wordwrap($str, $width = 75, $break = "\n", $cut = false) |
||
| 3405 | |||
| 3406 | /** |
||
| 3407 | * Find position of first occurrence of string in a string. |
||
| 3408 | * |
||
| 3409 | * @link http://php.net/manual/en/function.mb-strpos.php |
||
| 3410 | * |
||
| 3411 | 1 | * @param string $haystack <p> |
|
| 3412 | * The string being checked. |
||
| 3413 | 1 | * </p> |
|
| 3414 | * @param string $needle <p> |
||
| 3415 | * The string to search for in haystack. |
||
| 3416 | * </p> |
||
| 3417 | 1 | * @param int $offset [optional] <p> |
|
| 3418 | * The search offset. If it is not specified, 0 is used. |
||
| 3419 | * </p> |
||
| 3420 | * @param string $encoding |
||
| 3421 | * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string. |
||
| 3422 | * |
||
| 3423 | * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br /> |
||
| 3424 | * If needle is not found it returns false. |
||
| 3425 | */ |
||
| 3426 | public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false) |
||
| 3480 | |||
| 3481 | /** |
||
| 3482 | * Generates a UTF-8 encoded character from the given code point. |
||
| 3483 | * |
||
| 3484 | * @param int $code_point The code point for which to generate a character. |
||
| 3485 | * |
||
| 3486 | * @return string Multi-Byte character, returns empty string on failure to encode. |
||
| 3487 | */ |
||
| 3488 | public static function chr($code_point) |
||
| 3501 | |||
| 3502 | /** |
||
| 3503 | * Converts hexadecimal U+xxxx code point representation to Integer. |
||
| 3504 | * |
||
| 3505 | * INFO: opposite to UTF8::int_to_hex( ) |
||
| 3506 | * |
||
| 3507 | * @param string $str The hexadecimal code point representation. |
||
| 3508 | * |
||
| 3509 | 1 | * @return int The code point, or 0 on failure. |
|
| 3510 | */ |
||
| 3511 | public static function hex_to_int($str) |
||
| 3519 | |||
| 3520 | /** |
||
| 3521 | * Reverses characters order in the string. |
||
| 3522 | * |
||
| 3523 | * @param string $str The input string |
||
| 3524 | * |
||
| 3525 | * @return string The string with characters in the reverse sequence |
||
| 3526 | */ |
||
| 3527 | public static function strrev($str) |
||
| 3531 | |||
| 3532 | 26 | /** |
|
| 3533 | 5 | * Returns the UTF-8 character with the maximum code point in the given data. |
|
| 3534 | * |
||
| 3535 | * @param mixed $arg A UTF-8 encoded string or an array of such strings. |
||
| 3536 | * |
||
| 3537 | 22 | * @return string The character with the highest code point. |
|
| 3538 | 6 | */ |
|
| 3539 | View Code Duplication | public static function max($arg) |
|
| 3547 | |||
| 3548 | /** |
||
| 3549 | * Accepts a string and returns an array of Unicode code points. |
||
| 3550 | * |
||
| 3551 | * @param mixed $arg A UTF-8 encoded string or an array of such strings. |
||
| 3552 | * @param bool $u_style If True, will return code points in U+xxxx format, |
||
| 3553 | * default, code points will be returned as integers. |
||
| 3554 | 22 | * |
|
| 3555 | * @return array The array of code points |
||
| 3556 | 22 | */ |
|
| 3557 | public static function codepoints($arg, $u_style = false) |
||
| 3583 | |||
| 3584 | /** |
||
| 3585 | 23 | * Returns the UTF-8 character with the minimum code point in the given data. |
|
| 3586 | * |
||
| 3587 | 23 | * @param mixed $arg A UTF-8 encoded string or an array of such strings. |
|
| 3588 | * |
||
| 3589 | * @return string The character with the lowest code point. |
||
| 3590 | */ |
||
| 3591 | View Code Duplication | public static function min($arg) |
|
| 3599 | |||
| 3600 | /** |
||
| 3601 | * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character. |
||
| 3602 | 6 | * |
|
| 3603 | * @param string $chr The input character |
||
| 3604 | * @param string $pfix |
||
| 3605 | 6 | * |
|
| 3606 | 1 | * @return string The code point encoded as U+xxxx |
|
| 3607 | */ |
||
| 3608 | public static function chr_to_hex($chr, $pfix = 'U+') |
||
| 3612 | 1 | ||
| 3613 | /** |
||
| 3614 | * Converts Integer to hexadecimal U+xxxx code point representation. |
||
| 3615 | * |
||
| 3616 | 1 | * @param int $int The integer to be converted to hexadecimal code point. |
|
| 3617 | * @param string $pfix |
||
| 3618 | * |
||
| 3619 | * @return string The code point, or empty string on failure. |
||
| 3620 | */ |
||
| 3621 | public static function int_to_hex($int, $pfix = 'U+') |
||
| 3633 | |||
| 3634 | /** |
||
| 3635 | * Get a binary representation of a specific character. |
||
| 3636 | * |
||
| 3637 | * @param string $str The input character. |
||
| 3638 | 1 | * |
|
| 3639 | * @return string |
||
| 3640 | */ |
||
| 3641 | public static function str_to_binary($str) |
||
| 3660 | 4 | ||
| 3661 | /** |
||
| 3662 | 6 | * Counts number of words in the UTF-8 string. |
|
| 3663 | * |
||
| 3664 | 6 | * @param string $s The input string. |
|
| 3665 | * @param int $format |
||
| 3666 | * @param string $charlist |
||
| 3667 | * |
||
| 3668 | * @return array|float|string The number of words in the string |
||
| 3669 | */ |
||
| 3670 | public static function str_word_count($s, $format = 0, $charlist = '') |
||
| 3695 | |||
| 3696 | /** |
||
| 3697 | * Strip whitespace or other characters from beginning or end of a UTF-8 string. |
||
| 3698 | * |
||
| 3699 | * INFO: This is slower than "trim()" |
||
| 3700 | * |
||
| 3701 | * We could only use the original function if the string / chars were 7-bit (ASCII) only, |
||
| 3702 | * but the check for ASCII (7-bit) costs more time than we would save here. |
||
| 3703 | * |
||
| 3704 | * @param string $str The string to be trimmed |
||
| 3705 | * @param string $chars Optional characters to be stripped |
||
| 3706 | * |
||
| 3707 | * @return string The trimmed string |
||
| 3708 | */ |
||
| 3709 | public static function trim($str = '', $chars = INF) |
||
| 3724 | |||
| 3725 | 1 | /** |
|
| 3726 | * Strip whitespace or other characters from end of a UTF-8 string. |
||
| 3727 | 1 | * |
|
| 3728 | * WARNING: This is much slower than "rtrim()" !!!! |
||
| 3729 | * |
||
| 3730 | * @param string $str The string to be trimmed |
||
| 3731 | * @param string $chars Optional characters to be stripped |
||
| 3732 | * |
||
| 3733 | * @return string The string with unwanted characters stripped from the right |
||
| 3734 | */ |
||
| 3735 | View Code Duplication | public static function rtrim($str = '', $chars = INF) |
|
| 3747 | |||
| 3748 | /** |
||
| 3749 | 1 | * Strip whitespace or other characters from beginning of a UTF-8 string. |
|
| 3750 | * |
||
| 3751 | * WARNING: This is much slower than "ltrim()" !!!! |
||
| 3752 | 1 | * |
|
| 3753 | * @param string $str The string to be trimmed |
||
| 3754 | * @param string $chars Optional characters to be stripped |
||
| 3755 | * |
||
| 3756 | 1 | * @return string The string with unwanted characters stripped from the left |
|
| 3757 | */ |
||
| 3758 | 1 | View Code Duplication | public static function ltrim($str = '', $chars = INF) |
| 3770 | 1 | ||
| 3771 | 1 | /** |
|
| 3772 | 1 | * Replace text within a portion of a string. |
|
| 3773 | 1 | * |
|
| 3774 | 1 | * source: https://gist.github.com/stemar/8287074 |
|
| 3775 | * |
||
| 3776 | * @param string|array $str |
||
| 3777 | * @param string|array $replacement |
||
| 3778 | * @param int $start |
||
| 3779 | * @param null|int $length |
||
| 3780 | * |
||
| 3781 | * @return array|string |
||
| 3782 | */ |
||
| 3783 | public static function substr_replace($str, $replacement, $start, $length = null) |
||
| 3849 | 1 | ||
| 3850 | /** |
||
| 3851 | 1 | * alias for "UTF8::to_latin1()" |
|
| 3852 | * |
||
| 3853 | 1 | * @param $str |
|
| 3854 | * |
||
| 3855 | * @return string |
||
| 3856 | */ |
||
| 3857 | public static function toLatin1($str) |
||
| 3861 | |||
| 3862 | /** |
||
| 3863 | * Count the number of sub-string occurrences. |
||
| 3864 | 1 | * |
|
| 3865 | * @param string $haystack The string to search in. |
||
| 3866 | * @param string $needle The string to search for. |
||
| 3867 | * @param int $offset The offset where to start counting. |
||
| 3868 | 1 | * @param int $length The maximum length after the specified offset to search for the substring. |
|
| 3869 | * |
||
| 3870 | * @return int number of occurrences of $needle |
||
| 3871 | */ |
||
| 3872 | public static function substr_count($haystack, $needle, $offset = 0, $length = null) |
||
| 3888 | 1 | ||
| 3889 | 1 | /** |
|
| 3890 | 1 | * alias for "UTF8::is_ascii()" |
|
| 3891 | 1 | * |
|
| 3892 | * @param string $str |
||
| 3893 | * |
||
| 3894 | 1 | * @return boolean |
|
| 3895 | */ |
||
| 3896 | public static function isAscii($str) |
||
| 3900 | |||
| 3901 | /** |
||
| 3902 | * Checks if a string is 7 bit ASCII. |
||
| 3903 | * |
||
| 3904 | 2 | * @param string $str The string to check. |
|
| 3905 | * |
||
| 3906 | 2 | * @return bool <strong>true</strong> if it is ASCII<br /> |
|
| 3907 | 2 | * <strong>false</strong> otherwise |
|
| 3908 | */ |
||
| 3909 | 2 | public static function is_ascii($str) |
|
| 3913 | 2 | ||
| 3914 | 2 | /** |
|
| 3915 | * Create an array containing a range of UTF-8 characters. |
||
| 3916 | * |
||
| 3917 | * @param mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from. |
||
| 3918 | * @param mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at. |
||
| 3919 | * |
||
| 3920 | * @return array |
||
| 3921 | */ |
||
| 3922 | public static function range($var1, $var2) |
||
| 3960 | |||
| 3961 | 2 | /** |
|
| 3962 | * Creates a random string of UTF-8 characters. |
||
| 3963 | * |
||
| 3964 | 2 | * @param int $len The length of string in characters. |
|
| 3965 | 2 | * |
|
| 3966 | 2 | * @return string String consisting of random characters. |
|
| 3967 | 2 | */ |
|
| 3968 | 2 | public static function hash($len = 8) |
|
| 4008 | 1 | ||
| 4009 | /** |
||
| 4010 | 1 | * @alias of UTF8::chr_map() |
|
| 4011 | * |
||
| 4012 | 1 | * @param $callback |
|
| 4013 | * @param $str |
||
| 4014 | * |
||
| 4015 | * @return array |
||
| 4016 | */ |
||
| 4017 | public static function callback($callback, $str) |
||
| 4021 | |||
| 4022 | /** |
||
| 4023 | 1 | * Applies callback to all characters of a string. |
|
| 4024 | * |
||
| 4025 | 1 | * @param string $callback The callback function. |
|
| 4026 | 1 | * @param string $str UTF-8 string to run callback on. |
|
| 4027 | 1 | * |
|
| 4028 | * @return array The outcome of callback. |
||
| 4029 | 1 | */ |
|
| 4030 | 1 | ||
| 4031 | 1 | public static function chr_map($callback, $str) |
|
| 4037 | |||
| 4038 | /** |
||
| 4039 | * Returns a single UTF-8 character from string. |
||
| 4040 | * |
||
| 4041 | * @param string $str A UTF-8 string. |
||
| 4042 | * @param int $pos The position of character to return. |
||
| 4043 | * |
||
| 4044 | * @return string Single Multi-Byte character. |
||
| 4045 | */ |
||
| 4046 | public static function access($str, $pos) |
||
| 4052 | |||
| 4053 | /** |
||
| 4054 | * Sort all characters according to code points. |
||
| 4055 | * |
||
| 4056 | * @param string $str A UTF-8 string. |
||
| 4057 | * @param bool $unique Sort unique. If true, repeated characters are ignored. |
||
| 4058 | * @param bool $desc If true, will sort characters in reverse code point order. |
||
| 4059 | * |
||
| 4060 | * @return string String of sorted characters |
||
| 4061 | 8 | */ |
|
| 4062 | public static function str_sort($str, $unique = false, $desc = false) |
||
| 4078 | |||
| 4079 | 7 | /** |
|
| 4080 | 1 | * Makes a UTF-8 string from code points. |
|
| 4081 | 1 | * |
|
| 4082 | * @param array $array Integer or Hexadecimal codepoints |
||
| 4083 | 7 | * |
|
| 4084 | * @return string UTF-8 encoded string |
||
| 4085 | */ |
||
| 4086 | public static function string($array) |
||
| 4098 | |||
| 4099 | /** |
||
| 4100 | * Strip HTML and PHP tags from a string. |
||
| 4101 | * |
||
| 4102 | * @link http://php.net/manual/en/function.strip-tags.php |
||
| 4103 | * |
||
| 4104 | * @param string $str <p> |
||
| 4105 | 1 | * The input string. |
|
| 4106 | 1 | * </p> |
|
| 4107 | 1 | * @param string $allowable_tags [optional] <p> |
|
| 4108 | 1 | * You can use the optional second parameter to specify tags which should |
|
| 4109 | 1 | * not be stripped. |
|
| 4110 | * </p> |
||
| 4111 | 1 | * <p> |
|
| 4112 | * HTML comments and PHP tags are also stripped. This is hardcoded and |
||
| 4113 | * can not be changed with allowable_tags. |
||
| 4114 | * </p> |
||
| 4115 | * |
||
| 4116 | * @return string the stripped string. |
||
| 4117 | */ |
||
| 4118 | public static function strip_tags($str, $allowable_tags = null) |
||
| 4125 | |||
| 4126 | /** |
||
| 4127 | * Pad a UTF-8 string to given length with another string. |
||
| 4128 | * |
||
| 4129 | * @param string $input The input string |
||
| 4130 | * @param int $pad_length The length of return string |
||
| 4131 | * @param string $pad_string String to use for padding the input string |
||
| 4132 | * @param int $pad_type can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH |
||
| 4133 | 13 | * |
|
| 4134 | * @return string Returns the padded string |
||
| 4135 | 13 | */ |
|
| 4136 | public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT) |
||
| 4171 | |||
| 4172 | /** |
||
| 4173 | * Repeat a string. |
||
| 4174 | * |
||
| 4175 | * @param string $input <p> |
||
| 4176 | * The string to be repeated. |
||
| 4177 | * </p> |
||
| 4178 | * @param int $multiplier <p> |
||
| 4179 | * Number of times the input string should be |
||
| 4180 | * repeated. |
||
| 4181 | * </p> |
||
| 4182 | * <p> |
||
| 4183 | * multiplier has to be greater than or equal to 0. |
||
| 4184 | * If the multiplier is set to 0, the function |
||
| 4185 | * will return an empty string. |
||
| 4186 | * </p> |
||
| 4187 | * |
||
| 4188 | * @return string the repeated string. |
||
| 4189 | */ |
||
| 4190 | public static function str_repeat($input, $multiplier) |
||
| 4196 | |||
| 4197 | /** |
||
| 4198 | * Removes duplicate occurrences of a string in another string. |
||
| 4199 | * |
||
| 4200 | * @param string $str The base string |
||
| 4201 | * @param string|array $what String to search for in the base string |
||
| 4202 | * |
||
| 4203 | * @return string The result string with removed duplicates |
||
| 4204 | */ |
||
| 4205 | public static function remove_duplicates($str, $what = ' ') |
||
| 4219 | |||
| 4220 | /** |
||
| 4221 | * Finds position of first occurrence of a string within another, case insensitive. |
||
| 4222 | * |
||
| 4223 | * @link http://php.net/manual/en/function.mb-stripos.php |
||
| 4224 | * |
||
| 4225 | * @param string $haystack <p> |
||
| 4226 | * The string from which to get the position of the first occurrence |
||
| 4227 | * of needle |
||
| 4228 | * </p> |
||
| 4229 | * @param string $needle <p> |
||
| 4230 | * The string to find in haystack |
||
| 4231 | * </p> |
||
| 4232 | * @param int $offset [optional] <p> |
||
| 4233 | * The position in haystack |
||
| 4234 | * to start searching |
||
| 4235 | * </p> |
||
| 4236 | * @param string $encoding |
||
| 4237 | * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string |
||
| 4238 | * |
||
| 4239 | * @return int|false The numeric position of the first occurrence of |
||
| 4240 | * needle in the haystack |
||
| 4241 | * string, or false if needle is not found. |
||
| 4242 | */ |
||
| 4243 | public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false) |
||
| 4267 | |||
| 4268 | /** |
||
| 4269 | * Fix a double (or multiple) encoded UTF8 string. |
||
| 4270 | * |
||
| 4271 | * @param array|string $str |
||
| 4272 | * |
||
| 4273 | * @return string |
||
| 4274 | */ |
||
| 4275 | public static function fix_utf8($str) |
||
| 4295 | |||
| 4296 | /** |
||
| 4297 | * alias for "UTF8::ucfirst" |
||
| 4298 | * |
||
| 4299 | * @param $str |
||
| 4300 | * |
||
| 4301 | * @return string |
||
| 4302 | */ |
||
| 4303 | public static function ucword($str) |
||
| 4307 | |||
| 4308 | /** |
||
| 4309 | * Makes string's first char uppercase. |
||
| 4310 | * |
||
| 4311 | * @param string $str The input string |
||
| 4312 | * |
||
| 4313 | * @return string The resulting string |
||
| 4314 | */ |
||
| 4315 | public static function ucfirst($str) |
||
| 4319 | |||
| 4320 | /** |
||
| 4321 | * Make a string uppercase. |
||
| 4322 | * |
||
| 4323 | * @link http://php.net/manual/en/function.mb-strtoupper.php |
||
| 4324 | * |
||
| 4325 | * @param string $str <p> |
||
| 4326 | * The string being uppercased. |
||
| 4327 | * </p> |
||
| 4328 | * @param string $encoding |
||
| 4329 | * |
||
| 4330 | * @return string str with all alphabetic characters converted to uppercase. |
||
| 4331 | */ |
||
| 4332 | public static function strtoupper($str, $encoding = 'UTF-8') |
||
| 4363 | |||
| 4364 | /** |
||
| 4365 | * Returns an array of all lower and upper case UTF-8 encoded characters. |
||
| 4366 | * |
||
| 4367 | * @return array An array with lower case chars as keys and upper chars as values. |
||
| 4368 | */ |
||
| 4369 | protected static function case_table() |
||
| 5370 | 13 | ||
| 5371 | /** |
||
| 5372 | 13 | * Translate characters or replace sub-strings. |
|
| 5373 | * |
||
| 5374 | * @param string $s |
||
| 5375 | 13 | * @param string $from |
|
| 5376 | 13 | * @param string $to |
|
| 5377 | 1 | * |
|
| 5378 | 1 | * @return string |
|
| 5379 | 12 | */ |
|
| 5380 | public static function strtr($s, $from, $to = INF) |
||
| 5399 | |||
| 5400 | /** |
||
| 5401 | * Binary safe comparison of two strings from an offset, up to length characters. |
||
| 5402 | * |
||
| 5403 | * @param string $main_str The main string being compared. |
||
| 5404 | * @param string $str The secondary string being compared. |
||
| 5405 | * @param int $offset The start position for the comparison. If negative, it starts counting from the |
||
| 5406 | * end of the string. |
||
| 5407 | * @param int $length The length of the comparison. The default value is the largest of the length of |
||
| 5408 | * the str compared to the length of main_str less the offset. |
||
| 5409 | * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive. |
||
| 5410 | 1 | * |
|
| 5411 | * @return int |
||
| 5412 | 1 | */ |
|
| 5413 | public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false) |
||
| 5420 | |||
| 5421 | /** |
||
| 5422 | * Case-insensitive string comparison. |
||
| 5423 | * |
||
| 5424 | * @param string $str1 |
||
| 5425 | * @param string $str2 |
||
| 5426 | * |
||
| 5427 | * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal. |
||
| 5428 | */ |
||
| 5429 | public static function strcasecmp($str1, $str2) |
||
| 5433 | |||
| 5434 | /** |
||
| 5435 | * Uppercase for all words in the string. |
||
| 5436 | * |
||
| 5437 | 10 | * @param string $str |
|
| 5438 | * @param array $exceptions |
||
| 5439 | 10 | * |
|
| 5440 | 10 | * @return string |
|
| 5441 | */ |
||
| 5442 | 10 | public static function ucwords($str, $exceptions = array()) |
|
| 5475 | |||
| 5476 | /** |
||
| 5477 | * Format a number with grouped thousands. |
||
| 5478 | * |
||
| 5479 | * @param float $number |
||
| 5480 | * @param int $decimals |
||
| 5481 | * @param string $dec_point |
||
| 5482 | * @param string $thousands_sep |
||
| 5483 | * |
||
| 5484 | * @return string |
||
| 5485 | */ |
||
| 5486 | public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',') |
||
| 5506 | |||
| 5507 | /** |
||
| 5508 | * INFO: this is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe |
||
| 5509 | * |
||
| 5510 | * (PHP 4, PHP 5)<br/> |
||
| 5511 | * Replace all occurrences of the search string with the replacement string |
||
| 5512 | * |
||
| 5513 | * @link http://php.net/manual/en/function.str-replace.php |
||
| 5514 | * |
||
| 5515 | * @param mixed $search <p> |
||
| 5516 | * The value being searched for, otherwise known as the needle. |
||
| 5517 | * An array may be used to designate multiple needles. |
||
| 5518 | * </p> |
||
| 5519 | * @param mixed $replace <p> |
||
| 5520 | * The replacement value that replaces found search |
||
| 5521 | * values. An array may be used to designate multiple replacements. |
||
| 5522 | * </p> |
||
| 5523 | * @param mixed $subject <p> |
||
| 5524 | * The string or array being searched and replaced on, |
||
| 5525 | * otherwise known as the haystack. |
||
| 5526 | * </p> |
||
| 5527 | * <p> |
||
| 5528 | * If subject is an array, then the search and |
||
| 5529 | * replace is performed with every entry of |
||
| 5530 | * subject, and the return value is an array as |
||
| 5531 | * well. |
||
| 5532 | * </p> |
||
| 5533 | * @param int $count [optional] If passed, this will hold the number of matched and replaced needles. |
||
| 5534 | * |
||
| 5535 | * @return mixed This function returns a string or an array with the replaced values. |
||
| 5536 | */ |
||
| 5537 | public static function str_replace($search, $replace, $subject, &$count = null) |
||
| 5541 | |||
| 5542 | /** |
||
| 5543 | * str_ireplace |
||
| 5544 | * |
||
| 5545 | * @param string $search |
||
| 5546 | * @param string $replace |
||
| 5547 | * @param string $subject |
||
| 5548 | * @param null $count |
||
| 5549 | * |
||
| 5550 | * @return string |
||
| 5551 | */ |
||
| 5552 | public static function str_ireplace($search, $replace, $subject, &$count = null) |
||
| 5570 | |||
| 5571 | 1 | /** |
|
| 5572 | 1 | * Makes string's first char lowercase. |
|
| 5573 | * |
||
| 5574 | * @param string $str The input string |
||
| 5575 | * |
||
| 5576 | * @return string The resulting string |
||
| 5577 | */ |
||
| 5578 | public static function lcfirst($str) |
||
| 5582 | |||
| 5583 | 1 | /** |
|
| 5584 | * Find position of last occurrence of a case-insensitive string. |
||
| 5585 | 1 | * |
|
| 5586 | 1 | * @param string $haystack The string to look in |
|
| 5587 | * @param string $needle The string to look for |
||
| 5588 | * @param int $offset (Optional) Number of characters to ignore in the beginning or end |
||
| 5589 | 1 | * |
|
| 5590 | * @return int The position of offset |
||
| 5591 | 1 | */ |
|
| 5592 | 1 | public static function strripos($haystack, $needle, $offset = 0) |
|
| 5596 | 1 | ||
| 5597 | 1 | /** |
|
| 5598 | 1 | * Find position of last occurrence of a string in a string. |
|
| 5599 | 1 | * |
|
| 5600 | 1 | * @link http://php.net/manual/en/function.mb-strrpos.php |
|
| 5601 | 1 | * |
|
| 5602 | * @param string $haystack <p> |
||
| 5603 | * The string being checked, for the last occurrence |
||
| 5604 | * of needle |
||
| 5605 | * </p> |
||
| 5606 | * @param string $needle <p> |
||
| 5607 | * The string to find in haystack. |
||
| 5608 | * </p> |
||
| 5609 | * @param int $offset [optional] May be specified to begin searching an arbitrary number of characters into |
||
| 5610 | * the string. Negative values will stop searching at an arbitrary point |
||
| 5611 | * prior to the end of the string. |
||
| 5612 | * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string |
||
| 5613 | * |
||
| 5614 | * @return int the numeric position of |
||
| 5615 | * the last occurrence of needle in the |
||
| 5616 | * haystack string. If |
||
| 5617 | * needle is not found, it returns false. |
||
| 5618 | */ |
||
| 5619 | public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false) |
||
| 5670 | |||
| 5671 | /** |
||
| 5672 | * Splits a string into smaller chunks and multiple lines, using the specified |
||
| 5673 | * line ending character. |
||
| 5674 | * |
||
| 5675 | * @param string $body The original string to be split. |
||
| 5676 | * @param int $chunklen The maximum character length of a chunk. |
||
| 5677 | * @param string $end The character(s) to be inserted at the end of each chunk. |
||
| 5678 | * |
||
| 5679 | * @return string The chunked string |
||
| 5680 | */ |
||
| 5681 | 2 | public static function chunk_split($body, $chunklen = 76, $end = "\r\n") |
|
| 5685 | 2 | ||
| 5686 | /** |
||
| 5687 | 2 | * alias for "UTF8::to_win1252()" |
|
| 5688 | * |
||
| 5689 | 2 | * @param string $str |
|
| 5690 | * |
||
| 5691 | * @return array|string |
||
| 5692 | 2 | */ |
|
| 5693 | public static function to_iso8859($str) |
||
| 5697 | |||
| 5698 | 1 | /** |
|
| 5699 | 1 | * fix -> utf8-win1252 chars |
|
| 5700 | 1 | * |
|
| 5701 | * If you received a UTF-8 string that was converted from Windows-1252 as if it were ISO-8859-1 |
||
| 5702 | * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it. |
||
| 5703 | * See: http://en.wikipedia.org/wiki/Windows-1252 |
||
| 5704 | * |
||
| 5705 | * @deprecated use "UTF8::fix_simple_utf8()" |
||
| 5706 | 2 | * |
|
| 5707 | * @param string $str |
||
| 5708 | 2 | * |
|
| 5709 | 2 | * @return string |
|
| 5710 | */ |
||
| 5711 | 2 | public static function utf8_fix_win1252_chars($str) |
|
| 5715 | |||
| 5716 | /** |
||
| 5717 | * Returns an array of Unicode White Space characters. |
||
| 5718 | * |
||
| 5719 | * @return array An array with numeric code point as key and White Space Character as value. |
||
| 5720 | */ |
||
| 5721 | 1 | public static function ws() |
|
| 5725 | 1 | ||
| 5726 | 1 | /** |
|
| 5727 | 1 | * Parses the string into variables. |
|
| 5728 | * |
||
| 5729 | 1 | * WARNING: This differs from parse_str() by returning the results |
|
| 5730 | * instead of placing them in the local scope! |
||
| 5731 | * |
||
| 5732 | * @link http://php.net/manual/en/function.parse-str.php |
||
| 5733 | * |
||
| 5734 | * @param string $str <p> |
||
| 5735 | * The input string. |
||
| 5736 | * </p> |
||
| 5737 | * @param array $result <p> |
||
| 5738 | * If the second parameter arr is present, |
||
| 5739 | * variables are stored in this variable as array elements instead. |
||
| 5740 | * </p> |
||
| 5741 | * |
||
| 5742 | * @return void |
||
| 5743 | */ |
||
| 5744 | public static function parse_str($str, &$result) |
||
| 5753 | |||
| 5754 | /** |
||
| 5755 | * Get the text direction of a specific character. |
||
| 5756 | * |
||
| 5757 | * @param string $chr Character. |
||
| 5758 | * |
||
| 5759 | * @return string 'RTL' or 'LTR' |
||
| 5760 | */ |
||
| 5761 | public static function getCharDirection($chr) |
||
| 5853 | |||
| 5854 | /** |
||
| 5855 | * Get a decimal code representation of a specific character. |
||
| 5856 | * |
||
| 5857 | * @param string $chr The input character |
||
| 5858 | * |
||
| 5859 | * @return int |
||
| 5860 | */ |
||
| 5861 | public static function chr_to_decimal($chr) |
||
| 5893 | |||
| 5894 | /** |
||
| 5895 | * Get a UTF-8 character from its decimal code representation. |
||
| 5896 | * |
||
| 5897 | * @param int $code Code. |
||
| 5898 | * |
||
| 5899 | * @return string |
||
| 5900 | */ |
||
| 5901 | public static function decimal_to_chr($code) |
||
| 5911 | |||
| 5912 | /** |
||
| 5913 | * Return an array with "urlencoded"-win1252 -> UTF-8 |
||
| 5914 | * |
||
| 5915 | * @return mixed |
||
| 5916 | */ |
||
| 5917 | protected static function urldecode_fix_win1252_chars() |
||
| 6148 | |||
| 6149 | } |
||
| 6150 |
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.