Mistralys /
application-utils
| 1 | <?php |
||
| 2 | /** |
||
| 3 | * File containing the {@see \AppUtils\ConvertHelper_String} class. |
||
| 4 | * |
||
| 5 | * @package Application Utils |
||
| 6 | * @subpackage ConvertHelper |
||
| 7 | * @see \AppUtils\ConvertHelper_String |
||
| 8 | */ |
||
| 9 | |||
| 10 | declare(strict_types=1); |
||
| 11 | |||
| 12 | namespace AppUtils; |
||
| 13 | |||
| 14 | use ForceUTF8\Encoding; |
||
| 15 | |||
/**
 * String conversion helper: focuses on string operations.
 *
 * All methods are static.
 *
 * @package Application Utils
 * @subpackage ConvertHelper
 * @author Sebastian Mordziol
 */
class ConvertHelper_String
{
    /**
     * Searches for needle in the specified string, and returns a list
     * of all occurrences, including the matched string. The matched
     * string is useful when doing a case-insensitive search, as it
     * shows the exact matched case of needle.
     *
     * Positions are character offsets (multibyte functions are used).
     * Matches do not overlap: after each hit, the search resumes right
     * behind the matched text. An empty needle yields an empty list.
     *
     * @param string $needle
     * @param string $haystack
     * @param bool $caseInsensitive
     * @return ConvertHelper_StringMatch[]
     */
    public static function findString(string $needle, string $haystack, bool $caseInsensitive=false): array
    {
        if($needle === '') {
            return array();
        }

        $function = 'mb_strpos';
        if($caseInsensitive) {
            $function = 'mb_stripos';
        }

        $pos = 0;
        $positions = array();
        $length = mb_strlen($needle);

        while( ($pos = $function($haystack, $needle, $pos)) !== false)
        {
            // Extract the text from the haystack rather than reusing the
            // needle, so the original case of the match is preserved.
            $match = mb_substr($haystack, $pos, $length);
            $positions[] = new ConvertHelper_StringMatch($pos, $match);

            // Continue searching behind the current match.
            $pos += $length;
        }

        return $positions;
    }

    /**
     * Splits a string into an array of all characters it is composed of.
     * Unicode character safe.
     *
     * NOTE: Spaces and newlines (both \r and \n) are also considered single
     * characters.
     *
     * If the split fails (preg_split returns false), an empty array is
     * returned.
     *
     * @param string $string
     * @return string[]
     */
    public static function toArray(string $string) : array
    {
        $result = preg_split('//u', $string, 0, PREG_SPLIT_NO_EMPTY);
        if($result !== false) {
            return $result;
        }

        return array();
    }

    /**
     * Calculates the byte length of a string, taking into
     * account any unicode characters.
     *
     * @param string $string
     * @return int
     * @see https://stackoverflow.com/a/9718273/2298192
     */
    public static function toBytes(string $string) : int
    {
        // The "8bit" encoding makes mb_strlen count bytes instead of characters.
        return mb_strlen($string, '8bit');
    }

    /**
     * Converts a string into an MD5 hash.
     *
     * @param string $string
     * @return string
     */
    public static function toHash(string $string): string
    {
        return md5($string);
    }

    /**
     * Creates a short, 8-character long hash for the specified string
     * (hexadecimal CRC32 checksum).
     *
     * WARNING: Not cryptographically safe.
     *
     * @param string $string
     * @return string
     */
    public static function toShortHash(string $string) : string
    {
        return hash('crc32', $string, false);
    }

    /**
     * Converts a string to valid utf8, regardless
     * of the string's encoding(s).
     *
     * Pure ASCII strings are returned untouched; everything else
     * is passed through {@see Encoding::toUTF8()}.
     *
     * @param string $string
     * @return string
     */
    public static function toUtf8(string $string) : string
    {
        if(!self::isASCII($string)) {
            return Encoding::toUTF8($string);
        }

        return $string;
    }

    /**
     * Checks whether the specified string is an ASCII
     * string, without any special or UTF8 characters.
     * Note: empty strings and NULL are considered ASCII.
     * Any variable types other than strings are not.
     *
     * @param mixed $string
     * @return boolean
     */
    public static function isASCII($string) : bool
    {
        if($string === '' || $string === null) {
            return true;
        }

        if(!is_string($string)) {
            return false;
        }

        // ASCII means: no byte outside of the 0x00-0x7F range.
        return !preg_match('/[^\x00-\x7F]/', $string);
    }

    /**
     * Checks whether the specified string contains HTML code.
     *
     * A string counts as HTML if it contains something that looks
     * like a tag, or if decoding HTML entities changes it (meaning
     * that it contains at least one entity).
     *
     * @param string $string
     * @return boolean
     */
    public static function isHTML(string $string) : bool
    {
        if(preg_match('%<[a-z/][\s\S]*>%siU', $string)) {
            return true;
        }

        // No tags found: the presence of entities is the remaining indicator.
        return html_entity_decode($string) !== $string;
    }

    /**
     * Normalizes tabs in the specified string by indenting everything
     * back to the minimum tab distance. With the second parameter,
     * tabs can optionally be converted to spaces as well (recommended
     * for HTML output).
     *
     * The work is delegated to {@see ConvertHelper_TabsNormalizer}.
     *
     * @param string $string
     * @param boolean $tabs2spaces
     * @return string
     */
    public static function normalizeTabs(string $string, bool $tabs2spaces = false) : string
    {
        $normalizer = new ConvertHelper_TabsNormalizer();
        $normalizer->convertTabsToSpaces($tabs2spaces);

        return $normalizer->normalize($string);
    }

    /**
     * Converts tabs to spaces in the specified string.
     *
     * @param string $string
     * @param int $tabSize The amount of spaces per tab.
     * @return string
     */
    public static function tabs2spaces(string $string, int $tabSize=4) : string
    {
        return str_replace("\t", str_repeat(' ', $tabSize), $string);
    }

    /**
     * Converts spaces to tabs in the specified string.
     *
     * Every run of exactly `$tabSize` consecutive spaces is replaced
     * by a single tab character.
     *
     * @param string $string
     * @param int $tabSize The amount of spaces per tab in the source string.
     * @return string
     */
    public static function spaces2tabs(string $string, int $tabSize=4) : string
    {
        return str_replace(str_repeat(' ', $tabSize), "\t", $string);
    }

    /**
     * Makes all hidden characters visible in the target string,
     * from spaces to control characters.
     *
     * The work is delegated to {@see ConvertHelper_HiddenConverter}.
     *
     * @param string $string
     * @return string
     */
    public static function hidden2visible(string $string) : string
    {
        $converter = new ConvertHelper_HiddenConverter();

        return $converter->convert($string);
    }

    /**
     * UTF8-safe wordwrap method: works like the regular wordwrap
     * PHP function but compatible with UTF8. Otherwise, the lengths
     * are not calculated correctly.
     *
     * The work is delegated to {@see ConvertHelper_WordWrapper}.
     *
     * @param string $str
     * @param int $width
     * @param string $break
     * @param bool $cut
     * @return string
     */
    public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false) : string
    {
        $wrapper = new ConvertHelper_WordWrapper();

        return $wrapper
            ->setLineWidth($width)
            ->setBreakCharacter($break)
            ->setCuttingEnabled($cut)
            ->wrapText($str);
    }

    /**
     * Transliterates a string.
     *
     * The work is delegated to {@see Transliteration}.
     *
     * @param string $string
     * @param string $spaceChar Passed on to Transliteration::setSpaceReplacement().
     * @param bool $lowercase Whether to call Transliteration::setLowercase().
     * @return string
     */
    public static function transliterate(string $string, string $spaceChar = '-', bool $lowercase = true) : string
    {
        $transliterate = new Transliteration();
        $transliterate->setSpaceReplacement($spaceChar);

        if ($lowercase)
        {
            $transliterate->setLowercase();
        }

        return $transliterate->convert($string);
    }

    /**
     * Cuts a text to the specified length if it is longer than the
     * target length. Appends a text to signify it has been cut at
     * the end of the string.
     *
     * NOTE: The target length applies to the text before the suffix
     * is added, so the returned string can be longer than the target
     * length by up to the length of `$append`. Lengths are counted
     * in characters, not bytes.
     *
     * @param string $text
     * @param int $targetLength
     * @param string $append
     * @return string
     */
    public static function cutText(string $text, int $targetLength, string $append = '...') : string
    {
        $length = mb_strlen($text);
        if ($length <= $targetLength) {
            return $text;
        }

        // Trim, so no whitespace is left dangling in front of the suffix.
        return trim(mb_substr($text, 0, $targetLength)) . $append;
    }

    /**
     * Like explode, but trims all entries, and removes
     * empty entries from the resulting array.
     *
     * An empty string or an empty delimiter yields an empty array.
     * The string "0" is regular content: it is neither treated as
     * an empty subject nor as an empty delimiter.
     *
     * @param string $delimiter
     * @param string $string
     * @return string[]
     */
    public static function explodeTrim(string $delimiter, string $string) : array
    {
        // Strict comparisons on purpose: empty() would also reject
        // the string "0", which is perfectly valid content.
        if($string === '' || $delimiter === '') {
            return array();
        }

        $keep = array();
        foreach(explode($delimiter, $string) as $token) {
            $token = trim($token);
            if($token !== '') {
                $keep[] = $token;
            }
        }

        return $keep;
    }
}
||
| 322 |