elkarte /
Elkarte
| 1 | <?php |
||||
| 2 | |||||
| 3 | /** |
||||
| 4 | * Utility functions, such as to handle multi byte strings |
||||
| 5 | * |
||||
| 6 | * @package ElkArte Forum |
||||
| 7 | * @copyright ElkArte Forum contributors |
||||
| 8 | * @license BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file) |
||||
| 9 | * |
||||
| 10 | * @version 2.0 dev |
||||
| 11 | * |
||||
| 12 | */ |
||||
| 13 | |||||
| 14 | namespace ElkArte\Helper; |
||||
| 15 | |||||
| 16 | /** |
||||
| 17 | * Utility functions, such as to handle multi byte strings |
||||
| 18 | * Note: some of these might be deprecated or removed in the future. |
||||
| 19 | */ |
||||
| 20 | class Util |
||||
| 21 | { |
||||
| 22 | protected static $_entity_check_reg = '~(&#(\d{1,7}|x[0-9a-fA-F]{1,6});)~'; |
||||
| 23 | |||||
/**
 * Validates a numeric character reference and normalises it to decimal form
 *
 * - Expects the inner part of an entity only: decimal digits ("8364") or a
 *   lowercase x followed by hex digits ("x20ac")
 * - Control characters (below 0x20), values above 0x10FFFF, the surrogate range
 *   0xD800-0xDFFF and the directional overrides 0x202D / 0x202E are rejected
 *
 * @param string $string the entity digits, without the leading &# and the trailing ;
 *
 * @return string the entity as &#NNN; (always decimal) or '' when it is not allowed
 */
public static function entity_fix($string): string
{
	$string = (string) $string;

	// Nothing to inspect, and reading $string[0] below would raise a warning
	if ($string === '')
	{
		return '';
	}

	$num = $string[0] === 'x' ? (int) hexdec(substr($string, 1)) : (int) $string;

	// We don't allow control characters, characters out of range, byte markers, etc
	if ($num < 0x20 || $num > 0x10FFFF || ($num >= 0xD800 && $num <= 0xDFFF) || $num === 0x202D || $num === 0x202E)
	{
		return '';
	}

	return '&#' . $num . ';';
}
||||
| 43 | |||||
/**
 * UTF-8 aware wrapper around the native htmlspecialchars
 *
 * - Empty input is handed back as it was received
 * - Unless $modSettings['disableEntityCheck'] is set, numeric entities found in the
 *   escaped text are passed through the entity_fix__callback function
 *
 * @param string $string
 * @param int $quote_style integer or constant representation of one
 * @param string $charset only UTF-8 allowed
 * @param bool $double true will allow double encoding, false will not encode existing html entities
 *
 * @return string|null
 */
public static function htmlspecialchars($string, $quote_style = ENT_COMPAT, $charset = 'UTF-8', $double = false): ?string
{
	global $modSettings;

	// Nothing to escape
	if (empty($string))
	{
		return $string;
	}

	$escaped = htmlspecialchars($string, $quote_style, $charset, $double);

	// Entity checking is switched off, the escaped text is the final result
	if (!empty($modSettings['disableEntityCheck']))
	{
		return $escaped;
	}

	return preg_replace_callback('~(&#(\d{1,7}|x[0-9a-fA-F]{1,6});)~', 'entity_fix__callback', $escaped);
}
||||
| 71 | |||||
/**
 * Escapes a value, or every value of a (nested) array, with Util::htmlspecialchars
 *
 * What it does:
 *
 * - Escapes using ENT_QUOTES
 * - Array keys are left alone, only the values are processed
 * - Recurses into nested arrays
 * - Once $level is above 25 the elements of the array are set to null instead
 *   of being processed, which bounds the recursion
 *
 * @param array|string $var The string or array of strings to add entities
 * @param int $level = 0 The current level we're at within the array (if called recursively)
 *
 * @return array|string The string or array of strings with entities added
 */
public static function htmlspecialchars__recursive($var, $level = 0)
{
	// A plain value is escaped directly
	if (!is_array($var))
	{
		return self::htmlspecialchars($var, ENT_QUOTES);
	}

	// Too deep, blank the children rather than descending any further
	if ($level > 25)
	{
		return array_fill_keys(array_keys($var), null);
	}

	// array_map with a single array keeps the original keys
	return array_map(static fn($value) => self::htmlspecialchars__recursive($value, $level + 1), $var);
}
||||
| 102 | |||||
/**
 * Trims whitespace, invisible separators, control characters and &nbsp; entities
 * from the start and end of a string
 *
 * - Unless $modSettings['disableEntityCheck'] is set, numeric entities are first
 *   passed through the entity_fix__callback function
 * - Both the Unicode categories Z (separators) and C (control, format, unassigned)
 *   and the literal &nbsp; entity are removed from either end
 * - If the string is not valid UTF-8 the unicode regex can not run; a plain trim()
 *   is used in that case
 *
 * @param string $string
 *
 * @return string
 */
public static function htmltrim($string): string
{
	global $modSettings;

	$string = (string) $string;

	// Any kind of whitespace or invisible separator, invisible control characters
	// and unused code points
	$space_chars = '\p{Z}\p{C}';
	$trim_reg = '~^(?:[' . $space_chars . ']|&nbsp;)+|(?:[' . $space_chars . ']|&nbsp;)+$~u';

	if (empty($modSettings['disableEntityCheck']))
	{
		$fixed = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string);
		if ($fixed !== null)
		{
			$string = $fixed;
		}
	}

	// preg_replace returns null when the subject is not valid UTF-8
	return preg_replace($trim_reg, '', $string) ?? trim($string);
}
||||
| 128 | |||||
/**
 * Applies Util::htmltrim to a value, or to every value of a (nested) array
 *
 * What it does:
 *
 * - Array keys are left alone, only the values are trimmed
 * - Recurses into nested arrays
 * - Once $level is above 25 the elements of the array are set to null instead
 *   of being processed, which bounds the recursion
 *
 * @param array|string $var The string or array of strings to trim
 * @param int $level = 0 How deep we're at within the array (if called recursively)
 *
 * @return array|string The trimmed string or array of trimmed strings
 */
public static function htmltrim__recursive($var, $level = 0)
{
	// A plain value is trimmed directly
	if (!is_array($var))
	{
		return self::htmltrim($var);
	}

	// Too deep, blank the children rather than descending any further
	if ($level > 25)
	{
		return array_fill_keys(array_keys($var), null);
	}

	// array_map with a single array keeps the original keys
	return array_map(static fn($value) => self::htmltrim__recursive($value, $level + 1), $var);
}
||||
| 161 | |||||
/**
 * Perform a strpos search on a multi-byte string
 *
 * - Positions are counted in "characters" where a numeric entity or one of
 *   &quot; &amp; &lt; &gt; &nbsp; counts as a single character
 * - Unless $modSettings['disableEntityCheck'] is set, numeric entities in the haystack
 *   and needle are first passed through the entity_fix__callback function
 * - Returns false for an empty needle or when the text can not be split (invalid UTF-8)
 *
 * @param string $haystack what to search in
 * @param string $needle what is being looked for
 * @param int $offset where to start, assumed 0
 * @param bool $right set to true to mimic strrpos functions
 *
 * @return bool|int the character position of the match, or false when not found
 */
public static function strpos($haystack, $needle, $offset = 0, $right = false)
{
	global $modSettings;

	$needle = (string) $needle;
	if ($needle === '')
	{
		return false;
	}

	$check_entities = empty($modSettings['disableEntityCheck']);
	$split_flags = PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY;

	// Entities are kept whole, everything else is one element per code point.
	// The s modifier lets . match newlines so they are not merged into one element.
	$split_reg = '~(&#' . ($check_entities ? '\d{1,7}' : '021') . ';|&quot;|&amp;|&lt;|&gt;|&nbsp;|.)~us';

	$haystack_check = $check_entities ? preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $haystack) : $haystack;
	$haystack_arr = preg_split($split_reg, (string) $haystack_check, -1, $split_flags);

	// The split fails on invalid UTF-8, nothing can be located then
	if ($haystack_arr === false)
	{
		return false;
	}

	$count = 0;

	// From the right side, like mb_strrpos instead
	if ($right)
	{
		$haystack_arr = array_reverse($haystack_arr);
		$count = count($haystack_arr) - 1;
	}

	// Single character search, lets go
	if (strlen($needle) === 1)
	{
		$result = array_search($needle, array_slice($haystack_arr, $offset), true);

		return is_int($result) ? ($right ? $count - ($result + $offset) : $result + $offset) : false;
	}

	$needle_check = $check_entities ? preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $needle) : $needle;
	$needle_arr = preg_split($split_reg, (string) $needle_check, -1, $split_flags);

	// Invalid UTF-8, or a needle that only consisted of a removed entity
	if (empty($needle_arr))
	{
		return false;
	}

	$needle_arr = $right ? array_reverse($needle_arr) : $needle_arr;
	$needle_size = count($needle_arr);

	// Jump from one occurrence of the first needle character to the next and compare the full slice
	$result = array_search($needle_arr[0], array_slice($haystack_arr, $offset), true);
	while (is_int($result))
	{
		$offset += $result;
		if (array_slice($haystack_arr, $offset, $needle_size) === $needle_arr)
		{
			return $right ? ($count - $offset - $needle_size + 1) : $offset;
		}

		$result = array_search($needle_arr[0], array_slice($haystack_arr, ++$offset), true);
	}

	return false;
}
||||
| 216 | |||||
/**
 * Lowercases a multi-byte (UTF-8) string
 *
 * - Uses mb_strtolower when the function exists, otherwise loads Charset.subs.php
 *   from SUBSDIR and uses utf8_strtolower
 *
 * @param string $string
 *
 * @return string
 */
public static function strtolower($string): string
{
	// No mbstring, rely on the substitution tables instead
	if (!function_exists('mb_strtolower'))
	{
		require_once(SUBSDIR . '/Charset.subs.php');

		return utf8_strtolower($string);
	}

	return mb_strtolower($string, 'UTF-8');
}
||||
| 237 | |||||
/**
 * Cuts off a multi-byte string so that it is at most $length bytes long
 *
 * - Unless $modSettings['disableEntityCheck'] is set, numeric entities are first
 *   passed through the entity_fix__callback function
 * - Use this when the number of actual characters (&nbsp; = 6 not 1) must be <= length
 *   not the displayable, for example db field compliance to avoid overflow
 * - Never cuts in the middle of a multi-byte character or of an entity, the
 *   result is the longest run of whole characters / entities that fits
 * - A length of zero or less returns an empty string
 * - Text that is not valid UTF-8 can not be split into characters and is cut byte-wise
 *
 * @param string $string
 * @param int $length maximum length in bytes
 *
 * @return string
 */
public static function truncate($string, $length): string
{
	global $modSettings;

	$string = (string) $string;
	$length = (int) $length;
	$check_entities = empty($modSettings['disableEntityCheck']);

	// The entities that must be kept or dropped as a whole
	$ent_list = $check_entities ? '&(#\d{1,7}|quot|amp|lt|gt|nbsp);' : '&(#021|quot|amp|lt|gt|nbsp);';

	if ($check_entities)
	{
		$string = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string) ?? $string;
	}

	if ($length <= 0)
	{
		return '';
	}

	// Already fits
	if (strlen($string) <= $length)
	{
		return $string;
	}

	// Split into whole units, the s modifier keeps newlines from stopping the match
	if (!preg_match_all('~' . $ent_list . '|.~us', $string, $units))
	{
		return substr($string, 0, $length);
	}

	// Add whole units for as long as the byte budget allows
	$result = '';
	foreach ($units[0] as $unit)
	{
		if (strlen($result) + strlen($unit) > $length)
		{
			break;
		}

		$result .= $unit;
	}

	return $result;
}
||||
| 272 | |||||
/**
 * Returns the length of a multi-byte string
 *
 * - A numeric entity or one of &quot; &amp; &lt; &gt; &nbsp; counts as one character
 * - With $modSettings['disableEntityCheck'] set only &#021; is recognised as a numeric entity
 * - Without mbstring (and with entity checking on) numeric entities are first passed
 *   through the entity_fix__callback function
 * - Returns 0 for null, false, arrays, the empty string and for text that is not valid UTF-8
 *
 * @param string $string
 *
 * @return int
 */
public static function strlen($string): int
{
	global $modSettings;

	if ($string === null || $string === false || is_array($string))
	{
		return 0;
	}

	// Not empty(): the string '0' is one character long
	$string = (string) $string;
	if ($string === '')
	{
		return 0;
	}

	if (empty($modSettings['disableEntityCheck']))
	{
		$ent_list = '&(#\d{1,7}|quot|amp|lt|gt|nbsp);';
		if (function_exists('mb_strlen'))
		{
			// Collapse every entity / character to one placeholder and count those
			$check = preg_replace('~' . $ent_list . '|.~u', '_', $string);

			return $check === null ? 0 : mb_strlen($check, 'UTF-8');
		}

		$check = preg_replace('~' . $ent_list . '|.~u', '_', preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string));

		return $check === null ? 0 : strlen($check);
	}

	$ent_list = '&(#021|quot|amp|lt|gt|nbsp);';
	$check = preg_replace('~' . $ent_list . '|.~u', '_', $string);

	// null means the regex failed, which happens for invalid UTF-8
	return $check === null ? 0 : strlen($check);
}
||||
| 307 | |||||
/**
 * Shorten a string of text
 *
 * What it does:
 *
 * - Shortens a text string to a given visual length, as measured by Util::strlen
 * - Considers certain html entities as 1 in length, &amp; etc
 * - With $exact the ellipsis is counted in the allowed length, otherwise it is appended
 * - With $cutword the cut is moved back to the last space, provided that space is
 *   no more than $buffer characters short of $length; trailing whitespace is then removed
 * - Text that is not longer than $length is returned unchanged, without ellipsis
 * - Does not account for html tags, ie <b>test</b> is 11 characters not 4
 *
 * @param string $string The string to shorten
 * @param int $length The length to cut the string to
 * @param bool $cutword try to cut at a word boundary
 * @param string $ellipsis characters to add at the end of a cut string
 * @param bool $exact set true to include ellipsis in the allowed length, false will append instead
 * @param int $buffer maximum length underflow to allow when cutting on a word boundary
 *
 * @return string|null
 */
public static function shorten_text($string, $length = 384, $cutword = false, $ellipsis = '...', $exact = true, $buffer = 12): ?string
{
	// Short enough as it is
	if (self::strlen($string) <= $length)
	{
		return $string;
	}

	// Room to reserve for the ellipsis, none when it is simply appended
	$ending = 0;
	if (!empty($ellipsis) && $exact)
	{
		$ending = self::strlen($ellipsis);
	}

	$string = self::substr($string, 0, $length - $ending);

	// A hard cut is all that was asked for
	if (!$cutword)
	{
		return $string . $ellipsis;
	}

	// Step back to the last space, unless that would throw away too much
	$space_pos = self::strpos($string, ' ', 0, true);
	if (!empty($space_pos) && ($length - $space_pos <= $buffer))
	{
		$string = self::substr($string, 0, $space_pos);
	}

	return rtrim($string) . $ellipsis;
}
||||
| 362 | |||||
/**
 * Perform a substr operation on multi-byte strings
 *
 * - $start and $length are counted in "characters" where a numeric entity or one of
 *   &quot; &amp; &lt; &gt; &nbsp; counts as a single character
 * - Unless $modSettings['disableEntityCheck'] is set, numeric entities are first passed
 *   through the entity_fix__callback function; with the setting on, only &#021; is
 *   recognised as a numeric entity
 * - Text that is not valid UTF-8 can not be split into characters, the native byte
 *   based substr is used for it
 *
 * @param string $string
 * @param int $start
 * @param int|null $length
 *
 * @return string
 */
public static function substr($string, $start, $length = null): string
{
	global $modSettings;

	$string = (string) $string;
	$start = (int) $start;

	// Entities are kept whole, everything else is one element per code point.
	// The s modifier lets . match newlines so they are not merged into one element.
	if (empty($modSettings['disableEntityCheck']))
	{
		$split_reg = '~(&#\d{1,7};|&quot;|&amp;|&lt;|&gt;|&nbsp;|.)~us';
		$string = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string) ?? $string;
	}
	else
	{
		$split_reg = '~(&#021;|&quot;|&amp;|&lt;|&gt;|&nbsp;|.)~us';
	}

	$ent_arr = preg_split($split_reg, $string, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

	// The split fails on invalid UTF-8
	if ($ent_arr === false)
	{
		return $length === null ? \substr($string, $start) : \substr($string, $start, (int) $length);
	}

	return $length === null ? implode('', array_slice($ent_arr, $start)) : implode('', array_slice($ent_arr, $start, $length));
}
||||
| 389 | |||||
/**
 * Truncate a string up to a number of characters while preserving whole words and HTML tags
 *
 * This function is an adaption of the cake php function truncate in utility string.php (MIT)
 *
 * - Text whose tag-stripped length is within $length is returned unchanged
 * - Tags do not count towards the length, entities count as one character
 * - The cut is moved back to the last space, but never into the middle of a tag
 * - Tags left open by the cut are closed again after the ellipsis
 *
 * @param string $string text to truncate.
 * @param int $length length of returned string
 * @param string $ellipsis characters to add at the end of cut string, like ...
 * @param bool $exact If to account for the $ellipsis length in returned string length
 *
 * @return string Trimmed string.
 */
public static function shorten_html($string, $length = 384, $ellipsis = '...', $exact = true): string
{
	// If its shorter than the maximum length, while accounting for html tags, simply return
	if (self::strlen(preg_replace('~<.*?>~', '', $string)) <= $length)
	{
		return $string;
	}

	// Start off empty
	$total_length = $exact ? self::strlen($ellipsis) : 0;
	$open_tags = [];
	$truncate = '';

	// Group all html open and closing tags, [1] full tag with <> [2] basic tag name [3] tag content
	preg_match_all('~(<\/?([\w+]+)[^>]*>)?([^<>]*)~', $string, $tags, PREG_SET_ORDER);

	// Walk down the stack of tags
	foreach ($tags as $tag)
	{
		// Void elements have no content or closing tag, so they are not tracked. The name is
		// matched as a whole so that abbr (contains br) or colgroup (contains col) still are.
		if (!preg_match('~^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)$~i', $tag[2]))
		{
			// Opening tag add the closing tag to the top of the stack
			if (preg_match('~<[\w]+[^>]*>~', $tag[0]))
			{
				array_unshift($open_tags, $tag[2]);
			}
			// Closing tag
			elseif (preg_match('~<\/([\w]+)[^>]*>~', $tag[0], $close_tag))
			{
				// Remove its starting tag
				$pos = array_search($close_tag[1], $open_tags, true);
				if ($pos !== false)
				{
					array_splice($open_tags, $pos, 1);
				}
			}
		}

		// Add this (opening or closing) tag to $truncate
		$truncate .= $tag[1];

		// Calculate the length of the actual tag content, accounts for html entities as a single characters
		$content_length = self::strlen($tag[3]);

		// Have we exceeded the allowed length limit, only add in what we are allowed
		if ($content_length + $total_length > $length)
		{
			// The number of characters which we can still return
			$remaining = $length - $total_length;
			$truncate .= self::substr($tag[3], 0, $remaining);
			break;
		}

		// Still room to go so add the tag content and continue
		$truncate .= $tag[3];
		$total_length += $content_length;

		// Are we there yet?
		if ($total_length >= $length)
		{
			break;
		}
	}

	// Our truncated string up to the last space
	$space_pos = self::strpos($truncate, ' ', 0, true);
	$space_pos = empty($space_pos) ? $length : $space_pos;

	$truncate_check = self::substr($truncate, 0, $space_pos);

	// Make sure this would not cause a cut in the middle of a tag
	$lastOpenTag = (int) self::strpos($truncate_check, '<', 0, true);
	$lastCloseTag = (int) self::strpos($truncate_check, '>', 0, true);
	if ($lastOpenTag > $lastCloseTag)
	{
		// Find the last full open tag in our truncated string, its what was being cut
		preg_match_all('~<[\w]+[^>]*>~', $truncate, $lastTagMatches);
		$last_tag = array_pop($lastTagMatches[0]);

		if ($last_tag !== null)
		{
			// Set the space to just after the last tag. Both parts are measured in characters,
			// Util::strpos does not return a byte offset.
			$space_pos = self::strpos($truncate, $last_tag, 0, true) + self::strlen($last_tag);
			$space_pos = empty($space_pos) ? $length : $space_pos;
		}
	}

	// Look at what we are going to cut off the end of our truncated string
	$bits = self::substr($truncate, $space_pos);

	// Does it cut a tag off, if so we need to know so it can be added back at the cut point
	preg_match_all('~<\/([a-z]+)>~', $bits, $dropped_tags, PREG_SET_ORDER);
	if (!empty($dropped_tags))
	{
		if (!empty($open_tags))
		{
			foreach ($dropped_tags as $closing_tag)
			{
				if (!in_array($closing_tag[1], $open_tags, true))
				{
					array_unshift($open_tags, $closing_tag[1]);
				}
			}
		}
		else
		{
			foreach ($dropped_tags as $closing_tag)
			{
				$open_tags[] = $closing_tag[1];
			}
		}
	}

	// Cut it
	$truncate = self::substr($truncate, 0, $space_pos);

	// Dot dot dot
	$truncate .= $ellipsis;

	// Finally close any html tags that were left open
	foreach ($open_tags as $tag)
	{
		$truncate .= '</' . $tag . '>';
	}

	return $truncate;
}
||||
| 527 | |||||
/**
 * Uppercases the first character of every word in a multi-byte string
 *
 * - Words are the pieces separated by runs of whitespace, the whitespace itself is kept as is
 *
 * @param string $string
 *
 * @return string
 */
public static function ucwords($string): string
{
	// With the delimiters captured, even slots hold the words and odd slots the whitespace
	$pieces = preg_split('~([\s\r\n\t]+)~', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
	foreach ($pieces as $index => $piece)
	{
		if ($index % 2 === 0)
		{
			$pieces[$index] = self::ucfirst($piece);
		}
	}

	return implode('', $pieces);
}
||||
| 545 | |||||
/**
 * Uppercases the first character of a multi-byte string
 *
 * - Splits with Util::substr, so a leading entity is handled as one character
 *
 * @param string $string
 *
 * @return string
 */
public static function ucfirst($string): string
{
	$first = self::substr($string, 0, 1);
	$remainder = self::substr($string, 1);

	return self::strtoupper($first) . $remainder;
}
||||
| 557 | |||||
/**
 * Uppercases a multi-byte (UTF-8) string
 *
 * - Uses mb_strtoupper when the function exists, otherwise loads Charset.subs.php
 *   from SUBSDIR and uses utf8_strtoupper
 *
 * @param string $string
 *
 * @return string
 */
public static function strtoupper($string): string
{
	// No mbstring, rely on the substitution tables instead
	if (!function_exists('mb_strtoupper'))
	{
		require_once(SUBSDIR . '/Charset.subs.php');

		return utf8_strtoupper($string);
	}

	return mb_strtoupper($string, 'UTF-8');
}
||||
| 578 | |||||
/**
 * Wrapper for unserialize that never instantiates objects
 *
 * What it does:
 *
 * - Forces the allowed_classes option to false, whatever value was passed in
 * - Only unserializes input that Util::is_serialized accepts
 *
 * @param string $string The string to unserialize
 * @param array $options Optional, passed on to unserialize; allowed_classes is always overwritten with false
 * @return mixed the unserialized value, or an empty string when $string is not serialized data
 */
public static function unserialize($string, $options = [])
{
	// Objects are never created from stored data
	$options['allowed_classes'] = false;

	return self::is_serialized($string) ? unserialize($string, $options) : '';
}
||||
| 599 | |||||
/**
 * Determine if a string is serialized
 *
 * - Tries to unserialize the string (objects are never instantiated) while any
 *   notice / warning / exception that it raises is suppressed
 * - The serialized boolean false, b:0;, is recognised explicitly since its
 *   unserialized value can not be told apart from a failure
 *
 * @param string $string
 * @return bool
 */
public static function is_serialized($string): bool
{
	// Easy cases
	if (!is_string($string) || $string === '')
	{
		return false;
	}

	// unserialize() returns false for this valid input as well as on error
	if ($string === 'b:0;')
	{
		return true;
	}

	// Attempt to unserialize, mask errors
	set_error_handler(static function () { /* ignore errors */ });
	try
	{
		return unserialize($string, ['allowed_classes' => false]) !== false;
	}
	catch (\Throwable)
	{
		return false;
	}
	finally
	{
		// Always hand error handling back, also when an exception was thrown
		restore_error_handler();
	}
}
||||
| 638 | |||||
/**
 * Provide a PHP 8.1 version of strftime
 *
 * - Below PHP 8.1 the native strftime is used when it exists
 * - Otherwise each %x specifier is translated with date(), which means day and month
 *   names are always English and the locale based specifiers (%c, %x, %X) use the
 *   C / POSIX layout
 * - %% produces a single %, a specifier that is not known is left in the output untouched
 * - The %E and %O modifiers are not supported
 *
 * @param string $format of the date/time to return
 * @param int|null $timestamp to convert, null for the current time
 * @return string|false
 */
public static function strftime(string $format, ?int $timestamp = null)
{
	if (function_exists('strftime') && (PHP_VERSION_ID < 80100))
	{
		return \strftime($format, $timestamp);
	}

	$timestamp ??= time();

	// Specifiers (without the %) that have a direct date() format equivalent
	$date_equivalents = [
		// Day
		'a' => 'D',
		'A' => 'l',
		'd' => 'd',
		'u' => 'N',
		'w' => 'w',
		// Week
		'V' => 'W',
		// Month
		'b' => 'M',
		'B' => 'F',
		'h' => 'M',
		'm' => 'm',
		// Year
		'G' => 'o',
		'y' => 'y',
		'Y' => 'Y',
		// Time
		'H' => 'H',
		'I' => 'h',
		'M' => 'i',
		'p' => 'A',
		'P' => 'a',
		'r' => 'h:i:s A',
		'R' => 'H:i',
		'S' => 's',
		'T' => 'H:i:s',
		'X' => 'H:i:s',
		'z' => 'O',
		'Z' => 'T',
		// Time and Date Stamps
		'D' => 'm/d/y',
		'F' => 'Y-m-d',
		's' => 'U',
		'x' => 'm/d/y',
	];

	$result = preg_replace_callback(
		'/%([A-Za-z%])/',
		static function ($matches) use ($timestamp, $date_equivalents) {
			$spec = $matches[1];

			// Day of the year (0 based) and day of the week (0 = Sunday), for the week numbers
			$yday = (int) date('z', $timestamp);
			$wday = (int) date('w', $timestamp);

			return match ($spec) {
				'%' => '%',
				'n' => "\n",
				't' => "\t",
				// Century, the year divided by 100
				'C' => sprintf('%02d', intdiv((int) date('Y', $timestamp), 100)),
				// Space padded day of the month, 24 hour and 12 hour
				'e' => sprintf('%2d', (int) date('j', $timestamp)),
				'k' => sprintf('%2d', (int) date('G', $timestamp)),
				'l' => sprintf('%2d', (int) date('g', $timestamp)),
				// Day of the year 001-366, date() counts from 0
				'j' => sprintf('%03d', $yday + 1),
				// Two digit ISO-8601 week based year
				'g' => substr(date('o', $timestamp), -2),
				// Week of the year with the first Sunday / Monday starting week 01
				'U' => sprintf('%02d', intdiv($yday + 7 - $wday, 7)),
				'W' => sprintf('%02d', intdiv($yday + 7 - (($wday + 6) % 7), 7)),
				// POSIX locale date and time, %a %b %e %H:%M:%S %Y
				'c' => date('D M ', $timestamp) . sprintf('%2d', (int) date('j', $timestamp)) . date(' H:i:s Y', $timestamp),
				default => isset($date_equivalents[$spec]) ? date($date_equivalents[$spec], $timestamp) : $matches[0],
			};
		},
		$format
	);

	return $result ?? false;
}
||||
| 717 | |||||
/**
 * Provide a PHP 8.1 version of gmstrftime
 *
 * - Below PHP 8.1 the native gmstrftime is used when it exists
 * - Otherwise Util::strftime does the formatting while the default timezone is
 *   temporarily switched to UTC, so the result is GMT and not local time
 *
 * @param string $format of the date/time to return
 * @param int|null $timestamp to convert, null for the current time
 * @return string|false
 */
public static function gmstrftime(string $format, ?int $timestamp = null)
{
	if (function_exists('gmstrftime') && (PHP_VERSION_ID < 80100))
	{
		return \gmstrftime($format, $timestamp);
	}

	// Util::strftime formats in the default timezone, make that UTC for the duration of the call
	$timezone = date_default_timezone_get();
	date_default_timezone_set('UTC');
	try
	{
		return self::strftime($format, $timestamp);
	}
	finally
	{
		date_default_timezone_set($timezone);
	}
}
||||
| 734 | |||||
/**
 * Checks if the string contains any 4byte chars (emoji) and if so,
 * converts them into &#x...; HTML entities.
 *
 * - Does nothing when $modSettings['using_utf8mb4'] is set
 * - Text without a code point in the range U+10000 - U+10FFFF is returned as it was
 *
 * @param string $string
 * @return string
 */
public static function clean_4byte_chars($string): string
{
	global $modSettings;

	if (!empty($modSettings['using_utf8mb4']))
	{
		return $string;
	}

	$four_byte_reg = '~[\x{10000}-\x{10FFFF}]~u';

	// Nothing in the 4-byte range (or the unicode match failed), no work to do
	if (!preg_match($four_byte_reg, $string))
	{
		return $string;
	}

	// Swap every 4-byte character for its hexadecimal entity
	return preg_replace_callback(
		$four_byte_reg,
		static function ($found) {
			$entity = self::getUnicodeOrdinal($found[0]);

			// No ordinal available, use the unicode replacement character
			return $entity === false ? "\xEF\xBF\xBD" : '&#x' . dechex($entity) . ';';
		},
		$string
	);
}
||||
| 793 | |||||
/**
 * Converts a UTF-8 encoded character into its ordinal (code point) value.
 *
 * This function is derived from:
 * http://www.greywyvern.com/code/php/utf8_html
 *
 * - The first byte selects the sequence length (1 to 6 bytes), each following
 *   byte contributes its value minus 128 as the next base 64 digit
 * - Returns false when the first byte is 254 or 255
 * - Returns 0 when the first byte is in the range 128 - 191
 *
 * @param string $character
 * @return int|false
 */
public static function getUnicodeOrdinal($character)
{
	$lead = ord($character[0]);

	// Single byte, the value is the ordinal
	if ($lead <= 127)
	{
		return $lead;
	}

	// These two can not start a sequence
	if ($lead >= 254)
	{
		return false;
	}

	// Lowest lead byte of each range => number of bytes in the sequence, highest range first
	$ranges = [252 => 6, 248 => 5, 240 => 4, 224 => 3, 192 => 2];
	foreach ($ranges as $base => $size)
	{
		if ($lead < $base)
		{
			continue;
		}

		// Shift in the following bytes, 64 values each
		$ordinal = $lead - $base;
		for ($i = 1; $i < $size; $i++)
		{
			$ordinal = $ordinal * 64 + (ord($character[$i]) - 128);
		}

		return $ordinal;
	}

	// 128 - 191, not a lead byte
	return 0;
}
||||
| 842 | } |
||||
| 843 |