1 | <?php |
||||
2 | |||||
3 | /** |
||||
4 | * Utility functions, such as to handle multi byte strings |
||||
5 | * |
||||
6 | * @package ElkArte Forum |
||||
7 | * @copyright ElkArte Forum contributors |
||||
8 | * @license BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file) |
||||
9 | * |
||||
10 | * @version 2.0 dev |
||||
11 | * |
||||
12 | */ |
||||
13 | |||||
14 | namespace ElkArte\Helper; |
||||
15 | |||||
16 | /** |
||||
17 | * Utility functions, such as to handle multi byte strings |
||||
18 | * Note: some of these might be deprecated or removed in the future. |
||||
19 | */ |
||||
20 | class Util |
||||
21 | { |
||||
22 | protected static $_entity_check_reg = '~(&#(\d{1,7}|x[0-9a-fA-F]{1,6});)~'; |
||||
23 | |||||
/**
 * Validates the payload of a numeric character reference and normalises it to decimal form
 *
 * - Accepts either decimal digits or a lowercase "x" followed by hex digits
 * - Control characters (below 0x20), values above 0x10FFFF, surrogates (0xD800 - 0xDFFF) and the
 * directional overrides 0x202D / 0x202E are rejected
 *
 * @param string $string the part of the entity found between &# and ;
 *
 * @return string the entity rewritten as &#<decimal>; or an empty string when it is not allowed
 */
public static function entity_fix($string)
{
	if ($string[0] === 'x')
	{
		$codepoint = hexdec(substr($string, 1));
	}
	else
	{
		$codepoint = (int) $string;
	}

	// Control characters, out of range values, surrogates and direction overrides are all dropped
	$disallowed = $codepoint < 0x20
		|| $codepoint > 0x10FFFF
		|| ($codepoint >= 0xD800 && $codepoint <= 0xDFFF)
		|| in_array($codepoint, [0x202D, 0x202E], true);

	return $disallowed ? '' : '&#' . $codepoint . ';';
}
||||
43 | |||||
/**
 * UTF-8 flavoured htmlspecialchars
 *
 * - Empty values are handed back untouched
 * - Unless $modSettings['disableEntityCheck'] is set, numeric entities found in the encoded
 * result are passed through entity_fix__callback
 *
 * @param string $string
 * @param int $quote_style integer or constant representation of one
 * @param string $charset only UTF-8 allowed
 * @param bool $double true will allow double encoding, false will not encode existing html entities,
 *
 * @return string
 */
public static function htmlspecialchars($string, $quote_style = ENT_COMPAT, $charset = 'UTF-8', $double = false)
{
	global $modSettings;

	if (empty($string))
	{
		return $string;
	}

	$encoded = htmlspecialchars($string, $quote_style, $charset, $double);

	// Entity checking turned off, the encoded text is all that is needed
	if (!empty($modSettings['disableEntityCheck']))
	{
		return $encoded;
	}

	return preg_replace_callback('~(&#(\d{1,7}|x[0-9a-fA-F]{1,6});)~', 'entity_fix__callback', $encoded);
}
||||
71 | |||||
/**
 * Applies Util::htmlspecialchars (with ENT_QUOTES) to a value or to every value of a nested array
 *
 * What it does:
 *
 * - Array keys are left alone, only the values are encoded
 * - Recurses into nested arrays
 * - Once the nesting level passes 25 the elements are replaced with null instead of recursing further
 *
 * @param array|string $var The string or array of strings to add entities
 * @param int $level = 0 The current level we're at within the array (if called recursively)
 *
 * @return array|string The string or array of strings with entities added
 */
public static function htmlspecialchars__recursive($var, $level = 0)
{
	if (is_array($var))
	{
		$too_deep = $level > 25;

		foreach (array_keys($var) as $key)
		{
			$var[$key] = $too_deep ? null : self::htmlspecialchars__recursive($var[$key], $level + 1);
		}

		return $var;
	}

	// A plain value, encode it directly
	return self::htmlspecialchars($var, ENT_QUOTES);
}
||||
102 | |||||
/**
 * Trims whitespace, invisible separators, control characters and the literal &nbsp; entity
 * from the start and end of a string
 *
 * - Unless $modSettings['disableEntityCheck'] is set, numeric entities are first passed
 * through entity_fix__callback
 *
 * @param string $string
 *
 * @return string
 */
public static function htmltrim($string)
{
	global $modSettings;

	// \p{Z} is any kind of whitespace or invisible separator, \p{C} the invisible control
	// characters and unused code points.  The &nbsp; entity is text, not a space character,
	// so it has to be listed on its own to be trimmed as well.
	$trimmable = '(?:[\p{Z}\p{C}]|&nbsp;)+';

	if (empty($modSettings['disableEntityCheck']))
	{
		$string = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string);
	}

	return preg_replace('~^' . $trimmable . '|' . $trimmable . '$~u', '', $string);
}
||||
128 | |||||
/**
 * Runs Util::htmltrim over a value or over every value of a nested array
 *
 * What it does:
 *
 * - Array keys are left alone, only the values are trimmed
 * - Recurses into nested arrays
 * - Once the nesting level passes 25 the elements are replaced with null instead of recursing further
 *
 * @param array|string $var The string or array of strings to trim
 * @param int $level = 0 How deep we're at within the array (if called recursively)
 *
 * @return mixed[]|string The trimmed string or array of trimmed strings
 */
public static function htmltrim__recursive($var, $level = 0)
{
	if (is_array($var))
	{
		$too_deep = $level > 25;

		foreach (array_keys($var) as $key)
		{
			$var[$key] = $too_deep ? null : self::htmltrim__recursive($var[$key], $level + 1);
		}

		return $var;
	}

	// A plain value, trim it directly
	return self::htmltrim($var);
}
||||
161 | |||||
/**
 * Perform a strpos search on a multi-byte string
 *
 * - Positions are counted in "characters", where a numeric entity or one of the common named
 * entities (&quot; &amp; &lt; &gt; &nbsp;) counts as a single character, matching Util::strlen
 * - Unless $modSettings['disableEntityCheck'] is set, numeric entities in the haystack (and in
 * multi character needles) are first passed through entity_fix__callback
 * - An empty needle, or text that can not be split (e.g. invalid UTF-8), is reported as not found
 *
 * @param string $haystack what to search in
 * @param string $needle what is being looked for
 * @param int $offset where to start, assumed 0
 * @param bool $right set to true to mimic strrpos functions
 *
 * @return int|bool the character position of the match, false when not found
 */
public static function strpos($haystack, $needle, $offset = 0, $right = false)
{
	global $modSettings;

	$entity_check = empty($modSettings['disableEntityCheck']);
	$split_flags = PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY;

	// What makes up one character: a numeric entity, a common named entity or any single code point
	$char_regex = '~(&#' . ($entity_check ? '\d{1,7}' : '021') . ';|&quot;|&amp;|&lt;|&gt;|&nbsp;|.)~u';

	$haystack_check = $entity_check ? preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $haystack) : $haystack;
	$haystack_arr = preg_split($char_regex, $haystack_check, -1, $split_flags);
	if ($haystack_arr === false)
	{
		return false;
	}

	$count = 0;

	// From the right side, like mb_strrpos instead
	if ($right)
	{
		$haystack_arr = array_reverse($haystack_arr);
		$count = count($haystack_arr) - 1;
	}

	// Single character search, lets go
	if (strlen($needle) === 1)
	{
		$result = array_search($needle, array_slice($haystack_arr, $offset), true);

		if (!is_int($result))
		{
			return false;
		}

		return $right ? $count - ($result + $offset) : $result + $offset;
	}

	$needle_check = $entity_check ? preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $needle) : $needle;
	$needle_arr = preg_split($char_regex, $needle_check, -1, $split_flags);

	// Nothing to look for, or the needle could not be split
	if (empty($needle_arr))
	{
		return false;
	}

	$needle_arr = $right ? array_reverse($needle_arr) : $needle_arr;
	$needle_size = count($needle_arr);

	// Jump to each occurrence of the needles first character and compare the full run from there
	$result = array_search($needle_arr[0], array_slice($haystack_arr, $offset), true);
	while (is_int($result))
	{
		$offset += $result;
		if (array_slice($haystack_arr, $offset, $needle_size) === $needle_arr)
		{
			return $right ? ($count - $offset - $needle_size + 1) : $offset;
		}

		$result = array_search($needle_arr[0], array_slice($haystack_arr, ++$offset), true);
	}

	return false;
}
||||
216 | |||||
/**
 * Lowercases a multi-byte (UTF-8) string
 *
 * - Uses mb_strtolower when the mbstring extension is available
 * - Otherwise loads Charset.subs.php from SUBSDIR and uses utf8_strtolower
 *
 * @param string $string
 *
 * @return string
 */
public static function strtolower($string)
{
	if (!function_exists('mb_strtolower'))
	{
		require_once(SUBSDIR . '/Charset.subs.php');

		return utf8_strtolower($string);
	}

	return mb_strtolower($string, 'UTF-8');
}
||||
237 | |||||
/**
 * Cuts off a multi-byte string so that it occupies no more than a given number of bytes
 *
 * - Unless $modSettings['disableEntityCheck'] is set, numeric entities are first passed through
 * entity_fix__callback
 * - The length is measured in actual bytes (&nbsp; = 6 not 1), not in displayed characters, use it
 * for things like db field compliance to avoid overflow
 * - The cut is only made between whole characters / whole entities, never inside one
 *
 * @param string $string
 * @param int $length the maximum number of bytes to keep
 *
 * @return string
 */
public static function truncate($string, $length)
{
	global $modSettings;

	$entity_check = empty($modSettings['disableEntityCheck']);

	// The entities which must never be cut in half
	$ent_list = $entity_check ? '&(#\d{1,7}|quot|amp|lt|gt|nbsp);' : '&(#021|quot|amp|lt|gt|nbsp);';

	if ($entity_check)
	{
		$string = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string);
	}

	$string = (string) $string;

	// Already fits, nothing to cut
	if (strlen($string) <= $length)
	{
		return $string;
	}

	// Break the text into whole entities / characters, the s modifier lets . take new lines as well
	if (!preg_match_all('~' . $ent_list . '|.~su', $string, $pieces))
	{
		// Not splittable (e.g. invalid UTF-8), a plain byte cut is the best that can be done
		return substr($string, 0, max(0, (int) $length));
	}

	// Keep adding whole pieces for as long as they fit in the allowed byte length
	$kept = '';
	$bytes = 0;
	foreach ($pieces[0] as $piece)
	{
		$bytes += strlen($piece);
		if ($bytes > $length)
		{
			break;
		}

		$kept .= $piece;
	}

	return $kept;
}
||||
272 | |||||
/**
 * Returns the length of multi-byte string
 *
 * - Each code point counts as one, as does each of the entities &quot; &amp; &lt; &gt; &nbsp;
 * - With the entity check enabled any decimal numeric entity counts as one as well, with it
 * disabled only &#021; does
 * - Text that can not be processed as UTF-8 is reported as length 0
 *
 * @param string $string
 *
 * @return int
 */
public static function strlen($string)
{
	global $modSettings;

	// Compare with the empty string, empty() would also report "0" as having no length
	$string = (string) $string;
	if ($string === '')
	{
		return 0;
	}

	if (empty($modSettings['disableEntityCheck']))
	{
		$ent_list = '&(#\d{1,7}|quot|amp|lt|gt|nbsp);';
		if (function_exists('mb_strlen'))
		{
			// NOTE(review): unlike the fallback below this path does not run entity_fix__callback
			// first, so disallowed numeric entities still count as one here - confirm if intended
			$check = preg_replace('~' . $ent_list . '|.~u', '_', $string);

			return $check === null ? 0 : mb_strlen($check, 'UTF-8');
		}

		$check = preg_replace('~' . $ent_list . '|.~u', '_', preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string));

		return $check === null ? 0 : strlen($check);
	}

	// Every character / entity becomes a single byte, so a byte count is then a character count
	$ent_list = '&(#021|quot|amp|lt|gt|nbsp);';
	$check = preg_replace('~' . $ent_list . '|.~u', '_', $string);

	return $check === null ? 0 : strlen($check);
}
||||
307 | |||||
/**
 * Shorten a string of text
 *
 * What it does:
 *
 * - Text whose Util::strlen is within $length is returned unchanged
 * - Lengths are those of Util::strlen / Util::substr, so certain html entities (&amp; etc) are
 * considered as 1 in length
 * - With $exact the ellipsis length is taken out of the allowed length, otherwise the ellipsis
 * is simply appended to the cut text
 * - With $cutword the text is cut back to its last space, provided that space is no more than
 * $buffer characters short of $length, and trailing whitespace is removed before the ellipsis
 * - Does not account for html tags, ie <b>test</b> is 11 characters not 4
 *
 * @param string $string The string to shorten
 * @param int $length The length to cut the string to
 * @param bool $cutword try to cut at a word boundary
 * @param string $ellipsis characters to add at the end of a cut string
 * @param bool $exact set true to include ellipsis in the allowed length, false will append instead
 * @param int $buffer maximum length underflow to allow when cutting on a word boundary
 *
 * @return string
 */
public static function shorten_text($string, $length = 384, $cutword = false, $ellipsis = '...', $exact = true, $buffer = 12)
{
	// Short enough already
	if (self::strlen($string) <= $length)
	{
		return $string;
	}

	// Room to set aside for the ellipsis, none when it is appended past the length
	$reserved = (!empty($ellipsis) && $exact) ? self::strlen($ellipsis) : 0;

	$cut = self::substr($string, 0, $length - $reserved);

	// A hard cut is all that was asked for
	if (!$cutword)
	{
		return $cut . $ellipsis;
	}

	// Back up to the last space, unless that would give up more than the buffer allows
	$last_space = self::strpos($cut, ' ', 0, true);
	if (!empty($last_space) && ($length - $last_space <= $buffer))
	{
		$cut = self::substr($cut, 0, $last_space);
	}

	return rtrim($cut) . $ellipsis;
}
||||
362 | |||||
/**
 * Perform a substr operation on multi-byte strings
 *
 * - Works in "characters", where one of the common named entities (&quot; &amp; &lt; &gt; &nbsp;)
 * counts as a single character that is never split, matching Util::strlen
 * - With the entity check enabled decimal numeric entities are passed through entity_fix__callback
 * and also count as a single character, with it disabled only &#021; does
 * - Text that can not be split (e.g. invalid UTF-8) results in an empty string
 *
 * @param string $string
 * @param int $start character offset, negative values count from the end as with array_slice
 * @param int|null $length number of characters to return, null for everything after $start
 *
 * @return string
 */
public static function substr($string, $start, $length = null)
{
	global $modSettings;

	$split_flags = PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY;

	if (empty($modSettings['disableEntityCheck']))
	{
		$ent_arr = preg_split('~(&#\d{1,7};|&quot;|&amp;|&lt;|&gt;|&nbsp;|.)~u', preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string), -1, $split_flags);
	}
	else
	{
		$ent_arr = preg_split('~(&#021;|&quot;|&amp;|&lt;|&gt;|&nbsp;|.)~u', $string, -1, $split_flags);
	}

	// The split fails on invalid UTF-8, there is nothing sensible to slice then
	if ($ent_arr === false)
	{
		return '';
	}

	// A null length makes array_slice run to the end of the array
	return implode('', array_slice($ent_arr, $start, $length));
}
||||
389 | |||||
/**
 * Truncate a string up to a number of characters while preserving whole words and HTML tags
 *
 * This function is an adaption of the cake php function truncate in utility string.php (MIT)
 *
 * - Text whose length without tags is within $length is returned unchanged
 * - Tags are copied over without counting towards the length, only their content is counted
 * - The result is cut back to its last space (never inside a tag), the ellipsis is added and
 * any tags still open at that point are closed
 * - Void elements (img, br, hr ...) are recognised by their full name, in any case, and are
 * never given a closing tag
 *
 * @param string $string text to truncate.
 * @param int $length length of returned string
 * @param string $ellipsis characters to add at the end of cut string, like ...
 * @param bool $exact If to account for the $ellipsis length in returned string length
 *
 * @return string Trimmed string.
 */
public static function shorten_html($string, $length = 384, $ellipsis = '...', $exact = true)
{
	// If its shorter than the maximum length, while accounting for html tags, simply return
	if (self::strlen(preg_replace('~<.*?>~', '', $string)) <= $length)
	{
		return $string;
	}

	// Start off empty
	$total_length = $exact ? self::strlen($ellipsis) : 0;
	$open_tags = array();
	$truncate = '';

	// Group all html open and closing tags, [1] full tag with <> [2] basic tag name [3] tag content
	preg_match_all('~(<\/?([\w+]+)[^>]*>)?([^<>]*)~', $string, $tags, PREG_SET_ORDER);

	// Walk down the stack of tags
	foreach ($tags as $tag)
	{
		// If this tag has content.  The name has to match a void element in full, a partial
		// match would also catch abbr (br), colgroup (col), frameset (frame) and leave them unclosed
		if (!preg_match('/^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)$/i', $tag[2]))
		{
			// Opening tag add the closing tag to the top of the stack
			if (preg_match('~<[\w]+[^>]*>~', $tag[0]))
			{
				array_unshift($open_tags, $tag[2]);
			}
			// Closing tag
			elseif (preg_match('~<\/([\w]+)[^>]*>~', $tag[0], $close_tag))
			{
				// Remove its starting tag
				$pos = array_search($close_tag[1], $open_tags, true);
				if ($pos !== false)
				{
					array_splice($open_tags, $pos, 1);
				}
			}
		}

		// Add this (opening or closing) tag to $truncate
		$truncate .= $tag[1];

		// Calculate the length of the actual tag content, accounts for html entities as a single characters
		$content_length = self::strlen($tag[3]);

		// Have we exceeded the allowed length limit, only add in what we are allowed
		if ($content_length + $total_length > $length)
		{
			// The number of characters which we can still return
			$remaining = $length - $total_length;
			$truncate .= self::substr($tag[3], 0, $remaining);
			break;
		}

		// Still room to go so add the tag content and continue
		$truncate .= $tag[3];
		$total_length += $content_length;

		// Are we there yet?
		if ($total_length >= $length)
		{
			break;
		}
	}

	// Our truncated string up to the last space
	$space_pos = self::strpos($truncate, ' ', 0, true);
	$space_pos = empty($space_pos) ? $length : $space_pos;

	$truncate_check = self::substr($truncate, 0, $space_pos);

	// Make sure this would not cause a cut in the middle of a tag
	$lastOpenTag = (int) self::strpos($truncate_check, '<', 0, true);
	$lastCloseTag = (int) self::strpos($truncate_check, '>', 0, true);
	if ($lastOpenTag > $lastCloseTag)
	{
		// Find the last full open tag in our truncated string, its what was being cut
		preg_match_all('~<[\w]+[^>]*>~', $truncate, $lastTagMatches);
		$last_tag = array_pop($lastTagMatches[0]);

		// Set the space to just after the last tag
		$space_pos = self::strpos($truncate, $last_tag, 0, true) + strlen($last_tag);
		$space_pos = empty($space_pos) ? $length : $space_pos;
	}

	// Look at what we are going to cut off the end of our truncated string
	$bits = self::substr($truncate, $space_pos);

	// Does it cut a tag off, if so we need to know so it can be added back at the cut point
	preg_match_all('~<\/([a-z]+)>~', $bits, $dropped_tags, PREG_SET_ORDER);
	if (!empty($dropped_tags))
	{
		if (!empty($open_tags))
		{
			foreach ($dropped_tags as $closing_tag)
			{
				if (!in_array($closing_tag[1], $open_tags, true))
				{
					array_unshift($open_tags, $closing_tag[1]);
				}
			}
		}
		else
		{
			foreach ($dropped_tags as $closing_tag)
			{
				$open_tags[] = $closing_tag[1];
			}
		}
	}

	// Cut it
	$truncate = self::substr($truncate, 0, $space_pos);

	// Dot dot dot
	$truncate .= $ellipsis;

	// Finally close any html tags that were left open
	foreach ($open_tags as $tag)
	{
		$truncate .= '</' . $tag . '>';
	}

	return $truncate;
}
||||
529 | |||||
/**
 * Uppercases the first character of each word in a multi-byte string
 *
 * - Words are whatever sits between runs of whitespace, the whitespace itself is kept as is
 *
 * @param string $string
 *
 * @return string
 */
public static function ucwords($string)
{
	// With the delimiters captured the pieces alternate: word, whitespace, word, ...
	$pieces = preg_split('~([\s\r\n\t]+)~', $string, -1, PREG_SPLIT_DELIM_CAPTURE);

	foreach ($pieces as $index => $piece)
	{
		if ($index % 2 === 0)
		{
			$pieces[$index] = self::ucfirst($piece);
		}
	}

	return implode('', $pieces);
}
||||
547 | |||||
/**
 * Uppercases the first character of a multi-byte string
 *
 * - The first character is taken with Util::substr and uppercased with Util::strtoupper,
 * the remainder is appended unchanged
 *
 * @param string $string
 *
 * @return string
 */
public static function ucfirst($string)
{
	$first = self::substr($string, 0, 1);
	$remainder = self::substr($string, 1);

	return self::strtoupper($first) . $remainder;
}
||||
559 | |||||
/**
 * Uppercases a multi-byte (UTF-8) string
 *
 * - Uses mb_strtoupper when the mbstring extension is available
 * - Otherwise loads Charset.subs.php from SUBSDIR and uses utf8_strtoupper
 *
 * @param string $string
 *
 * @return string
 */
public static function strtoupper($string)
{
	if (!function_exists('mb_strtoupper'))
	{
		require_once(SUBSDIR . '/Charset.subs.php');

		return utf8_strtoupper($string);
	}

	return mb_strtoupper($string, 'UTF-8');
}
||||
580 | |||||
/**
 * Wrapper for unserialize
 *
 * What it does:
 *
 * - Only unserializes values that Util::is_serialized accepts, anything else results in an empty string
 * - Always sets the allowed_classes option to false, whatever the caller passed for it
 *
 * @param string $string The string to unserialize
 * @param string[] $options Optional, passed on to unserialize with allowed_classes forced to false
 * @return mixed
 */
public static function unserialize($string, $options = array())
{
	if (!self::is_serialized($string))
	{
		return '';
	}

	$options['allowed_classes'] = false;

	return unserialize($string, $options);
}
||||
601 | |||||
/**
 * Determine if a string is serialized
 *
 * - Attempts the unserialize (objects not allowed) with errors masked, so it avoids the
 * notice/warning a plain unserialize could raise
 * - A serialized boolean false is recognised as well, even though unserializing it returns
 * the same false that signals a failure
 *
 * @param string $string
 * @return bool
 */
public static function is_serialized($string)
{
	// Easy cases
	if (!is_string($string) || $string === '')
	{
		return false;
	}

	// serialize(false), its result can not be told apart from a failed unserialize below
	if ($string === 'b:0;')
	{
		return true;
	}

	// Attempt to unserialize, mask errors
	set_error_handler(static function () { /* ignore errors */ });
	try
	{
		return unserialize($string, ['allowed_classes' => false]) !== false;
	}
	catch (\Throwable)
	{
		return false;
	}
	finally
	{
		restore_error_handler();
	}
}
||||
640 | |||||
/**
 * Provide a PHP 8.1 version of strftime
 *
 * - Before PHP 8.1 the native strftime is used, from 8.1 on the format is translated to date() output
 * - The translation is not locale aware, day / month names are the English ones date() produces
 * - %e, %k and %l are not space padded and %c gives the ISO 8601 date, unlike the native function
 * - Conversion specifiers that are not known are left in the output as they are
 *
 * @param string $format of the date/time to return
 * @param int|null $timestamp to convert, the current time when null
 * @return string|false
 */
public static function strftime(string $format, ?int $timestamp = null)
{
	// Settle the default first, older versions of the native function would read a null as 0
	$timestamp = $timestamp ?? time();

	if (function_exists('strftime') && (PHP_VERSION_ID < 80100))
	{
		return \strftime($format, $timestamp);
	}

	// Conversions which have a direct date() counterpart
	$date_equivalents = array(
		// Day
		'%a' => 'D',
		'%A' => 'l',
		'%d' => 'd',
		'%e' => 'j',
		'%u' => 'N',
		'%w' => 'w',
		// Week
		'%V' => 'W',
		// Month
		'%b' => 'M',
		'%B' => 'F',
		'%h' => 'M',
		'%m' => 'm',
		// Year
		'%G' => 'o',
		'%y' => 'y',
		'%Y' => 'Y',
		// Time
		'%H' => 'H',
		'%k' => 'G',
		'%I' => 'h',
		'%l' => 'g',
		'%M' => 'i',
		'%p' => 'A',
		'%P' => 'a',
		'%r' => 'h:i:s A',
		'%R' => 'H:i',
		'%S' => 's',
		'%T' => 'H:i:s',
		'%X' => 'H:i:s',
		'%z' => 'O',
		'%Z' => 'T',
		// Time and Date Stamps
		'%c' => 'c',
		'%D' => 'm/d/y',
		'%F' => 'Y-m-d',
		'%s' => 'U',
		'%x' => 'm/d/y',
	);

	return preg_replace_callback(
		'/%[A-Za-z%]/',
		static function ($matches) use ($timestamp, $date_equivalents) {
			$conversion = $matches[0];

			// The ones date() has no format character for
			switch ($conversion)
			{
				case '%%':
					return '%';
				case '%n':
					return "\n";
				case '%t':
					return "\t";
				// Century, the year divided by 100
				case '%C':
					return sprintf('%02d', intdiv((int) date('Y', $timestamp), 100));
				// Two digit version of the ISO 8601 week numbering year
				case '%g':
					return substr(date('o', $timestamp), -2);
				// Day of the year 001 - 366, date() counts from 0
				case '%j':
					return sprintf('%03d', (int) date('z', $timestamp) + 1);
				// Week of the year, weeks starting at the first Sunday
				case '%U':
					return sprintf('%02d', intdiv((int) date('z', $timestamp) + 7 - (int) date('w', $timestamp), 7));
				// Week of the year, weeks starting at the first Monday
				case '%W':
					return sprintf('%02d', intdiv((int) date('z', $timestamp) + 7 - (((int) date('w', $timestamp) + 6) % 7), 7));
			}

			if (isset($date_equivalents[$conversion]))
			{
				return date($date_equivalents[$conversion], $timestamp);
			}

			// Not something we know how to convert
			return $conversion;
		},
		$format
	);
}
||||
719 | |||||
/**
 * Provide a PHP 8.1 version of gmstrftime
 *
 * - Before PHP 8.1 the native gmstrftime is used
 * - From 8.1 on Util::strftime does the formatting, with the default timezone switched to UTC
 * for the duration of the call so the result is GMT and not local time
 *
 * @param string $format of the date/time to return
 * @param int|null $timestamp to convert, the current time when null
 * @return string|false
 */
public static function gmstrftime(string $format, ?int $timestamp = null)
{
	// Settle the default first, older versions of the native function would read a null as 0
	$timestamp = $timestamp ?? time();

	if (function_exists('gmstrftime') && (PHP_VERSION_ID < 80100))
	{
		return \gmstrftime($format, $timestamp);
	}

	// Util::strftime formats in the default timezone, so make that UTC while it runs
	$previous_zone = date_default_timezone_get();
	date_default_timezone_set('UTC');
	try
	{
		return self::strftime($format, $timestamp);
	}
	finally
	{
		date_default_timezone_set($previous_zone);
	}
}
||||
736 | |||||
/**
 * Checks if the string contains any 4byte chars (emoji) and if so,
 * converts them into &#x...; HTML entities.
 *
 * - Nothing is changed when $modSettings['using_utf8mb4'] is set
 * - Nothing is changed when the string holds no character in the U+10000 - U+10FFFF range
 * (which is also the outcome when it is not valid UTF-8)
 *
 * @param string $string
 * @return string
 */
public static function clean_4byte_chars($string)
{
	global $modSettings;

	if (!empty($modSettings['using_utf8mb4']))
	{
		return $string;
	}

	$four_byte = '~[\x{10000}-\x{10FFFF}]~u';

	// Nothing in the 4-byte range, or not something that can be read as UTF-8
	if (!preg_match($four_byte, $string))
	{
		return $string;
	}

	// Swap each 4-byte character for its hex entity, all other bytes pass through as they are
	return preg_replace_callback(
		$four_byte,
		static function ($match) {
			$entity = self::uniord($match[0]);

			return $entity === false ? "\xEF\xBF\xBD" : '&#x' . dechex($entity) . ';';
		},
		$string
	);
}
||||
795 | |||||
/**
 * Converts a UTF-8 encoded character into its numeric code, as used in an HTML entity.
 *
 * This function is derived from:
 * http://www.greywyvern.com/code/php/utf8_html
 *
 * - The first byte decides how many of the following bytes are combined into the value
 * - A first byte in the continuation range (128 - 191) gives 0, a first byte of 254 or 255 gives false
 *
 * @param string $c the bytes of a single character
 * @return integer|false
 */
public static function uniord($c)
{
	$lead = ord($c[0]);

	// The payload carried by the continuation byte found at a given position
	$payload = static function ($position) use ($c) {
		return ord($c[$position]) - 128;
	};

	// Plain single byte character
	if ($lead <= 127)
	{
		return $lead;
	}

	// A continuation byte can not start a character
	if ($lead <= 191)
	{
		return 0;
	}

	// Two byte sequence
	if ($lead <= 223)
	{
		return ($lead - 192) * 64 + $payload(1);
	}

	// Three byte sequence
	if ($lead <= 239)
	{
		return ($lead - 224) * 4096 + $payload(1) * 64 + $payload(2);
	}

	// Four byte sequence
	if ($lead <= 247)
	{
		return ($lead - 240) * 262144 + $payload(1) * 4096 + $payload(2) * 64 + $payload(3);
	}

	// Five byte sequence
	if ($lead <= 251)
	{
		return ($lead - 248) * 16777216 + $payload(1) * 262144 + $payload(2) * 4096 + $payload(3) * 64 + $payload(4);
	}

	// Six byte sequence
	if ($lead <= 253)
	{
		return ($lead - 252) * 1073741824 + $payload(1) * 16777216 + $payload(2) * 262144 + $payload(3) * 4096 + $payload(4) * 64 + $payload(5);
	}

	// 254 and 255 never appear as a first byte
	return false;
}
||||
844 | } |
||||
845 |