Issues (1686)

sources/ElkArte/Helper/Util.php (4 issues)

1
<?php
2
3
/**
4
 * Utility functions, such as to handle multi byte strings
5
 *
6
 * @package   ElkArte Forum
7
 * @copyright ElkArte Forum contributors
8
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
9
 *
10
 * @version 2.0 dev
11
 *
12
 */
13
14
namespace ElkArte\Helper;
15
16
/**
17
 * Utility functions, such as to handle multi byte strings
18
 * Note: some of these might be deprecated or removed in the future.
19
 */
20
class Util
21
{
22
	protected static $_entity_check_reg = '~(&#(\d{1,7}|x[0-9a-fA-F]{1,6});)~';
23
24
	/**
25
	 * Converts invalid / disallowed / out of range entities to nulls
26
	 *
27
	 * @param string $string
28
	 *
29
	 * @return string
30
	 */
31
	public static function entity_fix($string)
	{
		// A leading "x" marks a hexadecimal code point, anything else is read as decimal
		if ($string[0] === 'x')
		{
			$code_point = hexdec(substr($string, 1));
		}
		else
		{
			$code_point = (int) $string;
		}

		// Rejected outright: anything below a space, anything past the last Unicode
		// code point (0x10FFFF) and the UTF-16 surrogate range (0xD800 - 0xDFFF)
		$below_space = $code_point < 0x20;
		$past_unicode = $code_point > 0x10FFFF;
		$is_surrogate = $code_point >= 0xD800 && $code_point <= 0xDFFF;

		// U+202D and U+202E are the left-to-right / right-to-left override characters
		$is_override = $code_point === 0x202D || $code_point === 0x202E;

		if ($below_space || $past_unicode || $is_surrogate || $is_override)
		{
			return '';
		}

		// Whatever the input notation was, the entity is handed back in decimal form
		return '&#' . $code_point . ';';
	}
43
44
	/**
45
	 * Performs an htmlspecialchars on a string, using UTF-8 character set
46
	 * Optionally performs an entity_fix to null any invalid character entities from the string
47
	 *
48
	 * @param string $string
49
	 * @param int $quote_style integer or constant representation of one
50
	 * @param string $charset only UTF-8 allowed
51
	 * @param bool $double true will allow double encoding, false will not encode existing html entities,
52
	 *
53
	 * @return string
54
	 */
55
	public static function htmlspecialchars($string, $quote_style = ENT_COMPAT, $charset = 'UTF-8', $double = false)
	{
		global $modSettings;

		// Empty values ('', '0', null, ...) are handed back exactly as they came in
		if (empty($string))
		{
			return $string;
		}

		$encoded = htmlspecialchars($string, $quote_style, $charset, $double);

		// Entity checking switched off: the plain encoding is the final result
		if (!empty($modSettings['disableEntityCheck']))
		{
			return $encoded;
		}

		// Numeric entities whose ampersand shows up as &amp; in the encoded text are
		// passed to entity_fix__callback (a function that is not defined in this file)
		return preg_replace_callback('~(&amp;#(\d{1,7}|x[0-9a-fA-F]{1,6});)~', 'entity_fix__callback', $encoded);
	}
71
72
	/**
73
	 * Adds html entities to the array/variable.  Uses two underscores to guard against overloading.
74
	 *
75
	 * What it does:
76
	 *
77
	 * - Adds entities (&quot;, &lt;, &gt;) to the array or string var.
78
	 * - Importantly, does not affect keys, only values.
79
	 * - Calls itself recursively if necessary.
80
	 * - Does not go deeper than 25 to prevent loop exhaustion
81
	 *
82
	 * @param array|string $var The string or array of strings to add entities
83
	 * @param int $level = 0 The current level we're at within the array (if called recursively)
84
	 *
85
	 * @return array|string The string or array of strings with entities added
86
	 */
87
	public static function htmlspecialchars__recursive($var, $level = 0)
	{
		// Anything that is not an array is encoded directly; this ends the recursion
		if (!is_array($var))
		{
			return self::htmlspecialchars($var, ENT_QUOTES);
		}

		// Once the level is above 25 the values are replaced by null instead of being followed
		$too_deep = $level > 25;
		$next_level = $level + 1;

		// Only the values are rewritten, the keys stay as they are
		foreach ($var as $key => $value)
		{
			$var[$key] = $too_deep ? null : self::htmlspecialchars__recursive($value, $next_level);
		}

		return $var;
	}
102
103
	/**
104
	 * Trims tabs, newlines, carriage returns, spaces, vertical tabs and null bytes
105
	 * and any number of space characters from the start and end of a string
106
	 *
107
	 * - Optionally performs an entity_fix to null any invalid character entities from the string
108
	 *
109
	 * @param string $string
110
	 *
111
	 * @return string
112
	 */
113
	public static function htmltrim($string)
	{
		global $modSettings;

		// One or more Unicode separators (\p{Z}), "other" code points (\p{C}) or
		// literal &nbsp; entities, anchored at the start or at the end of the string
		$junk = '(?:[\p{Z}\p{C}]|&nbsp;)+';
		$edges = '~^' . $junk . '|' . $junk . '$~u';

		// Unless disabled, numeric entities go through entity_fix__callback first
		// (that function is not defined in this file)
		if (empty($modSettings['disableEntityCheck']))
		{
			$string = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string);
		}

		return preg_replace($edges, '', $string);
	}
128
129
	/**
130
	 * Trim a string including the HTML space, character 160.  Uses two underscores to guard against overloading.
131
	 *
132
	 * What it does:
133
	 *
134
	 * - Trims a string or an array using html characters as well.
135
	 * - Remove spaces (32), tabs (9), returns (13, 10, and 11), nulls (0), and hard spaces. (160)
136
	 * - Does not affect keys, only values.
137
	 * - May call itself recursively if needed.
138
	 * - Does not go deeper than 25 to prevent loop exhaustion
139
	 *
140
	 * @param array|string $var The string or array of strings to trim
141
	 * @param int $level = 0 How deep we're at within the array (if called recursively)
142
	 *
143
	 * @return mixed[]|string The trimmed string or array of trimmed strings
144
	 */
145
	public static function htmltrim__recursive($var, $level = 0)
	{
		// A single value: trim it and stop recursing
		if (!is_array($var))
		{
			return self::htmltrim($var);
		}

		// Once the level is above 25 the values are replaced by null instead of being followed
		$too_deep = $level > 25;
		$next_level = $level + 1;

		// Trim every value in place, keys are left alone
		foreach ($var as $key => $value)
		{
			$var[$key] = $too_deep ? null : self::htmltrim__recursive($value, $next_level);
		}

		return $var;
	}
161
162
	/**
163
	 * Perform a strpos search on a multi-byte string
164
	 *
165
	 * - Optionally performs an entity_fix to null any invalid character entities from the string before the search
166
	 *
167
	 * @param string $haystack what to search in
168
	 * @param string $needle what is being looked for
169
	 * @param int $offset where to start, assumed 0
170
	 * @param bool $right set to true to mimic strrpos functions
171
	 *
172
	 * @return bool|mixed
173
	 */
174
	public static function strpos($haystack, $needle, $offset = 0, $right = false)
	{
		global $modSettings;

		$check_entities = empty($modSettings['disableEntityCheck']);

		// Tokenizer shared by haystack and needle: every character becomes one array
		// element and each of the listed entities is kept together as a single element
		$splitter = '~(&#' . ($check_entities ? '\d{1,7}' : '021') . ';|&quot;|&amp;|&lt;|&gt;|&nbsp;|.)~u';
		$split_flags = PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY;

		if ($check_entities)
		{
			$haystack = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $haystack);
		}

		$haystack_arr = preg_split($splitter, $haystack, -1, $split_flags);
		$count = 0;

		// Searching from the right is done on the reversed token list; $count is
		// the last index and is used to translate a hit back to a left based position
		if ($right)
		{
			$haystack_arr = array_reverse($haystack_arr);
			$count = count($haystack_arr) - 1;
		}

		// A one byte needle is a single token, so one array search settles it
		if (strlen($needle) === 1)
		{
			$found = array_search($needle, array_slice($haystack_arr, $offset), true);
			if (!is_int($found))
			{
				return false;
			}

			return $right ? $count - ($found + $offset) : $found + $offset;
		}

		if ($check_entities)
		{
			$needle = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $needle);
		}

		$needle_arr = preg_split($splitter, $needle, -1, $split_flags);
		if ($right)
		{
			$needle_arr = array_reverse($needle_arr);
		}

		$needle_size = count($needle_arr);
		$first_token = $needle_arr[0];

		// Jump from one occurrence of the needle's first token to the next and
		// compare the full run of tokens at each stop
		$found = array_search($first_token, array_slice($haystack_arr, $offset), true);
		while (is_int($found))
		{
			$offset += $found;
			if (array_slice($haystack_arr, $offset, $needle_size) === $needle_arr)
			{
				return $right ? ($count - $offset - $needle_size + 1) : $offset;
			}

			// No match here, resume the search one token further on
			$offset++;
			$found = array_search($first_token, array_slice($haystack_arr, $offset), true);
		}

		return false;
	}
216
217
	/**
218
	 * Converts a multi-byte string to lowercase
219
	 *
220
	 * - Prefers to use mb_ functions if available, otherwise will use charset substitution tables
221
	 *
222
	 * @param string $string
223
	 *
224
	 * @return string
225
	 */
226
	public static function strtolower($string)
	{
		// Without the mbstring function, load Charset.subs.php and use utf8_strtolower()
		// (presumably defined by that file, it is not part of this one)
		if (!function_exists('mb_strtolower'))
		{
			require_once(SUBSDIR . '/Charset.subs.php');

			return utf8_strtolower($string);
		}

		return mb_strtolower($string, 'UTF-8');
	}
237
238
	/**
239
	 * Cuts off a multi-byte string at a certain length
240
	 *
241
	 * - Optionally performs an entity_fix to null any invalid character entities from the string prior to the length
242
	 * check
243
	 * - Use this when the number of actual characters (&nbsp; = 6 not 1) must be <= length not the displayable,
244
	 * for example db field compliance to avoid overflow
245
	 *
246
	 * @param string $string
247
	 * @param int $length
248
	 *
249
	 * @return string
250
	 */
251
	public static function truncate($string, $length)
	{
		global $modSettings;

		$check_entities = empty($modSettings['disableEntityCheck']);

		// The entities that must never be split; which numeric ones count depends on the setting
		$ent_list = $check_entities ? '&(?:#\d{1,7}|quot|amp|lt|gt|nbsp);' : '&(?:#021|quot|amp|lt|gt|nbsp);';

		if ($check_entities)
		{
			$string = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string);
		}

		$string = (string) $string;
		$length = max(0, (int) $length);

		// Already within the byte limit
		if (strlen($string) <= $length)
		{
			return $string;
		}

		// Split into indivisible units: a listed entity or one UTF-8 character.  The "s"
		// modifier lets "." take newlines as well.  Working from these units, rather than
		// from a byte-wise substr(), means a multi-byte character cut in half can no longer
		// turn the whole result into an empty string, and an entity is never returned
		// partially.
		if (preg_match_all('~' . $ent_list . '|.~us', $string, $units) === false)
		{
			// Not valid UTF-8, there are no character boundaries to respect; still
			// honor the byte limit, which is what callers rely on
			return substr($string, 0, $length);
		}

		// Keep whole units for as long as the byte length stays within the limit
		$result = '';
		$used = 0;
		foreach ($units[0] as $unit)
		{
			$used += strlen($unit);
			if ($used > $length)
			{
				break;
			}

			$result .= $unit;
		}

		return $result;
	}
272
273
	/**
274
	 * Returns the length of multi-byte string
275
	 *
276
	 * @param string $string
277
	 *
278
	 * @return int
279
	 */
280
	public static function strlen($string)
	{
		global $modSettings;

		// empty() also considers '0' (and 0) empty, yet that is a one character string
		if ($string === '0' || $string === 0)
		{
			return 1;
		}

		if (empty($string))
		{
			return 0;
		}

		if (empty($modSettings['disableEntityCheck']))
		{
			// Every listed entity, and every other character, is swapped for a single
			// underscore so that the length of the result is the character count
			$ent_list = '&(#\d{1,7}|quot|amp|lt|gt|nbsp);';
			if (function_exists('mb_strlen'))
			{
				// NOTE(review): this branch does not run the entity check callback that the
				// non mbstring branch below applies - confirm whether that is intentional
				$check = preg_replace('~' . $ent_list . '|.~u', '_', $string);

				// preg_replace() gives null on failure (for example invalid UTF-8)
				return $check === null ? 0 : mb_strlen($check, 'UTF-8');
			}

			$check = preg_replace('~' . $ent_list . '|.~u', '_', preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string));

			return $check === null ? 0 : strlen($check);
		}

		// Entity checking disabled: &#021; is the only numeric entity counted as one character
		$ent_list = '&(#021|quot|amp|lt|gt|nbsp);';
		$check = preg_replace('~' . $ent_list . '|.~u', '_', $string);

		return $check === null ? 0 : strlen($check);
	}
307
308
	/**
309
	 * Shorten a string of text
310
	 *
311
	 * What it does:
312
	 *
313
	 * - Shortens a text string to a given visual length
314
	 * - Considers certain html entities as 1 in length, &amp; &nbsp; etc
315
	 * - Optionally adds ending ellipsis that honor length or are appended
316
	 * - Optionally attempts to break the string on a word boundary approximately at the allowed length
317
	 * - If using cutword and the resulting length is < len minus buffer then it is truncated to length plus an
318
	 * ellipsis.
319
	 * - Respects internationalization characters, html spacing and entities as one character.
320
	 * - Returns the shortened string.
321
	 * - Does not account for html tags, ie <b>test</b> is 11 characters not 4
322
	 *
323
	 * @param string $string The string to shorten
324
	 * @param int $length The length to cut the string to
325
	 * @param bool $cutword try to cut at a word boundary
326
	 * @param string $ellipsis characters to add at the end of a cut string
327
	 * @param bool $exact set true to include ellipsis in the allowed length, false will append instead
328
	 * @param int $buffer maximum length underflow to allow when cutting on a word boundary
329
	 *
330
	 * @return string
331
	 */
332
	public static function shorten_text($string, $length = 384, $cutword = false, $ellipsis = '...', $exact = true, $buffer = 12)
	{
		// Short enough already, hand it back untouched
		if (self::strlen($string) <= $length)
		{
			return $string;
		}

		// With $exact the ellipsis has to fit inside $length, so its size is reserved up front
		$reserved = 0;
		if ($exact && !empty($ellipsis))
		{
			$reserved = self::strlen($ellipsis);
		}

		$cut = self::substr($string, 0, $length - $reserved);

		// Plain cut, no word boundary wanted
		if (!$cutword)
		{
			return $cut . $ellipsis;
		}

		// Fall back to the last space, but only when that gives up no more than $buffer characters
		$last_space = self::strpos($cut, ' ', 0, true);
		if (!empty($last_space) && ($length - $last_space <= $buffer))
		{
			$cut = self::substr($cut, 0, $last_space);
		}

		return rtrim($cut) . $ellipsis;
	}
362
363
	/**
364
	 * Perform a substr operation on multi-byte strings
365
	 *
366
	 * - Optionally performs an entity_fix to null any invalid character entities from the string before the operation
367
	 *
368
	 * @param string $string
369
	 * @param int $start
370
	 * @param int|null $length
371
	 *
372
	 * @return string
373
	 */
374
	public static function substr($string, $start, $length = null)
	{
		global $modSettings;

		// Pick the tokenizer: one element per character with the listed entities kept
		// whole.  With entity checking on, the string is run through the check first.
		if (empty($modSettings['disableEntityCheck']))
		{
			$splitter = '~(&#\d{1,7};|&quot;|&amp;|&lt;|&gt;|&nbsp;|.)~u';
			$string = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string);
		}
		else
		{
			$splitter = '~(&#021;|&quot;|&amp;|&lt;|&gt;|&nbsp;|.)~u';
		}

		$ent_arr = preg_split($splitter, $string, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

		// array_slice() reads a null length as "through to the end of the array"
		$wanted = array_slice($ent_arr, $start, $length);

		return implode('', $wanted);
	}
389
390
	/**
391
	 * Truncate a string up to a number of characters while preserving whole words and HTML tags
392
	 *
393
	 * This function is an adaption of the cake php function truncate in utility string.php (MIT)
394
	 *
395
	 * @param string $string text to truncate.
396
	 * @param int $length length of returned string
397
	 * @param string $ellipsis characters to add at the end of cut string, like ...
398
	 * @param bool $exact If to account for the $ellipsis length in returned string length
399
	 *
400
	 * @return string Trimmed string.
401
	 */
402
	public static function shorten_html($string, $length = 384, $ellipsis = '...', $exact = true)
	{
		// Measured without its tags the text already fits, so there is nothing to cut
		if (self::strlen(preg_replace('~<.*?>~', '', $string)) <= $length)
		{
			return $string;
		}

		// With $exact the ellipsis counts toward the allowed length
		$total_length = $exact ? self::strlen($ellipsis) : 0;
		$open_tags = array();
		$truncate = '';

		// Elements that never take a closing tag.  The name is matched as a whole: an
		// unanchored test also hit names that merely contain one of these (abbr, textarea,
		// iframe, colgroup ...), so those were never closed again after a cut.
		$void_tags = '~^(?:img|br|wbr|input|hr|area|base|basefont|col|embed|frame|isindex|link|meta|param|source|track)$~i';

		// Group all html open and closing tags, [1] full tag with <> [2] basic tag name [3] tag content
		preg_match_all('~(<\/?([\w+]+)[^>]*>)?([^<>]*)~', $string, $tags, PREG_SET_ORDER);

		// Walk down the stack of tags
		foreach ($tags as $tag)
		{
			// Only tags that can hold content are tracked
			if (!preg_match($void_tags, $tag[2]))
			{
				// Opening tag, remember it at the top of the stack so it can be closed later
				if (preg_match('~<[\w]+[^>]*>~', $tag[0]))
				{
					array_unshift($open_tags, $tag[2]);
				}
				// Closing tag, its opening tag is no longer pending
				elseif (preg_match('~<\/([\w]+)[^>]*>~', $tag[0], $close_tag))
				{
					$pos = array_search($close_tag[1], $open_tags, true);
					if ($pos !== false)
					{
						array_splice($open_tags, $pos, 1);
					}
				}
			}

			// Add this (opening or closing) tag to $truncate
			$truncate .= $tag[1];

			// Length of the tag content, html entities count as a single character
			$content_length = self::strlen($tag[3]);

			// Over the limit with this content, only add what is still allowed and stop
			if ($content_length + $total_length > $length)
			{
				$remaining = $length - $total_length;
				$truncate .= self::substr($tag[3], 0, $remaining);
				break;
			}

			// Still room to go so add the tag content and continue
			$truncate .= $tag[3];
			$total_length += $content_length;

			// Are we there yet?
			if ($total_length >= $length)
			{
				break;
			}
		}

		// Our truncated string up to the last space
		$space_pos = self::strpos($truncate, ' ', 0, true);
		$space_pos = empty($space_pos) ? $length : $space_pos;

		$truncate_check = self::substr($truncate, 0, $space_pos);

		// Make sure this would not cause a cut in the middle of a tag
		$lastOpenTag = (int) self::strpos($truncate_check, '<', 0, true);
		$lastCloseTag = (int) self::strpos($truncate_check, '>', 0, true);
		if ($lastOpenTag > $lastCloseTag)
		{
			// Find the last full open tag in our truncated string, its what was being cut
			preg_match_all('~<[\w]+[^>]*>~', $truncate, $lastTagMatches);
			$last_tag = array_pop($lastTagMatches[0]);

			// Set the cut point to just after that tag.  self::strpos() answers in characters
			// (entities counting as one), so the tag has to be measured the same way; a byte
			// strlen() overshoots as soon as the tag holds an entity or multi-byte character.
			if ($last_tag !== null)
			{
				$space_pos = self::strpos($truncate, $last_tag, 0, true) + self::strlen($last_tag);
				$space_pos = empty($space_pos) ? $length : $space_pos;
			}
		}

		// Look at what we are going to cut off the end of our truncated string
		$bits = self::substr($truncate, $space_pos);

		// Closing tags that get cut off need to be added back at the cut point
		preg_match_all('~<\/([a-z]+)>~', $bits, $dropped_tags, PREG_SET_ORDER);
		if (!empty($dropped_tags))
		{
			if (!empty($open_tags))
			{
				foreach ($dropped_tags as $closing_tag)
				{
					if (!in_array($closing_tag[1], $open_tags, true))
					{
						array_unshift($open_tags, $closing_tag[1]);
					}
				}
			}
			else
			{
				foreach ($dropped_tags as $closing_tag)
				{
					$open_tags[] = $closing_tag[1];
				}
			}
		}

		// Cut it
		$truncate = self::substr($truncate, 0, $space_pos);

		// Dot dot dot
		$truncate .= $ellipsis;

		// Finally close any html tags that were left open
		foreach ($open_tags as $tag)
		{
			$truncate .= '</' . $tag . '>';
		}

		return $truncate;
	}
529
530
	/**
531
	 * Converts the first character of each word in a multi-byte string to uppercase
532
	 *
533
	 * @param string $string
534
	 *
535
	 * @return string
536
	 */
537
	public static function ucwords($string)
	{
		// The whitespace runs are captured too, so even indexes hold the words and
		// odd indexes the whitespace that separated them
		$pieces = preg_split('~([\s\r\n\t]+)~', $string, -1, PREG_SPLIT_DELIM_CAPTURE);

		foreach ($pieces as $index => $piece)
		{
			if ($index % 2 === 0)
			{
				$pieces[$index] = self::ucfirst($piece);
			}
		}

		// Gluing everything back restores the original whitespace exactly
		return implode('', $pieces);
	}
547
548
	/**
549
	 * Converts the first character of a multi-byte string to uppercase
550
	 *
551
	 * @param string $string
552
	 *
553
	 * @return string
554
	 */
555
	public static function ucfirst($string)
	{
		// Split off the first character, uppercase it and put the remainder back unchanged
		$head = self::substr($string, 0, 1);
		$tail = self::substr($string, 1);

		return self::strtoupper($head) . $tail;
	}
559
560
	/**
561
	 * Converts a multi-byte string to uppercase
562
	 *
563
	 * Prefers to use mb_ functions if available, otherwise will use charset substitution tables
564
	 *
565
	 * @param string $string
566
	 *
567
	 * @return string
568
	 */
569
	public static function strtoupper($string)
	{
		// Without the mbstring function, load Charset.subs.php and use utf8_strtoupper()
		// (presumably defined by that file, it is not part of this one)
		if (!function_exists('mb_strtoupper'))
		{
			require_once(SUBSDIR . '/Charset.subs.php');

			return utf8_strtoupper($string);
		}

		return mb_strtoupper($string, 'UTF-8');
	}
580
581
	/**
582
	 * Wrappers for unserialize
583
	 *
584
	 * What it does:
585
	 *
586
	 * @param string $string The string to unserialize
587
	 * @param string[] $options Optional.  Additionally, it doesn't allow to use the option:
588
	 *                          allowed_classes => true, that is reverted to false.
589
	 * @return mixed
590
	 */
591
	public static function unserialize($string, $options = array())
	{
		// Input that does not pass the serialized check yields an empty string
		if (!self::is_serialized($string))
		{
			return '';
		}

		// Whatever the caller passed, creating objects is never allowed
		$options['allowed_classes'] = false;

		return unserialize($string, $options);
	}
601
602
	/**
603
	 * Determine if a string is serialized
604
	 *
605
	 * - avoids the notice/warning it could raise
606
	 *
607
	 * @param string $string
608
	 * @return bool
609
	 */
610
	public static function is_serialized($string)
	{
		// Easy cases
		if (!is_string($string) || $string === '')
		{
			return false;
		}

		// unserialize() answers false for a failure as well as for a serialized false,
		// so that one valid value has to be recognized by its text
		if ($string === 'b:0;')
		{
			return true;
		}

		// Attempt to unserialize, with a do-nothing handler swallowing the notice or
		// warning that malformed input raises
		set_error_handler(static function () { /* ignore errors */ });
		try
		{
			return unserialize($string, ['allowed_classes' => false]) !== false;
		}
		catch (\Throwable)
		{
			// Anything thrown while decoding means the string is not usable serialized data
			return false;
		}
		finally
		{
			// Always hand error handling back, whatever happened above
			restore_error_handler();
		}
	}
640
641
	/**
642
	 * Provide a replacement for strftime(), which is only called directly on PHP versions before 8.1
643
	 *
644
	 * @param string $format of the date/time to return
645
	 * @param int|null $timestamp to convert
646
	 * @return string|false
647
	 */
648
	public static function strftime(string $format, ?int $timestamp = null)
	{
		// Before PHP 8.1 the native function is used as is
		if (function_exists('strftime') && (PHP_VERSION_ID < 80100))
		{
			return \strftime($format, $timestamp);
		}

		$timestamp = $timestamp ?? time();

		// strftime specifier => date() format.  Day and month names come from date(), so
		// they are always English here.
		$date_equivalents = array(
			// Day
			'%a' => 'D',
			'%A' => 'l',
			'%d' => 'd',
			'%e' => 'j',
			'%u' => 'N',
			'%w' => 'w',
			// Week
			'%V' => 'W', // ISO-8601 week number
			// Month
			'%b' => 'M',
			'%B' => 'F',
			'%h' => 'M',
			'%m' => 'm',
			// Year
			'%G' => 'o', // ISO-8601 week-numbering year, four digits
			'%y' => 'y',
			'%Y' => 'Y',
			// Time
			'%H' => 'H',
			'%k' => 'G',
			'%I' => 'h',
			'%l' => 'g',
			'%M' => 'i',
			'%p' => 'A',
			'%P' => 'a',
			'%r' => 'h:i:s A', // 12-hour clock with AM/PM
			'%R' => 'H:i',
			'%S' => 's',
			'%T' => 'H:i:s',
			'%X' => 'H:i:s', // Time representation, as in the C locale
			'%z' => 'O',
			'%Z' => 'T',
			// Time and Date Stamps
			'%c' => 'c',
			'%D' => 'm/d/y',
			'%F' => 'Y-m-d',
			'%s' => 'U',
			'%x' => 'm/d/y', // Date representation, as in the C locale
		);

		// "%" is part of the character class so that "%%" is consumed as one specifier
		return preg_replace_callback(
			'/%[A-Za-z%]/',
			static function ($matches) use ($timestamp, $date_equivalents) {
				$specifier = $matches[0];

				// Literals, and values date() has no direct format character for
				switch ($specifier)
				{
					case '%%':
						return '%';
					case '%n':
						return "\n";
					case '%t':
						return "\t";
					case '%j':
						// date('z') counts from 0, %j is 001 to 366
						return sprintf('%03d', (int) date('z', $timestamp) + 1);
					case '%C':
						// Century: the year divided by 100
						return sprintf('%02d', intdiv((int) date('Y', $timestamp), 100));
					case '%g':
						// Two digit form of the ISO-8601 week-numbering year
						return substr(date('o', $timestamp), -2);
					case '%U':
						// Week of the year, weeks starting on the first Sunday
						return sprintf('%02d', intdiv((int) date('z', $timestamp) + 7 - (int) date('w', $timestamp), 7));
					case '%W':
						// Week of the year, weeks starting on the first Monday
						return sprintf('%02d', intdiv((int) date('z', $timestamp) + 7 - ((int) date('w', $timestamp) + 6) % 7, 7));
				}

				// Specifiers that are not known are left in the output unchanged
				return isset($date_equivalents[$specifier]) ? date($date_equivalents[$specifier], $timestamp) : $specifier;
			},
			$format
		);
	}
719
720
	/**
721
	 * Provide a replacement for gmstrftime(), which is only called directly on PHP versions before 8.1
722
	 *
723
	 * @param string $format of the date/time to return
724
	 * @param int|null $timestamp to convert
725
	 * @return string|false
726
	 */
727
	public static function gmstrftime(string $format, ?int $timestamp = null)
	{
		// Before PHP 8.1 the native function is used as is
		if (function_exists('gmstrftime') && (PHP_VERSION_ID < 80100))
		{
			return \gmstrftime($format, $timestamp);
		}

		// self::strftime() formats with date(), which follows the default timezone.  A GMT
		// result therefore needs the default switched to UTC for the duration of the call.
		$previous_timezone = date_default_timezone_get();
		date_default_timezone_set('UTC');
		try
		{
			return self::strftime($format, $timestamp);
		}
		finally
		{
			// Put the timezone back even if formatting threw
			date_default_timezone_set($previous_timezone);
		}
	}
736
737
	/**
738
	 * Checks if the string contains any 4byte chars (emoji) and if so,
739
	 * converts them into &#x...; HTML entities.
740
	 *
741
	 * @param string $string
742
	 * @return string
743
	 */
744
	public static function clean_4byte_chars($string)
	{
		global $modSettings;

		// Nothing is converted when the using_utf8mb4 setting is on
		if (!empty($modSettings['using_utf8mb4']))
		{
			return $string;
		}

		// U+10000 to U+10FFFF are the code points that take four bytes in UTF-8
		$four_byte = '~[\x{10000}-\x{10FFFF}]~u';

		// No such character (or a string that is not valid UTF-8): return it unchanged
		if (!preg_match($four_byte, $string))
		{
			return $string;
		}

		// Swap each one for its hexadecimal entity; everything else is copied as is
		return preg_replace_callback(
			$four_byte,
			static function ($found) {
				$entity = self::uniord($found[0]);

				// No code point could be worked out: use U+FFFD, the replacement character
				if ($entity === false)
				{
					return "\xEF\xBF\xBD";
				}

				return '&#x' . dechex($entity) . ';';
			},
			$string
		);
	}
795
796
	/**
797
	 * Converts a 4byte char into the corresponding HTML entity code.
798
	 *
799
	 * This function is derived from:
800
	 * http://www.greywyvern.com/code/php/utf8_html
801
	 *
802
	 * @param string $c
803
	 * @return integer|false
804
	 */
805
	public static function uniord($c)
	{
		// The first byte tells how many bytes the sequence has
		$lead = ord($c[0]);

		// 0 - 127: a single byte, the value is the code point
		if ($lead <= 127)
		{
			return $lead;
		}

		// 128 - 191: not a valid first byte, reported as 0
		if ($lead <= 191)
		{
			return 0;
		}

		// 192 - 223: two byte sequence
		if ($lead <= 223)
		{
			return ($lead - 192) * 64 + (ord($c[1]) - 128);
		}

		// 224 - 239: three byte sequence
		if ($lead <= 239)
		{
			return ($lead - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
		}

		// 240 - 247: four byte sequence
		if ($lead <= 247)
		{
			return ($lead - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
		}

		// 248 - 251: five byte sequence
		if ($lead <= 251)
		{
			return ($lead - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
		}

		// 252 - 253: six byte sequence
		if ($lead <= 253)
		{
			return ($lead - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
		}

		// 254 - 255: no value can be derived
		return false;
	}
844
}
845