Util::ucwords()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 9
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 4
nc 2
nop 1
dl 0
loc 9
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * Utility functions, such as to handle multi byte strings
5
 *
6
 * @package   ElkArte Forum
7
 * @copyright ElkArte Forum contributors
8
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
9
 *
10
 * @version 2.0 dev
11
 *
12
 */
13
14
namespace ElkArte\Helper;
15
16
/**
17
 * Utility functions, such as to handle multi byte strings
18
 * Note: some of these might be deprecated or removed in the future.
19
 */
20
class Util
21
{
22
	protected static $_entity_check_reg = '~(&#(\d{1,7}|x[0-9a-fA-F]{1,6});)~';
23
24
	/**
25
	 * Converts invalid / disallowed / out of range entities to nulls
26
	 *
27
	 * @param string $string
28
	 *
29
	 * @return string
30
	 */
31
	public static function entity_fix($string): string
	{
		// A leading "x" marks a hexadecimal value, anything else is read as decimal
		if ($string[0] === 'x')
		{
			$code_point = hexdec(substr($string, 1));
		}
		else
		{
			$code_point = (int) $string;
		}

		// Below 0x20, above 0x10FFFF, inside 0xD800-0xDFFF, or one of 0x202D / 0x202E: drop it
		$out_of_range = $code_point < 0x20 || $code_point > 0x10FFFF;
		$in_blocked_span = $code_point >= 0xD800 && $code_point <= 0xDFFF;
		$is_blocked_single = in_array($code_point, [0x202D, 0x202E], true);

		if ($out_of_range || $in_blocked_span || $is_blocked_single)
		{
			return '';
		}

		// Always hand back the decimal form of the entity
		return '&#' . $code_point . ';';
	}
43
44
	/**
45
	 * Performs an htmlspecialchars on a string, using UTF-8 character set
46
	 * Optionally performs an entity_fix to null any invalid character entities from the string
47
	 *
48
	 * @param string $string
49
	 * @param int $quote_style integer or constant representation of one
50
	 * @param string $charset only UTF-8 allowed
51
	 * @param bool $double true will allow double encoding, false will not encode existing html entities,
52
	 *
53
	 * @return string|null
54
	 */
55
	public static function htmlspecialchars($string, $quote_style = ENT_COMPAT, $charset = 'UTF-8', $double = false): ?string
	{
		global $modSettings;

		// Nothing to encode, hand the value straight back
		if (empty($string))
		{
			return $string;
		}

		$encoded = htmlspecialchars($string, $quote_style, $charset, $double);

		// Entity checking is switched off, the plain encoding is the result
		if (!empty($modSettings['disableEntityCheck']))
		{
			return $encoded;
		}

		// Pass every numeric entity whose ampersand was encoded to &amp; through the entity callback
		return preg_replace_callback('~(&amp;#(\d{1,7}|x[0-9a-fA-F]{1,6});)~', 'entity_fix__callback', $encoded);
	}
71
72
	/**
73
	 * Adds html entities to the array/variable.  Uses two underscores to guard against overloading.
74
	 *
75
	 * What it does:
76
	 *
77
	 * - Adds entities (&quot;, &lt;, &gt;) to the array or string var.
78
	 * - Importantly, does not affect keys, only values.
79
	 * - Calls itself recursively if necessary.
80
	 * - Does not go deeper than 25 to prevent loop exhaustion
81
	 *
82
	 * @param array|string $var The string or array of strings to add entities
83
	 * @param int $level = 0 The current level we're at within the array (if called recursively)
84
	 *
85
	 * @return array|string The string or array of strings with entities added
86
	 */
87
	public static function htmlspecialchars__recursive($var, $level = 0)
	{
		// A non-array value is encoded directly, quotes included
		if (!is_array($var))
		{
			return self::htmlspecialchars($var, ENT_QUOTES);
		}

		// Beyond the depth limit the elements are nulled instead of being descended into
		$too_deep = $level > 25;

		foreach (array_keys($var) as $key)
		{
			$var[$key] = $too_deep ? null : self::htmlspecialchars__recursive($var[$key], $level + 1);
		}

		return $var;
	}
102
103
	/**
104
	 * Trims tabs, newlines, carriage returns, spaces, vertical tabs and null bytes
105
	 * and any number of space characters from the start and end of a string
106
	 *
107
	 * - Optionally performs an entity_fix to null any invalid character entities from the string
108
	 *
109
	 * @param string $string
110
	 *
111
	 * @return string
112
	 */
113
	public static function htmltrim($string): string
	{
		global $modSettings;

		// Unicode separators (\p{Z}) and "other" characters (\p{C}), plus the &nbsp; entity,
		// are stripped from both ends of the string
		$space_chars = '\p{Z}\p{C}';
		$edges = '~^(?:[' . $space_chars . ']|&nbsp;)+|(?:[' . $space_chars . ']|&nbsp;)+$~u';

		// Unless disabled, run the numeric entities through the entity callback first
		if (empty($modSettings['disableEntityCheck']))
		{
			$string = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string);
		}

		return preg_replace($edges, '', $string);
	}
128
129
	/**
130
	 * Trim a string including the HTML space, character 160.  Uses two underscores to guard against overloading.
131
	 *
132
	 * What it does:
133
	 *
134
	 * - Trims a string or an array using html characters as well.
135
	 * - Remove spaces (32), tabs (9), returns (13, 10, and 11), nulls (0), and hard spaces. (160)
136
	 * - Does not affect keys, only values.
137
	 * - May call itself recursively if needed.
138
	 * - Does not go deeper than 25 to prevent loop exhaustion
139
	 *
140
	 * @param array|string $var The string or array of strings to trim
141
	 * @param int $level = 0 How deep we're at within the array (if called recursively)
142
	 *
143
	 * @return array|string The trimmed string or array of trimmed strings
144
	 */
145
	public static function htmltrim__recursive($var, $level = 0)
	{
		// A non-array value is trimmed directly
		if (!is_array($var))
		{
			return self::htmltrim($var);
		}

		// Beyond the depth limit the elements are nulled instead of being descended into
		$too_deep = $level > 25;

		foreach (array_keys($var) as $key)
		{
			$var[$key] = $too_deep ? null : self::htmltrim__recursive($var[$key], $level + 1);
		}

		return $var;
	}
161
162
	/**
163
	 * Perform a strpos search on a multi-byte string
164
	 *
165
	 * - Optionally performs an entity_fix to null any invalid character entities from the string before the search
166
	 *
167
	 * @param string $haystack what to search in
168
	 * @param string $needle what is being looked for
169
	 * @param int $offset where to start, assumed 0
170
	 * @param bool $right set to true to mimic strrpos functions
171
	 *
172
	 * @return bool|int
173
	 */
174
	public static function strpos($haystack, $needle, $offset = 0, $right = false)
	{
		global $modSettings;

		$check_entities = empty($modSettings['disableEntityCheck']);

		// One element per numeric entity, listed named entity, or single character
		$splitter = '~(&#' . ($check_entities ? '\d{1,7}' : '021') . ';|&quot;|&amp;|&lt;|&gt;|&nbsp;|.)~u';
		$split_flags = PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY;

		$haystack_check = $haystack;
		if ($check_entities)
		{
			$haystack_check = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $haystack);
		}

		$hay_chars = preg_split($splitter, $haystack_check, -1, $split_flags);
		$last_index = 0;

		// Searching from the right is done on the reversed element list
		if ($right)
		{
			$hay_chars = array_reverse($hay_chars);
			$last_index = count($hay_chars) - 1;
		}

		// A one byte needle can be located with a single lookup
		if (strlen($needle) === 1)
		{
			$found = array_search($needle, array_slice($hay_chars, $offset), true);
			if (!is_int($found))
			{
				return false;
			}

			// Translate the position in the (possibly reversed) list back to the original order
			return $right ? $last_index - ($found + $offset) : $found + $offset;
		}

		$needle_check = $needle;
		if ($check_entities)
		{
			$needle_check = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $needle);
		}

		$needle_chars = preg_split($splitter, $needle_check, -1, $split_flags);
		if ($right)
		{
			$needle_chars = array_reverse($needle_chars);
		}

		$needle_size = count($needle_chars);

		// Jump to each occurrence of the needle's first element and compare the full run there
		$found = array_search($needle_chars[0], array_slice($hay_chars, $offset), true);
		while (is_int($found))
		{
			$offset += $found;
			if (array_slice($hay_chars, $offset, $needle_size) === $needle_chars)
			{
				return $right ? ($last_index - $offset - $needle_size + 1) : $offset;
			}

			$found = array_search($needle_chars[0], array_slice($hay_chars, ++$offset), true);
		}

		return false;
	}
216
217
	/**
218
	 * Converts a multi-byte string to lowercase
219
	 *
220
	 * - Prefers to use mb_ functions if available, otherwise will use charset substitution tables
221
	 *
222
	 * @param string $string
223
	 *
224
	 * @return string
225
	 */
226
	public static function strtolower($string): string
	{
		// Without mb_strtolower, load the charset helper file and use its utf8_strtolower()
		if (!function_exists('mb_strtolower'))
		{
			require_once(SUBSDIR . '/Charset.subs.php');

			return utf8_strtolower($string);
		}

		return mb_strtolower($string, 'UTF-8');
	}
237
238
	/**
239
	 * Cuts off a multi-byte string at a certain length
240
	 *
241
	 * - Optionally performs an entity_fix to null any invalid character entities from the string prior to the length
242
	 * check
243
	 * - Use this when the number of actual characters (&nbsp; = 6 not 1) must be <= length not the displayable,
244
	 * for example db field compliance to avoid overflow
245
	 *
246
	 * @param string $string
247
	 * @param int $length
248
	 *
249
	 * @return string
250
	 */
251
	public static function truncate($string, $length): string
	{
		global $modSettings;

		$check_entities = empty($modSettings['disableEntityCheck']);

		// Entities that must be kept whole; with the check disabled only &#021; is recognised as numeric
		$ent_list = $check_entities ? '&(?:#\d{1,7}|quot|amp|lt|gt|nbsp);' : '&(?:#021|quot|amp|lt|gt|nbsp);';

		if ($check_entities)
		{
			$string = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string);
		}

		$string = (string) $string;
		$length = max(0, (int) $length);

		// Already fits in the allowed number of bytes
		if (strlen($string) <= $length)
		{
			return $string;
		}

		// Split into whole entities / whole characters.  The "s" modifier lets "." match newlines
		// so a line break can not make the split miss part of the text.
		$pieces = preg_split('~(' . $ent_list . '|.)~su', $string, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

		// The split fails on invalid UTF-8, fall back to a plain byte cut so the limit is still honored
		if ($pieces === false)
		{
			return substr($string, 0, $length);
		}

		// Keep adding whole pieces while the byte length stays within the limit, this never
		// cuts through the middle of a multi-byte character or an entity
		$result = '';
		$used = 0;
		foreach ($pieces as $piece)
		{
			$used += strlen($piece);
			if ($used > $length)
			{
				break;
			}

			$result .= $piece;
		}

		return $result;
	}
272
273
	/**
274
	 * Returns the length of multi-byte string
275
	 *
276
	 * @param string $string
277
	 *
278
	 * @return int
279
	 */
280
	public static function strlen($string): int
	{
		global $modSettings;

		// empty() alone would also report '0' (and 0) as having no length, so let numeric values through
		if (empty($string) && !is_numeric($string))
		{
			return 0;
		}

		$check_entities = empty($modSettings['disableEntityCheck']);

		// Each listed entity counts as one character; with the check disabled only &#021; is recognised as numeric
		$ent_list = $check_entities ? '&(#\d{1,7}|quot|amp|lt|gt|nbsp);' : '&(#021|quot|amp|lt|gt|nbsp);';

		// mb_strlen is only used on the entity checking path
		$use_mb = $check_entities && function_exists('mb_strlen');

		// NOTE(review): the entity callback is only applied when mb_strlen is missing, so the two
		// entity checking paths can disagree on strings holding invalid entities - confirm intent
		if ($check_entities && !$use_mb)
		{
			$string = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string);
		}

		// Collapse every entity / character to a single placeholder and count those
		$check = preg_replace('~' . $ent_list . '|.~u', '_', $string);
		if ($check === null)
		{
			return 0;
		}

		return $use_mb ? mb_strlen($check, 'UTF-8') : strlen($check);
	}
307
308
	/**
309
	 * Shorten a string of text
310
	 *
311
	 * What it does:
312
	 *
313
	 * - Shortens a text string to a given visual length
314
	 * - Considers certain html entities as 1 in length, &amp; &nbsp; etc
315
	 * - Optionally adds ending ellipsis that honor length or are appended
316
	 * - Optionally attempts to break the string on a word boundary approximately at the allowed length
317
	 * - If using cutword and the resulting length is < len minus buffer then it is truncated to length plus an
318
	 * ellipsis.
319
	 * - Respects internationalization characters, html spacing and entities as one character.
320
	 * - Returns the shortened string.
321
	 * - Does not account for html tags, ie <b>test</b> is 11 characters not 4
322
	 *
323
	 * @param string $string The string to shorten
324
	 * @param int $length The length to cut the string to
325
	 * @param bool $cutword try to cut at a word boundary
326
	 * @param string $ellipsis characters to add at the end of a cut string
327
	 * @param bool $exact set true to include ellipsis in the allowed length, false will append instead
328
	 * @param int $buffer maximum length underflow to allow when cutting on a word boundary
329
	 *
330
	 * @return string|null
331
	 */
332
	public static function shorten_text($string, $length = 384, $cutword = false, $ellipsis = '...', $exact = true, $buffer = 12): ?string
	{
		// Short enough as it is
		if (self::strlen($string) <= $length)
		{
			return $string;
		}

		// Room the ellipsis takes when it has to fit inside the allowed length
		$ending = !empty($ellipsis) && $exact ? self::strlen($ellipsis) : 0;

		$string = self::substr($string, 0, $length - $ending);

		// A plain cut at the length
		if (!$cutword)
		{
			return $string . $ellipsis;
		}

		// Back up to the last space, but only when that does not shorten the text by more than the buffer
		$space_pos = self::strpos($string, ' ', 0, true);
		if (!empty($space_pos) && ($length - $space_pos <= $buffer))
		{
			$string = self::substr($string, 0, $space_pos);
		}

		return rtrim($string) . $ellipsis;
	}
362
363
	/**
364
	 * Perform a substr operation on multi-byte strings
365
	 *
366
	 * - Optionally performs an entity_fix to null any invalid character entities from the string before the operation
367
	 *
368
	 * @param string $string
369
	 * @param string $start
370
	 * @param int|null $length
371
	 *
372
	 * @return string
373
	 */
374
	public static function substr($string, $start, $length = null): string
	{
		global $modSettings;

		// One element per numeric entity, listed named entity, or single character
		if (empty($modSettings['disableEntityCheck']))
		{
			$splitter = '~(&#\d{1,7};|&quot;|&amp;|&lt;|&gt;|&nbsp;|.)~u';
			$string = preg_replace_callback(self::$_entity_check_reg, 'entity_fix__callback', $string);
		}
		else
		{
			$splitter = '~(&#021;|&quot;|&amp;|&lt;|&gt;|&nbsp;|.)~u';
		}

		// NOTE(review): "." does not match a newline here, so a run of newlines ends up as a single
		// element (the text between two delimiters) - confirm this is intended
		$ent_arr = preg_split($splitter, $string, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

		// Slice by element so that entities and multi-byte characters stay whole
		if ($length === null)
		{
			$wanted = array_slice($ent_arr, $start);
		}
		else
		{
			$wanted = array_slice($ent_arr, $start, $length);
		}

		return implode('', $wanted);
	}
389
390
	/**
391
	 * Truncate a string up to a number of characters while preserving whole words and HTML tags
392
	 *
393
	 * This function is an adaption of the cake php function truncate in utility string.php (MIT)
394
	 *
395
	 * @param string $string text to truncate.
396
	 * @param int $length length of returned string
397
	 * @param string $ellipsis characters to add at the end of cut string, like ...
398
	 * @param bool $exact If to account for the $ellipsis length in returned string length
399
	 *
400
	 * @return string Trimmed string.
401
	 */
402
	public static function shorten_html($string, $length = 384, $ellipsis = '...', $exact = true): string
	{
		// If its shorter than the maximum length, while accounting for html tags, simply return
		if (self::strlen(preg_replace('~<.*?>~', '', $string)) <= $length)
		{
			return $string;
		}

		// Start off empty, the ellipsis counts toward the length when $exact is set
		$total_length = $exact ? self::strlen($ellipsis) : 0;
		$open_tags = [];
		$truncate = '';

		// Group all html open and closing tags, [1] full tag with <> [2] basic tag name [3] tag content
		preg_match_all('~(<\/?([\w+]+)[^>]*>)?([^<>]*)~', $string, $tags, PREG_SET_ORDER);

		// Tags that never take a closing tag.  The name is matched as a whole (anchored, case-insensitive)
		// so that textarea, iframe, colgroup, abbr ... are not mistaken for area, frame, col, br
		$void_tags = '~^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param|embed|source|track|wbr)$~i';

		// Walk down the stack of tags
		foreach ($tags as $tag)
		{
			// If this tag has content
			if (!preg_match($void_tags, $tag[2]))
			{
				// Opening tag, add it to the top of the stack of tags that will need closing
				if (preg_match('~<[\w]+[^>]*>~', $tag[0]))
				{
					array_unshift($open_tags, $tag[2]);
				}
				// Closing tag
				elseif (preg_match('~<\/([\w]+)[^>]*>~', $tag[0], $close_tag))
				{
					// Remove its starting tag
					$pos = array_search($close_tag[1], $open_tags, true);
					if ($pos !== false)
					{
						array_splice($open_tags, $pos, 1);
					}
				}
			}

			// Add this (opening or closing) tag to $truncate
			$truncate .= $tag[1];

			// Calculate the length of the actual tag content, accounts for html entities as a single characters
			$content_length = self::strlen($tag[3]);

			// Have we exceeded the allowed length limit, only add in what we are allowed
			if ($content_length + $total_length > $length)
			{
				// The number of characters which we can still return
				$remaining = $length - $total_length;
				$truncate .= self::substr($tag[3], 0, $remaining);
				break;
			}

			// Still room to go so add the tag content and continue
			$truncate .= $tag[3];
			$total_length += $content_length;

			// Are we there yet?
			if ($total_length >= $length)
			{
				break;
			}
		}

		// Our truncated string up to the last space
		$space_pos = self::strpos($truncate, ' ', 0, true);
		$space_pos = empty($space_pos) ? $length : $space_pos;

		$truncate_check = self::substr($truncate, 0, $space_pos);

		// Make sure this would not cause a cut in the middle of a tag
		$lastOpenTag = (int) self::strpos($truncate_check, '<', 0, true);
		$lastCloseTag = (int) self::strpos($truncate_check, '>', 0, true);
		if ($lastOpenTag > $lastCloseTag)
		{
			// Find the last full open tag in our truncated string, its what was being cut
			preg_match_all('~<[\w]+[^>]*>~', $truncate, $lastTagMatches);
			$last_tag = array_pop($lastTagMatches[0]);

			// Set the space to just after the last tag
			$space_pos = self::strpos($truncate, $last_tag, 0, true) + strlen($last_tag);
			$space_pos = empty($space_pos) ? $length : $space_pos;
		}

		// Look at what we are going to cut off the end of our truncated string
		$bits = self::substr($truncate, $space_pos);

		// Does it cut a tag off, if so we need to know so it can be added back at the cut point
		preg_match_all('~<\/([a-z]+)>~', $bits, $dropped_tags, PREG_SET_ORDER);
		if (!empty($dropped_tags))
		{
			if (!empty($open_tags))
			{
				foreach ($dropped_tags as $closing_tag)
				{
					if (!in_array($closing_tag[1], $open_tags, true))
					{
						array_unshift($open_tags, $closing_tag[1]);
					}
				}
			}
			else
			{
				foreach ($dropped_tags as $closing_tag)
				{
					$open_tags[] = $closing_tag[1];
				}
			}
		}

		// Cut it
		$truncate = self::substr($truncate, 0, $space_pos);

		// Dot dot dot
		$truncate .= $ellipsis;

		// Finally close any html tags that were left open
		foreach ($open_tags as $tag)
		{
			$truncate .= '</' . $tag . '>';
		}

		return $truncate;
	}
527
528
	/**
529
	 * Converts the first character of each word in a multi-byte string to uppercase
530
	 *
531
	 * @param string $string
532
	 *
533
	 * @return string
534
	 */
535
	public static function ucwords($string): string
	{
		// Keep the whitespace runs as their own elements so the string is rebuilt unchanged
		$pieces = preg_split('~([\s\r\n\t]+)~', $string, -1, PREG_SPLIT_DELIM_CAPTURE);

		// Words sit at the even indexes, the captured whitespace at the odd ones
		foreach ($pieces as $index => $piece)
		{
			if ($index % 2 === 0)
			{
				$pieces[$index] = self::ucfirst($piece);
			}
		}

		return implode('', $pieces);
	}
545
546
	/**
547
	 * Converts the first character of a multi-byte string to uppercase
548
	 *
549
	 * @param string $string
550
	 *
551
	 * @return string
552
	 */
553
	public static function ucfirst($string): string
	{
		// Split off the first element using the entity aware substr, uppercase it, and glue the rest back on
		$first = self::substr($string, 0, 1);
		$rest = self::substr($string, 1);

		return self::strtoupper($first) . $rest;
	}
557
558
	/**
559
	 * Converts a multi-byte string to uppercase
560
	 *
561
	 * Prefers to use mb_ functions if available, otherwise will use charset substitution tables
562
	 *
563
	 * @param string $string
564
	 *
565
	 * @return string
566
	 */
567
	public static function strtoupper($string): string
	{
		// Without mb_strtoupper, load the charset helper file and use its utf8_strtoupper()
		if (!function_exists('mb_strtoupper'))
		{
			require_once(SUBSDIR . '/Charset.subs.php');

			return utf8_strtoupper($string);
		}

		return mb_strtoupper($string, 'UTF-8');
	}
578
579
	/**
580
	 * Wrappers for unserialize
581
	 *
582
	 * What it does:
583
	 *
584
	 * @param string $string The string to unserialize
585
	 * @param string[] $options Optional.  Additionally, it doesn't allow to use the option:
586
	 *                          allowed_classes => true, that is reverted to false.
587
	 * @return mixed|false
588
	 */
589
	public static function unserialize($string, $options = [])
	{
		// Whatever the caller passed, objects are never instantiated
		$options['allowed_classes'] = false;

		// Anything that is not a serialized string yields an empty string
		return self::is_serialized($string) ? unserialize($string, $options) : '';
	}
599
600
	/**
601
	 * Determine if a string is serialized
602
	 *
603
	 * - Avoids the notice/warning it could raise
604
	 *
605
	 * @param string $string
606
	 * @return bool
607
	 */
608
	public static function is_serialized($string): bool
	{
		// Easy cases
		if (!is_string($string) || $string === '')
		{
			return false;
		}

		// unserialize() returns false for a failure as well as for a serialized boolean false,
		// so that one valid payload has to be recognized before the attempt below
		if ($string === 'b:0;')
		{
			return true;
		}

		// Attempt to unserialize, mask the notice/warning a malformed string raises
		set_error_handler(static function () { /* ignore errors */ });
		try
		{
			return unserialize($string, ['allowed_classes' => false]) !== false;
		}
		catch (\Throwable)
		{
			// Anything thrown while decoding means it is not usable serialized data
			return false;
		}
		finally
		{
			// Always put the previous error handler back
			restore_error_handler();
		}
	}
638
639
	/**
640
	 * Provide a PHP 8.1 version of strftime
641
	 *
642
	 * @param string $format of the date/time to return
643
	 * @param int|null $timestamp to convert
644
	 * @return string|false
645
	 */
646
	public static function strftime(string $format, ?int $timestamp = null)
	{
		// Resolve "now" up front so neither branch is ever handed a null timestamp
		$timestamp ??= time();

		if (function_exists('strftime') && (PHP_VERSION_ID < 80100))
		{
			return \strftime($format, $timestamp);
		}

		// strftime specifier => date() format.  Locale dependent specifiers use fixed English style output.
		$date_equivalents = [
			// Day
			'a' => 'D',
			'A' => 'l',
			'd' => 'd',
			'e' => 'j',
			'u' => 'N',
			'w' => 'w',
			// Week, date() only offers the ISO-8601 week number so %U and %W are approximations
			'U' => 'W',
			'V' => 'W',
			'W' => 'W',
			// Month
			'b' => 'M',
			'B' => 'F',
			'h' => 'M',
			'm' => 'm',
			// Year
			'G' => 'o', // Four digit ISO-8601 week-numbering year
			'y' => 'y',
			'Y' => 'Y',
			// Time
			'H' => 'H',
			'k' => 'G',
			'I' => 'h',
			'l' => 'g',
			'M' => 'i',
			'p' => 'A',
			'P' => 'a',
			'r' => 'h:i:s A', // 12-hour clock with the meridiem
			'R' => 'H:i',
			'S' => 's',
			'T' => 'H:i:s',
			'X' => 'H:i:s', // Time representation without a locale
			'z' => 'O',
			'Z' => 'T',
			// Time and Date Stamps
			'c' => 'c',
			'D' => 'm/d/y',
			'F' => 'Y-m-d',
			's' => 'U',
			'x' => 'm/d/y', // Date representation without a locale
		];

		// The pattern also consumes "%%" so an escaped percent can not be mistaken for the start of a specifier
		return preg_replace_callback(
			'/%([A-Za-z%])/',
			static function ($matches) use ($timestamp, $date_equivalents) {
				// Specifiers that have no direct date() equivalent
				switch ($matches[1])
				{
					case '%':
						return '%';
					case 'n':
						return "\n";
					case 't':
						return "\t";
					case 'C':
						// Century, the year divided by 100
						return sprintf('%02d', intdiv((int) date('Y', $timestamp), 100));
					case 'g':
						// Two digit ISO-8601 week-numbering year
						return substr(date('o', $timestamp), -2);
					case 'j':
						// Day of the year as 001-366, date('z') counts from 0
						return sprintf('%03d', (int) date('z', $timestamp) + 1);
				}

				// Unknown specifiers are left in the output untouched
				if (!isset($date_equivalents[$matches[1]]))
				{
					return $matches[0];
				}

				return date($date_equivalents[$matches[1]], $timestamp);
			},
			$format
		);
	}
717
718
	/**
719
	 * Provide a PHP 8.1 version of gmstrftime
720
	 *
721
	 * @param string $format of the date/time to return
722
	 * @param int|null $timestamp to convert
723
	 * @return string|false
724
	 */
725
	public static function gmstrftime(string $format, ?int $timestamp = null)
	{
		// Resolve "now" up front so neither branch is ever handed a null timestamp
		$timestamp ??= time();

		if (function_exists('gmstrftime') && (PHP_VERSION_ID < 80100))
		{
			return \gmstrftime($format, $timestamp);
		}

		// self::strftime() formats with date(), which follows the default timezone, so switch
		// to UTC for the duration of the call to get GMT output
		$previous_zone = date_default_timezone_get();
		date_default_timezone_set('UTC');
		try
		{
			return self::strftime($format, $timestamp);
		}
		finally
		{
			// Always put the caller's timezone back
			date_default_timezone_set($previous_zone);
		}
	}
734
735
	/**
736
	 * Checks if the string contains any 4byte chars (emoji) and if so,
737
	 * converts them into &#x...; HTML entities.
738
	 *
739
	 * @param string $string
740
	 * @return string
741
	 */
742
	public static function clean_4byte_chars($string): string
	{
		global $modSettings;

		// Leave the string alone when the using_utf8mb4 setting is on, or when it holds
		// no character in the U+10000 - U+10FFFF range
		if (!empty($modSettings['using_utf8mb4']) || !preg_match('~[\x{10000}-\x{10FFFF}]~u', $string))
		{
			return $string;
		}

		// Walk the string byte by byte
		$byte_count = strlen($string);
		$converted = '';
		$i = 0;

		while ($i < $byte_count)
		{
			$lead = ord($string[$i]);

			// The first byte of a 4-byte character encoding starts with the bytes 0xF0-0xF4 (240 <-> 244)
			// but look all the way to 247 for safe measure.  Everything else is copied over as is.
			if ($lead < 240 || $lead > 247)
			{
				$converted .= $string[$i];
				$i++;
				continue;
			}

			// Replace the four bytes with the corresponding html entity, or U+FFFD when there is none
			$code_point = self::getUnicodeOrdinal(substr($string, $i, 4));
			$converted .= $code_point === false ? "\xEF\xBF\xBD" : '&#x' . dechex($code_point) . ';';
			$i += 4;
		}

		return $converted;
	}
793
794
	/**
795
	 * Converts a 4byte char into the corresponding HTML entity code.
796
	 *
797
	 * This function is derived from:
798
	 * http://www.greywyvern.com/code/php/utf8_html
799
	 *
800
	 * @param string $character
801
	 * @return int|false
802
	 */
803
	public static function getUnicodeOrdinal($character)
	{
		// The first byte decides how many bytes make up the character
		$lead = ord($character[0]);

		// Payload of the byte at the given index, with 128 subtracted
		$tail = static fn ($index) => ord($character[$index]) - 128;

		// Single byte
		if ($lead <= 127)
		{
			return $lead;
		}

		// Two bytes
		if ($lead >= 192 && $lead <= 223)
		{
			return ($lead - 192) * 64 + $tail(1);
		}

		// Three bytes
		if ($lead >= 224 && $lead <= 239)
		{
			return ($lead - 224) * 4096 + $tail(1) * 64 + $tail(2);
		}

		// Four bytes
		if ($lead >= 240 && $lead <= 247)
		{
			return ($lead - 240) * 262144 + $tail(1) * 4096 + $tail(2) * 64 + $tail(3);
		}

		// Five bytes
		if ($lead >= 248 && $lead <= 251)
		{
			return ($lead - 248) * 16777216 + $tail(1) * 262144 + $tail(2) * 4096 + $tail(3) * 64 + $tail(4);
		}

		// Six bytes
		if ($lead >= 252 && $lead <= 253)
		{
			return ($lead - 252) * 1073741824 + $tail(1) * 16777216 + $tail(2) * 262144 + $tail(3) * 4096 + $tail(4) * 64 + $tail(5);
		}

		// 254 and 255 have no value
		if ($lead >= 254 && $lead <= 255)
		{
			return false;
		}

		// What remains is a first byte of 128-191
		return 0;
	}
842
}
843