Issues (1686)

sources/ElkArte/Emoji.php (2 issues)

Labels
1
<?php
2
3
/**
4
 * @package   ElkArte Forum
5
 * @copyright ElkArte Forum contributors
6
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
7
 *
8
 * @version 2.0 dev
9
 *
10
 */
11
12
namespace ElkArte;
13
14
use BBC\PreparseCode;
15
use ElkArte\Cache\Cache;
16
use ElkArte\Helper\Util;
17
18
/**
19
 * Used to add emoji images to text
20
 *
21
 * What it does:
22
 *
23
 * - Searches text for :tag: strings
24
 * - If tag is found to be a known emoji, replaces it with an image tag
25
 */
26
class Emoji extends AbstractModel
27
{
28
	/** @var string ranges in which emoji may be found; not all points in a range are emoji, this is
29
	 * used to check whether any char in the text is potentially in a unicode emoji range */
30
	private const EMOJI_RANGES = '[\x{203C}-\x{3299}\x{1F004}-\x{1F251}\x{1F300}-\x{1FAF6}](?![\x{200d}\x{FE0F}])';
31
32
	/** @var string regex to find 4byte html as &#x1f937;‍️
33
	 * This is how 4byte characters are stored in the utf-8 db. */
34
	private const POSSIBLE_HTML_EMOJI = '~(&#x[a-fA-F\d]{5,6};|&#\d{5,6};)~';
35
36
	/** @var string regex to check if any non-letter characters appear in the string */
37
	private const POSSIBLE_EMOJI = '~([^\p{L}\x00-\x7F]+)~u';
38
39
	/** @var string used to find :emoji: style codes */
40
	private const EMOJI_NAME = '~(?:\s?|^|]|<br />|<br>)(:([-+\w]+):\s?)~u';
41
42
	/** @var null|Emoji holds the instance of this class */
43
	private static $instance;
44
45
	/** @var string holds the url of where the emojis are stored */
46
	public $smileys_url;
47
48
	/** @var string[] Array of keys with known emoji names */
49
	public $shortcode_replace = [];
50
51
	/** @var string Supported emoji -> image regex */
52
	public $emoji_regex = '';
53
54
	/**
	 * Emoji constructor.
	 *
	 * Stores the base url used to build emoji image links.  When no url is supplied
	 * one is built from the smileys_url and emoji_selection entries of $this->_modSettings
	 * (presumably made available by the parent constructor -- confirm in AbstractModel).
	 *
	 * @param string $smileys_url optional url of the emoji image directory
	 */
	public function __construct($smileys_url = '')
	{
		parent::__construct();

		// Fall back to the configured smiley location + active emoji set
		$this->smileys_url = empty($smileys_url)
			? htmlspecialchars($this->_modSettings['smileys_url']) . '/' . $this->_modSettings['emoji_selection']
			: $smileys_url;
	}
70
71
	/**
	 * Simple search and replace function
	 *
	 * What it does:
	 * - Finds emoji tags outside of code tags and converts applicable ones to images
	 *   (or to &#x...; unicode entities when $uni is true)
	 * - For image output, also converts HTML entity / keyboard emoji
	 * - Keeps the text the pattern matched in front of the :tag: (whitespace, a closing
	 *   bbc ']' or a line break).  That text is only a delimiter, dropping it would
	 *   turn [b]:smile:[/b] into a broken [b<img />[/b]
	 * - Called from integrate_pre_bbc_parser
	 *
	 * @param string $string
	 * @param bool $uni false returns an emoji image tag, true returns the unicode point, useful for mail
	 * @param bool $protect if false will bypass codeblock protection (useful if already done!)
	 * @return string
	 */
	public function emojiNameToImage($string, $uni = false, $protect = true)
	{
		// Make sure we do not process emoji in code or icode tags
		$string = $protect ? $this->_protectCodeBlocks($string) : $string;

		// Convert on this instance (not the shared singleton) so the smileys_url given to
		// the constructor is used for shortcodes just as it is for keyboard emoji below.
		$convert = function (array $m) use ($uni): string {
			// $m[1] (:tag: plus optional trailing space) is always the tail of $m[0],
			// anything in front of it is the delimiter and must survive the replacement
			$lead = substr($m[0], 0, strlen($m[0]) - strlen($m[1]));

			// Hand the converters the tag alone, unknown tags come back as $m[1] untouched
			$tag = [$m[1], $m[1], $m[2]];

			return $lead . ($uni ? $this->emojiToUni($tag) : $this->emojiToImage($tag));
		};

		// A null result means the regex engine failed (e.g. invalid UTF-8), keep the text as is
		$string = preg_replace_callback(self::EMOJI_NAME, $convert, $string) ?? $string;

		if (!$uni)
		{
			// Check for any embedded html / hex emoji
			$string = $this->keyboardEmojiToImage($string);
		}

		return $protect ? $this->_restoreCodeBlocks($string) : $string;
	}
105
106
	/**
	 * Swap [code] and [icode] blocks for tokens so their content is not processed.
	 *
	 * Both tag types can appear in the same text, so they are tokenized in a single
	 * pass by PreparseCode rather than one type after the other.  The tokenizing is
	 * skipped when the text has no ':' and nothing matching POSSIBLE_EMOJI, since
	 * there is then nothing for the emoji conversion to act on.
	 *
	 * @param string $string
	 * @return string the text with code blocks tokenized, or the untouched text
	 */
	private function _protectCodeBlocks($string)
	{
		// Quick sniff, a colon may be a :tag:, non-letter characters may be emoji
		$hasColon = strpos($string, ':') !== false;

		if ($hasColon || preg_match(self::POSSIBLE_EMOJI, $string))
		{
			// Protect code and icode blocks
			return PreparseCode::instance('')->tokenizeCodeBlocks($string);
		}

		return $string;
	}
126
127
	/**
	 * Put the saved code blocks back in place of their tokens
	 *
	 * Delegates to PreparseCode::restoreCodeBlocks()
	 *
	 * @param string $string text that may contain code block tokens
	 * @return string
	 */
	private function _restoreCodeBlocks($string)
	{
		$preparser = PreparseCode::instance('');

		return $preparser->restoreCodeBlocks($string);
	}
136
137
	/**
	 * Convert emoji given as HTML &#xxx; entities or as pure unicode characters
	 * to our SVG image version.
	 *
	 * Entities are first decoded to characters (emojiFromHTML), the resulting
	 * text is then searched for supported emoji (emojiFromUni).
	 *
	 * Given &#128512; or 😀, aka grinning face, will convert to 1f600
	 * and search for an available svg image, returning <img /> or the original
	 * string if not found.
	 *
	 * @param string $string
	 * @return string
	 */
	public function keyboardEmojiToImage($string)
	{
		return $this->emojiFromUni($this->emojiFromHTML($string));
	}
154
155
	/**
	 * Decode &#xHEX; and &#DEC; style entities back to utf-8 characters
	 *
	 * Every entity matched by POSSIBLE_HTML_EMOJI (5 or 6 hex / decimal digits) is
	 * decoded, e.g. &#128512; becomes 😀.  Skin tone (Fitzpatrick) modifiers
	 * U+1F3FB - U+1F3FF are removed from the decoded text.  No image replacement
	 * is done here, see emojiFromUni() for that.
	 *
	 * @param string $string
	 * @return string
	 */
	public function emojiFromHTML($string)
	{
		$decode = static function ($match) {
			// 4byte encoded values, like &#x1f123; are changed back to utf8 characters
			$character = html_entity_decode($match[0], ENT_NOQUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8');

			// The Fitzpatrick Scale modifiers are not (well) supported across all graphics sets.
			// Dropping them shows the generic/cartoon color in place of a base image followed
			// by separate modifier images.
			$stripped = preg_replace('~[\x{1F3FB}-\x{1F3FF}]~u', '', $character);

			// On a regex failure hand back the entity as it was found
			return $stripped === null ? $match[0] : $stripped;
		};

		return preg_replace_callback(self::POSSIBLE_HTML_EMOJI, $decode, $string);
	}
178
179
	/**
	 * Search the Emoji array by unicode number
	 *
	 * Given unicode 1f600, aka 😀 grinning face, returns grinning
	 * Given unicode 1f6e9 or 1f6e9-fe0f, aka 🛩️ small airplane, returns small_airplane
	 *
	 * The result of array_search() is compared against false explicitly, so a name
	 * that PHP considers falsy (such as a numeric 0 array key) is still reported
	 * as found.
	 *
	 * @param string $hex lowercase hex codepoints joined by '-'
	 * @return string|int|false the emoji name (array key, PHP stores all-digit names as int)
	 * or false when the code is not known
	 */
	public function findEmojiByCode($hex)
	{
		// Nothing to look for, no need to load the emoji data
		if (empty($hex))
		{
			return false;
		}

		$this->setSearchReplaceRegex();

		// Is it one we have in our library?
		$key = array_search($hex, $this->shortcode_replace, true);
		if ($key !== false)
		{
			return $key;
		}

		// Does it end in -fe0f / Variation Selector-16? Libraries differ in its use or not.
		if (substr($hex, -5) !== '-fe0f')
		{
			return false;
		}

		// Try again without the selector, array_search gives false when that fails as well
		return array_search(substr($hex, 0, -5), $this->shortcode_replace, true);
	}
216
217
	/**
	 * Builds the <img> emoji tag for a shortcode match, when the shortcode is known
	 *
	 * - Expects an array of the form m[2] = 'doughnut' m[1]= ':doughnut:' m[0]= original
	 * - When m[2] is empty or is not a known shortcode, m[0] is returned unchanged
	 * - The image points to <smileys_url>/<code>.svg
	 *
	 * @param array $m results from preg_replace_callback or other array
	 * @return string
	 */
	public function emojiToImage($m)
	{
		// No :tag: found or not a complete result, return
		if (empty($m[2]))
		{
			return $m[0];
		}

		// Finally, going to need these
		$this->setSearchReplaceRegex();

		// It is not a known tag, just return what was passed
		$code = $this->shortcode_replace[$m[2]] ?? null;
		if ($code === null)
		{
			return $m[0];
		}

		// Characters swapped for numeric entities in the alt and title attributes
		$entities = [':' => '&#58;', '(' => '&#40;', ')' => '&#41;', '$' => '&#36;', '[' => '&#091;'];

		// Otherwise, we have some Emoji :dancer:
		$alt = trim(strtr($m[1], $entities));
		$title = ucwords(strtr(htmlspecialchars($m[2]), $entities + ['_' => ' ']));

		return sprintf(
			'<img class="smiley emoji %s" src="%s" alt="%s" title="%s" data-emoji-name="%s" data-emoji-code="%s" />',
			$this->_modSettings['emoji_selection'],
			$this->smileys_url . '/' . $code . '.svg',
			$alt,
			$title,
			$alt,
			$code
		);
	}
250
251
	/**
	 * Searches a string for unicode points and replaces them with emoji <img> tags
	 *
	 * We use [^\p{L}\x00-\x7F]+ which will match any non letter character including
	 * symbols, currency signs, dingbats, box-drawing characters, etc. This is an
	 * easier regex but with more "false" hits for what we want.  If this passes then the
	 * emoji data is loaded and the full emoji regex is used to precisely find
	 * supported codepoints.
	 *
	 * The text is returned unchanged when the quick check finds nothing, when no
	 * emoji regex is available, or when the replacement yields an empty result.
	 *
	 * @param string $string
	 * @return string
	 */
	public function emojiFromUni($string)
	{
		// Avoid loading the emoji data and running the large regex if there is no emoji DNA
		if (preg_match(self::POSSIBLE_EMOJI, $string) !== 1)
		{
			return $string;
		}

		$this->setSearchReplaceRegex();

		// An empty pattern would only raise a warning, there is nothing to search with
		if (empty($this->emoji_regex))
		{
			return $string;
		}

		$result = preg_replace_callback($this->emoji_regex, function ($match) {
			$hex_str = $this->unicodeCharacterToNumber($match[0]);
			$found = $this->findEmojiByCode($hex_str);

			// Hey I know you, your :space_invader:
			if ($found !== false)
			{
				return $this->emojiToImage([$match[0], ':' . $found . ':', $found]);
			}

			return $match[0];
		}, $string);

		return empty($result) ? $string : $result;
	}
287
288
	/**
	 * Builds the html unicode entity form of an emoji for a shortcode match, when known
	 *
	 * - Expects an array of the form m[2] = 'doughnut' m[1]= ':doughnut:' m[0]= original
	 * - When m[2] is empty or is not a known shortcode, m[0] is returned unchanged
	 *
	 * - Given unicode 1f62e-200d-1f4a8 returns &#x1f62e;&#x200d;&#x1f4a8;
	 *
	 * @param array $m results from preg_replace_callback or other array
	 * @return string
	 */
	public function emojiToUni($m)
	{
		// No :tag: found or not a complete result, return
		$incomplete = !is_array($m) || empty($m[2]);
		if ($incomplete)
		{
			return $m[0];
		}

		// Need our known codes
		$this->setSearchReplaceRegex();

		// It is not a known :tag:, just return what was passed
		$code = $this->shortcode_replace[$m[2]] ?? null;
		if ($code === null)
		{
			return $m[0];
		}

		// Otherwise, we have some Emoji :dancer:, each codepoint becomes its own entity
		return '&#x' . implode(';&#x', explode('-', $code)) . ';';
	}
322
323
	/**
	 * Convert the character(s) of an emoji to the hex codepoint string used
	 * as the value in the emoji array
	 *
	 * Given 😀 aka grinning face returns unicode 1f600
	 * Given 😮‍💨 aka face exhaling returns unicode 1f62e-200d-1f4a8
	 *
	 * Each codepoint is written as lowercase hex, left padded with zeros to a
	 * minimum of four digits, and the codepoints are joined with '-'.
	 *
	 * @param string $code
	 * @return string
	 */
	public function unicodeCharacterToNumber($code)
	{
		$points = [];
		$length = Util::strlen($code);

		for ($position = 0; $position < $length; $position++)
		{
			$character = Util::substr($code, $position, 1);
			$hex = strtolower(dechex(Util::uniord($character)));

			$points[] = str_pad($hex, 4, '0', STR_PAD_LEFT);
		}

		return implode('-', $points);
	}
344
345
	/**
	 * Reads the base emoji tags file and loads the tags to a PHP array.
	 *
	 * Creates a regex to search text for known emoji sequences.  Uses generic search for
	 * singleton emoji such as 1f600 as all multipoint ones would have already been found
	 * and processed.
	 *
	 * What it does:
	 * - Tries the cache first, the data is only rebuilt when the shortcode array or
	 *   the regex is missing, as both are needed
	 * - Entries in emoji_tags.js that carry a type are skipped
	 * - Calls the integrate_custom_emoji hook with the shortcode array by reference
	 * - Stores both results in the cache for an hour
	 */
	public function setSearchReplaceRegex()
	{
		global $settings;

		$this->_checkCache();

		// Have both the codes and the regex, then we are done
		if (!empty($this->shortcode_replace) && !empty($this->emoji_regex))
		{
			return;
		}

		$this->shortcode_replace = [];
		$sequences = [];

		// An unreadable file is treated as having no tags
		$emoji = file_get_contents($settings['default_theme_dir'] . '/scripts/emoji_tags.js');
		if ($emoji === false)
		{
			$emoji = '';
		}

		preg_match_all('~{name:\s[\'"](.*?)[\'"], key:\s[\'"](.*?)[\'"](?:, type:\s[\'"](.*?)[\'"])?}~', $emoji, $matches, PREG_SET_ORDER);
		foreach ($matches as $match)
		{
			// Typed entries are not loaded
			if (isset($match[3]))
			{
				continue;
			}

			$name = trim($match[1]);
			$key = trim($match[2]);
			$this->shortcode_replace[$name] = $key;

			// Multipoint sequences use a unique, per key, regex to avoid collisions
			if (strpos($key, '-') !== false)
			{
				$sequences[] = '\x{' . implode('}\x{', explode('-', $key)) . '}';
			}
		}

		call_integration_hook('integrate_custom_emoji', [&$this->shortcode_replace]);

		// Longest to shortest to avoid any partial matches due to sequences
		usort($sequences, static fn($a, $b) => strlen($b) <=> strlen($a));

		// Build out the regex, the single point search goes at the end.  Adding it to the
		// list before joining means no empty alternative is created when there are no sequences.
		$sequences[] = self::EMOJI_RANGES;
		$this->emoji_regex = '~' . implode('|', $sequences) . '~u';

		// Stash for an hour, not like this is going to change
		Cache::instance()->put('shortcode_replace', $this->shortcode_replace, 3600);
		Cache::instance()->put('emoji_regex', $this->emoji_regex, 3600);
	}
394
395
	/**
	 * Ask the cache for the shortcode array and the emoji regex
	 *
	 * Only the values that are still empty on this instance are requested.  The cache
	 * keys carry the same names as the properties they fill.
	 *
	 * @return void
	 */
	private function _checkCache()
	{
		foreach (['shortcode_replace', 'emoji_regex'] as $name)
		{
			if (empty($this->{$name}))
			{
				Cache::instance()->getVar($this->{$name}, $name, 3600);
			}
		}
	}
412
413
	/**
	 * Retrieve the sole instance of this class, creating it on first use.
	 *
	 * @return Emoji
	 */
	public static function instance()
	{
		return self::$instance ??= new Emoji();
	}
427
}
428