<?php

/**
 * @package   ElkArte Forum
 * @copyright ElkArte Forum contributors
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
 *
 * @version 2.0 dev
 *
 */

namespace ElkArte;

use BBC\PreparseCode;
use ElkArte\Cache\Cache;
use ElkArte\Helper\Util;

/**
 * Used to add emoji images to text
 *
 * What it does:
 *
 * - Searches text for :tag: strings
 * - If tag is found to be a known emoji, replaces it with an image tag
 * - Converts &#xHEX; / &#DEC; entities and raw unicode emoji to image tags as well
 */
class Emoji extends AbstractModel
{
	/** @var string ranges that emoji may be found, not all points in the range are emoji, this is
	 * used to check whether any char in the text is potentially in a unicode emoji range. The trailing
	 * negative lookahead rejects a point that is followed by a ZWJ (200D) or VS-16 (FE0F) */
	private const EMOJI_RANGES = '[\x{203C}-\x{3299}\x{1F004}-\x{1F251}\x{1F300}-\x{1FAF6}](?![\x{200d}\x{FE0F}])';

	/** @var string regex to find 4byte html entities such as &#x1F937; or &#129335;
	 * This is how 4byte characters are stored in the utf-8 db. */
	private const POSSIBLE_HTML_EMOJI = '~(&#x[a-fA-F\d]{5,6};|&#\d{5,6};)~';

	/** @var string regex to check if any non letter, non ASCII characters appear in the string */
	private const POSSIBLE_EMOJI = '~([^\p{L}\x00-\x7F]+)~u';

	/** @var string used to find :emoji: style codes, group 1 is ':name:' (plus an optional trailing
	 * whitespace), group 2 is 'name'.
	 * NOTE(review): the first alternative of the leading group, \s?, can match the empty string so
	 * the "start of line / leading space / after ]" prefix is effectively optional. */
	private const EMOJI_NAME = '~(?:\s?|^|]|<br />|<br>)(:([-+\w]+):\s?)~u';

	/** @var string[] characters replaced by numeric entities in generated alt/title text, presumably
	 * so that later smiley / BBC passes do not re-parse the shortcode inside the <img> tag */
	private const ENTITY_MAP = [':' => '&#58;', '(' => '&#40;', ')' => '&#41;', '$' => '&#36;', '[' => '&#091;'];

	/** @var null|Emoji holds the instance of this class */
	private static $instance;

	/** @var string holds the url of where the emojis are stored */
	public $smileys_url;

	/** @var string[] Array of known emoji, name => unicode key (e.g. grinning => 1f600) */
	public $shortcode_replace = [];

	/** @var string Supported emoji -> image regex */
	public $emoji_regex = '';

	/**
	 * Emoji constructor.
	 *
	 * @param string $smileys_url base url of the emoji images, when empty it is built from the
	 * smileys_url and emoji_selection mod settings
	 */
	public function __construct($smileys_url = '')
	{
		parent::__construct();

		if (empty($smileys_url))
		{
			$smileys_url = htmlspecialchars($this->_modSettings['smileys_url']) . '/' . $this->_modSettings['emoji_selection'];
		}

		$this->smileys_url = $smileys_url;
	}

	/**
	 * Simple search and replace function
	 *
	 * What it does:
	 * - Finds emoji tags outside of code tags and converts applicable ones to images
	 * - Called from integrate_pre_bbc_parser
	 *
	 * @param string $string
	 * @param bool $uni false returns an emoji image tag, true returns the unicode point, useful for mail
	 * @param bool $protect if false will bypass codeblock protection (useful if already done!)
	 * @return string
	 */
	public function emojiNameToImage($string, $uni = false, $protect = true)
	{
		// Make sure we do not process emoji in code or icode tags
		$string = $protect ? $this->_protectCodeBlocks($string) : $string;

		// Use this instance (not the singleton) so a custom smileys_url applies to shortcodes in the
		// same way it applies to keyboard emoji below.
		$callback = $uni
			? fn(array $m): string => $this->emojiToUni($m)
			: fn(array $m): string => $this->emojiToImage($m);

		// preg_replace_callback returns null on a PCRE error, keep the input in that case
		$converted = preg_replace_callback(self::EMOJI_NAME, $callback, $string);
		$string = $converted ?? $string;

		// Check for any embedded html / hex emoji
		if (!$uni)
		{
			$string = $this->keyboardEmojiToImage($string);
		}

		return $protect ? $this->_restoreCodeBlocks($string) : $string;
	}

	/**
	 * Replace [code] and [icode] blocks with tokens. Both may exist on a page, as such you
	 * can't search for one and process and then the next. i.e. [code]bla[/code] xx [icode]bla[/icode]
	 * would process whats outside of code tags, which is an icode !
	 *
	 * @param string $string
	 * @return string
	 */
	private function _protectCodeBlocks($string)
	{
		// Quick sniff, nothing to protect when there is no shortcode colon, no html entity and
		// no non-letter multibyte character. The entity check matters because emojiFromHTML
		// converts &#...; entities, which would otherwise also be converted inside code blocks.
		if (strpos($string, ':') === false
			&& strpos($string, '&#') === false
			&& !preg_match(self::POSSIBLE_EMOJI, $string))
		{
			return $string;
		}

		// Protect code and icode blocks
		return PreparseCode::instance('')->tokenizeCodeBlocks($string);
	}

	/**
	 * Replace any code tokens with the saved blocks
	 *
	 * @param string $string
	 * @return string
	 */
	private function _restoreCodeBlocks($string)
	{
		return PreparseCode::instance('')->restoreCodeBlocks($string);
	}

	/**
	 * Find emoji codes that are HTML &#xxx; codes or pure unicode characters. If found
	 * replace them with our SVG version.
	 *
	 * Given &#128512; or &#x1F600;, aka grinning face, will convert to 1f600
	 * and search for available svg image, returning <img /> or original
	 * string if not found.
	 *
	 * @param string $string
	 * @return string
	 */
	public function keyboardEmojiToImage($string)
	{
		$string = $this->emojiFromHTML($string);

		return $this->emojiFromUni($string);
	}

	/**
	 * Search and replace on &#xHEX; &#DEC; style emoji
	 *
	 * Decodes entities such as &#x1F600; back to their utf-8 character so that
	 * emojiFromUni can find them. Fitzpatrick skin tone modifiers are dropped.
	 *
	 * @param string $string
	 * @return string
	 */
	public function emojiFromHTML($string)
	{
		// If there are 4byte encoded values like &#x1F600;, change those back to utf8 characters
		$result = preg_replace_callback(self::POSSIBLE_HTML_EMOJI, static function ($match) {
			$replace = html_entity_decode($match[0], ENT_NOQUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8');

			// The Fitzpatrick Scale modifiers are not (well) supported across all graphics sets. For now
			// drop it, allowing it to display the generic/cartoon color. IF not things would render as the
			// individual images instead of just the one combined emoji
			$replace = preg_replace('~[\x{1F3FB}-\x{1F3FF}]~u', '', $replace);

			return $replace ?? $match[0];
		}, $string);

		// null signals a PCRE error, hand back the untouched input
		return $result ?? $string;
	}

	/**
	 * Search the Emoji array by unicode number
	 *
	 * Given unicode 1f600, aka grinning face, returns grinning
	 * Given unicode 1f6e9 or 1f6e9-fe0f, aka small airplane, returns small_airplane
	 *
	 * @param string $hex
	 * @return string|false the emoji name or false when it is not a known code
	 */
	public function findEmojiByCode($hex)
	{
		if (empty($hex))
		{
			return false;
		}

		$this->setSearchReplaceRegex();

		// Is it one we have in our library? Strict false check, a found key may itself be falsy
		$key = array_search($hex, $this->shortcode_replace, true);
		if ($key !== false)
		{
			return (string) $key;
		}

		// Does it end in -fe0f / Variation Selector-16? Libraries differ in its use or not.
		if (substr($hex, -5) !== '-fe0f')
		{
			return false;
		}

		$key = array_search(substr($hex, 0, -5), $this->shortcode_replace, true);

		return $key === false ? false : (string) $key;
	}

	/**
	 * Takes a shortcode array and, if available, converts it to an <img> emoji
	 *
	 * - Uses input array of the form m[2] = 'doughnut' m[1]= ':doughnut:' m[0]= original
	 * - If shortcode does not exist in the emoji returns m[0] the preg full match
	 *
	 * @param array $m results from preg_replace_callback or other array
	 * @return string
	 */
	public function emojiToImage($m)
	{
		// No :tag: found or not a complete result, return
		if (empty($m[2]))
		{
			return $m[0];
		}

		// Finally, going to need these
		$this->setSearchReplaceRegex();

		// It is not a known tag, just return what was passed
		if (!isset($this->shortcode_replace[$m[2]]))
		{
			return $m[0];
		}

		// Otherwise, we have some Emoji :dancer:
		$code = $this->shortcode_replace[$m[2]];
		$filename = $this->smileys_url . '/' . $code . '.svg';

		// Entity encode the characters in ENTITY_MAP; title additionally shows _ as a space
		$alt = trim(strtr($m[1], self::ENTITY_MAP));
		$title = ucwords(strtr(htmlspecialchars($m[2]), self::ENTITY_MAP + ['_' => ' ']));

		return '<img class="smiley emoji ' . $this->_modSettings['emoji_selection'] . '" src="' . $filename . '" alt="' . $alt . '" title="' . $title . '" data-emoji-name="' . $alt . '" data-emoji-code="' . $code . '" />';
	}

	/**
	 * Searches a string for unicode points and replaces them with emoji <img> tags
	 *
	 * We use [^\p{L}\x00-\x7F]+ which will match any non letter character including
	 * symbols, currency signs, dingbats, box-drawing characters, etc. This is an
	 * easier regex but with more "false" hits for what we want. If this passes then the
	 * full emoji regex will be used to precisely find supported codepoints
	 *
	 * @param string $string
	 * @return string
	 */
	public function emojiFromUni($string)
	{
		// Avoid loading the emoji table and the large regex if there is no emoji DNA
		if (preg_match(self::POSSIBLE_EMOJI, $string) !== 1)
		{
			return $string;
		}

		$this->setSearchReplaceRegex();

		$result = preg_replace_callback($this->emoji_regex, function ($match) {
			$hex_str = $this->unicodeCharacterToNumber($match[0]);
			$found = $this->findEmojiByCode($hex_str);

			// Hey I know you, your :space_invader:
			if ($found !== false)
			{
				return $this->emojiToImage([$match[0], ':' . $found . ':', $found]);
			}

			return $match[0];
		}, $string);

		return empty($result) ? $string : $result;
	}

	/**
	 * Takes a shortcode array and, if available, converts it to html unicode point entities
	 *
	 * - Uses input array of the form m[2] = 'doughnut' m[1]= ':doughnut:' m[0]= original
	 * - If shortcode does not exist in the emoji returns m[0] the preg full match
	 *
	 * - Given a key of 1f62e-200d-1f4a8 returns &#x1f62e;&#x200d;&#x1f4a8;
	 *
	 * @param array $m results from preg_replace_callback or other array
	 * @return string
	 */
	public function emojiToUni($m)
	{
		// Not a match array at all, there is no m[0] to hand back
		if (!is_array($m))
		{
			return is_string($m) ? $m : '';
		}

		// No :tag: found or not a complete result, return
		if (empty($m[2]))
		{
			return $m[0];
		}

		// Need our known codes
		$this->setSearchReplaceRegex();

		// It is not a known :tag:, just return what was passed
		if (!isset($this->shortcode_replace[$m[2]]))
		{
			return $m[0];
		}

		// Otherwise, we have some Emoji :dancer:, each - separated point becomes its own entity
		$uniCode = str_replace('-', ';&#x', $this->shortcode_replace[$m[2]]);

		return '&#x' . $uniCode . ';';
	}

	/**
	 * Given a unicode character, convert to a Unicode number which can be
	 * used for emoji array searching
	 *
	 * Given grinning face returns unicode 1f600
	 * Given face exhaling (a ZWJ sequence) returns unicode 1f62e-200d-1f4a8
	 *
	 * @param string $code one or more unicode characters
	 * @return string lowercase hex points, each left padded to at least 4 digits, joined by -
	 */
	public function unicodeCharacterToNumber($code)
	{
		$points = [];

		// The length does not change while looping, compute it once
		$length = Util::strlen($code);
		for ($i = 0; $i < $length; $i++)
		{
			$points[] = str_pad(strtolower(dechex(Util::uniord(Util::substr($code, $i, 1)))), 4, '0', STR_PAD_LEFT);
		}

		return implode('-', $points);
	}

	/**
	 * Reads the base emoji tags file and load them to PHP array.
	 *
	 * Creates a regex to search text for known emoji sequences. Uses generic search for
	 * singleton emoji such as 1f600 as all multipoint ones would have already been found
	 * and processed
	 *
	 * Does nothing when both the shortcode table and the regex are already available
	 * (in this object or from the cache). Entries in the tags file that carry a type:
	 * attribute are skipped.
	 */
	public function setSearchReplaceRegex()
	{
		global $settings;

		$this->_checkCache();

		// The regex can be missing while the table is present (e.g. only one cache entry was
		// returned), so both must be available before we can skip the rebuild.
		$needTable = empty($this->shortcode_replace);
		if (!$needTable && !empty($this->emoji_regex))
		{
			return;
		}

		// A failed read leaves us with no known emoji, the range regex is still built
		$emoji = file_get_contents($settings['default_theme_dir'] . '/scripts/emoji_tags.js');
		if ($emoji === false)
		{
			$emoji = '';
		}

		$matches = [];
		preg_match_all('~{name:\s[\'"](.*?)[\'"], key:\s[\'"](.*?)[\'"](?:, type:\s[\'"](.*?)[\'"])?}~', $emoji, $matches, PREG_SET_ORDER);

		$table = [];
		$sequences = [];
		foreach ($matches as $match)
		{
			if (isset($match[3]))
			{
				continue;
			}

			$name = trim($match[1]);
			$key = trim($match[2]);
			$table[$name] = $key;

			// Multipoint sequences use a unique, per key, regex to avoid collisions
			if (strpos($key, '-') !== false)
			{
				$sequences[] = '\x{' . implode('}\x{', explode('-', $key)) . '}';
			}
		}

		// Only replace the table when we did not already have one
		if ($needTable)
		{
			$this->shortcode_replace = $table;
			call_integration_hook('integrate_custom_emoji', [&$this->shortcode_replace]);
		}

		// Longest to shortest to avoid any partial matches due to sequences
		usort($sequences, static fn($a, $b) => strlen($b) <=> strlen($a));

		// Build out the regex, append the single point search at end. Appending to the list (instead of
		// gluing with '|') avoids an empty alternative, which matches everywhere, when there are no sequences.
		$sequences[] = self::EMOJI_RANGES;
		$this->emoji_regex = '~' . implode('|', $sequences) . '~u';

		// Stash for an hour, not like this is going to change
		Cache::instance()->put('shortcode_replace', $this->shortcode_replace, 3600);
		Cache::instance()->put('emoji_regex', $this->emoji_regex, 3600);
	}

	/**
	 * Check the cache to see if we already have the regex created/loaded
	 *
	 * Only asks the cache for values this object does not hold yet.
	 *
	 * @return void
	 */
	private function _checkCache()
	{
		if (empty($this->shortcode_replace))
		{
			Cache::instance()->getVar($this->shortcode_replace, 'shortcode_replace', 3600);
		}

		if (empty($this->emoji_regex))
		{
			Cache::instance()->getVar($this->emoji_regex, 'emoji_regex', 3600);
		}
	}

	/**
	 * Retrieve the sole instance of this class.
	 *
	 * @return Emoji
	 */
	public static function instance()
	{
		if (self::$instance === null)
		{
			self::$instance = new self();
		}

		return self::$instance;
	}
}