LucaDevelop /
telegram-entities-decoder
| 1 | <?php |
||||
| 2 | /** |
||||
| 3 | * This class decodes the style entities of Telegram bot messages (bold, italic, etc.) into text with inline markup that reproduces (when possible) the |
||||
| 4 | * exact style the message originally had when it was sent to the bot. |
||||
| 5 | * All this work is necessary because Telegram returns the offset and length of the entities in UTF-16 code units, which are hard to handle correctly in PHP. |
||||
| 6 | * |
||||
| 7 | * Inspired By: https://github.com/php-telegram-bot/core/issues/544#issuecomment-564950430 |
||||
| 8 | * |
||||
| 9 | * Example usage: |
||||
| 10 | * $entity_decoder = new EntityDecoder('HTML'); |
||||
| 11 | * $decoded_text = $entity_decoder->decode($message); |
||||
| 12 | * |
||||
| 13 | * @author LucaDevelop |
||||
| 14 | * @access public |
||||
| 15 | * @see https://github.com/LucaDevelop/telegram-entities-decoder |
||||
| 16 | */ |
||||
| 17 | |||||
| 18 | namespace lucadevelop\TelegramEntitiesDecoder; |
||||
| 19 | |||||
class EntityDecoder
{
    /**
     * Entity types that are rendered as markup; entities of any other type are ignored.
     *
     * @var string[]
     */
    private $entitiesToParse = ['bold', 'italic', 'code', 'pre', 'text_mention', 'text_link', 'strikethrough', 'underline', 'spoiler', 'blockquote', 'custom_emoji'];

    /**
     * Entities of the message currently being processed (reset on every call).
     *
     * @var array
     */
    private $entities = [];

    /**
     * Output style: 'HTML', 'Markdown' or 'MarkdownV2'.
     *
     * @var string
     */
    private $style;

    /**
     * Opening markers that do not depend on the entity's own data, keyed by style then entity type.
     *
     * @var array<string, array<string, string>>
     */
    private $startMarkers = [
        'Markdown' => [
            'bold' => '*',
            'italic' => '_',
            'code' => '`',
            'text_mention' => '[',
            'text_link' => '[',
        ],
        'HTML' => [
            'bold' => '<b>',
            'italic' => '<i>',
            'underline' => '<u>',
            'strikethrough' => '<s>',
            'spoiler' => '<span class="tg-spoiler">',
            'code' => '<code>',
            'blockquote' => '<blockquote>',
        ],
        'MarkdownV2' => [
            'bold' => '*',
            'italic' => '_',
            'spoiler' => '||',
            'code' => '`',
            'underline' => '__',
            'strikethrough' => '~',
            'text_mention' => '[',
            'text_link' => '[',
            'custom_emoji' => '![',
            'blockquote' => '>',
        ],
    ];

    /**
     * Closing markers that do not depend on the entity's own data, keyed by style then entity type.
     *
     * @var array<string, array<string, string>>
     */
    private $stopMarkers = [
        'Markdown' => [
            'bold' => '*',
            'italic' => '_',
            'code' => '`',
            'pre' => "\n```",
        ],
        'HTML' => [
            'bold' => '</b>',
            'italic' => '</i>',
            'underline' => '</u>',
            'strikethrough' => '</s>',
            'spoiler' => '</span>',
            'code' => '</code>',
            'text_mention' => '</a>',
            'text_link' => '</a>',
            'custom_emoji' => '</tg-emoji>',
            'blockquote' => '</blockquote>',
        ],
        'MarkdownV2' => [
            'bold' => '*',
            'italic' => '_',
            'spoiler' => '||',
            'code' => '`',
            'pre' => "\n```",
            'underline' => '__',
            'strikethrough' => '~',
        ],
    ];

    /**
     * Code points (as UTF-32BE hex) that are glued to the preceding character:
     * variation selector-16 and the five emoji skin-tone modifiers.
     *
     * @var string[]
     */
    private $clusterModifiers = ['0000fe0f', '0001f3fb', '0001f3fc', '0001f3fd', '0001f3fe', '0001f3ff'];

    /**
     * @param string $style Either 'HTML', 'Markdown' or 'MarkdownV2'.
     *
     * @throws \InvalidArgumentException if the provided style name is invalid.
     */
    public function __construct(string $style = 'HTML')
    {
        if (!in_array($style, ['HTML', 'MarkdownV2', 'Markdown'], true)) {
            throw new \InvalidArgumentException("Wrong style name");
        }
        $this->style = $style;
    }

    /**
     * Decode entities and return the text with inline markup for the configured style.
     *
     * When the message has no entities or no text, the original text is returned
     * untouched (special characters are NOT escaped in that case).
     *
     * @param object $message message object to reconstruct entities from (json decoded without assoc).
     * @return string
     *
     * @throws \InvalidArgumentException if $message is not an object or its text is not valid UTF-8.
     */
    public function decode($message): string
    {
        $textToDecode = $this->loadMessage($message);
        if (empty($this->entities) || $textToDecode === '') {
            return $textToDecode;
        }

        $finalText = '';
        foreach ($this->renderSegments($textToDecode) as $segment) {
            $finalText .= $segment['text'];
        }

        return $finalText;
    }

    /**
     * Extract every top-level entity (with its nested entities) as a separate marked-up string.
     *
     * Text located outside of any entity is not part of the result.
     *
     * @param object $message message object to reconstruct entities from (json decoded without assoc).
     * @return array list of marked-up strings, one per top-level entity, in message order
     *
     * @throws \InvalidArgumentException if $message is not an object or its text is not valid UTF-8.
     */
    public function extractAllEntities($message): array
    {
        $entitiesArray = [];
        $textToDecode = $this->loadMessage($message);
        if (empty($this->entities) || $textToDecode === '') {
            return $entitiesArray;
        }

        foreach ($this->renderSegments($textToDecode) as $segment) {
            if ($segment['entity']) {
                $entitiesArray[] = $segment['text'];
            }
        }

        return $entitiesArray;
    }

    /**
     * Validate the message, load its entities into $this->entities and return its text.
     *
     * The entity list is reset first so that entities of a previously processed
     * message can never leak into this one. Caption data wins over text data when
     * both are present, matching the precedence of the entity lookup.
     *
     * @param mixed $message
     * @return string the message text, or the caption, or '' when neither is set
     *
     * @throws \InvalidArgumentException if $message is not an object.
     */
    private function loadMessage($message): string
    {
        if (!is_object($message)) {
            throw new \InvalidArgumentException('message must be an object');
        }

        $this->entities = [];
        foreach (['entities', 'caption_entities'] as $field) {
            if (!empty($message->$field)) {
                $this->entities = $message->$field;
            }
        }

        foreach (['text', 'caption'] as $field) {
            $value = $message->$field ?? '';
            // Compare against '' explicitly: empty() would wrongly discard the text "0".
            if (is_scalar($value) && (string) $value !== '') {
                return (string) $value;
            }
        }

        return '';
    }

    /**
     * Run the entity walk with the mbstring internal encoding forced to UTF-8.
     *
     * The previous encoding is restored even when the walk throws.
     *
     * @param string $text
     * @return array list of ['text' => string, 'entity' => bool] segments
     */
    private function renderSegments(string $text): array
    {
        $previousEncoding = mb_internal_encoding();
        mb_internal_encoding('UTF-8');
        try {
            return $this->walkText($text);
        } finally {
            // mb_internal_encoding() may return a bool; only a string can be restored.
            if (is_string($previousEncoding)) {
                mb_internal_encoding($previousEncoding);
            }
        }
    }

    /**
     * Walk the text character by character, opening and closing entities at their
     * UTF-16 offsets and escaping special characters.
     *
     * The output is split into segments: runs of plain text ('entity' => false) and
     * complete top-level entities with everything nested in them ('entity' => true).
     * Concatenating all segments gives the decoded text.
     *
     * @param string $text
     * @return array list of ['text' => string, 'entity' => bool] segments
     */
    private function walkText(string $text): array
    {
        $segments = [];
        $plain = '';
        $markup = '';
        $openedEntities = [];
        $currentPosition = 0;

        // Pushes pending plain text and, when requested, the pending entity markup.
        $emit = static function (bool $withEntity) use (&$segments, &$plain, &$markup) {
            if ($plain !== '') {
                $segments[] = ['text' => $plain, 'entity' => false];
                $plain = '';
            }
            if ($withEntity) {
                $segments[] = ['text' => $markup, 'entity' => true];
                $markup = '';
            }
        };

        foreach ($this->splitCharAndLength($text) as $unit) {
            $offsetAndLength = $currentPosition + $unit['length'];
            $entityCheckStart = $this->checkForEntityStart($currentPosition);
            $entityCheckStop = $this->checkForEntityStop($offsetAndLength);

            if ($entityCheckStart !== false) {
                foreach ($entityCheckStart as $startedEntity) {
                    $markup .= $this->getEntityStartString($startedEntity);
                    $openedEntities[] = $startedEntity;
                }
            }

            // A character belongs to an entity when one starts on it, ends on it, or is still open.
            $isEntityOpen = $entityCheckStart !== false || $entityCheckStop !== false || !empty($openedEntities);
            $escaped = $this->escapeSpecialChars($unit['char'], $isEntityOpen, $openedEntities);
            if ($isEntityOpen) {
                $markup .= $escaped;
            } else {
                $plain .= $escaped;
            }

            if ($entityCheckStop !== false) {
                if ($this->style === 'MarkdownV2' && $this->checkMarkdownV2AmbiguousEntities($entityCheckStop)) {
                    // Italic + underline closing together must be written as "_\r__" (see Telegram docs).
                    $markup .= "_\r__";
                    array_pop($openedEntities);
                    array_pop($openedEntities);
                    if (empty($openedEntities)) {
                        $emit(true);
                    }
                }
                foreach ($entityCheckStop as $stoppedEntity) {
                    $markup .= $this->getEntityStopString($stoppedEntity);
                    array_pop($openedEntities);
                    if (empty($openedEntities)) {
                        $emit(true);
                    }
                }
            }

            $currentPosition = $offsetAndLength;
        }

        // Close whatever is still open (innermost first) and emit it as ONE entity.
        $hasUnclosed = !empty($openedEntities);
        foreach (array_reverse($openedEntities) as $unclosedEntity) {
            $markup .= $this->getEntityStopString($unclosedEntity);
        }
        $emit($hasUnclosed || $markup !== '');

        return $segments;
    }

    /**
     * Split the message text into displayable characters together with their UTF-16 length.
     *
     * Variation selector-16, emoji skin-tone modifiers, zero width joiners and the
     * code point following a joiner are appended to the previous character so that
     * markup is never inserted in the middle of an emoji sequence.
     *
     * @param string $string UTF-8 text
     * @return array list of ['char' => string, 'length' => int] (length in UTF-16 code units)
     *
     * @throws \InvalidArgumentException if $string is not valid UTF-8.
     */
    protected function splitCharAndLength($string)
    {
        $codePoints = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);
        if ($codePoints === false) {
            throw new \InvalidArgumentException('message text is not valid UTF-8');
        }

        $clusters = [];
        $joinNext = false;
        foreach ($codePoints as $codePoint) {
            // UTF-32BE gives the scalar value directly; UTF-16 would yield surrogate
            // pairs for the skin-tone modifiers, which could never match the list.
            $hex = bin2hex(mb_convert_encoding($codePoint, 'UTF-32BE', 'UTF-8'));
            $isModifier = in_array($hex, $this->clusterModifiers, true);
            $isJoiner = $hex === '0000200d';

            if (!empty($clusters) && ($isModifier || $isJoiner || $joinNext)) {
                $clusters[count($clusters) - 1] .= $codePoint;
            } else {
                // Also reached when the text starts with a modifier/joiner: nothing to attach to.
                $clusters[] = $codePoint;
            }

            if ($isJoiner) {
                $joinNext = true;
            } elseif (!$isModifier) {
                $joinNext = false;
            }
        }

        $data = [];
        foreach ($clusters as $cluster) {
            $data[] = ["char" => $cluster, "length" => $this->getUTF16CodePointsLength($cluster)];
        }

        return $data;
    }

    /**
     * Apply Telegram escape rules for the chosen style.
     *
     * $char may hold several code points (see splitCharAndLength), so every
     * special character inside it is escaped, not only an exact single-char match.
     *
     * @param string $char         character (cluster) to escape
     * @param bool   $isEntityOpen whether the character lies inside an entity (used by 'Markdown' only)
     * @param array  $entities     currently opened entities, outermost first
     * @return string
     */
    protected function escapeSpecialChars($char, $isEntityOpen, $entities)
    {
        if ($this->style === 'HTML') {
            return strtr($char, ['<' => '&lt;', '>' => '&gt;', '&' => '&amp;']);
        }

        if ($this->style === 'Markdown') {
            if (!$isEntityOpen) {
                return preg_replace('/[*_\[`]/', '\\\\$0', $char);
            }
            // Legacy Markdown cannot escape inside an entity: close it, emit the
            // escaped marker, then reopen it.
            $marker = isset($entities[0]) ? $this->getEntityStartString($entities[0]) : '';
            if ($marker === '*' || $marker === '_') {
                return str_replace($marker, $marker . "\\" . $marker . $marker, $char);
            }
            return $char;
        }

        if ($this->style === 'MarkdownV2') {
            if ($char == "\n") {
                foreach ($entities as $entity) {
                    if ($entity->type === 'blockquote') {
                        // Every line of a block quotation must start with '>'.
                        return $char . '>';
                    }
                }
            }
            return preg_replace('/[_*\[\]()~`>#+\-=|{}.!\\\\]/', '\\\\$0', $char);
        }

        return $char;
    }

    /**
     * Get the opening string of the entity for the chosen style.
     *
     * @param object $entity Telegram MessageEntity
     * @return string '' when the style has no markup for this entity type
     */
    protected function getEntityStartString($entity)
    {
        $type = isset($entity->type) ? (string) $entity->type : '';

        if (isset($this->startMarkers[$this->style][$type])) {
            return $this->startMarkers[$this->style][$type];
        }

        if ($type === 'pre') {
            $language = isset($entity->language) ? (string) $entity->language : null;
            if ($this->style === 'HTML') {
                return $language === null
                    ? '<pre>'
                    : '<pre><code class="language-' . $this->escapeHtmlAttribute($language) . '">';
            }
            return '```' . (string) $language . "\n";
        }

        if ($this->style === 'HTML') {
            switch ($type) {
                case 'text_mention':
                    return '<a href="tg://user?id=' . $this->escapeHtmlAttribute($entity->user->id) . '">';
                case 'text_link':
                    return '<a href="' . $this->escapeHtmlAttribute($entity->url) . '">';
                case 'custom_emoji':
                    return '<tg-emoji emoji-id="' . $this->escapeHtmlAttribute($entity->custom_emoji_id) . '">';
            }
        }

        return '';
    }

    /**
     * Check if there are entities that start at the given position and return them.
     *
     * @param int $pos offset in UTF-16 code units
     * @return array|false the supported entities starting at $pos, or false when there are none
     */
    protected function checkForEntityStart($pos)
    {
        $entities = [];
        foreach ($this->entities as $entity) {
            if ((int) $entity->offset === (int) $pos && in_array($entity->type, $this->entitiesToParse, true)) {
                $entities[] = $entity;
            }
        }

        return !empty($entities) ? $entities : false;
    }

    /**
     * Get the closing string of the entity for the chosen style.
     *
     * @param object $entity Telegram MessageEntity
     * @return string '' when the style has no closing markup for this entity type
     */
    protected function getEntityStopString($entity)
    {
        $type = isset($entity->type) ? (string) $entity->type : '';

        if (isset($this->stopMarkers[$this->style][$type])) {
            return $this->stopMarkers[$this->style][$type];
        }

        if ($this->style === 'HTML') {
            if ($type === 'pre') {
                return isset($entity->language) ? '</code></pre>' : '</pre>';
            }
            return '';
        }

        // Markdown and MarkdownV2 links carry their target in the closing part.
        switch ($type) {
            case 'text_mention':
                return '](tg://user?id=' . $entity->user->id . ')';
            case 'text_link':
                return '](' . $this->escapeLinkTarget($entity->url) . ')';
            case 'custom_emoji':
                return $this->style === 'MarkdownV2' ? '](tg://emoji?id=' . $entity->custom_emoji_id . ')' : '';
        }

        return '';
    }

    /**
     * Check if there are entities that end at the given position and return them
     * (reversed because they are nested).
     *
     * @param int $pos offset in UTF-16 code units just past the current character
     * @return array|false the supported entities ending at $pos, or false when there are none
     */
    protected function checkForEntityStop($pos)
    {
        $entities = [];
        foreach ($this->entities as $entity) {
            if ((int) $entity->offset + (int) $entity->length === (int) $pos
                && in_array($entity->type, $this->entitiesToParse, true)
            ) {
                $entities[] = $entity;
            }
        }

        return !empty($entities) ? array_reverse($entities) : false;
    }

    /**
     * Check for ambiguous entities in MarkdownV2 style (see Telegram docs).
     *
     * When exactly two italic/underline entities are in the list, they are removed
     * from it (the caller closes them with a dedicated sequence) and true is returned.
     *
     * @param array $entitiesToCheck entities closing at the same position; modified in place on a match
     * @return bool
     */
    protected function checkMarkdownV2AmbiguousEntities(&$entitiesToCheck)
    {
        $ambiguousCount = 0;
        $otherEntities = [];
        foreach ($entitiesToCheck as $entity) {
            if (in_array($entity->type, ['italic', 'underline'], true)) {
                $ambiguousCount++;
            } else {
                $otherEntities[] = $entity;
            }
        }

        if ($ambiguousCount !== 2) {
            return false;
        }

        $entitiesToCheck = $otherEntities;

        return true;
    }

    /**
     * Count the UTF-16 code units of the character passed.
     *
     * @param string $char UTF-8 character (possibly several code points)
     * @return int
     */
    protected function getUTF16CodePointsLength($char)
    {
        // Each UTF-16 code unit is two bytes.
        return intdiv(strlen(mb_convert_encoding($char, 'UTF-16BE', 'UTF-8')), 2);
    }

    /**
     * Escape a value for use inside a double-quoted HTML attribute.
     *
     * @param mixed $value
     * @return string
     */
    private function escapeHtmlAttribute($value): string
    {
        return htmlspecialchars((string) $value, ENT_COMPAT | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Prepare a URL for the "(...)" part of an inline link.
     *
     * MarkdownV2 requires ')' and '\' to be escaped there; legacy Markdown defines
     * no escaping for link targets, so the URL is returned unchanged.
     *
     * @param mixed $url
     * @return string
     */
    private function escapeLinkTarget($url): string
    {
        $url = (string) $url;
        if ($this->style === 'MarkdownV2') {
            return str_replace(['\\', ')'], ['\\\\', '\\)'], $url);
        }

        return $url;
    }
}
||||
| 787 |