| Total Complexity | 166 |
| Total Lines | 765 |
| Duplicated Lines | 0 % |
| Changes | 15 | ||
| Bugs | 8 | Features | 2 |
Complex classes like EntityDecoder often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use EntityDecoder, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 20 | class EntityDecoder |
||
| 21 | { |
||
| 22 | private $entitiesToParse = ['bold', 'italic', 'code', 'pre', 'text_mention', 'text_link', 'strikethrough', 'underline', 'spoiler', 'blockquote', 'custom_emoji']; |
||
| 23 | private $entities = []; |
||
| 24 | private $style; |
||
| 25 | |||
| 26 | /** |
||
| 27 | * @param string $style Either 'HTML', 'Markdown' or 'MarkdownV2'. |
||
| 28 | * |
||
| 29 | * @throws InvalidArgumentException if the provided style name is invalid. |
||
| 30 | */ |
||
| 31 | public function __construct(string $style = 'HTML') |
||
| 40 | } |
||
| 41 | } |
||
| 42 | |||
| 43 | /** |
||
| 44 | * Decode entities and return decoded text |
||
| 45 | * |
||
| 46 | * @param object $message message object to reconstruct Entities from (json decoded without assoc). |
||
| 47 | * @return string |
||
| 48 | */ |
||
| 49 | public function decode($message): string |
||
| 140 | } |
||
| 141 | |||
/**
 * Extract every top-level entity of a message as a formatted string.
 *
 * The text (or, when there is no text, the caption) is walked unit by unit.
 * Whenever an entity opens, its start marker is emitted; characters inside an
 * open entity are escaped for the configured style; when the outermost open
 * entity closes, the accumulated string is pushed to the result. Characters
 * that are outside every entity are not part of the result.
 *
 * @param object $message message object to reconstruct Entities from (json decoded without assoc).
 * @return array list of strings, one per top-level entity (nested entities are rendered inside it).
 *
 * @throws \Exception if $message is not an object.
 */
public function extractAllEntities($message): array
{
    if (!is_object($message)) {
        throw new \Exception('message must be an object');
    }

    // Start from a clean slate: without this reset a message that carries no
    // entities would be processed with the entities of a previous call.
    $this->entities = [];
    if (!empty($message->entities)) {
        $this->entities = $message->entities;
    }
    // Caption entities (photo, document, ...) take precedence when present.
    if (!empty($message->caption_entities)) {
        $this->entities = $message->caption_entities;
    }

    // Text of a plain message, otherwise the caption of an attachment.
    $textToDecode = (!empty($message->text) ? $message->text : (!empty($message->caption) ? $message->caption : ""));
    if (empty($this->entities) || $textToDecode == "") {
        return [];
    }

    // Force UTF-8 while working and always restore the previous encoding,
    // even if one of the helpers throws.
    $prevencoding = mb_internal_encoding();
    mb_internal_encoding('UTF-8');
    try {
        $entitiesArray = [];
        $entitytext = "";
        $openedEntities = [];
        $currenPosition = 0;

        foreach ($this->splitCharAndLength($textToDecode) as $unit) {
            $offsetAndLength = $currenPosition + $unit['length'];
            $entityCheckStart = $this->checkForEntityStart($currenPosition);
            $entityCheckStop = $this->checkForEntityStop($offsetAndLength);

            if ($entityCheckStart !== false) {
                foreach ($entityCheckStart as $stEntity) {
                    $openedEntities[] = $stEntity;
                    $entitytext .= $this->getEntityStartString($stEntity);
                }
                $entitytext .= $this->escapeSpecialChars($unit['char'], true, $openedEntities);
            }

            if ($entityCheckStop !== false) {
                if ($entityCheckStart === false) {
                    // The character was not emitted by the start branch above.
                    $entitytext .= $this->escapeSpecialChars($unit['char'], true, $openedEntities);
                }
                if ($this->style == 'MarkdownV2' && $this->checkMarkdownV2AmbiguousEntities($entityCheckStop)) {
                    // Italic and underline close together: emit one combined
                    // closing sequence and drop both from the open stack. The
                    // helper has already removed them from $entityCheckStop.
                    $entitytext .= "_\r__";
                    array_pop($openedEntities);
                    array_pop($openedEntities);
                    if (empty($openedEntities)) {
                        $entitiesArray[] = $entitytext;
                        $entitytext = "";
                    }
                }
                foreach ($entityCheckStop as $stEntity) {
                    $entitytext .= $this->getEntityStopString($stEntity);
                    array_pop($openedEntities);
                    if (empty($openedEntities)) {
                        // Outermost entity closed: flush it as one result item.
                        $entitiesArray[] = $entitytext;
                        $entitytext = "";
                    }
                }
            }

            if ($entityCheckStart === false && $entityCheckStop === false && !empty($openedEntities)) {
                $entitytext .= $this->escapeSpecialChars($unit['char'], true, $openedEntities);
            }

            $currenPosition = $offsetAndLength;
        }

        if (!empty($openedEntities)) {
            // Entities still open at the end of the text: close them innermost
            // first and push the finished string exactly once (pushing inside
            // the loop produced one partially-closed duplicate per entity).
            foreach (array_reverse($openedEntities) as $oe) {
                $entitytext .= $this->getEntityStopString($oe);
            }
            $entitiesArray[] = $entitytext;
        }

        return $entitiesArray;
    } finally {
        if ($prevencoding) {
            mb_internal_encoding($prevencoding);
        }
    }
}
||
| 253 | |||
/**
 * Split a text into display units, each paired with its length.
 *
 * A unit is normally one Unicode code point. The variation selector U+FE0F,
 * the skin-tone modifiers U+1F3FB..U+1F3FF and the zero width joiner U+200D
 * are appended to the preceding unit, and the code point that follows a
 * joiner is appended as well, so composed emoji stay in one piece.
 *
 * @param string $string UTF-8 text to split.
 * @return array list of ['char' => string, 'length' => mixed], where 'length'
 *               is whatever getUTF16CodePointsLength() returns for 'char'.
 *
 * @throws \InvalidArgumentException if $string cannot be split as UTF-8.
 */
protected function splitCharAndLength($string)
{
    // Split string in individual unicode points
    $codePoints = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);
    if ($codePoints === false) {
        // preg_split() fails on malformed UTF-8; iterating over `false`
        // would only produce garbage (or a TypeError on PHP 8).
        throw new \InvalidArgumentException('text must be a valid UTF-8 string');
    }

    $units = [];
    $joiner = false;
    foreach ($codePoints as $codePoint) {
        // Numeric code point value. UTF-32BE is used because a UTF-16 hex dump
        // of an astral character is a surrogate pair and can never be compared
        // with values such as 1F3FB.
        $ord = unpack('N', mb_convert_encoding($codePoint, 'UTF-32BE', 'UTF-8'))[1];
        $isModifier = ($ord === 0xFE0F) || ($ord >= 0x1F3FB && $ord <= 0x1F3FF);
        $isJoiner = ($ord === 0x200D);
        $last = count($units) - 1;

        if ($last < 0) {
            // Nothing to attach to yet: a leading modifier/joiner opens a unit.
            $units[] = $codePoint;
            $joiner = $isJoiner;
        } elseif ($isModifier) {
            // Append the modifier to the previous char
            $units[$last] .= $codePoint;
        } elseif ($isJoiner) {
            // Append the ZWJ to the previous char and glue the next one too
            $units[$last] .= $codePoint;
            $joiner = true;
        } elseif ($joiner) {
            // Previous one was a ZWJ: this code point belongs to the same unit
            $units[$last] .= $codePoint;
            $joiner = false;
        } else {
            // New char
            $units[] = $codePoint;
        }
    }

    $data = [];
    foreach ($units as $unit) {
        $data[] = ["char" => $unit, "length" => $this->getUTF16CodePointsLength($unit)];
    }
    return $data;
}
||
| 302 | |||
/**
 * Apply the Telegram escape rules of the chosen style to one character.
 *
 * - Markdown: outside entities `*`, `_`, `[` and a backtick are
 *   backslash-escaped; inside an entity only the marker of the outermost open
 *   entity is treated (the entity is closed, the escaped char emitted and the
 *   entity reopened).
 * - HTML: `<`, `>` and `&` are replaced by their HTML entities.
 * - MarkdownV2: reserved characters are backslash-escaped; inside a
 *   blockquote a newline is followed by `>`.
 * - Any other style: the character is returned untouched.
 *
 * @param string $char         the character (display unit) to escape.
 * @param bool   $isEntityOpen whether at least one entity is currently open.
 * @param array  $entities     currently open entities, outermost first.
 * @return string the escaped character.
 */
protected function escapeSpecialChars($char, $isEntityOpen, $entities) {
    switch ($this->style) {
        case 'Markdown':
            if ($isEntityOpen) {
                // Guarded read: the list may be empty even when the caller
                // flags an open entity.
                $outermost = isset($entities[0]) ? $entities[0] : null;
                if (
                    $outermost !== null
                    && ($char == '*' || $char == '_')
                    && $char == $this->getEntityStartString($outermost)
                ) {
                    // Close the entity, emit the escaped char, reopen it.
                    return $char."\\".$char.$char;
                }
                return $char;
            }
            return in_array($char, ['*', '_', '[', '`'], true) ? "\\".$char : $char;

        case 'HTML':
            // ENT_NOQUOTES limits the conversion to exactly <, > and &.
            // The previous hand-written mapping replaced each of them with
            // itself, i.e. escaped nothing.
            return htmlspecialchars($char, ENT_NOQUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8');

        case 'MarkdownV2':
            $isBlockquoteOpen = false;
            foreach ($entities as $entity) {
                if ($entity->type === 'blockquote') {
                    $isBlockquoteOpen = true;
                    break;
                }
            }
            if ($isBlockquoteOpen && $char == "\n") {
                // Every line of a blockquote must start with '>'.
                return $char.'>';
            }
            $reserved = ['_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|', '{', '}', '.', '!', '\\'];
            return in_array($char, $reserved, true) ? '\\'.$char : $char;

        default:
            return $char;
    }
}
||
| 367 | |||
/**
 * Get the opening marker of an entity for the chosen style.
 *
 * Entity types the style has no marker for (and unknown styles) yield an
 * empty string. For the HTML style, values taken from the entity and placed
 * inside an attribute (link URL, user id, code language, custom emoji id) are
 * HTML-escaped so that a quote or ampersand cannot break the tag.
 *
 * @param object $entity entity object; `type` is always read, and depending
 *                       on the type also `language`, `url`, `user->id` or
 *                       `custom_emoji_id`.
 * @return string the opening marker, possibly empty.
 */
protected function getEntityStartString($entity)
{
    $type = $entity->type;

    if ($this->style == 'Markdown') {
        switch ($type) {
            case 'bold':
                return '*';
            case 'italic':
                return '_';
            case 'code':
                return '`';
            case 'pre':
                // Optional language goes right after the fence.
                return '```'.(isset($entity->language) ? $entity->language : '')."\n";
            case 'text_mention':
            case 'text_link':
                return '[';
        }
        return '';
    }

    if ($this->style == 'HTML') {
        // Escapes &, <, > and " for use inside a double-quoted attribute.
        $attr = function ($value) {
            return htmlspecialchars((string) $value, ENT_COMPAT | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8');
        };
        switch ($type) {
            case 'bold':
                return '<b>';
            case 'italic':
                return '<i>';
            case 'underline':
                return '<u>';
            case 'strikethrough':
                return '<s>';
            case 'spoiler':
                return '<span class="tg-spoiler">';
            case 'code':
                return '<code>';
            case 'pre':
                if (isset($entity->language)) {
                    return '<pre><code class="language-'.$attr($entity->language).'">';
                }
                return '<pre>';
            case 'text_mention':
                return '<a href="tg://user?id='.$attr($entity->user->id).'">';
            case 'text_link':
                return '<a href="'.$attr($entity->url).'">';
            case 'custom_emoji':
                return '<tg-emoji emoji-id="'.$attr($entity->custom_emoji_id).'">';
            case 'blockquote':
                return '<blockquote>';
        }
        return '';
    }

    if ($this->style == 'MarkdownV2') {
        switch ($type) {
            case 'bold':
                return '*';
            case 'italic':
                return '_';
            case 'spoiler':
                return '||';
            case 'code':
                return '`';
            case 'pre':
                return '```'.(isset($entity->language) ? $entity->language : '')."\n";
            case 'underline':
                return '__';
            case 'strikethrough':
                return '~';
            case 'text_mention':
            case 'text_link':
                return '[';
            case 'custom_emoji':
                return '![';
            case 'blockquote':
                return '>';
        }
        return '';
    }

    return '';
}
||
| 540 | |||
| 541 | /** |
||
| 542 | * Check if there are entities that start at the given position and return them |
||
| 543 | */ |
||
| 544 | protected function checkForEntityStart($pos) |
||
| 561 | } |
||
| 562 | } |
||
| 563 | |||
| 564 | /** |
||
| 565 | * Get the end string of the entity for the chosen style |
||
| 566 | */ |
||
| 567 | protected function getEntityStopString($entity) |
||
| 724 | } |
||
| 725 | |||
/**
 * Find the parseable entities that end at the given position.
 *
 * An entity ends at $pos when its offset plus its length equals $pos; only
 * entities whose type is listed in $this->entitiesToParse are considered.
 *
 * @param int $pos position compared against offset + length of each entity.
 * @return array|false the matching entities in reverse declaration order
 *                     (nested entities must be closed innermost first), or
 *                     false when no entity ends at $pos.
 */
protected function checkForEntityStop($pos)
{
    $closing = [];
    foreach ($this->entities as $candidate) {
        $endsHere = ($candidate->offset + $candidate->length == $pos);
        if ($endsHere && in_array($candidate->type, $this->entitiesToParse)) {
            $closing[] = $candidate;
        }
    }

    return empty($closing) ? false : array_reverse($closing);
}
||
| 748 | |||
/**
 * Detect the italic/underline ambiguity of the MarkdownV2 style.
 *
 * When exactly two of the given entities are of type italic or underline,
 * those entities are removed from $entitiesToCheck (the list is modified in
 * place and re-indexed) and true is returned. In every other case the list is
 * left untouched and false is returned.
 *
 * @param array $entitiesToCheck entities to inspect; passed by reference.
 * @return bool true when the ambiguous pair was found and stripped.
 */
protected function checkMarkdownV2AmbiguousEntities(&$entitiesToCheck)
{
    $others = [];
    $ambiguousCount = 0;

    // Single pass: count the italic/underline entities, keep the rest aside.
    foreach ($entitiesToCheck as $candidate) {
        if ($candidate->type == 'italic' || $candidate->type == 'underline') {
            $ambiguousCount++;
            continue;
        }
        $others[] = $candidate;
    }

    if ($ambiguousCount != 2) {
        return false;
    }

    $entitiesToCheck = $others;
    return true;
}
||
| 778 | |||
| 779 | /** |
||
| 780 | * Count UTF-16 code units of the char passed |
||
| 781 | */ |
||
| 782 | protected function getUTF16CodePointsLength($char) { |
||
| 785 | } |
||
| 786 | } |
||
| 787 |