Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like Encoding often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Encoding, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 42 | class Encoding |
||
| 43 | { |
||
| 44 | use \PHPDaemon\Traits\ClassWatchdog; |
||
| 45 | use \PHPDaemon\Traits\StaticObjectWatchdog; |
||
| 46 | |||
| 47 | protected static $win1252ToUtf8 = array( |
||
| 48 | 128 => "\xe2\x82\xac", |
||
| 49 | |||
| 50 | 130 => "\xe2\x80\x9a", |
||
| 51 | 131 => "\xc6\x92", |
||
| 52 | 132 => "\xe2\x80\x9e", |
||
| 53 | 133 => "\xe2\x80\xa6", |
||
| 54 | 134 => "\xe2\x80\xa0", |
||
| 55 | 135 => "\xe2\x80\xa1", |
||
| 56 | 136 => "\xcb\x86", |
||
| 57 | 137 => "\xe2\x80\xb0", |
||
| 58 | 138 => "\xc5\xa0", |
||
| 59 | 139 => "\xe2\x80\xb9", |
||
| 60 | 140 => "\xc5\x92", |
||
| 61 | |||
| 62 | 142 => "\xc5\xbd", |
||
| 63 | |||
| 64 | |||
| 65 | 145 => "\xe2\x80\x98", |
||
| 66 | 146 => "\xe2\x80\x99", |
||
| 67 | 147 => "\xe2\x80\x9c", |
||
| 68 | 148 => "\xe2\x80\x9d", |
||
| 69 | 149 => "\xe2\x80\xa2", |
||
| 70 | 150 => "\xe2\x80\x93", |
||
| 71 | 151 => "\xe2\x80\x94", |
||
| 72 | 152 => "\xcb\x9c", |
||
| 73 | 153 => "\xe2\x84\xa2", |
||
| 74 | 154 => "\xc5\xa1", |
||
| 75 | 155 => "\xe2\x80\xba", |
||
| 76 | 156 => "\xc5\x93", |
||
| 77 | |||
| 78 | 158 => "\xc5\xbe", |
||
| 79 | 159 => "\xc5\xb8" |
||
| 80 | ); |
||
| 81 | |||
| 82 | protected static $brokenUtf8ToUtf8 = array( |
||
| 83 | "\xc2\x80" => "\xe2\x82\xac", |
||
| 84 | |||
| 85 | "\xc2\x82" => "\xe2\x80\x9a", |
||
| 86 | "\xc2\x83" => "\xc6\x92", |
||
| 87 | "\xc2\x84" => "\xe2\x80\x9e", |
||
| 88 | "\xc2\x85" => "\xe2\x80\xa6", |
||
| 89 | "\xc2\x86" => "\xe2\x80\xa0", |
||
| 90 | "\xc2\x87" => "\xe2\x80\xa1", |
||
| 91 | "\xc2\x88" => "\xcb\x86", |
||
| 92 | "\xc2\x89" => "\xe2\x80\xb0", |
||
| 93 | "\xc2\x8a" => "\xc5\xa0", |
||
| 94 | "\xc2\x8b" => "\xe2\x80\xb9", |
||
| 95 | "\xc2\x8c" => "\xc5\x92", |
||
| 96 | |||
| 97 | "\xc2\x8e" => "\xc5\xbd", |
||
| 98 | |||
| 99 | |||
| 100 | "\xc2\x91" => "\xe2\x80\x98", |
||
| 101 | "\xc2\x92" => "\xe2\x80\x99", |
||
| 102 | "\xc2\x93" => "\xe2\x80\x9c", |
||
| 103 | "\xc2\x94" => "\xe2\x80\x9d", |
||
| 104 | "\xc2\x95" => "\xe2\x80\xa2", |
||
| 105 | "\xc2\x96" => "\xe2\x80\x93", |
||
| 106 | "\xc2\x97" => "\xe2\x80\x94", |
||
| 107 | "\xc2\x98" => "\xcb\x9c", |
||
| 108 | "\xc2\x99" => "\xe2\x84\xa2", |
||
| 109 | "\xc2\x9a" => "\xc5\xa1", |
||
| 110 | "\xc2\x9b" => "\xe2\x80\xba", |
||
| 111 | "\xc2\x9c" => "\xc5\x93", |
||
| 112 | |||
| 113 | "\xc2\x9e" => "\xc5\xbe", |
||
| 114 | "\xc2\x9f" => "\xc5\xb8" |
||
| 115 | ); |
||
| 116 | |||
| 117 | protected static $utf8ToWin1252 = array( |
||
| 118 | "\xe2\x82\xac" => "\x80", |
||
| 119 | |||
| 120 | "\xe2\x80\x9a" => "\x82", |
||
| 121 | "\xc6\x92" => "\x83", |
||
| 122 | "\xe2\x80\x9e" => "\x84", |
||
| 123 | "\xe2\x80\xa6" => "\x85", |
||
| 124 | "\xe2\x80\xa0" => "\x86", |
||
| 125 | "\xe2\x80\xa1" => "\x87", |
||
| 126 | "\xcb\x86" => "\x88", |
||
| 127 | "\xe2\x80\xb0" => "\x89", |
||
| 128 | "\xc5\xa0" => "\x8a", |
||
| 129 | "\xe2\x80\xb9" => "\x8b", |
||
| 130 | "\xc5\x92" => "\x8c", |
||
| 131 | |||
| 132 | "\xc5\xbd" => "\x8e", |
||
| 133 | |||
| 134 | |||
| 135 | "\xe2\x80\x98" => "\x91", |
||
| 136 | "\xe2\x80\x99" => "\x92", |
||
| 137 | "\xe2\x80\x9c" => "\x93", |
||
| 138 | "\xe2\x80\x9d" => "\x94", |
||
| 139 | "\xe2\x80\xa2" => "\x95", |
||
| 140 | "\xe2\x80\x93" => "\x96", |
||
| 141 | "\xe2\x80\x94" => "\x97", |
||
| 142 | "\xcb\x9c" => "\x98", |
||
| 143 | "\xe2\x84\xa2" => "\x99", |
||
| 144 | "\xc5\xa1" => "\x9a", |
||
| 145 | "\xe2\x80\xba" => "\x9b", |
||
| 146 | "\xc5\x93" => "\x9c", |
||
| 147 | |||
| 148 | "\xc5\xbe" => "\x9e", |
||
| 149 | "\xc5\xb8" => "\x9f" |
||
| 150 | ); |
||
| 151 | |||
| 152 | /** |
||
| 153 | * toISO8859 |
||
| 154 | * @param string $text Any string |
||
| 155 | * @return string The same string, Win1252 encoded |
||
|
|
|||
| 156 | */ |
||
| 157 | public static function toISO8859($text) |
||
| 161 | |||
| 162 | /** |
||
| 163 | * toWin1252 |
||
| 164 | * @param string $text Any string |
||
| 165 | * @return string The same string, Win1252 encoded |
||
| 166 | */ |
||
| 167 | public static function toWin1252($text) |
||
| 186 | |||
| 187 | /** |
||
| 188 | * Function Encoding::toUTF8 |
||
| 189 | * |
||
| 190 | * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8. |
||
| 191 | * |
||
| 192 | * It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1. |
||
| 193 | * |
||
| 194 | * It may fail to convert characters to UTF-8 if they fall into one of these scenarios: |
||
| 195 | * |
||
| 196 | * 1) when any of these characters: ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß |
||
| 197 | * are followed by any of these: ("group B") |
||
| 198 | * ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ |
||
| 199 | * For example: %ABREPRESENT%C9%BB. «REPRESENTÉ» |
||
| 200 | * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB) |
||
| 201 | * is also a valid unicode character, and will be left unchanged. |
||
| 202 | * |
||
| 203 | * 2) when any of these: àáâãäåæçèéêëìíîï are followed by TWO chars from group B, |
||
| 204 | * 3) when any of these: ðñòó are followed by THREE chars from group B. |
||
| 205 | * |
||
| 206 | * @name toUTF8 |
||
| 207 | * @param string $text Any string |
||
| 208 | * @return string The same string, UTF8 encoded |
||
| 209 | * |
||
| 210 | */ |
||
| 211 | public static function toUTF8($text) |
||
| 278 | |||
| 279 | /** |
||
| 280 | * fixUTF8 |
||
| 281 | * @param string $text Any string |
||
| 282 | * @return string |
||
| 283 | */ |
||
| 284 | public static function fixUTF8($text) |
||
| 309 | |||
| 310 | /** |
||
| 311 | * If you received a UTF-8 string that was converted from Windows-1252 as if it were ISO8859-1 |
||
| 312 | * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it. |
||
| 313 | * See: http://en.wikipedia.org/wiki/Windows-1252 |
||
| 314 | * @param string $text Any string |
||
| 315 | * @return string |
||
| 316 | */ |
||
| 317 | public static function UTF8FixWin1252Chars($text) |
||
| 321 | |||
| 322 | /** |
||
| 323 | * Remove BOM |
||
| 324 | * @param string $str Any string |
||
| 325 | * @return string |
||
| 326 | */ |
||
| 327 | public static function removeBOM($str = "") |
||
| 334 | |||
| 335 | /** |
||
| 336 | * Encode |
||
| 337 | * @param string $text Any string |
||
| 338 | * @return string |
||
| 339 | */ |
||
| 340 | public static function encode($encodingLabel, $text) |
||
| 350 | |||
| 351 | /** |
||
| 352 | * Normalize encoding name |
||
| 353 | * @param string $encodingLabel Encoding name |
||
| 354 | * @return string |
||
| 355 | */ |
||
| 356 | public static function normalizeEncoding($encodingLabel) |
||
| 378 | |||
| 379 | /** |
||
| 380 | * toLatin1 |
||
| 381 | * @param string $text Any string |
||
| 382 | * @return string The same string, Win1252 encoded |
||
| 383 | */ |
||
| 384 | public static function toLatin1($text) |
||
| 388 | } |
||
| 389 |
This check compares the return type specified in the `@return` annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.