Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Encoding often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Encoding, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
42 | class Encoding |
||
43 | { |
||
44 | use \PHPDaemon\Traits\ClassWatchdog; |
||
45 | use \PHPDaemon\Traits\StaticObjectWatchdog; |
||
46 | |||
47 | protected static $win1252ToUtf8 = array( |
||
48 | 128 => "\xe2\x82\xac", |
||
49 | |||
50 | 130 => "\xe2\x80\x9a", |
||
51 | 131 => "\xc6\x92", |
||
52 | 132 => "\xe2\x80\x9e", |
||
53 | 133 => "\xe2\x80\xa6", |
||
54 | 134 => "\xe2\x80\xa0", |
||
55 | 135 => "\xe2\x80\xa1", |
||
56 | 136 => "\xcb\x86", |
||
57 | 137 => "\xe2\x80\xb0", |
||
58 | 138 => "\xc5\xa0", |
||
59 | 139 => "\xe2\x80\xb9", |
||
60 | 140 => "\xc5\x92", |
||
61 | |||
62 | 142 => "\xc5\xbd", |
||
63 | |||
64 | |||
65 | 145 => "\xe2\x80\x98", |
||
66 | 146 => "\xe2\x80\x99", |
||
67 | 147 => "\xe2\x80\x9c", |
||
68 | 148 => "\xe2\x80\x9d", |
||
69 | 149 => "\xe2\x80\xa2", |
||
70 | 150 => "\xe2\x80\x93", |
||
71 | 151 => "\xe2\x80\x94", |
||
72 | 152 => "\xcb\x9c", |
||
73 | 153 => "\xe2\x84\xa2", |
||
74 | 154 => "\xc5\xa1", |
||
75 | 155 => "\xe2\x80\xba", |
||
76 | 156 => "\xc5\x93", |
||
77 | |||
78 | 158 => "\xc5\xbe", |
||
79 | 159 => "\xc5\xb8" |
||
80 | ); |
||
81 | |||
82 | protected static $brokenUtf8ToUtf8 = array( |
||
83 | "\xc2\x80" => "\xe2\x82\xac", |
||
84 | |||
85 | "\xc2\x82" => "\xe2\x80\x9a", |
||
86 | "\xc2\x83" => "\xc6\x92", |
||
87 | "\xc2\x84" => "\xe2\x80\x9e", |
||
88 | "\xc2\x85" => "\xe2\x80\xa6", |
||
89 | "\xc2\x86" => "\xe2\x80\xa0", |
||
90 | "\xc2\x87" => "\xe2\x80\xa1", |
||
91 | "\xc2\x88" => "\xcb\x86", |
||
92 | "\xc2\x89" => "\xe2\x80\xb0", |
||
93 | "\xc2\x8a" => "\xc5\xa0", |
||
94 | "\xc2\x8b" => "\xe2\x80\xb9", |
||
95 | "\xc2\x8c" => "\xc5\x92", |
||
96 | |||
97 | "\xc2\x8e" => "\xc5\xbd", |
||
98 | |||
99 | |||
100 | "\xc2\x91" => "\xe2\x80\x98", |
||
101 | "\xc2\x92" => "\xe2\x80\x99", |
||
102 | "\xc2\x93" => "\xe2\x80\x9c", |
||
103 | "\xc2\x94" => "\xe2\x80\x9d", |
||
104 | "\xc2\x95" => "\xe2\x80\xa2", |
||
105 | "\xc2\x96" => "\xe2\x80\x93", |
||
106 | "\xc2\x97" => "\xe2\x80\x94", |
||
107 | "\xc2\x98" => "\xcb\x9c", |
||
108 | "\xc2\x99" => "\xe2\x84\xa2", |
||
109 | "\xc2\x9a" => "\xc5\xa1", |
||
110 | "\xc2\x9b" => "\xe2\x80\xba", |
||
111 | "\xc2\x9c" => "\xc5\x93", |
||
112 | |||
113 | "\xc2\x9e" => "\xc5\xbe", |
||
114 | "\xc2\x9f" => "\xc5\xb8" |
||
115 | ); |
||
116 | |||
117 | protected static $utf8ToWin1252 = array( |
||
118 | "\xe2\x82\xac" => "\x80", |
||
119 | |||
120 | "\xe2\x80\x9a" => "\x82", |
||
121 | "\xc6\x92" => "\x83", |
||
122 | "\xe2\x80\x9e" => "\x84", |
||
123 | "\xe2\x80\xa6" => "\x85", |
||
124 | "\xe2\x80\xa0" => "\x86", |
||
125 | "\xe2\x80\xa1" => "\x87", |
||
126 | "\xcb\x86" => "\x88", |
||
127 | "\xe2\x80\xb0" => "\x89", |
||
128 | "\xc5\xa0" => "\x8a", |
||
129 | "\xe2\x80\xb9" => "\x8b", |
||
130 | "\xc5\x92" => "\x8c", |
||
131 | |||
132 | "\xc5\xbd" => "\x8e", |
||
133 | |||
134 | |||
135 | "\xe2\x80\x98" => "\x91", |
||
136 | "\xe2\x80\x99" => "\x92", |
||
137 | "\xe2\x80\x9c" => "\x93", |
||
138 | "\xe2\x80\x9d" => "\x94", |
||
139 | "\xe2\x80\xa2" => "\x95", |
||
140 | "\xe2\x80\x93" => "\x96", |
||
141 | "\xe2\x80\x94" => "\x97", |
||
142 | "\xcb\x9c" => "\x98", |
||
143 | "\xe2\x84\xa2" => "\x99", |
||
144 | "\xc5\xa1" => "\x9a", |
||
145 | "\xe2\x80\xba" => "\x9b", |
||
146 | "\xc5\x93" => "\x9c", |
||
147 | |||
148 | "\xc5\xbe" => "\x9e", |
||
149 | "\xc5\xb8" => "\x9f" |
||
150 | ); |
||
151 | |||
152 | /** |
||
153 | * toISO8859 |
||
154 | * @param string $text Any string |
||
155 | * @return string The same string, Win1252 encoded |
||
|
|||
156 | */ |
||
157 | public static function toISO8859($text) |
||
161 | |||
162 | /** |
||
163 | * toWin1252 |
||
164 | * @param string $text Any string |
||
165 | * @return string The same string, Win1252 encoded |
||
166 | */ |
||
167 | public static function toWin1252($text) |
||
186 | |||
187 | /** |
||
188 | * Function Encoding::toUTF8 |
||
189 | * |
||
190 | * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8. |
||
191 | * |
||
192 | * It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1. |
||
193 | * |
||
194 | * It may fail to convert characters to UTF-8 if they fall into one of these scenarios: |
||
195 | * |
||
196 | * 1) when any of these characters: ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß |
||
197 | * are followed by any of these: ("group B") |
||
198 | * ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶•¸¹º»¼½¾¿ |
||
199 | * For example: %ABREPRESENT%C9%BB. «REPRESENTÉ» |
||
200 | * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB) |
||
201 | * is also a valid unicode character, and will be left unchanged. |
||
202 | * |
||
203 | * 2) when any of these: àáâãäåæçèéêëìíîï are followed by TWO chars from group B, |
||
204 | * 3) when any of these: ðñòó are followed by THREE chars from group B. |
||
205 | * |
||
206 | * @name toUTF8 |
||
207 | * @param string $text Any string |
||
208 | * @return string The same string, UTF8 encoded |
||
209 | * |
||
210 | */ |
||
211 | public static function toUTF8($text) |
||
278 | |||
279 | /** |
||
280 | * fixUTF8 |
||
281 | * @param string $text Any string |
||
282 | * @return string |
||
283 | */ |
||
284 | public static function fixUTF8($text) |
||
309 | |||
310 | /** |
||
311 | * If you received a UTF-8 string that was converted from Windows-1252 as if it were ISO 8859-1 |
||
312 | * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it. |
||
313 | * See: http://en.wikipedia.org/wiki/Windows-1252 |
||
314 | * @param string $text Any string |
||
315 | * @return string |
||
316 | */ |
||
317 | public static function UTF8FixWin1252Chars($text) |
||
321 | |||
322 | /** |
||
323 | * Remove BOM |
||
324 | * @param string $str Any string |
||
325 | * @return string |
||
326 | */ |
||
327 | public static function removeBOM($str = "") |
||
334 | |||
335 | /** |
||
336 | * Encode |
||
337 | * @param string $text Any string |
||
338 | * @return string |
||
339 | */ |
||
340 | public static function encode($encodingLabel, $text) |
||
350 | |||
351 | /** |
||
352 | * Normalize encoding name |
||
353 | * @param string $encodingLabel Encoding name |
||
354 | * @return string |
||
355 | */ |
||
356 | public static function normalizeEncoding($encodingLabel) |
||
378 | |||
379 | /** |
||
380 | * toLatin1 |
||
381 | * @param string $text Any string |
||
382 | * @return string The same string, Win1252 encoded |
||
383 | */ |
||
384 | public static function toLatin1($text) |
||
388 | } |
||
389 |
This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they do not match.