Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
1 | <?php |
||
7 | class Charset |
||
8 | { |
||
9 | // tables used for transcoding different charsets into us-ascii xml |
||
10 | protected $xml_iso88591_Entities = array("in" => array(), "out" => array()); |
||
11 | protected $xml_iso88591_utf8 = array("in" => array(), "out" => array()); |
||
12 | |||
13 | /// @todo add to iso table the characters from cp_1252 range, i.e. 128 to 159? |
||
14 | /// These will NOT be present in true ISO-8859-1, but will save the unwary |
||
15 | /// windows user from sending junk (though no luck when receiving them...) |
||
16 | /* |
||
|
|||
17 | protected $xml_cp1252_Entities = array('in' => array(), 'out' => array(
||
18 | '€', '?', '‚', 'ƒ', |
||
19 | '„', '…', '†', '‡', |
||
20 | 'ˆ', '‰', 'Š', '‹', |
||
21 | 'Œ', '?', 'Ž', '?', |
||
22 | '?', '‘', '’', '“', |
||
23 | '”', '•', '–', '—', |
||
24 | '˜', '™', 'š', '›', |
||
25 | 'œ', '?', 'ž', 'Ÿ' |
||
26 | )); |
||
27 | */ |
||
28 | |||
29 | protected $charset_supersets = array( |
||
30 | 'US-ASCII' => array('ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4', |
||
31 | 'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', |
||
32 | 'ISO-8859-9', 'ISO-8859-10', 'ISO-8859-11', 'ISO-8859-12', |
||
33 | 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'UTF-8', |
||
34 | 'EUC-JP', 'EUC-', 'EUC-KR', 'EUC-CN',), |
||
35 | ); |
||
36 | |||
37 | protected static $instance = null; |
||
38 | |||
/**
 * Returns the shared Charset object, creating it on the first call.
 *
 * This class is a singleton for performance reasons: the private constructor fills the
 * transcoding tables, and going through this accessor means that work is only done once.
 *
 * @return Charset
 */
public static function instance()
{
    // Fast path: the instance has already been built by a previous call
    if (self::$instance !== null) {
        return self::$instance;
    }

    self::$instance = new self();

    return self::$instance;
}
||
52 | |||
/**
 * Fills the ISO-8859-1 => XML numeric character reference lookup table.
 *
 * For every byte value in the ranges 0-31 and 160-255 (in that order), the raw byte is appended to
 * the "in" list and the matching "&#N;" reference to the "out" list. Values 32-159 are not added.
 */
private function __construct()
{
    // Each entry is array(first value, one-past-last value); both ranges get identical treatment
    $ranges = array(
        array(0, 32),
        array(160, 256),
    );

    foreach ($ranges as $bounds) {
        for ($code = $bounds[0]; $code < $bounds[1]; $code++) {
            $this->xml_iso88591_Entities["in"][] = chr($code);
            $this->xml_iso88591_Entities["out"][] = "&#{$code};";
        }
    }

    // NB: the cp1252 range (128-159) is not populated here: the $xml_cp1252_Entities table it
    // would feed is itself still commented out - see the @todo next to the table declarations
}
||
70 | |||
71 | /** |
||
72 | * Convert a string to the correct XML representation in a target charset. |
||
73 | * |
||
74 | * To help correct communication of non-ascii chars inside strings, regardless of the charset used when sending |
||
75 | * requests, parsing them, sending responses and parsing responses, an option is to convert all non-ascii chars |
||
76 | * present in the message into their equivalent 'charset entity'. Charset entities enumerated this way are |
||
77 | * independent of the charset encoding used to transmit them, and all XML parsers are bound to understand them. |
||
78 | * Note that in the std case we are not sending a charset encoding mime type along with http headers, so we are |
||
79 | * bound by RFC 3023 to emit strict us-ascii. |
||
80 | * |
||
81 | * @todo do a bit of basic benchmarking (strtr vs. str_replace) |
||
82 | * @todo make usage of iconv() or recode_string() or mb_string() where available |
||
83 | * |
||
84 | * @param string $data |
||
85 | * @param string $srcEncoding |
||
86 | * @param string $destEncoding |
||
87 | * |
||
88 | * @return string |
||
89 | */ |
||
90 | public function encodeEntities($data, $srcEncoding = '', $destEncoding = '') |
||
222 | |||
/**
 * Checks if a given charset encoding is present in a list of encodings or
 * if it is a valid subset of any encoding in the list.
 *
 * Charset names are compared case-insensitively and ignoring surrounding whitespace, so that
 * 'utf-8' matches 'UTF-8' and a list such as 'UTF-8, ISO-8859-1' is parsed as expected.
 *
 * @param string $encoding charset to be tested
 * @param string|array $validList comma separated list of valid charsets (or array of charsets)
 *
 * @return bool true when $encoding is listed in $validList, or when one of the charsets in $validList is a
 *              known superset of $encoding; false otherwise, including when $validList is neither a string
 *              nor an array
 */
public function isValidCharset($encoding, $validList)
{
    if (is_string($validList)) {
        $validList = explode(',', $validList);
    }
    // Explicit type check instead of silencing in_array() with '@'
    if (!is_array($validList)) {
        return false;
    }

    // Normalize once, so that the direct match and the superset lookup agree on casing.
    // The keys and values of $this->charset_supersets are all upper case.
    $encoding = strtoupper(trim((string)$encoding));

    $allowedCharsets = array();
    foreach ($validList as $allowed) {
        // Entries which are not strings can not name a charset: ignore them
        if (is_string($allowed)) {
            $allowedCharsets[] = strtoupper(trim($allowed));
        }
    }

    if (in_array($encoding, $allowedCharsets, true)) {
        return true;
    }

    if (!array_key_exists($encoding, $this->charset_supersets)) {
        return false;
    }

    // $encoding is acceptable if any of the allowed charsets is one of its supersets
    foreach ($allowedCharsets as $allowed) {
        if (in_array($allowed, $this->charset_supersets[$encoding], true)) {
            return true;
        }
    }

    return false;
}
||
251 | |||
252 | /** |
||
253 | * Used only for backwards compatibility |
||
254 | * @deprecated |
||
255 | * |
||
256 | * @param string $charset |
||
257 | * |
||
258 | * @return array |
||
259 | * |
||
260 | * @throws \Exception for unknown/unsupported charsets |
||
261 | */ |
||
262 | public function getEntities($charset) |
||
272 | |||
273 | } |
||
274 |
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.