Completed
Push — master ( 6be6c0...cc60cf )
by Gaetano
03:16
created

Charset   A

Complexity

Total Complexity 38

Size/Duplication

Total Lines 267
Duplicated Lines 3 %

Coupling/Cohesion

Components 3
Dependencies 1

Test Coverage

Coverage 63.39%

Importance

Changes 0
Metric Value
dl 8
loc 267
ccs 71
cts 112
cp 0.6339
rs 9.36
c 0
b 0
f 0
wmc 38
lcom 3
cbo 1

5 Methods

Rating   Name   Duplication   Size   Complexity  
A instance() 0 8 2
A __construct() 8 17 3
F encodeEntities() 0 132 25
A isValidCharset() 0 19 6
A getEntities() 0 10 2

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
3
namespace PhpXmlRpc\Helper;
4
5
use PhpXmlRpc\PhpXmlRpc;
6
7
/**
 * Helper for transcoding strings between a small set of charsets (US-ASCII, ISO-8859-1, UTF-8) while escaping
 * them for inclusion in XML, and for checking charset names against a list of accepted charsets.
 *
 * The class is a singleton: obtain it via Charset::instance().
 */
class Charset
{
    // tables used for transcoding different charsets into us-ascii xml
    // "in" holds raw single-byte characters, "out" the matching numeric character references (filled by the constructor)
    protected $xml_iso88591_Entities = array("in" => array(), "out" => array());
    // not populated nor read by this class; kept because it is part of the protected interface
    protected $xml_iso88591_utf8 = array("in" => array(), "out" => array());

    /// @todo add to iso table the characters from cp_1252 range, i.e. 128 to 159?
    /// These will NOT be present in true ISO-8859-1, but will save the unwary
    /// windows user from sending junk (though no luck when receiving them...)
    /*
    protected $xml_cp1252_Entities = array('in' => array(), 'out' => array(
        '&#x20AC;', '?',        '&#x201A;', '&#x0192;',
        '&#x201E;', '&#x2026;', '&#x2020;', '&#x2021;',
        '&#x02C6;', '&#x2030;', '&#x0160;', '&#x2039;',
        '&#x0152;', '?',        '&#x017D;', '?',
        '?',        '&#x2018;', '&#x2019;', '&#x201C;',
        '&#x201D;', '&#x2022;', '&#x2013;', '&#x2014;',
        '&#x02DC;', '&#x2122;', '&#x0161;', '&#x203A;',
        '&#x0153;', '?',        '&#x017E;', '&#x0178;'
    ));
    */

    // Maps a charset name (upper case) to the charsets which are supersets of it.
    // NOTE(review): 'EUC-' looks like a truncated charset name - confirm the intended value before changing it.
    protected $charset_supersets = array(
        'US-ASCII' => array('ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
            'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8',
            'ISO-8859-9', 'ISO-8859-10', 'ISO-8859-11', 'ISO-8859-12',
            'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'UTF-8',
            'EUC-JP', 'EUC-', 'EUC-KR', 'EUC-CN',),
    );

    protected static $instance = null;

    /**
     * This class is singleton for performance reasons.
     *
     * @return Charset
     */
    public static function instance()
    {
        if (self::$instance === null) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Fills the ISO-8859-1 entity table with the bytes 0-31 and 160-255, in that order, each paired with its
     * decimal numeric character reference.
     *
     * The order matters: encodeEntities() relies on the entries from index 32 onwards being the bytes 160-255.
     */
    private function __construct()
    {
        foreach (array_merge(range(0, 31), range(160, 255)) as $code) {
            $this->xml_iso88591_Entities["in"][] = chr($code);
            $this->xml_iso88591_Entities["out"][] = "&#{$code};";
        }

        /*for ($i = 128; $i < 160; $i++)
        {
            $this->xml_cp1252_Entities['in'][] = chr($i);
        }*/
    }

    /**
     * Convert a string to the correct XML representation in a target charset.
     *
     * To help correct communication of non-ascii chars inside strings, regardless of the charset used when sending
     * requests, parsing them, sending responses and parsing responses, an option is to convert all non-ascii chars
     * present in the message into their equivalent 'charset entity'. Charset entities enumerated this way are
     * independent of the charset encoding used to transmit them, and all XML parsers are bound to understand them.
     * Note that in the std case we are not sending a charset encoding mime type along with http headers, so we are
     * bound by RFC 3023 to emit strict us-ascii.
     *
     * Supported source charsets are ISO-8859-1, US-ASCII and UTF-8; supported target charsets are '' (treated like
     * US-ASCII for ISO-8859-1 and UTF-8 sources), US-ASCII, ISO-8859-1 and UTF-8. Charset names are matched
     * case-insensitively. For any other combination an error is logged via error_log() and '' is returned.
     *
     * When the source is UTF-8, bytes which can not start a sequence, as well as multi-byte sequences truncated by
     * the end of the string, are dropped from the output. Continuation bytes are not validated.
     *
     * @todo do a bit of basic benchmarking (strtr vs. str_replace)
     * @todo make usage of iconv() or recode_string() or mb_string() where available
     *
     * @param string $data
     * @param string $srcEncoding when empty, PhpXmlRpc::$xmlrpc_internalencoding is used
     * @param string $destEncoding
     *
     * @return string
     */
    public function encodeEntities($data, $srcEncoding = '', $destEncoding = '')
    {
        if ($srcEncoding == '') {
            // lame, but we know no better...
            $srcEncoding = PhpXmlRpc::$xmlrpc_internalencoding;
        }

        $conversion = strtoupper($srcEncoding . '_' . $destEncoding);
        switch ($conversion) {
            case 'ISO-8859-1_':
            case 'ISO-8859-1_US-ASCII':
                $escapedData = $this->escapeXmlSpecialChars($data);
                $escapedData = str_replace($this->xml_iso88591_Entities['in'], $this->xml_iso88591_Entities['out'], $escapedData);
                break;

            case 'ISO-8859-1_UTF-8':
                $escapedData = $this->iso88591ToUtf8($this->escapeXmlSpecialChars($data));
                break;

            case 'ISO-8859-1_ISO-8859-1':
            case 'US-ASCII_US-ASCII':
            case 'US-ASCII_UTF-8':
            case 'US-ASCII_':
            case 'US-ASCII_ISO-8859-1':
            case 'UTF-8_UTF-8':
            //case 'CP1252_CP1252':
                // no transcoding needed: only the xml special characters get escaped
                $escapedData = $this->escapeXmlSpecialChars($data);
                break;

            case 'UTF-8_':
            case 'UTF-8_US-ASCII':
            case 'UTF-8_ISO-8859-1':
                // be kind to users creating string xmlrpc values out of different php types
                $escapedData = $this->utf8ToEntities((string)$data);

                // when converting to latin-1, do not be so eager with using entities for characters 160-255
                if ($conversion == 'UTF-8_ISO-8859-1') {
                    $escapedData = str_replace(array_slice($this->xml_iso88591_Entities['out'], 32), array_slice($this->xml_iso88591_Entities['in'], 32), $escapedData);
                }
                break;

            /*
            case 'CP1252_':
            case 'CP1252_US-ASCII':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                $escapedData = str_replace($this->xml_iso88591_Entities['in'], $this->xml_iso88591_Entities['out'], $escapedData);
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                break;
            case 'CP1252_UTF-8':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                /// @todo we could use real UTF8 chars here instead of xml entities... (note that utf8_encode all alone will NOT convert them)
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                $escapedData = utf8_encode($escapedData);
                break;
            case 'CP1252_ISO-8859-1':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                // we might as well replace all funky chars with a '?' here, but we are kind and leave it to the receiving application layer to decide what to do with these weird entities...
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                break;
            */

            default:
                $escapedData = '';
                error_log('XML-RPC: ' . __METHOD__ . ": Converting from $srcEncoding to $destEncoding: not supported...");
        }

        return $escapedData;
    }

    /**
     * Replaces the five characters which have a predefined XML entity (& " ' < >) with that entity.
     *
     * '&' is listed first so that the ampersands introduced by the other replacements are not escaped again.
     *
     * @param string $data
     *
     * @return string
     */
    private function escapeXmlSpecialChars($data)
    {
        return str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
    }

    /**
     * Transcodes an ISO-8859-1 string to UTF-8 without relying on utf8_encode(), which is deprecated in recent
     * php versions.
     *
     * @param string $data
     *
     * @return string
     */
    private function iso88591ToUtf8($data)
    {
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($data, 'UTF-8', 'ISO-8859-1');
        }

        // every byte >= 128 maps to the code point of the same value, i.e. to the 2-byte sequence 110000bb 10bbbbbb
        $converted = '';
        $length = strlen($data);
        for ($i = 0; $i < $length; $i++) {
            $byte = ord($data[$i]);
            if ($byte < 128) {
                $converted .= $data[$i];
            } else {
                $converted .= chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
            }
        }

        return $converted;
    }

    /**
     * Escapes the xml special characters of a UTF-8 string and replaces every multi-byte sequence with the decimal
     * numeric character reference of its code point, producing a pure us-ascii string.
     *
     * @param string $data
     *
     * @return string
     */
    private function utf8ToEntities($data)
    {
        $specialChars = array('"' => '&quot;', '&' => '&amp;', "'" => '&apos;', '<' => '&lt;', '>' => '&gt;');

        $escapedData = '';
        $ns = strlen($data);
        for ($nn = 0; $nn < $ns; $nn++) {
            $ch = $data[$nn];
            $ii = ord($ch);

            // 7 bits: 0bbbbbbb (127)
            if ($ii < 128) {
                $escapedData .= isset($specialChars[$ch]) ? $specialChars[$ch] : $ch;
                continue;
            }

            if ($ii >> 5 == 6) {
                // 11 bits: 110bbbbb 10bbbbbb (2047)
                $extraBytes = 1;
                $codePoint = ($ii & 31);
            } elseif ($ii >> 4 == 14) {
                // 16 bits: 1110bbbb 10bbbbbb 10bbbbbb
                $extraBytes = 2;
                $codePoint = ($ii & 15);
            } elseif ($ii >> 3 == 30) {
                // 21 bits: 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
                $extraBytes = 3;
                $codePoint = ($ii & 7);
            } else {
                // not a valid lead byte (stray continuation byte, or 11111xxx): dropped
                continue;
            }

            // sequence truncated by the end of the string: drop the lead byte instead of reading past the end
            if ($nn + $extraBytes >= $ns) {
                continue;
            }

            // each continuation byte contributes its 6 low bits
            for ($kk = 1; $kk <= $extraBytes; $kk++) {
                $codePoint = ($codePoint * 64) + (ord($data[$nn + $kk]) & 63);
            }
            $escapedData .= sprintf('&#%d;', $codePoint);
            $nn += $extraBytes;
        }

        return $escapedData;
    }

    /**
     * Checks if a given charset encoding is present in a list of encodings or
     * if it is a valid subset of any encoding in the list.
     *
     * Charset names are compared case-insensitively, and whitespace around them is ignored.
     *
     * @param string $encoding charset to be tested
     * @param string|array $validList comma separated list of valid charsets (or array of charsets).
     *                                Any other type is treated as an empty list
     *
     * @return bool
     */
    public function isValidCharset($encoding, $validList)
    {
        if (is_string($validList)) {
            $validList = explode(',', $validList);
        }
        if (!is_array($validList)) {
            return false;
        }

        // normalize both sides once, so that the direct match and the superset lookup agree
        $encoding = strtoupper(trim((string)$encoding));
        $allowedCharsets = array();
        foreach ($validList as $allowed) {
            if (is_scalar($allowed)) {
                $allowedCharsets[] = strtoupper(trim((string)$allowed));
            }
        }

        if (in_array($encoding, $allowedCharsets, true)) {
            return true;
        }

        if (array_key_exists($encoding, $this->charset_supersets)) {
            return count(array_intersect($allowedCharsets, $this->charset_supersets[$encoding])) > 0;
        }

        return false;
    }

    /**
     * Used only for backwards compatibility
     * @deprecated
     *
     * @param string $charset only 'iso88591' is supported
     *
     * @return array the entity table, with keys "in" and "out"
     *
     * @throws \Exception for unknown/unsupported charsets
     */
    public function getEntities($charset)
    {
        switch ($charset)
        {
            case 'iso88591':
                return $this->xml_iso88591_Entities;
            default:
                throw new \Exception('Unsupported charset: ' . $charset);
        }
    }

}
274