Completed
Push — master ( 8f4643...4ef224 )
by Gaetano
07:48 queued 06:06
created

Charset   A

Complexity

Total Complexity 38

Size/Duplication

Total Lines 267
Duplicated Lines 3 %

Coupling/Cohesion

Components 3
Dependencies 1

Test Coverage

Coverage 73.21%

Importance

Changes 0
Metric Value
dl 8
loc 267
ccs 82
cts 112
cp 0.7321
rs 9.36
c 0
b 0
f 0
wmc 38
lcom 3
cbo 1

5 Methods

Rating   Name   Duplication   Size   Complexity  
A isValidCharset() 0 19 6
A getEntities() 0 10 2
A instance() 0 8 2
A __construct() 8 17 3
F encodeEntities() 0 132 25

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
3
namespace PhpXmlRpc\Helper;
4
5
use PhpXmlRpc\PhpXmlRpc;
6
7
/**
 * Helper for escaping strings into XML text while converting between the few charsets handled here
 * (US-ASCII, ISO-8859-1 and UTF-8), and for checking charset names against a list of accepted ones.
 */
class Charset
{
    // tables used for transcoding different charsets into us-ascii xml
    // 'in' holds raw single-byte characters, 'out' the numeric character reference at the same index
    protected $xml_iso88591_Entities = array("in" => array(), "out" => array());
    // NB: this table is neither filled nor read by the code in this class
    protected $xml_iso88591_utf8 = array("in" => array(), "out" => array());

    /// @todo add to iso table the characters from cp_1252 range, i.e. 128 to 159?
    /// These will NOT be present in true ISO-8859-1, but will save the unwary
    /// windows user from sending junk (though no luck when receiving them...)
    /*
    protected $xml_cp1252_Entities = array('in' => array(), 'out' => array(
        '&#x20AC;', '?',        '&#x201A;', '&#x0192;',
        '&#x201E;', '&#x2026;', '&#x2020;', '&#x2021;',
        '&#x02C6;', '&#x2030;', '&#x0160;', '&#x2039;',
        '&#x0152;', '?',        '&#x017D;', '?',
        '?',        '&#x2018;', '&#x2019;', '&#x201C;',
        '&#x201D;', '&#x2022;', '&#x2013;', '&#x2014;',
        '&#x02DC;', '&#x2122;', '&#x0161;', '&#x203A;',
        '&#x0153;', '?',        '&#x017E;', '&#x0178;'
    ));
    */

    // Maps a charset name (upper case) to the charsets that are treated as supersets of it.
    // NOTE(review): 'EUC-' looks like a truncated name (EUC-TW?) and 'ISO-8859-12' is not a published
    // charset - both are kept unchanged here, to be confirmed by the maintainer.
    protected $charset_supersets = array(
        'US-ASCII' => array('ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
            'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8',
            'ISO-8859-9', 'ISO-8859-10', 'ISO-8859-11', 'ISO-8859-12',
            'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'UTF-8',
            'EUC-JP', 'EUC-', 'EUC-KR', 'EUC-CN',),
    );

    protected static $instance = null;

    /**
     * This class is singleton for performance reasons.
     *
     * @return Charset
     */
    public static function instance()
    {
        if (self::$instance === null) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Builds the ISO-8859-1 entity table: bytes 0-31 first, then bytes 160-255.
     */
    private function __construct()
    {
        // The order matters: encodeEntities() skips the first 32 entries (the control chars) with
        // array_slice() to get at the 160-255 range only.
        foreach (array_merge(range(0, 31), range(160, 255)) as $i) {
            $this->xml_iso88591_Entities["in"][] = chr($i);
            $this->xml_iso88591_Entities["out"][] = "&#{$i};";
        }

        /*for ($i = 128; $i < 160; $i++)
        {
            $this->xml_cp1252_Entities['in'][] = chr($i);
        }*/
    }

    /**
     * Convert a string to the correct XML representation in a target charset.
     *
     * To help correct communication of non-ascii chars inside strings, regardless of the charset used when sending
     * requests, parsing them, sending responses and parsing responses, an option is to convert all non-ascii chars
     * present in the message into their equivalent 'charset entity'. Charset entities enumerated this way are
     * independent of the charset encoding used to transmit them, and all XML parsers are bound to understand them.
     * Note that in the std case we are not sending a charset encoding mime type along with http headers, so we are
     * bound by RFC 3023 to emit strict us-ascii.
     *
     * The five XML special characters (& " ' < >) are always replaced by their named entities.
     * Charset names are matched case-insensitively. An unsupported source/destination pair is reported via
     * error_log() and yields an empty string.
     *
     * @todo do a bit of basic benchmarking (strtr vs. str_replace)
     * @todo make usage of iconv() or recode_string() or mb_string() where available
     *
     * @param string $data
     * @param string $srcEncoding when empty, PhpXmlRpc::$xmlrpc_internalencoding is used
     * @param string $destEncoding when empty, the output is plain us-ascii
     *
     * @return string
     */
    public function encodeEntities($data, $srcEncoding = '', $destEncoding = '')
    {
        if ($srcEncoding == '') {
            // lame, but we know no better...
            $srcEncoding = PhpXmlRpc::$xmlrpc_internalencoding;
        }

        $xmlChars = array('&', '"', "'", '<', '>');
        $xmlEntities = array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;');

        $conversion = strtoupper($srcEncoding . '_' . $destEncoding);
        switch ($conversion) {
            case 'ISO-8859-1_':
            case 'ISO-8859-1_US-ASCII':
                $escapedData = str_replace($xmlChars, $xmlEntities, $data);
                $escapedData = str_replace($this->xml_iso88591_Entities['in'], $this->xml_iso88591_Entities['out'], $escapedData);
                break;

            case 'ISO-8859-1_UTF-8':
                $escapedData = str_replace($xmlChars, $xmlEntities, $data);
                // utf8_encode() is deprecated as of php 8.2: prefer the equivalent mbstring call when available
                if (function_exists('mb_convert_encoding')) {
                    $escapedData = mb_convert_encoding($escapedData, 'UTF-8', 'ISO-8859-1');
                } else {
                    $escapedData = utf8_encode($escapedData);
                }
                break;

            case 'ISO-8859-1_ISO-8859-1':
            case 'US-ASCII_US-ASCII':
            case 'US-ASCII_UTF-8':
            case 'US-ASCII_':
            case 'US-ASCII_ISO-8859-1':
            case 'UTF-8_UTF-8':
            //case 'CP1252_CP1252':
                $escapedData = str_replace($xmlChars, $xmlEntities, $data);
                break;

            case 'UTF-8_':
            case 'UTF-8_US-ASCII':
            case 'UTF-8_ISO-8859-1':
                // be kind to users creating string xmlrpc values out of different php types
                $escapedData = $this->utf8ToEntities((string)$data);

                // when converting to latin-1, do not be so eager with using entities for characters 160-255
                if ($conversion == 'UTF-8_ISO-8859-1') {
                    $escapedData = str_replace(array_slice($this->xml_iso88591_Entities['out'], 32), array_slice($this->xml_iso88591_Entities['in'], 32), $escapedData);
                }
                break;

            /*
            case 'CP1252_':
            case 'CP1252_US-ASCII':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                $escapedData = str_replace($this->xml_iso88591_Entities['in'], $this->xml_iso88591_Entities['out'], $escapedData);
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                break;
            case 'CP1252_UTF-8':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                /// @todo we could use real UTF8 chars here instead of xml entities... (note that utf8_encode all alone will NOT convert them)
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                $escapedData = utf8_encode($escapedData);
                break;
            case 'CP1252_ISO-8859-1':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                // we might as well replace all funky chars with a '?' here, but we are kind and leave it to the receiving application layer to decide what to do with these weird entities...
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                break;
            */

            default:
                $escapedData = '';
                error_log('XML-RPC: ' . __METHOD__ . ": Converting from $srcEncoding to $destEncoding: not supported...");
        }

        return $escapedData;
    }

    /**
     * Escapes a UTF-8 string into us-ascii XML text: the five XML special characters become named entities,
     * every multi-byte sequence becomes a decimal numeric character reference, other ascii bytes are copied.
     *
     * Bytes which can not start a sequence (10xxxxxx, 11111xxx) are dropped. A multi-byte sequence cut short
     * by the end of the string is dropped as well, instead of being decoded by reading past the end of the data.
     * Continuation bytes are not validated.
     *
     * @param string $data
     *
     * @return string
     */
    private function utf8ToEntities($data)
    {
        $specialChars = array(34 => '&quot;', 38 => '&amp;', 39 => '&apos;', 60 => '&lt;', 62 => '&gt;');

        $escapedData = '';
        $ns = strlen($data);
        for ($nn = 0; $nn < $ns; $nn++) {
            $ch = $data[$nn];
            $ii = ord($ch);

            // 7 bits: 0bbbbbbb (127)
            if ($ii < 128) {
                $escapedData .= isset($specialChars[$ii]) ? $specialChars[$ii] : $ch;
                continue;
            }

            if ($ii >> 5 == 6) {
                // 11 bits: 110bbbbb 10bbbbbb (2047)
                $len = 2;
                $codePoint = $ii & 31;
            } elseif ($ii >> 4 == 14) {
                // 16 bits: 1110bbbb 10bbbbbb 10bbbbbb
                $len = 3;
                $codePoint = $ii & 15;
            } elseif ($ii >> 3 == 30) {
                // 21 bits: 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
                $len = 4;
                $codePoint = $ii & 7;
            } else {
                // not a valid lead byte: skip it
                continue;
            }

            // truncated sequence at the end of the data: nothing sensible can be decoded
            if ($nn + $len > $ns) {
                break;
            }

            // each continuation byte contributes its 6 low bits
            for ($k = 1; $k < $len; $k++) {
                $codePoint = ($codePoint * 64) + (ord($data[$nn + $k]) & 63);
            }
            $escapedData .= sprintf('&#%d;', $codePoint);
            $nn += $len - 1;
        }

        return $escapedData;
    }

    /**
     * Checks if a given charset encoding is present in a list of encodings or
     * if it is a valid subset of any encoding in the list.
     *
     * Names are compared case-insensitively and with surrounding whitespace ignored, both for the
     * encoding being tested and for the entries of the list.
     *
     * @param string $encoding charset to be tested
     * @param string|array $validList comma separated list of valid charsets (or array of charsets)
     *
     * @return bool false also when $validList is neither a string nor an array
     */
    public function isValidCharset($encoding, $validList)
    {
        if (is_string($validList)) {
            $validList = explode(',', $validList);
        }
        if (!is_array($validList)) {
            return false;
        }

        $encoding = strtoupper(trim((string)$encoding));

        $allowedList = array();
        foreach ($validList as $allowed) {
            $allowedList[] = strtoupper(trim((string)$allowed));
        }

        if (in_array($encoding, $allowedList, true)) {
            return true;
        }

        // not listed explicitly: accept it if any allowed charset is a known superset of it
        if (isset($this->charset_supersets[$encoding])) {
            foreach ($allowedList as $allowed) {
                if (in_array($allowed, $this->charset_supersets[$encoding], true)) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Used only for backwards compatibility
     * @deprecated
     *
     * @param string $charset only 'iso88591' is supported
     *
     * @return array the table with 'in' and 'out' keys
     *
     * @throws \Exception for unknown/unsupported charsets
     */
    public function getEntities($charset)
    {
        switch ($charset)
        {
            case 'iso88591':
                return $this->xml_iso88591_Entities;
            default:
                throw new \Exception('Unsupported charset: ' . $charset);
        }
    }

}
274