Passed
Push — master ( 67ed62...5da465 )
by Gaetano
02:56
created

Charset::encodeEntities()   D

Complexity

Conditions 25
Paths 32

Size

Total Lines 131
Code Lines 86

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 70
CRAP Score 28.4364

Importance

Changes 0
Metric Value
cc 25
eloc 86
c 0
b 0
f 0
nc 32
nop 3
dl 0
loc 131
rs 4.1666
ccs 70
cts 85
cp 0.8235
crap 28.4364

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
namespace PhpXmlRpc\Helper;
4
5
use PhpXmlRpc\PhpXmlRpc;
6
7
class Charset
{
    /**
     * Lookup tables used to transcode ISO-8859-1 bytes into us-ascii XML numeric character references.
     * 'in' holds single bytes and 'out' holds the matching "&#N;" string at the same index.
     * Both lists are filled by the constructor (bytes 0-31 first, then bytes 160-255).
     *
     * @var array
     */
    protected $xml_iso88591_Entities = array("in" => array(), "out" => array());

    /// @todo add to iso table the characters from cp_1252 range, i.e. 128 to 159?
    /// These will NOT be present in true ISO-8859-1, but will save the unwary windows user from sending junk
    /// (though no luck when receiving them...)
    /*
    protected $xml_cp1252_Entities = array('in' => array(), 'out' => array(
        '&#x20AC;', '?',        '&#x201A;', '&#x0192;',
        '&#x201E;', '&#x2026;', '&#x2020;', '&#x2021;',
        '&#x02C6;', '&#x2030;', '&#x0160;', '&#x2039;',
        '&#x0152;', '?',        '&#x017D;', '?',
        '?',        '&#x2018;', '&#x2019;', '&#x201C;',
        '&#x201D;', '&#x2022;', '&#x2013;', '&#x2014;',
        '&#x02DC;', '&#x2122;', '&#x0161;', '&#x203A;',
        '&#x0153;', '?',        '&#x017E;', '&#x0178;'
    ));
    */

    /**
     * Map of charset name => list of charsets that are treated as supersets of it by isValidCharset().
     *
     * NOTE(review): the 'EUC-' entry looks like a truncated charset name - confirm the intended value before changing.
     *
     * @var array
     */
    protected $charset_supersets = array(
        'US-ASCII' => array('ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
            'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8',
            'ISO-8859-9', 'ISO-8859-10', 'ISO-8859-11', 'ISO-8859-12',
            'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'UTF-8',
            'EUC-JP', 'EUC-', 'EUC-KR', 'EUC-CN',),
    );

    /** @var Charset|null the shared instance, created on first call to instance() */
    protected static $instance = null;

    /**
     * This class is singleton for performance reasons.
     *
     * @return Charset
     */
    public static function instance()
    {
        if (self::$instance === null) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Builds the ISO-8859-1 lookup tables: one entry for each byte in 0-31 and in 160-255.
     * Bytes 32-159 get no entry, so they are left untouched by the table-based replacement.
     */
    private function __construct()
    {
        for ($i = 0; $i < 32; $i++) {
            $this->xml_iso88591_Entities["in"][] = chr($i);
            $this->xml_iso88591_Entities["out"][] = "&#{$i};";
        }

        for ($i = 160; $i < 256; $i++) {
            $this->xml_iso88591_Entities["in"][] = chr($i);
            $this->xml_iso88591_Entities["out"][] = "&#{$i};";
        }

        /*for ($i = 128; $i < 160; $i++)
        {
            $this->xml_cp1252_Entities['in'][] = chr($i);
        }*/
    }

    /**
     * Convert a string to the correct XML representation in a target charset.
     *
     * To help correct communication of non-ascii chars inside strings, regardless of the charset used when sending
     * requests, parsing them, sending responses and parsing responses, an option is to convert all non-ascii chars
     * present in the message into their equivalent 'charset entity'. Charset entities enumerated this way are
     * independent of the charset encoding used to transmit them, and all XML parsers are bound to understand them.
     * Note that in the std case we are not sending a charset encoding mime type along with http headers, so we are
     * bound by RFC 3023 to emit strict us-ascii.
     *
     * The five XML special characters (& " ' < >) are always replaced by their named entities.
     * Unsupported source/destination combinations are logged and produce an empty string.
     *
     * @todo do a bit of basic benchmarking (strtr vs. str_replace)
     * @todo make usage of iconv() or recode_string() or mb_string() where available
     *
     * @param string $data
     * @param string $srcEncoding empty means: use PhpXmlRpc::$xmlrpc_internalencoding
     * @param string $destEncoding
     *
     * @return string
     */
    public function encodeEntities($data, $srcEncoding = '', $destEncoding = '')
    {
        // loose comparison on purpose: a null source encoding is treated like an empty one
        if ($srcEncoding == '') {
            // lame, but we know no better...
            $srcEncoding = PhpXmlRpc::$xmlrpc_internalencoding;
        }

        $conversion = strtoupper($srcEncoding . '_' . $destEncoding);
        switch ($conversion) {
            case 'ISO-8859-1_':
            case 'ISO-8859-1_US-ASCII':
                // escape the markup chars first: the table replacement below introduces new '&' chars
                $escapedData = $this->escapeXmlSpecialChars($data);
                $escapedData = str_replace($this->xml_iso88591_Entities['in'], $this->xml_iso88591_Entities['out'], $escapedData);
                break;

            case 'ISO-8859-1_UTF-8':
                $escapedData = $this->latin1ToUtf8($this->escapeXmlSpecialChars($data));
                break;

            case 'ISO-8859-1_ISO-8859-1':
            case 'US-ASCII_US-ASCII':
            case 'US-ASCII_UTF-8':
            case 'US-ASCII_':
            case 'US-ASCII_ISO-8859-1':
            case 'UTF-8_UTF-8':
            //case 'CP1252_CP1252':
                $escapedData = $this->escapeXmlSpecialChars($data);
                break;

            case 'UTF-8_':
            case 'UTF-8_US-ASCII':
            case 'UTF-8_ISO-8859-1':
                $escapedData = $this->utf8ToEntities($data);

                // when converting to latin-1, do not be so eager with using entities for characters 160-255
                if ($conversion == 'UTF-8_ISO-8859-1') {
                    // offset 32 skips the control-char entries, leaving only the 160-255 part of the tables
                    $escapedData = str_replace(array_slice($this->xml_iso88591_Entities['out'], 32), array_slice($this->xml_iso88591_Entities['in'], 32), $escapedData);
                }
                break;

            /*
            case 'CP1252_':
            case 'CP1252_US-ASCII':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                $escapedData = str_replace($this->xml_iso88591_Entities['in'], $this->xml_iso88591_Entities['out'], $escapedData);
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                break;
            case 'CP1252_UTF-8':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                /// @todo we could use real UTF8 chars here instead of xml entities... (note that utf_8 encode all alone will NOT convert them)
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                $escapedData = utf8_encode($escapedData);
                break;
            case 'CP1252_ISO-8859-1':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                // we might as well replace all funky chars with a '?' here, but we are kind and leave it to the receiving application layer to decide what to do with these weird entities...
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                break;
            */

            default:
                $escapedData = '';
                Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ": Converting from $srcEncoding to $destEncoding: not supported...");
        }

        return $escapedData;
    }

    /**
     * Replaces the five XML special characters (& " ' < >) with their named entities.
     *
     * @param string $data
     *
     * @return string
     */
    protected function escapeXmlSpecialChars($data)
    {
        return str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
    }

    /**
     * Converts an ISO-8859-1 string to UTF-8 without relying on utf8_encode(), which is deprecated since PHP 8.2.
     *
     * @param string $data
     *
     * @return string
     */
    protected function latin1ToUtf8($data)
    {
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($data, 'UTF-8', 'ISO-8859-1');
        }

        // mbstring not available: every byte >= 128 maps to the 2-byte UTF-8 sequence of the same code point
        $data = (string)$data;
        $converted = '';
        $length = strlen($data);
        for ($pos = 0; $pos < $length; $pos++) {
            $byte = ord($data[$pos]);
            if ($byte < 128) {
                $converted .= $data[$pos];
            } else {
                $converted .= chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
            }
        }

        return $converted;
    }

    /**
     * Turns a UTF-8 string into us-ascii: XML special characters become named entities, every multi-byte
     * sequence becomes a decimal numeric character reference, other 7-bit characters are copied as they are.
     *
     * Bytes that can not start a sequence (stray continuation bytes, 0xF8 and above) are dropped. A lead byte
     * whose sequence would run past the end of the string is dropped as well, instead of reading beyond the string.
     * Continuation bytes are not validated.
     *
     * @param string $data
     *
     * @return string
     */
    protected function utf8ToEntities($data)
    {
        $specialChars = array('"' => '&quot;', '&' => '&amp;', "'" => '&apos;', '<' => '&lt;', '>' => '&gt;');

        // be kind to users creating string xmlrpc values out of different php types
        $data = (string)$data;
        $escaped = '';
        $length = strlen($data);
        for ($pos = 0; $pos < $length; $pos++) {
            $char = $data[$pos];
            $byte = ord($char);

            // 7 bits: 0bbbbbbb (127)
            if ($byte < 128) {
                $escaped .= isset($specialChars[$char]) ? $specialChars[$char] : $char;
                continue;
            }

            if ($byte >> 5 == 6) {
                // 11 bits: 110bbbbb 10bbbbbb (2047)
                $trailing = 1;
                $codePoint = $byte & 31;
            } elseif ($byte >> 4 == 14) {
                // 16 bits: 1110bbbb 10bbbbbb 10bbbbbb
                $trailing = 2;
                $codePoint = $byte & 15;
            } elseif ($byte >> 3 == 30) {
                // 21 bits: 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
                $trailing = 3;
                $codePoint = $byte & 7;
            } else {
                // not a valid lead byte: skip it
                continue;
            }

            if ($pos + $trailing >= $length) {
                // truncated sequence: skip the lead byte rather than indexing past the end of the string
                continue;
            }

            for ($k = 1; $k <= $trailing; $k++) {
                $codePoint = ($codePoint * 64) + (ord($data[$pos + $k]) & 63);
            }
            $escaped .= '&#' . $codePoint . ';';
            $pos += $trailing;
        }

        return $escaped;
    }

    /**
     * Checks if a given charset encoding is present in a list of encodings or if it is a valid subset of any encoding
     * in the list.
     *
     * The comparison is case-insensitive and ignores whitespace around the names in the list.
     *
     * @param string $encoding charset to be tested
     * @param string|array $validList comma separated list of valid charsets (or array of charsets)
     *
     * @return bool false also when $validList is neither a string nor an array
     */
    public function isValidCharset($encoding, $validList)
    {
        if (is_string($validList)) {
            $validList = explode(',', $validList);
        }
        if (!is_array($validList)) {
            return false;
        }

        // normalize both sides once, so that the direct match and the superset lookup agree on case
        $encoding = strtoupper($encoding);
        $allowedCharsets = array();
        foreach ($validList as $allowed) {
            $allowedCharsets[] = strtoupper(trim((string)$allowed));
        }

        if (in_array($encoding, $allowedCharsets, true)) {
            return true;
        }

        if (isset($this->charset_supersets[$encoding])) {
            foreach ($allowedCharsets as $allowed) {
                if (in_array($allowed, $this->charset_supersets[$encoding], true)) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Used only for backwards compatibility
     * @deprecated
     *
     * @param string $charset only 'iso88591' is supported
     *
     * @return array the table with 'in' and 'out' lists for the given charset
     *
     * @throws \Exception for unknown/unsupported charsets
     */
    public function getEntities($charset)
    {
        //trigger_error('Method ' . __METHOD__ . ' is deprecated', E_USER_DEPRECATED);

        switch ($charset)
        {
            case 'iso88591':
                return $this->xml_iso88591_Entities;
            default:
                throw new \Exception('Unsupported charset: ' . $charset);
        }
    }

}
275