Passed
Push — master ( ae5a85...67ed62 )
by Gaetano
24:17
created

Charset::encodeEntities()   D

Complexity

Conditions 25
Paths 32

Size

Total Lines 131
Code Lines 86

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 70
CRAP Score 28.4364

Importance

Changes 0
Metric Value
cc 25
eloc 86
c 0
b 0
f 0
nc 32
nop 3
dl 0
loc 131
rs 4.1666
ccs 70
cts 85
cp 0.8235
crap 28.4364

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace PhpXmlRpc\Helper;
4
5
use PhpXmlRpc\PhpXmlRpc;
6
7
/**
 * Helper for converting strings into their XML-safe representation across a small set of charsets
 * (US-ASCII, ISO-8859-1, UTF-8), and for checking charset names against a list of accepted ones.
 */
class Charset
{
    // Tables used for transcoding different charsets into us-ascii xml.
    // 'in' holds single ISO-8859-1 bytes, 'out' the numeric character reference for the byte at the same index.
    // Both lists are filled by the constructor (bytes 0-31 first, then bytes 160-255).
    protected $xml_iso88591_Entities = array("in" => array(), "out" => array());

    /// @todo add to iso table the characters from cp_1252 range, i.e. 128 to 159?
    /// These will NOT be present in true ISO-8859-1, but will save the unwary windows user from sending junk
    /// (though no luck when receiving them...)
    /*
    protected $xml_cp1252_Entities = array('in' => array(), 'out' => array(
        '&#x20AC;', '?',        '&#x201A;', '&#x0192;',
        '&#x201E;', '&#x2026;', '&#x2020;', '&#x2021;',
        '&#x02C6;', '&#x2030;', '&#x0160;', '&#x2039;',
        '&#x0152;', '?',        '&#x017D;', '?',
        '?',        '&#x2018;', '&#x2019;', '&#x201C;',
        '&#x201D;', '&#x2022;', '&#x2013;', '&#x2014;',
        '&#x02DC;', '&#x2122;', '&#x0161;', '&#x203A;',
        '&#x0153;', '?',        '&#x017E;', '&#x0178;'
    ));
    */

    // For each charset used as key: the charsets that can represent every character of it.
    // NOTE(review): 'EUC-' looks like a truncated charset name (possibly 'EUC-TW') - confirm before changing it.
    protected $charset_supersets = array(
        'US-ASCII' => array('ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
            'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8',
            'ISO-8859-9', 'ISO-8859-10', 'ISO-8859-11', 'ISO-8859-12',
            'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'UTF-8',
            'EUC-JP', 'EUC-', 'EUC-KR', 'EUC-CN',),
    );

    protected static $instance = null;

    /**
     * This class is singleton for performance reasons.
     *
     * @return Charset
     */
    public static function instance()
    {
        if (self::$instance === null) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Builds the ISO-8859-1 transcoding table: control characters (0-31) and the upper half of the
     * charset (160-255) are each mapped to their decimal numeric character reference.
     */
    private function __construct()
    {
        for ($i = 0; $i < 32; $i++) {
            $this->xml_iso88591_Entities["in"][] = chr($i);
            $this->xml_iso88591_Entities["out"][] = "&#{$i};";
        }

        for ($i = 160; $i < 256; $i++) {
            $this->xml_iso88591_Entities["in"][] = chr($i);
            $this->xml_iso88591_Entities["out"][] = "&#{$i};";
        }

        /// @todo if the cp1252 table gets enabled, fill its 'in' list here with chr(128) to chr(159)
    }

    /**
     * Convert a string to the correct XML representation in a target charset.
     *
     * To help correct communication of non-ascii chars inside strings, regardless of the charset used when sending
     * requests, parsing them, sending responses and parsing responses, an option is to convert all non-ascii chars
     * present in the message into their equivalent 'charset entity'. Charset entities enumerated this way are
     * independent of the charset encoding used to transmit them, and all XML parsers are bound to understand them.
     * Note that in the std case we are not sending a charset encoding mime type along with http headers, so we are
     * bound by RFC 3023 to emit strict us-ascii.
     *
     * Unsupported source/destination combinations are logged and produce an empty string.
     *
     * @todo do a bit of basic benchmarking (strtr vs. str_replace)
     * @todo make usage of iconv() or recode_string() where available
     * @todo add support for CP1252 as source encoding (needs the cp1252 entities table)
     *
     * @param string $data
     * @param string $srcEncoding when empty, PhpXmlRpc::$xmlrpc_internalencoding is used
     * @param string $destEncoding when empty, the output is plain us-ascii
     *
     * @return string
     */
    public function encodeEntities($data, $srcEncoding = '', $destEncoding = '')
    {
        // loose comparison on purpose: null is treated like the empty string, as it always was
        if ($srcEncoding == '') {
            // lame, but we know no better...
            $srcEncoding = PhpXmlRpc::$xmlrpc_internalencoding;
        }

        $conversion = strtoupper($srcEncoding . '_' . $destEncoding);
        switch ($conversion) {
            case 'ISO-8859-1_':
            case 'ISO-8859-1_US-ASCII':
                $escapedData = $this->escapeXmlChars($data);
                $escapedData = str_replace($this->xml_iso88591_Entities['in'], $this->xml_iso88591_Entities['out'], $escapedData);
                break;

            case 'ISO-8859-1_UTF-8':
                $escapedData = $this->latin1ToUtf8($this->escapeXmlChars($data));
                break;

            // source is a subset of (or identical to) the destination: only the xml special chars need escaping
            case 'ISO-8859-1_ISO-8859-1':
            case 'US-ASCII_US-ASCII':
            case 'US-ASCII_UTF-8':
            case 'US-ASCII_':
            case 'US-ASCII_ISO-8859-1':
            case 'UTF-8_UTF-8':
                $escapedData = $this->escapeXmlChars($data);
                break;

            case 'UTF-8_':
            case 'UTF-8_US-ASCII':
            case 'UTF-8_ISO-8859-1':
                // be kind to users creating string xmlrpc values out of different php types
                $escapedData = $this->utf8ToEntities((string)$data);

                // when converting to latin-1, do not be so eager with using entities for characters 160-255
                if ($conversion === 'UTF-8_ISO-8859-1') {
                    // the first 32 table entries are the control characters: skip them
                    $escapedData = str_replace(
                        array_slice($this->xml_iso88591_Entities['out'], 32),
                        array_slice($this->xml_iso88591_Entities['in'], 32),
                        $escapedData
                    );
                }
                break;

            default:
                $escapedData = '';
                Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ": Converting from $srcEncoding to $destEncoding: not supported...");
        }

        return $escapedData;
    }

    /**
     * Replaces the five characters which have a special meaning in XML with the corresponding predefined entity.
     *
     * @param string $data
     *
     * @return string
     */
    private function escapeXmlChars($data)
    {
        return str_replace(
            array('&', '"', "'", '<', '>'),
            array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'),
            $data
        );
    }

    /**
     * Transcodes an ISO-8859-1 string to UTF-8.
     *
     * utf8_encode() is deprecated as of PHP 8.2, so mbstring is preferred whenever it is loaded; both calls
     * produce the same bytes for ISO-8859-1 input.
     *
     * @param string $data
     *
     * @return string
     */
    private function latin1ToUtf8($data)
    {
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($data, 'UTF-8', 'ISO-8859-1');
        }

        return utf8_encode($data);
    }

    /**
     * Turns a UTF-8 string into pure us-ascii: xml special chars become predefined entities, every multi-byte
     * sequence becomes a decimal numeric character reference, other ascii chars are copied as they are.
     *
     * Continuation bytes are not validated. Bytes which can not start a sequence are dropped, and a multi-byte
     * sequence cut short by the end of the string is dropped as well (instead of reading past the end of $data).
     *
     * @param string $data
     *
     * @return string
     */
    private function utf8ToEntities($data)
    {
        $xmlSpecialChars = array('"' => '&quot;', '&' => '&amp;', "'" => '&apos;', '<' => '&lt;', '>' => '&gt;');

        $escapedData = '';
        $ns = strlen($data);
        for ($nn = 0; $nn < $ns; $nn++) {
            $ch = $data[$nn];
            $ii = ord($ch);

            // 7 bits: 0bbbbbbb (127)
            if ($ii < 128) {
                $escapedData .= isset($xmlSpecialChars[$ch]) ? $xmlSpecialChars[$ch] : $ch;
                continue;
            }

            // work out how many continuation bytes follow, and the payload bits carried by the lead byte
            if ($ii >> 5 == 6) {
                // 11 bits: 110bbbbb 10bbbbbb (2047)
                $extraBytes = 1;
                $codePoint = $ii & 31;
            } elseif ($ii >> 4 == 14) {
                // 16 bits: 1110bbbb 10bbbbbb 10bbbbbb
                $extraBytes = 2;
                $codePoint = $ii & 15;
            } elseif ($ii >> 3 == 30) {
                // 21 bits: 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
                $extraBytes = 3;
                $codePoint = $ii & 7;
            } else {
                // stray continuation byte or invalid lead byte: nothing sensible to emit
                continue;
            }

            if ($nn + $extraBytes >= $ns) {
                // truncated sequence at the end of the string: there is nothing left to decode
                break;
            }

            // each continuation byte contributes its 6 low bits
            for ($k = 1; $k <= $extraBytes; $k++) {
                $codePoint = ($codePoint * 64) + (ord($data[$nn + $k]) & 63);
            }
            $escapedData .= sprintf('&#%d;', $codePoint);
            $nn += $extraBytes;
        }

        return $escapedData;
    }

    /**
     * Checks if a given charset encoding is present in a list of encodings or if it is a valid subset of any encoding
     * in the list.
     *
     * Charset names are compared case-insensitively, and whitespace around the names is ignored.
     *
     * @param string $encoding charset to be tested
     * @param string|array $validList comma separated list of valid charsets (or array of charsets)
     *
     * @return bool false also when $validList is neither a string nor an array
     */
    public function isValidCharset($encoding, $validList)
    {
        if (is_string($validList)) {
            $validList = explode(',', $validList);
        }
        if (!is_array($validList)) {
            return false;
        }

        // normalize both sides, so that 'utf-8' matches 'UTF-8' and "a, b" is split into 'A' and 'B'
        $encoding = strtoupper(trim((string)$encoding));
        $allowedCharsets = array();
        foreach ($validList as $allowed) {
            $allowedCharsets[] = strtoupper(trim((string)$allowed));
        }

        if (in_array($encoding, $allowedCharsets, true)) {
            return true;
        }

        // not listed explicitly: accept it if any of the allowed charsets is a known superset of it
        if (isset($this->charset_supersets[$encoding])) {
            foreach ($allowedCharsets as $allowed) {
                if (in_array($allowed, $this->charset_supersets[$encoding], true)) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Used only for backwards compatibility
     * @deprecated
     *
     * @param string $charset only 'iso88591' is supported
     *
     * @return array the transcoding table, with keys 'in' and 'out'
     *
     * @throws \Exception for unknown/unsupported charsets
     */
    public function getEntities($charset)
    {
        //trigger_error('Method ' . __METHOD__ . ' is deprecated', E_USER_DEPRECATED);

        switch ($charset)
        {
            case 'iso88591':
                return $this->xml_iso88591_Entities;
            default:
                throw new \Exception('Unsupported charset: ' . $charset);
        }
    }

}
275