Completed
Push — master ( 87b7a4...6ce28d )
by Gaetano
11:11 queued 06:38
created

Charset::encodeEntities()   D

Complexity

Conditions 25
Paths 32

Size

Total Lines 131
Code Lines 86

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 70
CRAP Score 28.4364

Importance

Changes 0
Metric Value
cc 25
eloc 86
c 0
b 0
f 0
nc 32
nop 3
dl 0
loc 131
rs 4.1666
ccs 70
cts 85
cp 0.8235
crap 28.4364

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

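Commonly applied refactorings include Extract Method; if many parameters or temporary variables are involved, also consider Replace Temp with Query, Introduce Parameter Object and Preserve Whole Object.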

1
<?php
2
3
namespace PhpXmlRpc\Helper;
4
5
use PhpXmlRpc\PhpXmlRpc;
6
7
class Charset
{
    /**
     * Table used for transcoding ISO-8859-1 text into us-ascii xml.
     *
     * 'in' holds the raw single-byte characters 0-31 and 160-255, 'out' holds the matching numeric character
     * references ("&#N;") at the same index. Both lists are filled by the constructor.
     *
     * @var array
     */
    protected $xml_iso88591_Entities = array("in" => array(), "out" => array());

    /// @todo add to iso table the characters from cp_1252 range, i.e. 128 to 159?
    /// These will NOT be present in true ISO-8859-1, but will save the unwary windows user from sending junk
    /// (though no luck when receiving them...)
    /*
    protected $xml_cp1252_Entities = array('in' => array(), 'out' => array(
        '&#x20AC;', '?',        '&#x201A;', '&#x0192;',
        '&#x201E;', '&#x2026;', '&#x2020;', '&#x2021;',
        '&#x02C6;', '&#x2030;', '&#x0160;', '&#x2039;',
        '&#x0152;', '?',        '&#x017D;', '?',
        '?',        '&#x2018;', '&#x2019;', '&#x201C;',
        '&#x201D;', '&#x2022;', '&#x2013;', '&#x2014;',
        '&#x02DC;', '&#x2122;', '&#x0161;', '&#x203A;',
        '&#x0153;', '?',        '&#x017E;', '&#x0178;'
    ));
    */

    /**
     * Map of charset name (upper case) => list of charsets that are supersets of it.
     *
     * NOTE(review): the entry 'EUC-' looks truncated (possibly 'EUC-TW') - confirm before relying on it.
     *
     * @var array
     */
    protected $charset_supersets = array(
        'US-ASCII' => array('ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
            'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8',
            'ISO-8859-9', 'ISO-8859-10', 'ISO-8859-11', 'ISO-8859-12',
            'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'UTF-8',
            'EUC-JP', 'EUC-', 'EUC-KR', 'EUC-CN',),
    );

    /** @var Charset|null the shared instance, created on first call to instance() */
    protected static $instance = null;

    /**
     * This class is singleton for performance reasons.
     *
     * @return Charset
     */
    public static function instance()
    {
        if (self::$instance === null) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Fills the ISO-8859-1 transcoding table: control characters 0-31 first, then characters 160-255.
     *
     * The order matters: encodeEntities() relies on the first 32 entries being the control characters when it
     * slices the table for the UTF-8 => ISO-8859-1 conversion.
     */
    private function __construct()
    {
        foreach (array_merge(range(0, 31), range(160, 255)) as $code) {
            $this->xml_iso88591_Entities["in"][] = chr($code);
            $this->xml_iso88591_Entities["out"][] = "&#{$code};";
        }

        /*for ($i = 128; $i < 160; $i++)
        {
            $this->xml_cp1252_Entities['in'][] = chr($i);
        }*/
    }

    /**
     * Convert a string to the correct XML representation in a target charset.
     *
     * To help correct communication of non-ascii chars inside strings, regardless of the charset used when sending
     * requests, parsing them, sending responses and parsing responses, an option is to convert all non-ascii chars
     * present in the message into their equivalent 'charset entity'. Charset entities enumerated this way are
     * independent of the charset encoding used to transmit them, and all XML parsers are bound to understand them.
     * Note that in the std case we are not sending a charset encoding mime type along with http headers, so we are
     * bound by RFC 3023 to emit strict us-ascii.
     *
     * Supported source charsets are ISO-8859-1, US-ASCII and UTF-8; charset names are matched case-insensitively.
     * For an unsupported source/destination combination an error is logged and an empty string is returned.
     *
     * @todo do a bit of basic benchmarking (strtr vs. str_replace)
     * @todo make usage of iconv() or recode_string() where available
     *
     * @param string $data the text to encode
     * @param string $srcEncoding charset of $data. When empty, PhpXmlRpc::$xmlrpc_internalencoding is used
     * @param string $destEncoding target charset. When empty, us-ascii output is produced
     *
     * @return string
     */
    public function encodeEntities($data, $srcEncoding = '', $destEncoding = '')
    {
        if ($srcEncoding == '') {
            // lame, but we know no better...
            $srcEncoding = PhpXmlRpc::$xmlrpc_internalencoding;
        }

        $conversion = strtoupper($srcEncoding . '_' . $destEncoding);
        switch ($conversion) {
            case 'ISO-8859-1_':
            case 'ISO-8859-1_US-ASCII':
                // escape the markup characters first, then turn every non-ascii byte into a numeric reference
                return str_replace(
                    $this->xml_iso88591_Entities['in'],
                    $this->xml_iso88591_Entities['out'],
                    $this->escapeXmlSpecialChars($data)
                );

            case 'ISO-8859-1_UTF-8':
                return $this->latin1ToUtf8($this->escapeXmlSpecialChars($data));

            case 'ISO-8859-1_ISO-8859-1':
            case 'US-ASCII_US-ASCII':
            case 'US-ASCII_UTF-8':
            case 'US-ASCII_':
            case 'US-ASCII_ISO-8859-1':
            case 'UTF-8_UTF-8':
            //case 'CP1252_CP1252':
                // the source is representable as-is in the destination: only the markup characters need escaping
                return $this->escapeXmlSpecialChars($data);

            case 'UTF-8_':
            case 'UTF-8_US-ASCII':
            case 'UTF-8_ISO-8859-1':
                $escapedData = $this->utf8ToNumericEntities($data);

                // when converting to latin-1, do not be so eager with using entities for characters 160-255:
                // entries 32 and up of the table are exactly those characters, so map them back to raw bytes
                if ($conversion == 'UTF-8_ISO-8859-1') {
                    $escapedData = str_replace(
                        array_slice($this->xml_iso88591_Entities['out'], 32),
                        array_slice($this->xml_iso88591_Entities['in'], 32),
                        $escapedData
                    );
                }

                return $escapedData;

            /*
            case 'CP1252_':
            case 'CP1252_US-ASCII':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                $escapedData = str_replace($this->xml_iso88591_Entities['in'], $this->xml_iso88591_Entities['out'], $escapedData);
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                break;
            case 'CP1252_UTF-8':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                /// @todo we could use real UTF8 chars here instead of xml entities... (note that utf8_encode all alone will NOT convert them)
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                $escapedData = utf8_encode($escapedData);
                break;
            case 'CP1252_ISO-8859-1':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                // we might as well replace all funky chars with a '?' here, but we are kind and leave it to the receiving application layer to decide what to do with these weird entities...
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                break;
            */

            default:
                Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ": Converting from $srcEncoding to $destEncoding: not supported...");

                return '';
        }
    }

    /**
     * Replaces the five characters with special meaning in XML (& " ' < >) with their predefined entities.
     *
     * @param string $data
     *
     * @return string
     */
    protected function escapeXmlSpecialChars($data)
    {
        return str_replace(
            array('&', '"', "'", '<', '>'),
            array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'),
            $data
        );
    }

    /**
     * Converts an ISO-8859-1 string to UTF-8.
     *
     * Stands in for utf8_encode(), which is deprecated as of PHP 8.2. The mbstring extension is used when it is
     * loaded; otherwise every byte above 127 is expanded by hand into the two-byte UTF-8 sequence of the code point
     * with the same number, which is the mapping utf8_encode() applies.
     *
     * @param string $data
     *
     * @return string
     */
    protected function latin1ToUtf8($data)
    {
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($data, 'UTF-8', 'ISO-8859-1');
        }

        $data = (string)$data;
        $length = strlen($data);
        $converted = '';
        for ($pos = 0; $pos < $length; $pos++) {
            $byte = ord($data[$pos]);
            if ($byte < 0x80) {
                $converted .= $data[$pos];
            } else {
                // 110000bb 10bbbbbb
                $converted .= chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
            }
        }

        return $converted;
    }

    /**
     * Turns a UTF-8 string into pure us-ascii XML text: the five XML markup characters become their predefined
     * entities and every multi-byte sequence becomes a decimal numeric character reference ("&#N;").
     *
     * No validation of the UTF-8 is attempted:
     * - bytes which can not start a sequence (stray continuation bytes, 0xF8 and above) are dropped;
     * - continuation bytes are not checked for the 10bbbbbb pattern;
     * - when the string ends in the middle of a sequence, the missing bytes count as zero bits. The string is never
     *   read past its end, so no "Uninitialized string offset" notice/warning is raised on truncated input.
     *
     * @param mixed $data cast to string, to be kind to users creating string xmlrpc values out of different php types
     *
     * @return string
     */
    protected function utf8ToNumericEntities($data)
    {
        static $markupEntities = array(
            '"' => '&quot;',
            '&' => '&amp;',
            "'" => '&apos;',
            '<' => '&lt;',
            '>' => '&gt;',
        );

        $data = (string)$data;
        $length = strlen($data);
        $escaped = '';

        for ($pos = 0; $pos < $length; $pos++) {
            $char = $data[$pos];
            $byte = ord($char);

            // 7 bits: 0bbbbbbb (127)
            if ($byte < 0x80) {
                $escaped .= isset($markupEntities[$char]) ? $markupEntities[$char] : $char;
                continue;
            }

            if (($byte >> 5) === 0x06) {
                // 11 bits: 110bbbbb 10bbbbbb (2047)
                $trailing = 1;
                $codePoint = $byte & 0x1F;
            } elseif (($byte >> 4) === 0x0E) {
                // 16 bits: 1110bbbb 10bbbbbb 10bbbbbb
                $trailing = 2;
                $codePoint = $byte & 0x0F;
            } elseif (($byte >> 3) === 0x1E) {
                // 21 bits: 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
                $trailing = 3;
                $codePoint = $byte & 0x07;
            } else {
                // not a valid lead byte: skip it
                continue;
            }

            // append 6 bits per continuation byte
            for ($offset = 1; $offset <= $trailing; $offset++) {
                $next = $pos + $offset;
                $bits = ($next < $length) ? (ord($data[$next]) & 0x3F) : 0;
                $codePoint = ($codePoint * 64) + $bits;
            }

            $escaped .= '&#' . $codePoint . ';';
            $pos += $trailing;
        }

        return $escaped;
    }

    /**
     * Checks if a given charset encoding is present in a list of encodings or if it is a valid subset of any encoding
     * in the list.
     *
     * Charset names are compared case-insensitively, and whitespace around the names in the list is ignored.
     *
     * @param string $encoding charset to be tested
     * @param string|array $validList comma separated list of valid charsets (or array of charsets)
     *
     * @return bool false also when $validList is neither a string nor an array
     */
    public function isValidCharset($encoding, $validList)
    {
        if (is_string($validList)) {
            $validList = explode(',', $validList);
        }
        if (!is_array($validList)) {
            return false;
        }

        // normalize both sides once, so that the direct match and the superset lookup agree on the spelling
        $encoding = strtoupper(trim((string)$encoding));
        $allowedCharsets = array();
        foreach ($validList as $allowed) {
            $allowedCharsets[] = strtoupper(trim((string)$allowed));
        }

        if (in_array($encoding, $allowedCharsets, true)) {
            return true;
        }

        if (array_key_exists($encoding, $this->charset_supersets)) {
            // the charset is acceptable if any of the allowed ones is known to be a superset of it
            foreach ($allowedCharsets as $allowed) {
                if (in_array($allowed, $this->charset_supersets[$encoding], true)) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Used only for backwards compatibility
     * @deprecated
     *
     * @param string $charset only 'iso88591' is supported
     *
     * @return array the transcoding table, with keys 'in' and 'out'
     *
     * @throws \Exception for unknown/unsupported charsets
     */
    public function getEntities($charset)
    {
        //trigger_error('Method ' . __METHOD__ . ' is deprecated', E_USER_DEPRECATED);

        if ($charset === 'iso88591') {
            return $this->xml_iso88591_Entities;
        }

        throw new \Exception('Unsupported charset: ' . $charset);
    }
}
275