Charset::__construct() - Code Metrics - Inspection of "travis" - gggeek/phpxmlrpc - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 8f4643...4ef224 )

by Gaetano

created 2019-05-18 11:13 UTC

Charset::__construct() A

↳ Parent: Charset

Complexity

Conditions	3
Paths	4

Size

Total Lines

Duplication

Lines	8
Ratio	47.06 %

Code Coverage

Tests	8
CRAP Score	3

Importance

Changes

Metric	Value
cc	3
nc	4
nop	0
dl	8
loc	17
ccs	8
cts	8
cp	1
crap	3
rs	9.7
c	0
b	0
f	0

<?php

namespace PhpXmlRpc\Helper;

use PhpXmlRpc\PhpXmlRpc;

/**
 * Helper for charset-related operations: escaping strings for inclusion in XML while transcoding between
 * ISO-8859-1, UTF-8 and US-ASCII, and checking charset names against lists of accepted charsets.
 *
 * Obtain the shared instance via Charset::instance(); the constructor is private.
 */
class Charset
{
    // tables used for transcoding different charsets into us-ascii xml
    // 'in' holds raw single-byte characters, 'out' the matching numeric character references (same index)
    protected $xml_iso88591_Entities = array("in" => array(), "out" => array());
    // NOTE(review): not read or written anywhere in this class; kept because subclasses might rely on it
    protected $xml_iso88591_utf8 = array("in" => array(), "out" => array());

    /// @todo add to iso table the characters from cp_1252 range, i.e. 128 to 159?
    /// These will NOT be present in true ISO-8859-1, but will save the unwary
    /// windows user from sending junk (though no luck when receiving them...)
    /*
    protected $xml_cp1252_Entities = array('in' => array(), 'out' => array(
        '&#x20AC;', '?',        '&#x201A;', '&#x0192;',
        '&#x201E;', '&#x2026;', '&#x2020;', '&#x2021;',
        '&#x02C6;', '&#x2030;', '&#x0160;', '&#x2039;',
        '&#x0152;', '?',        '&#x017D;', '?',
        '?',        '&#x2018;', '&#x2019;', '&#x201C;',
        '&#x201D;', '&#x2022;', '&#x2013;', '&#x2014;',
        '&#x02DC;', '&#x2122;', '&#x0161;', '&#x203A;',
        '&#x0153;', '?',        '&#x017E;', '&#x0178;'
    ));
    */

    // Maps a charset name (upper case) to the charsets that are supersets of it.
    // NOTE(review): the 'EUC-' and 'ISO-8859-12' entries look suspicious (truncated / non-existent names?) - confirm
    protected $charset_supersets = array(
        'US-ASCII' => array('ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
            'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8',
            'ISO-8859-9', 'ISO-8859-10', 'ISO-8859-11', 'ISO-8859-12',
            'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'UTF-8',
            'EUC-JP', 'EUC-', 'EUC-KR', 'EUC-CN',),
    );

    protected static $instance = null;

    /**
     * This class is singleton for performance reasons.
     *
     * The instance is created lazily on first call and reused afterwards.
     *
     * @return Charset
     */
    public static function instance()
    {
        if (self::$instance === null) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Builds the ISO-8859-1 => numeric entity translation table.
     *
     * Bytes 0-31 and 160-255 each get a "&#N;" entity; bytes 128-159 are left out (see the cp1252 @todo above).
     * The order matters: encodeEntities() relies on the first 32 entries being the 0-31 range, so that
     * array_slice(..., 32) yields exactly the 160-255 range.
     */
    private function __construct()
    {
        foreach (array_merge(range(0, 31), range(160, 255)) as $code) {
            $this->xml_iso88591_Entities['in'][] = chr($code);
            $this->xml_iso88591_Entities['out'][] = "&#{$code};";
        }

        /*foreach (range(128, 159) as $code)
        {
            $this->xml_cp1252_Entities['in'][] = chr($code);
        }*/
    }

    /**
     * Convert a string to the correct XML representation in a target charset.
     *
     * To help correct communication of non-ascii chars inside strings, regardless of the charset used when sending
     * requests, parsing them, sending responses and parsing responses, an option is to convert all non-ascii chars
     * present in the message into their equivalent 'charset entity'. Charset entities enumerated this way are
     * independent of the charset encoding used to transmit them, and all XML parsers are bound to understand them.
     * Note that in the std case we are not sending a charset encoding mime type along with http headers, so we are
     * bound by RFC 3023 to emit strict us-ascii.
     *
     * Supported conversions (names are matched case-insensitively; an empty destination means us-ascii output):
     * ISO-8859-1 => '', US-ASCII, UTF-8, ISO-8859-1; US-ASCII => '', US-ASCII, UTF-8, ISO-8859-1;
     * UTF-8 => '', US-ASCII, ISO-8859-1, UTF-8.
     * Any other combination logs a message via error_log() and returns an empty string.
     *
     * When the source is UTF-8 and the destination is not, bytes which can not start a UTF-8 sequence are dropped,
     * and an incomplete multi-byte sequence at the end of the string is dropped as well. Continuation bytes are
     * not validated.
     *
     * @todo do a bit of basic benchmarking (strtr vs. str_replace)
     * @todo make usage of iconv() or recode_string() or mb_string() where available
     *
     * @param string $data
     * @param string $srcEncoding when empty, PhpXmlRpc::$xmlrpc_internalencoding is used
     * @param string $destEncoding
     *
     * @return string
     */
    public function encodeEntities($data, $srcEncoding = '', $destEncoding = '')
    {
        if ($srcEncoding == '') {
            // lame, but we know no better...
            $srcEncoding = PhpXmlRpc::$xmlrpc_internalencoding;
        }

        $conversion = strtoupper($srcEncoding . '_' . $destEncoding);
        switch ($conversion) {
            case 'ISO-8859-1_':
            case 'ISO-8859-1_US-ASCII':
                $escapedData = $this->escapeXmlSpecialChars($data);
                $escapedData = str_replace($this->xml_iso88591_Entities['in'], $this->xml_iso88591_Entities['out'], $escapedData);
                break;

            case 'ISO-8859-1_UTF-8':
                $escapedData = $this->latin1ToUtf8($this->escapeXmlSpecialChars($data));
                break;

            case 'ISO-8859-1_ISO-8859-1':
            case 'US-ASCII_US-ASCII':
            case 'US-ASCII_UTF-8':
            case 'US-ASCII_':
            case 'US-ASCII_ISO-8859-1':
            case 'UTF-8_UTF-8':
            //case 'CP1252_CP1252':
                // no transcoding needed: only the xml special chars have to be escaped
                $escapedData = $this->escapeXmlSpecialChars($data);
                break;

            case 'UTF-8_':
            case 'UTF-8_US-ASCII':
            case 'UTF-8_ISO-8859-1':
                // be kind to users creating string xmlrpc values out of different php types
                $escapedData = $this->utf8ToEntities((string)$data);

                // when converting to latin-1, do not be so eager with using entities for characters 160-255
                if ($conversion == 'UTF-8_ISO-8859-1') {
                    $escapedData = str_replace(array_slice($this->xml_iso88591_Entities['out'], 32), array_slice($this->xml_iso88591_Entities['in'], 32), $escapedData);
                }
                break;

            /*
            case 'CP1252_':
            case 'CP1252_US-ASCII':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                $escapedData = str_replace($this->xml_iso88591_Entities['in'], $this->xml_iso88591_Entities['out'], $escapedData);
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                break;
            case 'CP1252_UTF-8':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                /// @todo we could use real UTF8 chars here instead of xml entities... (note that utf8_encode all alone will NOT convert them)
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                $escapedData = utf8_encode($escapedData);
                break;
            case 'CP1252_ISO-8859-1':
                $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
                // we might as well replace all funky chars with a '?' here, but we are kind and leave it to the receiving application layer to decide what to do with these weird entities...
                $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
                break;
            */

            default:
                $escapedData = '';
                error_log('XML-RPC: ' . __METHOD__ . ": Converting from $srcEncoding to $destEncoding: not supported...");
        }

        return $escapedData;
    }

    /**
     * Replaces the five characters which are special in XML (& " ' < >) with their predefined entities.
     *
     * '&' is replaced first, so that the ampersands of the entities added afterwards are not escaped again.
     *
     * @param string $data
     *
     * @return string
     */
    private function escapeXmlSpecialChars($data)
    {
        return str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
    }

    /**
     * Transcodes an ISO-8859-1 string to UTF-8.
     *
     * utf8_encode() is deprecated since PHP 8.2, so mb_convert_encoding() is preferred whenever the mbstring
     * extension is loaded; both produce the same output for this conversion.
     *
     * @param string $data
     *
     * @return string
     */
    private function latin1ToUtf8($data)
    {
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($data, 'UTF-8', 'ISO-8859-1');
        }

        return utf8_encode($data);
    }

    /**
     * Escapes a UTF-8 string into pure us-ascii XML text: the five XML special characters become their predefined
     * entities and every multi-byte sequence becomes a decimal numeric character reference.
     *
     * @param string $data
     *
     * @return string
     */
    private function utf8ToEntities($data)
    {
        $escapedData = '';
        $ns = strlen($data);
        for ($nn = 0; $nn < $ns; $nn++) {
            $ch = $data[$nn];
            $ii = ord($ch);

            // 7 bits: 0bbbbbbb (127)
            if ($ii < 128) {
                /// @todo shall we replace this with a (supposedly) faster str_replace?
                switch ($ii) {
                    case 34:
                        $escapedData .= '&quot;';
                        break;
                    case 38:
                        $escapedData .= '&amp;';
                        break;
                    case 39:
                        $escapedData .= '&apos;';
                        break;
                    case 60:
                        $escapedData .= '&lt;';
                        break;
                    case 62:
                        $escapedData .= '&gt;';
                        break;
                    default:
                        $escapedData .= $ch;
                } // switch
                continue;
            }

            // the lead byte tells how many continuation bytes follow and carries the top bits of the code point
            if ($ii >> 5 == 6) {
                // 11 bits: 110bbbbb 10bbbbbb (2047)
                $extra = 1;
                $codePoint = ($ii & 31);
            } elseif ($ii >> 4 == 14) {
                // 16 bits: 1110bbbb 10bbbbbb 10bbbbbb
                $extra = 2;
                $codePoint = ($ii & 15);
            } elseif ($ii >> 3 == 30) {
                // 21 bits: 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
                $extra = 3;
                $codePoint = ($ii & 7);
            } else {
                // not a valid lead byte: it is skipped without producing any output
                continue;
            }

            // truncated sequence at the end of the string: stop here instead of reading past the last byte
            if ($nn + $extra >= $ns) {
                break;
            }

            // each continuation byte contributes its low 6 bits
            for ($k = 1; $k <= $extra; $k++) {
                $codePoint = ($codePoint * 64) + (ord($data[$nn + $k]) & 63);
            }
            $escapedData .= sprintf('&#%d;', $codePoint);
            $nn += $extra;
        }

        return $escapedData;
    }

    /**
     * Checks if a given charset encoding is present in a list of encodings or
     * if it is a valid subset of any encoding in the list.
     *
     * The charset to be tested is upper-cased before any comparison; the entries of the list are compared as given.
     *
     * @param string $encoding charset to be tested
     * @param string|array $validList comma separated list of valid charsets (or array of charsets)
     *
     * @return bool false also when $validList is neither a string nor an array
     */
    public function isValidCharset($encoding, $validList)
    {
        if (is_string($validList)) {
            $validList = explode(',', $validList);
        }
        if (!is_array($validList)) {
            return false;
        }

        // normalize once, so that both the direct match and the superset lookup are case-insensitive
        $encoding = strtoupper($encoding);
        if (in_array($encoding, $validList, true)) {
            return true;
        }

        if (array_key_exists($encoding, $this->charset_supersets)) {
            foreach ($validList as $allowed) {
                if (in_array($allowed, $this->charset_supersets[$encoding], true)) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Used only for backwards compatibility
     * @deprecated
     *
     * @param string $charset only 'iso88591' is supported
     *
     * @return array the translation table, with keys 'in' and 'out'
     *
     * @throws \Exception for unknown/unsupported charsets
     */
    public function getEntities($charset)
    {
        switch ($charset)
        {
            case 'iso88591':
                return $this->xml_iso88591_Entities;
            default:
                throw new \Exception('Unsupported charset: ' . $charset);
        }
    }

}


1			<?php
2
3			namespace PhpXmlRpc\Helper;
4
5			use PhpXmlRpc\PhpXmlRpc;
6
7			class Charset
8			{
9			// tables used for transcoding different charsets into us-ascii xml
10			protected $xml_iso88591_Entities = array("in" => array(), "out" => array());
11			protected $xml_iso88591_utf8 = array("in" => array(), "out" => array());
12
13			/// @todo add to iso table the characters from cp_1252 range, i.e. 128 to 159?
14			/// These will NOT be present in true ISO-8859-1, but will save the unwary
15			/// windows user from sending junk (though no luck when receiving them...)
16			/*
17			protected $xml_cp1252_Entities = array('in' => array(), out' => array(
18			'€', '?', '‚', 'ƒ',
19			'„', '…', '†', '‡',
20			'ˆ', '‰', 'Š', '‹',
21			'Œ', '?', 'Ž', '?',
22			'?', '‘', '’', '“',
23			'”', '•', '–', '—',
24			'˜', '™', 'š', '›',
25			'œ', '?', 'ž', 'Ÿ'
26			));
27			*/
28
29			protected $charset_supersets = array(
30			'US-ASCII' => array('ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
31			'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8',
32			'ISO-8859-9', 'ISO-8859-10', 'ISO-8859-11', 'ISO-8859-12',
33			'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'UTF-8',
34			'EUC-JP', 'EUC-', 'EUC-KR', 'EUC-CN',),
35			);
36
37			protected static $instance = null;
38
39			/**
40			* This class is singleton for performance reasons.
41			*
42			* @return Charset
43			*/
44	591		public static function instance()
45			{
46	591		if (self::$instance === null) {
47	336		self::$instance = new self();
48			}
49
50	591		return self::$instance;
51			}
52
53	336		private function __construct()
54			{
55	336	View Code Duplication	for ($i = 0; $i < 32; $i++) {
			0 ignored issues – show Duplication introduced 2016-01-20 23:15 UTC by Report Bug Copy Issue Report This code seems to be duplicated across your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. Loading history...
56	336		$this->xml_iso88591_Entities["in"][] = chr($i);
57	336		$this->xml_iso88591_Entities["out"][] = "&#{$i};";
58			}
59
60	336	View Code Duplication	for ($i = 160; $i < 256; $i++) {
			0 ignored issues – show Duplication introduced 2016-01-20 23:15 UTC by Report Bug Copy Issue Report This code seems to be duplicated across your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. Loading history...
61	336		$this->xml_iso88591_Entities["in"][] = chr($i);
62	336		$this->xml_iso88591_Entities["out"][] = "&#{$i};";
63			}
64
65			/*for ($i = 128; $i < 160; $i++)
66			{
67			$this->xml_cp1252_Entities['in'][] = chr($i);
68			}*/
69	336		}
70
71			/**
72			* Convert a string to the correct XML representation in a target charset.
73			*
74			* To help correct communication of non-ascii chars inside strings, regardless of the charset used when sending
75			* requests, parsing them, sending responses and parsing responses, an option is to convert all non-ascii chars
76			* present in the message into their equivalent 'charset entity'. Charset entities enumerated this way are
77			* independent of the charset encoding used to transmit them, and all XML parsers are bound to understand them.
78			* Note that in the std case we are not sending a charset encoding mime type along with http headers, so we are
79			* bound by RFC 3023 to emit strict us-ascii.
80			*
81			* @todo do a bit of basic benchmarking (strtr vs. str_replace)
82			* @todo make usage of iconv() or recode_string() or mb_string() where available
83			*
84			* @param string $data
85			* @param string $srcEncoding
86			* @param string $destEncoding
87			*
88			* @return string
89			*/
90	574		public function encodeEntities($data, $srcEncoding = '', $destEncoding = '')
91			{
92	574		if ($srcEncoding == '') {
93			// lame, but we know no better...
94			$srcEncoding = PhpXmlRpc::$xmlrpc_internalencoding;
95			}
96
97	574		$conversion = strtoupper($srcEncoding . '_' . $destEncoding);
98	574		switch ($conversion) {
99	574		case 'ISO-8859-1_':
100	348		case 'ISO-8859-1_US-ASCII':
101	486		$escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
102	486		$escapedData = str_replace($this->xml_iso88591_Entities['in'], $this->xml_iso88591_Entities['out'], $escapedData);
103	486		break;
104
105	348		case 'ISO-8859-1_UTF-8':
106	28		$escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
107	28		$escapedData = utf8_encode($escapedData);
108	28		break;
109
110	337		case 'ISO-8859-1_ISO-8859-1':
111	326		case 'US-ASCII_US-ASCII':
112	326		case 'US-ASCII_UTF-8':
113	326		case 'US-ASCII_':
114	326		case 'US-ASCII_ISO-8859-1':
115	326		case 'UTF-8_UTF-8':
116			//case 'CP1252_CP1252':
117	46		$escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
118	46		break;
119
120	308		case 'UTF-8_':
121	24		case 'UTF-8_US-ASCII':
122	24		case 'UTF-8_ISO-8859-1':
123			// NB: this will choke on invalid UTF-8, going most likely beyond EOF
124	308		$escapedData = '';
125			// be kind to users creating string xmlrpc values out of different php types
126	308		$data = (string)$data;
127	308		$ns = strlen($data);
128	308		for ($nn = 0; $nn < $ns; $nn++) {
129	308		$ch = $data[$nn];
130	308		$ii = ord($ch);
131			// 7 bits: 0bbbbbbb (127)
132	308		if ($ii < 128) {
133			/// @todo shall we replace this with a (supposedly) faster str_replace?
134	301		switch ($ii) {
135	301		case 34:
136	18		$escapedData .= '&quot;';
137	18		break;
138	301		case 38:
139	18		$escapedData .= '&amp;';
140	18		break;
141	301		case 39:
142	20		$escapedData .= '&apos;';
143	20		break;
144	301		case 60:
145	18		$escapedData .= '&lt;';
146	18		break;
147	301		case 62:
148	18		$escapedData .= '&gt;';
149	18		break;
150			default:
151	301		$escapedData .= $ch;
152			} // switch
153			} // 11 bits: 110bbbbb 10bbbbbb (2047)
154	64		elseif ($ii >> 5 == 6) {
155	60		$b1 = ($ii & 31);
156	60		$ii = ord($data[$nn + 1]);
157	60		$b2 = ($ii & 63);
158	60		$ii = ($b1 * 64) + $b2;
159	60		$ent = sprintf('&#%d;', $ii);
160	60		$escapedData .= $ent;
161	60		$nn += 1;
162			} // 16 bits: 1110bbbb 10bbbbbb 10bbbbbb
163	28		elseif ($ii >> 4 == 14) {
164	28		$b1 = ($ii & 15);
165	28		$ii = ord($data[$nn + 1]);
166	28		$b2 = ($ii & 63);
167	28		$ii = ord($data[$nn + 2]);
168	28		$b3 = ($ii & 63);
169	28		$ii = ((($b1 * 64) + $b2) * 64) + $b3;
170	28		$ent = sprintf('&#%d;', $ii);
171	28		$escapedData .= $ent;
172	28		$nn += 2;
173			} // 21 bits: 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
174			elseif ($ii >> 3 == 30) {
175			$b1 = ($ii & 7);
176			$ii = ord($data[$nn + 1]);
177			$b2 = ($ii & 63);
178			$ii = ord($data[$nn + 2]);
179			$b3 = ($ii & 63);
180			$ii = ord($data[$nn + 3]);
181			$b4 = ($ii & 63);
182			$ii = ((((($b1 * 64) + $b2) * 64) + $b3) * 64) + $b4;
183			$ent = sprintf('&#%d;', $ii);
184			$escapedData .= $ent;
185			$nn += 3;
186			}
187			}
188
189			// when converting to latin-1, do not be so eager with using entities for characters 160-255
190	308		if ($conversion == 'UTF-8_ISO-8859-1') {
191	24		$escapedData = str_replace(array_slice($this->xml_iso88591_Entities['out'], 32), array_slice($this->xml_iso88591_Entities['in'], 32), $escapedData);
192			}
193	308		break;
194
195			/*
196			case 'CP1252_':
197			case 'CP1252_US-ASCII':
198			$escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&', '"', ''', '<', '>'), $data);
199			$escapedData = str_replace($this->xml_iso88591_Entities']['in'], $this->xml_iso88591_Entities['out'], $escapedData);
200			$escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
201			break;
202			case 'CP1252_UTF-8':
203			$escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&', '"', ''', '<', '>'), $data);
204			/// @todo we could use real UTF8 chars here instead of xml entities... (note that utf_8 encode all allone will NOT convert them)
205			$escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
206			$escapedData = utf8_encode($escapedData);
207			break;
208			case 'CP1252_ISO-8859-1':
209			$escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&', '"', ''', '<', '>'), $data);
210			// we might as well replace all funky chars with a '?' here, but we are kind and leave it to the receiving application layer to decide what to do with these weird entities...
211			$escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
212			break;
213			*/
214
215			default:
216			$escapedData = '';
217			error_log('XML-RPC: ' . __METHOD__ . ": Converting from $srcEncoding to $destEncoding: not supported...");
218			}
219
220	574		return $escapedData;
221			}
222
223			/**
224			* Checks if a given charset encoding is present in a list of encodings or
225			* if it is a valid subset of any encoding in the list.
226			*
227			* @param string $encoding charset to be tested
228			* @param string|array $validList comma separated list of valid charsets (or array of charsets)
229			*
230			* @return bool
231			*/
232			public function isValidCharset($encoding, $validList)
233			{
234			if (is_string($validList)) {
235			$validList = explode(',', $validList);
236			}
237			if (@in_array(strtoupper($encoding), $validList)) {
238			return true;
239			} else {
240			if (array_key_exists($encoding, $this->charset_supersets)) {
241			foreach ($validList as $allowed) {
242			if (in_array($allowed, $this->charset_supersets[$encoding])) {
243			return true;
244			}
245			}
246			}
247
248			return false;
249			}
250			}
251
252			/**
253			* Used only for backwards compatibility
254			* @deprecated
255			*
256			* @param string $charset
257			*
258			* @return array
259			*
260			* @throws \Exception for unknown/unsupported charsets
261			*/
262			public function getEntities($charset)
263			{
264			switch ($charset)
265			{
266			case 'iso88591':
267			return $this->xml_iso88591_Entities;
268			default:
269			throw new \Exception('Unsupported charset: ' . $charset);
270			}
271			}
272
273			}
274

gggeek / phpxmlrpc

Push — master ( 8f4643...4ef224 )

Charset::__construct() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like