Completed
Push — master ( 081f49...6be6c0 )
by Gaetano
03:29
created

XMLParser::xmlrpc_dh()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 4.3731

Importance

Changes 0
Metric Value
cc 4
nc 3
nop 2
dl 0
loc 11
ccs 5
cts 7
cp 0.7143
crap 4.3731
rs 9.9
c 0
b 0
f 0
1
<?php
2
3
namespace PhpXmlRpc\Helper;
4
5
use PhpXmlRpc\PhpXmlRpc;
6
use PhpXmlRpc\Value;
7
8
/**
 * Deals with parsing the XML.
 *
 * The xmlrpc_* methods are meant to be registered as callbacks of a php xml parser (element start, element end,
 * character data, default handler). They accumulate the parsing state into $this->_xh.
 * The two static methods are helpers used to find out the character set of an xml chunk.
 */
class XMLParser
{
    // Used to store state during parsing.
    // Quick explanation of components:
    //   ac - used to accumulate values (character data)
    //   stack - array with genealogy of xml elements names:
    //           used to validate nesting of xmlrpc elements
    //   valuestack - array used for parsing arrays and structs
    //   lv - used to indicate "looking for a value": implements
    //        the logic to allow values with no types to be strings
    //   isf - used to indicate a parsing fault (2) or xmlrpc response fault (1)
    //   isf_reason - used for storing the description of the parsing fault
    //   method - used to store method name
    //   params - used to store parameters in method calls
    //   pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values
    //   rt - lowercased name of the top level element: 'methodcall', 'methodresponse' or 'value'
    // The following members are not part of the initial array, they get created by the handlers while parsing:
    //   vt - type of the value being parsed ('value' means: no type element found yet)
    //   value - the last value which was completely parsed
    //   php_class - content of the PHP_CLASS attribute found on the last STRUCT/ARRAY element, if any
    //   (lv is also created by the handlers)
    public $_xh = array(
        'ac' => '',
        'stack' => array(),
        'valuestack' => array(),
        'isf' => 0,
        'isf_reason' => '',
        'method' => false, // so we can check later if we got a methodname or not
        'params' => array(),
        'pt' => array(),
        'rt' => '',
    );

    // For every known element name (uppercase), the list of element names which are allowed as its direct parent
    public $xmlrpc_valid_parents = array(
        'VALUE' => array('MEMBER', 'DATA', 'PARAM', 'FAULT'),
        'BOOLEAN' => array('VALUE'),
        'I4' => array('VALUE'),
        'I8' => array('VALUE'),
        'EX:I8' => array('VALUE'),
        'INT' => array('VALUE'),
        'STRING' => array('VALUE'),
        'DOUBLE' => array('VALUE'),
        'DATETIME.ISO8601' => array('VALUE'),
        'BASE64' => array('VALUE'),
        'MEMBER' => array('STRUCT'),
        'NAME' => array('MEMBER'),
        'DATA' => array('ARRAY'),
        'ARRAY' => array('VALUE'),
        'STRUCT' => array('VALUE'),
        'PARAM' => array('PARAMS'),
        'METHODNAME' => array('METHODCALL'),
        'PARAMS' => array('METHODCALL', 'METHODRESPONSE'),
        'FAULT' => array('METHODRESPONSE'),
        'NIL' => array('VALUE'), // only used when extension activated
        'EX:NIL' => array('VALUE'), // only used when extension activated
    );

    /**
     * xml parser handler function for opening element tags.
     *
     * Validates the nesting of the element and prepares the parsing state for its content. On invalid xml-rpc,
     * $this->_xh['isf'] is set to 2 and $this->_xh['isf_reason'] to a description of the problem.
     *
     * @param mixed $parser the xml parser invoking the handler (unused)
     * @param string $name element name. The comparisons below expect it to be uppercase
     * @param array $attrs element attributes. Only PHP_CLASS is looked at, on STRUCT and ARRAY elements
     * @param bool $acceptSingleVals when true, a VALUE element is accepted as top level element, besides
     *                               METHODCALL and METHODRESPONSE
     */
    public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false)
    {
        // if invalid xmlrpc already detected, skip all processing
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // check for correct element nesting
        /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
        ///       there is only a single top level element in xml anyway
        if (count($this->_xh['stack']) == 0) {
            // top level element: only the two envelope types are valid, plus a bare VALUE when the caller asked for it
            $validTopLevel = ($name == 'METHODRESPONSE' || $name == 'METHODCALL' ||
                ($name == 'VALUE' && $acceptSingleVals));
            if (!$validTopLevel) {
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = 'missing top level xmlrpc element';

                return;
            }
            $this->_xh['rt'] = strtolower($name);
        } else {
            // not top level element: see if parent is OK
            $parent = end($this->_xh['stack']);
            if (!array_key_exists($name, $this->xmlrpc_valid_parents) ||
                !in_array($parent, $this->xmlrpc_valid_parents[$name], true)
            ) {
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "xmlrpc element $name cannot be child of $parent";

                return;
            }
        }

        switch ($name) {
            // optimize for speed switch cases: most common cases first
            case 'VALUE':
                /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element
                $this->_xh['vt'] = 'value'; // indicator: no value found yet
                $this->_xh['ac'] = '';
                $this->_xh['lv'] = 1;
                $this->_xh['php_class'] = null;
                break;
            case 'I8':
            case 'EX:I8':
                if (PHP_INT_SIZE === 4) {
                    /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode";

                    return;
                }
                // fall through voluntarily
            case 'I4':
            case 'INT':
            case 'STRING':
            case 'BOOLEAN':
            case 'DOUBLE':
            case 'DATETIME.ISO8601':
            case 'BASE64':
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                    return;
                }
                $this->_xh['ac'] = ''; // reset the accumulator
                break;
            case 'STRUCT':
            case 'ARRAY':
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                    return;
                }
                // create an empty array to hold child values, and push it onto appropriate stack
                $curVal = array(
                    'values' => array(),
                    'type' => $name,
                );
                // check for out-of-band information to rebuild php objs
                // and in case it is found, save it
                if (isset($attrs['PHP_CLASS'])) {
                    $curVal['php_class'] = $attrs['PHP_CLASS'];
                }
                $this->_xh['valuestack'][] = $curVal;
                $this->_xh['vt'] = 'data'; // be prepared for a data element next
                break;
            case 'DATA':
                if ($this->_xh['vt'] != 'data') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "found two data elements inside an array element";

                    return;
                }
                // fall through voluntarily: nothing else to do for a valid DATA element
            case 'METHODCALL':
            case 'METHODRESPONSE':
            case 'PARAMS':
                // valid elements that add little to processing
                break;
            case 'METHODNAME':
            case 'NAME':
                /// @todo we could check for 2 NAME elements inside a MEMBER element
                $this->_xh['ac'] = '';
                break;
            case 'FAULT':
                $this->_xh['isf'] = 1;
                break;
            case 'MEMBER':
                // set member name to an empty string, in case we do not find it in the xml later on
                $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = '';
                // drop through intentionally
            case 'PARAM':
                // clear value type, so we can check later if no value has been passed for this param/member
                $this->_xh['vt'] = null;
                break;
            case 'NIL':
            case 'EX:NIL':
                if (PhpXmlRpc::$xmlrpc_null_extension) {
                    if ($this->_xh['vt'] != 'value') {
                        // two data elements inside a value: an error occurred!
                        $this->_xh['isf'] = 2;
                        $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                        return;
                    }
                    $this->_xh['ac'] = ''; // reset the accumulator
                    break;
                }
                // we do not support the <NIL/> extension, so
                // drop through intentionally
            default:
                /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name";
                break;
        }

        // Save current element name to stack, to validate nesting
        $this->_xh['stack'][] = $name;

        /// @todo optimization creep: move this inside the big switch() above
        if ($name != 'VALUE') {
            $this->_xh['lv'] = 0;
        }
    }

    /**
     * Used in decoding xml chunks that might represent single xmlrpc values.
     *
     * Same as xmlrpc_se(), but a VALUE element is accepted as top level element.
     *
     * @param mixed $parser
     * @param string $name
     * @param array $attrs
     */
    public function xmlrpc_se_any($parser, $name, $attrs)
    {
        $this->xmlrpc_se($parser, $name, $attrs, true);
    }

    /**
     * xml parser handler function for close element tags.
     *
     * Converts the character data accumulated in $this->_xh['ac'] into a php value according to the element being
     * closed, and stores the result in the parsing state (value, vt, params, pt, method, valuestack).
     *
     * @param mixed $parser the xml parser invoking the handler (unused)
     * @param string $name element name. The comparisons below expect it to be uppercase
     * @param bool $rebuildXmlrpcvals when true, every parsed value gets wrapped into a Value object; when false,
     *                                plain php values are kept
     */
    public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = true)
    {
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // pop this element name from stack
        // NB: if XML validates, correct opening/closing is guaranteed and
        // we do not have to check for $name == the popped element.
        // we also checked for proper nesting at start of elements...
        array_pop($this->_xh['stack']);

        switch ($name) {
            case 'VALUE':
                // This if() detects if no scalar was inside <VALUE></VALUE>
                if ($this->_xh['vt'] == 'value') {
                    $this->_xh['value'] = $this->_xh['ac'];
                    $this->_xh['vt'] = Value::$xmlrpcString;
                }

                // check if we are inside an array or struct:
                // if value just built is inside an array, let's move it into array on the stack
                $vscount = count($this->_xh['valuestack']);
                $insideArray = ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY');

                if ($rebuildXmlrpcvals) {
                    // build the xmlrpc val out of the data received, and substitute it
                    $temp = new Value($this->_xh['value'], $this->_xh['vt']);
                    // in case we got info about underlying php class, save it
                    // in the object we're rebuilding
                    if (isset($this->_xh['php_class'])) {
                        $temp->_php_class = $this->_xh['php_class'];
                    }
                    if ($insideArray) {
                        $this->_xh['valuestack'][$vscount - 1]['values'][] = $temp;
                    } else {
                        $this->_xh['value'] = $temp;
                    }
                } elseif ($insideArray) {
                    /// @todo this needs to treat correctly php-serialized objects,
                    /// since std deserializing is done by php_xmlrpc_decode,
                    /// which we will not be calling...
                    $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value'];
                }
                break;
            case 'BOOLEAN':
            case 'I4':
            case 'I8':
            case 'EX:I8':
            case 'INT':
            case 'STRING':
            case 'DOUBLE':
            case 'DATETIME.ISO8601':
            case 'BASE64':
                $this->_xh['vt'] = strtolower($name);
                /// @todo: optimization creep - remove the if/elseif cycle below
                /// since the case() in which we are already did that
                if ($name == 'STRING') {
                    $this->_xh['value'] = $this->_xh['ac'];
                } elseif ($name == 'DATETIME.ISO8601') {
                    // a badly formatted date is logged, but accepted anyway
                    if (!preg_match('/^[0-9]{8}T[0-9]{2}:[0-9]{2}:[0-9]{2}$/', $this->_xh['ac'])) {
                        error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']);
                    }
                    $this->_xh['vt'] = Value::$xmlrpcDateTime;
                    $this->_xh['value'] = $this->_xh['ac'];
                } elseif ($name == 'BASE64') {
                    /// @todo check for failure of base64 decoding / catch warnings
                    $this->_xh['value'] = base64_decode($this->_xh['ac']);
                } elseif ($name == 'BOOLEAN') {
                    // special case here: we translate boolean 1 or 0 into PHP
                    // constants true or false.
                    // Strings 'true' and 'false' are accepted, even though the
                    // spec never mentions them (see eg. Blogger api docs)
                    // NB: this simple checks helps a lot sanitizing input, ie no
                    // security problems around here
                    if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') == 0) {
                        $this->_xh['value'] = true;
                    } else {
                        // log if receiving something strange, even though we set the value to false anyway
                        if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') != 0) {
                            error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']);
                        }
                        $this->_xh['value'] = false;
                    }
                } elseif ($name == 'DOUBLE') {
                    // we have a DOUBLE
                    // we must check that only 0123456789+-.eE<space><tab> are characters here.
                    // The hyphen has to be escaped: a bare '+-e' inside the character class is a range,
                    // which would also let through uppercase letters and various punctuation characters.
                    // NOTE: regexp could be much stricter than this...
                    if (!preg_match('/^[+\-eE0-9 \t.]+$/', $this->_xh['ac'])) {
                        /// @todo: find a better way of throwing an error than this!
                        error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']);
                        $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
                    } else {
                        // it's ok, add it on
                        $this->_xh['value'] = (double)$this->_xh['ac'];
                    }
                } else {
                    // we have an I4/I8/INT
                    // we must check that only 0123456789<space><tab> and a leading sign are characters here
                    if (!preg_match('/^[+-]?[0123456789 \t]+$/', $this->_xh['ac'])) {
                        /// @todo find a better way of throwing an error than this!
                        error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']);
                        $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
                    } else {
                        // it's ok, add it on
                        $this->_xh['value'] = (int)$this->_xh['ac'];
                    }
                }
                $this->_xh['lv'] = 3; // indicate we've found a value
                break;
            case 'NAME':
                $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = $this->_xh['ac'];
                break;
            case 'MEMBER':
                // add to array in the stack the last element built,
                // unless no VALUE was found
                if ($this->_xh['vt']) {
                    $vscount = count($this->_xh['valuestack']);
                    $memberName = $this->_xh['valuestack'][$vscount - 1]['name'];
                    $this->_xh['valuestack'][$vscount - 1]['values'][$memberName] = $this->_xh['value'];
                } else {
                    error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml');
                }
                break;
            case 'DATA':
                $this->_xh['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty
                break;
            case 'STRUCT':
            case 'ARRAY':
                // fetch out of stack array of values, and promote it to current value
                $currVal = array_pop($this->_xh['valuestack']);
                $this->_xh['value'] = $currVal['values'];
                $this->_xh['vt'] = strtolower($name);
                if (isset($currVal['php_class'])) {
                    $this->_xh['php_class'] = $currVal['php_class'];
                }
                break;
            case 'PARAM':
                // add to array of params the current value,
                // unless no VALUE was found
                if ($this->_xh['vt']) {
                    $this->_xh['params'][] = $this->_xh['value'];
                    $this->_xh['pt'][] = $this->_xh['vt'];
                } else {
                    error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml');
                }
                break;
            case 'METHODNAME':
                // only leading whitespace is stripped from the method name
                $this->_xh['method'] = preg_replace('/^[\n\r\t ]+/', '', $this->_xh['ac']);
                break;
            case 'NIL':
            case 'EX:NIL':
                if (PhpXmlRpc::$xmlrpc_null_extension) {
                    $this->_xh['vt'] = 'null';
                    $this->_xh['value'] = null;
                    $this->_xh['lv'] = 3;
                    break;
                }
                // drop through intentionally if nil extension not enabled
            case 'PARAMS':
            case 'FAULT':
            case 'METHODCALL':
            case 'METHODRESPONSE':
                break;
            default:
                // End of INVALID ELEMENT!
                // shall we add an assert here for unreachable code???
                break;
        }
    }

    /**
     * Used in decoding xmlrpc requests/responses without rebuilding xmlrpc Values.
     *
     * Same as xmlrpc_ee(), but parsed values are kept as plain php values.
     *
     * @param mixed $parser
     * @param string $name
     */
    public function xmlrpc_ee_fast($parser, $name)
    {
        $this->xmlrpc_ee($parser, $name, false);
    }

    /**
     * xml parser handler function for character data.
     *
     * Appends the received data to the accumulator, unless a complete value has already been found.
     *
     * @param mixed $parser the xml parser invoking the handler (unused)
     * @param string $data
     */
    public function xmlrpc_cd($parser, $data)
    {
        // skip processing if xml fault already detected.
        // "lookforvalue==3" means that we've found an entire value
        // and should discard any further character data
        if ($this->_xh['isf'] < 2 && $this->_xh['lv'] != 3) {
            $this->_xh['ac'] .= $data;
        }
    }

    /**
     * xml parser handler function for 'other stuff', ie. not char data or
     * element start/end tag. In fact it only gets called on unknown entities...
     *
     * Data which looks like an entity reference (starts with '&' and ends with ';') is appended verbatim to the
     * accumulator; anything else is discarded.
     *
     * @param mixed $parser the xml parser invoking the handler (unused)
     * @param string $data
     * @return bool always true
     */
    public function xmlrpc_dh($parser, $data)
    {
        // skip processing if xml fault already detected
        if ($this->_xh['isf'] < 2) {
            if (substr($data, 0, 1) == '&' && substr($data, -1, 1) == ';') {
                $this->_xh['ac'] .= $data;
            }
        }

        return true;
    }

    /**
     * xml charset encoding guessing helper function.
     * Tries to determine the charset encoding of an XML chunk received over HTTP.
     * NB: according to the spec (RFC 3023), if text/xml content-type is received over HTTP without a charset,
     * we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of non conforming (legacy?) clients/servers,
     * which will be most probably using UTF-8 anyway...
     * In order of importance checks:
     * 1. http headers
     * 2. BOM
     * 3. XML declaration
     * 4. guesses using mb_detect_encoding()
     *
     * @param string $httpHeader the http Content-type header
     * @param string $xmlChunk xml content buffer
     * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled).
     *                              This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings
     * @return string|false the encoding determined, uppercase when it comes from the http header or the xml declaration.
     *                      When it can't be determined: whatever mb_detect_encoding() returns on failure (false) if
     *                      mbstring is enabled, PhpXmlRpc::$xmlrpc_defencoding if mbstring is not enabled
     *
     * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!!
     */
    public static function guessEncoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null)
    {
        // discussion: see http://www.yale.edu/pclt/encoding/
        // 1 - test if encoding is specified in HTTP HEADERS

        // Details:
        // LWS:           (\13\10)?( |\t)+
        // token:         (any char but excluded stuff)+
        // quoted string: " (any char but double quotes and control chars)* "
        // header:        Content-type = ...; charset=value(; ...)*
        //   where value is of type token, no LWS allowed between 'charset' and value
        // Note: we do not check for invalid chars in VALUE:
        //   this had better be done using pure ereg as below
        // Note 2: we might be removing whitespace/tabs that ought to be left in if
        //   the received charset is a quoted string. But nobody uses such charset names...

        /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it?
        $matches = array();
        if (preg_match('/;\s*charset\s*=([^;]+)/i', $httpHeader, $matches)) {
            return strtoupper(trim($matches[1], " \t\""));
        }

        // 2 - scan the first bytes of the data for a UTF-16 (or other) BOM pattern
        //     (source: http://www.w3.org/TR/2000/REC-xml-20001006)
        //     NOTE: actually, according to the spec, even if we find the BOM and determine
        //     an encoding, we should check if there is an encoding specified
        //     in the xml declaration, and verify if they match.
        /// @todo implement check as described above?
        /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases WITH a BOM)
        if (preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {
            return 'UCS-4';
        } elseif (preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {
            return 'UTF-16';
        } elseif (preg_match('/^(\xEF\xBB\xBF)/', $xmlChunk)) {
            return 'UTF-8';
        }

        // 3 - test if encoding is specified in the xml declaration
        // Details:
        // SPACE:         (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        // EQ:            SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
            $xmlChunk, $matches)) {
            // strip the quotes surrounding the encoding name
            return strtoupper(substr($matches[2], 1, -1));
        }

        // 4 - if mbstring is available, let it do the guesswork
        if (extension_loaded('mbstring')) {
            // NB: the comparisons with null are loose, so that an empty string also counts as 'not set'
            if ($encodingPrefs == null && PhpXmlRpc::$xmlrpc_detectencodings != null) {
                $encodingPrefs = PhpXmlRpc::$xmlrpc_detectencodings;
            }
            if ($encodingPrefs) {
                $enc = mb_detect_encoding($xmlChunk, $encodingPrefs);
            } else {
                $enc = mb_detect_encoding($xmlChunk);
            }
            // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII...
            // IANA also likes better US-ASCII, so go with it
            if ($enc == 'ASCII') {
                $enc = 'US-' . $enc;
            }

            return $enc;
        } else {
            // no encoding specified: as per HTTP1.1 assume it is iso-8859-1?
            // Both RFC 2616 (HTTP 1.1) and 1945 (HTTP 1.0) clearly state that for text/xxx content types
            // this should be the standard. And we should be getting text/xml as request and response.
            // BUT we have to be backward compatible with the lib, which always used UTF-8 as default...
            return PhpXmlRpc::$xmlrpc_defencoding;
        }
    }

    /**
     * Helper function: checks if an xml chunk has a charset declaration (BOM or in the xml declaration)
     *
     * @param string $xmlChunk
     * @return bool
     */
    public static function hasEncoding($xmlChunk)
    {
        // scan the first bytes of the data for a UTF-16 (or other) BOM pattern
        //     (source: http://www.w3.org/TR/2000/REC-xml-20001006)
        if (preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {
            return true;
        } elseif (preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {
            return true;
        } elseif (preg_match('/^(\xEF\xBB\xBF)/', $xmlChunk)) {
            return true;
        }

        // test if encoding is specified in the xml declaration
        // Details:
        // SPACE:         (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        // EQ:            SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
            $xmlChunk)) {
            return true;
        }

        return false;
    }
}
562