Completed
Push — master ( 5fc0c8...b5d242 )
by Gaetano
06:30
created

XMLParser::xmlrpc_dh()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 11
rs 9.2
cc 4
eloc 5
nc 3
nop 2
1
<?php
2
3
namespace PhpXmlRpc\Helper;
4
5
use PhpXmlRpc\PhpXmlRpc;
6
use PhpXmlRpc\Value;
7
8
/**
9
 * Deals with parsing the XML.
10
 */
11
class XMLParser
12
{
13
    // used to store state during parsing
14
    // quick explanation of components:
15
    //   ac - used to accumulate values
16
    //   stack - array with genealogy of xml elements names:
17
    //           used to validate nesting of xmlrpc elements
18
    //   valuestack - array used for parsing arrays and structs
19
    //   lv - used to indicate "looking for a value": implements
20
    //        the logic to allow values with no types to be strings
21
    //   isf - used to indicate a parsing fault (2) or xmlrpc response fault (1)
22
    //   isf_reason - used for storing xmlrpc response fault string
23
    //   method - used to store method name
24
    //   params - used to store parameters in method calls
25
    //   pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values
26
    //   rt  - 'methodcall' or 'methodresponse'
27
    public $_xh = array(
28
        'ac' => '',
29
        'stack' => array(),
30
        'valuestack' => array(),
31
        'isf' => 0,
32
        'isf_reason' => '',
33
        'method' => false, // so we can check later if we got a methodname or not
34
        'params' => array(),
35
        'pt' => array(),
36
        'rt' => '',
37
    );
38
39
    public $xmlrpc_valid_parents = array(
40
        'VALUE' => array('MEMBER', 'DATA', 'PARAM', 'FAULT'),
41
        'BOOLEAN' => array('VALUE'),
42
        'I4' => array('VALUE'),
43
        'I8' => array('VALUE'),
44
        'EX:I8' => array('VALUE'),
45
        'INT' => array('VALUE'),
46
        'STRING' => array('VALUE'),
47
        'DOUBLE' => array('VALUE'),
48
        'DATETIME.ISO8601' => array('VALUE'),
49
        'BASE64' => array('VALUE'),
50
        'MEMBER' => array('STRUCT'),
51
        'NAME' => array('MEMBER'),
52
        'DATA' => array('ARRAY'),
53
        'ARRAY' => array('VALUE'),
54
        'STRUCT' => array('VALUE'),
55
        'PARAM' => array('PARAMS'),
56
        'METHODNAME' => array('METHODCALL'),
57
        'PARAMS' => array('METHODCALL', 'METHODRESPONSE'),
58
        'FAULT' => array('METHODRESPONSE'),
59
        'NIL' => array('VALUE'), // only used when extension activated
60
        'EX:NIL' => array('VALUE'), // only used when extension activated
61
    );
62
63
    /**
64
     * xml parser handler function for opening element tags.
65
     */
66
    public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false)
0 ignored issues
show
Unused Code introduced by
The parameter $parser is not used and could be removed.

This check looks for parameters that have been defined for a function or method, but which are not used in the method body.

Loading history...
67
    {
68
        // if invalid xmlrpc already detected, skip all processing
69
        if ($this->_xh['isf'] < 2) {
70
            // check for correct element nesting
71
            // top level element can only be of 2 types
72
            /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
73
            ///       there is only a single top level element in xml anyway
74
            if (count($this->_xh['stack']) == 0) {
75
                if ($name != 'METHODRESPONSE' && $name != 'METHODCALL' && (
76
                        $name != 'VALUE' && !$acceptSingleVals)
77
                ) {
78
                    $this->_xh['isf'] = 2;
79
                    $this->_xh['isf_reason'] = 'missing top level xmlrpc element';
80
81
                    return;
82
                } else {
83
                    $this->_xh['rt'] = strtolower($name);
84
                }
85
            } else {
86
                // not top level element: see if parent is OK
87
                $parent = end($this->_xh['stack']);
88
                if (!array_key_exists($name, $this->xmlrpc_valid_parents) || !in_array($parent, $this->xmlrpc_valid_parents[$name])) {
89
                    $this->_xh['isf'] = 2;
90
                    $this->_xh['isf_reason'] = "xmlrpc element $name cannot be child of $parent";
91
92
                    return;
93
                }
94
            }
95
96
            switch ($name) {
97
                // optimize for speed switch cases: most common cases first
98
                case 'VALUE':
99
                    /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element
100
                    $this->_xh['vt'] = 'value'; // indicator: no value found yet
101
                    $this->_xh['ac'] = '';
102
                    $this->_xh['lv'] = 1;
103
                    $this->_xh['php_class'] = null;
104
                    break;
105
                case 'I8':
106
                case 'EX:I8':
107
                    if (PHP_INT_SIZE === 4) {
108
                        /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
109
                        $this->_xh['isf'] = 2;
110
                        $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode";
111
112
                        return;
113
                    }
114
                // fall through voluntarily
115
                case 'I4':
116
                case 'INT':
117
                case 'STRING':
118
                case 'BOOLEAN':
119
                case 'DOUBLE':
120
                case 'DATETIME.ISO8601':
121 View Code Duplication
                case 'BASE64':
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
122
                    if ($this->_xh['vt'] != 'value') {
123
                        // two data elements inside a value: an error occurred!
124
                        $this->_xh['isf'] = 2;
125
                        $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";
126
127
                        return;
128
                    }
129
                    $this->_xh['ac'] = ''; // reset the accumulator
130
                    break;
131
                case 'STRUCT':
132
                case 'ARRAY':
133
                    if ($this->_xh['vt'] != 'value') {
134
                        //two data elements inside a value: an error occurred!
135
                        $this->_xh['isf'] = 2;
136
                        $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";
137
138
                        return;
139
                    }
140
                    // create an empty array to hold child values, and push it onto appropriate stack
141
                    $curVal = array();
142
                    $curVal['values'] = array();
143
                    $curVal['type'] = $name;
144
                    // check for out-of-band information to rebuild php objs
145
                    // and in case it is found, save it
146
                    if (@isset($attrs['PHP_CLASS'])) {
147
                        $curVal['php_class'] = $attrs['PHP_CLASS'];
148
                    }
149
                    $this->_xh['valuestack'][] = $curVal;
150
                    $this->_xh['vt'] = 'data'; // be prepared for a data element next
151
                    break;
152
                case 'DATA':
0 ignored issues
show
Coding Style introduced by
There must be a comment when fall-through is intentional in a non-empty case body
Loading history...
153
                    if ($this->_xh['vt'] != 'data') {
154
                        //two data elements inside a value: an error occurred!
155
                        $this->_xh['isf'] = 2;
156
                        $this->_xh['isf_reason'] = "found two data elements inside an array element";
157
158
                        return;
159
                    }
160
                case 'METHODCALL':
161
                case 'METHODRESPONSE':
162
                case 'PARAMS':
163
                    // valid elements that add little to processing
164
                    break;
165
                case 'METHODNAME':
166
                case 'NAME':
167
                    /// @todo we could check for 2 NAME elements inside a MEMBER element
168
                    $this->_xh['ac'] = '';
169
                    break;
170
                case 'FAULT':
171
                    $this->_xh['isf'] = 1;
172
                    break;
173
                case 'MEMBER':
174
                    $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = ''; // set member name to null, in case we do not find in the xml later on
175
                    //$this->_xh['ac']='';
0 ignored issues
show
Unused Code Comprehensibility introduced by
78% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
176
                // Drop trough intentionally
177
                case 'PARAM':
178
                    // clear value type, so we can check later if no value has been passed for this param/member
179
                    $this->_xh['vt'] = null;
180
                    break;
181
                case 'NIL':
182 View Code Duplication
                case 'EX:NIL':
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
183
                    if (PhpXmlRpc::$xmlrpc_null_extension) {
184
                        if ($this->_xh['vt'] != 'value') {
185
                            //two data elements inside a value: an error occurred!
186
                            $this->_xh['isf'] = 2;
187
                            $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";
188
189
                            return;
190
                        }
191
                        $this->_xh['ac'] = ''; // reset the accumulator
192
                        break;
193
                    }
194
                // we do not support the <NIL/> extension, so
195
                // drop through intentionally
196
                default:
197
                    /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
198
                    $this->_xh['isf'] = 2;
199
                    $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name";
200
                    break;
201
            }
202
203
            // Save current element name to stack, to validate nesting
204
            $this->_xh['stack'][] = $name;
205
206
            /// @todo optimization creep: move this inside the big switch() above
207
            if ($name != 'VALUE') {
208
                $this->_xh['lv'] = 0;
209
            }
210
        }
211
    }
212
213
    /**
214
     * Used in decoding xml chunks that might represent single xmlrpc values.
215
     */
216
    public function xmlrpc_se_any($parser, $name, $attrs)
217
    {
218
        $this->xmlrpc_se($parser, $name, $attrs, true);
219
    }
220
221
    /**
222
     * xml parser handler function for close element tags.
223
     */
224
    public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = true)
0 ignored issues
show
Unused Code introduced by
The parameter $parser is not used and could be removed.

This check looks for parameters that have been defined for a function or method, but which are not used in the method body.

Loading history...
225
    {
226
        if ($this->_xh['isf'] < 2) {
227
            // push this element name from stack
228
            // NB: if XML validates, correct opening/closing is guaranteed and
229
            // we do not have to check for $name == $currElem.
230
            // we also checked for proper nesting at start of elements...
231
            $currElem = array_pop($this->_xh['stack']);
0 ignored issues
show
Unused Code introduced by
$currElem is not used, you could remove the assignment.

This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used, and the second because $higher is always overwritten on every possible execution path.

Loading history...
232
233
            switch ($name) {
234
                case 'VALUE':
235
                    // This if() detects if no scalar was inside <VALUE></VALUE>
236
                    if ($this->_xh['vt'] == 'value') {
237
                        $this->_xh['value'] = $this->_xh['ac'];
238
                        $this->_xh['vt'] = Value::$xmlrpcString;
239
                    }
240
241
                    if ($rebuildXmlrpcvals) {
242
                        // build the xmlrpc val out of the data received, and substitute it
243
                        $temp = new Value($this->_xh['value'], $this->_xh['vt']);
244
                        // in case we got info about underlying php class, save it
245
                        // in the object we're rebuilding
246
                        if (isset($this->_xh['php_class'])) {
247
                            $temp->_php_class = $this->_xh['php_class'];
248
                        }
249
                        // check if we are inside an array or struct:
250
                        // if value just built is inside an array, let's move it into array on the stack
251
                        $vscount = count($this->_xh['valuestack']);
252 View Code Duplication
                        if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
253
                            $this->_xh['valuestack'][$vscount - 1]['values'][] = $temp;
254
                        } else {
255
                            $this->_xh['value'] = $temp;
256
                        }
257
                    } else {
258
                        /// @todo this needs to treat correctly php-serialized objects,
259
                        /// since std deserializing is done by php_xmlrpc_decode,
260
                        /// which we will not be calling...
261
                        if (isset($this->_xh['php_class'])) {
0 ignored issues
show
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
262
                        }
263
264
                        // check if we are inside an array or struct:
265
                        // if value just built is inside an array, let's move it into array on the stack
266
                        $vscount = count($this->_xh['valuestack']);
267 View Code Duplication
                        if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
268
                            $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value'];
269
                        }
270
                    }
271
                    break;
272
                case 'BOOLEAN':
273
                case 'I4':
274
                case 'I8':
275
                case 'EX:I8':
276
                case 'INT':
277
                case 'STRING':
278
                case 'DOUBLE':
279
                case 'DATETIME.ISO8601':
280
                case 'BASE64':
281
                    $this->_xh['vt'] = strtolower($name);
282
                    /// @todo: optimization creep - remove the if/elseif cycle below
283
                    /// since the case() in which we are already did that
284
                    if ($name == 'STRING') {
285
                        $this->_xh['value'] = $this->_xh['ac'];
286 View Code Duplication
                    } elseif ($name == 'DATETIME.ISO8601') {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
287
                        if (!preg_match('/^[0-9]{8}T[0-9]{2}:[0-9]{2}:[0-9]{2}$/', $this->_xh['ac'])) {
288
                            error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']);
289
                        }
290
                        $this->_xh['vt'] = Value::$xmlrpcDateTime;
291
                        $this->_xh['value'] = $this->_xh['ac'];
292
                    } elseif ($name == 'BASE64') {
293
                        /// @todo check for failure of base64 decoding / catch warnings
294
                        $this->_xh['value'] = base64_decode($this->_xh['ac']);
295
                    } elseif ($name == 'BOOLEAN') {
296
                        // special case here: we translate boolean 1 or 0 into PHP
297
                        // constants true or false.
298
                        // Strings 'true' and 'false' are accepted, even though the
299
                        // spec never mentions them (see eg. Blogger api docs)
300
                        // NB: this simple checks helps a lot sanitizing input, ie no
301
                        // security problems around here
302
                        if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') == 0) {
303
                            $this->_xh['value'] = true;
304
                        } else {
305
                            // log if receiving something strange, even though we set the value to false anyway
306
                            if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') != 0) {
307
                                error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']);
308
                            }
309
                            $this->_xh['value'] = false;
310
                        }
311 View Code Duplication
                    } elseif ($name == 'DOUBLE') {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
312
                        // we have a DOUBLE
313
                        // we must check that only 0123456789-.<space> are characters here
314
                        // NOTE: regexp could be much stricter than this...
315
                        if (!preg_match('/^[+-eE0123456789 \t.]+$/', $this->_xh['ac'])) {
316
                            /// @todo: find a better way of throwing an error than this!
317
                            error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']);
318
                            $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
319
                        } else {
320
                            // it's ok, add it on
321
                            $this->_xh['value'] = (double)$this->_xh['ac'];
322
                        }
323
                    } else {
324
                        // we have an I4/I8/INT
325
                        // we must check that only 0123456789-<space> are characters here
326 View Code Duplication
                        if (!preg_match('/^[+-]?[0123456789 \t]+$/', $this->_xh['ac'])) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
327
                            /// @todo find a better way of throwing an error than this!
328
                            error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']);
329
                            $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
330
                        } else {
331
                            // it's ok, add it on
332
                            $this->_xh['value'] = (int)$this->_xh['ac'];
333
                        }
334
                    }
335
                    $this->_xh['lv'] = 3; // indicate we've found a value
336
                    break;
337
                case 'NAME':
338
                    $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = $this->_xh['ac'];
339
                    break;
340
                case 'MEMBER':
341
                    // add to array in the stack the last element built,
342
                    // unless no VALUE was found
343
                    if ($this->_xh['vt']) {
344
                        $vscount = count($this->_xh['valuestack']);
345
                        $this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value'];
346
                    } else {
347
                        error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml');
348
                    }
349
                    break;
350
                case 'DATA':
351
                    $this->_xh['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty
352
                    break;
353
                case 'STRUCT':
354
                case 'ARRAY':
355
                    // fetch out of stack array of values, and promote it to current value
356
                    $currVal = array_pop($this->_xh['valuestack']);
357
                    $this->_xh['value'] = $currVal['values'];
358
                    $this->_xh['vt'] = strtolower($name);
359
                    if (isset($currVal['php_class'])) {
360
                        $this->_xh['php_class'] = $currVal['php_class'];
361
                    }
362
                    break;
363
                case 'PARAM':
364
                    // add to array of params the current value,
365
                    // unless no VALUE was found
366
                    if ($this->_xh['vt']) {
367
                        $this->_xh['params'][] = $this->_xh['value'];
368
                        $this->_xh['pt'][] = $this->_xh['vt'];
369
                    } else {
370
                        error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml');
371
                    }
372
                    break;
373
                case 'METHODNAME':
374
                    $this->_xh['method'] = preg_replace('/^[\n\r\t ]+/', '', $this->_xh['ac']);
375
                    break;
376
                case 'NIL':
377
                case 'EX:NIL':
378
                    if (PhpXmlRpc::$xmlrpc_null_extension) {
379
                        $this->_xh['vt'] = 'null';
380
                        $this->_xh['value'] = null;
381
                        $this->_xh['lv'] = 3;
382
                        break;
383
                    }
384
                // drop through intentionally if nil extension not enabled
385
                case 'PARAMS':
386
                case 'FAULT':
387
                case 'METHODCALL':
388
                case 'METHORESPONSE':
389
                    break;
390
                default:
391
                    // End of INVALID ELEMENT!
392
                    // shall we add an assert here for unreachable code???
393
                    break;
394
            }
395
        }
396
    }
397
398
    /**
399
     * Used in decoding xmlrpc requests/responses without rebuilding xmlrpc Values.
400
     */
401
    public function xmlrpc_ee_fast($parser, $name)
402
    {
403
        $this->xmlrpc_ee($parser, $name, false);
404
    }
405
406
    /**
407
     * xml parser handler function for character data.
408
     */
409
    public function xmlrpc_cd($parser, $data)
0 ignored issues
show
Unused Code introduced by
The parameter $parser is not used and could be removed.

This check looks for parameters that have been defined for a function or method, but which are not used in the method body.

Loading history...
410
    {
411
        // skip processing if xml fault already detected
412
        if ($this->_xh['isf'] < 2) {
413
            // "lookforvalue==3" means that we've found an entire value
414
            // and should discard any further character data
415
            if ($this->_xh['lv'] != 3) {
416
                $this->_xh['ac'] .= $data;
417
            }
418
        }
419
    }
420
421
    /**
422
     * xml parser handler function for 'other stuff', ie. not char data or
423
     * element start/end tag. In fact it only gets called on unknown entities...
424
     */
425
    public function xmlrpc_dh($parser, $data)
0 ignored issues
show
Unused Code introduced by
The parameter $parser is not used and could be removed.

This check looks for parameters that have been defined for a function or method, but which are not used in the method body.

Loading history...
426
    {
427
        // skip processing if xml fault already detected
428
        if ($this->_xh['isf'] < 2) {
429
            if (substr($data, 0, 1) == '&' && substr($data, -1, 1) == ';') {
430
                $this->_xh['ac'] .= $data;
431
            }
432
        }
433
434
        return true;
435
    }
436
437
    /**
438
     * xml charset encoding guessing helper function.
439
     * Tries to determine the charset encoding of an XML chunk received over HTTP.
440
     * NB: according to the spec (RFC 3023), if text/xml content-type is received over HTTP without a charset parameter,
441
     * we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of non conforming (legacy?) clients/servers,
442
     * which will be most probably using UTF-8 anyway...
443
     * In order of importance checks:
444
     * 1. http headers
445
     * 2. BOM
446
     * 3. XML declaration
447
     * 4. guesses using mb_detect_encoding()
448
     *
449
     * @param string $httpHeader the http Content-type header
450
     * @param string $xmlChunk xml content buffer
451
     * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled).
452
     *                              This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings
453
     * @return string the encoding determined. Null if it can't be determined and mbstring is enabled,
454
     *                PhpXmlRpc::$xmlrpc_defencoding if it can't be determined and mbstring is not enabled
455
     *
456
     * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!!
457
     */
458
    public static function guessEncoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null)
459
    {
460
        // discussion: see http://www.yale.edu/pclt/encoding/
461
        // 1 - test if encoding is specified in HTTP HEADERS
462
463
        // Details:
464
        // LWS:           (\13\10)?( |\t)+
0 ignored issues
show
Unused Code Comprehensibility introduced by
47% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
465
        // token:         (any char but excluded stuff)+
466
        // quoted string: " (any char but double quotes and control chars)* "
467
        // header:        Content-type = ...; charset=value(; ...)*
468
        //   where value is of type token, no LWS allowed between 'charset' and value
469
        // Note: we do not check for invalid chars in VALUE:
470
        //   this had better be done using pure ereg as below
471
        // Note 2: we might be removing whitespace/tabs that ought to be left in if
472
        //   the received charset is a quoted string. But nobody uses such charset names...
473
474
        /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it?
475
        $matches = array();
476
        if (preg_match('/;\s*charset\s*=([^;]+)/i', $httpHeader, $matches)) {
477
            return strtoupper(trim($matches[1], " \t\""));
478
        }
479
480
        // 2 - scan the first bytes of the data for a UTF-16 (or other) BOM pattern
481
        //     (source: http://www.w3.org/TR/2000/REC-xml-20001006)
482
        //     NOTE: actually, according to the spec, even if we find the BOM and determine
483
        //     an encoding, we should check if there is an encoding specified
484
        //     in the xml declaration, and verify if they match.
485
        /// @todo implement check as described above?
486
        /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases WITH a BOM)
487
        if (preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {
488
            return 'UCS-4';
489
        } elseif (preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {
490
            return 'UTF-16';
491
        } elseif (preg_match('/^(\xEF\xBB\xBF)/', $xmlChunk)) {
492
            return 'UTF-8';
493
        }
494
495
        // 3 - test if encoding is specified in the xml declaration
496
        // Details:
497
        // SPACE:         (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
498
        // EQ:            SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
499
        if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
500
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
501
            $xmlChunk, $matches)) {
502
            return strtoupper(substr($matches[2], 1, -1));
503
        }
504
505
        // 4 - if mbstring is available, let it do the guesswork
506
        if (extension_loaded('mbstring')) {
507
            if ($encodingPrefs == null && PhpXmlRpc::$xmlrpc_detectencodings != null) {
0 ignored issues
show
Bug introduced by
It seems like you are loosely comparing $encodingPrefs of type string|null against null; this is ambiguous if the string can be empty. Consider using a strict comparison === instead.
Loading history...
508
                $encodingPrefs = PhpXmlRpc::$xmlrpc_detectencodings;
509
            }
510
            if ($encodingPrefs) {
511
                $enc = mb_detect_encoding($xmlChunk, $encodingPrefs);
512
            } else {
513
                $enc = mb_detect_encoding($xmlChunk);
514
            }
515
            // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII...
516
            // IANA also likes better US-ASCII, so go with it
517
            if ($enc == 'ASCII') {
518
                $enc = 'US-' . $enc;
519
            }
520
521
            return $enc;
522
        } else {
523
            // no encoding specified: as per HTTP1.1 assume it is iso-8859-1?
524
            // Both RFC 2616 (HTTP 1.1) and 1945 (HTTP 1.0) clearly state that for text/xxx content types
525
            // this should be the standard. And we should be getting text/xml as request and response.
526
            // BUT we have to be backward compatible with the lib, which always used UTF-8 as default...
527
            return PhpXmlRpc::$xmlrpc_defencoding;
528
        }
529
    }
530
531
    /**
     * Helper function: checks if an xml chunk has a charset declaration, i.e. either a BOM at its very
     * beginning or an 'encoding' pseudo-attribute in its xml declaration.
     *
     * Both checks are anchored to the first byte of $xmlChunk: leading whitespace (or any other byte)
     * before the BOM / xml declaration makes the detection fail.
     *
     * @param string $xmlChunk the beginning of the xml document to inspect
     * @return bool true if a BOM or an xml declaration carrying an encoding is found, false otherwise
     */
    public static function hasEncoding($xmlChunk)
    {
        // scan the first bytes of the data for a UTF-16 (or other) BOM pattern
        //     (source: http://www.w3.org/TR/2000/REC-xml-20001006)
        // The 4-byte alternatives are listed first, so that they are tried before the 2-byte UTF-16 ones
        // which share the same leading bytes. All alternatives lead to the same answer anyway.
        $bomPattern = '/^(?:' .
            '\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00' . // UCS-4 (4 byte orders)
            '|\xFE\xFF|\xFF\xFE' . // UTF-16
            '|\xEF\xBB\xBF' . // UTF-8
            ')/';
        if (preg_match($bomPattern, $xmlChunk) === 1) {
            return true;
        }

        // test if encoding is specified in the xml declaration
        // Details:
        // SPACE:         (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        // EQ:            SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        // NB: the captured groups are not needed here - we only care about whether the pattern matches
        $xmlDeclPattern = '/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/";

        // preg_match returns 1 on match, 0 on no match and false on error: only a match counts as 'has encoding'
        return preg_match($xmlDeclPattern, $xmlChunk) === 1;
    }
561
}
562