Completed
Push — master ( 8f4643...4ef224 )
by Gaetano
07:48 queued 06:06
created

XMLParser::xmlrpc_ee()   F

Complexity

Conditions 49
Paths 136

Size

Total Lines 173

Duplication

Lines 36
Ratio 20.81 %

Code Coverage

Tests 93
CRAP Score 49.8235

Importance

Changes 0
Metric Value
cc 49
nc 136
nop 3
dl 36
loc 173
ccs 93
cts 100
cp 0.93
crap 49.8235
rs 3.0933
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace PhpXmlRpc\Helper;
4
5
use PhpXmlRpc\PhpXmlRpc;
6
use PhpXmlRpc\Value;
7
8
/**
 * Deals with parsing the XML.
 *
 * The public xmlrpc_* methods have the signatures of the callbacks used by php's
 * event-based xml parser (element start, element end, character data, default handler).
 * They accumulate everything they learn about the document being parsed in $_xh.
 */
class XMLParser
{
    // used to store state during parsing
    // quick explanation of components:
    //   ac - used to accumulate values
    //   stack - array with genealogy of xml elements names:
    //           used to validate nesting of xmlrpc elements
    //   valuestack - array used for parsing arrays and structs
    //   lv - used to indicate "looking for a value": implements
    //        the logic to allow values with no types to be strings
    //   isf - used to indicate a parsing fault (2) or xmlrpc response fault (1)
    //   isf_reason - used for storing xmlrpc response fault string
    //   method - used to store method name
    //   params - used to store parameters in method calls
    //   pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values
    //   rt  - 'methodcall or 'methodresponse'
    // NB: the keys 'vt', 'value' and 'php_class' are added on the fly by the element handlers below
    public $_xh = array(
        'ac' => '',
        'stack' => array(),
        'valuestack' => array(),
        'isf' => 0,
        'isf_reason' => '',
        'method' => false, // so we can check later if we got a methodname or not
        'params' => array(),
        'pt' => array(),
        'rt' => '',
    );

    // for every known xmlrpc element: the list of elements it can be a direct child of
    public $xmlrpc_valid_parents = array(
        'VALUE' => array('MEMBER', 'DATA', 'PARAM', 'FAULT'),
        'BOOLEAN' => array('VALUE'),
        'I4' => array('VALUE'),
        'I8' => array('VALUE'),
        'EX:I8' => array('VALUE'),
        'INT' => array('VALUE'),
        'STRING' => array('VALUE'),
        'DOUBLE' => array('VALUE'),
        'DATETIME.ISO8601' => array('VALUE'),
        'BASE64' => array('VALUE'),
        'MEMBER' => array('STRUCT'),
        'NAME' => array('MEMBER'),
        'DATA' => array('ARRAY'),
        'ARRAY' => array('VALUE'),
        'STRUCT' => array('VALUE'),
        'PARAM' => array('PARAMS'),
        'METHODNAME' => array('METHODCALL'),
        'PARAMS' => array('METHODCALL', 'METHODRESPONSE'),
        'FAULT' => array('METHODRESPONSE'),
        'NIL' => array('VALUE'), // only used when extension activated
        'EX:NIL' => array('VALUE'), // only used when extension activated
    );

    /**
     * xml parser handler function for opening element tags.
     *
     * Validates the nesting of the element, then prepares the parsing state for its content.
     * On invalid input, $_xh['isf'] is set to 2 and $_xh['isf_reason'] to an explanation.
     *
     * @param mixed $parser not used: it is there only to match the signature expected for the handler
     * @param string $name name of the element, expected in upper case (it is compared to upper case names)
     * @param array $attrs attributes of the element. Only PHP_CLASS is looked at, for STRUCT and ARRAY
     * @param bool $acceptSingleVals when true, a VALUE element is accepted as top level element,
     *                               besides METHODCALL and METHODRESPONSE
     */
    public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false)
    {
        // if invalid xmlrpc already detected, skip all processing
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // check for correct element nesting
        /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
        ///       there is only a single top level element in xml anyway
        if (count($this->_xh['stack']) == 0) {
            // top level element can only be of 2 types, plus VALUE when single values are accepted.
            // NB: the previous version of this test let a VALUE through regardless of $acceptSingleVals,
            // and let any element through when $acceptSingleVals was true
            $validTopLevel = $name === 'METHODRESPONSE' || $name === 'METHODCALL' ||
                ($name === 'VALUE' && $acceptSingleVals);
            if (!$validTopLevel) {
                $this->raiseParsingFault('missing top level xmlrpc element');

                return;
            }
            $this->_xh['rt'] = strtolower($name);
        } else {
            // not top level element: see if parent is OK
            $parent = end($this->_xh['stack']);
            if (!array_key_exists($name, $this->xmlrpc_valid_parents) || !in_array($parent, $this->xmlrpc_valid_parents[$name])) {
                $this->raiseParsingFault("xmlrpc element $name cannot be child of $parent");

                return;
            }
        }

        switch ($name) {
            // optimize for speed switch cases: most common cases first
            case 'VALUE':
                /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element
                $this->_xh['vt'] = 'value'; // indicator: no value found yet
                $this->_xh['ac'] = '';
                $this->_xh['lv'] = 1;
                $this->_xh['php_class'] = null;
                break;
            case 'I8':
            case 'EX:I8':
                if (PHP_INT_SIZE === 4) {
                    /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
                    $this->raiseParsingFault("Received i8 element but php is compiled in 32 bit mode");

                    return;
                }
            // fall through voluntarily
            case 'I4':
            case 'INT':
            case 'STRING':
            case 'BOOLEAN':
            case 'DOUBLE':
            case 'DATETIME.ISO8601':
            case 'BASE64':
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->raiseParsingFault("$name element following a {$this->_xh['vt']} element inside a single value");

                    return;
                }
                $this->_xh['ac'] = ''; // reset the accumulator
                break;
            case 'STRUCT':
            case 'ARRAY':
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->raiseParsingFault("$name element following a {$this->_xh['vt']} element inside a single value");

                    return;
                }
                // create an empty array to hold child values, and push it onto appropriate stack
                $curVal = array(
                    'values' => array(),
                    'type' => $name,
                );
                // check for out-of-band information to rebuild php objs
                // and in case it is found, save it
                if (isset($attrs['PHP_CLASS'])) {
                    $curVal['php_class'] = $attrs['PHP_CLASS'];
                }
                $this->_xh['valuestack'][] = $curVal;
                $this->_xh['vt'] = 'data'; // be prepared for a data element next
                break;
            case 'DATA':
                if ($this->_xh['vt'] != 'data') {
                    // two data elements inside a value: an error occurred!
                    $this->raiseParsingFault("found two data elements inside an array element");

                    return;
                }
            // fall through voluntarily: nothing else to do for a valid DATA element
            case 'METHODCALL':
            case 'METHODRESPONSE':
            case 'PARAMS':
                // valid elements that add little to processing
                break;
            case 'METHODNAME':
            case 'NAME':
                /// @todo we could check for 2 NAME elements inside a MEMBER element
                $this->_xh['ac'] = '';
                break;
            case 'FAULT':
                $this->_xh['isf'] = 1;
                break;
            case 'MEMBER':
                // set member name to an empty string, in case we do not find it in the xml later on
                $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = '';
            // Drop through intentionally
            case 'PARAM':
                // clear value type, so we can check later if no value has been passed for this param/member
                $this->_xh['vt'] = null;
                break;
            case 'NIL':
            case 'EX:NIL':
                if (PhpXmlRpc::$xmlrpc_null_extension) {
                    if ($this->_xh['vt'] != 'value') {
                        // two data elements inside a value: an error occurred!
                        $this->raiseParsingFault("$name element following a {$this->_xh['vt']} element inside a single value");

                        return;
                    }
                    $this->_xh['ac'] = ''; // reset the accumulator
                    break;
                }
            // we do not support the <NIL/> extension, so
            // drop through intentionally
            default:
                /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
                // NB: no return here: the element is still pushed on the stack below
                $this->raiseParsingFault("found not-xmlrpc xml element $name");
                break;
        }

        // Save current element name to stack, to validate nesting
        $this->_xh['stack'][] = $name;

        /// @todo optimization creep: move this inside the big switch() above
        if ($name != 'VALUE') {
            $this->_xh['lv'] = 0;
        }
    }

    /**
     * Used in decoding xml chunks that might represent single xmlrpc values.
     *
     * Same as xmlrpc_se(), but a VALUE element is accepted as top level element.
     *
     * @param mixed $parser
     * @param string $name
     * @param array $attrs
     */
    public function xmlrpc_se_any($parser, $name, $attrs)
    {
        $this->xmlrpc_se($parser, $name, $attrs, true);
    }

    /**
     * xml parser handler function for close element tags.
     *
     * Converts the character data accumulated in $_xh['ac'] according to the element being closed,
     * and stores the result in $_xh['value'], $_xh['vt'], $_xh['params'], $_xh['pt'], $_xh['method']
     * or in the array/struct being built on top of $_xh['valuestack'].
     *
     * @param mixed $parser not used: it is there only to match the signature expected for the handler
     * @param string $name name of the element, expected in upper case
     * @param bool $rebuildXmlrpcvals when true, a Value object is built for every VALUE element;
     *                                when false, the plain php value is kept
     */
    public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = true)
    {
        // if invalid xmlrpc already detected, skip all processing
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // pop this element name from stack
        // NB: if XML validates, correct opening/closing is guaranteed and
        // we do not have to check for $name == the popped element.
        // we also checked for proper nesting at start of elements...
        array_pop($this->_xh['stack']);

        switch ($name) {
            case 'VALUE':
                // This if() detects if no scalar was inside <VALUE></VALUE>
                if ($this->_xh['vt'] == 'value') {
                    $this->_xh['value'] = $this->_xh['ac'];
                    $this->_xh['vt'] = Value::$xmlrpcString;
                }

                if ($rebuildXmlrpcvals) {
                    // build the xmlrpc val out of the data received, and substitute it
                    $built = new Value($this->_xh['value'], $this->_xh['vt']);
                    // in case we got info about underlying php class, save it
                    // in the object we're rebuilding
                    if (isset($this->_xh['php_class'])) {
                        $built->_php_class = $this->_xh['php_class'];
                    }
                } else {
                    /// @todo this needs to treat correctly php-serialized objects (see $this->_xh['php_class']),
                    /// since std deserializing is done by php_xmlrpc_decode,
                    /// which we will not be calling...
                    $built = $this->_xh['value'];
                }

                // check if we are inside an array or struct:
                // if value just built is inside an array, let's move it into array on the stack
                $vscount = count($this->_xh['valuestack']);
                if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
                    $this->_xh['valuestack'][$vscount - 1]['values'][] = $built;
                } else {
                    // when not rebuilding, this assigns the current value to itself
                    $this->_xh['value'] = $built;
                }
                break;
            case 'BOOLEAN':
            case 'I4':
            case 'I8':
            case 'EX:I8':
            case 'INT':
            case 'STRING':
            case 'DOUBLE':
            case 'DATETIME.ISO8601':
            case 'BASE64':
                $this->_xh['vt'] = strtolower($name);
                /// @todo: optimization creep - remove the if/elseif cycle below
                /// since the case() in which we are already did that
                if ($name == 'STRING') {
                    $this->_xh['value'] = $this->_xh['ac'];
                } elseif ($name == 'DATETIME.ISO8601') {
                    // an invalid format is only logged: the value is kept as received
                    if (!preg_match('/^[0-9]{8}T[0-9]{2}:[0-9]{2}:[0-9]{2}$/', $this->_xh['ac'])) {
                        error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']);
                    }
                    $this->_xh['vt'] = Value::$xmlrpcDateTime;
                    $this->_xh['value'] = $this->_xh['ac'];
                } elseif ($name == 'BASE64') {
                    /// @todo check for failure of base64 decoding / catch warnings
                    $this->_xh['value'] = base64_decode($this->_xh['ac']);
                } elseif ($name == 'BOOLEAN') {
                    // special case here: we translate boolean 1 or 0 into PHP
                    // constants true or false.
                    // Strings 'true' and 'false' are accepted, even though the
                    // spec never mentions them (see eg. Blogger api docs)
                    // NB: this simple checks helps a lot sanitizing input, ie no
                    // security problems around here
                    if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') == 0) {
                        $this->_xh['value'] = true;
                    } else {
                        // log if receiving something strange, even though we set the value to false anyway
                        if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') != 0) {
                            error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']);
                        }
                        $this->_xh['value'] = false;
                    }
                } elseif ($name == 'DOUBLE') {
                    // we have a DOUBLE
                    // we must check that only +-eE0123456789.<space><tab> are characters here.
                    // NB: the hyphen is escaped on purpose: an unescaped '+-e' inside the brackets is a range
                    // of characters, which matches, among others, '<', '=', '>' and all upper case letters
                    // NOTE: regexp could be much stricter than this...
                    if (!preg_match('/^[+\-eE0123456789 \t.]+$/', $this->_xh['ac'])) {
                        /// @todo: find a better way of throwing an error than this!
                        error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']);
                        $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
                    } else {
                        // it's ok, add it on
                        $this->_xh['value'] = (double)$this->_xh['ac'];
                    }
                } else {
                    // we have an I4/I8/INT
                    // we must check that only 0123456789<space><tab> are characters here, after an optional sign
                    if (!preg_match('/^[+-]?[0123456789 \t]+$/', $this->_xh['ac'])) {
                        /// @todo find a better way of throwing an error than this!
                        error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']);
                        $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
                    } else {
                        // it's ok, add it on
                        $this->_xh['value'] = (int)$this->_xh['ac'];
                    }
                }
                $this->_xh['lv'] = 3; // indicate we've found a value
                break;
            case 'NAME':
                $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = $this->_xh['ac'];
                break;
            case 'MEMBER':
                // add to array in the stack the last element built,
                // unless no VALUE was found
                if ($this->_xh['vt']) {
                    $vscount = count($this->_xh['valuestack']);
                    $memberName = $this->_xh['valuestack'][$vscount - 1]['name'];
                    $this->_xh['valuestack'][$vscount - 1]['values'][$memberName] = $this->_xh['value'];
                } else {
                    error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml');
                }
                break;
            case 'DATA':
                $this->_xh['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty
                break;
            case 'STRUCT':
            case 'ARRAY':
                // fetch out of stack array of values, and promote it to current value
                $currVal = array_pop($this->_xh['valuestack']);
                $this->_xh['value'] = $currVal['values'];
                $this->_xh['vt'] = strtolower($name);
                if (isset($currVal['php_class'])) {
                    $this->_xh['php_class'] = $currVal['php_class'];
                }
                break;
            case 'PARAM':
                // add to array of params the current value,
                // unless no VALUE was found
                if ($this->_xh['vt']) {
                    $this->_xh['params'][] = $this->_xh['value'];
                    $this->_xh['pt'][] = $this->_xh['vt'];
                } else {
                    error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml');
                }
                break;
            case 'METHODNAME':
                // only leading whitespace is removed
                $this->_xh['method'] = preg_replace('/^[\n\r\t ]+/', '', $this->_xh['ac']);
                break;
            case 'NIL':
            case 'EX:NIL':
                if (PhpXmlRpc::$xmlrpc_null_extension) {
                    $this->_xh['vt'] = 'null';
                    $this->_xh['value'] = null;
                    $this->_xh['lv'] = 3;
                    break;
                }
            // drop through intentionally if nil extension not enabled
            case 'PARAMS':
            case 'FAULT':
            case 'METHODCALL':
            case 'METHODRESPONSE':
                break;
            default:
                // End of INVALID ELEMENT!
                // shall we add an assert here for unreachable code???
                break;
        }
    }

    /**
     * Used in decoding xmlrpc requests/responses without rebuilding xmlrpc Values.
     *
     * Same as xmlrpc_ee(), but plain php values are kept instead of Value objects.
     *
     * @param mixed $parser
     * @param string $name
     */
    public function xmlrpc_ee_fast($parser, $name)
    {
        $this->xmlrpc_ee($parser, $name, false);
    }

    /**
     * xml parser handler function for character data.
     *
     * @param mixed $parser not used: it is there only to match the signature expected for the handler
     * @param string $data the text to append to the accumulator
     */
    public function xmlrpc_cd($parser, $data)
    {
        // skip processing if xml fault already detected
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // "lookforvalue==3" means that we've found an entire value
        // and should discard any further character data
        if ($this->_xh['lv'] != 3) {
            $this->_xh['ac'] .= $data;
        }
    }

    /**
     * xml parser handler function for 'other stuff', ie. not char data or
     * element start/end tag. In fact it only gets called on unknown entities...
     *
     * @param mixed $parser not used: it is there only to match the signature expected for the handler
     * @param string $data appended to the accumulator only if it starts with '&' and ends with ';'
     * @return bool always true
     */
    public function xmlrpc_dh($parser, $data)
    {
        // skip processing if xml fault already detected
        if ($this->_xh['isf'] < 2) {
            if (substr($data, 0, 1) == '&' && substr($data, -1, 1) == ';') {
                $this->_xh['ac'] .= $data;
            }
        }

        return true;
    }

    /**
     * xml charset encoding guessing helper function.
     * Tries to determine the charset encoding of an XML chunk received over HTTP.
     * NB: according to the spec (RFC 3023), if text/xml content-type is received over HTTP without a content-type,
     * we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of non conforming (legacy?) clients/servers,
     * which will be most probably using UTF-8 anyway...
     * In order of importance checks:
     * 1. http headers
     * 2. BOM
     * 3. XML declaration
     * 4. guesses using mb_detect_encoding()
     *
     * @param string $httpHeader the http Content-type header
     * @param string $xmlChunk xml content buffer
     * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled).
     *                              This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings
     * @return string the encoding determined, in upper case when it comes from the http header or the xml declaration.
     *                When nothing is found by steps 1 to 3: the result of mb_detect_encoding() if mbstring is enabled
     *                (with 'ASCII' renamed to 'US-ASCII'), PhpXmlRpc::$xmlrpc_defencoding if mbstring is not enabled
     *
     * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!!
     */
    public static function guessEncoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null)
    {
        // discussion: see http://www.yale.edu/pclt/encoding/
        // 1 - test if encoding is specified in HTTP HEADERS

        // Details:
        // LWS:           (\13\10)?( |\t)+
        // token:         (any char but excluded stuff)+
        // quoted string: " (any char but double quotes and control chars)* "
        // header:        Content-type = ...; charset=value(; ...)*
        //   where value is of type token, no LWS allowed between 'charset' and value
        // Note: we do not check for invalid chars in VALUE:
        //   this had better be done using pure ereg as below
        // Note 2: we might be removing whitespace/tabs that ought to be left in if
        //   the received charset is a quoted string. But nobody uses such charset names...

        /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it?
        $matches = array();
        if (preg_match('/;\s*charset\s*=([^;]+)/i', $httpHeader, $matches)) {
            return strtoupper(trim($matches[1], " \t\""));
        }

        // 2 - scan the first bytes of the data for a UTF-16 (or other) BOM pattern
        //     NOTE: actually, according to the spec, even if we find the BOM and determine
        //     an encoding, we should check if there is an encoding specified
        //     in the xml declaration, and verify if they match.
        /// @todo implement check as described above?
        /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases WITH a BOM)
        $bomEncoding = self::detectBomEncoding($xmlChunk);
        if ($bomEncoding !== null) {
            return $bomEncoding;
        }

        // 3 - test if encoding is specified in the xml declaration
        $declaredEncoding = self::detectDeclaredEncoding($xmlChunk);
        if ($declaredEncoding !== null) {
            return $declaredEncoding;
        }

        // 4 - if mbstring is available, let it do the guesswork
        if (!extension_loaded('mbstring')) {
            // no encoding specified: as per HTTP1.1 assume it is iso-8859-1?
            // Both RFC 2616 (HTTP 1.1) and 1945 (HTTP 1.0) clearly state that for text/xxx content types
            // this should be the standard. And we should be getting text/xml as request and response.
            // BUT we have to be backward compatible with the lib, which always used UTF-8 as default...
            return PhpXmlRpc::$xmlrpc_defencoding;
        }

        // NB: the loose comparison is kept on purpose: an empty string is treated as 'no preference given'
        if ($encodingPrefs == null && PhpXmlRpc::$xmlrpc_detectencodings != null) {
            $encodingPrefs = PhpXmlRpc::$xmlrpc_detectencodings;
        }
        if ($encodingPrefs) {
            $enc = mb_detect_encoding($xmlChunk, $encodingPrefs);
        } else {
            $enc = mb_detect_encoding($xmlChunk);
        }
        // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII...
        // IANA also likes better US-ASCII, so go with it
        if ($enc == 'ASCII') {
            $enc = 'US-' . $enc;
        }

        return $enc;
    }

    /**
     * Helper function: checks if an xml chunk has a charset declaration (BOM or in the xml declaration)
     *
     * @param string $xmlChunk
     * @return bool
     */
    public static function hasEncoding($xmlChunk)
    {
        return self::detectBomEncoding($xmlChunk) !== null || self::detectDeclaredEncoding($xmlChunk) !== null;
    }

    /**
     * Records a parsing fault: once it is set, all handlers skip further processing.
     *
     * @param string $reason
     */
    private function raiseParsingFault($reason)
    {
        $this->_xh['isf'] = 2;
        $this->_xh['isf_reason'] = $reason;
    }

    /**
     * Scans the first bytes of the data for a UTF-16 (or other) BOM pattern
     * (source: http://www.w3.org/TR/2000/REC-xml-20001006)
     *
     * @param string $xmlChunk
     * @return string|null 'UCS-4', 'UTF-16' or 'UTF-8'; null when the chunk does not start with a known BOM
     */
    private static function detectBomEncoding($xmlChunk)
    {
        // NB: the 4-byte patterns have to be tested before the 2-byte ones, as FF FE is a prefix of FF FE 00 00
        if (preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {
            return 'UCS-4';
        }
        if (preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {
            return 'UTF-16';
        }
        if (preg_match('/^(\xEF\xBB\xBF)/', $xmlChunk)) {
            return 'UTF-8';
        }

        return null;
    }

    /**
     * Looks for an encoding specified in the xml declaration at the very beginning of the chunk.
     *
     * @param string $xmlChunk
     * @return string|null the declared encoding, in upper case; null when no declaration with an encoding is found
     */
    private static function detectDeclaredEncoding($xmlChunk)
    {
        // Details:
        // SPACE:         (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        // EQ:            SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        $matches = array();
        if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
            $xmlChunk, $matches)) {
            // strip the quotes surrounding the encoding name
            return strtoupper(substr($matches[2], 1, -1));
        }

        return null;
    }
}
562