1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpXmlRpc\Helper; |
4
|
|
|
|
5
|
|
|
use PhpXmlRpc\PhpXmlRpc; |
6
|
|
|
use PhpXmlRpc\Value; |
7
|
|
|
|
8
|
|
|
/**
 * Deals with parsing the XML.
 *
 * The class wraps PHP's event-based xml parser: parse() feeds the document to it, and the xmlrpc_* methods
 * are the callbacks which validate element nesting and accumulate the decoded data into $this->_xh.
 */
class XMLParser
{
    const RETURN_XMLRPCVALS = 'xmlrpcvals';
    const RETURN_PHP = 'phpvals';

    const ACCEPT_REQUEST = 1;
    const ACCEPT_RESPONSE = 2;
    const ACCEPT_VALUE = 4;

    // Used to store state during parsing.
    // Quick explanation of components:
    //  private:
    //   ac - used to accumulate values
    //   stack - array with genealogy of xml elements names used to validate nesting of xmlrpc elements
    //   valuestack - array used for parsing arrays and structs
    //   lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings
    //        (set to 1 when a VALUE element opens, 0 when any other element opens, 3 once a complete value was found)
    //   vt - type of the value currently being parsed ('value' means: no typed element found yet)
    //  public:
    //   isf - used to indicate an xml parsing fault (3), invalid xmlrpc fault (2) or xmlrpc response fault (1)
    //   isf_reason - used for storing xmlrpc response fault string
    //   method - used to store method name
    //   params - used to store parameters in method calls
    //   pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values
    //   rt - 'methodcall', 'methodresponse' or 'value'
    public $_xh = array(
        'ac' => '',
        'stack' => array(),
        'valuestack' => array(),
        'isf' => 0,
        'isf_reason' => '',
        'method' => false,
        'params' => array(),
        'pt' => array(),
        'rt' => '',
    );

    // For every known element name (upper-cased): the list of element names which are allowed as its direct parent
    public $xmlrpc_valid_parents = array(
        'VALUE' => array('MEMBER', 'DATA', 'PARAM', 'FAULT'),
        'BOOLEAN' => array('VALUE'),
        'I4' => array('VALUE'),
        'I8' => array('VALUE'),
        'EX:I8' => array('VALUE'),
        'INT' => array('VALUE'),
        'STRING' => array('VALUE'),
        'DOUBLE' => array('VALUE'),
        'DATETIME.ISO8601' => array('VALUE'),
        'BASE64' => array('VALUE'),
        'MEMBER' => array('STRUCT'),
        'NAME' => array('MEMBER'),
        'DATA' => array('ARRAY'),
        'ARRAY' => array('VALUE'),
        'STRUCT' => array('VALUE'),
        'PARAM' => array('PARAMS'),
        'METHODNAME' => array('METHODCALL'),
        'PARAMS' => array('METHODCALL', 'METHODRESPONSE'),
        'FAULT' => array('METHODRESPONSE'),
        'NIL' => array('VALUE'), // only used when extension activated
        'EX:NIL' => array('VALUE'), // only used when extension activated
    );

    /** @var array $parsing_options options set on the xml parser via xml_parser_set_option() */
    protected $parsing_options = array();
    /** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */
    protected $accept = 3;
    /** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */
    protected $maxChunkLength = 4194304;

    /**
     * @param array $options passed to the xml parser: each key/value pair is handed to xml_parser_set_option()
     */
    public function __construct(array $options = array())
    {
        $this->parsing_options = $options;
    }

    /**
     * Parses an xml document. Nothing is returned: the outcome is stored in $this->_xh, where 'isf' is non-zero
     * (and 'isf_reason' is set) when the xml is empty, not well formed or not valid xmlrpc.
     *
     * @param string $data the xml text
     * @param string $returnType self::RETURN_PHP to decode to plain php values, anything else to build Value objects
     * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE
     * @return void
     */
    public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3)
    {
        // reset the state left over by any previous parsing run
        $this->_xh = array(
            'ac' => '',
            'stack' => array(),
            'valuestack' => array(),
            'isf' => 0,
            'isf_reason' => '',
            'method' => false, // so we can check later if we got a methodname or not
            'params' => array(),
            'pt' => array(),
            'rt' => '',
        );

        $len = strlen($data);

        // we test for empty documents here to save on resource allocation and simplify the chunked-parsing loop below
        if ($len == 0) {
            $this->_xh['isf'] = 3;
            $this->_xh['isf_reason'] = 'XML error 5: empty document';
            return;
        }

        $parser = xml_parser_create();

        foreach ($this->parsing_options as $key => $val) {
            xml_parser_set_option($parser, $key, $val);
        }
        // always set this, in case someone tries to disable it via options...
        // (the handlers below compare element names against upper-case strings)
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1);

        xml_set_object($parser, $this);

        if ($returnType == self::RETURN_PHP) {
            xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_fast');
        } else {
            xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee');
        }

        xml_set_character_data_handler($parser, 'xmlrpc_cd');
        xml_set_default_handler($parser, 'xmlrpc_dh');

        $this->accept = $accept;

        // @see ticket #70 - we have to parse big xml docs in chunks to avoid errors
        for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) {
            $chunk = substr($data, $offset, $this->maxChunkLength);
            // the 3rd argument flags the last chunk
            // error handling: xml not well formed
            if (!xml_parse($parser, $chunk, $offset + $this->maxChunkLength >= $len)) {
                $errCode = xml_get_error_code($parser);
                $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode),
                    xml_get_current_line_number($parser), xml_get_current_column_number($parser));

                $this->_xh['isf'] = 3;
                $this->_xh['isf_reason'] = $errStr;
                break;
            }
        }

        xml_parser_free($parser);
    }

    /**
     * xml parser handler function for opening element tags.
     * Validates that the element is allowed in its position and prepares the parsing state for its content.
     *
     * @param resource $parser
     * @param string $name the element name, upper-cased by the parser
     * @param array $attrs the element attributes; only PHP_CLASS is looked at, for STRUCT and ARRAY elements
     * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead
     */
    public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false)
    {
        // if invalid xmlrpc already detected, skip all processing
        if ($this->_xh['isf'] < 2) {

            // check for correct element nesting
            if (count($this->_xh['stack']) == 0) {
                // top level element: which ones are allowed depends on the 'accept' bitmask
                /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
                ///       there is only a single top level element in xml anyway
                // BC
                if ($acceptSingleVals === false) {
                    $accept = $this->accept;
                } else {
                    $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE;
                }
                if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) ||
                    ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) ||
                    ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE))) {
                    $this->_xh['rt'] = strtolower($name);
                } else {
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name;

                    return;
                }
            } else {
                // not top level element: see if parent is OK
                $parent = end($this->_xh['stack']);
                if (!array_key_exists($name, $this->xmlrpc_valid_parents) || !in_array($parent, $this->xmlrpc_valid_parents[$name])) {
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "xmlrpc element $name cannot be child of $parent";

                    return;
                }
            }

            switch ($name) {
                // optimize for speed switch cases: most common cases first
                case 'VALUE':
                    /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element
                    $this->_xh['vt'] = 'value'; // indicator: no value found yet
                    $this->_xh['ac'] = '';
                    $this->_xh['lv'] = 1;
                    $this->_xh['php_class'] = null;
                    break;
                case 'I8':
                case 'EX:I8':
                    if (PHP_INT_SIZE === 4) {
                        // INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
                        $this->_xh['isf'] = 2;
                        $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode";

                        return;
                    }
                    // fall through voluntarily
                case 'I4':
                case 'INT':
                case 'STRING':
                case 'BOOLEAN':
                case 'DOUBLE':
                case 'DATETIME.ISO8601':
                case 'BASE64':
                    if ($this->_xh['vt'] != 'value') {
                        // two data elements inside a value: an error occurred!
                        $this->_xh['isf'] = 2;
                        $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                        return;
                    }
                    $this->_xh['ac'] = ''; // reset the accumulator
                    break;
                case 'STRUCT':
                case 'ARRAY':
                    if ($this->_xh['vt'] != 'value') {
                        // two data elements inside a value: an error occurred!
                        $this->_xh['isf'] = 2;
                        $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                        return;
                    }
                    // create an empty array to hold child values, and push it onto appropriate stack
                    $curVal = array();
                    $curVal['values'] = array();
                    $curVal['type'] = $name;
                    // check for out-of-band information to rebuild php objs
                    // and in case it is found, save it
                    if (isset($attrs['PHP_CLASS'])) {
                        $curVal['php_class'] = $attrs['PHP_CLASS'];
                    }
                    $this->_xh['valuestack'][] = $curVal;
                    $this->_xh['vt'] = 'data'; // be prepared for a data element next
                    break;
                case 'DATA':
                    if ($this->_xh['vt'] != 'data') {
                        // two data elements inside a value: an error occurred!
                        $this->_xh['isf'] = 2;
                        $this->_xh['isf_reason'] = "found two data elements inside an array element";

                        return;
                    }
                    // fall through intentionally: nothing else to do for a valid DATA element
                case 'METHODCALL':
                case 'METHODRESPONSE':
                case 'PARAMS':
                    // valid elements that add little to processing
                    break;
                case 'METHODNAME':
                case 'NAME':
                    /// @todo we could check for 2 NAME elements inside a MEMBER element
                    $this->_xh['ac'] = '';
                    break;
                case 'FAULT':
                    $this->_xh['isf'] = 1;
                    break;
                case 'MEMBER':
                    // set member name to an empty string, in case we do not find it in the xml later on
                    $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = '';
                    // fall through intentionally
                case 'PARAM':
                    // clear value type, so we can check later if no value has been passed for this param/member
                    $this->_xh['vt'] = null;
                    break;
                case 'NIL':
                case 'EX:NIL':
                    if (PhpXmlRpc::$xmlrpc_null_extension) {
                        if ($this->_xh['vt'] != 'value') {
                            // two data elements inside a value: an error occurred!
                            $this->_xh['isf'] = 2;
                            $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                            return;
                        }
                        $this->_xh['ac'] = ''; // reset the accumulator
                        break;
                    }
                    // we do not support the <NIL/> extension, so
                    // fall through intentionally
                default:
                    /// INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name";
                    break;
            }

            // Save current element name to stack, to validate nesting
            $this->_xh['stack'][] = $name;

            /// @todo optimization creep: move this inside the big switch() above
            if ($name != 'VALUE') {
                $this->_xh['lv'] = 0;
            }
        }
    }

    /**
     * xml parser handler function for opening element tags.
     * Used in decoding xml chunks that might represent single xmlrpc values as well as requests, responses.
     * @deprecated
     * @param resource $parser
     * @param string $name
     * @param array $attrs
     */
    public function xmlrpc_se_any($parser, $name, $attrs)
    {
        $this->xmlrpc_se($parser, $name, $attrs, true);
    }

    /**
     * xml parser handler function for close element tags.
     * Converts the accumulated character data into a value of the closed element's type, and moves completed values
     * into their container (array, struct or list of params).
     *
     * @param resource $parser
     * @param string $name the element name, upper-cased by the parser
     * @param bool $rebuildXmlrpcvals when true, Value objects are built; when false, plain php values are kept
     */
    public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = true)
    {
        if ($this->_xh['isf'] < 2) {
            // pop this element name from the stack
            // NB: if XML validates, correct opening/closing is guaranteed and
            // we do not have to check for $name == the popped element.
            // we also checked for proper nesting at start of elements...
            array_pop($this->_xh['stack']);

            switch ($name) {
                case 'VALUE':
                    // This if() detects if no scalar was inside <VALUE></VALUE>: the content is then taken as a string
                    if ($this->_xh['vt'] == 'value') {
                        $this->_xh['value'] = $this->_xh['ac'];
                        $this->_xh['vt'] = Value::$xmlrpcString;
                    }

                    if ($rebuildXmlrpcvals) {
                        // build the xmlrpc val out of the data received, and substitute it
                        $temp = new Value($this->_xh['value'], $this->_xh['vt']);
                        // in case we got info about underlying php class, save it
                        // in the object we're rebuilding
                        if (isset($this->_xh['php_class'])) {
                            $temp->_php_class = $this->_xh['php_class'];
                        }
                        // check if we are inside an array or struct:
                        // if value just built is inside an array, let's move it into array on the stack
                        $vscount = count($this->_xh['valuestack']);
                        if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
                            $this->_xh['valuestack'][$vscount - 1]['values'][] = $temp;
                        } else {
                            $this->_xh['value'] = $temp;
                        }
                    } else {
                        /// @todo this needs to treat correctly php-serialized objects (see $this->_xh['php_class']),
                        ///       since std deserializing is done by php_xmlrpc_decode,
                        ///       which we will not be calling...

                        // check if we are inside an array or struct:
                        // if value just built is inside an array, let's move it into array on the stack
                        $vscount = count($this->_xh['valuestack']);
                        if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
                            $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value'];
                        }
                    }
                    break;
                case 'BOOLEAN':
                case 'I4':
                case 'I8':
                case 'EX:I8':
                case 'INT':
                case 'STRING':
                case 'DOUBLE':
                case 'DATETIME.ISO8601':
                case 'BASE64':
                    $this->_xh['vt'] = strtolower($name);
                    /// @todo: optimization creep - remove the if/elseif cycle below
                    ///        since the case() in which we are already did that
                    if ($name == 'STRING') {
                        $this->_xh['value'] = $this->_xh['ac'];
                    } elseif ($name == 'DATETIME.ISO8601') {
                        // a badly formatted date is logged, but still passed on as is
                        if (!preg_match('/^[0-9]{8}T[0-9]{2}:[0-9]{2}:[0-9]{2}$/', $this->_xh['ac'])) {
                            error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']);
                        }
                        $this->_xh['vt'] = Value::$xmlrpcDateTime;
                        $this->_xh['value'] = $this->_xh['ac'];
                    } elseif ($name == 'BASE64') {
                        /// @todo check for failure of base64 decoding / catch warnings
                        $this->_xh['value'] = base64_decode($this->_xh['ac']);
                    } elseif ($name == 'BOOLEAN') {
                        // special case here: we translate boolean 1 or 0 into PHP
                        // constants true or false.
                        // Strings 'true' and 'false' are accepted, even though the
                        // spec never mentions them (see eg. Blogger api docs)
                        // NB: this simple checks helps a lot sanitizing input, ie no
                        // security problems around here
                        if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') == 0) {
                            $this->_xh['value'] = true;
                        } else {
                            // log if receiving something strange, even though we set the value to false anyway
                            if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') != 0) {
                                error_log('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']);
                            }
                            $this->_xh['value'] = false;
                        }
                    } elseif ($name == 'DOUBLE') {
                        // we have a DOUBLE
                        // we must check that only 0123456789+-.eE<space><tab> are characters here
                        // NB: the hyphen has to be escaped inside the character class: a bare '+-e' is a range
                        // spanning from '+' to 'e', which lets through most letters and punctuation
                        // NOTE: regexp could be much stricter than this...
                        if (!preg_match('/^[+\-eE0123456789 \t.]+$/', $this->_xh['ac'])) {
                            /// @todo: find a better way of throwing an error than this!
                            error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']);
                            $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
                        } else {
                            // it's ok, add it on
                            $this->_xh['value'] = (double)$this->_xh['ac'];
                        }
                    } else {
                        // we have an I4/I8/INT
                        // we must check that only 0123456789+-<space><tab> are characters here
                        if (!preg_match('/^[+-]?[0123456789 \t]+$/', $this->_xh['ac'])) {
                            /// @todo find a better way of throwing an error than this!
                            error_log('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']);
                            $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
                        } else {
                            // it's ok, add it on
                            $this->_xh['value'] = (int)$this->_xh['ac'];
                        }
                    }
                    $this->_xh['lv'] = 3; // indicate we've found a value
                    break;
                case 'NAME':
                    $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = $this->_xh['ac'];
                    break;
                case 'MEMBER':
                    // add to array in the stack the last element built,
                    // unless no VALUE was found
                    if ($this->_xh['vt']) {
                        $vscount = count($this->_xh['valuestack']);
                        $this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value'];
                    } else {
                        error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml');
                    }
                    break;
                case 'DATA':
                    $this->_xh['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty
                    break;
                case 'STRUCT':
                case 'ARRAY':
                    // fetch out of stack array of values, and promote it to current value
                    $currVal = array_pop($this->_xh['valuestack']);
                    $this->_xh['value'] = $currVal['values'];
                    $this->_xh['vt'] = strtolower($name);
                    if (isset($currVal['php_class'])) {
                        $this->_xh['php_class'] = $currVal['php_class'];
                    }
                    break;
                case 'PARAM':
                    // add to array of params the current value,
                    // unless no VALUE was found
                    if ($this->_xh['vt']) {
                        $this->_xh['params'][] = $this->_xh['value'];
                        $this->_xh['pt'][] = $this->_xh['vt'];
                    } else {
                        error_log('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml');
                    }
                    break;
                case 'METHODNAME':
                    // only leading whitespace is stripped from the method name
                    $this->_xh['method'] = preg_replace('/^[\n\r\t ]+/', '', $this->_xh['ac']);
                    break;
                case 'NIL':
                case 'EX:NIL':
                    if (PhpXmlRpc::$xmlrpc_null_extension) {
                        $this->_xh['vt'] = 'null';
                        $this->_xh['value'] = null;
                        $this->_xh['lv'] = 3;
                        break;
                    }
                    // fall through intentionally if nil extension not enabled
                case 'PARAMS':
                case 'FAULT':
                case 'METHODCALL':
                case 'METHODRESPONSE':
                    break;
                default:
                    // End of INVALID ELEMENT!
                    // shall we add an assert here for unreachable code???
                    break;
            }
        }
    }

    /**
     * Used in decoding xmlrpc requests/responses without rebuilding xmlrpc Values.
     * @param resource $parser
     * @param string $name
     */
    public function xmlrpc_ee_fast($parser, $name)
    {
        $this->xmlrpc_ee($parser, $name, false);
    }

    /**
     * xml parser handler function for character data.
     * @param resource $parser
     * @param string $data
     */
    public function xmlrpc_cd($parser, $data)
    {
        // skip processing if xml fault already detected
        if ($this->_xh['isf'] < 2) {
            // "lookforvalue==3" means that we've found an entire value
            // and should discard any further character data
            if ($this->_xh['lv'] != 3) {
                $this->_xh['ac'] .= $data;
            }
        }
    }

    /**
     * xml parser handler function for 'other stuff', ie. not char data or
     * element start/end tag. In fact it only gets called on unknown entities...
     * @param resource $parser
     * @param string $data
     */
    public function xmlrpc_dh($parser, $data)
    {
        // skip processing if xml fault already detected
        if ($this->_xh['isf'] < 2) {
            // only text which looks like an entity reference (&...;) is kept, verbatim
            if (substr($data, 0, 1) == '&' && substr($data, -1, 1) == ';') {
                $this->_xh['ac'] .= $data;
            }
        }

        //return true;
    }

    /**
     * xml charset encoding guessing helper function.
     * Tries to determine the charset encoding of an XML chunk received over HTTP.
     * NB: according to the spec (RFC 3023), if text/xml content-type is received over HTTP without a charset,
     * we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of non conforming (legacy?) clients/servers,
     * which will be most probably using UTF-8 anyway...
     * In order of importance checks:
     *  1. http headers
     *  2. BOM
     *  3. XML declaration
     *  4. guesses using mb_detect_encoding()
     *
     * @param string $httpHeader the http Content-type header
     * @param string $xmlChunk xml content buffer
     * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled).
     *                              This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings
     * @return string the encoding determined, upper-cased when it comes from the http header or the xml declaration.
     *                If it can't be determined from those or from a BOM: the result of mb_detect_encoding() when
     *                mbstring is enabled (which may be a non-string value on detection failure),
     *                PhpXmlRpc::$xmlrpc_defencoding when mbstring is not enabled
     *
     * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!!
     */
    public static function guessEncoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null)
    {
        // discussion: see http://www.yale.edu/pclt/encoding/
        // 1 - test if encoding is specified in HTTP HEADERS

        // Details:
        // LWS:           (\13\10)?( |\t)+
        // token:         (any char but excluded stuff)+
        // quoted string: " (any char but double quotes and control chars)* "
        // header:        Content-type = ...; charset=value(; ...)*
        //   where value is of type token, no LWS allowed between 'charset' and value
        // Note: we do not check for invalid chars in VALUE:
        //   this had better be done using pure ereg as below
        // Note 2: we might be removing whitespace/tabs that ought to be left in if
        //   the received charset is a quoted string. But nobody uses such charset names...

        /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it?
        $matches = array();
        if (preg_match('/;\s*charset\s*=([^;]+)/i', $httpHeader, $matches)) {
            return strtoupper(trim($matches[1], " \t\""));
        }

        // 2 - scan the first bytes of the data for a UTF-16 (or other) BOM pattern
        //     (source: http://www.w3.org/TR/2000/REC-xml-20001006)
        //     NOTE: actually, according to the spec, even if we find the BOM and determine
        //     an encoding, we should check if there is an encoding specified
        //     in the xml declaration, and verify if they match.
        /// @todo implement check as described above?
        /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases WITH a BOM)
        if (preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {
            return 'UCS-4';
        } elseif (preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {
            return 'UTF-16';
        } elseif (preg_match('/^(\xEF\xBB\xBF)/', $xmlChunk)) {
            return 'UTF-8';
        }

        // 3 - test if encoding is specified in the xml declaration
        // Details:
        // SPACE:         (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        // EQ:            SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
            $xmlChunk, $matches)) {
            // strip the surrounding quotes from the captured encoding name
            return strtoupper(substr($matches[2], 1, -1));
        }

        // 4 - if mbstring is available, let it do the guesswork
        if (extension_loaded('mbstring')) {
            if ($encodingPrefs == null && PhpXmlRpc::$xmlrpc_detectencodings != null) {
                $encodingPrefs = PhpXmlRpc::$xmlrpc_detectencodings;
            }
            if ($encodingPrefs) {
                $enc = mb_detect_encoding($xmlChunk, $encodingPrefs);
            } else {
                $enc = mb_detect_encoding($xmlChunk);
            }
            // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII...
            // IANA also likes better US-ASCII, so go with it
            if ($enc == 'ASCII') {
                $enc = 'US-' . $enc;
            }

            return $enc;
        } else {
            // no encoding specified: as per HTTP1.1 assume it is iso-8859-1?
            // Both RFC 2616 (HTTP 1.1) and 1945 (HTTP 1.0) clearly state that for text/xxx content types
            // this should be the standard. And we should be getting text/xml as request and response.
            // BUT we have to be backward compatible with the lib, which always used UTF-8 as default...
            return PhpXmlRpc::$xmlrpc_defencoding;
        }
    }

    /**
     * Helper function: checks if an xml chunk has a charset declaration (BOM or in the xml declaration)
     *
     * @param string $xmlChunk
     * @return bool
     */
    public static function hasEncoding($xmlChunk)
    {
        // scan the first bytes of the data for a UTF-16 (or other) BOM pattern
        //     (source: http://www.w3.org/TR/2000/REC-xml-20001006)
        if (preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {
            return true;
        } elseif (preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {
            return true;
        } elseif (preg_match('/^(\xEF\xBB\xBF)/', $xmlChunk)) {
            return true;
        }

        // test if encoding is specified in the xml declaration
        // Details:
        // SPACE:         (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        // EQ:            SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
            $xmlChunk)) {
            return true;
        }

        return false;
    }
}
678
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.