1 | <?php |
||
2 | |||
3 | namespace PhpXmlRpc\Helper; |
||
4 | |||
5 | use PhpXmlRpc\PhpXmlRpc; |
||
6 | use PhpXmlRpc\Traits\DeprecationLogger; |
||
7 | use PhpXmlRpc\Value; |
||
8 | |||
9 | /** |
||
10 | * Deals with parsing the XML. |
||
11 | * @see http://xmlrpc.com/spec.md |
||
12 | * |
||
13 | * @todo implement an interface to allow for alternative implementations |
||
14 | * - make access to $_xh protected, return more high-level data structures |
||
15 | * - move the private parts of $_xh to the internal-use parsing-options config |
||
16 | * - add parseRequest, parseResponse, parseValue methods |
||
17 | * @todo if iconv() or mb_string() are available, we could allow to convert the received xml to a custom charset encoding |
||
18 | * while parsing, which is faster than doing it later by going over the rebuilt data structure |
||
19 | * @todo rename? This is an xml-rpc parser, not a generic xml parser... |
||
20 | * |
||
21 | * @property array $xmlrpc_valid_parents deprecated - public access left in purely for BC |
||
22 | * @property int $accept deprecated - (protected) access left in purely for BC |
||
23 | */ |
||
24 | class XMLParser |
||
25 | { |
||
26 | use DeprecationLogger; |
||
27 | |||
28 | const RETURN_XMLRPCVALS = 'xmlrpcvals'; |
||
29 | const RETURN_EPIVALS = 'epivals'; |
||
30 | const RETURN_PHP = 'phpvals'; |
||
31 | |||
32 | const ACCEPT_REQUEST = 1; |
||
33 | const ACCEPT_RESPONSE = 2; |
||
34 | const ACCEPT_VALUE = 4; |
||
35 | const ACCEPT_FAULT = 8; |
||
36 | |||
37 | /** |
||
38 | * @var int |
||
39 | * The max length beyond which data will get truncated in error messages |
||
40 | */ |
||
41 | protected $maxLogValueLength = 100; |
||
42 | |||
43 | /** |
||
44 | * @var array |
||
45 | * Used to store state during parsing and to pass parsing results to callers. |
||
46 | * Quick explanation of components: |
||
47 | * private: |
||
48 | * ac - used to accumulate values |
||
49 | * stack - array with genealogy of xml elements names, used to validate nesting of xml-rpc elements |
||
50 | * valuestack - array used for parsing arrays and structs |
||
51 | * lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings |
||
52 | * (values: 0=not looking, 1=looking, 3=found) |
||
53 | * public: |
||
54 | * isf - used to indicate an xml-rpc response fault (1), invalid xml-rpc fault (2), xml parsing fault (3) |
||
55 | * isf_reason - used for storing xml-rpc response fault string |
||
56 | * value - used to store the value in responses |
||
57 | * method - used to store method name in requests |
||
58 | * params - used to store parameters in requests |
||
59 | * pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values |
||
60 | * rt - 'methodcall', 'methodresponse', 'value' or 'fault' (the last one used only in EPI emulation mode) |
||
61 | */ |
||
62 | protected $_xh = array( |
||
63 | 'ac' => '', |
||
64 | 'stack' => array(), |
||
65 | 'valuestack' => array(), |
||
66 | 'lv' => 0, |
||
67 | 'isf' => 0, |
||
68 | 'isf_reason' => '', |
||
69 | 'value' => null, |
||
70 | 'method' => false, |
||
71 | 'params' => array(), |
||
72 | 'pt' => array(), |
||
73 | 'rt' => '', |
||
74 | ); |
||
75 | |||
76 | /** |
||
77 | * @var array[] |
||
78 | */ |
||
79 | protected $xmlrpc_valid_parents = array( |
||
80 | 'VALUE' => array('MEMBER', 'DATA', 'PARAM', 'FAULT'), |
||
81 | 'BOOLEAN' => array('VALUE'), |
||
82 | 'I4' => array('VALUE'), |
||
83 | 'I8' => array('VALUE'), |
||
84 | 'EX:I8' => array('VALUE'), |
||
85 | 'INT' => array('VALUE'), |
||
86 | 'STRING' => array('VALUE'), |
||
87 | 'DOUBLE' => array('VALUE'), |
||
88 | 'DATETIME.ISO8601' => array('VALUE'), |
||
89 | 'BASE64' => array('VALUE'), |
||
90 | 'MEMBER' => array('STRUCT'), |
||
91 | 572 | 'NAME' => array('MEMBER'), |
|
92 | 'DATA' => array('ARRAY'), |
||
93 | 572 | 'ARRAY' => array('VALUE'), |
|
94 | 572 | 'STRUCT' => array('VALUE'), |
|
95 | 'PARAM' => array('PARAMS'), |
||
96 | 'METHODNAME' => array('METHODCALL'), |
||
97 | 'PARAMS' => array('METHODCALL', 'METHODRESPONSE'), |
||
98 | 'FAULT' => array('METHODRESPONSE'), |
||
99 | 'NIL' => array('VALUE'), // only used when extension activated |
||
100 | 'EX:NIL' => array('VALUE'), // only used when extension activated |
||
101 | ); |
||
102 | 712 | ||
103 | /** @var array $parsing_options */ |
||
104 | 712 | protected $parsing_options = array(); |
|
105 | |||
106 | /** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */ |
||
107 | //protected $accept = 3; |
||
108 | |||
109 | /** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */ |
||
110 | protected $maxChunkLength = 4194304; |
||
111 | /** @var array |
||
112 | * Used keys: accept, target_charset, methodname_callback, plus the ones set here. |
||
113 | * We initialize it partially to help keep BC with subclasses which might have reimplemented `parse()` but not |
||
114 | * the element handler methods |
||
115 | */ |
||
116 | protected $current_parsing_options = array( |
||
117 | 712 | 'xmlrpc_null_extension' => false, |
|
118 | 'xmlrpc_return_datetimes' => false, |
||
119 | 'xmlrpc_reject_invalid_values' => false |
||
120 | 712 | ); |
|
121 | 2 | ||
122 | 2 | /** |
|
123 | 2 | * @param array $options integer keys: options passed to the inner xml parser |
|
124 | * string keys: |
||
125 | * - target_charset (string) |
||
126 | 710 | * - methodname_callback (callable) |
|
127 | * - xmlrpc_null_extension (bool) |
||
128 | 710 | * - xmlrpc_return_datetimes (bool) |
|
129 | * - xmlrpc_reject_invalid_values (bool) |
||
130 | */ |
||
131 | 710 | public function __construct(array $options = array()) |
|
132 | 709 | { |
|
133 | $this->parsing_options = $options; |
||
134 | } |
||
135 | 710 | ||
136 | /** |
||
137 | 710 | * Parses an xml-rpc xml string. Results of the parsing are found in $this->['_xh']. |
|
138 | * Logs to the error log any issues which do not cause the parsing to fail. |
||
139 | * |
||
140 | 710 | * @param string $data |
|
141 | 27 | * @param string $returnType self::RETURN_XMLRPCVALS, self::RETURN_PHP, self::RETURN_EPIVALS |
|
142 | 27 | * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE |
|
143 | 708 | * @param array $options integer-key options are passed to the xml parser, string-key options are used independently. |
|
144 | * These options are added to options received in the constructor. |
||
145 | * Note that if options xmlrpc_null_extension, xmlrpc_return_datetimes and xmlrpc_reject_invalid_values |
||
146 | * are not set, the default settings from PhpXmlRpc\PhpXmlRpc are used |
||
147 | 708 | * @return array see the definition of $this->_xh for the meaning of the results |
|
148 | * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions) |
||
149 | * |
||
150 | 710 | * @todo refactor? we could 1. return the parsed data structure, and 2. move $returnType and $accept into options |
|
151 | 710 | * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc::$xmlrpc_XXX_format, so |
|
152 | * that parsing will be completely independent of global state. Note that it might incur a small perf hit... |
||
153 | 710 | */ |
|
154 | public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3, $options = array()) |
||
155 | { |
||
156 | 710 | $this->_xh = array( |
|
157 | 710 | 'ac' => '', |
|
158 | 'stack' => array(), |
||
159 | 710 | 'valuestack' => array(), |
|
160 | 3 | 'lv' => 0, |
|
161 | 3 | 'isf' => 0, |
|
162 | 3 | 'isf_reason' => '', |
|
163 | 'value' => null, |
||
164 | 3 | 'method' => false, // so we can check later if we got a methodname or not |
|
165 | 3 | 'params' => false, // so we can check later if we got a params tag or not |
|
166 | 3 | 'pt' => array(), |
|
167 | 'rt' => '', |
||
168 | ); |
||
169 | |||
170 | 710 | $len = strlen($data); |
|
171 | 710 | ||
172 | // we test for empty documents here to save on resource allocation and simplify the chunked-parsing loop below |
||
173 | if ($len == 0) { |
||
174 | $this->_xh['isf'] = 3; |
||
175 | $this->_xh['isf_reason'] = 'XML error 5: empty document'; |
||
176 | return $this->_xh; |
||
177 | } |
||
178 | |||
179 | $this->current_parsing_options = array('accept' => $accept); |
||
180 | |||
181 | 710 | $mergedOptions = $this->parsing_options; |
|
182 | foreach ($options as $key => $val) { |
||
183 | $mergedOptions[$key] = $val; |
||
184 | 710 | } |
|
185 | |||
186 | foreach ($mergedOptions as $key => $val) { |
||
187 | 710 | // q: can php be built without ctype? should we use a regexp? |
|
188 | if (is_string($key) && !ctype_digit($key)) { |
||
189 | /// @todo on invalid options, throw/error-out instead of logging an error message? |
||
190 | switch($key) { |
||
191 | case 'target_charset': |
||
192 | 710 | if (function_exists('mb_convert_encoding')) { |
|
193 | 710 | $this->current_parsing_options['target_charset'] = $val; |
|
194 | } else { |
||
195 | $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": 'target_charset' option is unsupported without mbstring"); |
||
196 | } |
||
197 | 710 | break; |
|
198 | 708 | ||
199 | 3 | case 'methodname_callback': |
|
200 | 703 | if (is_callable($val)) { |
|
201 | 710 | $this->current_parsing_options['methodname_callback'] = $val; |
|
202 | } else { |
||
203 | 2 | $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": Callback passed as 'methodname_callback' is not callable"); |
|
204 | 2 | } |
|
205 | break; |
||
206 | 703 | ||
207 | case 'xmlrpc_null_extension': |
||
208 | case 'xmlrpc_return_datetimes': |
||
209 | case 'xmlrpc_reject_invalid_values': |
||
210 | 710 | $this->current_parsing_options[$key] = $val; |
|
211 | 710 | break; |
|
212 | 2 | ||
213 | 2 | default: |
|
214 | $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": unsupported option: $key"); |
||
215 | 2 | } |
|
216 | unset($mergedOptions[$key]); |
||
217 | } |
||
218 | } |
||
219 | 710 | ||
220 | if (!isset($this->current_parsing_options['xmlrpc_null_extension'])) { |
||
221 | 710 | $this->current_parsing_options['xmlrpc_null_extension'] = PhpXmlRpc::$xmlrpc_null_extension; |
|
222 | } |
||
223 | 708 | if (!isset($this->current_parsing_options['xmlrpc_return_datetimes'])) { |
|
224 | 708 | $this->current_parsing_options['xmlrpc_return_datetimes'] = PhpXmlRpc::$xmlrpc_return_datetimes; |
|
225 | 708 | } |
|
226 | 708 | if (!isset($this->current_parsing_options['xmlrpc_reject_invalid_values'])) { |
|
227 | 708 | $this->current_parsing_options['xmlrpc_reject_invalid_values'] = PhpXmlRpc::$xmlrpc_reject_invalid_values; |
|
228 | 710 | } |
|
229 | 710 | ||
230 | 1 | // NB: we use '' instead of null to force charset detection from the xml declaration |
|
231 | $parser = xml_parser_create(''); |
||
232 | |||
233 | foreach ($mergedOptions as $key => $val) { |
||
234 | xml_parser_set_option($parser, $key, $val); |
||
235 | } |
||
236 | |||
237 | // always set this, in case someone tries to disable it via options... |
||
238 | 710 | xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1); |
|
239 | 710 | ||
240 | 710 | switch ($returnType) { |
|
241 | 710 | case self::RETURN_PHP: |
|
242 | 710 | xml_set_element_handler($parser, array($this, 'xmlrpc_se'), array($this, 'xmlrpc_ee_fast')); |
|
243 | 710 | break; |
|
244 | 710 | case self::RETURN_EPIVALS: |
|
245 | 685 | xml_set_element_handler($parser, array($this, 'xmlrpc_se'), array($this, 'xmlrpc_ee_epi')); |
|
246 | break; |
||
247 | 1 | /// @todo log an error / throw / error-out on unsupported return type |
|
248 | 1 | case XMLParser::RETURN_XMLRPCVALS: |
|
249 | default: |
||
250 | 1 | xml_set_element_handler($parser, array($this, 'xmlrpc_se'), array($this, 'xmlrpc_ee')); |
|
251 | } |
||
252 | 685 | ||
253 | 685 | xml_set_character_data_handler($parser, array($this, 'xmlrpc_cd')); |
|
254 | 710 | xml_set_default_handler($parser, array($this, 'xmlrpc_dh')); |
|
255 | 710 | ||
256 | 400 | try { |
|
257 | // @see ticket #70 - we have to parse big xml docs in chunks to avoid errors |
||
258 | 1 | for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) { |
|
259 | 1 | $chunk = substr($data, $offset, $this->maxChunkLength); |
|
260 | // error handling: xml not well formed |
||
261 | 1 | if (!@xml_parse($parser, $chunk, $offset + $this->maxChunkLength >= $len)) { |
|
262 | $errCode = xml_get_error_code($parser); |
||
263 | $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode), |
||
264 | 399 | xml_get_current_line_number($parser), xml_get_current_column_number($parser)); |
|
265 | 399 | $this->_xh['isf'] = 3; |
|
266 | 399 | $this->_xh['isf_reason'] = $errStr; |
|
267 | } |
||
268 | // no need to parse further if we already have a fatal error |
||
269 | 399 | if ($this->_xh['isf'] >= 2) { |
|
270 | 22 | break; |
|
271 | } |
||
272 | 399 | } |
|
273 | 399 | /// @todo bump minimum php version to 5.5 and use a finally clause instead of doing cleanup 3 times |
|
274 | 399 | } catch (\Exception $e) { |
|
275 | 710 | xml_parser_free($parser); |
|
276 | 239 | $this->current_parsing_options = array(); |
|
277 | /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ? |
||
278 | 1 | throw $e; |
|
279 | 1 | } catch (\Error $e) { |
|
280 | xml_parser_free($parser); |
||
281 | 1 | $this->current_parsing_options = array(); |
|
282 | //$this->accept = $prevAccept; |
||
283 | 710 | /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ? |
|
284 | 710 | throw $e; |
|
285 | 710 | } |
|
286 | |||
287 | 710 | xml_parser_free($parser); |
|
288 | 710 | $this->current_parsing_options = array(); |
|
289 | 710 | ||
290 | // BC |
||
291 | 637 | if ($this->_xh['params'] === false) { |
|
0 ignored issues
–
show
introduced
by
![]() |
|||
292 | 637 | $this->_xh['params'] = array(); |
|
293 | 710 | } |
|
294 | 109 | ||
295 | 109 | return $this->_xh; |
|
296 | 710 | } |
|
297 | |||
298 | 289 | /** |
|
299 | * xml parser handler function for opening element tags. |
||
300 | * @internal |
||
301 | 688 | * |
|
302 | * @param resource $parser |
||
303 | 710 | * @param string $name |
|
304 | 710 | * @param $attrs |
|
305 | 23 | * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead |
|
306 | 23 | * @return void |
|
307 | 23 | * |
|
308 | 23 | * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing |
|
309 | * and remove the checking for $this->_xh['isf'] >= 2 everywhere |
||
310 | */ |
||
311 | public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false) |
||
312 | { |
||
313 | // if invalid xml-rpc already detected, skip all processing |
||
314 | if ($this->_xh['isf'] >= 2) { |
||
315 | 23 | return; |
|
316 | 23 | } |
|
317 | |||
318 | // check for correct element nesting |
||
319 | if (count($this->_xh['stack']) == 0) { |
||
320 | // top level element can only be of 2 types |
||
321 | /// @todo optimization creep: save this check into a bool variable, instead of using count() every time: |
||
322 | 1 | /// there is only a single top level element in xml anyway |
|
323 | 1 | ||
324 | 1 | // BC |
|
325 | if ($acceptSingleVals === false) { |
||
326 | $accept = $this->current_parsing_options['accept']; |
||
327 | } else { |
||
328 | 710 | $this->logDeprecation('Using argument $acceptSingleVals for method ' . __METHOD__ . ' is deprecated'); |
|
329 | $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE; |
||
330 | } |
||
331 | 710 | if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) || |
|
332 | 710 | ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) || |
|
333 | ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE)) || |
||
334 | ($name == 'FAULT' && ($accept & self::ACCEPT_FAULT))) { |
||
335 | 710 | $this->_xh['rt'] = strtolower($name); |
|
336 | } else { |
||
337 | $this->_xh['isf'] = 2; |
||
338 | $this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name; |
||
339 | |||
340 | return; |
||
341 | } |
||
342 | } else { |
||
343 | // not top level element: see if parent is OK |
||
344 | $parent = end($this->_xh['stack']); |
||
345 | if (!array_key_exists($name, $this->xmlrpc_valid_parents) || !in_array($parent, $this->xmlrpc_valid_parents[$name])) { |
||
346 | $this->_xh['isf'] = 2; |
||
347 | $this->_xh['isf_reason'] = "xmlrpc element $name cannot be child of $parent"; |
||
348 | |||
349 | return; |
||
350 | } |
||
351 | } |
||
352 | |||
353 | switch ($name) { |
||
354 | // optimize for speed switch cases: most common cases first |
||
355 | case 'VALUE': |
||
356 | /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element |
||
357 | 710 | $this->_xh['vt'] = 'value'; // indicator: no value found yet |
|
358 | $this->_xh['ac'] = ''; |
||
359 | 710 | $this->_xh['lv'] = 1; |
|
360 | $this->_xh['php_class'] = null; |
||
361 | break; |
||
362 | |||
363 | case 'I8': |
||
364 | 709 | case 'EX:I8': |
|
365 | if (PHP_INT_SIZE === 4) { |
||
366 | 709 | // INVALID ELEMENT: RAISE ISF so that it is later recognized!!! |
|
367 | 709 | $this->_xh['isf'] = 2; |
|
368 | $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode"; |
||
369 | 707 | ||
370 | 30 | return; |
|
371 | 30 | } |
|
372 | // fall through voluntarily |
||
373 | |||
374 | 707 | case 'I4': |
|
375 | case 'INT': |
||
376 | 705 | case 'STRING': |
|
377 | case 'BOOLEAN': |
||
378 | case 'DOUBLE': |
||
379 | 705 | case 'DATETIME.ISO8601': |
|
380 | 22 | case 'BASE64': |
|
381 | if ($this->_xh['vt'] != 'value') { |
||
382 | 705 | // two data elements inside a value: an error occurred! |
|
383 | 27 | $this->_xh['isf'] = 2; |
|
384 | $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value"; |
||
385 | |||
386 | return; |
||
387 | } |
||
388 | $this->_xh['ac'] = ''; // reset the accumulator |
||
389 | break; |
||
390 | |||
391 | case 'STRUCT': |
||
392 | case 'ARRAY': |
||
393 | if ($this->_xh['vt'] != 'value') { |
||
394 | // two data elements inside a value: an error occurred! |
||
395 | $this->_xh['isf'] = 2; |
||
396 | $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value"; |
||
397 | |||
398 | return; |
||
399 | } |
||
400 | // create an empty array to hold child values, and push it onto appropriate stack |
||
401 | $curVal = array( |
||
402 | 'values' => array(), |
||
403 | 'type' => $name, |
||
404 | ); |
||
405 | // check for out-of-band information to rebuild php objs and, in case it is found, save it |
||
406 | 707 | if (@isset($attrs['PHP_CLASS'])) { |
|
407 | 707 | $curVal['php_class'] = $attrs['PHP_CLASS']; |
|
408 | 239 | } |
|
409 | $this->_xh['valuestack'][] = $curVal; |
||
410 | 707 | $this->_xh['vt'] = 'data'; // be prepared for a data element next |
|
411 | 709 | break; |
|
412 | 709 | ||
413 | 709 | case 'DATA': |
|
414 | 709 | if ($this->_xh['vt'] != 'data') { |
|
415 | 709 | // two data elements inside a value: an error occurred! |
|
416 | 709 | $this->_xh['isf'] = 2; |
|
417 | 708 | $this->_xh['isf_reason'] = "found two data elements inside an array element"; |
|
418 | 708 | ||
419 | 708 | return; |
|
420 | 685 | } |
|
421 | |||
422 | case 'METHODCALL': |
||
423 | 685 | case 'METHODRESPONSE': |
|
424 | 594 | // valid elements that add little to processing |
|
425 | 477 | break; |
|
426 | 7 | ||
427 | case 'PARAMS': |
||
428 | $this->_xh['params'] = array(); |
||
429 | 7 | break; |
|
430 | 7 | ||
431 | 472 | case 'METHODNAME': |
|
432 | case 'NAME': |
||
433 | 22 | /// @todo we could check for 2 NAME elements inside a MEMBER element |
|
434 | 451 | $this->_xh['ac'] = ''; |
|
435 | break; |
||
436 | |||
437 | case 'FAULT': |
||
438 | $this->_xh['isf'] = 1; |
||
439 | break; |
||
440 | |||
441 | 46 | case 'MEMBER': |
|
442 | 46 | // set member name to null, in case we do not find in the xml later on |
|
443 | $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = null; |
||
444 | //$this->_xh['ac']=''; |
||
445 | 24 | // Drop trough intentionally |
|
446 | |||
447 | case 'PARAM': |
||
448 | 46 | // clear value type, so we can check later if no value has been passed for this param/member |
|
449 | $this->_xh['vt'] = null; |
||
450 | 408 | break; |
|
451 | |||
452 | case 'NIL': |
||
453 | case 'EX:NIL': |
||
454 | 25 | if ($this->current_parsing_options['xmlrpc_null_extension']) { |
|
455 | if ($this->_xh['vt'] != 'value') { |
||
456 | // two data elements inside a value: an error occurred! |
||
457 | $this->_xh['isf'] = 2; |
||
458 | $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value"; |
||
459 | |||
460 | 25 | return; |
|
461 | } |
||
462 | // reset the accumulator - q: is this necessary at all here? we don't use it on _ee anyway for NILs |
||
463 | $this->_xh['ac'] = ''; |
||
464 | |||
465 | 387 | } else { |
|
466 | $this->_xh['isf'] = 2; |
||
467 | $this->_xh['isf_reason'] = 'Invalid NIL value received. Support for NIL can be enabled via \\PhpXmlRpc\\PhpXmlRpc::$xmlrpc_null_extension'; |
||
468 | |||
469 | return; |
||
470 | } |
||
471 | 387 | break; |
|
472 | |||
473 | default: |
||
474 | 685 | // INVALID ELEMENT: RAISE ISF so that it is later recognized |
|
475 | 685 | /// @todo feature creep = allow a callback instead |
|
476 | 708 | $this->_xh['isf'] = 2; |
|
477 | 289 | $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name"; |
|
478 | 289 | ||
479 | 708 | return; |
|
480 | } |
||
481 | |||
482 | 289 | // Save current element name to stack, to validate nesting |
|
483 | 268 | $this->_xh['stack'][] = $name; |
|
484 | 268 | ||
485 | /// @todo optimization creep: move this inside the big switch() above |
||
486 | 22 | if ($name != 'VALUE') { |
|
487 | $this->_xh['lv'] = 0; |
||
488 | 289 | } |
|
489 | 708 | } |
|
490 | 239 | ||
491 | 239 | /** |
|
492 | 707 | * xml parser handler function for close element tags. |
|
493 | 707 | * @internal |
|
494 | * |
||
495 | 398 | * @param resource $parser |
|
496 | 398 | * @param string $name |
|
497 | 398 | * @param int $rebuildXmlrpcvals >1 for rebuilding xmlrpcvals, 0 for rebuilding php values, -1 for xmlrpc-extension compatibility |
|
498 | 398 | * @return void |
|
499 | 22 | * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions) |
|
500 | * |
||
501 | 398 | * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing |
|
502 | 707 | * and remove the checking for $this->_xh['isf'] >= 2 everywhere |
|
503 | */ |
||
504 | public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = 1) |
||
505 | 684 | { |
|
506 | 684 | if ($this->_xh['isf'] >= 2) { |
|
507 | 684 | return; |
|
508 | } |
||
509 | |||
510 | // push this element name from stack |
||
511 | 684 | // NB: if XML validates, correct opening/closing is guaranteed and we do not have to check for $name == $currElem. |
|
512 | 707 | // we also checked for proper nesting at start of elements... |
|
513 | 562 | $currElem = array_pop($this->_xh['stack']); |
|
0 ignored issues
–
show
|
|||
514 | 562 | ||
515 | 706 | switch ($name) { |
|
516 | 706 | case 'VALUE': |
|
517 | 23 | // If no scalar was inside <VALUE></VALUE>, it was a string value |
|
518 | 23 | if ($this->_xh['vt'] == 'value') { |
|
519 | 23 | $this->_xh['value'] = $this->_xh['ac']; |
|
520 | 23 | $this->_xh['vt'] = Value::$xmlrpcString; |
|
521 | 23 | } |
|
522 | |||
523 | // in case there is charset conversion required, do it here, to catch both cases of string values |
||
524 | 706 | if (isset($this->current_parsing_options['target_charset']) && $this->_xh['vt'] === Value::$xmlrpcString) { |
|
525 | 706 | $this->_xh['value'] = mb_convert_encoding($this->_xh['value'], $this->current_parsing_options['target_charset'], 'UTF-8'); |
|
526 | 706 | } |
|
527 | 705 | ||
528 | 706 | if ($rebuildXmlrpcvals > 0) { |
|
529 | // build the xml-rpc val out of the data received, and substitute it |
||
530 | $temp = new Value($this->_xh['value'], $this->_xh['vt']); |
||
531 | // in case we got info about underlying php class, save it in the object we're rebuilding |
||
532 | 705 | if (isset($this->_xh['php_class'])) { |
|
533 | $temp->_php_class = $this->_xh['php_class']; |
||
534 | } |
||
535 | 710 | $this->_xh['value'] = $temp; |
|
536 | } elseif ($rebuildXmlrpcvals < 0) { |
||
537 | if ($this->_xh['vt'] == Value::$xmlrpcDateTime) { |
||
538 | $this->_xh['value'] = (object)array( |
||
539 | 'xmlrpc_type' => 'datetime', |
||
540 | 'scalar' => $this->_xh['value'], |
||
541 | 'timestamp' => \PhpXmlRpc\Helper\Date::iso8601Decode($this->_xh['value']) |
||
542 | ); |
||
543 | 27 | } elseif ($this->_xh['vt'] == Value::$xmlrpcBase64) { |
|
544 | $this->_xh['value'] = (object)array( |
||
545 | 27 | 'xmlrpc_type' => 'base64', |
|
546 | 27 | 'scalar' => $this->_xh['value'] |
|
547 | ); |
||
548 | } |
||
549 | } else { |
||
550 | /// @todo this should handle php-serialized objects, since std deserializing is done |
||
551 | /// by php_xmlrpc_decode, which we will not be calling... |
||
552 | //if (isset($this->_xh['php_class'])) { |
||
553 | //} |
||
554 | } |
||
555 | |||
556 | // check if we are inside an array or struct: |
||
557 | // if value just built is inside an array, let's move it into array on the stack |
||
558 | $vscount = count($this->_xh['valuestack']); |
||
559 | if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') { |
||
560 | $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value']; |
||
561 | } |
||
562 | break; |
||
563 | |||
564 | case 'STRING': |
||
565 | 710 | $this->_xh['vt'] = Value::$xmlrpcString; |
|
566 | $this->_xh['lv'] = 3; // indicate we've found a value |
||
567 | $this->_xh['value'] = $this->_xh['ac']; |
||
568 | 710 | break; |
|
569 | |||
570 | case 'BOOLEAN': |
||
571 | 710 | $this->_xh['vt'] = Value::$xmlrpcBoolean; |
|
572 | 710 | $this->_xh['lv'] = 3; // indicate we've found a value |
|
573 | // We translate boolean 1 or 0 into PHP constants true or false. Strings 'true' and 'false' are accepted, |
||
574 | // even though the spec never mentions them (see e.g. Blogger api docs) |
||
575 | 710 | // NB: this simple checks helps a lot sanitizing input, i.e. no security problems around here |
|
576 | // Note the non-strict type check: it will allow ' 1 ' |
||
577 | /// @todo feature-creep: use a flexible regexp, the same as we do with int, double and datetime. |
||
578 | /// Note that using a regexp would also make this test less sensitive to phpunit shenanigans, and |
||
579 | /// to changes in the way php compares strings (since 8.0, leading and trailing newlines are |
||
580 | /// accepted when deciding if a string numeric...) |
||
581 | if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') === 0) { |
||
582 | $this->_xh['value'] = true; |
||
583 | } else { |
||
584 | 696 | // log if receiving something strange, even though we set the value to false anyway |
|
585 | /// @todo to be consistent with the other types, we should return a value outside the good-value domain, e.g. NULL |
||
586 | if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') !== 0) { |
||
587 | 696 | if (!$this->handleParsingError('invalid data received in BOOLEAN value: ' . |
|
588 | 696 | $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) { |
|
589 | return; |
||
590 | } |
||
591 | } |
||
592 | $this->_xh['value'] = false; |
||
593 | } |
||
594 | 696 | break; |
|
595 | |||
596 | case 'EX:I8': |
||
597 | $name = 'i8'; |
||
598 | // fall through voluntarily |
||
599 | case 'I4': |
||
600 | case 'I8': |
||
601 | case 'INT': |
||
602 | // NB: we build the Value object with the original xml element name found, except for ex:i8. The |
||
603 | // `Value::scalarTyp()` function will do some normalization of the data |
||
604 | $this->_xh['vt'] = strtolower($name); |
||
605 | $this->_xh['lv'] = 3; // indicate we've found a value |
||
606 | if (!preg_match(PhpXmlRpc::$xmlrpc_int_format, $this->_xh['ac'])) { |
||
607 | if (!$this->handleParsingError('non numeric data received in INT value: ' . |
||
608 | $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) { |
||
609 | return; |
||
610 | } |
||
611 | /// @todo: find a better way of reporting an error value than this! Use NaN? |
||
612 | $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; |
||
613 | } else { |
||
614 | // it's ok, add it on |
||
615 | $this->_xh['value'] = (int)$this->_xh['ac']; |
||
616 | } |
||
617 | 711 | break; |
|
618 | |||
619 | case 'DOUBLE': |
||
620 | $this->_xh['vt'] = Value::$xmlrpcDouble; |
||
621 | $this->_xh['lv'] = 3; // indicate we've found a value |
||
622 | if (!preg_match(PhpXmlRpc::$xmlrpc_double_format, $this->_xh['ac'])) { |
||
623 | if (!$this->handleParsingError('non numeric data received in DOUBLE value: ' . |
||
624 | $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) { |
||
625 | return; |
||
626 | } |
||
627 | |||
628 | $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; |
||
629 | } else { |
||
630 | // it's ok, add it on |
||
631 | $this->_xh['value'] = (double)$this->_xh['ac']; |
||
632 | } |
||
633 | break; |
||
634 | 711 | ||
635 | 711 | case 'DATETIME.ISO8601': |
|
636 | 695 | $this->_xh['vt'] = Value::$xmlrpcDateTime; |
|
637 | $this->_xh['lv'] = 3; // indicate we've found a value |
||
638 | if (!preg_match(PhpXmlRpc::$xmlrpc_datetime_format, $this->_xh['ac'])) { |
||
639 | if (!$this->handleParsingError('invalid data received in DATETIME value: ' . |
||
640 | $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) { |
||
641 | return; |
||
642 | } |
||
643 | } |
||
644 | if ($this->current_parsing_options['xmlrpc_return_datetimes']) { |
||
645 | try { |
||
646 | 529 | $this->_xh['value'] = new \DateTime($this->_xh['ac']); |
|
647 | |||
648 | 529 | // the default regex used to validate the date string a few lines above should make this case impossible, |
|
649 | // but one never knows... |
||
650 | 529 | } catch(\Exception $e) { |
|
651 | // what to do? We can not guarantee that a valid date can be created. We return null... |
||
652 | if (!$this->handleParsingError('invalid data received in DATETIME value. Error ' . |
||
653 | $e->getMessage(), __METHOD__)) { |
||
654 | return; |
||
655 | } |
||
656 | } |
||
657 | } else { |
||
658 | 529 | $this->_xh['value'] = $this->_xh['ac']; |
|
659 | 529 | } |
|
660 | break; |
||
661 | 24 | ||
662 | case 'BASE64': |
||
663 | $this->_xh['vt'] = Value::$xmlrpcBase64; |
||
664 | $this->_xh['lv'] = 3; // indicate we've found a value |
||
665 | 506 | if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) { |
|
666 | 506 | $v = base64_decode($this->_xh['ac'], true); |
|
667 | 4 | if ($v === false) { |
|
668 | $this->_xh['isf'] = 2; |
||
669 | 506 | $this->_xh['isf_reason'] = 'Invalid data received in BASE64 value: '. $this->truncateValueForLog($this->_xh['ac']); |
|
670 | 4 | return; |
|
671 | } |
||
672 | 502 | } else { |
|
673 | $v = base64_decode($this->_xh['ac']); |
||
674 | if ($v === '' && $this->_xh['ac'] !== '') { |
||
675 | // only the empty string should decode to the empty string |
||
676 | 506 | $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ': invalid data received in BASE64 value: ' . |
|
677 | 499 | $this->truncateValueForLog($this->_xh['ac'])); |
|
678 | } |
||
679 | } |
||
680 | 506 | $this->_xh['value'] = $v; |
|
681 | break; |
||
682 | |||
683 | case 'NAME': |
||
684 | $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = $this->_xh['ac']; |
||
685 | break; |
||
686 | |||
687 | case 'MEMBER': |
||
688 | // add to array in the stack the last element built, unless no VALUE or no NAME were found |
||
689 | if ($this->_xh['vt']) { |
||
690 | $vscount = count($this->_xh['valuestack']); |
||
691 | if ($this->_xh['valuestack'][$vscount - 1]['name'] === null) { |
||
692 | if (!$this->handleParsingError('missing NAME inside STRUCT in received xml', __METHOD__)) { |
||
693 | return; |
||
694 | } |
||
695 | $this->_xh['valuestack'][$vscount - 1]['name'] = ''; |
||
696 | 82 | } |
|
697 | $this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value']; |
||
698 | } else { |
||
699 | if (!$this->handleParsingError('missing VALUE inside STRUCT in received xml', __METHOD__)) { |
||
700 | 82 | return; |
|
701 | } |
||
702 | 82 | } |
|
703 | break; |
||
704 | 82 | ||
705 | case 'DATA': |
||
706 | $this->_xh['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty |
||
707 | break; |
||
708 | |||
709 | case 'STRUCT': |
||
710 | case 'ARRAY': |
||
711 | // fetch out of stack array of values, and promote it to current value |
||
712 | 82 | $currVal = array_pop($this->_xh['valuestack']); |
|
713 | 82 | $this->_xh['value'] = $currVal['values']; |
|
714 | $this->_xh['vt'] = strtolower($name); |
||
715 | 78 | if (isset($currVal['php_class'])) { |
|
716 | $this->_xh['php_class'] = $currVal['php_class']; |
||
717 | } |
||
718 | 5 | break; |
|
719 | |||
720 | case 'PARAM': |
||
721 | // add to array of params the current value, unless no VALUE was found |
||
722 | /// @todo should we also check if there were two VALUE inside the PARAM? |
||
723 | if ($this->_xh['vt']) { |
||
724 | $this->_xh['params'][] = $this->_xh['value']; |
||
725 | $this->_xh['pt'][] = $this->_xh['vt']; |
||
726 | } else { |
||
727 | if (!$this->handleParsingError('missing VALUE inside PARAM in received xml', __METHOD__)) { |
||
728 | return; |
||
729 | } |
||
730 | } |
||
731 | break; |
||
732 | |||
733 | case 'METHODNAME': |
||
734 | if (!preg_match(PhpXmlRpc::$xmlrpc_methodname_format, $this->_xh['ac'])) { |
||
735 | if (!$this->handleParsingError('invalid data received in METHODNAME: '. |
||
736 | $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) { |
||
737 | return; |
||
738 | } |
||
739 | } |
||
740 | $methodName = trim($this->_xh['ac']); |
||
741 | $this->_xh['method'] = $methodName; |
||
742 | // we allow the callback to f.e. give us back a mangled method name by manipulating $this |
||
743 | if (isset($this->current_parsing_options['methodname_callback'])) { |
||
744 | call_user_func($this->current_parsing_options['methodname_callback'], $methodName, $this, $parser); |
||
745 | } |
||
746 | break; |
||
747 | |||
748 | case 'NIL': |
||
749 | case 'EX:NIL': |
||
750 | // NB: if NIL support is not enabled, parsing stops at element start. So this If is redundant |
||
751 | //if ($this->current_parsing_options['xmlrpc_null_extension']) { |
||
752 | $this->_xh['vt'] = 'null'; |
||
753 | $this->_xh['value'] = null; |
||
754 | $this->_xh['lv'] = 3; |
||
755 | //} |
||
756 | break; |
||
757 | |||
758 | /// @todo add extra checking: |
||
759 | /// - FAULT should contain a single struct with the 2 expected members (check their name and type) |
||
760 | case 'PARAMS': |
||
761 | case 'FAULT': |
||
762 | break; |
||
763 | |||
764 | case 'METHODCALL': |
||
765 | /// @todo should we allow to accept this case via a call to handleParsingError ? |
||
766 | if ($this->_xh['method'] === false) { |
||
767 | $this->_xh['isf'] = 2; |
||
768 | $this->_xh['isf_reason'] = "missing METHODNAME element inside METHODCALL"; |
||
769 | } |
||
770 | break; |
||
771 | |||
772 | case 'METHODRESPONSE': |
||
773 | /// @todo should we allow to accept these cases via a call to handleParsingError ? |
||
774 | if ($this->_xh['isf'] != 1 && $this->_xh['params'] === false) { |
||
775 | $this->_xh['isf'] = 2; |
||
776 | $this->_xh['isf_reason'] = "missing both FAULT and PARAMS elements inside METHODRESPONSE"; |
||
777 | } elseif ($this->_xh['isf'] == 0 && count($this->_xh['params']) !== 1) { |
||
778 | $this->_xh['isf'] = 2; |
||
779 | $this->_xh['isf_reason'] = "PARAMS element inside METHODRESPONSE should have exactly 1 PARAM"; |
||
780 | } elseif ($this->_xh['isf'] == 1 && $this->_xh['params'] !== false) { |
||
781 | $this->_xh['isf'] = 2; |
||
782 | $this->_xh['isf_reason'] = "both FAULT and PARAMS elements found inside METHODRESPONSE"; |
||
783 | } |
||
784 | break; |
||
785 | |||
786 | default: |
||
787 | // End of INVALID ELEMENT |
||
788 | // Should we add an assert here for unreachable code? When an invalid element is found in xmlrpc_se, |
||
789 | // $this->_xh['isf'] is set to 2... |
||
790 | break; |
||
791 | } |
||
792 | } |
||
793 | |||
/**
 * End-element handler variant used when decoding xml-rpc requests/responses without rebuilding xml-rpc Values.
 * It only forwards the call to xmlrpc_ee(), passing 0 as third argument.
 * @internal
 *
 * @param resource $parser
 * @param string $name name of the element being closed
 * @return void
 */
public function xmlrpc_ee_fast($parser, $name)
{
    $this->xmlrpc_ee($parser, $name, 0);
}
||
806 | |||
/**
 * End-element handler variant used when decoding xml-rpc requests/responses while building xmlrpc-extension
 * Values (plain php for everything except base64 and datetime).
 * It only forwards the call to xmlrpc_ee(), passing -1 as third argument.
 * @internal
 *
 * @param resource $parser
 * @param string $name name of the element being closed
 * @return void
 */
public function xmlrpc_ee_epi($parser, $name)
{
    $this->xmlrpc_ee($parser, $name, -1);
}
||
819 | |||
/**
 * Character-data handler for the xml parser: appends the received text to the 'ac' accumulator.
 * @internal
 *
 * @param resource $parser
 * @param string $data the chunk of character data
 * @return void
 */
public function xmlrpc_cd($parser, $data)
{
    // Two cases in which the data is dropped:
    // - an xml fault was already detected ('isf' >= 2)
    // - "lookforvalue == 3", i.e. an entire value was already found and any further character data is to be discarded
    if ($this->_xh['isf'] >= 2 || $this->_xh['lv'] == 3) {
        return;
    }

    $this->_xh['ac'] .= $data;
}
||
840 | |||
/**
 * Default handler for the xml parser, i.e. the one receiving whatever is neither character data nor an element
 * start/end tag. As per the original author's note, in practice it only gets called on unknown entities.
 * @internal
 *
 * @param resource $parser
 * @param string $data
 * @return void
 */
public function xmlrpc_dh($parser, $data)
{
    // Data is only accumulated when no xml fault has been detected yet and it looks like an entity reference,
    // i.e. it starts with '&' and ends with ';'
    if ($this->_xh['isf'] < 2 && substr($data, 0, 1) == '&' && substr($data, -1, 1) == ';') {
        $this->_xh['ac'] .= $data;
    }
}
||
861 | |||
/**
 * Helper function for guessing the charset encoding of an XML chunk received over HTTP.
 *
 * NB: according to the spec (RFC 3023), when text/xml content is received over HTTP without a charset being
 * specified, we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of non-conforming (legacy?)
 * clients/servers, which will be most probably using UTF-8 anyway...
 * The following sources are checked, the first one giving a result wins:
 *  1. http headers
 *  2. BOM
 *  3. XML declaration
 *  4. guesses using mb_detect_encoding()
 *
 * @param string $httpHeader the http Content-type header
 * @param string $xmlChunk xml content buffer
 * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled).
 *                              This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings
 * @return string|false the encoding determined (uppercased when it is taken from the http header or the xml
 *                      declaration). When steps 1-3 give no result: PhpXmlRpc::$xmlrpc_defencoding if mbstring is
 *                      not enabled, otherwise whatever mb_detect_encoding() returns, with 'ASCII' renamed to
 *                      'US-ASCII' - as per the php manual, that is false when detection fails
 *
 * @todo as of 2023, the relevant RFC for XML Media Types is now 7303, and for HTTP it is 9110. Check if the order of
 *       precedence implemented here is still correct
 * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!!
 * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc static variables, to make
 *       the method independent of global state
 */
public static function guessEncoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null)
{
    // discussion: see http://www.yale.edu/pclt/encoding/

    // Step 1 - look for a charset in the HTTP HEADERS
    // Details:
    //  LWS: (\13\10)?( |\t)+
    //  token: (any char but excluded stuff)+
    //  quoted string: " (any char but double quotes and control chars)* "
    //  header: Content-type = ...; charset=value(; ...)*
    //  where value is of type token, no LWS allowed between 'charset' and value
    // Note: invalid chars in VALUE are not checked for
    // Note 2: whitespace/tabs which ought to be left in when the received charset is a quoted string might get
    //  removed. But nobody uses such charset names...
    /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it?
    $found = array();
    if (preg_match('/;\s*charset\s*=([^;]+)/i', $httpHeader, $found)) {
        return strtoupper(trim($found[1], " \t\""));
    }

    // Step 2 - look at the first bytes of the data for a BOM (source: http://www.w3.org/TR/2000/REC-xml-20001006).
    // The 4-byte signatures have to be tested before the 2-byte ones, as FF FE is a prefix of FF FE 00 00.
    // NOTE: according to the spec, even when a BOM is found we should check if there is an encoding specified in
    //  the xml declaration, and verify that the two match.
    /// @todo implement check as described above?
    /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases WITH a BOM)
    $bomSignatures = array(
        'UCS-4' => '/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/',
        'UTF-16' => '/^(?:\xFE\xFF|\xFF\xFE)/',
        'UTF-8' => '/^(?:\xEF\xBB\xBF)/',
    );
    foreach ($bomSignatures as $charset => $signature) {
        if (preg_match($signature, $xmlChunk)) {
            return $charset;
        }
    }

    // Step 3 - look for an encoding in the xml declaration
    /// @todo this regexp will fail if $xmlChunk uses UTF-32/UCS-4, and most likely UTF-16/UCS-2 as well. In that
    ///       case we leave the guesswork up to mbstring - which seems to be able to detect it, starting with php 5.6.
    ///       For lower versions, we could attempt usage of mb_ereg...
    // Details:
    //  SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
    //  EQ: SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
    // We could be stricter on version number: VersionNum ::= '1.' [0-9]+
    $xmlDeclaration = '/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
        '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/";
    if (preg_match($xmlDeclaration, $xmlChunk, $found)) {
        // the 2nd capture group includes the quotes: strip them
        return strtoupper(substr($found[2], 1, -1));
    }

    // Step 4 - without mbstring there is nothing left to try
    if (!function_exists('mb_detect_encoding')) {
        // No encoding specified: assume it is iso-8859-1, as per HTTP1.1?
        // Both RFC 2616 (HTTP 1.1) and RFC 1945 (HTTP 1.0) clearly state that for text/xxx content types
        // this should be the standard. And we should be getting text/xml as request and response.
        // BUT we have to be backward compatible with the lib, which always used UTF-8 as default. Moreover,
        // RFC 7231, which obsoletes the two RFC mentioned above, has changed the rules. It says:
        // "The default charset of ISO-8859-1 for text media types has been removed; the default is now whatever
        // the media type definition says."
        return PhpXmlRpc::$xmlrpc_defencoding;
    }

    // ...otherwise let mbstring do the guesswork, falling back to the globally configured candidate encodings
    if ($encodingPrefs == null && PhpXmlRpc::$xmlrpc_detectencodings != null) {
        $encodingPrefs = PhpXmlRpc::$xmlrpc_detectencodings;
    }
    $detected = $encodingPrefs ? mb_detect_encoding($xmlChunk, $encodingPrefs) : mb_detect_encoding($xmlChunk);

    // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII...
    // IANA also likes better US-ASCII, so go with it
    return ($detected == 'ASCII') ? 'US-' . $detected : $detected;
}
||
967 | |||
/**
 * Helper function: tells whether an xml chunk carries a charset declaration, either as a BOM or within the xml
 * declaration.
 *
 * @param string $xmlChunk
 * @return bool
 *
 * @todo rename to hasEncodingDeclaration
 */
public static function hasEncoding($xmlChunk)
{
    $declarationPatterns = array(
        // BOM patterns for the first bytes of the data (source: http://www.w3.org/TR/2000/REC-xml-20001006):
        // 4-byte signatures, 2-byte signatures, then the UTF-8 one
        '/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/',
        '/^(?:\xFE\xFF|\xFF\xFE)/',
        '/^(?:\xEF\xBB\xBF)/',
        // encoding specified in the xml declaration
        // Details:
        //  SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        //  EQ: SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        // We could be stricter on version number: VersionNum ::= '1.' [0-9]+
        '/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
    );

    foreach ($declarationPatterns as $pattern) {
        if (preg_match($pattern, $xmlChunk)) {
            return true;
        }
    }

    return false;
}
||
1001 | |||
/**
 * Central handling of invalid data found while parsing. Depending on the 'xmlrpc_reject_invalid_values' parsing
 * option, the error is either recorded as a fault in $this->_xh ('isf' set to 2, 'isf_reason' set to the message
 * with its first letter uppercased) or just sent to the logger.
 *
 * @param string $message
 * @param string $method method/file/line info, used as prefix of the logged message when not empty
 * @return bool false if the caller has to stop parsing
 */
protected function handleParsingError($message, $method = '')
{
    if (!$this->current_parsing_options['xmlrpc_reject_invalid_values']) {
        // lenient mode: log the problem and let the caller carry on
        $origin = ($method != '') ? $method . ': ' : '';
        $this->getLogger()->error('XML-RPC: ' . $origin . $message);

        return true;
    }

    // strict mode: flag the fault
    $this->_xh['isf'] = 2;
    $this->_xh['isf_reason'] = ucfirst($message);

    return false;
}
||
1018 | |||
/**
 * Shortens unsafe data before it gets embedded in an error message: when $data is longer than
 * $this->maxLogValueLength bytes, only its first ($this->maxLogValueLength - 3) bytes are kept, followed by '...'.
 * Shorter data is returned untouched.
 *
 * @param string $data
 * @return string
 */
protected function truncateValueForLog($data)
{
    $limit = $this->maxLogValueLength;

    return (strlen($data) > $limit) ? substr($data, 0, $limit - 3) . '...' : $data;
}
||
1032 | |||
1033 | // *** BC layer *** |
||
1034 | |||
/**
 * Deprecated xml parser handler function for opening element tags, which was used in decoding xml chunks that
 * might represent single xml-rpc values as well as requests, responses.
 * It logs a deprecation message, then forwards the call to xmlrpc_se(), passing true as fourth argument.
 * @deprecated
 *
 * @param resource $parser
 * @param $name
 * @param $attrs
 * @return void
 */
public function xmlrpc_se_any($parser, $name, $attrs)
{
    // NB: as this is an element handler, the deprecation message gets logged over and over if the method is in use
    $this->logDeprecation('Method ' . __METHOD__ . ' is deprecated');

    $this->xmlrpc_se($parser, $name, $attrs, true);
}
||
1052 | |||
/**
 * BC layer: read access to properties which are not directly accessible any more. A deprecation message is logged
 * for every supported property.
 * Supported names: '_xh', 'xmlrpc_valid_parents' and 'accept' - the latter being read from
 * $this->current_parsing_options['accept'], which is where __set(), __isset() and __unset() keep it.
 * Any other name triggers an E_USER_WARNING and results in null.
 *
 * @param string $name
 * @return mixed returned by reference
 */
public function &__get($name)
{
    switch ($name) {
        case '_xh':
        case 'xmlrpc_valid_parents':
            $this->logDeprecation('Getting property XMLParser::' . $name . ' is deprecated');
            return $this->$name;
        case 'accept':
            $this->logDeprecation('Getting property XMLParser::' . $name . ' is deprecated');
            if (isset($this->current_parsing_options['accept'])) {
                return $this->current_parsing_options['accept'];
            }
            // when the option is not set, a local variable is returned instead of the array element: returning
            // the latter by reference would create it as a side effect
            $result = null;
            return $result;
        default:
            /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
            $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
            trigger_error('Undefined property via __get(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING);
            // a variable is needed, since the method returns by reference
            $result = null;
            return $result;
    }
}
||
1068 | |||
/**
 * BC layer: write access to properties which are not directly accessible any more. A deprecation message is
 * logged for every supported property.
 * 'accept' is stored into $this->current_parsing_options['accept']; '_xh' and 'xmlrpc_valid_parents' are stored
 * into the member of the same name. Any other name triggers an E_USER_WARNING.
 *
 * @param string $name
 * @param mixed $value
 * @return void
 */
public function __set($name, $value)
{
    // this should only ever be called by subclasses which overtook `parse()`
    if ($name === 'accept') {
        $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
        $this->current_parsing_options['accept'] = $value;

        return;
    }

    if ($name === '_xh' || $name === 'xmlrpc_valid_parents') {
        $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
        $this->$name = $value;

        return;
    }

    /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
    $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
    trigger_error('Undefined property via __set(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING);
}
||
1088 | |||
/**
 * BC layer: isset() support for properties which are not directly accessible any more. A deprecation message is
 * logged for every supported property.
 * 'accept' is looked up in $this->current_parsing_options; '_xh' and 'xmlrpc_valid_parents' in the member of the
 * same name. Any other name is reported as not set, without any message.
 *
 * @param string $name
 * @return bool
 */
public function __isset($name)
{
    if ($name === 'accept') {
        $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');

        return isset($this->current_parsing_options['accept']);
    }

    if ($name === '_xh' || $name === 'xmlrpc_valid_parents') {
        $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');

        return isset($this->$name);
    }

    return false;
}
||
1103 | |||
/**
 * BC layer: unset() support for properties which are not directly accessible any more. A deprecation message is
 * logged for every supported property.
 * 'accept' is removed from $this->current_parsing_options; '_xh' and 'xmlrpc_valid_parents' have the member of the
 * same name unset. Any other name triggers an E_USER_WARNING.
 *
 * @param string $name
 * @return void
 */
public function __unset($name)
{
    // q: does this make sense at all?
    if ($name === 'accept') {
        $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
        unset($this->current_parsing_options['accept']);

        return;
    }

    if ($name === '_xh' || $name === 'xmlrpc_valid_parents') {
        $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
        unset($this->$name);

        return;
    }

    /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
    $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
    trigger_error('Undefined property via __unset(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING);
}
||
1123 | } |
||
1124 |