This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | /** |
||
3 | * Reader for XMP data containing properties relevant to images. |
||
4 | * |
||
5 | * This program is free software; you can redistribute it and/or modify |
||
6 | * it under the terms of the GNU General Public License as published by |
||
7 | * the Free Software Foundation; either version 2 of the License, or |
||
8 | * (at your option) any later version. |
||
9 | * |
||
10 | * This program is distributed in the hope that it will be useful, |
||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
13 | * GNU General Public License for more details. |
||
14 | * |
||
15 | * You should have received a copy of the GNU General Public License along |
||
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
18 | * http://www.gnu.org/copyleft/gpl.html |
||
19 | * |
||
20 | * @file |
||
21 | * @ingroup Media |
||
22 | */ |
||
23 | |||
24 | use Psr\Log\LoggerAwareInterface; |
||
25 | use Psr\Log\LoggerInterface; |
||
26 | use Psr\Log\NullLogger; |
||
27 | use Wikimedia\ScopedCallback; |
||
0 ignored issues
–
show
|
|||
28 | |||
29 | /** |
||
30 | * Class for reading xmp data containing properties relevant to |
||
31 | * images, and spitting out an array that FormatMetadata accepts. |
||
32 | * |
||
33 | * Note, this is not meant to recognize every possible thing you can |
||
34 | * encode in XMP. It should recognize all the properties we want. |
||
35 | * For example it doesn't have support for structures with multiple |
||
36 | * nesting levels, as none of the properties we're supporting use that |
||
37 | * feature. If it comes across properties it doesn't recognize, it should |
||
38 | * ignore them. |
||
39 | * |
||
40 | * The public methods one would call in this class are |
||
41 | * - parse( $content ) |
||
42 | * Reads in xmp content. |
||
43 | * Can potentially be called multiple times with partial data each time. |
||
44 | * - parseExtended( $content ) |
||
45 | * Reads XMPExtended blocks (jpeg files only). |
||
46 | * - getResults |
||
47 | * Outputs a results array. |
||
48 | * |
||
49 | * Note XMP kind of looks like rdf. They are not the same thing - XMP is |
||
50 | * encoded as a specific subset of rdf. This class can read XMP. It cannot |
||
51 | * read rdf. |
||
52 | * |
||
53 | */ |
||
54 | class XMPReader implements LoggerAwareInterface { |
||
55 | /** @var array XMP item configuration array */ |
||
56 | protected $items; |
||
57 | |||
58 | /** @var array Array to hold the current element (and previous element, and so on) */ |
||
59 | private $curItem = []; |
||
60 | |||
61 | /** @var bool|string The structure name when processing nested structures. */ |
||
62 | private $ancestorStruct = false; |
||
63 | |||
64 | /** @var bool|string Temporary holder for character data that appears in xmp doc. */ |
||
65 | private $charContent = false; |
||
66 | |||
67 | /** @var array Stores the state the xmpreader is in (see MODE_FOO constants) */ |
||
68 | private $mode = []; |
||
69 | |||
70 | /** @var array Array to hold results */ |
||
71 | private $results = []; |
||
72 | |||
73 | /** @var bool If we're doing a seq or bag. */ |
||
74 | private $processingArray = false; |
||
75 | |||
76 | /** @var bool|string Used for lang alts only */ |
||
77 | private $itemLang = false; |
||
78 | |||
79 | /** @var resource A resource handle for the XML parser */ |
||
80 | private $xmlParser; |
||
81 | |||
82 | /** @var bool|string Character set like 'UTF-8' */ |
||
83 | private $charset = false; |
||
84 | |||
85 | /** @var int */ |
||
86 | private $extendedXMPOffset = 0; |
||
87 | |||
88 | /** @var int Flag determining if the XMP is safe to parse **/ |
||
89 | private $parsable = 0; |
||
90 | |||
91 | /** @var string Buffer of XML to parse **/ |
||
92 | private $xmlParsableBuffer = ''; |
||
93 | |||
94 | /** |
||
95 | * These are various mode constants. |
||
96 | * they are used to figure out what to do |
||
97 | * with an element when its encountered. |
||
98 | * |
||
99 | * For example, MODE_IGNORE is used when processing |
||
100 | * a property we're not interested in. So if a new |
||
101 | * element pops up when we're in that mode, we ignore it. |
||
102 | */ |
||
103 | const MODE_INITIAL = 0; |
||
104 | const MODE_IGNORE = 1; |
||
105 | const MODE_LI = 2; |
||
106 | const MODE_LI_LANG = 3; |
||
107 | const MODE_QDESC = 4; |
||
108 | |||
109 | // The following MODE constants are also used in the |
||
110 | // $items array to denote what type of property the item is. |
||
111 | const MODE_SIMPLE = 10; |
||
112 | const MODE_STRUCT = 11; // structure (associative array) |
||
113 | const MODE_SEQ = 12; // ordered list |
||
114 | const MODE_BAG = 13; // unordered list |
||
115 | const MODE_LANG = 14; |
||
116 | const MODE_ALT = 15; // non-language alt. Currently not implemented, and not needed atm. |
||
117 | const MODE_BAGSTRUCT = 16; // A BAG of Structs. |
||
118 | |||
119 | const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; |
||
120 | const NS_XML = 'http://www.w3.org/XML/1998/namespace'; |
||
121 | |||
122 | // States used while determining if XML is safe to parse |
||
123 | const PARSABLE_UNKNOWN = 0; |
||
124 | const PARSABLE_OK = 1; |
||
125 | const PARSABLE_BUFFERING = 2; |
||
126 | const PARSABLE_NO = 3; |
||
127 | |||
128 | /** |
||
129 | * @var LoggerInterface |
||
130 | */ |
||
131 | private $logger; |
||
132 | |||
/**
 * Build a reader: attach a logger, load the XMP item table and
 * create a fresh XML parser.
 *
 * @param LoggerInterface|null $logger Where diagnostics go; a NullLogger is used when omitted.
 * @throws RuntimeException If xml_parser_create_ns() is not available.
 */
function __construct( LoggerInterface $logger = null ) {
	// Callers are expected to have checked this already; fail loudly if they did not.
	if ( !function_exists( 'xml_parser_create_ns' ) ) {
		throw new RuntimeException( 'XMP support requires XML Parser' );
	}

	$this->setLogger( $logger ?: new NullLogger() );
	$this->items = XMPInfo::getItems();
	$this->resetXMLParser();
}
||
154 | |||
/**
 * Set the logger this reader reports to.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
||
158 | |||
/**
 * Release the XML parser, if one is currently held.
 *
 * @note It is unclear whether this is strictly required, or whether PHP
 *  would free the parser by itself once nothing references it any more.
 */
private function destroyXMLParser() {
	if ( !$this->xmlParser ) {
		// Nothing to release.
		return;
	}

	xml_parser_free( $this->xmlParser );
	$this->xmlParser = null;
}
||
172 | |||
/**
 * Throw away the current XML parser and start over with a new one.
 *
 * Mainly needed when a single file carries several XMP documents,
 * e.g. a JPEG with ExtendedXMP. Also resets the doctype-safety state
 * and its buffer.
 */
private function resetXMLParser() {
	$this->destroyXMLParser();

	// Namespace-aware parser; namespace URI and tag name are joined by a space.
	$parser = xml_parser_create_ns( 'UTF-8', ' ' );
	xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, 0 );
	xml_parser_set_option( $parser, XML_OPTION_SKIP_WHITE, 1 );
	xml_set_element_handler( $parser, [ $this, 'startElement' ], [ $this, 'endElement' ] );
	xml_set_character_data_handler( $parser, [ $this, 'char' ] );
	$this->xmlParser = $parser;

	$this->parsable = self::PARSABLE_UNKNOWN;
	$this->xmlParsableBuffer = '';
}
||
194 | |||
/**
 * Tell whether this PHP installation has what the class needs:
 * the xml_parser_create_ns() function and the XMLReader class.
 *
 * @return bool
 */
public static function isSupported() {
	if ( !function_exists( 'xml_parser_create_ns' ) ) {
		return false;
	}

	return class_exists( 'XMLReader' );
}
||
201 | |||
/**
 * Get the result array. Do some post-processing before returning
 * the array, and transform any metadata that is special-cased.
 *
 * The 'xmp-special' group holds metadata that affects how other values
 * are extracted (for example xmpNote:HasExtendedXMP). It is also used for
 * photoshop:AuthorsPosition, which is really part of another property
 * (see 2:85 in IPTC, and pg 21 of the IPTC4XMP standard), and for the
 * location fields. The group is consumed here and never returned.
 *
 * @return array Array of results as an array of arrays suitable for
 *  FormatMetadata::getFormattedData().
 */
public function getResults() {
	$data = $this->results;

	if ( isset( $data['xmp-special']['AuthorsPosition'] )
		&& is_string( $data['xmp-special']['AuthorsPosition'] )
		&& isset( $data['xmp-general']['Artist'][0] )
	) {
		// If there is more than one creator this only applies to the
		// first one. It also only applies to the dc:Creator prop, not
		// the exif:Artist prop.
		$data['xmp-general']['Artist'][0] =
			$data['xmp-special']['AuthorsPosition'] . ', '
			. $data['xmp-general']['Artist'][0];
	}

	// Flatten LocationShown and LocationCreated into the non-hierarchical
	// form used by the other location fields: each struct member is
	// appended to xmp-general under its own name plus a suffix.
	$locationSuffixes = [
		'LocationShown' => 'Dest',
		'LocationCreated' => 'Created',
	];
	foreach ( $locationSuffixes as $property => $suffix ) {
		// The is_array is just paranoia. It should always be an array.
		if ( !isset( $data['xmp-special'][$property][0] )
			|| !is_array( $data['xmp-special'][$property][0] )
		) {
			continue;
		}
		foreach ( $data['xmp-special'][$property] as $loc ) {
			if ( !is_array( $loc ) ) {
				// To avoid copying over the _type meta-fields.
				continue;
			}
			foreach ( $loc as $field => $val ) {
				$data['xmp-general'][$field . $suffix][] = $val;
			}
		}
	}

	// We don't want to return the special values, since they're
	// special and not info to be stored about the file.
	unset( $data['xmp-special'] );

	// Convert GPSAltitude to a real number, negative if below sea level.
	if ( isset( $data['xmp-exif']['GPSAltitudeRef'] )
		&& isset( $data['xmp-exif']['GPSAltitude'] )
	) {
		$altitude = $data['xmp-exif']['GPSAltitude'];
		// The value is expected to be a rational "numerator/denominator".
		$parts = is_string( $altitude ) ? explode( '/', $altitude ) : [];

		if ( count( $parts ) >= 2
			&& is_numeric( $parts[0] )
			&& is_numeric( $parts[1] )
			&& (float)$parts[1] != 0
		) {
			// Must convert to a real before multiplying by -1
			$value = $parts[0] / $parts[1];
			if ( $data['xmp-exif']['GPSAltitudeRef'] == '1' ) {
				$value *= -1;
			}
			$data['xmp-exif']['GPSAltitude'] = $value;
		} else {
			// Not a usable rational (no '/', non-numeric part, or zero
			// denominator): drop it instead of dividing blindly.
			$this->logger->info( __METHOD__ . ' Discarding malformed GPSAltitude value.' );
			unset( $data['xmp-exif']['GPSAltitude'] );
		}
		unset( $data['xmp-exif']['GPSAltitudeRef'] );
	}

	return $data;
}
||
290 | |||
/**
 * Main function to call to parse XMP. Use getResults to
 * get results.
 *
 * Any error during processing (including exceptions thrown by the
 * element/character handlers) is caught, written to the log, the result
 * array is blanked, the XML parser is discarded and false is returned.
 *
 * @param string $content XMP data
 * @param bool $allOfIt If this is all the data (true) or if its split up (false). Default true
 * @return bool Success.
 */
public function parse( $content, $allOfIt = true ) {
	if ( !$this->xmlParser ) {
		$this->resetXMLParser();
	}
	try {
		// Detect encoding by looking for a BOM, which is supposed to be in the
		// processing instruction.
		// See page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf
		if ( !$this->charset ) {
			$bomCharsets = [
				"\xEF\xBB\xBF" => 'UTF-8',
				"\xFE\xFF" => 'UTF-16BE',
				"\xFF\xFE" => 'UTF-16LE',
				"\x00\x00\xFE\xFF" => 'UTF-32BE',
				"\xFF\xFE\x00\x00" => 'UTF-32LE',
			];
			$bom = [];
			if ( preg_match( '/\xEF\xBB\xBF|\xFE\xFF|\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\xFF\xFE/',
				$content, $bom )
			) {
				if ( !isset( $bomCharsets[$bom[0]] ) ) {
					// this should be impossible to get to
					throw new RuntimeException( "Invalid BOM" );
				}
				$this->charset = $bomCharsets[$bom[0]];
			} else {
				// standard specifically says, if no bom assume utf-8
				$this->charset = 'UTF-8';
			}
		}
		if ( $this->charset !== 'UTF-8' ) {
			// don't convert if already utf-8
			MediaWiki\suppressWarnings();
			$converted = iconv( $this->charset, 'UTF-8//IGNORE', $content );
			MediaWiki\restoreWarnings();
			if ( $converted === false ) {
				// iconv() signals failure with false; feeding that to the XML
				// parser would silently drop this chunk of the document.
				throw new RuntimeException(
					'Could not convert XMP content from ' . $this->charset . ' to UTF-8.'
				);
			}
			$content = $converted;
		}

		// Ensure the XMP block does not have an xml doctype declaration, which
		// could declare entities unsafe to parse with xml_parse (T85848/T71210).
		if ( $this->parsable !== self::PARSABLE_OK ) {
			if ( $this->parsable === self::PARSABLE_NO ) {
				throw new RuntimeException( 'Unsafe doctype declaration in XML.' );
			}

			// Prepend whatever earlier calls buffered while still undecided.
			$content = $this->xmlParsableBuffer . $content;
			if ( !$this->checkParseSafety( $content ) ) {
				if ( !$allOfIt && $this->parsable !== self::PARSABLE_NO ) {
					// parse wasn't Unsuccessful yet, so return true
					// in this case.
					return true;
				}
				$msg = ( $this->parsable === self::PARSABLE_NO ) ?
					'Unsafe doctype declaration in XML.' :
					'No root element found in XML.';
				throw new RuntimeException( $msg );
			}
		}

		$ok = xml_parse( $this->xmlParser, $content, $allOfIt );
		if ( !$ok ) {
			$code = xml_get_error_code( $this->xmlParser );
			$error = xml_error_string( $code );
			$line = xml_get_current_line_number( $this->xmlParser );
			$col = xml_get_current_column_number( $this->xmlParser );
			$offset = xml_get_current_byte_index( $this->xmlParser );

			$this->logger->warning(
				'{method} : Error reading XMP content: {error} ' .
				'(line: {line} column: {column} byte offset: {offset})',
				[
					'method' => __METHOD__,
					'error_code' => $code,
					'error' => $error,
					'line' => $line,
					'column' => $col,
					'offset' => $offset,
					'content' => $content,
				] );
			$this->results = []; // blank if error.
			$this->destroyXMLParser();
			return false;
		}
	} catch ( Exception $e ) {
		$this->logger->warning(
			'{method} Exception caught while parsing: ' . $e->getMessage(),
			[
				'method' => __METHOD__,
				'exception' => $e,
				'content' => $content,
			]
		);
		$this->results = [];
		// Same cleanup as the xml_parse() failure path: never keep feeding
		// a parser that stopped in the middle of a document.
		$this->destroyXMLParser();
		return false;
	}
	if ( $allOfIt ) {
		$this->destroyXMLParser();
	}

	return true;
}
||
411 | |||
/**
 * Entry point for XMPExtended blocks in jpeg files
 *
 * A block is laid out as: 32 byte GUID, 4 byte big-endian full length of
 * the whole ExtendedXMP document, 4 byte big-endian offset of this portion
 * within that document, then the portion itself.
 *
 * Portions must arrive in order; an out-of-order portion is ignored.
 *
 * @todo In serious need of testing
 * @see http://www.adobe.ge/devnet/xmp/pdfs/XMPSpecificationPart3.pdf XMP spec part 3 page 20
 * @param string $content XMPExtended block minus the namespace signature
 * @return bool If it succeeded.
 */
public function parseExtended( $content ) {
	// @todo FIXME: This is untested. Hard to find example files
	// or programs that make such files..
	$guid = substr( $content, 0, 32 );
	if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] )
		|| $this->results['xmp-special']['HasExtendedXMP'] !== $guid
	) {
		$this->logger->info( __METHOD__ .
			" Ignoring XMPExtended block due to wrong guid (guid= '$guid')" );

		return false;
	}

	// Only unpack when the full 40 byte header is present, so that a
	// truncated block is reported below rather than raising a warning.
	$len = strlen( $content ) >= 40
		? unpack( 'Nlength/Noffset', substr( $content, 32, 8 ) )
		: false;

	if ( !$len ||
		$len['length'] < 4 ||
		$len['offset'] < 0 ||
		$len['offset'] > $len['length']
	) {
		$this->logger->info(
			__METHOD__ . ' Error reading extended XMP block, invalid length or offset.'
		);

		return false;
	}

	// we're not very robust here. we should accept it in the wrong order.
	// To quote the XMP standard:
	// "A JPEG writer should write the ExtendedXMP marker segments in order,
	// immediately following the StandardXMP. However, the JPEG standard
	// does not require preservation of marker segment order. A robust JPEG
	// reader should tolerate the marker segments in any order."
	// On the other hand, the probability that an image will have more than
	// 128k of metadata is rather low... so the probability that it will have
	// > 128k, and be in the wrong order is very low...

	if ( $len['offset'] !== $this->extendedXMPOffset ) {
		$this->logger->info( __METHOD__ . ' Ignoring XMPExtended block due to wrong order. (Offset was '
			. $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')' );

		return false;
	}

	if ( $len['offset'] === 0 ) {
		// if we're starting the extended block, we've probably already
		// done the XMPStandard block, so reset.
		$this->resetXMLParser();
	}

	$actualContent = (string)substr( $content, 40 );

	// 'length' is the size of the complete ExtendedXMP document, not of this
	// portion, so the running offset advances by the bytes actually carried
	// here. The document is complete once the offset reaches 'length'.
	$this->extendedXMPOffset += strlen( $actualContent );
	$atEnd = ( $this->extendedXMPOffset >= $len['length'] );

	$this->logger->debug( __METHOD__ . ' Parsing a XMPExtended block' );

	return $this->parse( $actualContent, $atEnd );
}
||
482 | |||
/**
 * Character data handler.
 * Invoked by the XML parser whenever character data is found in the
 * xmp document.
 *
 * Whitespace-only data is skipped, and so is anything seen while in
 * MODE_IGNORE. Character data is only legal in MODE_SIMPLE and
 * MODE_QDESC; in any other mode an exception is thrown.
 *
 * As an example, this happens when we encounter XMP like:
 * <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio>
 * and are processing the 0/10 bit.
 *
 * @param XMLParser $parser XMLParser reference to the xml parser
 * @param string $data Character data
 * @throws RuntimeException On invalid data
 */
function char( $parser, $data ) {
	$text = trim( $data );
	if ( $text === '' ) {
		return;
	}

	if ( !isset( $this->mode[0] ) ) {
		throw new RuntimeException( 'Unexpected character data before first rdf:Description element' );
	}

	$currentMode = $this->mode[0];
	if ( $currentMode === self::MODE_IGNORE ) {
		return;
	}

	$acceptsText = $currentMode === self::MODE_SIMPLE || $currentMode === self::MODE_QDESC;
	if ( !$acceptsText ) {
		throw new RuntimeException( 'character data where not expected. (mode ' . $currentMode . ')' );
	}

	// to check, how does this handle w.s.
	// The parser may deliver one text node in several pieces, so append.
	$this->charContent = ( $this->charContent === false )
		? $text
		: $this->charContent . $text;
}
||
527 | |||
/**
 * Check if a block of XML is safe to pass to xml_parse, i.e. doesn't
 * contain a doctype declaration which could contain a dos attack if we
 * parse it and expand internal entities (T85848).
 *
 * Updates $this->parsable as a side effect: PARSABLE_OK when an element
 * is reached first, PARSABLE_NO when a doctype is reached first, and
 * PARSABLE_BUFFERING (with $content stored in the buffer) when neither
 * was reached in the readable part of the XML.
 *
 * @param string $content xml string to check for parse safety
 * @return bool true if the xml is safe to parse, false otherwise
 */
private function checkParseSafety( $content ) {
	$reader = new XMLReader();
	$result = null;

	// For XMLReader to parse incomplete/invalid XML, it has to be open()'ed
	// instead of using XML().
	$reader->open(
		'data://text/plain,' . urlencode( $content ),
		null,
		LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET
	);

	$oldDisable = libxml_disable_entity_loader( true );
	// Not read again: it restores the previous entity loader setting when
	// it goes out of scope at the end of this method.
	/** @noinspection PhpUnusedLocalVariableInspection */
	$reset = new ScopedCallback(
		'libxml_disable_entity_loader',
		[ $oldDisable ]
	);
	$reader->setParserProperty( XMLReader::SUBST_ENTITIES, false );

	// Even with LIBXML_NOWARNING set, XMLReader::read gives a warning
	// when parsing truncated XML, which causes unit tests to fail.
	MediaWiki\suppressWarnings();
	while ( $reader->read() ) {
		if ( $reader->nodeType === XMLReader::ELEMENT ) {
			// Reached the first element without hitting a doctype declaration
			$this->parsable = self::PARSABLE_OK;
			$result = true;
			break;
		}
		if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
			$this->parsable = self::PARSABLE_NO;
			$result = false;
			break;
		}
	}
	// Release the reader's input now instead of waiting for it to be collected.
	$reader->close();
	MediaWiki\restoreWarnings();

	if ( $result !== null ) {
		return $result;
	}

	// Reached the end of the parsable xml without finding an element
	// or doctype. Buffer and try again.
	$this->parsable = self::PARSABLE_BUFFERING;
	$this->xmlParsableBuffer = $content;
	return false;
}
||
584 | |||
/**
 * Closing element seen while in MODE_IGNORE.
 *
 * If it closes the very element that started the ignored section,
 * leave MODE_IGNORE; otherwise it is part of the ignored content and
 * nothing happens.
 *
 * @param string $elm Namespace of element followed by a space and then tag name of element.
 */
private function endElementModeIgnore( $elm ) {
	if ( $this->curItem[0] !== $elm ) {
		return;
	}

	array_shift( $this->curItem );
	array_shift( $this->mode );
}
||
597 | |||
/**
 * Closing element seen while in MODE_SIMPLE.
 *
 * Normally this means a property value is complete and has to be
 * stored in the results array. For example, when processing:
 * <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio>
 * this runs on </exif:DigitalZoomRatio>.
 *
 * It is also reached at the end of a member of a compound data
 * structure (like a member of an array).
 *
 * @param string $elm Namespace, space, and tag name.
 */
private function endElementModeSimple( $elm ) {
	if ( $this->charContent !== false ) {
		// Inside an array the closing tag is rdf:li, so file the value
		// under the enclosing property's name instead.
		$qualifiedName = $this->processingArray ? $this->curItem[0] : $elm;
		list( $ns, $tag ) = explode( ' ', $qualifiedName, 2 );

		$this->saveValue( $ns, $tag, $this->charContent );
		$this->charContent = false; // reset
	}

	array_shift( $this->curItem );
	array_shift( $this->mode );
}
||
629 | |||
/**
 * Hit a closing element in MODE_STRUCT, MODE_SEQ, MODE_BAG
 * generally means we've finished processing a nested structure.
 * resets some internal variables to indicate that.
 *
 * Note this means we hit the closing element not the "</rdf:Seq>".
 *
 * @par For example, when processing:
 * @code{,xml}
 * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li>
 *   </rdf:Seq> </exif:ISOSpeedRatings>
 * @endcode
 *
 * This method is called when we hit the "</exif:ISOSpeedRatings>" tag.
 *
 * If the item has a 'validate' entry, the collected value is handed to
 * that validator; a value the validator sets to null is removed from
 * the results.
 *
 * @param string $elm Namespace . space . tag name.
 * @throws RuntimeException On a nesting mismatch.
 */
private function endElementNested( $elm ) {
	/* cur item must be the same as $elm, unless if in MODE_STRUCT
	   in which case it could also be rdf:Description */
	if ( $this->curItem[0] !== $elm
		&& !( $elm === self::NS_RDF . ' Description'
			&& $this->mode[0] === self::MODE_STRUCT )
	) {
		throw new RuntimeException( "nesting mismatch. got a </$elm> but expected a </" .
			$this->curItem[0] . '>' );
	}

	// Validate structures.
	list( $ns, $tag ) = explode( ' ', $elm, 2 );
	if ( isset( $this->items[$ns][$tag]['validate'] ) ) {
		$info = $this->items[$ns][$tag];
		$finalName = isset( $info['map_name'] )
			? $info['map_name'] : $tag;
		$group = 'xmp-' . $info['map_group'];

		if ( is_array( $info['validate'] ) ) {
			$validate = $info['validate'];
		} else {
			// A bare name refers to a method of XMPValidate.
			$validator = new XMPValidate( $this->logger );
			$validate = [ $validator, $info['validate'] ];
		}

		if ( !isset( $this->results[$group][$finalName] ) ) {
			// This can happen if all the members of the struct failed validation.
			$this->logger->debug( __METHOD__ . " <$ns:$tag> has no valid members." );
		} elseif ( is_callable( $validate ) ) {
			$val =& $this->results[$group][$finalName];
			call_user_func_array( $validate, [ $info, &$val, false ] );
			if ( is_null( $val ) ) {
				// the idea being the validation function will unset the variable if
				// its invalid.
				$this->logger->info( __METHOD__ . " <$ns:$tag> failed validation." );
				unset( $this->results[$group][$finalName] );
			}
		} else {
			// $validate[0] is an object when the validator was given as a method
			// name; an object cannot be concatenated into the message directly.
			$owner = is_object( $validate[0] ) ? get_class( $validate[0] ) : $validate[0];
			$this->logger->warning( __METHOD__ . " Validation function for $finalName ("
				. $owner . '::' . $validate[1] . '()) is not callable.' );
		}
	}

	array_shift( $this->curItem );
	array_shift( $this->mode );
	$this->ancestorStruct = false;
	$this->processingArray = false;
	$this->itemLang = false;
}
||
698 | |||
/**
 * Closing element seen while in MODE_LI (the rdf:Seq, rdf:Bag or rdf:Alt
 * wrapper). Records what kind of list the collected value is.
 *
 * Note we still have to hit the outer "</property>"
 *
 * @par For example, when processing:
 * @code{,xml}
 * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li>
 *   </rdf:Seq> </exif:ISOSpeedRatings>
 * @endcode
 *
 * This method is called when we hit the "</rdf:Seq>".
 * (For comparison, we call endElementModeSimple when we
 * hit the "</rdf:li>")
 *
 * @param string $elm Namespace . ' ' . element name
 * @throws RuntimeException If $elm is not one of the rdf list wrappers.
 */
private function endElementModeLi( $elm ) {
	list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
	$info = $this->items[$ns][$tag];
	$finalName = isset( $info['map_name'] ) ? $info['map_name'] : $tag;
	$group = 'xmp-' . $info['map_group'];

	array_shift( $this->mode );

	if ( !isset( $this->results[$group][$finalName] ) ) {
		$this->logger->debug( __METHOD__ . " Empty compund element $finalName." );

		return;
	}

	// Wrappers whose list type does not depend on the item's mode.
	$listTypes = [
		self::NS_RDF . ' Seq' => 'ol',
		self::NS_RDF . ' Bag' => 'ul',
	];

	if ( isset( $listTypes[$elm] ) ) {
		$this->results[$group][$finalName]['_type'] = $listTypes[$elm];

		return;
	}

	if ( $elm !== self::NS_RDF . ' Alt' ) {
		throw new RuntimeException(
			__METHOD__ . " expected </rdf:seq> or </rdf:bag> but instead got $elm."
		);
	}

	// extra if needed as you could theoretically have a non-language alt.
	if ( $info['mode'] === self::MODE_LANG ) {
		$this->results[$group][$finalName]['_type'] = 'lang';
	}
}
||
747 | |||
/**
 * Closing element seen while in MODE_QDESC, i.e. mostly when finishing
 * a simple value that carries qualifiers.
 *
 * Qualifiers aren't all that common, and we don't do anything
 * with them: only the rdf:value is saved.
 *
 * @param string $elm Namespace and element
 */
private function endElementModeQDesc( $elm ) {
	if ( $elm !== self::NS_RDF . ' value' ) {
		// Any other closing element leaves this mode.
		array_shift( $this->mode );
		array_shift( $this->curItem );

		return;
	}

	// The actual value: store it under the enclosing property's name.
	list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
	$this->saveValue( $ns, $tag, $this->charContent );
}
||
770 | |||
771 | /** |
||
772 | * Handler for hitting a closing element. |
||
773 | * |
||
774 | * generally just calls a helper function depending on what |
||
775 | * mode we're in. |
||
776 | * |
||
777 | * Ignores the outer wrapping elements that are optional in |
||
778 | * xmp and have no meaning. |
||
779 | * |
||
780 | * @param XMLParser $parser |
||
781 | * @param string $elm Namespace . ' ' . element name |
||
782 | * @throws RuntimeException |
||
783 | */ |
||
function endElement( $parser, $elm ) {
	// The outer wrapper elements are optional in XMP and carry no meaning.
	if ( $elm === ( self::NS_RDF . ' RDF' )
		|| $elm === 'adobe:ns:meta/ xmpmeta'
		|| $elm === 'adobe:ns:meta/ xapmeta'
	) {
		return;
	}

	if ( $elm === self::NS_RDF . ' type' ) {
		// These aren't really supported properly yet.
		// However, it appears they are almost never used.
		// Only logged here; normal processing continues below.
		$this->logger->info( __METHOD__ . ' encountered <rdf:type>' );
	}

	if ( strpos( $elm, ' ' ) === false ) {
		// This probably shouldn't happen.
		// However, there is a bug in an adobe product
		// that forgets the namespace on some things.
		// (Luckily they are unimportant things).
		$this->logger->info( __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." );

		return;
	}

	// Test the mode stack itself, not its top entry: the top entry is a
	// scalar mode constant, and count() on a scalar is a warning in
	// PHP 7.2+ and a TypeError in PHP 8.
	if ( count( $this->mode ) === 0 ) {
		// This should never ever happen and means
		// there is a pretty major bug in this class.
		throw new RuntimeException( 'Encountered end element with no mode' );
	}

	if ( count( $this->curItem ) == 0 && $this->mode[0] !== self::MODE_INITIAL ) {
		// Just to be paranoid. Should always have a curItem, except for
		// initially (aka during MODE_INITIAL).
		throw new RuntimeException( "Hit end element </$elm> but no curItem" );
	}

	// Dispatch to the helper matching the mode on top of the stack.
	switch ( $this->mode[0] ) {
		case self::MODE_IGNORE:
			$this->endElementModeIgnore( $elm );
			break;
		case self::MODE_SIMPLE:
			$this->endElementModeSimple( $elm );
			break;
		case self::MODE_STRUCT:
		case self::MODE_SEQ:
		case self::MODE_BAG:
		case self::MODE_LANG:
		case self::MODE_BAGSTRUCT:
			$this->endElementNested( $elm );
			break;
		case self::MODE_INITIAL:
			// Only the outer rdf:Description may close while in MODE_INITIAL.
			if ( $elm === self::NS_RDF . ' Description' ) {
				array_shift( $this->mode );
			} else {
				throw new RuntimeException( 'Element ended unexpectedly while in MODE_INITIAL' );
			}
			break;
		case self::MODE_LI:
		case self::MODE_LI_LANG:
			$this->endElementModeLi( $elm );
			break;
		case self::MODE_QDESC:
			$this->endElementModeQDesc( $elm );
			break;
		default:
			$this->logger->warning( __METHOD__ . " no mode (elm = $elm)" );
			break;
	}
}
||
854 | |||
855 | /** |
||
856 | * Hit an opening element while in MODE_IGNORE |
||
857 | * |
||
858 | * XMP is extensible, so ignore any tag we don't understand. |
||
859 | * |
||
860 | * Mostly ignores, unless we encounter the element that we are ignoring. |
||
861 | * in which case we add it to the item stack, so we can ignore things |
||
862 | * that are nested, correctly. |
||
863 | * |
||
864 | * @param string $elm Namespace . ' ' . tag name |
||
865 | */ |
||
private function startElementModeIgnore( $elm ) {
	// Only a nested occurrence of the element currently being ignored
	// matters; everything else inside it is skipped without bookkeeping.
	if ( $elm !== $this->curItem[0] ) {
		return;
	}

	// Push another ignore level so the matching close tags pair up.
	array_unshift( $this->curItem, $elm );
	array_unshift( $this->mode, self::MODE_IGNORE );
}
||
872 | |||
873 | /** |
||
874 | * Start element in MODE_BAG (unordered array) |
||
875 | * this should always be <rdf:Bag> |
||
876 | * |
||
877 | * @param string $elm Namespace . ' ' . tag |
||
878 | * @throws RuntimeException If we have an element that's not <rdf:Bag> |
||
879 | */ |
||
private function startElementModeBag( $elm ) {
	// Anything other than <rdf:Bag> is invalid in this mode.
	if ( $elm !== self::NS_RDF . ' Bag' ) {
		throw new RuntimeException( "Expected <rdf:Bag> but got $elm." );
	}

	// The children of the bag are list items.
	array_unshift( $this->mode, self::MODE_LI );
}
||
887 | |||
888 | /** |
||
889 | * Start element in MODE_SEQ (ordered array) |
||
890 | * this should always be <rdf:Seq> |
||
891 | * |
||
892 | * @param string $elm Namespace . ' ' . tag |
||
893 | * @throws RuntimeException If we have an element that's not <rdf:Seq> |
||
894 | */ |
||
private function startElementModeSeq( $elm ) {
	$isSeq = ( $elm === self::NS_RDF . ' Seq' );
	$isBag = ( $elm === self::NS_RDF . ' Bag' );

	if ( !$isSeq && !$isBag ) {
		throw new RuntimeException( "Expected <rdf:Seq> but got $elm." );
	}

	if ( $isBag ) {
		// bug 27105: tolerate a Bag where a Seq was expected.
		$this->logger->info( __METHOD__ . ' Expected an rdf:Seq, but got an rdf:Bag. Pretending'
			. ' it is a Seq, since some buggy software is known to screw this up.' );
	}

	// Either way, the children are list items.
	array_unshift( $this->mode, self::MODE_LI );
}
||
907 | |||
908 | /** |
||
909 | * Start element in MODE_LANG (language alternative) |
||
910 | * this should always be <rdf:Alt> |
||
911 | * |
||
912 | * This tag tends to be used for metadata like describe this |
||
913 | * picture, which can be translated into multiple languages. |
||
914 | * |
||
915 | * XMP supports non-linguistic alternative selections, |
||
916 | * which are really only used for thumbnails, which |
||
917 | * we don't care about. |
||
918 | * |
||
919 | * @param string $elm Namespace . ' ' . tag |
||
920 | * @throws RuntimeException If we have an element that's not <rdf:Alt> |
||
921 | */ |
||
private function startElementModeLang( $elm ) {
	// A language alternative must be wrapped in <rdf:Alt>.
	if ( $elm !== self::NS_RDF . ' Alt' ) {
		// The message names the element this mode actually expects
		// (it previously said <rdf:Seq>).
		throw new RuntimeException( "Expected <rdf:Alt> but got $elm." );
	}

	// The children of the Alt are language-tagged list items.
	array_unshift( $this->mode, self::MODE_LI_LANG );
}
||
929 | |||
930 | /** |
||
931 | * Handle an opening element when in MODE_SIMPLE |
||
932 | * |
||
933 | * This should not happen often. This is for if a simple element |
||
934 | * already opened has a child element. Could happen for a |
||
935 | * qualified element. |
||
936 | * |
||
937 | * For example: |
||
938 | * <exif:DigitalZoomRatio><rdf:Description><rdf:value>0/10</rdf:value> |
||
939 | * <foo:someQualifier>Bar</foo:someQualifier> </rdf:Description> |
||
940 | * </exif:DigitalZoomRatio> |
||
941 | * |
||
942 | * This method is called when processing the <rdf:Description> element |
||
943 | * |
||
944 | * @param string $elm Namespace and tag names separated by space. |
||
945 | * @param array $attribs Attributes of the element. |
||
946 | * @throws RuntimeException |
||
947 | */ |
||
private function startElementModeSimple( $elm, $attribs ) {
	$valueName = self::NS_RDF . ' value';

	if ( $elm === self::NS_RDF . ' Description' ) {
		// The simple value has qualifiers: enter MODE_QDESC and duplicate
		// the current item for the nested level.
		array_unshift( $this->mode, self::MODE_QDESC );
		array_unshift( $this->curItem, $this->curItem[0] );

		// The value may be given as an rdf:value attribute on the
		// rdf:Description itself.
		if ( isset( $attribs[$valueName] ) ) {
			list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
			$this->saveValue( $ns, $tag, $attribs[$valueName] );
		}

		return;
	}

	if ( $elm === $valueName ) {
		// This should not be here.
		throw new RuntimeException( __METHOD__ . ' Encountered <rdf:value> where it was unexpected.' );
	}

	// Something else we don't recognize, like a qualifier maybe: ignore it.
	$this->logger->info( __METHOD__ .
		" Encountered element <$elm> where only expecting character data as value of " .
		$this->curItem[0] );
	array_unshift( $this->mode, self::MODE_IGNORE );
	array_unshift( $this->curItem, $elm );
}
||
970 | |||
971 | /** |
||
972 | * Start an element when in MODE_QDESC. |
||
973 | * This generally happens when a simple element has an inner |
||
974 | * rdf:Description to hold qualifier elements. |
||
975 | * |
||
976 | * For example in: |
||
977 | * <exif:DigitalZoomRatio><rdf:Description><rdf:value>0/10</rdf:value> |
||
978 | * <foo:someQualifier>Bar</foo:someQualifier> </rdf:Description> |
||
979 | * </exif:DigitalZoomRatio> |
||
980 | * Called when processing the <rdf:value> or <foo:someQualifier>. |
||
981 | * |
||
982 | * @param string $elm Namespace and tag name separated by a space. |
||
983 | * |
||
984 | */ |
||
private function startElementModeQDesc( $elm ) {
	if ( $elm !== self::NS_RDF . ' value' ) {
		// Not <rdf:value>, so it is a qualifier, which we ignore.
		array_unshift( $this->mode, self::MODE_IGNORE );
		array_unshift( $this->curItem, $elm );
	}
	// Opening <rdf:value> needs no action here.
}
||
994 | |||
995 | /** |
||
996 | * Starting an element when in MODE_INITIAL |
||
997 | * This usually happens when we hit an element inside |
||
998 | * the outer rdf:Description |
||
999 | * |
||
1000 | * This is generally where most properties start. |
||
1001 | * |
||
1002 | * @param string $ns Namespace |
||
1003 | * @param string $tag Tag name (without namespace prefix) |
||
1004 | * @param array $attribs Array of attributes |
||
1005 | * @throws RuntimeException |
||
1006 | */ |
||
private function startElementModeInitial( $ns, $tag, $attribs ) {
	if ( $ns !== self::NS_RDF ) {
		$qualifiedName = $ns . ' ' . $tag;

		if ( !isset( $this->items[$ns][$tag] ) ) {
			// This element is not on our list of allowed elements so ignore.
			$this->logger->debug( __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
			array_unshift( $this->mode, self::MODE_IGNORE );
			array_unshift( $this->curItem, $qualifiedName );

			return;
		}

		$itemInfo = $this->items[$ns][$tag];

		if ( isset( $itemInfo['structPart'] ) ) {
			// This element should only appear as a child of a structure,
			// but it appears here outside one. Something weird is
			// happening, so ignore this element and its children.
			$this->logger->warning( "Encountered <$ns:$tag> outside"
				. " of its expected parent. Ignoring." );

			array_unshift( $this->mode, self::MODE_IGNORE );
			array_unshift( $this->curItem, $qualifiedName );

			return;
		}

		// Known property: push its mode and name onto the stacks.
		$mode = $itemInfo['mode'];
		array_unshift( $this->mode, $mode );
		array_unshift( $this->curItem, $qualifiedName );

		if ( $mode === self::MODE_STRUCT ) {
			// Record the (possibly mapped) struct name for its children.
			$this->ancestorStruct = isset( $itemInfo['map_name'] )
				? $itemInfo['map_name']
				: $tag;
		}

		if ( $this->charContent !== false ) {
			// Something weird.
			// Should not happen in valid XMP.
			throw new RuntimeException( 'tag nested in non-whitespace characters.' );
		}
	}

	// Process attributes (also reached for rdf-namespace elements).
	$this->doAttribs( $attribs );
}
||
1049 | |||
1050 | /** |
||
1051 | * Hit an opening element when in a Struct (MODE_STRUCT) |
||
1052 | * This is generally for fields of a compound property. |
||
1053 | * |
||
1054 | * Example of a struct (abbreviated; flash has more properties): |
||
1055 | * |
||
1056 | * <exif:Flash> <rdf:Description> <exif:Fired>True</exif:Fired> |
||
1057 | * <exif:Mode>1</exif:Mode></rdf:Description></exif:Flash> |
||
1058 | * |
||
1059 | * or: |
||
1060 | * |
||
1061 | * <exif:Flash rdf:parseType='Resource'> <exif:Fired>True</exif:Fired> |
||
1062 | * <exif:Mode>1</exif:Mode></exif:Flash> |
||
1063 | * |
||
1064 | * @param string $ns Namespace |
||
1065 | * @param string $tag Tag name (no ns) |
||
1066 | * @param array $attribs Array of attribs w/ values. |
||
1067 | * @throws RuntimeException |
||
1068 | */ |
||
private function startElementModeStruct( $ns, $tag, $attribs ) {
	if ( $ns !== self::NS_RDF ) {
		if ( isset( $this->items[$ns][$tag] ) ) {
			if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] )
				&& !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] )
			) {
				// This assumes that we don't have inter-namespace nesting
				// which we don't in all the properties we're interested in.
				throw new RuntimeException( " <$tag> appeared nested in <" . $this->ancestorStruct
					. "> where it is not allowed." );
			}

			// Known field of the struct: push its mode and name.
			array_unshift( $this->mode, $this->items[$ns][$tag]['mode'] );
			array_unshift( $this->curItem, $ns . ' ' . $tag );

			if ( $this->charContent !== false ) {
				// Something weird.
				// Should not happen in valid XMP.
				throw new RuntimeException( "tag <$tag> nested in non-whitespace characters (" .
					$this->charContent . ")." );
			}
		} else {
			// Unknown element inside the struct: ignore it and its children.
			// The name pushed must be the "namespace tag" string so that
			// MODE_IGNORE handling can compare it with later element names
			// (this method has no $elm variable).
			array_unshift( $this->mode, self::MODE_IGNORE );
			array_unshift( $this->curItem, $ns . ' ' . $tag );

			return;
		}
	}

	if ( $ns === self::NS_RDF && $tag === 'Description' ) {
		// Inner rdf:Description of a struct: its attributes may carry
		// field values, and it opens another struct level for the same item.
		$this->doAttribs( $attribs );
		array_unshift( $this->mode, self::MODE_STRUCT );
		array_unshift( $this->curItem, $this->curItem[0] );
	}
}
||
1103 | |||
1104 | /** |
||
1105 | * opening element in MODE_LI |
||
1106 | * process elements of arrays. |
||
1107 | * |
||
1108 | * Example: |
||
1109 | * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li> |
||
1110 | * </rdf:Seq> </exif:ISOSpeedRatings> |
||
1111 | * This method is called when we hit the <rdf:li> element. |
||
1112 | * |
||
1113 | * @param string $elm Namespace . ' ' . tagname |
||
1114 | * @param array $attribs Attributes. (needed for BAGSTRUCTS) |
||
1115 | * @throws RuntimeException If gets a tag other than <rdf:li> |
||
1116 | */ |
||
private function startElementModeLi( $elm, $attribs ) {
	if ( ( $elm ) !== self::NS_RDF . ' li' ) {
		throw new RuntimeException( "<rdf:li> expected but got $elm." );
	}

	if ( !isset( $this->mode[1] ) ) {
		// This should never ever ever happen. Checking for it
		// to be paranoid.
		throw new RuntimeException( 'In mode Li, but no 2xPrevious mode!' );
	}

	if ( $this->mode[1] !== self::MODE_BAGSTRUCT ) {
		// Normal BAG or SEQ containing simple values.
		array_unshift( $this->mode, self::MODE_SIMPLE );
		// curItem[0] is pushed again: one entry is for this specific item
		// and one is for the entire group.
		array_unshift( $this->curItem, $this->curItem[0] );
		$this->processingArray = true;

		return;
	}

	// This list item contains a compound (STRUCT) value.
	array_unshift( $this->mode, self::MODE_STRUCT );
	array_unshift( $this->curItem, $elm );
	$this->processingArray = true;

	if ( !isset( $this->curItem[1] ) ) {
		// be paranoid.
		throw new RuntimeException( 'Can not find parent of BAGSTRUCT.' );
	}

	// The parent property (now at curItem[1]) names the struct.
	list( $curNS, $curTag ) = explode( ' ', $this->curItem[1] );
	if ( isset( $this->items[$curNS][$curTag]['map_name'] ) ) {
		$this->ancestorStruct = $this->items[$curNS][$curTag]['map_name'];
	} else {
		$this->ancestorStruct = $curTag;
	}

	$this->doAttribs( $attribs );
}
||
1152 | |||
1153 | /** |
||
1154 | * Opening element in MODE_LI_LANG. |
||
1155 | * process elements of language alternatives |
||
1156 | * |
||
1157 | * Example: |
||
1158 | * <dc:title> <rdf:Alt> <rdf:li xml:lang="x-default">My house |
||
1159 | * </rdf:li> </rdf:Alt> </dc:title> |
||
1160 | * |
||
1161 | * This method is called when we hit the <rdf:li> element. |
||
1162 | * |
||
1163 | * @param string $elm Namespace . ' ' . tag |
||
1164 | * @param array $attribs Array of elements (most importantly xml:lang) |
||
1165 | * @throws RuntimeException If gets a tag other than <rdf:li> or if no xml:lang |
||
1166 | */ |
||
private function startElementModeLiLang( $elm, $attribs ) {
	if ( $elm !== self::NS_RDF . ' li' ) {
		throw new RuntimeException( __METHOD__ . " <rdf:li> expected but got $elm." );
	}

	$langAttr = self::NS_XML . ' lang';
	$hasValidLang = isset( $attribs[$langAttr] )
		&& preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[$langAttr] );
	if ( !$hasValidLang ) {
		throw new RuntimeException( __METHOD__
			. " <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
	}

	// Lang is case-insensitive.
	$this->itemLang = strtolower( $attribs[$langAttr] );

	// curItem[0] is pushed again: one entry is for this specific item
	// and one is for the entire group.
	array_unshift( $this->curItem, $this->curItem[0] );
	array_unshift( $this->mode, self::MODE_SIMPLE );
	$this->processingArray = true;
}
||
1187 | |||
1188 | /** |
||
1189 | * Hits an opening element. |
||
1190 | * Generally just calls a helper based on what MODE we're in. |
||
1191 | * Also does some initial set up for the wrapper element |
||
1192 | * |
||
1193 | * @param XMLParser $parser |
||
1194 | * @param string $elm Namespace "<space>" element |
||
1195 | * @param array $attribs Attribute name => value |
||
1196 | * @throws RuntimeException |
||
1197 | */ |
||
function startElement( $parser, $elm, $attribs ) {
	$isWrapper = $elm === self::NS_RDF . ' RDF'
		|| $elm === 'adobe:ns:meta/ xmpmeta'
		|| $elm === 'adobe:ns:meta/ xapmeta';
	if ( $isWrapper ) {
		// Outer wrapper elements are ignored.
		return;
	}

	if ( $elm === self::NS_RDF . ' Description' ) {
		if ( count( $this->mode ) === 0 ) {
			// Outermost rdf:Description: start in the initial mode.
			array_unshift( $this->mode, self::MODE_INITIAL );
		}
	} elseif ( $elm === self::NS_RDF . ' type' ) {
		// This doesn't support rdf:type properly.
		// In practice I have yet to see a file that uses this element,
		// however it is mentioned on page 25 of part 1 of the xmp
		// standard. It also seems as if exiv2 and exiftool do not support
		// this either (that or I misunderstand the standard).
		$this->logger->info( __METHOD__ . ' Encountered <rdf:type> which isn\'t currently supported' );
	}

	if ( strpos( $elm, ' ' ) === false ) {
		// This probably shouldn't happen.
		$this->logger->info( __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." );

		return;
	}

	list( $ns, $tag ) = explode( ' ', $elm, 2 );

	if ( count( $this->mode ) === 0 ) {
		// This should not happen.
		throw new RuntimeException( 'Error extracting XMP, '
			. "encountered <$elm> with no mode" );
	}

	// Hand off to the helper for the mode on top of the stack.
	$currentMode = $this->mode[0];
	switch ( $currentMode ) {
		case self::MODE_IGNORE:
			$this->startElementModeIgnore( $elm );
			break;
		case self::MODE_SIMPLE:
			$this->startElementModeSimple( $elm, $attribs );
			break;
		case self::MODE_INITIAL:
			$this->startElementModeInitial( $ns, $tag, $attribs );
			break;
		case self::MODE_STRUCT:
			$this->startElementModeStruct( $ns, $tag, $attribs );
			break;
		case self::MODE_BAG:
		case self::MODE_BAGSTRUCT:
			$this->startElementModeBag( $elm );
			break;
		case self::MODE_SEQ:
			$this->startElementModeSeq( $elm );
			break;
		case self::MODE_LANG:
			$this->startElementModeLang( $elm );
			break;
		case self::MODE_LI_LANG:
			$this->startElementModeLiLang( $elm, $attribs );
			break;
		case self::MODE_LI:
			$this->startElementModeLi( $elm, $attribs );
			break;
		case self::MODE_QDESC:
			$this->startElementModeQDesc( $elm );
			break;
		default:
			throw new RuntimeException( 'StartElement in unknown mode: ' . $this->mode[0] );
	}
}
||
1272 | |||
1273 | // @codingStandardsIgnoreStart Generic.Files.LineLength |
||
1274 | /** |
||
1275 | * Process attributes. |
||
1276 | * Simple values can be stored as either a tag or attribute |
||
1277 | * |
||
1278 | * Often the initial "<rdf:Description>" tag just has all the simple |
||
1279 | * properties as attributes. |
||
1280 | * |
||
1281 | * @par Example: |
||
1282 | * @code |
||
1283 | * <rdf:Description rdf:about="" xmlns:exif="http://ns.adobe.com/exif/1.0/" exif:DigitalZoomRatio="0/10"> |
||
1284 | * @endcode |
||
1285 | * |
||
1286 | * @param array $attribs Array attribute=>value |
||
1287 | * @throws RuntimeException |
||
1288 | */ |
||
1289 | // @codingStandardsIgnoreEnd |
||
private function doAttribs( $attribs ) {
	// rdf:parseType is checked first, as it can change how the other
	// attributes are interpreted.
	$parseTypeKey = self::NS_RDF . ' parseType';
	if ( isset( $attribs[$parseTypeKey] )
		&& $attribs[$parseTypeKey] === 'Resource'
		&& $this->mode[0] === self::MODE_SIMPLE
	) {
		// This is equivalent to having an inner rdf:Description.
		$this->mode[0] = self::MODE_QDESC;
	}

	foreach ( $attribs as $name => $val ) {
		if ( strpos( $name, ' ' ) === false ) {
			// This shouldn't happen, but so far some old software forgets
			// the namespace on rdf:about.
			$this->logger->info( __METHOD__ . ' Encountered non-namespaced attribute: '
				. " $name=\"$val\". Skipping. " );
			continue;
		}

		list( $ns, $tag ) = explode( ' ', $name, 2 );

		if ( $ns === self::NS_RDF ) {
			// resource is for url; the value attribute is a weird way of
			// just putting the contents. Other rdf attributes are skipped.
			if ( $tag === 'value' || $tag === 'resource' ) {
				$this->char( $this->xmlParser, $val );
			}
			continue;
		}

		if ( !isset( $this->items[$ns][$tag] ) ) {
			$this->logger->debug( __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
			continue;
		}

		// A recognized property given as an attribute.
		if ( $this->mode[0] === self::MODE_SIMPLE ) {
			throw new RuntimeException( __METHOD__
				. " $ns:$tag found as attribute where not allowed" );
		}
		$this->saveValue( $ns, $tag, $val );
	}
}
||
1327 | |||
1328 | /** |
||
1329 | * Given an extracted value, save it to results array |
||
1330 | * |
||
1331 | * note also uses $this->ancestorStruct and |
||
1332 | * $this->processingArray to determine what name to |
||
1333 | * save the value under. (in addition to $tag). |
||
1334 | * |
||
1335 | * @param string $ns Namespace of tag this is for |
||
1336 | * @param string $tag Tag name |
||
1337 | * @param string $val Value to save |
||
1338 | */ |
||
private function saveValue( $ns, $tag, $val ) {
	$info =& $this->items[$ns][$tag];
	$finalName = isset( $info['map_name'] )
		? $info['map_name'] : $tag;

	if ( isset( $info['validate'] ) ) {
		// 'validate' is either a ready-made callback array or the name of
		// a method on XMPValidate.
		if ( is_array( $info['validate'] ) ) {
			$validate = $info['validate'];
		} else {
			$validator = new XMPValidate( $this->logger );
			$validate = [ $validator, $info['validate'] ];
		}

		if ( is_callable( $validate ) ) {
			call_user_func_array( $validate, [ $info, &$val, true ] );
			// The reasoning behind using &$val instead of using the return
			// value is to be consistent between here and validating
			// structures.
			if ( is_null( $val ) ) {
				$this->logger->info( __METHOD__ . " <$ns:$tag> failed validation." );

				return;
			}
		} else {
			// $validate[0] is an XMPValidate object when the validator was
			// given as a method name; concatenating the object itself would
			// raise a string-conversion error, so log its class name.
			$validatorName = is_object( $validate[0] )
				? get_class( $validate[0] )
				: $validate[0];
			$this->logger->warning( __METHOD__ . " Validation function for $finalName ("
				. $validatorName . '::' . $validate[1] . '()) is not callable.' );
		}
	}

	// Choose where the value goes, based on struct/array context.
	if ( $this->ancestorStruct && $this->processingArray ) {
		// Aka both an array and a struct. ( self::MODE_BAGSTRUCT )
		$this->results['xmp-' . $info['map_group']][$this->ancestorStruct][][$finalName] = $val;
	} elseif ( $this->ancestorStruct ) {
		$this->results['xmp-' . $info['map_group']][$this->ancestorStruct][$finalName] = $val;
	} elseif ( $this->processingArray ) {
		if ( $this->itemLang === false ) {
			// normal array
			$this->results['xmp-' . $info['map_group']][$finalName][] = $val;
		} else {
			// lang array.
			$this->results['xmp-' . $info['map_group']][$finalName][$this->itemLang] = $val;
		}
	} else {
		$this->results['xmp-' . $info['map_group']][$finalName] = $val;
	}
}
||
1384 | } |
||
1385 |
Let’s assume that you have a directory layout like this:
and let’s assume the following content of
Bar.php
: If both files OtherDir/Foo.php and SomeDir/Foo.php are loaded in the same runtime, you will see a PHP error such as the following:
PHP Fatal error: Cannot use SomeDir\Foo as Foo because the name is already in use in OtherDir/Foo.php
However, as OtherDir/Foo.php does not necessarily have to be loaded, and the error is only triggered if it is loaded before OtherDir/Bar.php, this problem might go unnoticed for a while. In order to prevent this error from surfacing, you must import the namespace with a different alias: