This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | /** |
||
3 | * Simulation of Microsoft Internet Explorer's MIME type detection algorithm. |
||
4 | * |
||
5 | * @file |
||
6 | * @todo Define the exact license of this file. |
||
7 | */ |
||
8 | |||
9 | /** |
||
10 | * This class simulates Microsoft Internet Explorer's terribly broken and |
||
11 | * insecure MIME type detection algorithm. It can be used to check web uploads |
||
12 | * with an apparently safe type, to see if IE will reinterpret them to produce |
||
13 | * something dangerous. |
||
14 | * |
||
15 | * It is full of bugs and strange design choices and should not under any |
||
16 | * circumstances be used to determine a MIME type to present to a user or |
||
17 | * client. (Apple Safari developers, this means you too.) |
||
18 | * |
||
19 | * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have |
||
20 | * attempted to ensure that this code works in exactly the same way as Internet |
||
21 | * Explorer, it does not share any source code, or creative choices such as |
||
22 | * variable names, thus I (Tim Starling) claim copyright on it. |
||
23 | * |
||
24 | * It may be redistributed without restriction. To aid reuse, this class does |
||
25 | * not depend on any MediaWiki module. |
||
26 | */ |
||
27 | class IEContentAnalyzer { |
||
28 | /** |
||
29 | * Relevant data taken from the type table in IE 5 |
||
30 | */ |
||
31 | protected $baseTypeTable = [ |
||
32 | 'ambiguous' /*1*/ => [ |
||
33 | 'text/plain', |
||
34 | 'application/octet-stream', |
||
35 | 'application/x-netcdf', // [sic] |
||
36 | ], |
||
37 | 'text' /*3*/ => [ |
||
38 | 'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64', |
||
39 | 'application/macbinhex40', 'application/x-cdf', 'text/scriptlet' |
||
40 | ], |
||
41 | 'binary' /*4*/ => [ |
||
42 | 'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif', |
||
43 | 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp', |
||
44 | 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi', |
||
45 | 'video/x-msvideo', 'video/mpeg', 'application/x-compressed', |
||
46 | 'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java', |
||
47 | 'application/x-msdownload' |
||
48 | ], |
||
49 | 'html' /*5*/ => [ 'text/html' ], |
||
50 | ]; |
||
51 | |||
52 | /** |
||
53 | * Changes to the type table in later versions of IE |
||
54 | */ |
||
55 | protected $addedTypes = [ |
||
56 | 'ie07' => [ |
||
57 | 'text' => [ 'text/xml', 'application/xml' ] |
||
58 | ], |
||
59 | ]; |
||
60 | |||
61 | /** |
||
62 | * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a |
||
63 | * typical Windows installation. |
||
64 | * |
||
65 | * Used for extension to MIME type mapping if detection fails. |
||
66 | */ |
||
67 | protected $registry = [ |
||
68 | '.323' => 'text/h323', |
||
69 | '.3g2' => 'video/3gpp2', |
||
70 | '.3gp' => 'video/3gpp', |
||
71 | '.3gp2' => 'video/3gpp2', |
||
72 | '.3gpp' => 'video/3gpp', |
||
73 | '.aac' => 'audio/aac', |
||
74 | '.ac3' => 'audio/ac3', |
||
75 | '.accda' => 'application/msaccess', |
||
76 | '.accdb' => 'application/msaccess', |
||
77 | '.accdc' => 'application/msaccess', |
||
78 | '.accde' => 'application/msaccess', |
||
79 | '.accdr' => 'application/msaccess', |
||
80 | '.accdt' => 'application/msaccess', |
||
81 | '.ade' => 'application/msaccess', |
||
82 | '.adp' => 'application/msaccess', |
||
83 | '.adts' => 'audio/aac', |
||
84 | '.ai' => 'application/postscript', |
||
85 | '.aif' => 'audio/aiff', |
||
86 | '.aifc' => 'audio/aiff', |
||
87 | '.aiff' => 'audio/aiff', |
||
88 | '.amc' => 'application/x-mpeg', |
||
89 | '.application' => 'application/x-ms-application', |
||
90 | '.asf' => 'video/x-ms-asf', |
||
91 | '.asx' => 'video/x-ms-asf', |
||
92 | '.au' => 'audio/basic', |
||
93 | '.avi' => 'video/avi', |
||
94 | '.bmp' => 'image/bmp', |
||
95 | '.caf' => 'audio/x-caf', |
||
96 | '.cat' => 'application/vnd.ms-pki.seccat', |
||
97 | '.cbo' => 'application/sha', |
||
98 | '.cdda' => 'audio/aiff', |
||
99 | '.cer' => 'application/x-x509-ca-cert', |
||
100 | '.conf' => 'text/plain', |
||
101 | '.crl' => 'application/pkix-crl', |
||
102 | '.crt' => 'application/x-x509-ca-cert', |
||
103 | '.css' => 'text/css', |
||
104 | '.csv' => 'application/vnd.ms-excel', |
||
105 | '.der' => 'application/x-x509-ca-cert', |
||
106 | '.dib' => 'image/bmp', |
||
107 | '.dif' => 'video/x-dv', |
||
108 | '.dll' => 'application/x-msdownload', |
||
109 | '.doc' => 'application/msword', |
||
110 | '.docm' => 'application/vnd.ms-word.document.macroEnabled.12', |
||
111 | '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', |
||
112 | '.dot' => 'application/msword', |
||
113 | '.dotm' => 'application/vnd.ms-word.template.macroEnabled.12', |
||
114 | '.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template', |
||
115 | '.dv' => 'video/x-dv', |
||
116 | '.dwfx' => 'model/vnd.dwfx+xps', |
||
117 | '.edn' => 'application/vnd.adobe.edn', |
||
118 | '.eml' => 'message/rfc822', |
||
119 | '.eps' => 'application/postscript', |
||
120 | '.etd' => 'application/x-ebx', |
||
121 | '.exe' => 'application/x-msdownload', |
||
122 | '.fdf' => 'application/vnd.fdf', |
||
123 | '.fif' => 'application/fractals', |
||
124 | '.gif' => 'image/gif', |
||
125 | '.gsm' => 'audio/x-gsm', |
||
126 | '.hqx' => 'application/mac-binhex40', |
||
127 | '.hta' => 'application/hta', |
||
128 | '.htc' => 'text/x-component', |
||
129 | '.htm' => 'text/html', |
||
130 | '.html' => 'text/html', |
||
131 | '.htt' => 'text/webviewhtml', |
||
132 | '.hxa' => 'application/xml', |
||
133 | '.hxc' => 'application/xml', |
||
134 | '.hxd' => 'application/octet-stream', |
||
135 | '.hxe' => 'application/xml', |
||
136 | '.hxf' => 'application/xml', |
||
137 | '.hxh' => 'application/octet-stream', |
||
138 | '.hxi' => 'application/octet-stream', |
||
139 | '.hxk' => 'application/xml', |
||
140 | '.hxq' => 'application/octet-stream', |
||
141 | '.hxr' => 'application/octet-stream', |
||
142 | '.hxs' => 'application/octet-stream', |
||
143 | '.hxt' => 'application/xml', |
||
144 | '.hxv' => 'application/xml', |
||
145 | '.hxw' => 'application/octet-stream', |
||
146 | '.ico' => 'image/x-icon', |
||
147 | '.iii' => 'application/x-iphone', |
||
148 | '.ins' => 'application/x-internet-signup', |
||
149 | '.iqy' => 'text/x-ms-iqy', |
||
150 | '.isp' => 'application/x-internet-signup', |
||
151 | '.jfif' => 'image/jpeg', |
||
152 | '.jnlp' => 'application/x-java-jnlp-file', |
||
153 | '.jpe' => 'image/jpeg', |
||
154 | '.jpeg' => 'image/jpeg', |
||
155 | '.jpg' => 'image/jpeg', |
||
156 | '.jtx' => 'application/x-jtx+xps', |
||
157 | '.latex' => 'application/x-latex', |
||
158 | '.log' => 'text/plain', |
||
159 | '.m1v' => 'video/mpeg', |
||
160 | '.m2v' => 'video/mpeg', |
||
161 | '.m3u' => 'audio/x-mpegurl', |
||
162 | '.mac' => 'image/x-macpaint', |
||
163 | '.man' => 'application/x-troff-man', |
||
164 | '.mda' => 'application/msaccess', |
||
165 | '.mdb' => 'application/msaccess', |
||
166 | '.mde' => 'application/msaccess', |
||
167 | '.mfp' => 'application/x-shockwave-flash', |
||
168 | '.mht' => 'message/rfc822', |
||
169 | '.mhtml' => 'message/rfc822', |
||
170 | '.mid' => 'audio/mid', |
||
171 | '.midi' => 'audio/mid', |
||
172 | '.mod' => 'video/mpeg', |
||
173 | '.mov' => 'video/quicktime', |
||
174 | '.mp2' => 'video/mpeg', |
||
175 | '.mp2v' => 'video/mpeg', |
||
176 | '.mp3' => 'audio/mpeg', |
||
177 | '.mp4' => 'video/mp4', |
||
178 | '.mpa' => 'video/mpeg', |
||
179 | '.mpe' => 'video/mpeg', |
||
180 | '.mpeg' => 'video/mpeg', |
||
181 | '.mpf' => 'application/vnd.ms-mediapackage', |
||
182 | '.mpg' => 'video/mpeg', |
||
183 | '.mpv2' => 'video/mpeg', |
||
184 | '.mqv' => 'video/quicktime', |
||
185 | '.NMW' => 'application/nmwb', |
||
186 | '.nws' => 'message/rfc822', |
||
187 | '.odc' => 'text/x-ms-odc', |
||
188 | '.ols' => 'application/vnd.ms-publisher', |
||
189 | '.p10' => 'application/pkcs10', |
||
190 | '.p12' => 'application/x-pkcs12', |
||
191 | '.p7b' => 'application/x-pkcs7-certificates', |
||
192 | '.p7c' => 'application/pkcs7-mime', |
||
193 | '.p7m' => 'application/pkcs7-mime', |
||
194 | '.p7r' => 'application/x-pkcs7-certreqresp', |
||
195 | '.p7s' => 'application/pkcs7-signature', |
||
196 | '.pct' => 'image/pict', |
||
197 | '.pdf' => 'application/pdf', |
||
198 | '.pdx' => 'application/vnd.adobe.pdx', |
||
199 | '.pfx' => 'application/x-pkcs12', |
||
200 | '.pic' => 'image/pict', |
||
201 | '.pict' => 'image/pict', |
||
202 | '.pinstall' => 'application/x-picasa-detect', |
||
203 | '.pko' => 'application/vnd.ms-pki.pko', |
||
204 | '.png' => 'image/png', |
||
205 | '.pnt' => 'image/x-macpaint', |
||
206 | '.pntg' => 'image/x-macpaint', |
||
207 | '.pot' => 'application/vnd.ms-powerpoint', |
||
208 | '.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12', |
||
209 | '.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template', |
||
210 | '.ppa' => 'application/vnd.ms-powerpoint', |
||
211 | '.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12', |
||
212 | '.pps' => 'application/vnd.ms-powerpoint', |
||
213 | '.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12', |
||
214 | '.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow', |
||
215 | '.ppt' => 'application/vnd.ms-powerpoint', |
||
216 | '.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12', |
||
217 | '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation', |
||
218 | '.prf' => 'application/pics-rules', |
||
219 | '.ps' => 'application/postscript', |
||
220 | '.pub' => 'application/vnd.ms-publisher', |
||
221 | '.pwz' => 'application/vnd.ms-powerpoint', |
||
222 | '.py' => 'text/plain', |
||
223 | '.pyw' => 'text/plain', |
||
224 | '.qht' => 'text/x-html-insertion', |
||
225 | '.qhtm' => 'text/x-html-insertion', |
||
226 | '.qt' => 'video/quicktime', |
||
227 | '.qti' => 'image/x-quicktime', |
||
228 | '.qtif' => 'image/x-quicktime', |
||
229 | '.qtl' => 'application/x-quicktimeplayer', |
||
230 | '.rat' => 'application/rat-file', |
||
231 | '.rmf' => 'application/vnd.adobe.rmf', |
||
232 | '.rmi' => 'audio/mid', |
||
233 | '.rqy' => 'text/x-ms-rqy', |
||
234 | '.rtf' => 'application/msword', |
||
235 | '.sct' => 'text/scriptlet', |
||
236 | '.sd2' => 'audio/x-sd2', |
||
237 | '.sdp' => 'application/sdp', |
||
238 | '.shtml' => 'text/html', |
||
239 | '.sit' => 'application/x-stuffit', |
||
240 | '.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12', |
||
241 | '.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide', |
||
242 | '.slk' => 'application/vnd.ms-excel', |
||
243 | '.snd' => 'audio/basic', |
||
244 | '.so' => 'application/x-apachemodule', |
||
245 | '.sol' => 'text/plain', |
||
246 | '.sor' => 'text/plain', |
||
247 | '.spc' => 'application/x-pkcs7-certificates', |
||
248 | '.spl' => 'application/futuresplash', |
||
249 | '.sst' => 'application/vnd.ms-pki.certstore', |
||
250 | '.stl' => 'application/vnd.ms-pki.stl', |
||
251 | '.swf' => 'application/x-shockwave-flash', |
||
252 | '.thmx' => 'application/vnd.ms-officetheme', |
||
253 | '.tif' => 'image/tiff', |
||
254 | '.tiff' => 'image/tiff', |
||
255 | '.txt' => 'text/plain', |
||
256 | '.uls' => 'text/iuls', |
||
257 | '.vcf' => 'text/x-vcard', |
||
258 | '.vdx' => 'application/vnd.ms-visio.viewer', |
||
259 | '.vsd' => 'application/vnd.ms-visio.viewer', |
||
260 | '.vss' => 'application/vnd.ms-visio.viewer', |
||
261 | '.vst' => 'application/vnd.ms-visio.viewer', |
||
262 | '.vsx' => 'application/vnd.ms-visio.viewer', |
||
263 | '.vtx' => 'application/vnd.ms-visio.viewer', |
||
264 | '.wav' => 'audio/wav', |
||
265 | '.wax' => 'audio/x-ms-wax', |
||
266 | '.wbk' => 'application/msword', |
||
267 | '.wdp' => 'image/vnd.ms-photo', |
||
268 | '.wiz' => 'application/msword', |
||
269 | '.wm' => 'video/x-ms-wm', |
||
270 | '.wma' => 'audio/x-ms-wma', |
||
271 | '.wmd' => 'application/x-ms-wmd', |
||
272 | '.wmv' => 'video/x-ms-wmv', |
||
273 | '.wmx' => 'video/x-ms-wmx', |
||
274 | '.wmz' => 'application/x-ms-wmz', |
||
275 | '.wpl' => 'application/vnd.ms-wpl', |
||
276 | '.wsc' => 'text/scriptlet', |
||
277 | '.wvx' => 'video/x-ms-wvx', |
||
278 | '.xaml' => 'application/xaml+xml', |
||
279 | '.xbap' => 'application/x-ms-xbap', |
||
280 | '.xdp' => 'application/vnd.adobe.xdp+xml', |
||
281 | '.xfdf' => 'application/vnd.adobe.xfdf', |
||
282 | '.xht' => 'application/xhtml+xml', |
||
283 | '.xhtml' => 'application/xhtml+xml', |
||
284 | '.xla' => 'application/vnd.ms-excel', |
||
285 | '.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12', |
||
286 | '.xlk' => 'application/vnd.ms-excel', |
||
287 | '.xll' => 'application/vnd.ms-excel', |
||
288 | '.xlm' => 'application/vnd.ms-excel', |
||
289 | '.xls' => 'application/vnd.ms-excel', |
||
290 | '.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12', |
||
291 | '.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12', |
||
292 | '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', |
||
293 | '.xlt' => 'application/vnd.ms-excel', |
||
294 | '.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12', |
||
295 | '.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template', |
||
296 | '.xlw' => 'application/vnd.ms-excel', |
||
297 | '.xml' => 'text/xml', |
||
298 | '.xps' => 'application/vnd.ms-xpsdocument', |
||
299 | '.xsl' => 'text/xml', |
||
300 | ]; |
||
301 | |||
302 | /** |
||
303 | * IE versions which have been analysed to bring you this class, and for |
||
304 | * which some substantive difference exists. These will appear as keys |
||
305 | * in the return value of getRealMimesFromData(). The names are chosen to sort correctly. |
||
306 | */ |
||
307 | protected $versions = [ 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' ]; |
||
308 | |||
309 | /** |
||
310 | * Type table with versions expanded |
||
311 | */ |
||
312 | protected $typeTable = []; |
||
313 | |||
/**
 * Build the per-version type table.
 *
 * Walks $this->versions in order, starting from $this->baseTypeTable and
 * folding in whatever $this->addedTypes lists for each version. Additions
 * are cumulative: a type added for one version remains present for every
 * version that comes after it in $this->versions.
 */
public function __construct() {
    $cumulative = $this->baseTypeTable;
    foreach ( $this->versions as $ieVersion ) {
        $extras = isset( $this->addedTypes[$ieVersion] ) ? $this->addedTypes[$ieVersion] : [];
        foreach ( $extras as $format => $mimeList ) {
            $cumulative[$format] = array_merge( $cumulative[$format], $mimeList );
        }
        // Snapshot the table as it stands after this version's additions
        $this->typeTable[$ieVersion] = $cumulative;
    }
}
||
327 | |||
/**
 * Run getMimesFromData() and convert each result from IE's idiosyncratic
 * private type names into names other applications will understand.
 *
 * @param string $fileName File name; passed through to getMimesFromData()
 * @param string $chunk Leading bytes of the file; passed through to getMimesFromData()
 * @param string $proposed MIME type proposed by the server
 *
 * @return array Map of IE version name to translated MIME type
 */
public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
    $translated = [];
    foreach ( $this->getMimesFromData( $fileName, $chunk, $proposed ) as $ieVersion => $ieType ) {
        $translated[$ieVersion] = $this->translateMimeType( $ieType );
    }
    return $translated;
}
||
343 | |||
/**
 * Map one of IE's private MIME type names onto the more commonly
 * understood name. Types without an entry in the alias table are
 * returned unchanged.
 *
 * @param string $type MIME type as reported by the IE simulation
 * @return string
 */
public function translateMimeType( $type ) {
    static $ieAliases = [
        'image/pjpeg' => 'image/jpeg',
        'image/x-png' => 'image/png',
        'image/x-wmf' => 'application/x-msmetafile',
        'image/bmp' => 'image/x-bmp',
        'application/x-zip-compressed' => 'application/zip',
        'application/x-compressed' => 'application/x-compress',
        'application/x-gzip-compressed' => 'application/x-gzip',
        'audio/mid' => 'audio/midi',
    ];
    return isset( $ieAliases[$type] ) ? $ieAliases[$type] : $type;
}
||
366 | |||
/**
 * Get the untranslated MIME type for every entry in $this->versions.
 *
 * @param string $fileName File name; passed through to getMimeTypeForVersion()
 * @param string $chunk Leading bytes of the file; passed through to getMimeTypeForVersion()
 * @param string $proposed MIME type proposed by the server
 *
 * @return array Map of IE version name to detected (IE-style) MIME type
 */
public function getMimesFromData( $fileName, $chunk, $proposed ) {
    $detect = function ( $ieVersion ) use ( $fileName, $chunk, $proposed ) {
        return $this->getMimeTypeForVersion( $ieVersion, $fileName, $chunk, $proposed );
    };
    // Pair each version name with the type detected for it
    return array_combine( $this->versions, array_map( $detect, $this->versions ) );
}
||
383 | |||
/**
 * Get the MIME type for a given named version.
 *
 * The proposed type is returned as-is when it is of unknown format (other
 * than the two multipart types) or when the chunk is empty. Otherwise the
 * chunk is sniffed: tag/keyword sampling, magic-number checks, and a
 * text-versus-binary heuristic. If that still yields an ambiguous type and
 * the proposed type does not settle it, the file extension is looked up in
 * $this->registry.
 *
 * @param string $version IE version name, a key of $this->typeTable
 * @param string $fileName File name; only the part from the last '.' onward
 *   is used, for the registry fallback
 * @param string $chunk Leading bytes of the file; truncated to 255 bytes here
 * @param string $proposed MIME type proposed by the server; anything from
 *   the first ';' onward is discarded
 * @return bool|string Detected MIME type (may be $proposed unchanged)
 */
protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
    // Strip text after a semicolon
    $semiPos = strpos( $proposed, ';' );
    if ( $semiPos !== false ) {
        $proposed = substr( $proposed, 0, $semiPos );
    }

    $proposedFormat = $this->getDataFormat( $version, $proposed );
    if ( $proposedFormat == 'unknown'
        && $proposed != 'multipart/mixed'
        && $proposed != 'multipart/x-mixed-replace' )
    {
        return $proposed;
    }
    if ( strval( $chunk ) === '' ) {
        return $proposed;
    }

    // Truncate chunk at 255 bytes
    $chunk = substr( $chunk, 0, 255 );

    // IE does the Check*Headers() calls last, and instead does the following image
    // type checks by directly looking for the magic numbers. What I do here should
    // have the same effect since the magic number checks are identical in both cases.
    $result = $this->sampleData( $version, $chunk );
    $sampleFound = $result['found'];
    $counters = $result['counters'];
    $binaryType = $this->checkBinaryHeaders( $version, $chunk );
    $textType = $this->checkTextHeaders( $version, $chunk );

    if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) {
        return 'text/html';
    }
    if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) {
        return 'image/gif';
    }
    if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
        && $binaryType == 'image/pjpeg' )
    {
        return $proposed;
    }
    // PNG check added in IE 7
    // (version names are chosen so that plain string comparison orders them)
    if ( $version >= 'ie07'
        && ( $proposed == 'image/x-png' || $proposed == 'image/png' )
        && $binaryType == 'image/x-png' )
    {
        return $proposed;
    }

    // CDF was removed in IE 7 so it won't be in $sampleFound for later versions
    if ( isset( $sampleFound['cdf'] ) ) {
        return 'application/x-cdf';
    }

    // RSS and Atom were added in IE 7 so they won't be in $sampleFound for
    // previous versions
    if ( isset( $sampleFound['rss'] ) ) {
        return 'application/rss+xml';
    }
    if ( isset( $sampleFound['rdf-tag'] )
        && isset( $sampleFound['rdf-url'] )
        && isset( $sampleFound['rdf-purl'] ) )
    {
        return 'application/rss+xml';
    }
    if ( isset( $sampleFound['atom'] ) ) {
        return 'application/atom+xml';
    }

    if ( isset( $sampleFound['xml'] ) ) {
        // TODO: I'm not sure under what circumstances this flag is enabled
        if ( strpos( $version, 'strict' ) !== false ) {
            if ( $proposed == 'text/html' || $proposed == 'text/xml' ) {
                return 'text/xml';
            }
        } else {
            return 'text/xml';
        }
    }
    if ( isset( $sampleFound['html'] ) ) {
        // TODO: I'm not sure under what circumstances this flag is enabled
        if ( strpos( $version, 'nohtml' ) !== false ) {
            if ( $proposed == 'text/plain' ) {
                return 'text/html';
            }
        } else {
            return 'text/html';
        }
    }
    if ( isset( $sampleFound['xbm'] ) ) {
        return 'image/x-bitmap';
    }
    if ( isset( $sampleFound['binhex'] ) ) {
        return 'application/macbinhex40';
    }
    if ( isset( $sampleFound['scriptlet'] ) ) {
        if ( strpos( $version, 'strict' ) !== false ) {
            if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) {
                return 'text/scriptlet';
            }
        } else {
            return 'text/scriptlet';
        }
    }

    // Freaky heuristics to determine if the data is text or binary
    // The heuristic is of course broken for non-ASCII text
    if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
        < ( $counters['ctrl'] + $counters['high'] ) * 16 )
    {
        $kindOfBinary = true;
        $type = $binaryType ? $binaryType : $textType;
        if ( $type === false ) {
            $type = 'application/octet-stream';
        }
    } else {
        $kindOfBinary = false;
        $type = $textType ? $textType : $binaryType;
        if ( $type === false ) {
            $type = 'text/plain';
        }
    }

    // Check if the output format is ambiguous
    // This generally means that detection failed, real types aren't ambiguous
    $detectedFormat = $this->getDataFormat( $version, $type );
    if ( $detectedFormat != 'ambiguous' ) {
        return $type;
    }

    if ( $proposedFormat != 'ambiguous' ) {
        // FormatAgreesWithData()
        if ( $proposedFormat == 'text' && !$kindOfBinary ) {
            return $proposed;
        }
        if ( $proposedFormat == 'binary' && $kindOfBinary ) {
            return $proposed;
        }
        if ( $proposedFormat == 'html' ) {
            return $proposed;
        }
    }

    // Find a MIME type by searching the registry for the file extension.
    $dotPos = strrpos( $fileName, '.' );
    if ( $dotPos === false ) {
        return $type;
    }
    // Windows registry key names are not case sensitive, so "FOO.HTM" must hit
    // the '.htm' entry and "x.nmw" must hit '.NMW'. Compare both sides lowercased.
    $ext = strtolower( substr( $fileName, $dotPos ) );
    foreach ( $this->registry as $registeredExt => $registeredType ) {
        if ( strtolower( (string)$registeredExt ) === $ext ) {
            return $registeredType;
        }
    }

    // TODO: If the extension has an application registered to it, IE will return
    // application/octet-stream. We'll skip that, so we could erroneously
    // return text/plain or application/x-netcdf where application/octet-stream
    // would be correct.

    return $type;
}
||
552 | |||
/**
 * Look for a known text-format signature at the very start of the chunk.
 * Confirmed same in 5 and 7.
 *
 * @param string $version IE version name (not consulted by this check)
 * @param string $chunk Data sample
 * @return bool|string MIME type of the first matching signature, or false
 */
private function checkTextHeaders( $version, $chunk ) {
    // Leading byte sequence => type reported for it
    $signatures = [
        '%PDF' => 'application/pdf',
        '%!' => 'application/postscript',
        '{\\rtf' => 'text/richtext',
        'begin' => 'application/base64',
    ];
    foreach ( $signatures as $magic => $mime ) {
        if ( strncmp( $chunk, $magic, strlen( $magic ) ) === 0 ) {
            return $mime;
        }
    }
    return false;
}
||
578 | |||
/**
 * Look for a known binary-format magic number in the chunk.
 * Confirmed same in 5 and 7.
 *
 * Checks are tried in a fixed order and the first match wins.
 *
 * @param string $version IE version name (not consulted by this check)
 * @param string $chunk Data sample
 * @return bool|string IE-style MIME type of the first matching signature, or false
 */
private function checkBinaryHeaders( $version, $chunk ) {
    // True when the bytes of $magic appear in $chunk at byte position $pos
    $has = function ( $magic, $pos = 0 ) use ( $chunk ) {
        return substr( $chunk, $pos, strlen( $magic ) ) === $magic;
    };

    // The GIF signature is matched case-insensitively
    $gifPrefix = strtoupper( substr( $chunk, 0, 5 ) );
    if ( $gifPrefix === 'GIF87' || $gifPrefix === 'GIF89' ) {
        return 'image/gif';
    }
    if ( $has( "\xff\xd8" ) ) {
        return 'image/pjpeg'; // actually plain JPEG but this is what IE returns
    }

    // 'BM' followed by four zero bytes at positions 6..9
    if ( $has( 'BM' ) && $has( "\000\000\000\000", 6 ) ) {
        return 'image/bmp'; // another non-standard MIME
    }
    if ( $has( 'RIFF' ) && $has( 'WAVE', 8 ) ) {
        return 'audio/wav';
    }
    // These were integer literals in IE
    // Perhaps the author was not sure what the target endianness was
    $sunAudioMagics = [ ".sd\000", ".snd", "\000ds.", "dns." ];
    if ( in_array( substr( $chunk, 0, 4 ), $sunAudioMagics, true ) ) {
        return 'audio/basic';
    }
    if ( $has( "MM\000" ) ) {
        return 'image/tiff';
    }
    if ( $has( 'MZ' ) ) {
        return 'application/x-msdownload';
    }
    if ( $has( "\x89PNG\x0d\x0a\x1a\x0a" ) ) {
        return 'image/x-png'; // [sic]
    }
    if ( strlen( $chunk ) >= 5 ) {
        $thirdByte = ord( $chunk[2] );
        $fifthByte = ord( $chunk[4] );
        if ( $thirdByte >= 3 && $thirdByte <= 31 && $fifthByte == 0 && $has( 'JG' ) ) {
            return 'image/x-jg';
        }
    }
    // More endian confusion?
    if ( $has( 'MROF' ) ) {
        return 'audio/x-aiff';
    }
    if ( $has( 'FORM' ) && ( $has( 'AIFF', 8 ) || $has( 'AIFC', 8 ) ) ) {
        return 'audio/x-aiff';
    }
    if ( $has( 'RIFF' ) && $has( 'AVI ', 8 ) ) {
        return 'video/avi';
    }
    if ( $has( "\x00\x00\x01\xb3" ) || $has( "\x00\x00\x01\xba" ) ) {
        return 'video/mpeg';
    }
    if ( $has( "\001\000\000\000" ) && $has( ' EMF', 40 ) ) {
        return 'image/x-emf';
    }
    if ( $has( "\xd7\xcd\xc6\x9a" ) ) {
        return 'image/x-wmf';
    }
    if ( $has( "\xca\xfe\xba\xbe" ) ) {
        return 'application/java';
    }
    if ( $has( 'PK' ) ) {
        return 'application/x-zip-compressed';
    }
    if ( $has( "\x1f\x9d" ) ) {
        return 'application/x-compressed';
    }
    if ( $has( "\x1f\x8b" ) ) {
        return 'application/x-gzip-compressed';
    }
    // Skip redundant check for ZIP
    if ( $has( "MThd\000" ) ) {
        return 'audio/mid';
    }
    if ( $has( '%PDF' ) ) {
        return 'application/pdf';
    }
    return false;
}
||
679 | |||
/**
 * Do heuristic checks on the bulk of the data sample.
 * Search for HTML tags and other keyword signatures, and count byte classes.
 *
 * Byte classes counted:
 *  - 'lf', 'cr', 'ff': line feed, carriage return, form feed
 *  - 'ctrl': any other byte below 32 except tab
 *  - 'high': bytes 128 and above
 *  - 'low': tab and every remaining byte (32..127)
 *
 * Flags that may be set in 'found': 'xml', 'scriptlet', 'html', 'cdf'
 * (versions before ie07 only), 'rss', 'rdf-tag', 'atom' (ie07 and later
 * only), 'rdf-url', 'rdf-purl', 'xbm1', 'xbm2', 'xbm', 'binhex'. Several
 * matches stop the scan early, so counters then cover only the bytes seen
 * up to that point.
 *
 * @param string $version IE version name
 * @param string $chunk Data sample
 * @return array [ 'found' => array of flag => true, 'counters' => array of class => int ]
 */
protected function sampleData( $version, $chunk ) {
    $found = [];
    $counters = [
        'ctrl' => 0,
        'high' => 0,
        'low' => 0,
        'lf' => 0,
        'cr' => 0,
        'ff' => 0
    ];
    $htmlTags = [
        'html',
        'head',
        'title',
        'body',
        'script',
        'a href',
        'pre',
        'img',
        'plaintext',
        'table'
    ];
    $rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
    $rdfPurl = 'http://purl.org/rss/1.0/';
    $xbmMagic1 = '#define';
    $xbmMagic2 = '_width';
    $xbmMagic3 = '_bits';
    $binhexMagic = 'converted with BinHex';
    $chunkLength = strlen( $chunk );

    for ( $offset = 0; $offset < $chunkLength; $offset++ ) {
        $curChar = $chunk[$offset];
        if ( $curChar === "\x0a" ) {
            $counters['lf']++;
            continue;
        } elseif ( $curChar === "\x0d" ) {
            $counters['cr']++;
            continue;
        } elseif ( $curChar === "\x0c" ) {
            $counters['ff']++;
            continue;
        } elseif ( $curChar === "\t" ) {
            $counters['low']++;
            continue;
        } elseif ( ord( $curChar ) < 32 ) {
            $counters['ctrl']++;
            continue;
        } elseif ( ord( $curChar ) >= 128 ) {
            $counters['high']++;
            continue;
        }

        $counters['low']++;
        if ( $curChar === '<' ) {
            // XML
            $remainder = substr( $chunk, $offset + 1 );
            if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
                $nextChar = substr( $chunk, $offset + 5, 1 );
                if ( $nextChar === ':' || $nextChar === ' ' || $nextChar === "\t" ) {
                    $found['xml'] = true;
                }
            }
            // Scriptlet (JSP)
            if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
                $found['scriptlet'] = true;
                break;
            }
            // HTML
            foreach ( $htmlTags as $tag ) {
                if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
                    $found['html'] = true;
                }
            }
            // Skip broken check for additional tags (HR etc.)

            // CHANNEL replaced by RSS, RDF and FEED in IE 7
            if ( $version < 'ie07' ) {
                if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
                    $found['cdf'] = true;
                }
            } else {
                // RSS
                if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
                    $found['rss'] = true;
                    break; // return from SampleData
                }
                if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
                    $found['rdf-tag'] = true;
                    // no break
                }
                if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
                    $found['atom'] = true;
                    break;
                }
            }
            continue;
        }
        // Skip broken check for -->

        // RSS URL checks
        // For some reason both URLs must appear before it is recognised
        $remainder = substr( $chunk, $offset );
        if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
            $found['rdf-url'] = true;
            if ( isset( $found['rdf-tag'] )
                && isset( $found['rdf-purl'] ) ) // [sic]
            {
                break;
            }
            continue;
        }

        if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
            // Record the match; without this flag the caller's three-way
            // rdf-tag / rdf-url / rdf-purl test could never succeed.
            $found['rdf-purl'] = true;
            if ( isset( $found['rdf-tag'] )
                && isset( $found['rdf-url'] ) ) // [sic]
            {
                break;
            }
            continue;
        }

        // XBM checks
        if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
            $found['xbm1'] = true;
            continue;
        }
        if ( $curChar === '_' ) {
            if ( isset( $found['xbm2'] ) ) {
                if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
                    $found['xbm'] = true;
                    break;
                }
            } elseif ( isset( $found['xbm1'] ) ) {
                if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
                    $found['xbm2'] = true;
                }
            }
        }

        // BinHex (this one is matched case-sensitively)
        if ( !strncmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
            $found['binhex'] = true;
        }
    }
    return [ 'found' => $found, 'counters' => $counters ];
}
||
833 | |||
/**
 * Classify a MIME type using the type table of the given IE version.
 *
 * An empty type or the literal '(null)' is reported as 'ambiguous'.
 * Otherwise the result is the name of the first format list containing
 * the type, or 'unknown' when no list contains it.
 *
 * @param string $version IE version name, a key of $this->typeTable
 * @param string $type MIME type to classify
 * @return string Format name from the type table, or 'unknown'
 */
protected function getDataFormat( $version, $type ) {
    $formatLists = $this->typeTable[$version];
    if ( $type == '(null)' || strval( $type ) === '' ) {
        return 'ambiguous';
    }
    $format = 'unknown';
    foreach ( $formatLists as $candidate => $mimeList ) {
        if ( in_array( $type, $mimeList ) ) {
            $format = $candidate;
            break;
        }
    }
    return $format;
}
||
851 | } |
||
852 |
This check marks PHPDoc comments that could not be parsed by our parser. To see which comment annotations we can parse, please refer to our documentation on supported doc-types.