|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* Module defining helper functions for detecting and dealing with MIME types. |
|
4
|
|
|
* |
|
5
|
|
|
* This program is free software; you can redistribute it and/or modify |
|
6
|
|
|
* it under the terms of the GNU General Public License as published by |
|
7
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
|
8
|
|
|
* (at your option) any later version. |
|
9
|
|
|
* |
|
10
|
|
|
* This program is distributed in the hope that it will be useful, |
|
11
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
13
|
|
|
* GNU General Public License for more details. |
|
14
|
|
|
* |
|
15
|
|
|
* You should have received a copy of the GNU General Public License along |
|
16
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc., |
|
17
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
18
|
|
|
* http://www.gnu.org/copyleft/gpl.html |
|
19
|
|
|
* |
|
20
|
|
|
* @file |
|
21
|
|
|
*/ |
|
22
|
|
|
|
|
23
|
|
|
/**
 * Built-in table of well known MIME types.
 * This is used as a fallback to mime.types files; a far more extensive list
 * is provided by the file mime.types in the includes directory.
 *
 * This table concatenated with mime.types is used to create a MIME <-> ext
 * map. Each line contains a MIME type followed by a space separated list of
 * extensions. If multiple extensions exist for a single MIME type, or
 * multiple MIME types exist for a single extension, then in most cases
 * MediaWiki assumes that the first extension following the MIME type is the
 * canonical extension, and that the first MIME type seen for a given
 * extension is the canonical MIME type for that extension.
 *
 * Known limitation: because the contents of $wgMimeTypeFile are appended
 * after this table, that file cannot redefine the canonical types listed
 * here. Reversing the order would fix that, but the effect on existing
 * installations is unknown; in practice this has not been a problem.
 *
 * The heredoc body must contain nothing but "type ext ext ..." lines: every
 * line is parsed as data at runtime.
 */
define( 'MM_WELL_KNOWN_MIME_TYPES', <<<END_STRING
application/ogg ogx ogg ogm ogv oga spx
application/pdf pdf
application/vnd.oasis.opendocument.chart odc
application/vnd.oasis.opendocument.chart-template otc
application/vnd.oasis.opendocument.database odb
application/vnd.oasis.opendocument.formula odf
application/vnd.oasis.opendocument.formula-template otf
application/vnd.oasis.opendocument.graphics odg
application/vnd.oasis.opendocument.graphics-template otg
application/vnd.oasis.opendocument.image odi
application/vnd.oasis.opendocument.image-template oti
application/vnd.oasis.opendocument.presentation odp
application/vnd.oasis.opendocument.presentation-template otp
application/vnd.oasis.opendocument.spreadsheet ods
application/vnd.oasis.opendocument.spreadsheet-template ots
application/vnd.oasis.opendocument.text odt
application/vnd.oasis.opendocument.text-master otm
application/vnd.oasis.opendocument.text-template ott
application/vnd.oasis.opendocument.text-web oth
application/javascript js
application/x-shockwave-flash swf
audio/midi mid midi kar
audio/mpeg mpga mpa mp2 mp3
audio/x-aiff aif aiff aifc
audio/x-wav wav
audio/ogg oga spx ogg
image/x-bmp bmp
image/gif gif
image/jpeg jpeg jpg jpe
image/png png
image/svg+xml svg
image/svg svg
image/tiff tiff tif
image/vnd.djvu djvu
image/x.djvu djvu
image/x-djvu djvu
image/x-portable-pixmap ppm
image/x-xcf xcf
text/plain txt
text/html html htm
video/ogg ogv ogm ogg
video/mpeg mpg mpeg
END_STRING
);
|
87
|
|
|
|
|
88
|
|
|
/**
 * Built-in table of well known MIME info entries.
 * This is used as a fallback to mime.info files; a far more extensive list
 * is provided by the file mime.info in the includes directory.
 *
 * Each line lists one or more MIME types separated by spaces, followed by a
 * media type in square brackets. When a line lists several MIME types, the
 * first one is treated as the main name and the others as aliases of it.
 *
 * The heredoc body must contain nothing but such entries: every line is
 * parsed as data at runtime.
 */
define( 'MM_WELL_KNOWN_MIME_INFO', <<<END_STRING
application/pdf [OFFICE]
application/vnd.oasis.opendocument.chart [OFFICE]
application/vnd.oasis.opendocument.chart-template [OFFICE]
application/vnd.oasis.opendocument.database [OFFICE]
application/vnd.oasis.opendocument.formula [OFFICE]
application/vnd.oasis.opendocument.formula-template [OFFICE]
application/vnd.oasis.opendocument.graphics [OFFICE]
application/vnd.oasis.opendocument.graphics-template [OFFICE]
application/vnd.oasis.opendocument.image [OFFICE]
application/vnd.oasis.opendocument.image-template [OFFICE]
application/vnd.oasis.opendocument.presentation [OFFICE]
application/vnd.oasis.opendocument.presentation-template [OFFICE]
application/vnd.oasis.opendocument.spreadsheet [OFFICE]
application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
application/vnd.oasis.opendocument.text [OFFICE]
application/vnd.oasis.opendocument.text-template [OFFICE]
application/vnd.oasis.opendocument.text-master [OFFICE]
application/vnd.oasis.opendocument.text-web [OFFICE]
application/javascript text/javascript application/x-javascript [EXECUTABLE]
application/x-shockwave-flash [MULTIMEDIA]
audio/midi [AUDIO]
audio/x-aiff [AUDIO]
audio/x-wav [AUDIO]
audio/mp3 audio/mpeg [AUDIO]
application/ogg audio/ogg video/ogg [MULTIMEDIA]
image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
image/gif [BITMAP]
image/jpeg [BITMAP]
image/png [BITMAP]
image/svg+xml [DRAWING]
image/tiff [BITMAP]
image/vnd.djvu [BITMAP]
image/x-xcf [BITMAP]
image/x-portable-pixmap [BITMAP]
text/plain [TEXT]
text/html [TEXT]
video/ogg [VIDEO]
video/mpeg [VIDEO]
unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
END_STRING
);
|
136
|
|
|
|
|
137
|
|
|
/** |
|
138
|
|
|
* Implements functions related to MIME types such as detection and mapping to |
|
139
|
|
|
* file extension. |
|
140
|
|
|
* |
|
141
|
|
|
* Instances of this class are stateless, there only needs to be one global instance |
|
142
|
|
|
* of MimeMagic. Please use MimeMagic::singleton() to get that instance. |
|
143
|
|
|
*/ |
|
144
|
|
|
class MimeMagic { |
|
145
|
|
|
/** |
|
146
|
|
|
* @var array Mapping of media types to arrays of MIME types. |
|
147
|
|
|
* This is used by findMediaType and getMediaType, respectively |
|
148
|
|
|
*/ |
|
149
|
|
|
protected $mMediaTypes = null; |
|
150
|
|
|
|
|
151
|
|
|
/** @var array Map of MIME type aliases |
|
152
|
|
|
*/ |
|
153
|
|
|
protected $mMimeTypeAliases = null; |
|
154
|
|
|
|
|
155
|
|
|
/** @var array Map of MIME types to file extensions (as a space separated list) |
|
156
|
|
|
*/ |
|
157
|
|
|
protected $mMimeToExt = null; |
|
158
|
|
|
|
|
159
|
|
|
/** @var array Map of file extensions to MIME types (as a space separated list)
|
160
|
|
|
*/ |
|
161
|
|
|
public $mExtToMime = null; |
|
162
|
|
|
|
|
163
|
|
|
/** @var IEContentAnalyzer |
|
164
|
|
|
*/ |
|
165
|
|
|
protected $mIEAnalyzer; |
|
166
|
|
|
|
|
167
|
|
|
/** @var string Extra MIME types, set for example by media handling extensions |
|
168
|
|
|
*/ |
|
169
|
|
|
private $mExtraTypes = ''; |
|
170
|
|
|
|
|
171
|
|
|
/** @var string Extra MIME info, set for example by media handling extensions |
|
172
|
|
|
*/ |
|
173
|
|
|
private $mExtraInfo = ''; |
|
174
|
|
|
|
|
175
|
|
|
/** @var Config */ |
|
176
|
|
|
private $mConfig; |
|
177
|
|
|
|
|
178
|
|
|
/** @var MimeMagic The singleton instance |
|
179
|
|
|
*/ |
|
180
|
|
|
private static $instance = null; |
|
181
|
|
|
|
|
182
|
|
|
/**
 * Initializes the MimeMagic object. This is called by MimeMagic::singleton().
 *
 * Runs the 'MimeMagicInit' hook, then parses the built-in MIME tables
 * together with the configured mime.types and mime.info files and any
 * extra definitions registered through addExtraTypes()/addExtraInfo(),
 * and builds the internal lookup maps from them.
 *
 * @todo Make this constructor private once everything uses the singleton instance
 * @param Config $config Source of the 'MimeTypeFile' and 'MimeInfoFile'
 *   settings. When omitted, the 'main' config is fetched from ConfigFactory.
 */
public function __construct( Config $config = null ) {
	global $IP;

	if ( !$config ) {
		wfDebug( __METHOD__ . ' called with no Config instance passed to it' );
		$config = ConfigFactory::getDefaultInstance()->makeConfig( 'main' );
	}
	$this->mConfig = $config;

	# Allow media handling extensions adding MIME-types and MIME-info
	Hooks::run( 'MimeMagicInit', [ $this ] );

	/**
	 * --- load mime.types ---
	 */
	$mimeTypeFile = $this->mConfig->get( 'MimeTypeFile' );
	if ( $mimeTypeFile == 'includes/mime.types' ) {
		// The default setting is relative to the installation directory
		$mimeTypeFile = "$IP/$mimeTypeFile";
	}

	// Order matters: built-ins first, then the file, then extension-supplied
	// entries. The first occurrence of a type/extension is the canonical one.
	$types = MM_WELL_KNOWN_MIME_TYPES
		. $this->readMimeDefinitionFile( $mimeTypeFile, 'mime types', __METHOD__ )
		. "\n" . $this->mExtraTypes;

	$this->mMimeToExt = [];
	$this->mExtToMime = [];

	foreach ( $this->splitMimeDefinitionLines( $types ) as $line ) {
		// Every line returned by the splitter contains at least one space
		list( $mime, $rest ) = explode( ' ', $line, 2 );
		$ext = trim( $rest );
		if ( $ext === '' ) {
			continue;
		}

		if ( isset( $this->mMimeToExt[$mime] ) ) {
			$this->mMimeToExt[$mime] .= ' ' . $ext;
		} else {
			$this->mMimeToExt[$mime] = $ext;
		}

		foreach ( explode( ' ', $ext ) as $e ) {
			$e = trim( $e );
			// Compare against '' rather than using empty(), which would
			// also discard the legitimate token "0".
			if ( $e === '' ) {
				continue;
			}

			if ( isset( $this->mExtToMime[$e] ) ) {
				$this->mExtToMime[$e] .= ' ' . $mime;
			} else {
				$this->mExtToMime[$e] = $mime;
			}
		}
	}

	/**
	 * --- load mime.info ---
	 */
	$mimeInfoFile = $this->mConfig->get( 'MimeInfoFile' );
	if ( $mimeInfoFile == 'includes/mime.info' ) {
		$mimeInfoFile = "$IP/$mimeInfoFile";
	}

	$info = MM_WELL_KNOWN_MIME_INFO
		. $this->readMimeDefinitionFile( $mimeInfoFile, 'mime info', __METHOD__ )
		. "\n" . $this->mExtraInfo;

	$this->mMimeTypeAliases = [];
	$this->mMediaTypes = [];

	foreach ( $this->splitMimeDefinitionLines( $info ) as $line ) {
		// The media type is given in square brackets, e.g. "[BITMAP]"
		$match = [];
		if ( preg_match( '!\[\s*(\w+)\s*\]!', $line, $match ) ) {
			$line = preg_replace( '!\[\s*(\w+)\s*\]!', '', $line );
			$mtype = trim( strtoupper( $match[1] ) );
		} else {
			$mtype = MEDIATYPE_UNKNOWN;
		}

		// Collect the real MIME type tokens. Removing the bracketed tag
		// leaves stray spaces behind, so blank tokens must be dropped
		// before anything is registered.
		$mimes = [];
		foreach ( explode( ' ', $line ) as $token ) {
			$token = trim( $token );
			if ( $token !== '' ) {
				$mimes[] = $token;
			}
		}

		if ( !isset( $this->mMediaTypes[$mtype] ) ) {
			$this->mMediaTypes[$mtype] = [];
		}
		foreach ( $mimes as $mime ) {
			$this->mMediaTypes[$mtype][] = $mime;
		}

		// The first MIME type on the line is the main name, all others
		// are aliases of it. Working on the filtered list guarantees that
		// no empty-string alias is ever registered (a null array key is
		// cast to '' and would otherwise resolve to an arbitrary type).
		$main = array_shift( $mimes );
		foreach ( $mimes as $alias ) {
			$this->mMimeTypeAliases[$alias] = $main;
		}
	}
}

/**
 * Reads an optional MIME definition file for the constructor.
 *
 * @param string|bool|null $file Path of the file, or a falsy value if none is configured
 * @param string $label Human readable name of the file kind, used in debug output
 * @param string $caller Method name to prefix debug messages with
 * @return string A newline followed by the file contents, or '' if nothing was loaded
 */
private function readMimeDefinitionFile( $file, $label, $caller ) {
	if ( !$file ) {
		wfDebug( "{$caller}: no {$label} file defined, using built-ins only.\n" );
		return '';
	}

	if ( !is_file( $file ) || !is_readable( $file ) ) {
		wfDebug( "{$caller}: can't load {$label} from {$file}\n" );
		return '';
	}

	wfDebug( "{$caller}: loading {$label} from {$file}\n" );
	return "\n" . file_get_contents( $file );
}

/**
 * Splits MIME definition text into the lines worth parsing.
 *
 * Line endings and tabs are normalized; blank lines, lines starting with
 * '#', and lines without any space are dropped. Remaining lines are
 * trimmed and lower-cased.
 *
 * @param string $text
 * @return string[]
 */
private function splitMimeDefinitionLines( $text ) {
	$text = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $text );
	$text = str_replace( "\t", " ", $text );

	$lines = [];
	foreach ( explode( "\n", $text ) as $line ) {
		$line = trim( $line );
		if ( $line === '' || $line[0] === '#' ) {
			continue;
		}

		$line = strtolower( $line );
		if ( strpos( $line, ' ' ) === false ) {
			continue;
		}

		$lines[] = $line;
	}

	return $lines;
}
|
361
|
|
|
|
|
362
|
|
|
/**
 * Returns the shared MimeMagic instance, creating it from the 'main'
 * configuration on first use.
 *
 * @return MimeMagic
 */
public static function singleton() {
	if ( self::$instance !== null ) {
		return self::$instance;
	}

	$mainConfig = ConfigFactory::getDefaultInstance()->makeConfig( 'main' );
	self::$instance = new MimeMagic( $mainConfig );

	return self::$instance;
}
|
374
|
|
|
|
|
375
|
|
|
/**
 * Registers additional MIME type to file extension mappings.
 * The given text is appended, on a new line, to the extra type definitions
 * held by this object.
 *
 * As an extension author, you are encouraged to submit patches to
 * MediaWiki's core to add new MIME types to mime.types.
 *
 * @param string $types
 */
public function addExtraTypes( $types ) {
	$this->mExtraTypes = $this->mExtraTypes . "\n" . $types;
}
|
384
|
|
|
|
|
385
|
|
|
/**
 * Registers additional MIME type to media type mappings.
 * The given text is appended, on a new line, to the extra info definitions
 * held by this object.
 *
 * As an extension author, you are encouraged to submit patches to
 * MediaWiki's core to add new MIME info to mime.info.
 *
 * @param string $info
 */
public function addExtraInfo( $info ) {
	$this->mExtraInfo = $this->mExtraInfo . "\n" . $info;
}
|
394
|
|
|
|
|
395
|
|
|
/**
 * Looks up the file extensions registered for a MIME type.
 *
 * The lower-cased type is tried first; if it has no entry of its own but
 * is a known alias, the type it is an alias of is tried as well.
 *
 * @param string $mime
 * @return string|null Space separated list of extensions, or null if the
 *   MIME type was unrecognized
 */
public function getExtensionsForType( $mime ) {
	$wanted = strtolower( $mime );

	// Direct name first, then (if there is one) the alias target
	$candidates = [ $wanted ];
	if ( isset( $this->mMimeTypeAliases[$wanted] ) ) {
		$candidates[] = $this->mMimeTypeAliases[$wanted];
	}

	foreach ( $candidates as $candidate ) {
		if ( isset( $this->mMimeToExt[$candidate] ) ) {
			return $this->mMimeToExt[$candidate];
		}
	}

	return null;
}
|
421
|
|
|
|
|
422
|
|
|
/**
 * Looks up the MIME types registered for a file extension.
 * The extension is matched case-insensitively.
 *
 * @param string $ext
 * @return string|null Space separated list of MIME types, or null if the
 *   extension was unrecognized
 */
public function getTypesForExtension( $ext ) {
	$key = strtolower( $ext );

	if ( !isset( $this->mExtToMime[$key] ) ) {
		return null;
	}

	return $this->mExtToMime[$key];
}
|
435
|
|
|
|
|
436
|
|
|
/**
 * Returns a single MIME type for a given file extension, or null if the
 * extension is unknown. The result is the first entry of the list returned
 * by getTypesForExtension( $ext ).
 *
 * @param string $ext
 * @return string|null
 */
public function guessTypesForExtension( $ext ) {
	$allTypes = $this->getTypesForExtension( $ext );
	if ( $allTypes === null ) {
		return null;
	}

	// TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
	// Cut the list off at the first whitespace character.
	return preg_replace( '/\s.*$/', '', trim( $allTypes ) );
}
|
455
|
|
|
|
|
456
|
|
|
/**
 * Tests if the extension matches the given MIME type.
 *
 * The extension is lower-cased and compared strictly, as a string, against
 * the extensions registered for the MIME type, so numeric-looking
 * extensions such as "1e1" and "10" are never confused with one another.
 *
 * @param string $extension
 * @param string $mime
 * @return bool|null True if a match was found, null if the MIME type is
 *   unknown, and false if the MIME type is known but no matches were found.
 */
public function isMatchingExtension( $extension, $mime ) {
	$ext = $this->getExtensionsForType( $mime );

	// Only a missing or blank list means "unknown"; a plain truthiness
	// check would also reject the single extension "0".
	if ( $ext === null || $ext === '' ) {
		return null; // Unknown MIME type
	}

	$known = explode( ' ', $ext );
	$extension = strtolower( $extension );

	return in_array( $extension, $known, true );
}
|
477
|
|
|
|
|
478
|
|
|
/**
 * Returns true if the MIME type is known to represent an image format
 * supported by the PHP GD library.
 *
 * The comparison is strict and case-sensitive: only the exact strings in
 * the list below match, and non-string input never matches.
 *
 * @param string $mime
 *
 * @return bool
 */
public function isPHPImageType( $mime ) {
	// As defined by getimagesize and image_type_to_mime
	static $types = [
		'image/gif', 'image/jpeg', 'image/png',
		'image/x-bmp', 'image/xbm', 'image/tiff',
		'image/jp2', 'image/jpeg2000', 'image/iff',
		'image/x-xbitmap',
		'image/vnd.wap.wbmp', 'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	];

	return in_array( $mime, $types, true );
}
|
500
|
|
|
|
|
501
|
|
|
/**
 * Returns true if the extension represents a type which can
 * be reliably detected from its content. Use this to determine
 * whether strict content checks should be applied to reject
 * invalid uploads; if we can't identify the type we won't
 * be able to say if it's invalid.
 *
 * The extension is matched case-insensitively.
 *
 * @todo Be more accurate when using fancy MIME detector plugins;
 *   right now this is the bare minimum getimagesize() list.
 * @param string $extension
 * @return bool
 */
public function isRecognizableExtension( $extension ) {
	static $types = [
		// Types recognized by getimagesize()
		'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
		'bmp', 'tiff', 'tif', 'jpc', 'jp2',
		'jpx', 'jb2', 'swc', 'iff', 'wbmp',
		'xbm',

		// Formats we recognize magic numbers for
		'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx',
		'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
		'webp',

		// XML formats we sure hope we recognize reliably
		'svg',
	];

	return in_array( strtolower( $extension ), $types, true );
}
|
531
|
|
|
|
|
532
|
|
|
/**
 * Improves a MIME type using the file extension. Some file formats are very
 * generic, so their MIME type is not very meaningful. A more useful MIME type
 * can be derived by looking at the file extension. Typically, this method
 * would be called on the result of guessMimeType().
 *
 * Three input types are refined: 'unknown/unknown', 'application/x-opc+zip'
 * and 'text/plain'. Afterwards the 'MimeMagicImproveFromExtension' hook is
 * run and the result is resolved through the alias map.
 *
 * @param string $mime The MIME type, typically guessed from a file's content.
 * @param string $ext The file extension, as taken from the file name
 *
 * @return string The MIME type
 */
public function improveTypeFromExtension( $mime, $ext ) {
	if ( $mime === 'unknown/unknown' ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->guessTypesForExtension( $ext );
		} else {
			wfDebug( __METHOD__ . ': refusing to guess mime type for .' .
				"$ext file, we should have recognized it\n" );
		}
	} elseif ( $mime === 'application/x-opc+zip' ) {
		if ( !$this->isMatchingExtension( $ext, $mime ) ) {
			wfDebug( __METHOD__ . ": refusing to guess better type for $mime file, " .
				".$ext is not a known OPC extension.\n" );
			$mime = 'application/zip';
		} else {
			// A known file extension for an OPC file,
			// find the proper MIME type for that file extension
			$mime = $this->guessTypesForExtension( $ext );
		}
	} elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
		// Textual types are sometimes not recognized properly.
		// If detected as text/plain, and has an extension which is textual
		// improve to the extension's type. For example, csv and json are often
		// misdetected as text/plain.
		$mime = $this->guessTypesForExtension( $ext );
	}

	# Media handling extensions can improve the MIME detected
	Hooks::run( 'MimeMagicImproveFromExtension', [ $this, $ext, &$mime ] );

	// Report the main name rather than an alias
	if ( isset( $this->mMimeTypeAliases[$mime] ) ) {
		$mime = $this->mMimeTypeAliases[$mime];
	}

	wfDebug( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
	return $mime;
}
|
581
|
|
|
|
|
582
|
|
|
/**
 * MIME type detection. The internal content checks of doGuessMimeType() are
 * tried first; if they yield nothing, detectMimeType() is used instead. The
 * result is then resolved through the alias map.
 *
 * The additional checks exist to determine some well known file formats
 * that may be missed or misinterpreted by the default MIME detection
 * (namely XML based formats like XHTML or SVG, as well as ZIP based formats
 * like OPC/ODF files).
 *
 * @param string $file The file to check
 * @param string|bool $ext The file extension, or true (default) to extract it from the filename.
 *   Set it to false to ignore the extension. DEPRECATED! Set to false, use
 *   improveTypeFromExtension($mime, $ext) later to improve MIME type.
 *
 * @return string The MIME type of $file
 */
public function guessMimeType( $file, $ext = true ) {
	if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
		wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$detected = $this->doGuessMimeType( $file, $ext );

	if ( !$detected ) {
		wfDebug( __METHOD__ . ": internal type detection failed for $file (.$ext)...\n" );
		$detected = $this->detectMimeType( $file, $ext );
	}

	// Report the main name rather than an alias
	if ( isset( $this->mMimeTypeAliases[$detected] ) ) {
		$detected = $this->mMimeTypeAliases[$detected];
	}

	wfDebug( __METHOD__ . ": guessed mime type of $file: $detected\n" );
	return $detected;
}
|
616
|
|
|
|
|
617
|
|
|
/** |
|
618
|
|
|
* Guess the MIME type from the file contents. |
|
619
|
|
|
* |
|
620
|
|
|
* @todo Remove $ext param |
|
621
|
|
|
* |
|
622
|
|
|
* @param string $file |
|
623
|
|
|
* @param mixed $ext |
|
624
|
|
|
* @return bool|string |
|
625
|
|
|
* @throws MWException |
|
626
|
|
|
*/ |
|
627
|
|
|
private function doGuessMimeType( $file, $ext ) { |
|
628
|
|
|
// Read a chunk of the file |
|
629
|
|
|
MediaWiki\suppressWarnings(); |
|
630
|
|
|
$f = fopen( $file, 'rb' ); |
|
631
|
|
|
MediaWiki\restoreWarnings(); |
|
632
|
|
|
|
|
633
|
|
|
if ( !$f ) { |
|
634
|
|
|
return 'unknown/unknown'; |
|
635
|
|
|
} |
|
636
|
|
|
|
|
637
|
|
|
$fsize = filesize( $file ); |
|
638
|
|
|
if ( $fsize === false ) { |
|
639
|
|
|
return 'unknown/unknown'; |
|
640
|
|
|
} |
|
641
|
|
|
|
|
642
|
|
|
$head = fread( $f, 1024 ); |
|
643
|
|
|
$tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR |
|
644
|
|
|
if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) { |
|
645
|
|
|
throw new MWException( |
|
646
|
|
|
"Seeking $tailLength bytes from EOF failed in " . __METHOD__ ); |
|
647
|
|
|
} |
|
648
|
|
|
$tail = $tailLength ? fread( $f, $tailLength ) : ''; |
|
649
|
|
|
fclose( $f ); |
|
650
|
|
|
|
|
651
|
|
|
wfDebug( __METHOD__ . ": analyzing head and tail of $file for magic numbers.\n" ); |
|
652
|
|
|
|
|
653
|
|
|
// Hardcode a few magic number checks... |
|
654
|
|
|
$headers = [ |
|
655
|
|
|
// Multimedia... |
|
656
|
|
|
'MThd' => 'audio/midi', |
|
657
|
|
|
'OggS' => 'application/ogg', |
|
658
|
|
|
|
|
659
|
|
|
// Image formats... |
|
660
|
|
|
// Note that WMF may have a bare header, no magic number. |
|
661
|
|
|
"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives? |
|
662
|
|
|
"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile', |
|
663
|
|
|
'%PDF' => 'application/pdf', |
|
664
|
|
|
'gimp xcf' => 'image/x-xcf', |
|
665
|
|
|
|
|
666
|
|
|
// Some forbidden fruit... |
|
667
|
|
|
'MZ' => 'application/octet-stream', // DOS/Windows executable |
|
668
|
|
|
"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary |
|
669
|
|
|
"\x7fELF" => 'application/octet-stream', // ELF binary |
|
670
|
|
|
]; |
|
671
|
|
|
|
|
672
|
|
|
foreach ( $headers as $magic => $candidate ) { |
|
673
|
|
|
if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) { |
|
674
|
|
|
wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" ); |
|
675
|
|
|
return $candidate; |
|
676
|
|
|
} |
|
677
|
|
|
} |
|
678
|
|
|
|
|
679
|
|
|
/* Look for WebM and Matroska files */ |
|
680
|
|
|
if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) { |
|
681
|
|
|
$doctype = strpos( $head, "\x42\x82" ); |
|
682
|
|
|
if ( $doctype ) { |
|
683
|
|
|
// Next byte is datasize, then data (sizes larger than 1 byte are very stupid muxers) |
|
684
|
|
|
$data = substr( $head, $doctype + 3, 8 ); |
|
685
|
|
|
if ( strncmp( $data, "matroska", 8 ) == 0 ) { |
|
686
|
|
|
wfDebug( __METHOD__ . ": recognized file as video/x-matroska\n" ); |
|
687
|
|
|
return "video/x-matroska"; |
|
688
|
|
|
} elseif ( strncmp( $data, "webm", 4 ) == 0 ) { |
|
689
|
|
|
wfDebug( __METHOD__ . ": recognized file as video/webm\n" ); |
|
690
|
|
|
return "video/webm"; |
|
691
|
|
|
} |
|
692
|
|
|
} |
|
693
|
|
|
wfDebug( __METHOD__ . ": unknown EBML file\n" ); |
|
694
|
|
|
return "unknown/unknown"; |
|
695
|
|
|
} |
|
696
|
|
|
|
|
697
|
|
|
/* Look for WebP */ |
|
698
|
|
|
if ( strncmp( $head, "RIFF", 4 ) == 0 && strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0 ) { |
|
699
|
|
|
wfDebug( __METHOD__ . ": recognized file as image/webp\n" ); |
|
700
|
|
|
return "image/webp"; |
|
701
|
|
|
} |
|
702
|
|
|
|
|
703
|
|
|
/** |
|
704
|
|
|
* Look for PHP. Check for this before HTML/XML... Warning: this is a |
|
705
|
|
|
* heuristic, and won't match a file with a lot of non-PHP before. It |
|
706
|
|
|
* will also match text files which could be PHP. :) |
|
707
|
|
|
* |
|
708
|
|
|
* @todo FIXME: For this reason, the check is probably useless -- an attacker |
|
709
|
|
|
* could almost certainly just pad the file with a lot of nonsense to |
|
710
|
|
|
* circumvent the check in any case where it would be a security |
|
711
|
|
|
* problem. On the other hand, it causes harmful false positives (bug |
|
712
|
|
|
* 16583). The heuristic has been cut down to exclude three-character |
|
713
|
|
|
* strings like "<? ", but should it be axed completely? |
|
714
|
|
|
*/ |
|
715
|
|
|
if ( ( strpos( $head, '<?php' ) !== false ) || |
|
716
|
|
|
( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) || |
|
717
|
|
|
( strpos( $head, "<\x00?\x00 " ) !== false ) || |
|
718
|
|
|
( strpos( $head, "<\x00?\x00\n" ) !== false ) || |
|
719
|
|
|
( strpos( $head, "<\x00?\x00\t" ) !== false ) || |
|
720
|
|
|
( strpos( $head, "<\x00?\x00=" ) !== false ) ) { |
|
721
|
|
|
|
|
722
|
|
|
wfDebug( __METHOD__ . ": recognized $file as application/x-php\n" ); |
|
723
|
|
|
return 'application/x-php'; |
|
724
|
|
|
} |
|
725
|
|
|
|
|
726
|
|
|
/** |
|
727
|
|
|
* look for XML formats (XHTML and SVG) |
|
728
|
|
|
*/ |
|
729
|
|
|
$xml = new XmlTypeCheck( $file ); |
|
730
|
|
|
if ( $xml->wellFormed ) { |
|
731
|
|
|
$xmlMimeTypes = $this->mConfig->get( 'XMLMimeTypes' ); |
|
732
|
|
|
if ( isset( $xmlMimeTypes[$xml->getRootElement()] ) ) { |
|
733
|
|
|
return $xmlMimeTypes[$xml->getRootElement()]; |
|
734
|
|
|
} else { |
|
735
|
|
|
return 'application/xml'; |
|
736
|
|
|
} |
|
737
|
|
|
} |
|
738
|
|
|
|
|
739
|
|
|
/** |
|
740
|
|
|
* look for shell scripts |
|
741
|
|
|
*/ |
|
742
|
|
|
$script_type = null; |
|
743
|
|
|
|
|
744
|
|
|
# detect by shebang |
|
745
|
|
|
if ( substr( $head, 0, 2 ) == "#!" ) { |
|
746
|
|
|
$script_type = "ASCII"; |
|
747
|
|
|
} elseif ( substr( $head, 0, 5 ) == "\xef\xbb\xbf#!" ) { |
|
748
|
|
|
$script_type = "UTF-8"; |
|
749
|
|
|
} elseif ( substr( $head, 0, 7 ) == "\xfe\xff\x00#\x00!" ) { |
|
750
|
|
|
$script_type = "UTF-16BE"; |
|
751
|
|
|
} elseif ( substr( $head, 0, 7 ) == "\xff\xfe#\x00!" ) { |
|
752
|
|
|
$script_type = "UTF-16LE"; |
|
753
|
|
|
} |
|
754
|
|
|
|
|
755
|
|
|
if ( $script_type ) { |
|
|
|
|
|
|
756
|
|
|
if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) { |
|
757
|
|
|
// Quick and dirty fold down to ASCII! |
|
758
|
|
|
$pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ]; |
|
759
|
|
|
$chars = unpack( $pack[$script_type], substr( $head, 2 ) ); |
|
760
|
|
|
$head = ''; |
|
761
|
|
|
foreach ( $chars as $codepoint ) { |
|
762
|
|
|
if ( $codepoint < 128 ) { |
|
763
|
|
|
$head .= chr( $codepoint ); |
|
764
|
|
|
} else { |
|
765
|
|
|
$head .= '?'; |
|
766
|
|
|
} |
|
767
|
|
|
} |
|
768
|
|
|
} |
|
769
|
|
|
|
|
770
|
|
|
$match = []; |
|
771
|
|
|
|
|
772
|
|
|
if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) { |
|
773
|
|
|
$mime = "application/x-{$match[2]}"; |
|
774
|
|
|
wfDebug( __METHOD__ . ": shell script recognized as $mime\n" ); |
|
775
|
|
|
return $mime; |
|
776
|
|
|
} |
|
777
|
|
|
} |
|
778
|
|
|
|
|
779
|
|
|
// Check for ZIP variants (before getimagesize) |
|
780
|
|
|
if ( strpos( $tail, "PK\x05\x06" ) !== false ) { |
|
781
|
|
|
wfDebug( __METHOD__ . ": ZIP header present in $file\n" ); |
|
782
|
|
|
return $this->detectZipType( $head, $tail, $ext ); |
|
783
|
|
|
} |
|
784
|
|
|
|
|
785
|
|
|
MediaWiki\suppressWarnings(); |
|
786
|
|
|
$gis = getimagesize( $file ); |
|
787
|
|
|
MediaWiki\restoreWarnings(); |
|
788
|
|
|
|
|
789
|
|
View Code Duplication |
if ( $gis && isset( $gis['mime'] ) ) { |
|
|
|
|
|
|
790
|
|
|
$mime = $gis['mime']; |
|
791
|
|
|
wfDebug( __METHOD__ . ": getimagesize detected $file as $mime\n" ); |
|
792
|
|
|
return $mime; |
|
793
|
|
|
} |
|
794
|
|
|
|
|
795
|
|
|
// Also test DjVu |
|
796
|
|
|
$deja = new DjVuImage( $file ); |
|
797
|
|
|
if ( $deja->isValid() ) { |
|
798
|
|
|
wfDebug( __METHOD__ . ": detected $file as image/vnd.djvu\n" ); |
|
799
|
|
|
return 'image/vnd.djvu'; |
|
800
|
|
|
} |
|
801
|
|
|
|
|
802
|
|
|
# Media handling extensions can guess the MIME by content |
|
803
|
|
|
# It's intentionally here so that if core is wrong about a type (false positive), |
|
804
|
|
|
# people will hopefully nag and submit patches :) |
|
805
|
|
|
$mime = false; |
|
806
|
|
|
# Some strings by reference for performance - assuming well-behaved hooks |
|
807
|
|
|
Hooks::run( |
|
808
|
|
|
'MimeMagicGuessFromContent', |
|
809
|
|
|
[ $this, &$head, &$tail, $file, &$mime ] |
|
810
|
|
|
); |
|
811
|
|
|
|
|
812
|
|
|
return $mime; |
|
813
|
|
|
} |
|
814
|
|
|
|
|
815
|
|
|
/**
 * Detect the application-specific file type of a given ZIP file from its
 * header data. Recognizes OpenDocument packages, Open Packaging Conventions
 * (OPC) packages, and legacy MS Office documents that carry an OPC trailer.
 * If it can't tell, returns 'application/zip'.
 *
 * @param string $header Some reasonably-sized chunk of file header
 * @param string|null $tail The tail of the file
 * @param string|bool $ext The file extension, or true to extract it from the filename.
 *   Set it to false (default) to ignore the extension. DEPRECATED! Set to false,
 *   use improveTypeFromExtension($mime, $ext) later to improve MIME type.
 *
 * @return string
 */
function detectZipType( $header, $tail = null, $ext = false ) {
	if ( $ext ) { # TODO: remove $ext param
		wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	// $tail is optional; normalize null to an empty string so the string
	// functions below never receive null.
	$tail = (string)$tail;

	$mime = 'application/zip';
	$opendocTypes = [
		'chart-template',
		'chart',
		'formula-template',
		'formula',
		'graphics-template',
		'graphics',
		'image-template',
		'image',
		'presentation-template',
		'presentation',
		'spreadsheet-template',
		'spreadsheet',
		'text-template',
		'text-master',
		'text-web',
		'text' ];

	// http://lists.oasis-open.org/archives/office/200505/msg00006.html
	$types = '(?:' . implode( '|', $opendocTypes ) . ')';
	$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

	// The dot is escaped so that only the literal name "[Content_Types].xml" matches.
	$openxmlRegex = '/^\[Content_Types\]\.xml/';

	// Both regexes are applied 30 bytes into the data, i.e. past the
	// fixed-size part of a ZIP local file header, where the entry name starts.
	$firstEntry = substr( $header, 30 );

	if ( preg_match( $opendocRegex, $firstEntry, $matches ) ) {
		$mime = $matches[1];
		wfDebug( __METHOD__ . ": detected $mime from ZIP archive\n" );
	} elseif ( preg_match( $openxmlRegex, $firstEntry ) ) {
		$mime = "application/x-opc+zip";
		# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
		if ( $ext !== true && $ext !== false ) {
			/** This is the mode used by getPropsFromPath
			 * These MIME's are stored in the database, where we don't really want
			 * x-opc+zip, because we use it only for internal purposes
			 */
			if ( $this->isMatchingExtension( $ext, $mime ) ) {
				/* A known file extension for an OPC file,
				 * find the proper mime type for that file extension
				 */
				$mime = $this->guessTypesForExtension( $ext );
			} else {
				$mime = "application/zip";
			}
		}
		wfDebug( __METHOD__ . ": detected an Open Packaging Conventions archive: $mime\n" );
	} elseif ( substr( $header, 0, 8 ) === "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
		( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
		preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
		// Header carries the 8-byte signature above and the tail holds a ZIP
		// local file header whose first entry is [Content_Types].xml.
		// Refine the type from the bytes at offset 512 of the header.
		if ( substr( $header, 512, 4 ) === "\xEC\xA5\xC1\x00" ) {
			$mime = "application/msword";
		}

		$powerpointSignatures = [
			"\xEC\xA5\xC1\x00\x0E\x00",
			"\xEC\xA5\xC1\x00\x1C\x00",
			"\xEC\xA5\xC1\x00\x43\x00",
		];
		$excelSignatures = [
			"\xFD\xFF\xFF\xFF\x10\x00",
			"\xFD\xFF\xFF\xFF\x1F\x00",
			"\xFD\xFF\xFF\xFF\x22\x00",
			"\xFD\xFF\xFF\xFF\x23\x00",
			"\xFD\xFF\xFF\xFF\x28\x00",
			"\xFD\xFF\xFF\xFF\x29\x00",
			"\xFD\xFF\xFF\xFF\x10\x02",
			"\xFD\xFF\xFF\xFF\x1F\x02",
			"\xFD\xFF\xFF\xFF\x22\x02",
			"\xFD\xFF\xFF\xFF\x23\x02",
			"\xFD\xFF\xFF\xFF\x28\x02",
			"\xFD\xFF\xFF\xFF\x29\x02",
		];

		// Strict matching: these are raw byte sequences, not values to be juggled.
		$signature = substr( $header, 512, 6 );
		if ( in_array( $signature, $powerpointSignatures, true ) ) {
			$mime = "application/vnd.ms-powerpoint";
		} elseif ( in_array( $signature, $excelSignatures, true ) ) {
			$mime = "application/vnd.msexcel";
		}

		wfDebug( __METHOD__ . ": detected a MS Office document with OPC trailer\n" );
	} else {
		wfDebug( __METHOD__ . ": unable to identify type of ZIP archive\n" );
	}
	return $mime;
}
|
914
|
|
|
|
|
915
|
|
|
/**
 * Internal MIME type detection. Detection is done using an external
 * program, if the 'MimeDetectorCommand' setting is set. Otherwise, the
 * fileinfo extension is tried if it is available. If detection fails and
 * $ext is not false, the MIME type is guessed from the file extension,
 * using guessTypesForExtension.
 *
 * If no MIME type can be determined, this function returns
 * 'unknown/unknown'.
 *
 * @param string $file The file to check
 * @param string|bool $ext The file extension, or true (default) to extract it from the filename.
 *   Set it to false to ignore the extension. DEPRECATED! Set to false, use
 *   improveTypeFromExtension($mime, $ext) later to improve MIME type.
 *
 * @return string The MIME type of $file
 */
private function detectMimeType( $file, $ext = true ) {
	/** @todo Make $ext default to false. Or better, remove it. */
	if ( $ext ) {
		wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. "
			. "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$mimeDetectorCommand = $this->mConfig->get( 'MimeDetectorCommand' );
	$m = null;
	if ( $mimeDetectorCommand ) {
		$args = wfEscapeShellArg( $file );
		$m = wfShellExec( "$mimeDetectorCommand $args" );
	} elseif ( function_exists( "finfo_open" ) && function_exists( "finfo_file" ) ) {
		$mime_magic_resource = finfo_open( FILEINFO_MIME );

		if ( $mime_magic_resource ) {
			$m = finfo_file( $mime_magic_resource, $file );
			finfo_close( $mime_magic_resource );
		} else {
			wfDebug( __METHOD__ . ": finfo_open failed on " . FILEINFO_MIME . "!\n" );
		}
	} else {
		wfDebug( __METHOD__ . ": no magic mime detector found!\n" );
	}

	if ( $m ) {
		# normalize
		$m = preg_replace( '![;, ].*$!', '', $m ); # strip charset, etc
		$m = trim( $m );
		$m = strtolower( $m );

		// Output that normalizes to nothing (e.g. only whitespace) is not a
		// usable type; treat it like "unknown" so the fallbacks below run
		// instead of returning an empty string as the MIME type.
		if ( $m === '' || strpos( $m, 'unknown' ) !== false ) {
			$m = null;
		} else {
			wfDebug( __METHOD__ . ": magic mime type of $file: $m\n" );
			return $m;
		}
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$i = strrpos( $file, '.' );
		$ext = strtolower( $i ? substr( $file, $i + 1 ) : '' );
	}
	if ( $ext ) {
		if ( $this->isRecognizableExtension( $ext ) ) {
			wfDebug( __METHOD__ . ": refusing to guess mime type for .$ext file, "
				. "we should have recognized it\n" );
		} else {
			$m = $this->guessTypesForExtension( $ext );
			if ( $m ) {
				wfDebug( __METHOD__ . ": extension mime type of $file: $m\n" );
				return $m;
			}
		}
	}

	// Unknown type
	wfDebug( __METHOD__ . ": failed to guess mime type for $file!\n" );
	return 'unknown/unknown';
}
|
994
|
|
|
|
|
995
|
|
|
/**
 * Determine the media type code for a file, using its MIME type, name and
 * possibly its contents.
 *
 * This function relies on findMediaType(), mapping extensions and MIME
 * types to media types. Lookups are tried in this order: the full MIME
 * type, the file extension of $path, then the major part of the MIME type.
 *
 * @todo analyse file if need be
 * @todo look at multiple extension, separately and together.
 *
 * @param string $path Full path to the image file, in case we have to look at the contents
 *   (if null, only the MIME type is used to determine the media type code).
 * @param string $mime MIME type. If null it will be guessed using guessMimeType.
 *
 * @return string A value to be used with the MEDIATYPE_xxx constants.
 */
function getMediaType( $path = null, $mime = null ) {
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If MIME type is unknown, guess it
	if ( !$mime ) {
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	// $path may legitimately be null when only $mime was given, so check it
	// before touching the filesystem.
	if ( $mime === 'application/ogg' && $path !== null && file_exists( $path ) ) {
		// Read a chunk of the file. The data is binary, so open it in
		// binary mode rather than a text-translation mode.
		$f = fopen( $path, "rb" );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		// Drop the encoder name first so that "ffmpeg2theora" alone does not
		// count as a theora marker in the check below.
		$head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

		// This is an UGLY HACK, file should be parsed correctly
		if ( strpos( $head, 'theora' ) !== false ) {
			return MEDIATYPE_VIDEO;
		} elseif ( strpos( $head, 'vorbis' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} elseif ( strpos( $head, 'flac' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} elseif ( strpos( $head, 'speex' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} else {
			return MEDIATYPE_MULTIMEDIA;
		}
	}

	// Check for entry for full MIME type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major MIME type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$major = substr( $mime, 0, $i );
			$type = $this->findMediaType( $major );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Every lookup above returns as soon as it finds a known type, so
	// reaching this point means nothing matched.
	return MEDIATYPE_UNKNOWN;
}
|
1087
|
|
|
|
|
1088
|
|
|
/**
 * Look up the media code for a MIME type or a file extension.
 *
 * An argument that starts with a dot (.) is treated as a file extension and
 * is first translated into its MIME types; anything else is treated as a
 * MIME type and is resolved through the alias table before the lookup.
 *
 * The lookup itself is driven by the mapping in $this->mMediaTypes.
 * @access private
 * @param string $extMime
 * @return int|string
 */
function findMediaType( $extMime ) {
	if ( substr( $extMime, 0, 1 ) === '.' ) {
		// Extension: fetch the space-separated MIME types registered for it
		$typeList = $this->getTypesForExtension( substr( $extMime, 1 ) );
		if ( !$typeList ) {
			return MEDIATYPE_UNKNOWN;
		}

		$candidates = explode( ' ', $typeList );
	} else {
		// MIME type: swap an alias for the type it stands for
		$canonical = isset( $this->mMimeTypeAliases[$extMime] )
			? $this->mMimeTypeAliases[$extMime]
			: $extMime;

		$candidates = [ $canonical ];
	}

	// Candidates are tried in order; the first media type listing one wins
	foreach ( $candidates as $candidate ) {
		foreach ( $this->mMediaTypes as $mediaType => $mimeList ) {
			if ( in_array( $candidate, $mimeList, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
|
1126
|
|
|
|
|
1127
|
|
|
/**
 * Ask the IE content analyzer which MIME types various versions of
 * Internet Explorer would detect from a chunk of the content.
 *
 * @param string $fileName The file name (unused at present)
 * @param string $chunk The first 256 bytes of the file
 * @param string $proposed The MIME type proposed by the server
 * @return array
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
	return $this->getIEContentAnalyzer()->getRealMimesFromData( $fileName, $chunk, $proposed );
}
|
1140
|
|
|
|
|
1141
|
|
|
/**
 * Return the IEContentAnalyzer held in $this->mIEAnalyzer, creating it on
 * first use.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
	if ( $this->mIEAnalyzer === null ) {
		$this->mIEAnalyzer = new IEContentAnalyzer();
	}

	return $this->mIEAnalyzer;
}
|
1152
|
|
|
} |
|
1153
|
|
|
|
In PHP, under loose comparison (like `==`, `!=`, or `switch` conditions), values of different types might be equal.

For `string` values, the empty string `''` is a special case; in particular, the following results might be unexpected: