1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* Module defining helper functions for detecting and dealing with MIME types. |
4
|
|
|
* |
5
|
|
|
* This program is free software; you can redistribute it and/or modify |
6
|
|
|
* it under the terms of the GNU General Public License as published by |
7
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
8
|
|
|
* (at your option) any later version. |
9
|
|
|
* |
10
|
|
|
* This program is distributed in the hope that it will be useful, |
11
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
12
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13
|
|
|
* GNU General Public License for more details. |
14
|
|
|
* |
15
|
|
|
* You should have received a copy of the GNU General Public License along |
16
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc., |
17
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18
|
|
|
* http://www.gnu.org/copyleft/gpl.html |
19
|
|
|
* |
20
|
|
|
* @file |
21
|
|
|
*/ |
22
|
|
|
use Psr\Log\LoggerAwareInterface; |
23
|
|
|
use Psr\Log\LoggerInterface; |
24
|
|
|
|
25
|
|
|
/** |
26
|
|
|
* Implements functions related to MIME types such as detection and mapping to file extension |
27
|
|
|
* |
28
|
|
|
* @since 1.28 |
29
|
|
|
*/ |
30
|
|
|
class MimeAnalyzer implements LoggerAwareInterface { |
31
|
|
|
/** @var string */ |
32
|
|
|
protected $typeFile; |
33
|
|
|
/** @var string */ |
34
|
|
|
protected $infoFile; |
35
|
|
|
/** @var array Map of XML root element names to XML MIME types */ |
36
|
|
|
protected $xmlTypes; |
37
|
|
|
/** @var callable */ |
38
|
|
|
protected $initCallback; |
39
|
|
|
/** @var callable */ |
40
|
|
|
protected $detectCallback; |
41
|
|
|
/** @var callable */ |
42
|
|
|
protected $guessCallback; |
43
|
|
|
/** @var callable */ |
44
|
|
|
protected $extCallback; |
45
|
|
|
/** @var array Mapping of media types to arrays of MIME types */ |
46
|
|
|
protected $mediaTypes = null; |
47
|
|
|
/** @var array Map of MIME type aliases */ |
48
|
|
|
protected $mimeTypeAliases = null; |
49
|
|
|
/** @var array Map of MIME types to file extensions (as a space separated list) */ |
50
|
|
|
protected $mimetoExt = null; |
51
|
|
|
|
52
|
|
|
/** @var array Map of file extensions to MIME types (as a space separated list) */ |
53
|
|
|
public $mExtToMime = null; // legacy name; field accessed by hooks |
54
|
|
|
|
55
|
|
|
/** @var IEContentAnalyzer */ |
56
|
|
|
protected $IEAnalyzer; |
57
|
|
|
|
58
|
|
|
/** @var string Extra MIME types, set for example by media handling extensions */ |
59
|
|
|
private $extraTypes = ''; |
60
|
|
|
/** @var string Extra MIME info, set for example by media handling extensions */ |
61
|
|
|
private $extraInfo = ''; |
62
|
|
|
|
63
|
|
|
/** @var LoggerInterface */ |
64
|
|
|
private $logger; |
65
|
|
|
|
66
|
|
|
/** |
67
|
|
|
* Defines a set of well known MIME types |
68
|
|
|
* This is used as a fallback to mime.types files. |
69
|
|
|
* An extensive list of well known MIME types is provided by |
70
|
|
|
* the file mime.types in the includes directory. |
71
|
|
|
* |
72
|
|
|
* This list concatenated with mime.types is used to create a MIME <-> ext |
73
|
|
|
* map. Each line contains a MIME type followed by a space separated list of |
74
|
|
|
* extensions. If multiple extensions for a single MIME type exist or if |
75
|
|
|
* multiple MIME types exist for a single extension then in most cases |
76
|
|
|
* MediaWiki assumes that the first extension following the MIME type is the |
77
|
|
|
* canonical extension, and the first time a MIME type appears for a certain |
78
|
|
|
* extension is considered the canonical MIME type. |
79
|
|
|
* |
80
|
|
|
* (Note that appending the type file list to the end of self::$wellKnownTypes |
81
|
|
|
* sucks because you can't redefine canonical types. This could be fixed by |
82
|
|
|
* appending self::$wellKnownTypes behind type file list, but who knows |
83
|
|
|
* what will break? In practice this probably isn't a problem anyway -- Bryan) |
84
|
|
|
*/ |
85
|
|
|
protected static $wellKnownTypes = <<<EOT |
86
|
|
|
application/ogg ogx ogg ogm ogv oga spx |
87
|
|
|
application/pdf pdf |
88
|
|
|
application/vnd.oasis.opendocument.chart odc |
89
|
|
|
application/vnd.oasis.opendocument.chart-template otc |
90
|
|
|
application/vnd.oasis.opendocument.database odb |
91
|
|
|
application/vnd.oasis.opendocument.formula odf |
92
|
|
|
application/vnd.oasis.opendocument.formula-template otf |
93
|
|
|
application/vnd.oasis.opendocument.graphics odg |
94
|
|
|
application/vnd.oasis.opendocument.graphics-template otg |
95
|
|
|
application/vnd.oasis.opendocument.image odi |
96
|
|
|
application/vnd.oasis.opendocument.image-template oti |
97
|
|
|
application/vnd.oasis.opendocument.presentation odp |
98
|
|
|
application/vnd.oasis.opendocument.presentation-template otp |
99
|
|
|
application/vnd.oasis.opendocument.spreadsheet ods |
100
|
|
|
application/vnd.oasis.opendocument.spreadsheet-template ots |
101
|
|
|
application/vnd.oasis.opendocument.text odt |
102
|
|
|
application/vnd.oasis.opendocument.text-master otm |
103
|
|
|
application/vnd.oasis.opendocument.text-template ott |
104
|
|
|
application/vnd.oasis.opendocument.text-web oth |
105
|
|
|
application/javascript js |
106
|
|
|
application/x-shockwave-flash swf |
107
|
|
|
audio/midi mid midi kar |
108
|
|
|
audio/mpeg mpga mpa mp2 mp3 |
109
|
|
|
audio/x-aiff aif aiff aifc |
110
|
|
|
audio/x-wav wav |
111
|
|
|
audio/ogg oga spx ogg |
112
|
|
|
image/x-bmp bmp |
113
|
|
|
image/gif gif |
114
|
|
|
image/jpeg jpeg jpg jpe |
115
|
|
|
image/png png |
116
|
|
|
image/svg+xml svg |
117
|
|
|
image/svg svg |
118
|
|
|
image/tiff tiff tif |
119
|
|
|
image/vnd.djvu djvu |
120
|
|
|
image/x.djvu djvu |
121
|
|
|
image/x-djvu djvu |
122
|
|
|
image/x-portable-pixmap ppm |
123
|
|
|
image/x-xcf xcf |
124
|
|
|
text/plain txt |
125
|
|
|
text/html html htm |
126
|
|
|
video/ogg ogv ogm ogg |
127
|
|
|
video/mpeg mpg mpeg |
128
|
|
|
EOT; |
129
|
|
|
|
130
|
|
|
/** |
131
|
|
|
* Defines a set of well known MIME info entries |
132
|
|
|
* This is used as a fallback to mime.info files. |
133
|
|
|
* An extensive list of well known MIME types is provided by |
134
|
|
|
* the file mime.info in the includes directory. |
135
|
|
|
*/ |
136
|
|
|
protected static $wellKnownInfo = <<<EOT |
137
|
|
|
application/pdf [OFFICE] |
138
|
|
|
application/vnd.oasis.opendocument.chart [OFFICE] |
139
|
|
|
application/vnd.oasis.opendocument.chart-template [OFFICE] |
140
|
|
|
application/vnd.oasis.opendocument.database [OFFICE] |
141
|
|
|
application/vnd.oasis.opendocument.formula [OFFICE] |
142
|
|
|
application/vnd.oasis.opendocument.formula-template [OFFICE] |
143
|
|
|
application/vnd.oasis.opendocument.graphics [OFFICE] |
144
|
|
|
application/vnd.oasis.opendocument.graphics-template [OFFICE] |
145
|
|
|
application/vnd.oasis.opendocument.image [OFFICE] |
146
|
|
|
application/vnd.oasis.opendocument.image-template [OFFICE] |
147
|
|
|
application/vnd.oasis.opendocument.presentation [OFFICE] |
148
|
|
|
application/vnd.oasis.opendocument.presentation-template [OFFICE] |
149
|
|
|
application/vnd.oasis.opendocument.spreadsheet [OFFICE] |
150
|
|
|
application/vnd.oasis.opendocument.spreadsheet-template [OFFICE] |
151
|
|
|
application/vnd.oasis.opendocument.text [OFFICE] |
152
|
|
|
application/vnd.oasis.opendocument.text-template [OFFICE] |
153
|
|
|
application/vnd.oasis.opendocument.text-master [OFFICE] |
154
|
|
|
application/vnd.oasis.opendocument.text-web [OFFICE] |
155
|
|
|
application/javascript text/javascript application/x-javascript [EXECUTABLE] |
156
|
|
|
application/x-shockwave-flash [MULTIMEDIA] |
157
|
|
|
audio/midi [AUDIO] |
158
|
|
|
audio/x-aiff [AUDIO] |
159
|
|
|
audio/x-wav [AUDIO] |
160
|
|
|
audio/mp3 audio/mpeg [AUDIO] |
161
|
|
|
application/ogg audio/ogg video/ogg [MULTIMEDIA] |
162
|
|
|
image/x-bmp image/x-ms-bmp image/bmp [BITMAP] |
163
|
|
|
image/gif [BITMAP] |
164
|
|
|
image/jpeg [BITMAP] |
165
|
|
|
image/png [BITMAP] |
166
|
|
|
image/svg+xml [DRAWING] |
167
|
|
|
image/tiff [BITMAP] |
168
|
|
|
image/vnd.djvu [BITMAP] |
169
|
|
|
image/x-xcf [BITMAP] |
170
|
|
|
image/x-portable-pixmap [BITMAP] |
171
|
|
|
text/plain [TEXT] |
172
|
|
|
text/html [TEXT] |
173
|
|
|
video/ogg [VIDEO] |
174
|
|
|
video/mpeg [VIDEO] |
175
|
|
|
unknown/unknown application/octet-stream application/x-empty [UNKNOWN] |
176
|
|
|
EOT; |
177
|
|
|
|
178
|
|
|
/**
 * Stores the configuration and immediately loads the MIME tables.
 *
 * @param array $params Configuration map, includes:
 *   - typeFile: path to file with the list of known MIME types
 *   - infoFile: path to file with the MIME type info
 *   - xmlTypes: map of root element names to XML MIME types
 *   - initCallback: initialization callback that is passed this object [optional]
 *   - detectCallback: alternative to finfo that returns the mime type for a file.
 *      For example, the callback can return the output of "file -bi". [optional]
 *   - guessCallback: callback to improve the guessed MIME type using the file data.
 *      This is intended for fixing mistakes in fileinfo or "detectCallback". [optional]
 *   - extCallback: callback to improve the guessed MIME type using the extension. [optional]
 *   - logger: PSR-3 logger [optional]
 * @note Constructing these instances is expensive due to file reads.
 *  A service or singleton pattern should be used to avoid creating instances again and again.
 */
public function __construct( array $params ) {
	// Required settings: a missing key raises the usual undefined-index notice.
	$this->typeFile = $params['typeFile'];
	$this->infoFile = $params['infoFile'];
	$this->xmlTypes = $params['xmlTypes'];

	// Optional callbacks all default to null when not supplied.
	$optionalCallbacks = [ 'initCallback', 'detectCallback', 'guessCallback', 'extCallback' ];
	foreach ( $optionalCallbacks as $name ) {
		$this->$name = isset( $params[$name] ) ? $params[$name] : null;
	}

	// Fall back to a logger that discards everything.
	if ( isset( $params['logger'] ) ) {
		$this->logger = $params['logger'];
	} else {
		$this->logger = new \Psr\Log\NullLogger();
	}

	$this->loadFiles();
}
215
|
|
|
|
216
|
|
|
/**
 * Populates the MIME lookup tables.
 *
 * Runs the init callback (if any), then combines the built-in lists with the
 * configured mime.types / mime.info files and the extra entries registered via
 * addExtraTypes() / addExtraInfo(). Fills $mimetoExt, $mExtToMime,
 * $mediaTypes and $mimeTypeAliases.
 */
protected function loadFiles() {
	# Allow media handling extensions adding MIME-types and MIME-info
	if ( $this->initCallback ) {
		call_user_func( $this->initCallback, $this );
	}

	/**
	 * --- load mime.types ---
	 */
	$types = $this->appendListFile( self::$wellKnownTypes, $this->typeFile, 'types', __METHOD__ );
	$types .= "\n" . $this->extraTypes;

	$this->mimetoExt = [];
	$this->mExtToMime = [];

	foreach ( $this->splitListLines( $types ) as $line ) {
		// Each line is "<mime> <ext> [<ext> ...]"; splitListLines guarantees a space.
		$sep = strpos( $line, ' ' );
		$mime = substr( $line, 0, $sep );
		$extList = trim( substr( $line, $sep + 1 ) );

		if ( empty( $extList ) ) {
			continue;
		}

		if ( !empty( $this->mimetoExt[$mime] ) ) {
			$this->mimetoExt[$mime] .= ' ' . $extList;
		} else {
			$this->mimetoExt[$mime] = $extList;
		}

		foreach ( explode( ' ', $extList ) as $e ) {
			$e = trim( $e );
			if ( empty( $e ) ) {
				continue;
			}

			if ( !empty( $this->mExtToMime[$e] ) ) {
				$this->mExtToMime[$e] .= ' ' . $mime;
			} else {
				$this->mExtToMime[$e] = $mime;
			}
		}
	}

	/**
	 * --- load mime.info ---
	 */
	$info = $this->appendListFile( self::$wellKnownInfo, $this->infoFile, 'info', __METHOD__ );
	$info .= "\n" . $this->extraInfo;

	$this->mimeTypeAliases = [];
	$this->mediaTypes = [];

	foreach ( $this->splitListLines( $info ) as $line ) {
		// An optional "[MEDIATYPE]" marker names the media type of the line.
		$match = [];
		if ( preg_match( '!\[\s*(\w+)\s*\]!', $line, $match ) ) {
			$line = preg_replace( '!\[\s*(\w+)\s*\]!', '', $line );
			$mtype = trim( strtoupper( $match[1] ) );
		} else {
			$mtype = MEDIATYPE_UNKNOWN;
		}

		// Removing the marker leaves stray spaces behind, so drop empty tokens
		// here. Otherwise an empty string would be registered as an alias and a
		// null/empty MIME type would later "resolve" to an unrelated type.
		$mimes = [];
		foreach ( explode( ' ', $line ) as $token ) {
			$token = trim( $token );
			if ( !empty( $token ) ) {
				$mimes[] = $token;
			}
		}

		if ( !isset( $this->mediaTypes[$mtype] ) ) {
			$this->mediaTypes[$mtype] = [];
		}

		foreach ( $mimes as $mime ) {
			$this->mediaTypes[$mtype][] = $mime;
		}

		// The first MIME type on a line is canonical; the rest are its aliases.
		if ( count( $mimes ) > 1 ) {
			$main = $mimes[0];
			foreach ( array_slice( $mimes, 1 ) as $alias ) {
				$this->mimeTypeAliases[$alias] = $main;
			}
		}
	}
}

/**
 * Appends the contents of a list file to $text and logs what happened.
 *
 * @param string $text List text gathered so far
 * @param string $path Path of the file to append; a falsy value means "none configured"
 * @param string $what Word used in the log messages ("types" or "info")
 * @param string $caller Method name used as the log message prefix
 * @return string $text, followed by a newline and the file contents when readable
 */
private function appendListFile( $text, $path, $what, $caller ) {
	if ( !$path ) {
		$this->logger->info( $caller .
			": no mime $what file defined, using built-ins only.\n" );
		return $text;
	}

	if ( is_file( $path ) && is_readable( $path ) ) {
		$this->logger->info( $caller . ": loading mime $what from $path\n" );
		$text .= "\n";
		$text .= file_get_contents( $path );
	} else {
		$this->logger->info( $caller . ": can't load mime $what from $path\n" );
	}

	return $text;
}

/**
 * Splits list text into its meaningful lines.
 *
 * Normalizes line endings and tabs, trims each line, lowercases it, and drops
 * blank lines, lines starting with "#" and lines that contain no space.
 *
 * @param string $text
 * @return string[] Lowercased lines, each containing at least one space
 */
private function splitListLines( $text ) {
	$text = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $text );
	$text = str_replace( "\t", " ", $text );

	$result = [];
	foreach ( explode( "\n", $text ) as $line ) {
		$line = trim( $line );
		if ( empty( $line ) || strpos( $line, '#' ) === 0 ) {
			continue;
		}

		$line = strtolower( $line );
		if ( strpos( $line, ' ' ) === false ) {
			continue;
		}

		$result[] = $line;
	}

	return $result;
}
377
|
|
|
|
378
|
|
|
/**
 * Replaces the logger that receives this object's diagnostic messages.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
381
|
|
|
|
382
|
|
|
/**
 * Registers additional "MIME type -> file extensions" lines.
 *
 * The text is appended to the extra type list, which loadFiles() reads when
 * building the lookup tables. Extension authors are encouraged to submit
 * patches adding new MIME types to MediaWiki core's mime.types instead.
 *
 * @param string $types Lines in mime.types format
 */
public function addExtraTypes( $types ) {
	$this->extraTypes = $this->extraTypes . "\n" . $types;
}
391
|
|
|
|
392
|
|
|
/**
 * Registers additional "MIME type -> media type" lines.
 *
 * The text is appended to the extra info list, which loadFiles() reads when
 * building the lookup tables. Extension authors are encouraged to submit
 * patches adding new MIME info to MediaWiki core's mime.info instead.
 *
 * @param string $info Lines in mime.info format
 */
public function addExtraInfo( $info ) {
	$this->extraInfo = $this->extraInfo . "\n" . $info;
}
401
|
|
|
|
402
|
|
|
/**
 * Looks up the file extensions registered for a MIME type.
 *
 * The type is matched case-insensitively. If it has no entry of its own but is
 * a known alias, the extensions of the canonical type are returned instead.
 *
 * @param string $mime
 * @return string|null Space separated extension list, or null if unrecognized
 */
public function getExtensionsForType( $mime ) {
	$mime = strtolower( $mime );

	// Only fall back to the canonical type when the type itself has no entry.
	if ( !isset( $this->mimetoExt[$mime] ) && isset( $this->mimeTypeAliases[$mime] ) ) {
		$mime = $this->mimeTypeAliases[$mime];
	}

	return isset( $this->mimetoExt[$mime] ) ? $this->mimetoExt[$mime] : null;
}
428
|
|
|
|
429
|
|
|
/**
 * Looks up the MIME types registered for a file extension.
 *
 * The extension is matched case-insensitively.
 *
 * @param string $ext
 * @return string|null Space separated MIME type list, or null if unrecognized
 */
public function getTypesForExtension( $ext ) {
	$key = strtolower( $ext );

	if ( isset( $this->mExtToMime[$key] ) ) {
		return $this->mExtToMime[$key];
	}

	return null;
}
442
|
|
|
|
443
|
|
|
/**
 * Returns a single MIME type for a file extension, or null if unknown.
 *
 * Despite the plural name, only the first entry of the list returned by
 * getTypesForExtension( $ext ) is returned.
 *
 * @param string $ext
 * @return string|null
 */
public function guessTypesForExtension( $ext ) {
	$types = $this->getTypesForExtension( $ext );
	if ( $types === null ) {
		return null;
	}

	// TODO: Check if this is needed; strtok( $types, ' ' ) should be sufficient
	// Cut the list at its first whitespace character.
	return preg_replace( '/\s.*$/', '', trim( $types ) );
}
462
|
|
|
|
463
|
|
|
/**
 * Tests if the extension matches the given MIME type.
 *
 * Returns true if a match was found, null if the MIME type is unknown, and
 * false if the MIME type is known but no matches were found.
 *
 * @param string $extension File extension, compared case-insensitively
 * @param string $mime
 * @return bool|null
 */
public function isMatchingExtension( $extension, $mime ) {
	$known = $this->getExtensionsForType( $mime );

	if ( !$known ) {
		return null; // Unknown MIME type
	}

	// Strict comparison: loose string comparison would treat numeric-looking
	// extensions such as "001" and "1" as equal.
	return in_array( strtolower( $extension ), explode( ' ', $known ), true );
}
484
|
|
|
|
485
|
|
|
/**
 * Returns true if the MIME type is known to represent an image format
 * supported by the PHP GD library.
 *
 * The comparison is exact and case-sensitive; anything that is not one of the
 * listed strings yields false.
 *
 * @param string $mime
 *
 * @return bool
 */
public function isPHPImageType( $mime ) {
	// As defined by getimagesize and image_type_to_mime
	static $types = [
		'image/gif', 'image/jpeg', 'image/png',
		'image/x-bmp', 'image/xbm', 'image/tiff',
		'image/jp2', 'image/jpeg2000', 'image/iff',
		'image/x-xbitmap',
		'image/vnd.wap.wbmp', 'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	];

	// Strict, so that non-string input such as true or 0 can never match.
	return in_array( $mime, $types, true );
}
507
|
|
|
|
508
|
|
|
/**
 * Returns true if the extension represents a type which can
 * be reliably detected from its content. Use this to determine
 * whether strict content checks should be applied to reject
 * invalid uploads; if we can't identify the type we won't
 * be able to say if it's invalid.
 *
 * @todo Be more accurate when using fancy MIME detector plugins;
 *       right now this is the bare minimum getimagesize() list.
 * @param string $extension File extension, compared case-insensitively
 * @return bool
 */
public function isRecognizableExtension( $extension ) {
	static $types = [
		// Types recognized by getimagesize()
		'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
		'bmp', 'tiff', 'tif', 'jpc', 'jp2',
		'jpx', 'jb2', 'swc', 'iff', 'wbmp',
		'xbm',

		// Formats we recognize magic numbers for
		'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx',
		'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
		'webp',

		// XML formats we sure hope we recognize reliably
		'svg',
	];

	return in_array( strtolower( $extension ), $types, true );
}
538
|
|
|
|
539
|
|
|
/**
 * Refines a content-based MIME type using the file extension.
 *
 * Some formats are so generic that the detected type says little; the file
 * extension can then give a more useful answer. Typically called on the result
 * of guessMimeType(). Three cases are handled before the optional extension
 * callback runs and aliases are resolved:
 *  - 'unknown/unknown': trust the extension, unless it is one whose content
 *    should have been recognized.
 *  - 'application/x-opc+zip': use the extension's type if the extension is
 *    registered for OPC, otherwise downgrade to 'application/zip'.
 *  - 'text/plain': use the extension's type if findMediaType() reports it as textual.
 *
 * @param string $mime The MIME type, typically guessed from a file's content.
 * @param string $ext The file extension, as taken from the file name
 *
 * @return string|null The MIME type; may be null when the extension lookup finds nothing
 */
public function improveTypeFromExtension( $mime, $ext ) {
	if ( $mime === 'unknown/unknown' ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->guessTypesForExtension( $ext );
		} else {
			$this->logger->info( __METHOD__ .
				": refusing to guess mime type for .$ext file, we should have recognized it\n" );
		}
	} elseif ( $mime === 'application/x-opc+zip' ) {
		if ( !$this->isMatchingExtension( $ext, $mime ) ) {
			$this->logger->info( __METHOD__ .
				": refusing to guess better type for $mime file, .$ext is not a known OPC extension.\n" );
			$mime = 'application/zip';
		} else {
			// A known file extension for an OPC file,
			// find the proper MIME type for that file extension
			$mime = $this->guessTypesForExtension( $ext );
		}
	} elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
		// Textual types are sometimes not recognized properly.
		// If detected as text/plain, and has an extension which is textual
		// improve to the extension's type. For example, csv and json are often
		// misdetected as text/plain.
		$mime = $this->guessTypesForExtension( $ext );
	}

	# Media handling extensions can improve the MIME detected
	$improver = $this->extCallback;
	if ( $improver ) {
		$improver( $this, $ext, $mime /* by reference */ );
	}

	// Always report the canonical name of an aliased type.
	if ( isset( $this->mimeTypeAliases[$mime] ) ) {
		$mime = $this->mimeTypeAliases[$mime];
	}

	$this->logger->info( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
	return $mime;
}
592
|
|
|
|
593
|
|
|
/**
 * Determines the MIME type of a file.
 *
 * First tries the internal content checks of doGuessMimeType(), which cover
 * well known formats that default detection may miss or misinterpret (namely
 * XML based formats like XHTML or SVG, as well as ZIP based formats like
 * OPC/ODF files). If that yields nothing, falls back to detectMimeType().
 * Aliases are resolved to their canonical type before returning.
 *
 * @param string $file The file to check
 * @param string|bool $ext The file extension, or true (default) to extract
 *   it from the filename. Set it to false to ignore the extension. DEPRECATED!
 *   Set to false, use improveTypeFromExtension($mime, $ext) later to improve MIME type.
 *
 * @return string The MIME type of $file
 */
public function guessMimeType( $file, $ext = true ) {
	// TODO: make $ext default to false. Or better, remove it.
	if ( $ext ) {
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$guessed = $this->doGuessMimeType( $file, $ext );

	if ( !$guessed ) {
		$this->logger->info( __METHOD__ .
			": internal type detection failed for $file (.$ext)...\n" );
		$guessed = $this->detectMimeType( $file, $ext );
	}

	if ( isset( $this->mimeTypeAliases[$guessed] ) ) {
		$guessed = $this->mimeTypeAliases[$guessed];
	}

	$this->logger->info( __METHOD__ . ": guessed mime type of $file: $guessed\n" );
	return $guessed;
}
629
|
|
|
|
630
|
|
|
/**
 * Guess the MIME type from the file contents.
 *
 * Reads the first 1024 bytes and up to the last 65558 bytes of the file and
 * runs a fixed sequence of checks: magic numbers, EBML (WebM/Matroska), WebP,
 * PHP code, well-formed XML, shebang scripts, ZIP variants, getimagesize()
 * and finally the optional guess callback.
 *
 * @todo Remove $ext param
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Only passed on to detectZipType()
 * @return bool|string The MIME type; 'unknown/unknown' if the file cannot be
 *   read or is an unrecognized EBML file; false if no check matched and the
 *   guess callback (if any) did not set a type
 * @throws UnexpectedValueException If seeking to the tail of the file fails
 */
private function doGuessMimeType( $file, $ext ) {
	// Read a chunk of the file
	MediaWiki\suppressWarnings();
	$f = fopen( $file, 'rb' );
	MediaWiki\restoreWarnings();

	if ( !$f ) {
		return 'unknown/unknown';
	}

	$fsize = filesize( $file );
	if ( $fsize === false ) {
		// Do not leak the handle on the early exit.
		fclose( $f );
		return 'unknown/unknown';
	}

	$head = fread( $f, 1024 );
	$tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
	if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
		// Do not leak the handle when bailing out with an exception.
		fclose( $f );
		throw new UnexpectedValueException(
			"Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
	}
	$tail = $tailLength ? fread( $f, $tailLength ) : '';
	fclose( $f );

	$this->logger->info( __METHOD__ .
		": analyzing head and tail of $file for magic numbers.\n" );

	// Hardcode a few magic number checks...
	$headers = [
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	];

	foreach ( $headers as $magic => $candidate ) {
		if ( strncmp( $head, $magic, strlen( $magic ) ) === 0 ) {
			$this->logger->info( __METHOD__ .
				": magic header in $file recognized as $candidate\n" );
			return $candidate;
		}
	}

	/* Look for WebM and Matroska files */
	if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) === 0 ) {
		$doctype = strpos( $head, "\x42\x82" );
		if ( $doctype ) {
			// Next byte is datasize, then data (sizes larger than 1 byte are stupid muxers)
			$data = substr( $head, $doctype + 3, 8 );
			if ( strncmp( $data, "matroska", 8 ) === 0 ) {
				$this->logger->info( __METHOD__ . ": recognized file as video/x-matroska\n" );
				return "video/x-matroska";
			} elseif ( strncmp( $data, "webm", 4 ) === 0 ) {
				$this->logger->info( __METHOD__ . ": recognized file as video/webm\n" );
				return "video/webm";
			}
		}
		$this->logger->info( __METHOD__ . ": unknown EBML file\n" );
		return "unknown/unknown";
	}

	/* Look for WebP */
	if ( strncmp( $head, "RIFF", 4 ) === 0 &&
		strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) === 0
	) {
		$this->logger->info( __METHOD__ . ": recognized file as image/webp\n" );
		return "image/webp";
	}

	/**
	 * Look for PHP. Check for this before HTML/XML... Warning: this is a
	 * heuristic, and won't match a file with a lot of non-PHP before. It
	 * will also match text files which could be PHP. :)
	 *
	 * @todo FIXME: For this reason, the check is probably useless -- an attacker
	 * could almost certainly just pad the file with a lot of nonsense to
	 * circumvent the check in any case where it would be a security
	 * problem. On the other hand, it causes harmful false positives (bug
	 * 16583). The heuristic has been cut down to exclude three-character
	 * strings like "<? ", but should it be axed completely?
	 */
	if ( ( strpos( $head, '<?php' ) !== false ) ||
		( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
		( strpos( $head, "<\x00?\x00 " ) !== false ) ||
		( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
		( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
		( strpos( $head, "<\x00?\x00=" ) !== false ) ) {

		$this->logger->info( __METHOD__ . ": recognized $file as application/x-php\n" );
		return 'application/x-php';
	}

	/**
	 * look for XML formats (XHTML and SVG)
	 */
	$xml = new XmlTypeCheck( $file );
	if ( $xml->wellFormed ) {
		$xmlTypes = $this->xmlTypes;
		if ( isset( $xmlTypes[$xml->getRootElement()] ) ) {
			return $xmlTypes[$xml->getRootElement()];
		} else {
			return 'application/xml';
		}
	}

	/**
	 * look for shell scripts
	 */
	$script_type = null;

	# detect by shebang, optionally preceded by a byte order mark.
	# Each comparison length matches the length of its literal: the UTF-16
	# forms of "#!" are 6 bytes including the BOM.
	if ( substr( $head, 0, 2 ) === "#!" ) {
		$script_type = "ASCII";
	} elseif ( substr( $head, 0, 5 ) === "\xef\xbb\xbf#!" ) {
		$script_type = "UTF-8";
	} elseif ( substr( $head, 0, 6 ) === "\xfe\xff\x00#\x00!" ) {
		$script_type = "UTF-16BE";
	} elseif ( substr( $head, 0, 6 ) === "\xff\xfe#\x00!\x00" ) {
		$script_type = "UTF-16LE";
	}

	if ( $script_type ) {
		if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
			// Quick and dirty fold down to ASCII!
			$pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
			// Skip the two BOM bytes, then decode 16-bit code units.
			$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
			$head = '';
			foreach ( $chars as $codepoint ) {
				if ( $codepoint < 128 ) {
					$head .= chr( $codepoint );
				} else {
					$head .= '?';
				}
			}
		}

		$match = [];

		// The word after the last slash of the interpreter path names the type.
		if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
			$mime = "application/x-{$match[2]}";
			$this->logger->info( __METHOD__ . ": shell script recognized as $mime\n" );
			return $mime;
		}
	}

	// Check for ZIP variants (before getimagesize)
	if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
		$this->logger->info( __METHOD__ . ": ZIP header present in $file\n" );
		return $this->detectZipType( $head, $tail, $ext );
	}

	MediaWiki\suppressWarnings();
	$gis = getimagesize( $file );
	MediaWiki\restoreWarnings();

	if ( $gis && isset( $gis['mime'] ) ) {
		$mime = $gis['mime'];
		$this->logger->info( __METHOD__ . ": getimagesize detected $file as $mime\n" );
		return $mime;
	}

	# Media handling extensions can guess the MIME by content
	# It's intentionally here so that if core is wrong about a type (false positive),
	# people will hopefully nag and submit patches :)
	$mime = false;
	# Some strings by reference for performance - assuming well-behaved hooks
	$callback = $this->guessCallback;
	if ( $callback ) {
		$callback( $this, $head, $tail, $file, $mime /* by reference */ );
	}

	return $mime;
}
824
|
|
|
|
825
|
|
|
/** |
826
|
|
|
* Detect application-specific file type of a given ZIP file from its |
827
|
|
|
* header data. Currently works for OpenDocument and OpenXML types... |
828
|
|
|
* If can't tell, returns 'application/zip'. |
829
|
|
|
* |
830
|
|
|
* @param string $header Some reasonably-sized chunk of file header |
831
|
|
|
* @param string|null $tail The tail of the file |
832
|
|
|
* @param string|bool $ext The file extension, or true to extract it from the filename. |
833
|
|
|
* Set it to false (default) to ignore the extension. DEPRECATED! Set to false, |
834
|
|
|
* use improveTypeFromExtension($mime, $ext) later to improve MIME type. |
835
|
|
|
* |
836
|
|
|
* @return string |
837
|
|
|
*/ |
838
|
|
|
function detectZipType( $header, $tail = null, $ext = false ) {
	if ( $ext ) { # TODO: remove $ext param
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	// Default answer when nothing more specific can be recognized.
	$mime = 'application/zip';
	$opendocTypes = [
		'chart-template',
		'chart',
		'formula-template',
		'formula',
		'graphics-template',
		'graphics',
		'image-template',
		'image',
		'presentation-template',
		'presentation',
		'spreadsheet-template',
		'spreadsheet',
		'text-template',
		'text-master',
		'text-web',
		'text' ];

	// https://lists.oasis-open.org/archives/office/200505/msg00006.html
	$types = '(?:' . implode( '|', $opendocTypes ) . ')';
	$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

	// The dot before "xml" is escaped so that only the literal entry name
	// "[Content_Types].xml" matches, not any character in that position.
	$openxmlRegex = '/^\[Content_Types\]\.xml/';

	// Both patterns are applied 30 bytes into the data: the fixed part of a
	// ZIP local file header is 30 bytes long, so the first entry's file name
	// starts at that offset.
	if ( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
		$mime = $matches[1];
		$this->logger->info( __METHOD__ . ": detected $mime from ZIP archive\n" );
	} elseif ( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) {
		$mime = "application/x-opc+zip";
		# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
		if ( $ext !== true && $ext !== false ) {
			/** This is the mode used by getPropsFromPath
			 * These MIME's are stored in the database, where we don't really want
			 * x-opc+zip, because we use it only for internal purposes
			 */
			if ( $this->isMatchingExtension( $ext, $mime ) ) {
				/* A known file extension for an OPC file,
				 * find the proper mime type for that file extension
				 */
				$guessed = $this->guessTypesForExtension( $ext );
				// Never hand back an empty/null type from a method documented
				// to return a string; fall back to the generic ZIP type.
				$mime = $guessed ? $guessed : "application/zip";
			} else {
				$mime = "application/zip";
			}
		}
		$this->logger->info( __METHOD__ .
			": detected an Open Packaging Conventions archive: $mime\n" );
	} elseif ( $tail !== null &&
		substr( $header, 0, 8 ) === "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
		( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
		preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
		// The header carries the 8-byte signature checked above and the tail
		// contains a ZIP local file header for "[Content_Types].xml".
		// $tail is optional, so it is only searched when actually supplied.
		if ( substr( $header, 512, 4 ) === "\xEC\xA5\xC1\x00" ) {
			$mime = "application/msword";
		}
		// The 6-byte signature at offset 512 may refine (or override) the
		// 4-byte match above.
		switch ( substr( $header, 512, 6 ) ) {
			case "\xEC\xA5\xC1\x00\x0E\x00":
			case "\xEC\xA5\xC1\x00\x1C\x00":
			case "\xEC\xA5\xC1\x00\x43\x00":
				$mime = "application/vnd.ms-powerpoint";
				break;
			case "\xFD\xFF\xFF\xFF\x10\x00":
			case "\xFD\xFF\xFF\xFF\x1F\x00":
			case "\xFD\xFF\xFF\xFF\x22\x00":
			case "\xFD\xFF\xFF\xFF\x23\x00":
			case "\xFD\xFF\xFF\xFF\x28\x00":
			case "\xFD\xFF\xFF\xFF\x29\x00":
			case "\xFD\xFF\xFF\xFF\x10\x02":
			case "\xFD\xFF\xFF\xFF\x1F\x02":
			case "\xFD\xFF\xFF\xFF\x22\x02":
			case "\xFD\xFF\xFF\xFF\x23\x02":
			case "\xFD\xFF\xFF\xFF\x28\x02":
			case "\xFD\xFF\xFF\xFF\x29\x02":
				$mime = "application/vnd.msexcel";
				break;
		}

		$this->logger->info( __METHOD__ .
			": detected a MS Office document with OPC trailer\n" );
	} else {
		$this->logger->info( __METHOD__ . ": unable to identify type of ZIP archive\n" );
	}
	return $mime;
}
927
|
|
|
|
928
|
|
|
/** |
929
|
|
|
* Internal MIME type detection. Detection is done using the fileinfo |
930
|
|
|
* extension if it is available. It can be overridden by callback, which could |
931
|
|
|
* use an external program, for example. If detection fails and $ext is not false, |
932
|
|
|
* the MIME type is guessed from the file extension, using guessTypesForExtension. |
933
|
|
|
* |
934
|
|
|
* If the MIME type is still unknown, getimagesize is used to detect the |
935
|
|
|
* MIME type if the file is an image. If no MIME type can be determined, |
936
|
|
|
* this function returns 'unknown/unknown'. |
937
|
|
|
* |
938
|
|
|
* @param string $file The file to check |
939
|
|
|
* @param string|bool $ext The file extension, or true (default) to extract it from the filename. |
940
|
|
|
* Set it to false to ignore the extension. DEPRECATED! Set to false, use |
941
|
|
|
* improveTypeFromExtension($mime, $ext) later to improve MIME type. |
942
|
|
|
* |
943
|
|
|
* @return string The MIME type of $file |
944
|
|
|
*/ |
945
|
|
|
private function detectMimeType( $file, $ext = true ) {
	/** @todo Make $ext default to false. Or better, remove it. */
	if ( $ext ) {
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. "
			. "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	// Prefer the configured detection callback; otherwise fall back to the
	// fileinfo extension when its functions are available.
	$callback = $this->detectCallback;
	$m = null;
	if ( $callback ) {
		$m = $callback( $file );
	} elseif ( function_exists( "finfo_open" ) && function_exists( "finfo_file" ) ) {
		$mime_magic_resource = finfo_open( FILEINFO_MIME );

		if ( $mime_magic_resource ) {
			$m = finfo_file( $mime_magic_resource, $file );
			finfo_close( $mime_magic_resource );
		} else {
			$this->logger->info( __METHOD__ .
				": finfo_open failed on " . FILEINFO_MIME . "!\n" );
		}
	} else {
		$this->logger->info( __METHOD__ . ": no magic mime detector found!\n" );
	}

	if ( $m ) {
		# normalize
		$m = preg_replace( '![;, ].*$!', '', $m ); # strip charset, etc
		$m = trim( $m );
		$m = strtolower( $m );

		// Normalization can leave nothing behind (e.g. a result that starts
		// with a space or ';'). Treat that like an "unknown" answer instead of
		// returning an empty string as the MIME type.
		if ( $m !== '' && strpos( $m, 'unknown' ) === false ) {
			$this->logger->info( __METHOD__ . ": magic mime type of $file: $m\n" );
			return $m;
		}
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$i = strrpos( $file, '.' );
		$ext = strtolower( $i ? substr( $file, $i + 1 ) : '' );
	}
	if ( $ext ) {
		if ( $this->isRecognizableExtension( $ext ) ) {
			// Content detection should have identified this kind of file, so
			// the extension alone is not trusted here.
			$this->logger->info( __METHOD__ . ": refusing to guess mime type for .$ext file, "
				. "we should have recognized it\n" );
		} else {
			$m = $this->guessTypesForExtension( $ext );
			if ( $m ) {
				$this->logger->info( __METHOD__ . ": extension mime type of $file: $m\n" );
				return $m;
			}
		}
	}

	// Unknown type
	$this->logger->info( __METHOD__ . ": failed to guess mime type for $file!\n" );
	return 'unknown/unknown';
}
1007
|
|
|
|
1008
|
|
|
/** |
1009
|
|
|
* Determine the media type code for a file, using its MIME type, name and |
1010
|
|
|
* possibly its contents. |
1011
|
|
|
* |
1012
|
|
|
* This function relies on the findMediaType(), mapping extensions and MIME |
1013
|
|
|
* types to media types. |
1014
|
|
|
* |
1015
|
|
|
* @todo analyse file if need be |
1016
|
|
|
* @todo look at multiple extension, separately and together. |
1017
|
|
|
* |
1018
|
|
|
* @param string $path Full path to the image file, in case we have to look at the contents |
1019
|
|
|
* (if null, only the MIME type is used to determine the media type code). |
1020
|
|
|
* @param string $mime MIME type. If null it will be guessed using guessMimeType. |
1021
|
|
|
* |
1022
|
|
|
* @return string A value to be used with the MEDIATYPE_xxx constants. |
1023
|
|
|
*/ |
1024
|
|
|
function getMediaType( $path = null, $mime = null ) {
	// Nothing to work with at all.
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If MIME type is unknown, guess it
	if ( !$mime ) {
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	// $path is optional, so only touch the filesystem when it was supplied.
	if ( $mime === 'application/ogg' && $path !== null && file_exists( $path ) ) {
		// Read a chunk of the file. Binary mode: the content is not text and
		// must not go through any line-ending translation.
		$f = fopen( $path, "rb" );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		// Drop the "ffmpeg2theora" marker first so that it cannot be mistaken
		// for the "theora" marker searched for below.
		$head = str_replace( 'ffmpeg2theora', '', strtolower( (string)$head ) );

		// This is an UGLY HACK, file should be parsed correctly
		if ( strpos( $head, 'theora' ) !== false ) {
			return MEDIATYPE_VIDEO;
		} elseif ( strpos( $head, 'vorbis' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} elseif ( strpos( $head, 'flac' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} elseif ( strpos( $head, 'speex' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} else {
			return MEDIATYPE_MULTIMEDIA;
		}
	}

	// Check for entry for full MIME type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		// The leading dot tells findMediaType() this is an extension.
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major MIME type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$major = substr( $mime, 0, $i );
			$type = $this->findMediaType( $major );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Every successful lookup above returned early, so nothing matched.
	return MEDIATYPE_UNKNOWN;
}
1101
|
|
|
|
1102
|
|
|
/** |
1103
|
|
|
* Returns a media code matching the given MIME type or file extension. |
1104
|
|
|
* File extensions are represented by a string starting with a dot (.) to |
1105
|
|
|
* distinguish them from MIME types. |
1106
|
|
|
* |
1107
|
|
|
* This function relies on the mapping defined by $this->mediaTypes |
1108
|
|
|
* @access private |
1109
|
|
|
* @param string $extMime |
1110
|
|
|
* @return int|string |
1111
|
|
|
*/ |
1112
|
|
|
function findMediaType( $extMime ) {
	$isExtension = strpos( $extMime, '.' ) === 0;

	if ( $isExtension ) {
		// A leading dot marks a file extension: resolve it to its
		// space-separated list of MIME types first.
		$typeList = $this->getTypesForExtension( substr( $extMime, 1 ) );
		if ( !$typeList ) {
			return MEDIATYPE_UNKNOWN;
		}

		$candidates = explode( ' ', $typeList );
	} else {
		// A MIME type: replace it with its canonical name if it is a known alias.
		$canonical = isset( $this->mimeTypeAliases[$extMime] )
			? $this->mimeTypeAliases[$extMime]
			: $extMime;

		$candidates = [ $canonical ];
	}

	// Candidates are tried in order; the first media type whose list contains
	// a candidate is returned.
	foreach ( $candidates as $candidate ) {
		foreach ( $this->mediaTypes as $mediaType => $mimeList ) {
			if ( in_array( $candidate, $mimeList, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
1140
|
|
|
|
1141
|
|
|
/** |
1142
|
|
|
* Get the MIME types that various versions of Internet Explorer would |
1143
|
|
|
* detect from a chunk of the content. |
1144
|
|
|
* |
1145
|
|
|
* @param string $fileName The file name (unused at present) |
1146
|
|
|
* @param string $chunk The first 256 bytes of the file |
1147
|
|
|
* @param string $proposed The MIME type proposed by the server |
1148
|
|
|
* @return array |
1149
|
|
|
*/ |
1150
|
|
|
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
	// Delegate to the shared IEContentAnalyzer instance.
	return $this->getIEContentAnalyzer()->getRealMimesFromData( $fileName, $chunk, $proposed );
}
1154
|
|
|
|
1155
|
|
|
/** |
1156
|
|
|
* Get a cached instance of IEContentAnalyzer |
1157
|
|
|
* |
1158
|
|
|
* @return IEContentAnalyzer |
1159
|
|
|
*/ |
1160
|
|
|
protected function getIEContentAnalyzer() {
	// Reuse the instance created by an earlier call, if any.
	if ( $this->IEAnalyzer !== null ) {
		return $this->IEAnalyzer;
	}

	// First use: create the analyzer and remember it for later calls.
	$this->IEAnalyzer = new IEContentAnalyzer();
	return $this->IEAnalyzer;
}
1166
|
|
|
} |
1167
|
|
|
|
In PHP, under loose comparison (like `==`, `!=`, or `switch` conditions), values of different types might be equal. For `string` values, the empty string `''` is a special case; in particular, the following results might be unexpected: