1
|
|
|
<?php |
2
|
|
|
/**
 * MIME.php, provides functions for determining MIME types and getting info about MIME types
 * Copyright (C) 2003 Arend van Beelen, Auton Rijnsburg. [email protected]
 *
 * Updated for Codendi by Nicolas Terray 2008
 *
 * Copyright (c) Xerox Corporation, Codendi Team, 2001-2009. All rights reserved
 *
 * This file is a part of Codendi.
 *
 * Codendi is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Codendi is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Codendi. If not, see <http://www.gnu.org/licenses/>.
 */
25
|
|
|
/**
 * Determines the MIME type of a file and looks up textual descriptions of
 * MIME types.
 *
 * Data is read from the directories listed in XDG_DATA_DIRS, using the files
 * mime/globs (filename patterns), mime/magic (content signatures) and
 * mime/<type>.xml (descriptions).
 *
 * The class is a singleton: obtain it through MIME::instance().
 */
class MIME {

    /**
     * Hold an instance of the class
     */
    private static $_instance;

    /** list of data directories that are searched for mime/ files */
    private $XDG_DATA_DIRS;
    /** raw lines of all mime/globs files, null until first use */
    private $globFileLines;
    /** map "priority:mimetype" => MIME_MagicRule[], null until first use */
    private $magicRules;
    /** last description collected by the XML handlers */
    private $description;
    /** language requested in the current description() call */
    private $lang;
    /** true while the XML parser is inside a <comment> we want to read */
    private $read;

    /**
     * A private constructor; prevents direct creation of object
     */
    private function __construct() {
        // NOTE(review): $_ENV is only populated when the variables_order ini
        // setting contains "E"; otherwise the default directories are always
        // used. Consider getenv() if the environment variable must be honoured.
        $dirs = (isset($_ENV['XDG_DATA_DIRS']) ? $_ENV['XDG_DATA_DIRS'] : '/usr/local/share/:/usr/share/');
        $this->XDG_DATA_DIRS = explode(':', $dirs);
    }

    /**
     * Singleton
     * @return MIME
     */
    public static function instance() {
        if (!isset(self::$_instance)) {
            self::$_instance = new self();
        }
        return self::$_instance;
    }

    /**
     * tries to determine the mimetype of the given file
     *
     * Filename globs are tried first (case sensitive, then case insensitive);
     * an exact filename match wins immediately, otherwise the longest matching
     * glob wins. If no glob matches and $openfile is true, the file is opened
     * and the magic rules are applied in descending priority order. As a last
     * resort the first 50 bytes decide between text/plain and
     * application/octet-stream.
     *
     * @param string  $filename path (or bare name when $openfile is false)
     * @param boolean $openfile if false, the file won't be opened and magic checking will be skipped
     *
     * @return string the mimetype, or '' when it could not be determined
     *                without opening the file (or the file cannot be opened)
     */
    public function type($filename, $openfile = true) {
        $basename = basename($filename);

        $mimetype = $this->matchGlobs($basename);
        if ($mimetype !== '') {
            return $mimetype;
        }

        // if globbing didn't return any results we're going to do some magic
        // quit now if we may not or cannot open the file
        if (!$openfile || ($fp = fopen($filename, 'r')) == false) {
            return '';
        }

        $this->loadMagicRules();

        // call the recursive function for all mime types
        foreach ($this->magicRules as $mime => $rules) {
            foreach ($rules as $rule) {
                if ($this->applyRecursiveMagic($rule, $fp)) {
                    fclose($fp);
                    // keys look like "priority:mimetype"; return the part after the colon
                    $mime  = (string) $mime;
                    $colon = strpos($mime, ':');
                    return ($colon === false ? $mime : (string) substr($mime, $colon + 1));
                }
            }
        }

        // nothing worked, I will now only determine whether the file is binary or text
        fseek($fp, 0);
        // read at most 50 bytes; asking fread() for the bytes directly avoids
        // a zero-length read on empty files
        $data = fread($fp, 50);
        fclose($fp);
        if ($data === false) {
            $data = '';
        }
        $length = strlen($data);
        for ($i = 0; $i < $length; $i++) {
            $code = ord($data[$i]);
            // control characters other than TAB, LF and CR mean binary content
            if ($code < 0x20 && $code !== 0x09 && $code !== 0x0a && $code !== 0x0d) {
                return 'application/octet-stream';
            }
        }
        return 'text/plain';
    }

    /**
     * loads the lines of every readable mime/globs file -- helper function for type()
     * The lines are cached on the instance, so the files are read only once.
     */
    private function loadGlobs() {
        if (isset($this->globFileLines)) {
            return;
        }
        $this->globFileLines = array();

        // go through the data dirs to search for the globbing files
        foreach ($this->XDG_DATA_DIRS as $dir) {
            if (file_exists("$dir/mime/globs") && ($lines = file("$dir/mime/globs")) !== false) {
                $this->globFileLines = array_merge($this->globFileLines, $lines);
            }
        }
    }

    /**
     * matches a file name against the glob lines -- helper function for type()
     *
     * @param string $basename file name without directory part
     *
     * @return string the mimetype of the best match, or '' when nothing matched
     */
    private function matchGlobs($basename) {
        $this->loadGlobs();

        $mimetype = '';
        $matchlen = 0;

        // check the globs twice (both case sensitive and insensitive)
        for ($pass = 0; $pass < 2; $pass++) {
            $flag = ($pass > 0 ? FNM_CASEFOLD : 0);

            foreach ($this->globFileLines as $line) {
                // strip only the line terminator, leave any spaces; a last line
                // without terminator must not lose its final character
                $line = rtrim($line, "\r\n");

                // skip blank lines and comments
                if ($line === '' || $line[0] === '#') {
                    continue;
                }

                // a rule has the form "mimetype:glob"; ignore anything else
                $parts = explode(':', $line, 2);
                if (count($parts) < 2) {
                    continue;
                }
                list($mime, $glob) = $parts;

                // check for a possible direct match
                if ($basename === $glob) {
                    return $mime;
                }

                // match the globs, the longest matching pattern wins
                if (strlen($glob) > $matchlen && fnmatch($glob, $basename, $flag)) {
                    $mimetype = $mime;
                    $matchlen = strlen($glob);
                }
            }
        }

        return $mimetype;
    }

    /**
     * parses every readable mime/magic file into $this->magicRules -- helper function for type()
     *
     * The result is cached on the instance and ordered so that rules with the
     * highest priority are tried first.
     */
    private function loadMagicRules() {
        if (isset($this->magicRules)) {
            return;
        }
        $this->magicRules = array();

        // $parents[n] references the rule list that a rule with indent n is appended to
        $parents = array();

        // go through the data dirs in reverse order, so that sections from
        // directories listed first replace same-named sections read earlier
        foreach (array_reverse($this->XDG_DATA_DIRS) as $dir) {
            // read the file
            if (!file_exists("$dir/mime/magic") || ($buffer = file_get_contents("$dir/mime/magic")) === false) {
                continue;
            }

            // check the file type
            if (substr($buffer, 0, 12) != "MIME-Magic\0\n") {
                continue;
            }
            $buffer = (string) substr($buffer, 12);

            // go through the entire file
            while ($buffer !== '') {
                $first   = $buffer[0];
                $code    = ord($first);
                $isDigit = ($code >= 48 && $code <= 57);

                if ($first === '[') {
                    // create an entry for a new mimetype: "[priority:mimetype]\n"
                    $end = strpos($buffer, ']');
                    if ($end === false) {
                        // unterminated section header, stop parsing this file
                        break;
                    }
                    $mime = (string) substr($buffer, 1, $end - 1);
                    $this->magicRules[$mime] = array();
                    $parents[0] =& $this->magicRules[$mime];
                    // skip "[", the name, "]" and the newline
                    $buffer = (string) substr($buffer, strlen($mime) + 3);
                } elseif ($first === '>' || $isDigit) {
                    // add a new rule to the current mimetype:
                    // [indent]>start-offset=value[&mask][~word-size][+range-length]\n
                    $indent = ($first === '>' ? 0 : intval($buffer));
                    $buffer = (string) substr($buffer, strpos($buffer, '>') + 1);

                    $rule = new MIME_MagicRule;
                    $rule->start_offset = intval($buffer);
                    $buffer = (string) substr($buffer, strpos($buffer, '=') + 1);

                    // the value is prefixed by its length as a two-byte big-endian number
                    if (strlen($buffer) < 2) {
                        // truncated rule, stop parsing this file
                        break;
                    }
                    $value_length = 256 * ord($buffer[0]) + ord($buffer[1]);
                    $buffer       = (string) substr($buffer, 2);
                    $rule->value  = (string) substr($buffer, 0, $value_length);
                    $buffer       = (string) substr($buffer, $value_length);

                    // optional mask, same length as the value
                    if ($buffer !== '' && $buffer[0] === '&') {
                        $rule->mask = (string) substr($buffer, 1, $value_length);
                        $buffer     = (string) substr($buffer, $value_length + 1);
                    } else {
                        $rule->mask = str_repeat("\xff", $value_length);
                    }

                    // optional word size
                    if ($buffer !== '' && $buffer[0] === '~') {
                        $rule->word_size = intval(substr($buffer, 1));
                    } else {
                        $rule->word_size = 1;
                    }

                    // skip ahead to the range length or the end of the line
                    $buffer = (string) substr($buffer, strcspn($buffer, "+\n"));

                    // optional range length
                    $rule->range_length = (($buffer !== '' && $buffer[0] === '+') ? intval($buffer) : 1);

                    $newline = strpos($buffer, "\n");
                    $buffer  = ($newline === false ? '' : (string) substr($buffer, $newline + 1));

                    $rule->children = array();
                    $parents[$indent][] = $rule;
                    // rules with a deeper indent become children of this rule
                    $parents[$indent + 1] =& $rule->children;
                } else {
                    // unknown content, stop parsing this file
                    break;
                }
            }
        }

        // sort the array so items with high priority will get on top
        uksort($this->magicRules, array($this, 'compareMagicKeys'));
    }

    /**
     * orders "priority:mimetype" keys by descending numeric priority -- helper function for loadMagicRules()
     * Keys with equal priority are ordered in reverse alphabetical order.
     *
     * @return int negative, zero or positive as expected by uksort()
     */
    private function compareMagicKeys($a, $b) {
        $priorityA = intval($a);
        $priorityB = intval($b);
        if ($priorityA !== $priorityB) {
            return ($priorityA < $priorityB ? 1 : -1);
        }
        return strcmp((string) $b, (string) $a);
    }

    /**
     * apply the magic rules recursively -- helper function for type()
     *
     * A rule matches when its value occurs in the bytes read at its start
     * offset and, if it has children, at least one child matches as well.
     * The mask and word_size of the rule are not taken into account.
     *
     * @param MIME_MagicRule $rule the rule to test
     * @param resource       $fp   open handle of the file being examined
     *
     * @return boolean true when the rule matches
     */
    private function applyRecursiveMagic(MIME_MagicRule $rule, $fp) {
        $length = strlen($rule->value) + $rule->range_length;
        if ($length <= 0) {
            // nothing can be read, so nothing can match
            return false;
        }

        fseek($fp, $rule->start_offset);
        $data = fread($fp, $length);
        if ($data === false || strpos($data, $rule->value) === false) {
            return false;
        }

        if (sizeof($rule->children) == 0) {
            return true;
        }
        foreach ($rule->children as $child) {
            if ($this->applyRecursiveMagic($child, $fp)) {
                return true;
            }
        }
        return false;
    }

    /**
     * gets the textual description of the mimetype, optionally in the specified language
     *
     * Every data directory that contains mime/<mimetype>.xml is parsed in
     * turn; the last matching <comment> element that is read provides the
     * result. Parse errors are reported through error_log().
     *
     * @param string $mimetype the mimetype, e.g. "text/plain"
     * @param string $language value to match against the xml:lang attribute
     *
     * @return string the description, or '' when none was found
     */
    public function description($mimetype, $language = 'en') {
        $this->description = '';
        $this->lang        = $language;
        $this->read        = false;

        // go through the data dirs to search for the XML file for the specified mime type
        foreach ($this->XDG_DATA_DIRS as $dir) {
            // NOTE(review): $mimetype is interpolated into a file system path;
            // confirm callers never pass untrusted input containing "../"
            $filename = "$dir/mime/$mimetype.xml";

            // open the XML file
            if (!file_exists($filename) || ($fp = fopen($filename, 'r')) == false) {
                continue;
            }

            // initialize XML parser
            $xml_parser = xml_parser_create();
            xml_set_element_handler($xml_parser, array($this, 'description_StartElement'), array($this, 'description_EndElement'));
            xml_set_character_data_handler($xml_parser, array($this, 'description_Data'));

            // read the file and parse; stop only on a real end of data, not on
            // a chunk that merely evaluates to false (such as "0")
            while (($chunk = fread($fp, 4096)) !== false && $chunk !== '') {
                $data = str_replace("\n", "", $chunk);
                if (!xml_parse($xml_parser, $data, feof($fp))) {
                    error_log("ERROR: Couldn't parse $filename: ".
                              xml_error_string(xml_get_error_code($xml_parser)));
                    break;
                }
            }
            fclose($fp);
        }

        return $this->description;
    }

    /**
     * helper function for description()
     * Starts reading on a COMMENT element that has no language or the requested one.
     */
    private function description_StartElement($parser, $name, $attrs) {
        $this->read = false;
        if ($name == 'COMMENT') {
            if (!isset($attrs['XML:LANG']) || $attrs['XML:LANG'] == $this->lang) {
                $this->read = true;
            }
        }
    }

    /**
     * helper function for description()
     * Stops reading at the end of any element.
     */
    private function description_EndElement($parser, $name) {
        $this->read = false;
    }

    /**
     * helper function for description()
     * Stores the character data when inside an element that is being read.
     */
    private function description_Data($parser, $data) {
        if ($this->read == true) {
            $this->description = $data;
        }
    }
}
288
|
|
|
|
289
|
|
|
/**
 * helper class for MIME::type()
 *
 * Plain data holder for one rule parsed from a mime/magic file. All fields
 * are filled in by the parser in MIME; nothing is initialised here.
 */
class MIME_MagicRule {
    /** byte offset in the examined file at which reading starts */
    public $start_offset;

    /** byte string that is searched for in the data that was read */
    public $value;

    /** mask with the same length as the value; stored by the parser only */
    public $mask;

    /** word size given in the rule, 1 when absent; stored by the parser only */
    public $word_size;

    /** number of extra bytes read beyond the length of the value */
    public $range_length;

    /** nested MIME_MagicRule objects that are tested when this rule matches */
    public $children;
}
300
|
|
|
|
301
|
|
|
?> |
302
|
|
|
|
When comparing two booleans, it is generally considered safer to use the strict comparison operator.