|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace ImageMimeTypeGuesser\Detectors; |
|
4
|
|
|
|
|
5
|
|
|
use \ImageMimeTypeGuesser\Detectors\AbstractDetector; |
|
6
|
|
|
|
|
7
|
|
|
class SniffFirstFourBytes extends AbstractDetector
{
    /**
     * Try to detect the mime type by sniffing the first bytes of the file.
     *
     * The first 20 bytes are read and matched against known file signatures
     * ("magic numbers"). A few formats (JPEG 2000, SVG) need a little more
     * data, which is read on demand.
     *
     * Returns:
     * - mime type (string) (if a known signature was recognized)
     * - null  (if the file could not be read, is empty, or no signature matched)
     *
     * Note: this implementation itself never returns false; the wider
     * string|false|null type is kept in the annotation so the documented
     * contract of the method stays unchanged.
     *
     * References:
     * - https://en.wikipedia.org/wiki/List_of_file_signatures
     * - https://github.com/zjsxwc/mime-type-sniffer/blob/master/src/MimeTypeSniffer/MimeTypeSniffer.php
     * - http://phil.lavin.me.uk/2011/12/php-accurately-detecting-the-type-of-a-file/
     * - https://www.rapidtables.com/convert/number/hex-to-ascii.html
     *
     * @param  string  $filePath  The path to the file
     * @return string|false|null  mimetype (if it is an image, and type could be determined),
     *    false (if it is not an image type that the server knows about)
     *    or null (if nothing can be determined)
     */
    protected function doDetect($filePath)
    {
        // Errors are suppressed on purpose: an unreadable file simply means
        // "nothing can be determined" for this detector.
        $handle = @fopen($filePath, 'rb');
        if ($handle === false) {
            return null;
        }

        try {
            return $this->sniffStream($handle);
        } finally {
            // Always release the handle, whichever return path was taken.
            @fclose($handle);
        }
    }

    /**
     * Match the leading bytes of an open stream against known signatures.
     *
     * @param  resource  $handle  Open, readable stream positioned at the start of the file
     * @return string|null  The detected mime type, or null if nothing matched
     */
    private function sniffStream($handle)
    {
        // 20 bytes is sufficient for all our sniffers, except image/svg+xml
        // and JPEG 2000. Those sniffers take care of reading more.
        $sampleBin = @fread($handle, 20);
        if ($sampleBin === false || $sampleBin === '') {
            // Unreadable or empty file: there is no first byte to inspect.
            return null;
        }

        $firstByte = $sampleBin[0];
        $sampleHex = strtoupper(bin2hex($sampleBin));

        // Each entry is [mime type, regex]. Hex patterns are matched against
        // the upper-case hex dump, bin patterns against the raw bytes.
        $hexPatterns = [];
        $binPatterns = [];

        switch ($firstByte) {
            case "\x00":
                // ICO is 00 00 01 00 and CUR is 00 00 02 00.
                $hexPatterns[] = ['image/x-icon', "/^0000(01|02)00/"];

                // HEIC/HEIF are ISO base media files: a 4-byte box size
                // (whose first byte is normally 0x00), then "ftyp" and the brand.
                $binPatterns[] = ['image/heic', "/^.{4}ftyphe(ic|ix|vc|im|is|vm|vs)/s"];
                $binPatterns[] = ['image/heif', "/^.{4}ftypm(i|s)f1/s"];

                $jpeg2000 = $this->sniffJpeg2000($handle, $sampleBin, $sampleHex);
                if ($jpeg2000 !== null) {
                    return $jpeg2000;
                }
                break;

            case "8":
                $binPatterns[] = ['application/psd', "/^8BPS/"];
                break;

            case "B":
                $binPatterns[] = ['image/bmp', "/^BM/"];
                break;

            case "G":
                $binPatterns[] = ['image/gif', "/^GIF8(7|9)a/"];
                break;

            case "I":
            case "M":
                // "II*\0" is little-endian TIFF, "MM\0*" is big-endian TIFF.
                $hexPatterns[] = ['image/tiff', "/^(49492A00|4D4D002A)/"];
                break;

            case "R":
                // PS: Another library is more specific: /^RIFF.{4}WEBPVP/
                // Is "VP" always there?
                $binPatterns[] = ['image/webp', "/^RIFF.{4}WEBP/"];
                break;

            case "<":
                // Another library looks for end bracket for svg.
                // We do not, as it requires more bytes read.
                // Note that <xml> tag might be big too... - so we read in 200 extra
                $moreBytes = @fread($handle, 200);
                if ($moreBytes !== false) {
                    $sampleBin .= $moreBytes;
                }
                $binPatterns[] = ['image/svg+xml', "/^(<\?xml[^>]*\?>.*)?<svg/is"];
                break;

            case "f":
                // Kept for backward compatibility: data that starts directly
                // with the "ftyp" marker (no leading box size).
                $binPatterns[] = ['image/heic', "/ftyphe(ic|ix|vc|im|is|vm|vs)/"];
                $binPatterns[] = ['image/heif', "/ftypm(i|s)f1/"];
                break;

            case "\x89":
                $hexPatterns[] = ['image/png', "/^89504E470D0A1A0A/"];
                break;

            case "\xFF":
                $hexPatterns[] = ['image/jpeg', "/^FFD8FF(DB|E0|EE|E1)/"];
                break;
        }

        foreach ($hexPatterns as list($mime, $pattern)) {
            if (preg_match($pattern, $sampleHex) === 1) {
                return $mime;
            }
        }
        foreach ($binPatterns as list($mime, $pattern)) {
            if (preg_match($pattern, $sampleBin) === 1) {
                return $mime;
            }
        }
        return null;
    }

    /**
     * Detect the JPEG 2000 family (image/jp2, image/jpx, image/jpm, video/mj2, ...).
     *
     * The file starts with a signature box ("jP  ") whose size is stored in
     * the first four bytes. It is followed by a "ftyp" box whose brand
     * identifies the concrete sub format.
     *
     * See:
     * - http://fileformats.archiveteam.org/wiki/JPEG_2000
     * - https://www.file-recovery.com/jp2-signature-format.htm
     * - https://filesignatures.net/index.php?page=search&search=JP2&mode=EXT
     * - https://github.com/Tinram/File-Identifier/blob/master/src/FileSignatures.php
     *   ('JPG 2000' => '00 00 00 0c 6a 50 20 20 0d 0a 87 0a')
     *
     * @param  resource  $handle     Open stream, positioned right after the bytes in $sampleBin
     * @param  string    $sampleBin  The bytes read from the start of the file so far
     * @param  string    $sampleHex  Upper-case hex dump of $sampleBin
     * @return string|null  The mime type, or null if this is not a recognizable JPEG 2000 file
     */
    private function sniffJpeg2000($handle, $sampleBin, $sampleHex)
    {
        if (preg_match("/^.{8}6A502020/", $sampleHex) !== 1) {
            return null;
        }

        // Size of the signature box (big-endian, first four bytes). The caller
        // only gets here when the first byte is 0x00, so this is below 2^24.
        // No "0x" prefix: hexdec() deprecates invalid characters since PHP 7.4.
        $block1Size = (int) hexdec(substr($sampleHex, 0, 8));

        // The brand ends at: signature box + 4 (ftyp box size) + 4 ("ftyp") + 4 (brand).
        // Only fetch what is still missing beyond the bytes already buffered.
        $missing = $block1Size + 12 - strlen($sampleBin);
        if ($missing > 0) {
            $moreBytes = @fread($handle, $missing);
            if ($moreBytes !== false) {
                $sampleBin .= $moreBytes;
            }
        }

        if (substr($sampleBin, $block1Size + 4, 4) !== 'ftyp') {
            return null;
        }

        $subtyp = substr($sampleBin, $block1Size + 8, 4);
        if (!is_string($subtyp) || strlen($subtyp) < 4) {
            // Truncated file: the brand is missing, so nothing can be determined.
            return null;
        }
        if ($subtyp === 'mjp2') {
            return 'video/mj2';
        }
        // Brands are space padded to four bytes, e.g. "jp2 " => image/jp2.
        return 'image/' . rtrim($subtyp);
    }
}
|
145
|
|
|
|