1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace ImageMimeTypeGuesser\Detectors; |
4
|
|
|
|
5
|
|
|
use \ImageMimeTypeGuesser\Detectors\AbstractDetector; |
6
|
|
|
|
7
|
|
|
/**
 * Detector that guesses an image mime type from the leading "magic" bytes of a file.
 *
 * Signature references:
 * - https://en.wikipedia.org/wiki/List_of_file_signatures
 * - https://github.com/zjsxwc/mime-type-sniffer/blob/master/src/MimeTypeSniffer/MimeTypeSniffer.php
 * - http://phil.lavin.me.uk/2011/12/php-accurately-detecting-the-type-of-a-file/
 * - https://www.rapidtables.com/convert/number/hex-to-ascii.html
 *
 * JPEG 2000 (mime types: image/jp2, image/jpf, image/jpx, image/jpm):
 * - http://fileformats.archiveteam.org/wiki/JPEG_2000
 * - https://www.file-recovery.com/jp2-signature-format.htm
 * - https://github.com/Tinram/File-Identifier/blob/master/src/FileSignatures.php
 *   ('JPG 2000' => '00 00 00 0c 6a 50 20 20 0d 0a 87 0a')
 * - https://filesignatures.net/index.php?page=search&search=JP2&mode=EXT
 */
class SniffFirstFourBytes extends AbstractDetector
{
    /**
     * Try to detect mime type by sniffing the first bytes of the file.
     *
     * Returns:
     * - mime type (string) (if a known signature was recognized)
     * - null  (if nothing can be determined: unreadable file, empty file or unknown signature)
     *
     * This detector never returns false itself; "false" is kept in the return type
     * because it is part of the documented detector contract (not an image type the
     * server knows about).
     *
     * @param  string  $filePath  The path to the file
     * @return string|false|null  mimetype (if it is an image, and type could be determined),
     *    false (if it is not an image type that the server knows about)
     *    or null (if nothing can be determined)
     */
    protected function doDetect($filePath)
    {
        // Warnings are suppressed on purpose: an unreadable file simply means
        // "cannot determine", which is reported as null.
        $handle = @fopen($filePath, 'r');
        if ($handle === false) {
            return null;
        }

        try {
            return $this->sniffStream($handle);
        } finally {
            // Always release the handle, whichever return path was taken.
            fclose($handle);
        }
    }

    /**
     * Inspect the first bytes of an open stream and map them to a mime type.
     *
     * @param  resource  $handle  Readable stream positioned at the start of the file
     * @return string|null  The recognized mime type, or null if no signature matched
     */
    private function sniffStream($handle)
    {
        // 20 bytes is sufficient for all our sniffers, except image/svg+xml and JPEG 2000.
        // Those sniffers take care of reading more.
        $sampleBin = @fread($handle, 20);
        if ($sampleBin === false || $sampleBin === '') {
            // Read error or empty file: there is no first byte to dispatch on.
            return null;
        }
        $firstByte = $sampleBin[0];
        $sampleHex = strtoupper(bin2hex($sampleBin));

        // Each entry is [mime type, regex]. Hex patterns are matched against the
        // upper-case hex dump, bin patterns against the raw bytes.
        $hexPatterns = [];
        $binPatterns = [];

        switch ($firstByte) {
            case "\x00":
                // ISO base media files (HEIC/HEIF) start with a 4-byte box size
                // followed by "ftyp" and the brand, so the first byte is normally 0x00.
                // They are listed before the icon pattern so a box size of 0x0100/0x0200
                // is not mistaken for an ICO/CUR header.
                $hexPatterns[] = ['image/heic', "/^.{8}667479706865(6963|6978|7663|696D|6973|766D|7673)/"];
                $hexPatterns[] = ['image/heif', "/^.{8}667479706D(69|73)6631/"];

                // ICO is 00 00 01 00, CUR is 00 00 02 00.
                $hexPatterns[] = ['image/x-icon', "/^00000[12]00/"];

                $jp2Mime = $this->sniffJpeg2000($handle, $sampleBin, $sampleHex);
                if ($jp2Mime !== null) {
                    return $jp2Mime;
                }
                break;

            case "8":
                $binPatterns[] = ['application/psd', "/^8BPS/"];
                break;

            case "B":
                $binPatterns[] = ['image/bmp', "/^BM/"];
                break;

            case "G":
                $binPatterns[] = ['image/gif', "/^GIF8(7|9)a/"];
                break;

            case "I":
            case "M":
                // "II*\0" is little-endian TIFF, "MM\0*" is big-endian TIFF.
                $hexPatterns[] = ['image/tiff', "/^(49492A00|4D4D002A)/"];
                break;

            case "R":
                // PS: Another library is more specific: /^RIFF.{4}WEBPVP/
                // Is "VP" always there?
                $binPatterns[] = ['image/webp', "/^RIFF.{4}WEBP/"];
                break;

            case "<":
                // Another library looks for end bracket for svg.
                // We do not, as it requires more bytes read.
                // Note that <xml> tag might be big too... - so we read in 200 extra
                $moreBytes = @fread($handle, 200);
                if ($moreBytes !== false) {
                    $sampleBin .= $moreBytes;
                }
                $binPatterns[] = ['image/svg+xml', "/^(<\?xml[^>]*\?>.*)?<svg/is"];
                break;

            case "f":
                // Kept for backward compatibility: samples that begin directly with "ftyp".
                $binPatterns[] = ['image/heic', "/ftyphe(ic|ix|vc|im|is|vm|vs)/"];
                $binPatterns[] = ['image/heif', "/ftypm(i|s)f1/"];
                break;

            case "\x89":
                $hexPatterns[] = ['image/png', "/^89504E470D0A1A0A/"];
                break;

            case "\xFF":
                $hexPatterns[] = ['image/jpeg', "/^FFD8FF(DB|E0|EE|E1)/"];
                break;
        }

        foreach ($hexPatterns as list($mime, $pattern)) {
            if (preg_match($pattern, $sampleHex) === 1) {
                return $mime;
            }
        }
        foreach ($binPatterns as list($mime, $pattern)) {
            if (preg_match($pattern, $sampleBin) === 1) {
                return $mime;
            }
        }
        return null;
    }

    /**
     * Detect the JPEG 2000 family (jp2, jpx, jpm, mj2, ...).
     *
     * The file starts with a signature box (4-byte size, then "jP  "), followed by a
     * file type box: 4-byte size, "ftyp", then the 4-character brand which names the
     * sub format. The brand is located relative to the size declared by the first box.
     *
     * @param  resource  $handle     Stream positioned right after the initial sample
     * @param  string    $sampleBin  The bytes read so far
     * @param  string    $sampleHex  Upper-case hex dump of $sampleBin
     * @return string|null  The mime type, or null if this is not a JPEG 2000 file
     */
    private function sniffJpeg2000($handle, $sampleBin, $sampleHex)
    {
        if (preg_match("/^.{8}6A502020/", $sampleHex) !== 1) {
            return null;
        }

        // No "0x" prefix: hexdec() only wants hex digits (other characters are
        // deprecated input as of PHP 7.4).
        $block1Size = hexdec(substr($sampleHex, 0, 8));

        // The size comes from the (untrusted) file. A real signature box is 12 bytes,
        // so refuse absurd values instead of asking fread() for up to 4 GiB.
        if ($block1Size > 1024) {
            return null;
        }
        $block1Size = (int) $block1Size;

        // Read only what is still missing to cover "ftyp" and the brand.
        $missing = $block1Size + 4 + 8 - strlen($sampleBin);
        if ($missing > 0) {
            $moreBytes = @fread($handle, $missing);
            if ($moreBytes !== false) {
                $sampleBin .= $moreBytes;
            }
        }

        if (substr($sampleBin, $block1Size + 4, 4) !== 'ftyp') {
            return null;
        }

        $brand = rtrim((string) substr($sampleBin, $block1Size + 8, 4));
        if ($brand === '') {
            // Truncated file: do not return the invalid mime type "image/".
            return null;
        }
        if ($brand === 'mjp2') {
            return 'video/mj2';
        }
        return 'image/' . $brand;
    }
}
145
|
|
|
|