1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* @package fwolflib |
4
|
|
|
* @subpackage class |
5
|
|
|
* @copyright Copyright 2004-2010, Fwolf |
6
|
|
|
* @author Fwolf <[email protected]> |
7
|
|
|
*/ |
8
|
|
|
|
9
|
|
|
|
10
|
|
|
require_once(dirname(__FILE__) . '/fwolflib.php'); |
11
|
|
|
require_once(FWOLFLIB . 'func/regex_match.php'); |
12
|
|
|
require_once(FWOLFLIB . 'func/string.php'); |
13
|
|
|
|
14
|
|
|
|
15
|
|
|
/** |
16
|
|
|
* Parse a mail format message |
17
|
|
|
* |
18
|
|
|
* @package fwolflib |
19
|
|
|
* @subpackage class |
20
|
|
|
* @copyright Copyright 2004-2008, Fwolf |
21
|
|
|
* @author Fwolf <[email protected]> |
22
|
|
|
* @since 2007-08-05 |
23
|
|
|
* @version $Id$ |
24
|
|
|
*/ |
25
|
|
|
class MailParser extends Fwolflib{ |
|
|
|
|
26
|
|
|
|
27
|
|
|
/** |
28
|
|
|
* Error message |
29
|
|
|
* @var string |
30
|
|
|
*/ |
31
|
|
|
public $mErrorMsg = ''; |
32
|
|
|
|
33
|
|
|
/** |
34
|
|
|
* Error number |
35
|
|
|
* 0: no error |
36
|
|
|
* @var int |
37
|
|
|
*/ |
38
|
|
|
public $mErrorNo = 0; |
39
|
|
|
|
40
|
|
|
/** |
41
|
|
|
* Whole mail message |
42
|
|
|
* @var string |
43
|
|
|
*/ |
44
|
|
|
protected $mMsg = ''; |
45
|
|
|
|
46
|
|
|
/** |
47
|
|
|
* Attachment count named by this class |
48
|
|
|
* Attachment already got name is not counted here. |
49
|
|
|
* @var int |
50
|
|
|
*/ |
51
|
|
|
public $mMsgAttachmentNamedCount = 0; |
52
|
|
|
|
53
|
|
|
/** |
54
|
|
|
* Body part of mail |
55
|
|
|
* @var string |
56
|
|
|
*/ |
57
|
|
|
protected $mMsgBody = ''; |
58
|
|
|
|
59
|
|
|
/**
 * Contents in mail body
 *
 * One entry per decoded body part, appended by ParseBodyContent().
 * It has to start out as an array: when the constructor parses a message
 * Reset() is not called first, and appending with [] to a string is a
 * fatal error on PHP >= 7.1.
 * @var	array
 */
public $mMsgBodyContent = array();
64
|
|
|
|
65
|
|
|
/** |
66
|
|
|
* Header part of mail |
67
|
|
|
* @var string |
68
|
|
|
*/ |
69
|
|
|
protected $mMsgHeader = ''; |
70
|
|
|
|
71
|
|
|
/** |
72
|
|
|
* Delivered-To: in mail header |
73
|
|
|
* @var string |
74
|
|
|
*/ |
75
|
|
|
public $mMsgHeaderDeliveredTo = ''; |
76
|
|
|
|
77
|
|
|
/** |
78
|
|
|
* From: in mail header |
79
|
|
|
* @var string |
80
|
|
|
*/ |
81
|
|
|
public $mMsgHeaderFrom = ''; |
82
|
|
|
|
83
|
|
|
/** |
84
|
|
|
* Message-ID: in mail header |
85
|
|
|
* @var string |
86
|
|
|
*/ |
87
|
|
|
public $mMsgHeaderMessageId = ''; |
88
|
|
|
|
89
|
|
|
/** |
90
|
|
|
* Subject: in mail header |
91
|
|
|
* @var string |
92
|
|
|
*/ |
93
|
|
|
public $mMsgHeaderSubject = ''; |
94
|
|
|
|
95
|
|
|
/** |
96
|
|
|
* To: in mail header |
97
|
|
|
* @var string |
98
|
|
|
*/ |
99
|
|
|
public $mMsgHeaderTo = ''; |
100
|
|
|
|
101
|
|
|
/** |
102
|
|
|
* X-Sender: in mail header |
103
|
|
|
* @var string |
104
|
|
|
*/ |
105
|
|
|
public $mMsgHeaderXSender = ''; |
106
|
|
|
|
107
|
|
|
/** |
108
|
|
|
* Simple info about mail message |
109
|
|
|
* @var array |
110
|
|
|
*/ |
111
|
|
|
public $mMsgInfo = array(); |
112
|
|
|
|
113
|
|
|
|
114
|
|
|
/**
 * Constructor
 *
 * When a non-empty message is given it is stored and parsed right away;
 * otherwise the object is left untouched and Parse() can be called later.
 * @param	string	$msg	Mail message
 */
public function __construct($msg = '') {
    // Nothing to do without a message
    if (empty($msg)) {
        return;
    }

    $this->mMsg = $msg;
    $this->Parse();
} // end of func __construct
124
|
|
|
|
125
|
|
|
|
126
|
|
|
/**
 * Name an un-named attachment
 *
 * The name is built from the current date, the first 8 hex chars of
 * md5(Message-ID) and a running two-digit attachment number, followed by
 * an extension looked up from the mime type (empty when the type is
 * unknown). Every call increments $mMsgAttachmentNamedCount.
 *
 * The mime type is matched case-insensitively, because media type and
 * subtype names are not case sensitive.
 * @param	string	$mime	Mime type of attachment
 * @return	string	Generated file name, e.g. 20100101_0123abcd_01.jpg
 */
protected function NameAttachment($mime) {
    // Name by datetime & md5sum(messageid) & attachment number, so all
    // generated names of ONE mail share the same prefix.
    $this->mMsgAttachmentNamedCount ++;
    // Designed for up to 99 attachments: number is left-padded to 2 digits
    $s_name = date('Ymd')
        . '_' . substr(md5($this->mMsgHeaderMessageId), 0, 8)
        . '_' . str_pad(strval($this->mMsgAttachmentNamedCount), 2, '0', STR_PAD_LEFT);

    // Extension by mime type, all keys are lower case
    $ar = array(
        'application/java-archive' => '.jar',
        'application/java-serialized-object' => '.ser',
        'application/java-vm' => '.class',
        'application/msaccess' => '.mdb',
        'application/msword' => '.doc',
        'application/ogg' => '.ogg',
        'application/pdf' => '.pdf',
        'application/pgp-signature' => '.pgp',
        'application/postscript' => '.ps',
        'application/rar' => '.rar',
        'application/rdf+xml' => '.rdf',
        'application/rss+xml' => '.rss',
        'application/rtf' => '.rtf',
        'application/xml' => '.xml',
        'application/zip' => '.zip',
        'application/vnd.google-earth.kml+xml' => '.kml',
        'application/vnd.google-earth.kmz' => '.kmz',
        'application/vnd.mozilla.xul+xml' => '.xul',
        'application/vnd.ms-excel' => '.xls',
        'application/vnd.ms-powerpoint' => '.ppt',
        'application/vnd.oasis.opendocument.chart' => '.odc',
        'application/vnd.oasis.opendocument.database' => '.odb',
        'application/vnd.oasis.opendocument.formula' => '.odf',
        'application/vnd.oasis.opendocument.graphics' => '.odg',
        'application/vnd.oasis.opendocument.graphics-template' => '.otg',
        'application/vnd.oasis.opendocument.image' => '.odi',
        'application/vnd.oasis.opendocument.presentation' => '.odp',
        'application/vnd.oasis.opendocument.presentation-template' => '.otp',
        'application/vnd.oasis.opendocument.spreadsheet' => '.ods',
        'application/vnd.oasis.opendocument.spreadsheet-template' => '.ots',
        'application/vnd.oasis.opendocument.text' => '.odt',
        'application/vnd.oasis.opendocument.text-master' => '.odm',
        'application/vnd.oasis.opendocument.text-template' => '.ott',
        'application/vnd.oasis.opendocument.text-web' => '.oth',
        'application/vnd.visio' => '.vsd',
        'application/x-7z-compressed' => '.7z',
        'application/x-bittorrent' => '.torrent',
        'application/x-cab' => '.cab',
        'application/x-debian-package' => '.deb',
        'application/x-flac' => '.flac',
        'application/x-freemind' => '.mm',
        'application/x-gtar' => '.tgz',
        'application/x-httpd-php' => '.php',
        'application/x-httpd-php-source' => '.phps',
        'application/x-iso9660-image' => '.iso',
        'application/x-javascript' => '.js',
        'application/x-latex' => '.latex',
        'application/x-lha' => '.lha',
        'application/x-lzh' => '.lzh',
        'application/x-msdos-program' => '.exe',
        'application/x-msi' => '.msi',
        'application/x-object' => '.o',
        'application/x-redhat-package-manager' => '.rpm',
        'application/x-sh' => '.sh',
        'application/x-shockwave-flash' => '.swf',
        'application/x-tar' => '.tar',
        'application/x-tcl' => '.tcl',
        'application/x-xfig' => '.fig',
        'application/x-xpinstall' => '.xpi',
        'audio/midi' => '.mid',
        'audio/mpeg' => '.mpga',
        'audio/x-aiff' => '.aif',
        'audio/x-ms-wma' => '.wma',
        'audio/x-pn-realaudio' => '.ra',
        'audio/x-realaudio' => '.ra',
        'audio/x-wav' => '.wav',
        'image/gif' => '.gif',
        'image/jpeg' => '.jpg',
        'image/pcx' => '.pcx',
        'image/png' => '.png',
        'image/svg+xml' => '.svg',
        'image/tiff' => '.tif',
        'image/x-icon' => '.ico',
        'image/x-ms-bmp' => '.bmp',
        'image/x-photoshop' => '.psd',
        'message/rfc822' => '.eml',
        'text/calendar' => '.ics',
        'text/css' => '.css',
        'text/csv' => '.csv',
        'text/html' => '.html',
        'text/plain' => '.txt',
        'text/tab-separated-values' => '.tsv',
        'text/x-c++hdr' => '.hpp',
        'text/x-c++src' => '.cpp',
        'text/x-chdr' => '.h',
        'text/x-csrc' => '.c',
        'text/x-diff' => '.diff',
        'text/x-java' => '.java',
        'text/x-pascal' => '.pas',
        'text/x-perl' => '.pl',
        'text/x-python' => '.py',
        'text/x-sh' => '.sh',
        'text/x-tcl' => '.tcl',
        'text/x-tex' => '.tex',
        'text/x-vcalendar' => '.vcs',
        'text/x-vcard' => '.vcf',
        'video/3gpp' => '.3gp',
        'video/fli' => '.fli',
        'video/mpeg' => '.mpg',
        'video/mp4' => '.mp4',
        'video/quicktime' => '.mov',
        'video/x-ms-asf' => '.asf',
        'video/x-ms-wmv' => '.wmv',
        'video/x-msvideo' => '.avi',
        'x-world/x-vrml' => '.vrml'
    );

    // Normalise before lookup: 'IMAGE/JPEG' must map like 'image/jpeg'
    $s_key = strtolower(trim(strval($mime)));
    $s_ext = isset($ar[$s_key]) ? $ar[$s_key] : '';

    return $s_name . $s_ext;
} // end of func NameAttachment
257
|
|
|
|
258
|
|
|
|
259
|
|
|
/**
 * Parse mail message
 *
 * Splits the message at the first empty line into header and body, fills
 * $mMsgInfo with length statistics and then runs ParseHeader() and
 * ParseBody(). When no empty line exists, $mErrorNo is set to 1 with a
 * message in $mErrorMsg and nothing else is parsed.
 *
 * CRLF line endings are normalised to LF first, because all later steps
 * look for "\n\n" and would otherwise never find the header/body split.
 * @param	string	$msg	Mail message, If given, will reset all vars and start a new parse process. On default, deal $mMsg will not cause a reset.
 * @see	$mMsg
 */
public function Parse($msg = '') {
    // Reset all when $msg is given
    if (!empty($msg)) {
        $this->Reset();
        $this->mMsg = $msg;
    }

    // Msg length: raw first, then after newline normalisation and trim
    $this->mMsgInfo['msg_length'] = strlen($this->mMsg);
    $this->mMsg = trim(str_replace("\r\n", "\n", $this->mMsg));
    $this->mMsgInfo['msg_length_trimmed'] = strlen($this->mMsg);

    // Split header & body, find FIRST empty line
    $i = strpos($this->mMsg, "\n\n");
    if (false === $i) {
        // No empty line, what's wrong ?
        $this->mErrorNo = 1;
        $this->mErrorMsg = 'Didn\'t find empty line which split header & body.';
        return ;
    }

    // Got the split position. The body keeps ONE of the two newlines as
    // its first char; this is the historic behaviour and body_length
    // reported to callers includes it.
    $this->mMsgHeader = substr($this->mMsg, 0, $i);
    $this->mMsgBody = substr($this->mMsg, $i + 1);

    $this->mMsgInfo['header_length'] = strlen($this->mMsgHeader);
    $this->mMsgInfo['body_length'] = strlen($this->mMsgBody);

    // Parse header & body
    $this->ParseHeader();
    $this->ParseBody();
} // end of func Parse
296
|
|
|
|
297
|
|
|
|
298
|
|
|
/**
 * Parse body part of mail
 *
 * Looks for the first boundary declaration in the mail header and hands
 * the body together with that boundary (empty string if none) to
 * ParseBodyContent().
 *
 * Both declaration forms are accepted:
 *   Content-Type: multipart/mixed; boundary="----=_NextPart_000_0018_01C74EFC.64789E20"
 *   Content-Type: multipart/mixed; boundary=Apple-Mail-10-288581275
 * @see	$mMsgBody
 */
protected function ParseBody() {
    $b = '';
    // Quoted value ends at the closing quote, unquoted value ends at
    // whitespace, ';' or end of header. (A lazy match with an optional
    // terminator would cut an unquoted boundary down to its first char.)
    if (preg_match('/boundary=(?:"([^"]+)"|([^\s;"]+))/i', $this->mMsgHeader, $ar)) {
        $b = ('' !== $ar[1]) ? $ar[1] : $ar[2];
    }

    $this->ParseBodyContent($this->mMsgBody, $b);
} // end of func ParseBody
311
|
|
|
|
312
|
|
|
|
313
|
|
|
/**
 * Parse content of mail body, recursive
 *
 * Without a boundary the content is one single part: it is decoded by
 * ParseDecode() and appended to $mMsgBodyContent (empty content and the
 * lone '--' left over after a closing boundary are skipped).
 *
 * With a boundary the content is split at every "--boundary" and each
 * piece is parsed again, using the first boundary declared inside that
 * piece (if any) for the next level.
 * @param	string	$c	Content of mail, with boundary or inline
 * @param	string	$b	boundary, empty str means inline
 * @see	$mMsgBodyContent
 */
protected function ParseBodyContent($c, $b) {
    if ('' === strval($b)) {
        // Inline
        $c = trim($c);
        // Some mail client add '--' after close boundary, ignore it
        if ('' === $c || '--' === $c) {
            return;
        }
        // Appending with [] to a non-array is fatal on PHP >= 7.1
        if (!is_array($this->mMsgBodyContent)) {
            $this->mMsgBodyContent = array();
        }
        $this->mMsgBodyContent[] = $this->ParseDecode($c);
        return;
    }

    // Message is splitted by "--boundary", eg:
    // Content-Type: multipart/mixed; boundary="K8nIJk4ghYZn606h"
    // --K8nIJk4ghYZn606h
    foreach (explode("--$b", $c) as $part) {
        // Boundary for the next level, quoted or un-standard unquoted:
        // boundary=Apple-Mail-10-288581275
        // If multi boundary declared, the first one is chosen.
        $b_part = '';
        if (preg_match('/boundary=(?:"([^"]+)"|([^\s;"]+))/i', $part, $ar)) {
            $b_part = ('' !== $ar[1]) ? $ar[1] : $ar[2];
            // Remove 'boundary=...' from part, or it will find the same
            // 'new' boundary again on every recursion.
            $part = str_replace(
                array($ar[0], "boundary=\"$b_part\"", "boundary=$b_part"),
                '',
                $part
            );
        }
        $this->ParseBodyContent(trim($part), $b_part);
    }
} // end of func ParseBodyContent
375
|
|
|
|
376
|
|
|
|
377
|
|
|
/**
 * Decode parts of mail body, usually find by ParseBodyContent
 *
 * A part that does not start with a 'Content...' header line is returned
 * as text/plain unchanged. Otherwise the part is split at the first empty
 * line into its own header and body and the header is evaluated:
 *  - Content-Type                 -> 'type'
 *  - Content-Transfer-Encoding    -> body is base64 / quoted-printable
 *                                    decoded, 'encoding' is set
 *  - charset                      -> 'charset'; body is converted to
 *                                    utf-8 AFTER transfer decoding
 *  - name= / filename= / filename*= or a generated name -> 'filename'
 *    (only when the part looks like an attachment, else '')
 *
 * Header samples this method deals with:
 *   Content-Type: text/plain; charset=ISO-8859-1; format=flowed
 *   Content-Transfer-Encoding: quoted-printable
 *   Content-Type: application/pgp-signature; name="signature.asc"
 *   Content-Disposition: attachment; filename="DbExchange.tgz"
 *   Content-Disposition: attachment; filename*=utf-8''20071010-%E7%9B%91%E7%AE
 * @param	string	$c	Parts string
 * @return	array	Keys: type, content, charset, encoding, filename
 * @see	ParseBodyContent()
 */
protected function ParseDecode($c) {
    $c = trim($c);

    // Find "header" part, identify by leading 'Content'
    if (0 !== strncasecmp($c, 'Content', 7)) {
        // No content define, output directly
        return array(
            'type' => 'text/plain',
            'content' => $c,
            'charset' => '',
            'encoding' => '',
            'filename' => ''
        );
    }

    // Split the "header" defination from body string
    $i = strpos($c, "\n\n");
    if (false === $i) {
        // Special situation, eg: only have 1 line (header define):
        // Content-Type: multipart/alternative;
        $s_header = $c;
        $s_body = '';
    } else {
        $s_header = substr($c, 0, $i);
        // Keeps one leading newline, harmless for decoding
        $s_body = substr($c, $i + 1);
    }

    // Prepare default value
    $rs = array(
        'type' => '',
        'content' => '',
        'charset' => '',
        'encoding' => '',
        'filename' => ''
    );

    // Is there a Content-Type define ? The value may be the very last
    // thing in the header, so no trailing ';' or space is required.
    if (preg_match('/Content-Type:[ \t]*([\w\/\-\+\.]+)/i', $s_header, $ar)) {
        $rs['type'] = $ar[1];
        // Multi-part container, has no content of its own
        if ('multipart/alternative' == $ar[1]) {
            return $rs;
        }
    }

    // Transfer encoding, read from its own header line so that 'base64'
    // appearing in a filename can not trigger decoding.
    $s_cte = '';
    if (preg_match('/^Content-Transfer-Encoding:[ \t]*([\w\-]+)/mi', $s_header, $ar)) {
        $s_cte = strtolower($ar[1]);
    }
    if ('quoted-printable' == $s_cte) {
        // its format like '=0D=0A'
        $s_body = quoted_printable_decode($s_body);
        $rs['encoding'] = 'quoted-printable';
    } elseif ('base64' == $s_cte) {
        $s_body = base64_decode($s_body);
        $rs['encoding'] = 'base64';
    }
    // 7bit, 8bit, inline need no change to $s_body

    // Charset ? Value may be quoted. Conversion must happen on the
    // transfer-decoded bytes, converting the still encoded text does
    // nothing useful.
    if (preg_match('/charset=["\']?([\w\-]+)/i', $s_header, $ar)) {
        $rs['charset'] = $ar[1];
        if ('utf-8' != strtolower($ar[1])) {
            // Unknown charset label: PHP 8 throws ValueError, older
            // versions warn and return false. Keep body as is then.
            try {
                $s_converted = mb_convert_encoding($s_body, 'utf-8', $ar[1]);
            } catch (\ValueError $e) {
                $s_converted = false;
            }
            if (is_string($s_converted)) {
                $s_body = $s_converted;
            }
        }
    }

    // Content-Disposition:, means this is an attachment
    if (false !== stripos($s_header, 'Content-Disposition:')
        || false !== stripos($s_header, 'Content-ID:')
        || false !== stripos($s_header, 'attachment')
        || false !== stripos($s_header, 'name=')
        || 'image/jpeg' == $rs['type']
        || 'image/gif' == $rs['type']
        ) {
        // Find the filename or name it
        $s_t = '';
        $b_rfc2231 = false;
        if (preg_match('/name="([^"]*)"/i', $s_header, $ar) && '' !== $ar[1]) {
            // name="..." or filename="...", first one wins
            $s_t = $ar[1];
        } elseif (preg_match('/(?:file)?name(\*?)=([^\s;]+)/i', $s_header, $ar)) {
            // Unquoted value, or extended form filename*=charset'lang'value
            $s_t = trim($ar[2], '"');
            $b_rfc2231 = ('*' == $ar[1]);
        }

        if ('' === $s_t) {
            // Still can't get filename, name it ...
            $rs['filename'] = $this->NameAttachment($rs['type']);
        } elseif ($b_rfc2231
            && preg_match('/^[\w\-]*\'[^\']*\'(.+)$/', $s_t, $ar)) {
            // Extended value is percent-encoded; bytes are kept in the
            // charset named in front of it (not converted here).
            $rs['filename'] = rawurldecode($ar[1]);
        } else {
            $rs['filename'] = Rfc2047Decode($s_t);
        }

        // Some bad mail client didn't set the attach mime right
        // Content-Type: application/octet-stream;
        //	name="Dave_Nitsche_036.jpg"
        $s_ext = strtolower(substr($rs['filename'], -4));
        if ('.jpg' == $s_ext) {
            $rs['type'] = 'image/jpeg';
        } elseif ('.gif' == $s_ext) {
            $rs['type'] = 'image/gif';
        } elseif ('.png' == $s_ext) {
            $rs['type'] = 'image/png';
        }
    } else {
        // Not an attachment
        $rs['filename'] = '';
    }

    $rs['content'] = $s_body;
    return $rs;
} // end of func ParseDecode
524
|
|
|
|
525
|
|
|
|
526
|
|
|
/**
 * Parse header part of mail
 *
 * Fills the $mMsgHeader* properties from $mMsgHeader, one RegexMatch()
 * call per header field. Field names are matched case-insensitively
 * (eg: both 'Message-ID:' and 'Message-Id:' are found). When no
 * Message-ID is found, md5 of the whole header is used instead, so
 * attachment names still get a stable per-mail prefix.
 *
 * NOTE(review): the value assigned is whatever RegexMatch() returns; its
 * return shape for repeated fields (eg: several Delivered-To: lines) is
 * defined in func/regex_match.php - confirm there.
 * @see	$mMsgHeader
 */
protected function ParseHeader() {
    // Delivered-To: recipient address
    $this->mMsgHeaderDeliveredTo = RegexMatch('/^Delivered-To: (.*)/mi', $this->mMsgHeader);
    // From: "Sammy Benjamin" <address>
    $this->mMsgHeaderFrom = RegexMatch('/^From: (.*)/mi', $this->mMsgHeader);
    // Message-ID: <061c01c74f26$c1df0ac0$d6422241@psasquatch>
    $this->mMsgHeaderMessageId = RegexMatch('/^Message-ID: <(.*)>/mi', $this->mMsgHeader);
    if (empty($this->mMsgHeaderMessageId)) {
        // Fake a message-id
        $this->mMsgHeaderMessageId = md5($this->mMsgHeader);
    }
    // Subject: may be RFC 2047 encoded
    $this->mMsgHeaderSubject = Rfc2047Decode(RegexMatch('/^Subject: (.*)/mi', $this->mMsgHeader));
    // To: <Undisclosed-Recipient:;@gmail-pop.l.google.com>
    $this->mMsgHeaderTo = RegexMatch('/^To: (.*)/mi', $this->mMsgHeader);
    // X-Sender: sender address
    $this->mMsgHeaderXSender = RegexMatch('/^X-Sender: (.*)/mi', $this->mMsgHeader);
} // end of func ParseHeader
549
|
|
|
|
550
|
|
|
|
551
|
|
|
/**
 * Reset all vars, prepare to a new parse process.
 *
 * Every data property of this class is set back to its initial value,
 * including the message itself and the counter used to number generated
 * attachment names.
 *
 * @param	boolean	$bInit	Re-do init. Not used by this method.
 */
public function Reset($bInit = false)
{
    $this->mErrorMsg = '';
    $this->mErrorNo = 0;
    $this->mMsg = '';
    // The declared counter is $mMsgAttachmentNamedCount; without this
    // reset, generated attachment numbers keep growing across mails.
    $this->mMsgAttachmentNamedCount = 0;
    $this->mMsgBody = '';
    $this->mMsgBodyContent = array();
    $this->mMsgHeader = '';
    $this->mMsgHeaderDeliveredTo = '';
    $this->mMsgHeaderFrom = '';
    $this->mMsgHeaderMessageId = '';
    $this->mMsgHeaderSubject = '';
    $this->mMsgHeaderTo = '';
    $this->mMsgHeaderXSender = '';
    $this->mMsgInfo = array();
} // end of func Reset
574
|
|
|
|
575
|
|
|
} // end of class MailParser |
576
|
|
|
|
577
|
|
|
/* |
578
|
|
|
// Test: |
579
|
|
|
require_once('fwolflib/func/ecl.php'); |
580
|
|
|
//$mailtext = file_get_contents('1171559981.7971_1.wf:2,'); |
581
|
|
|
$mailtext = file_get_contents('1171559410.7880_1.wf:2,'); |
582
|
|
|
|
583
|
|
|
$mp=new MailParser($mailtext); |
584
|
|
|
|
585
|
|
|
ecl("Message length: " . number_format($mp->mMsgInfo['msg_length'])); |
586
|
|
|
ecl("Message length trimmed: " . $mp->mMsgInfo['msg_length_trimmed']); |
587
|
|
|
ecl("Header length: " . $mp->mMsgInfo['header_length']); |
588
|
|
|
ecl("Body length: " . $mp->mMsgInfo['body_length']); |
589
|
|
|
ecl("Delivered-To: " . htmlentities($mp->mMsgHeaderDeliveredTo)); |
590
|
|
|
ecl("From: " . htmlentities($mp->mMsgHeaderFrom)); |
591
|
|
|
ecl("Message-ID: <" . htmlentities($mp->mMsgHeaderMessageId) . ">"); |
592
|
|
|
ecl("Subject: " . htmlentities($mp->mMsgHeaderSubject)); |
593
|
|
|
ecl("To: " . htmlentities($mp->mMsgHeaderTo)); |
594
|
|
|
ecl("X-Sender: " . htmlentities($mp->mMsgHeaderXSender)); |
595
|
|
|
|
596
|
|
|
ecl("Body content part count: " . count($mp->mMsgBodyContent)); |
597
|
|
|
// :DEBUG: |
598
|
|
|
foreach ($mp->mMsgBodyContent as $val) { |
599
|
|
|
echo "----------\nContent type: {$val['type']} <br />\n"; |
600
|
|
|
echo " Length: " . strlen($val['content']) . "<br />\n"; |
601
|
|
|
echo " Charset: " . $val['charset'] . "<br />\n"; |
602
|
|
|
echo " Encoding: " . $val['encoding'] . "<br />\n"; |
603
|
|
|
echo " Filename: " . $val['filename'] . "<br />\n"; |
604
|
|
|
} |
605
|
|
|
// Output mail message & attachment |
606
|
|
|
ecl("==================================================="); |
607
|
|
|
if (0 < count($mp->mMsgBodyContent)) { |
608
|
|
|
foreach ($mp->mMsgBodyContent as $val) { |
609
|
|
|
if (empty($val['filename'])) { |
610
|
|
|
// Common message |
611
|
|
|
$c = $val['content']; |
612
|
|
|
if ('text/plain' == $val['type']) |
613
|
|
|
$c = nl2br($c); |
614
|
|
|
echo $c; |
615
|
|
|
} else { |
616
|
|
|
// Attachment |
617
|
|
|
ecl($val['filename']); |
618
|
|
|
if ('image/' == substr($val['type'], 0, 6)) |
619
|
|
|
ecl('<img src="data:' . $val['type'] . ';base64,' . base64_encode($val['content']) . '"/>'); |
620
|
|
|
} |
621
|
|
|
} |
622
|
|
|
} |
623
|
|
|
|
624
|
|
|
if (0 != $mp->mErrorNo) { |
625
|
|
|
ecl($mp->mErrorMsg); |
626
|
|
|
} |
627
|
|
|
|
628
|
|
|
*/ |
629
|
|
|
|
630
|
|
|
//ecl("to: " . htmlentities($mp->to)); |
631
|
|
|
//ecl("subject: " . htmlentities($mp->subject)); |
632
|
|
|
//ecl("recieved: " . htmlentities($mp->received)); |
633
|
|
|
|
634
|
|
|
/**
 * Small stand-alone mail parser (older implementation kept in this file).
 *
 * Reads a fixed set of header fields into public properties and stores
 * the body in $part: a single entry for ordinary mails, one entry per
 * body part for multipart/mixed mails. Each entry has at least the keys
 * 'Content-Type' and 'content'; multipart entries also carry
 * 'Content-Transfer-Encoding', 'Content-Description' and
 * 'Content-Disposition'.
 */
class parseMail {
    public $from = "";
    public $to = "";
    public $subject = "";
    public $received = "";
    public $date = "";
    public $message_id = "";
    public $content_type = "";
    public $part = array();

    /**
     * Constructor, parses $text right away.
     *
     * Declared explicitly (and before parseMail()) because a method named
     * like its class is no longer a constructor since PHP 8.0.
     * @param	string	$text	Whole mail message
     */
    public function __construct($text = "") {
        $this->parseMail($text);
    }

    /**
     * Decode a mail header and split the body into $part.
     *
     * Header fields missing from the mail are left as empty strings. When
     * a field occurs more than once, the last occurrence wins.
     * @param	string	$text	Whole mail message
     */
    public function parseMail($text = "") {
        list($headers, $bodyStart) = $this->scanHeaders($text, array(
            'from' => 'from',
            'received' => 'received',
            'to' => 'to',
            'subject' => 'subject',
            'date' => 'date',
            'content-type' => 'content_type',
            'message-id' => 'message_id'
        ));
        foreach ($headers as $property => $value) {
            $this->$property = $value;
        }

        // substr() returns false before PHP 8 when nothing is left
        $body = (string) substr($text, $bodyStart);

        // Boundary may be quoted or not, and may be followed by more
        // parameters, so it is picked out by pattern, not by position.
        $boundary = '';
        if (preg_match('/^multipart\/mixed;/i', $this->content_type)
            && preg_match('/boundary=(?:"([^"]+)"|([^\s;"]+))/i', $this->content_type, $m)) {
            $boundary = ('' !== $m[1]) ? $m[1] : $m[2];
        }

        if ('' !== $boundary) {
            $this->multipartSplit($boundary, $body);
        } else {
            $this->part[0]['Content-Type'] = $this->content_type;
            $this->part[0]['content'] = $body;
        }
    }

    /**
     * Decode a multipart header.
     *
     * Cuts part $partid out of $mailbody using the 'start' and 'ende'
     * offsets stored in $this->part[$partid] (missing offsets default to
     * the start / end of $mailbody), reads the part's Content-* headers
     * and stores headers and content back into $this->part[$partid].
     * base64 content is decoded and then reported as '8bit'. The two
     * offsets are removed afterwards.
     * @param	int	$partid	Index in $this->part
     * @param	string	$mailbody	Body text the offsets refer to
     */
    public function multipartHeaders($partid, $mailbody) {
        $from = isset($this->part[$partid]['start'])
            ? $this->part[$partid]['start'] : 0;
        $until = isset($this->part[$partid]['ende'])
            ? $this->part[$partid]['ende'] : strlen($mailbody);
        // An empty part gives $until < $from; a negative length would
        // make substr() count from the end of the string instead.
        $text = (string) substr($mailbody, $from, max(0, $until - $from));

        list($headers, $bodyStart) = $this->scanHeaders($text, array(
            'content-type' => 'c_t',
            'content-transfer-encoding' => 'c_t_e',
            'content-description' => 'c_desc',
            'content-disposition' => 'c_disp'
        ));

        $content = (string) substr($text, $bodyStart);
        $encoding = $headers['c_t_e'];
        if (0 === strcasecmp(trim($encoding), 'base64')) {
            $content = base64_decode($content);
            $encoding = "8bit";
        }

        $this->part[$partid]['content'] = $content;
        $this->part[$partid]['Content-Type'] = $headers['c_t'];
        $this->part[$partid]['Content-Transfer-Encoding'] = $encoding;
        $this->part[$partid]['Content-Description'] = $headers['c_desc'];
        $this->part[$partid]['Content-Disposition'] = $headers['c_disp'];
        unset($this->part[$partid]['start']);
        unset($this->part[$partid]['ende']);
    }

    /**
     * We have a multipart message body, split the parts.
     *
     * "--boundary" followed by a line break starts a new part, followed
     * by anything else (normally "--") it ends the message. If the
     * closing delimiter is missing, the last open part runs to the end
     * of $text. When $text holds no delimiter at all, no part is created.
     * @param	string	$boundary	Boundary without the leading "--"
     * @param	string	$text	Mail body
     */
    public function multipartSplit($boundary, $text) {
        $marker = "--" . $boundary;
        $markerLen = strlen($marker);
        $cursor = 0;
        $partcount = 0;

        while (true) {
            $end = strpos($text, $marker, $cursor);

            if (false === $end) {
                // Emergency exit: delimiter missing
                if ($partcount) {
                    $this->part[$partcount - 1]['ende'] = strlen($text);
                    $this->multipartHeaders($partcount - 1, $text);
                }
                break;
            }

            if ($partcount) {
                // Close the previous part; the line break in front of
                // the delimiter belongs to the delimiter, not the part.
                $stop = $end;
                if ($stop > 0 && "\n" === $text[$stop - 1]) {
                    $stop--;
                    if ($stop > 0 && "\r" === $text[$stop - 1]) {
                        $stop--;
                    }
                }
                $this->part[$partcount - 1]['ende'] = $stop;
                $this->multipartHeaders($partcount - 1, $text);
            }

            $after = $end + $markerLen;
            if ("\r\n" === substr($text, $after, 2)) {
                $cursor = $after + 2;
            } elseif ("\n" === substr($text, $after, 1)) {
                $cursor = $after + 1;
            } else {
                // '--' => end boundary
                break;
            }

            // line break => part boundary, next part starts behind it
            $this->part[$partcount]['start'] = $cursor;
            $partcount++;
        }
    }

    /**
     * Read header lines from the start of $text up to the first empty line.
     *
     * Lines starting with space or tab continue the previous header line
     * and are appended to its value as "\n" . line; continuation lines of
     * fields that are not wanted are dropped. Field names are compared
     * case-insensitively, a trailing "\r" on a line is ignored.
     * @param	string	$text	Text starting with header lines
     * @param	array	$fields	Lower case field name => key in result
     * @return	array	array(values by key, offset of first body char)
     */
    private function scanHeaders($text, array $fields) {
        $values = array_fill_keys(array_values($fields), '');
        $current = null;
        $offset = 0;
        $length = strlen($text);

        // $offset only ever grows, so the loop always terminates
        while ($offset < $length) {
            $eol = strpos($text, "\n", $offset);
            if (false === $eol) {
                // Last line without line break
                $line = substr($text, $offset);
                $offset = $length;
            } else {
                $line = substr($text, $offset, $eol - $offset);
                $offset = $eol + 1;
            }
            $line = rtrim($line, "\r");

            if ('' === $line) {
                break;	// end of headers!
            }

            if (' ' === $line[0] || "\t" === $line[0]) {
                if (null !== $current) {
                    $values[$current] .= "\n" . $line;
                }
                continue;
            }

            $current = null;
            if (preg_match('/^([^\s:]+):\s*(.*)$/', $line, $m)) {
                $name = strtolower($m[1]);
                if (isset($fields[$name])) {
                    $current = $fields[$name];
                    $values[$current] = $m[2];
                }
            }
        }

        return array($values, $offset);
    }
}
775
|
|
|
|
776
|
|
|
?> |
|
|
|
|
777
|
|
|
|
This class, trait or interface has been deprecated. The supplier of the file has supplied an explanatory message.
The explanatory message should give you some clue as to whether and when the type will be removed and what other type to use instead.