EmailParse::_parse_body()   F
last analyzed

Complexity

Conditions 40
Paths 229

Size

Total Lines 190
Code Lines 81

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 40
eloc 81
nc 229
nop 1
dl 0
loc 190
rs 2.9708
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * Class to parse an email into its header and body parts for use in posting
5
 *
6
 * @package   ElkArte Forum
7
 * @copyright ElkArte Forum contributors
8
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
9
 *
10
 * @version 2.0 Beta 1
11
 *
12
 */
13
14
namespace ElkArte\Maillist;
15
16
use ValueError;
17
18
/**
19
 * Class to parse an email into its header and body parts for use in posting
20
 *
21
 * What it does:
22
 *
23
 * - Can read from a supplied string, stdin, or from the failed email database
24
 * - Parses and decodes headers, return them in a named array $headers
25
 * - Parses, decodes, and translates message body returns body and plain_body sections
26
 * - Parses and decodes attachments returns attachments and inline_files
27
 *
28
 * Load class
29
 * Initiate as
30
 *  - $email_message = new EmailParse();
31
 *
32
 * Make the call, loads data, and performs all necessary parsings
33
 * - $email_message->read_email(true); // Read data and parse it, prefer HTML section
34
 *
35
 * Load data:
36
 * - $email_message->read_data(); // load data from stdin
37
 * - $email_message->read_data($data); // load data from a supplied string
38
 *
39
 * Get some email details:
40
 * - $email_message->headers // All the headers in an array
41
 * - $email_message->body // The decoded / translated message
42
 * - $email_message->raw_message // The entire message w/headers as read
43
 * - $email_message->plain_body // The plain text version of the message
44
 * - $email_message->attachments // Any attachments with key = filename
45
 * - $email_message->inline_files // Any in-line attachments with key = filename
46
 *
47
 * Optional functions:
48
 * - $email_message->load_address(); // Returns array with to/from/cc addresses
49
 * - $email_message->load_key(); // Returns the security key if found, also sets
50
 * message_key, message_type, and message_id
51
 * - $email_message->load_spam(); // Returns boolean on if spam headers are set
52
 * - $email_message->load_ip(); // Set ip origin of the email if available
53
 * - $email_message->load_returnpath(); // Load the message return path
54
 *
55
 * @package Maillist
56
 */
57
class EmailParse
58
{
59
	/** @var string The full message section (headers, body, etc.) we are working on */
60
	public $raw_message;
61
62
	/** @var string[] Attachments found after the message */
63
	public $attachments = [];
64
65
	/** @var string[] Attachments that we designated as inline with the text */
66
	public $inline_files = [];
67
68
	/** @var string Parsed and decoded message body may be plain text or HTML */
69
	public $body;
70
71
	/** @var string Parsed and decoded message body, only plain text version */
72
	public $plain_body;
73
74
	/** @var array All the parsed message headers */
75
	public $headers = [];
76
77
	/** @var string Full security key */
78
	public $message_key_id;
79
80
	/** @var string Message hex-code */
81
	public $message_key;
82
83
	/** @var string Message type of the key p, m, or t */
84
	public $message_type;
85
86
	/** @var bool If any HTML was found in the message */
87
	public $html_found = false;
88
89
	/** @var bool If any positive spam headers were found in the message */
90
	public $spam_found = false;
91
92
	/** @var int Message id of the key */
93
	public $message_id;
94
95
	/** @var string Holds the return path as set in the email header */
96
	public $return_path;
97
98
	/** @var string Holds the message subject */
99
	public $subject;
100
101
	/** @var array Holds the email to from & cc emails and names */
102
	public $email = [];
103
104
	/** @var string|bool Holds the sending ip of the email */
105
	public $ip = false;
106
107
	/** @var bool If the file was converted to utf8 */
108
	public $_converted_utf8 = false;
109
110
	/** @var bool Whether the message is a DSN (Delivery Status Notification - aka "bounce"),
111
	 * indicating failed delivery */
112
	public $_is_dsn = false;
113
114
	/** @var array Holds the field/value/type report codes from DSN messages
115
	    Accessible as [$field]['type'] and [$field]['value'] */
116
	public $_dsn;
117
118
	/** @var array Wrapped multipart/mixed sections */
119
	public $plain_parts = [];
120
121
	/** @var string Holds the current email address, to, from, cc */
122
	private $_email_address;
123
124
	/** @var string Holds the current email name */
125
	private $_email_name;
126
127
	/** @var array Holds each boundary section of the message */
128
	private $_boundary_section = [];
129
130
	/** @var int The total number of boundary sections */
131
	private $_boundary_section_count = 0;
132
133
	/** @var string The message header block */
134
	private $_header_block;
135
136
	/**
	 * Main email routine, calls the necessary functions to parse the data so that
	 * it's available.
	 *
	 * What it does:
	 *
	 * - reads/loads the data via read_data()
	 * - splits the headers from the body
	 * - breaks the header string into individual header keys
	 * - determines content type and character encoding
	 * - converts the message bodies
	 * - loads the subject and records the result of the DSN check in $_is_dsn
	 *
	 * Nothing is parsed when no message could be loaded.
	 *
	 * @param bool $html - flag to determine if we are saving HTML or not
	 * @param string $data - full header+message string
	 * @param string $location - optional, used for debug
	 */
	public function read_email($html = false, $data = '', $location = ''): void
	{
		// Main will read, split, parse, decode an email
		$this->read_data($data, $location);

		// read_data() stores false (not '') for empty input and the property can still be
		// null when nothing was loaded, so only continue with a real, non-empty string
		if (!is_string($this->raw_message) || $this->raw_message === '')
		{
			return;
		}

		$this->_split_headers();
		$this->_parse_headers();
		$this->_parse_content_headers();
		$this->_parse_body($html);
		$this->load_subject();
		$this->_is_dsn = $this->_check_dsn();
	}
167
168
	/**
	 * Loads an email message from a supplied string, the failed email log, or stdin
	 *
	 * - Any $data other than null is used directly, empty data is stored as false
	 * - With null data and no STDIN constant defined, _readFailed() does the loading
	 * - Otherwise the message is read from php://stdin
	 *
	 * @param string $data optional, if supplied, must be a full headers+body email string
	 * @param string $location optional, used for debug
	 */
	public function read_data($data = '', $location = ''): void
	{
		// Supplied a string of data, simply use it
		if ($data !== null)
		{
			if (empty($data))
			{
				$this->raw_message = false;
			}
			else
			{
				$this->raw_message = $data;
			}

			return;
		}

		// Not running from the CLI must be from the ACP
		if (!defined('STDIN'))
		{
			$this->_readFailed($location);

			return;
		}

		// Load file data straight from the pipe
		$this->raw_message = file_get_contents('php://stdin');
	}
192
193
	/**
	 * Compile, if available, the plain text sections into a single string
	 *
	 * - When $plain_parts holds entries they are joined with a newline
	 * - Otherwise $plain_body is returned, or an empty string if it was never set
	 *
	 * @return string
	 */
	public function getPlainBody(): string
	{
		if (!empty($this->plain_parts))
		{
			return implode("\n", $this->plain_parts);
		}

		// plain_body has no default value, returning null here would violate the return type
		return $this->plain_body ?? '';
	}
207
208
	/**
	 * Load a message for parsing by reading it from the DB or from a debug file
	 *
	 * - A posted 'item' is loaded from the failed log, after a permission check
	 * - Otherwise elk-test.eml under $location is used when that file exists
	 * - When neither applies, raw_message is left as it was
	 *
	 * @param string $location directory that may hold the elk-test.eml debug file
	 */
	private function _readFailed($location): void
	{
		// Called from the ACP, you must have approved permissions
		if (isset($_POST['item']))
		{
			isAllowedTo(['admin_forum', 'approve_emails']);

			// Read in the file from the failed log table
			$this->raw_message = $this->_query_load_email($_POST['item']);

			return;
		}

		// Debugging file, just used for testing
		$debug_file = $location . '/elk-test.eml';
		if (file_exists($debug_file))
		{
			isAllowedTo('admin_forum');
			$this->raw_message = file_get_contents($debug_file);
		}
	}
232
233
	/**
	 * Loads an email message from the database
	 *
	 * Selects the message column of the {db_prefix}postby_emails_error row whose
	 * id_email matches the supplied id.
	 *
	 * @param int $id id of the email to retrieve from the failed log
	 *
	 * @return string the stored message, or an empty string when no id was supplied
	 * or no row was returned
	 */
	private function _query_load_email($id): string
	{
		// Nothing to load then, so do not bother the database at all
		if (empty($id))
		{
			return '';
		}

		$db = database();

		$request = $db->query('', '
			SELECT 
				message
			FROM {db_prefix}postby_emails_error
			WHERE id_email = {int:id}
			LIMIT 1',
			[
				'id' => $id
			]
		);
		$row = $request->fetch_row();
		$request->free_result();

		// An unknown id gives no row; destructuring that would yield null and break
		// the declared string return type
		if (!is_array($row) || !isset($row[0]))
		{
			return '';
		}

		return (string) $row[0];
	}
265
266
	/**
	 * Separate the email message headers from the message body
	 *
	 * The header is separated from the body by
	 *  - 1 the first empty line or
	 *  - 2 a line that does not start with a tab, a field name followed by a colon or a space
	 *
	 * Sets $_header_block and $body. When the pattern does not match at all the
	 * header block is emptied and $body is left untouched.
	 */
	private function _split_headers(): void
	{
		$this->_header_block = '';
		$parts = [];

		// The header block ends based on condition (1) or (2)
		$found = preg_match('~^(.*?)\r?\n(?:\r?\n|(?!(\t|[\w-]+:|[ ])))(.*)~s', $this->raw_message, $parts);
		if (!$found)
		{
			return;
		}

		// A usable header block is non-empty and has at least one colon in it
		$has_headers = !empty($parts[1]) && str_contains($parts[1], ':');
		if ($has_headers)
		{
			$this->_header_block = $parts[1];
			$this->body = $parts[3];

			return;
		}

		// Actually, no headers in this boundary, everything is body
		$this->body = $this->raw_message;
	}
296
297
	/**
	 * Takes the header block created with _split_headers and separates it
	 * in to header keys => value pairs
	 *
	 * What it does:
	 *
	 * - Normalizes line endings and unfolds multi-line headers
	 * - Skips lines with no colon, an empty key, no value, or no space/tab after the colon
	 * - Lower cases the key and decodes the value with _decode_header()
	 * - A repeated content-type or content-transfer-encoding replaces the earlier value,
	 * any other repeated header is appended to the existing value with a space
	 */
	private function _parse_headers(): void
	{
		// Remove windows style \r\n's
		$this->_header_block = str_replace("\r\n", "\n", (string) $this->_header_block);

		// unfolding multi-line headers, a CRLF immediately followed by a LWSP-char is equivalent to the LWSP-char
		// preg_replace returns null on a regex error, keep working with a string in that case
		$this->_header_block = preg_replace("~\n(\t| )+~", ' ', $this->_header_block) ?? '';

		// Build the array of headers
		foreach (explode("\n", trim($this->_header_block)) as $header)
		{
			// No colon means this line is not a header at all
			$colon = strpos($header, ':');
			if ($colon === false)
			{
				continue;
			}

			$header_key = substr($header, 0, $colon);
			$after_colon = $colon + 1;

			// Invalid, empty or generally malformed header
			if (!$header_key || $after_colon === strlen($header) || ($header[$after_colon] !== ' ' && $header[$after_colon] !== "\t"))
			{
				continue;
			}

			// The header key (standardized) and value, the value starts after the separator character
			$header_value = $this->_decode_header(substr($header, $after_colon + 1));
			$header_key = strtolower(trim($header_key));

			// Only one content-type / content-transfer-encoding is ever valid, so use the last
			// one and hope it's right
			$single_valued = $header_key === 'content-type' || $header_key === 'content-transfer-encoding';
			if (!isset($this->headers[$header_key]) || $single_valued)
			{
				$this->headers[$header_key] = $header_value;
			}
			else
			{
				$this->headers[$header_key] .= ' ' . $header_value;
			}
		}
	}
343
344
	/**
	 * Converts a header string to ascii/UTF8
	 *
	 * What it does:
	 *
	 * - Headers, mostly subject, and names may be encoded as quoted printable or base64
	 * to allow for non ascii characters in those fields.
	 * - This encoding is separate from the message body encoding and is carried by
	 * the encoded-words inside the header value itself
	 * - iconv_mime_decode is tried first when available, a manual decode of each
	 * encoded-word is the fallback
	 *
	 * @param string $val the raw header value
	 * @param bool $strict use strict decoding, in the manual decode this leaves a first
	 * encoded-word untouched when it follows leading text that does not start with "("
	 * @return string the trimmed, decoded value
	 */
	private function _decode_header($val, $strict = false): string
	{
		// Check if this header even needs to be decoded.
		$looks_encoded = str_contains($val, '=?') && str_contains($val, '?=');
		if (!$looks_encoded)
		{
			return trim($val);
		}

		// If iconv mime is available, just use it and be done
		if (function_exists('iconv_mime_decode'))
		{
			// Mode 1 is ICONV_MIME_DECODE_STRICT, mode 2 is ICONV_MIME_DECODE_CONTINUE_ON_ERROR
			$mode = $strict ? 1 : 2;
			$iconv_decoded = iconv_mime_decode($val, $mode, 'UTF-8');

			// Bad decode, or partial decode, falls through to the manual approach
			$usable = $iconv_decoded !== false && !str_contains($iconv_decoded, '=?iso');
			if ($usable)
			{
				return $iconv_decoded;
			}
		}

		// The RFC 2047-3 defines an encoded-word as a sequence of characters that
		// begins with "=?", ends with "?=", and has two "?"s in between. After the first question mark
		// is the name of the character encoding being used; after the second question mark
		// is the manner in which it's being encoded into plain ASCII (Q=quoted printable, B=base64);
		// and after the third question mark is the text itself.
		// Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=
		$words = [];
		$found = preg_match_all('~(.*?)(=\?([^?]+)\?(Q|B)\?([^?]*)\?=)([^=\(]*)~i', $val, $words);
		if (!$found)
		{
			return trim($val);
		}

		// [0]full match, [1]leading text, [2]=? to ?=, [3]character set, [4]Q or B, [5]the encoded text [6]trailing text
		[$full_matches, $leading, , $charsets, $types, $texts, $trailing] = $words;

		$decoded = '';
		foreach ($full_matches as $i => $full_match)
		{
			if ($strict)
			{
				// Technically the encoded word can only be by itself or in a cname
				$check = trim($leading[$i]);
				if ($i === 0 && !empty($check) && $check[0] !== '(')
				{
					$decoded .= $full_match;
					continue;
				}
			}

			// Decode and convert our string, the pattern only ever captures Q or B here
			$decoded_text = strtolower($types[$i]) === 'q'
				? $this->_decode_string(str_replace('_', ' ', $texts[$i]), 'quoted-printable', $charsets[$i])
				: $this->_decode_string($texts[$i], 'base64', $charsets[$i]);

			// Add back in anything after the closing ?=
			if (!empty($texts[$i]))
			{
				$decoded_text .= $trailing[$i];
			}

			// Add back in the leading text to the now decoded value
			if (!empty($leading[$i]))
			{
				$decoded_text = $leading[$i] . $decoded_text;
			}

			$decoded .= $decoded_text;
		}

		return trim($decoded);
	}
438
439
	/**
	 * Decodes base64 or quoted-printable strings
	 * Converts from one character set to utf-8
	 *
	 * - Any other encoding value leaves the string undecoded
	 * - base64 data is returned as decoded, with no charset or line ending changes,
	 * when a content-type header is set and it does not contain text/
	 * - \r\n line endings are converted to \n on the way out
	 *
	 * @param string $string the encoded text
	 * @param string $encoding quoted-printable or base64
	 * @param string $charset character set of the text, empty or UTF-8 skips the conversion
	 *
	 * @return string
	 */
	private function _decode_string($string, $encoding, $charset = ''): string
	{
		// Decode if it's quoted printable or base64 encoded
		if ($encoding === 'quoted-printable')
		{
			// A line holding nothing but =A0 becomes two encoded CRLF pairs
			$string = preg_replace('~(^|\r\n)=A0($|\r\n)~m', '=0D=0A=0D=0A', $string);

			// Drop the soft line breaks (= at the end of a line) before decoding
			$unwrapped = preg_replace('~=\r?\n~', '', $string);
			$string = quoted_printable_decode($unwrapped);
		}
		elseif ($encoding === 'base64')
		{
			$string = base64_decode($string);

			// Not a text section, hand back the decoded data as is
			$is_not_text = isset($this->headers['content-type']) && !str_contains($this->headers['content-type'], 'text/');
			if ($is_not_text)
			{
				return $string;
			}
		}

		// Convert this to utf-8 if needed.
		$skip_conversion = empty($charset) || $charset === 'UTF-8';
		if (!$skip_conversion)
		{
			$string = $this->_charset_convert($string, strtoupper($charset), 'UTF-8');
		}

		return str_replace("\r\n", "\n", $string ?? '');
	}
474
475
	/**
	 * Pick the best possible function to convert a strings character set if any exist
	 *
	 * - iconv is tried first, mb_convert_encoding and then recode_string are the fallbacks
	 * when iconv is missing or returns false
	 * - $_converted_utf8 is set to true up front and only reset to false when none of
	 * the three functions exist
	 * - When the conversion result is empty the original string is returned
	 *
	 * @param string $string the text to convert
	 * @param string $from source character set
	 * @param string $to target character set
	 *
	 * @return string
	 */
	private function _charset_convert($string, $from, $to): string
	{
		// Let's assume we have one of the functions available to us
		$this->_converted_utf8 = true;
		$original = $string;
		$have_iconv = function_exists('iconv');

		// Use iconv if it is available
		if ($have_iconv)
		{
			$string = @iconv($from, $to . '//TRANSLIT//IGNORE', $string);
		}

		// No iconv or a false response from it
		$need_fallback = !$have_iconv || $string === false;
		if ($need_fallback)
		{
			if (function_exists('mb_convert_encoding'))
			{
				// Replace unknown characters with a space
				@ini_set('mbstring.substitute_character', '32');
				try
				{
					$string = mb_convert_encoding($original, $to, $from);
				}
				catch (ValueError)
				{
					// nothing, bad character set
				}
			}
			elseif (function_exists('recode_string'))
			{
				$string = @recode_string($from . '..' . $to, $original);
			}
			else
			{
				$this->_converted_utf8 = false;
			}
		}

		// An empty or failed result falls back to what we were given
		if (empty($string))
		{
			return $original;
		}

		return $string;
	}
524
525
	/**
	 * Content headers need to be set, so we can properly decode the message body.
	 *
	 * What it does:
	 *
	 * - Content headers often use the optional parameter value syntax that needs to be
	 * specially processed.
	 * - Parses or sets defaults for the following:
	 * content-type (text/plain, charset UTF-8), content-disposition (inline),
	 * content-transfer-encoding (7bit)
	 */
	private function _parse_content_headers(): void
	{
		// What kind of message content do we have?
		if (!isset($this->headers['content-type']))
		{
			// No content header given so we assume plain text
			$this->headers['content-type'] = 'text/plain';
			$this->headers['x-parameters']['content-type']['charset'] = 'UTF-8';
		}
		else
		{
			$this->_parse_content_header_parameters($this->headers['content-type'], 'content-type');

			// A content type with no charset parameter is taken to be UTF-8
			if (empty($this->headers['x-parameters']['content-type']['charset']))
			{
				$this->headers['x-parameters']['content-type']['charset'] = 'UTF-8';
			}
		}

		// Any special content or assume standard inline, then how the message has been
		// encoded, if none given assume standard 7bit
		$fallbacks = [
			'content-disposition' => 'inline',
			'content-transfer-encoding' => '7bit',
		];
		foreach ($fallbacks as $name => $default)
		{
			if (isset($this->headers[$name]))
			{
				$this->_parse_content_header_parameters($this->headers[$name], $name);
			}
			else
			{
				$this->headers[$name] = $default;
			}
		}
	}
573
574
	/**
	 * Checks if a given header has any optional parameter values
	 *
	 * A header like Content-type: text/plain; charset=iso-8859-1 will become
	 * - headers['content-type'] = text/plain
	 * - headers['x-parameters']['content-type']['charset'] = iso-8859-1
	 *
	 * If parameters are found, sets the primary value to the given key and the additional
	 * values are placed to our catch-all x-parameters key. Done this way to prevent
	 * overwriting a primary header key with a secondary one
	 *
	 * The primary value and the parameter names are lower cased, parameter values are
	 * stored as found with any surrounding double quotes removed.
	 *
	 * @param string $value the full header value
	 * @param string $key the header name the value belongs to
	 */
	private function _parse_content_header_parameters($value, $key): void
	{
		// Does the header key contain parameter values?
		$semicolon = strpos($value, ';');

		// No parameters associated with this header
		if ($semicolon === false)
		{
			$this->headers[$key] = strtolower(trim($value));

			return;
		}

		// Assign the primary value to the key
		$primary = substr($value, 0, $semicolon);
		$this->headers[$key] = strtolower(trim($primary));

		// Place any parameter values in the x-parameters key
		$parameters = ltrim(substr($value, $semicolon + 1));
		if (empty($parameters))
		{
			return;
		}

		$found = [];
		if (!preg_match_all('~([A-Za-z-]+)="?(.*?)"?\s*(?:;|$)~', $parameters, $found))
		{
			return;
		}

		// [1] holds the parameter names, [2] the matching values
		foreach ($found[1] as $index => $name)
		{
			$this->headers['x-parameters'][$key][strtolower($name)] = $found[2][$index];
		}
	}
617
618
	/**
619
	 * Based on the message content type, determine how to best proceed
620
	 *
621
	 * @param bool $html
622
	 */
623
	private function _parse_body($html = false): void
624
	{
625
		// Based on the content type for this body, determine what to do
626
		switch ($this->headers['content-type'])
627
		{
628
			// The text/plain content type is the generic subtype for plain text. It is the default specified by RFC 822.
629
			case 'text/plain':
630
				$this->body = $this->_decode_string($this->body, $this->headers['content-transfer-encoding'], $this->headers['x-parameters']['content-type']['charset']);
631
				$this->plain_body = $this->body;
632
				break;
633
			// The text/html content type is an Internet Media Type as well as a MIME content type.
634
			case 'text/html':
635
			case 'text/watch-html':
636
				$this->html_found = true;
637
				$this->body = $this->_decode_string($this->body, $this->headers['content-transfer-encoding'], $this->headers['x-parameters']['content-type']['charset']);
638
				break;
639
			// We don't process the following, noted here, so people know why
640
			//
641
			// multipart/digest - used to send collections of plain-text messages
642
			// multipart/byteranges - defined as a part of the HTTP message protocol. It includes two or more parts,
643
			// each with its own Content-Type and Content-Range fields
644
			// multipart/form-data - intended to allow information providers to express file upload requests uniformly
645
			// text/enriched - Uses a very limited set of formatting commands all with <command name></command name>
646
			// text/richtext - Obsolete version of the above
647
			//
648
			case 'multipart/digest':
649
			case 'multipart/byteranges':
650
			case 'multipart/form-data':
651
			case 'text/enriched':
652
			case 'text/richtext':
653
				break;
654
			// The following are considered multipart messages; as such they *should* contain several sections each
655
			// representing the same message in various ways such as plain text (mandatory), HTML section, and
656
			// encoded section such as quoted printable as well as attachments both as files and inline
657
			//
658
			// multipart/alternative - the same information is presented in different body parts in different forms.
659
			// The body parts are ordered by increasing complexity and accuracy
660
			// multipart/mixed -  used when the body parts are independent and need to be bundled in a particular order
661
			// multipart/parallel - display all the parts simultaneously on hardware and software that can do so (image with audio)
662
			// multipart/related - used for compound documents, those messages in which the separate body parts are intended to work
663
			// together to provide the full meaning of the message
664
			// multipart/report - defined for returning delivery status reports, with optional included messages
665
			// multipart/signed -provides a security framework for MIME parts
666
			// multipart/encrypted - as above provides a security framework for MIME parts
667
			// message/rfc822 - used to enclose a complete message within a message
668
			//
669
			case 'multipart/alternative':
670
			case 'multipart/mixed':
671
			case 'multipart/parallel':
672
			case 'multipart/related':
673
			case 'multipart/report':
674
			case 'multipart/signed':
675
			case 'multipart/encrypted':
676
			case 'multipart/relative':
677
			case 'multipart/appledouble':
678
			case 'application/vnd.wap.multipart.related':
679
			case 'message/rfc822':
680
				if (!isset($this->headers['x-parameters']['content-type']['boundary']))
681
				{
682
					// No boundary's but presented as multipart?, then we must have an incomplete message
683
					$this->body = '';
684
685
					return;
686
				}
687
688
				// Break up the message on the boundary --sections, each boundary section will have its
689
				// own Content Type and Encoding, we will process each as such
690
				$this->_boundary_split($this->headers['x-parameters']['content-type']['boundary'], $html);
691
692
				// We found multiple sections, let's go through each
693
				if ($this->_boundary_section_count > 0)
694
				{
695
					$html_ids = [];
696
					$text_ids = [];
697
					$this->body = '';
698
					$this->plain_body = '';
699
					$bypass = ['application/pgp-encrypted', 'application/pgp-signature', 'application/pgp-keys'];
700
701
					// Go through each boundary section
702
					for ($i = 0; $i < $this->_boundary_section_count; $i++)
703
					{
704
						// Stuff we can't or don't want to process
705
						if (in_array($this->_boundary_section[$i]->headers['content-type'], $bypass, true))
706
						{
707
							continue;
708
						}
709
710
						// HTML sections
711
						if ($this->_boundary_section[$i]->headers['content-type'] === 'text/html')
712
						{
713
							$html_ids[] = $i;
714
						}
715
						// Plain section
716
						elseif ($this->_boundary_section[$i]->headers['content-type'] === 'text/plain'
717
							&& $this->_boundary_section[$i]->headers['content-disposition'] !== 'attachment')
718
						{
719
							$text_ids[] = $i;
720
						}
721
						// Message is a DSN (Delivery Status Notification)
722
						elseif ($this->_boundary_section[$i]->headers['content-type'] === 'message/delivery-status')
723
						{
724
							$this->_process_DSN($i);
725
						}
726
727
						// Attachments, we love em
728
						$this->_process_attachments($i);
729
					}
730
731
					// We always return a plain text version for use
732
					if (!empty($text_ids))
733
					{
734
						foreach ($text_ids as $id)
735
						{
736
							// Join or use the last?
737
							if ($this->headers['content-type'] === 'multipart/mixed')
738
							{
739
								$this->plain_body .= ' ' . $this->_decode_body($this->_boundary_section[$id]->body);
740
							}
741
							// Such as multipart/alternative, use the last one as it will be most accurate
742
							else
743
							{
744
								$this->plain_body = $this->_boundary_section[$id]->body;
745
							}
746
						}
747
					}
748
					elseif (!empty($html_ids))
749
					{
750
						// For emails that have no plain text section, which they should to be valid, still ...
751
						$this->plain_body .= $this->_boundary_section[key($html_ids)]->body;
752
753
						$this->plain_body = str_ireplace('<p>', "\n\n", $this->plain_body);
0 ignored issues
show
Documentation Bug introduced by
It seems like str_ireplace('<p>', ' ', $this->plain_body) can also be of type array. However, the property $plain_body is declared as type string. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
754
						$this->plain_body = str_ireplace(['<br />', '<br>', '</p>', '</div>'], "\n", $this->plain_body);
755
						$this->plain_body = strip_tags($this->plain_body);
0 ignored issues
show
Bug introduced by
It seems like $this->plain_body can also be of type array; however, parameter $string of strip_tags() does only seem to accept string, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

755
						$this->plain_body = strip_tags(/** @scrutinizer ignore-type */ $this->plain_body);
Loading history...
756
					}
757
758
					$this->plain_body = $this->_decode_body($this->plain_body);
759
760
					// If they want the HTML section, and it's available, we need to set it
761
					if ($html && !empty($html_ids))
762
					{
763
						$this->html_found = true;
764
						$text_ids = $html_ids;
765
					}
766
767
					if (!empty($text_ids))
768
					{
769
						// For all the chosen sections
770
						foreach ($text_ids as $id)
771
						{
772
							// Join or use the last? These could be HTML sections as well
773
							if ($this->headers['content-type'] === 'multipart/mixed')
774
							{
775
								$this->body .= ' ' . $this->_boundary_section[$id]->body;
776
							}
777
							// Such as multipart/alternative, use the last found
778
							else
779
							{
780
								$this->body = $this->_boundary_section[$id]->body;
781
							}
782
783
							// A section may have its own attachments, if it had its own unique boundary sections,
784
							// we need to check and add them in as needed
785
							foreach ($this->_boundary_section[$id]->attachments as $key => $value)
786
							{
787
								$this->attachments[$key] = $value;
788
							}
789
790
							foreach ($this->_boundary_section[$id]->inline_files as $key => $value)
791
							{
792
								$this->inline_files[$key] = $value;
793
							}
794
						}
795
796
						$this->body = $this->_decode_body($this->body);
797
798
						// Return the right set of x-parameters and content type for the body we are returning
799
						if (isset($this->_boundary_section[$text_ids[0]]->headers['x-parameters']))
800
						{
801
							$this->headers['x-parameters'] = $this->_boundary_section[$text_ids[0]]->headers['x-parameters'];
802
						}
803
804
						$this->headers['content-type'] = $this->_boundary_section[$text_ids[0]]->headers['content-type'];
805
					}
806
				}
807
808
				break;
809
			default:
810
				// deal with all the rest (e.g., image/xyx) the standard way
811
				$this->body = $this->_decode_string($this->body, $this->headers['content-transfer-encoding'], $this->headers['x-parameters']['content-type']['charset']);
812
				break;
813
		}
814
	}
815
816
	/**
	 * Split up multipart messages and process each section separately
	 * as its own email object
	 *
	 * What it does:
	 *
	 * - Splits $this->body on the "--boundary" delimiter
	 * - Parses every non-empty part with a fresh EmailParse instance
	 * - Saves the body, plain_body, headers, attachments and inline_files of each
	 * part as an object in $this->_boundary_section
	 * - For a multipart/mixed message, collects the plain text of each part that is
	 * not an attachment in $this->plain_parts
	 *
	 * @param string $boundary the boundary marker, without the leading "--"
	 * @param bool $html - flag to indicate html content, passed on to read_email()
	 */
	private function _boundary_split($boundary, $html): void
	{
		// Split this message up on its boundary sections
		foreach (explode('--' . $boundary, $this->body) as $part)
		{
			$part = trim($part);

			// Nothing, or the "--" left behind by the closing delimiter? Compared
			// strictly so a part whose entire content is "0" is not thrown away.
			if ($part === '' || $part === '--')
			{
				continue;
			}

			// Parse this section just like it was a separate email
			$boundary_section = new EmailParse();
			$boundary_section->read_email($html, $part);

			// Save the data that we need
			$section = (object) [
				'body' => $boundary_section->body,
				'plain_body' => $boundary_section->plain_body,
				'headers' => $boundary_section->headers,
				'attachments' => $boundary_section->attachments,
				'inline_files' => $boundary_section->inline_files,
			];
			$this->_boundary_section[$this->_boundary_section_count] = $section;

			// If this boundary section is part of an outer multipart/mixed message, and neither
			// the message nor the section is an attachment, keep its plain text. Missing
			// headers are treated as "not set" instead of raising a warning.
			if (!empty($boundary_section->plain_body)
				&& ($this->headers['content-type'] ?? '') === 'multipart/mixed'
				&& ($this->headers['content-disposition'] ?? '') !== 'attachment'
				&& ($section->headers['content-disposition'] ?? '') !== 'attachment')
			{
				$this->plain_parts[] = $boundary_section->plain_body;
			}

			$this->_boundary_section_count++;
		}
	}
864
865
	/**
	 * If the boundary is a failed email response, set the DSN flag for the admin
	 *
	 * What it does:
	 *
	 * - Reads the section body line by line, each line formatted as FIELD: [TYPE;] VALUE
	 * - Builds an array keyed by the lowercase field name holding 'type' and 'value'
	 * - When the Action field is "failed" or "delayed", sets $this->_is_dsn and stores
	 * the section headers and parsed body in $this->_dsn
	 *
	 * @param int $i The section being worked
	 */
	private function _process_DSN($i): void
	{
		// These sections often have extra blank lines, so cannot be counted on to be
		// fully accessible in ->headers. The "body" of this section contains values
		// formatted by FIELD: [TYPE;] VALUE
		$dsn_body = [];
		$lines = explode("\n", str_replace("\r\n", "\n", $this->_boundary_section[$i]->body));
		foreach ($lines as $line)
		{
			// Split on the first colon only, values such as dates and diagnostic codes contain colons
			[$field, $rest] = array_pad(explode(':', $line, 2), 2, '');
			$field = strtolower(trim($field));

			// Blank separator lines carry no field at all
			if ($field === '')
			{
				continue;
			}

			// The optional type is whatever precedes the first semicolon of the value
			$type = '';
			$val = $rest;
			if (str_contains($rest, ';'))
			{
				[$type, $val] = explode(';', $rest, 2);
			}

			$dsn_body[$field] = ['type' => trim($type), 'value' => trim($val)];
		}

		// The Action field may be missing from a malformed report. Its value is compared
		// in lowercase since RFC 3464 defines it with case-insensitive ABNF literals.
		$action = strtolower($dsn_body['action']['value'] ?? '');

		// "delayed" is treated like "failed", remove it from the list if we don't want to flag
		// delayed delivery addresses as "dirty". A delay may be caused by temporary net
		// failures, e.g., DNS outage
		if (in_array($action, ['failed', 'delayed'], true))
		{
			// The email failed to be delivered.
			$this->_is_dsn = true;
			$this->_dsn = ['headers' => $this->_boundary_section[$i]->headers, 'body' => $dsn_body];

			return;
		}

		$this->_is_dsn = false;
	}
908
909
	/**
	 * Saves the data of a boundary section that is an attachment or an inline file
	 *
	 * What it does:
	 *
	 * - Acts on sections whose content-disposition is "attachment", "inline" or "*",
	 * or which carry a content-id header
	 * - Takes the file name from the content-disposition filename parameter, falling back
	 * to the content-type name parameter. Sections with neither are ignored
	 * - Data is saved in ->attachments, inline sections with a content-id are
	 * also listed in ->inline_files
	 *
	 * @param int $i The section being worked
	 */
	private function _process_attachments($i): void
	{
		$section = $this->_boundary_section[$i];
		$disposition = $section->headers['content-disposition'];
		$has_content_id = isset($section->headers['content-id']);

		// Not something we treat as a file
		if (!$has_content_id && !in_array($disposition, ['attachment', 'inline', '*'], true))
		{
			return;
		}

		// Get the attachments file name, without one there is nothing to save it under
		$file_name = $section->headers['x-parameters']['content-disposition']['filename']
			?? $section->headers['x-parameters']['content-type']['name']
			?? null;
		if ($file_name === null)
		{
			return;
		}

		// Escape all potentially unsafe characters from the filename
		$file_name = preg_replace('~(^\.)|/|[\n|\r]|(\.$)~m', '_', $file_name);

		// Load the attachment data
		$this->attachments[$file_name] = $section->body;

		// Inline attachments are a bit more complicated, remember the id they are referenced by
		if ($has_content_id && $disposition === 'inline')
		{
			$this->inline_files[$file_name] = trim($section->headers['content-id'], ' <>');
		}
	}
950
951
	/**
	 * Checks the body text to see if it may need to be further decoded
	 *
	 * What it does:
	 *
	 * - Sadly, what's in the body text is not always what the header claims, or the
	 * header is just wrong. Copy/paste in to email from other apps etc.
	 * - This does an extra check for quoted printable DNA and if found, decodes the
	 * message as such.
	 * - Line endings of the result are normalized from \r\n to \n
	 *
	 * @param string|null $val
	 * @return string
	 */
	private function _decode_body($val): string
	{
		if (empty($val))
		{
			// Nothing to decode, but still honor the string return type for a null value
			return is_scalar($val) ? (string) $val : '';
		}

		// The encoding tag can be missing in the headers or just wrong
		if (preg_match('~(?:=C2|=A0|=D2|=D4|=96)~', $val))
		{
			// Remove /r/n to be just /n
			$val = str_replace('=0D=0A', "\n", $val);

			// utf8 non-breaking space which does not decode right
			$val = str_replace('=C2=A0', ' ', $val);

			// Smart quotes they will decode to black diamonds or other, but if
			// UTF-8 these may be valid non-smart quotes. Charset names are case-insensitive
			// and the parameter may be absent, in which case it is treated as not UTF-8.
			$charset = (string) ($this->headers['x-parameters']['content-type']['charset'] ?? '');
			if (strcasecmp($charset, 'UTF-8') !== 0)
			{
				$val = str_replace(['=D4', '=D5', '=D2', '=D3', '=A0'], ["'", "'", '"', '"', ''], $val);
			}

			$val = $this->_decode_string($val, 'quoted-printable');
		}
		// Lines end in telltale quoted printable ... wrap and decode
		elseif (preg_match('~\s=[\r\n]~', $val))
		{
			// Swallow the complete line break (\r\n, \r or \n) so the wrapped lines are joined
			$unwrapped = preg_replace('~\s=(?:\r\n?|\n)~', ' ', $val);
			if ($unwrapped !== null)
			{
				$val = $unwrapped;
			}

			$val = $this->_decode_string($val, 'quoted-printable');
		}
		// Lines end in = but not ==
		elseif (preg_match('~((?<!=)=[\r\n])~', $val))
		{
			$val = $this->_decode_string($val, 'quoted-printable');
		}

		return str_replace("\r\n", "\n", $val ?? '');
	}
1003
1004
	/**
	 * Decodes the subject header and returns it ready for display
	 *
	 * - A missing subject header is created as an empty string
	 * - The decoded subject is html escaped and also kept in ->subject
	 *
	 * @return string
	 */
	public function load_subject(): string
	{
		// Those no-subject emails get an empty one
		$this->headers['subject'] ??= '';

		// Decode it to a readable form and make it safe to show
		$decoded = $this->_decode_header($this->headers['subject']);
		$this->subject = htmlspecialchars($decoded, ENT_COMPAT, 'UTF-8');

		return $this->subject;
	}
1024
1025
	/**
	 * Reports if the message is already known to be a DSN
	 *
	 * What it does:
	 *
	 * - Returns true when the _is_dsn flag has been set, _parse_body does that when it
	 * finds a "message/delivery-status" section
	 * - As many, many daemons and providers do not adhere to the RFC 3464
	 * standard, this function is the place to hold the "special cases". None are
	 * implemented yet, so anything not already flagged returns null
	 *
	 * @return bool|null true for a known DSN, null when it could not be determined
	 */
	private function _check_dsn(): ?bool
	{
		/** Non-header-based detection would be added here **/
		return $this->_is_dsn ? true : null;
	}
1049
1050
	/**
	 * Tries to find the original intended recipient that failed to deliver
	 *
	 * What it does:
	 *
	 * - Looks in the parsed DSN body for the original-recipient field and
	 * then for the final-recipient field, returning the first value that is set
	 *
	 * @return string|null null when neither field is available
	 */
	public function get_failed_dest(): ?string
	{
		foreach (['original-recipient', 'final-recipient'] as $field)
		{
			if (isset($this->_dsn['body'][$field]['value']))
			{
				return $this->_dsn['body'][$field]['value'];
			}
		}

		return null;
	}
1065
1066
	/**
	 * Finds the message return path and returns it
	 *
	 * - When the return-path header holds an <address>, the trimmed address
	 * is saved in ->return_path
	 * - Otherwise ->return_path is returned as it currently is
	 *
	 * @return string
	 */
	public function load_returnpath(): string
	{
		$found = [];
		$header = $this->headers['return-path'] ?? null;

		// Use whatever sits between the angle brackets
		if ($header !== null && preg_match('~(.*?)<(.*?)>~', $header, $found))
		{
			$this->return_path = trim($found[2]);
		}

		return $this->return_path;
	}
1084
1085
	/**
	 * Check for the message security key in common headers, in-reply-to, and references
	 *
	 * - A supplied key is validated and used as is, no searching is done
	 * - Without a supplied key the headers are searched first, and if the key is
	 * not found there, the message body and the raw message are searched
	 *
	 * @param string $key optional
	 * @return string|false the full key id (key-type+id) or false when none was found
	 */
	public function load_key($key = '')
	{
		$regex_key = '~(([a-z0-9]{32})\-(p|t|m)(\d+))~i';

		if (empty($key))
		{
			// Nothing supplied, so we play find the key: headers first, then the body
			if (!$this->_load_key_from_headers($regex_key))
			{
				$this->_load_key_from_body();
			}
		}
		else
		{
			// Supplied a key, let's check it
			$parts = [];
			if (preg_match($regex_key, $key, $parts) === 1)
			{
				$this->_load_key_details($parts);
			}
		}

		return !empty($this->message_key_id) ? $this->message_key_id : false;
	}
1116
1117
	/**
	 * Searches the most common locations for the security key
	 *
	 * - Normal return location would be in the in-reply-to header
	 * - Common for it to be shifted to a reference header, when found there the
	 * in-reply-to header is set to the found key
	 *
	 * @param string $regex_key
	 *
	 * @return bool if the security key is found
	 */
	private function _load_key_from_headers($regex_key): bool
	{
		$match = [];

		// The key *should* be in the in-reply-to header
		if (!empty($this->headers['in-reply-to'])
			&& preg_match($regex_key, $this->headers['in-reply-to'], $match) !== 0)
		{
			$this->_load_key_details($match);

			return true;
		}

		// Not there, sometimes email clients thread from the references instead
		if (empty($this->headers['references']))
		{
			return false;
		}

		foreach (explode(' ', $this->headers['references']) as $reference)
		{
			if (preg_match($regex_key, $reference, $match))
			{
				// Found the key in the ref, set the in-reply-to
				$this->headers['in-reply-to'] = $match[1];
				$this->_load_key_details($match);

				return true;
			}
		}

		return false;
	}
1160
1161
	/**
	 * Stores the pieces of a found key for use in other functions
	 *
	 * - Does nothing when the match holds no full key
	 *
	 * @param string[] $match from regex 1=>full, 2=>key, 3=>p|t|m, 4=>12345
	 */
	private function _load_key_details($match): void
	{
		if (empty($match[1]))
		{
			return;
		}

		// e.g. 7738c27ae6c431495ad26587f30e2121-m29557 is stored as
		// message_key_id => 7738c27ae6c431495ad26587f30e2121-m29557
		// message_key => 7738c27ae6c431495ad26587f30e2121, message_type => m, message_id => 29557
		$this->message_key_id = $match[1];
		$this->message_key = $match[2];
		$this->message_type = $match[3];
		$this->message_id = (int) $match[4];
	}
1177
1178
	/**
	 * Searches the message body or the raw email in search of the key
	 *
	 * - Not found in the headers, so let's search the body for the [key]
	 * as we insert that on outbound email just for this
	 * - The raw message is only searched when the body does not hold the key
	 * - A found key is also saved in the in-reply-to header
	 *
	 * @return bool if the security key is found
	 */
	private function _load_key_from_body(): bool
	{
		$regex_key = '~\[(([a-z0-9]{32})\-(p|t|m)(\d+))\]~i';
		$match = [];

		// Check the message body and, grrr, if that fails check everything!
		$found_key = preg_match($regex_key, $this->body, $match) === 1
			|| preg_match($regex_key, $this->raw_message, $match) === 1;

		if ($found_key)
		{
			$this->headers['in-reply-to'] = $match[1];
			$this->_load_key_details($match);
		}

		return $found_key;
	}
1206
1207
	/**
	 * Loads the email from, to, and cc addresses
	 *
	 * - Will attempt to return the name and address for fields "name:" <email>
	 * - The from header becomes email['from'] = email and email['from_name'] = name
	 * - The to and cc headers are split on commas and become lists, for example
	 * email['to'][n] = email and email['to_name'][n] = name
	 *
	 * @return array of addresses
	 */
	public function load_address(): array
	{
		$this->email['to'] = [];
		$this->email['from'] = [];
		$this->email['cc'] = [];

		// The "From" email and if possible, the senders common name
		if (isset($this->headers['from']))
		{
			$this->_parse_address($this->headers['from']);
			$this->email['from'] = $this->_email_address;
			$this->email['from_name'] = $this->_email_name;
		}

		// The "To" and "cc" headers can hold several addresses, collect each one
		// along with the real name when it is available
		foreach (['to', 'cc'] as $field)
		{
			if (!isset($this->headers[$field]))
			{
				continue;
			}

			foreach (explode(',', $this->headers[$field]) as $i => $address)
			{
				$this->_parse_address($address);
				$this->email[$field][$i] = $this->_email_address;
				$this->email[$field . '_name'][$i] = $this->_email_name;
			}
		}

		return $this->email;
	}
1255
1256
	/**
	 * Take an email address and parse out the email address and email name
	 *
	 * What it does:
	 *
	 * - Accepts "name" <email> as well as a bare email address
	 * - Removes spaces and (comments) from the address and saves it in ->_email_address
	 * - Saves the decoded common name, without surrounding quotes, in ->_email_name
	 * - When there is no usable common name, the part of the address in front of
	 * the @ is used as the name
	 *
	 * @param string $val
	 */
	private function _parse_address($val): void
	{
		$this->_email_name = '';

		$matches = [];
		$has_brackets = preg_match('~(.*?)<(.*?)>~', $val, $matches) === 1;

		// The email address, remove spaces and (comments)
		$address = trim(str_replace(' ', '', $has_brackets ? $matches[2] : $val));
		$this->_email_address = preg_replace('~\(.*?\)~', '', $address) ?? $address;

		// The user part of the address is our fallback name, empty when there is no @
		$at = strpos($this->_email_address, '@');
		$user = $at === false ? '' : substr($this->_email_address, 0, $at);

		// Just a sad lonely email address, so we use it as is
		if (!$has_brackets)
		{
			$this->_email_name = $user;

			return;
		}

		// Perhaps a common name as well as "name:" <email>
		if (!empty($matches[1]))
		{
			// Trimmed so the space in front of the < does not hide a closing quote
			$name = trim($this->_decode_header($matches[1]));
			if (str_starts_with($name, '"') && str_ends_with($name, '"'))
			{
				$name = substr($name, 1, -1);
			}
		}
		else
		{
			$name = $this->_email_address;
		}

		// Check the validity of the common name, if not sure, set it to email user.
		// The u modifier lets names that begin with a non-ASCII letter pass the check.
		if (!preg_match('~^\w+~u', $name))
		{
			$name = $user;
		}

		$this->_email_name = $name;
	}
1303
1304
	/**
	 * Finds the message sending ip and returns it
	 *
	 * What it does:
	 *
	 * - Looks in the x-posted-by, x-originating-ip, x-senderip, x-mdremoteip and
	 * received headers, in that order, and uses the first valid IP4 found
	 * - A header that is set but holds no valid IP4 is skipped so the remaining
	 * headers still get their chance
	 * - The result is also saved in ->ip
	 *
	 * @return string|bool the ip, or false if it can't find a valid IP4
	 */
	public function load_ip()
	{
		$this->ip = false;

		// The sending IP can be useful in spam prevention and making a post
		$candidates = ['x-posted-by', 'x-originating-ip', 'x-senderip', 'x-mdremoteip', 'received'];
		foreach ($candidates as $header)
		{
			if (!isset($this->headers[$header]))
			{
				continue;
			}

			// _parse_ip answers with an empty string when nothing valid was found
			$ip = $this->_parse_ip($this->headers[$header]);
			if ($ip !== '')
			{
				$this->ip = $ip;
				break;
			}
		}

		return $this->ip;
	}
1340
1341
	/**
	 * Extracts the first ip4 address from a string and validates it
	 *
	 * - The address may be wrapped in [brackets]
	 *
	 * @param string|null $string
	 * @return string the ip, or an empty string when no valid ip4 address was found
	 */
	private function _parse_ip($string): string
	{
		// Nothing to search in, passing null to preg_match is deprecated
		if ($string === null)
		{
			return '';
		}

		$matches = [];
		if (preg_match('~\[?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\]?~', $string, $matches) !== 1)
		{
			return '';
		}

		// The capture group is the dotted quad without any surrounding brackets
		$candidate = $matches[1];

		// Validate it matches an ip4 standard, such as each part being 255 or less
		return filter_var($candidate, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) !== false ? $candidate : '';
	}
1364
1365
	/**
	 * Finds if any spam headers have been positively set and returns that flag
	 *
	 * - Will look in various header fields where the spam status may reside
	 * - Sets ->spam_found to true when one of them marks the message as spam,
	 * otherwise ->spam_found is left as it is
	 *
	 * @return bool the current value of ->spam_found
	 */
	public function load_spam(): bool
	{
		// Is the header set, and does its value begin with the flag (case-insensitive)?
		$begins_with = fn(string $header, string $flag): bool => isset($this->headers[$header])
			&& stripos($this->headers[$header], $flag) === 0;

		// SpamAssassin (and others like rspamd)
		$flagged = $begins_with('x-spam-flag', 'yes')
			// SpamStopper and other variants
			|| $begins_with('x-spam-status', 'yes')
			// j-chkmail --  hi = likely spam lo = suspect ...
			|| $begins_with('x-j-chkmail-status', 'hi')
			// Nucleus Mailscanner, anything other than clean counts
			|| (isset($this->headers['x-nucleus-mailscanner'])
				&& strtolower($this->headers['x-nucleus-mailscanner']) !== 'found to be clean');

		if ($flagged)
		{
			$this->spam_found = true;
		}

		return $this->spam_found;
	}
1397
}
1398