Passed
Push — development ( 8e8389...e156b5 )
by Spuds
01:10 queued 28s
created

PreparseCode::_validateLinks()   B

Complexity

Conditions 10
Paths 5

Size

Total Lines 31
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 10.7998

Importance

Changes 0
Metric Value
cc 10
eloc 15
nc 5
nop 0
dl 0
loc 31
ccs 4
cts 5
cp 0.8
crap 10.7998
rs 7.6666
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
/**
4
 * This class contains those functions pertaining to preparsing BBC data
5
 *
6
 * @package   ElkArte Forum
7
 * @copyright ElkArte Forum contributors
8
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
9
 *
10
 * This file contains code covered by:
11
 * copyright: 2011 Simple Machines (http://www.simplemachines.org)
12
 *
13
 * @version 2.0 dev
14
 *
15
 */
16
17
namespace BBC;
18
19
use ElkArte\Helper\TokenHash;
20
21
/**
22
 * Class PreparseCode
23
 *
24
 * @package BBC
25
 */
26
class PreparseCode
27
{
28
	/** PCRE escape sequence for a non-breaking space, for use inside "u" mode patterns */
	public const NBS = '\x{A0}';

	/** @var PreparseCode|null the shared instance handed out by instance() */
	public static $instance;

	/** @var string the message currently being preparsed */
	public $message = '';

	/** @var string name of the user the message is prepared for */
	public $user_name = '';

	/** @var bool true when the message is only being previewed */
	protected $previewing = false;

	/** @var array protected code blocks, keyed by their %%token%% placeholder */
	public $code_blocks = [];
45
46 2
	/**
	 * Builds a preparser bound to one user name.
	 *
	 * The constructor is protected; instance() is the factory that creates objects.
	 *
	 * @param string $user_name value stored in $this->user_name
	 */
	protected function __construct($user_name)
	{
		$this->user_name = $user_name;
	}
55
56
	/**
	 * Prepares a raw message for later use by parse_bbc.
	 *
	 * The prepared text is written back through the $message reference.
	 *
	 * Steps, in order:
	 *   - Strips control characters, \r's and over-long runs of periods
	 *   - Neutralises bbc inside [nobbc]
	 *   - Trims stray quote tags and balances code / icode tags
	 *   - Swaps code blocks for tokens so the following steps cannot touch them
	 *   - Cleans up links (javascript, etc.) and processes the /me tag
	 *   - Lowercases and repairs list / table tags
	 *   - Removes empty b/i/s/u, quote and color tags
	 *   - Reduces multi-font [font] tags to their first font
	 *   - Validates follow= on url tags
	 *   - Restores the code blocks, then fixes spacing and table nesting
	 *   - Line breaks are only converted to <br /> when $previewing is false
	 *
	 * @param string $message the message, updated in place
	 * @param bool $previewing
	 *
	 * @return string|null an empty string when $message is empty, otherwise nothing is returned
	 */
	public function preparsecode(&$message, $previewing = false)
	{
		// Nothing to do for an empty message
		if (empty($message))
		{
			return '';
		}

		// Work on the class copy from here on
		$this->message = $message;
		$this->previewing = $previewing;

		// Drop control characters
		$this->message = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $this->message);

		// Undo double encoded numeric entities so all languages *theoretically* work even with the wrong charset
		$this->message = preg_replace('~&amp;#(\d{4,5}|[2-9]\d{2,4}|1[2-9]\d);~', '&#$1;', $this->message);

		// Entity-encode what is inside nobbc so it is never parsed
		$this->message = preg_replace_callback(
			'~\[nobbc\](.+?)\[/nobbc\]~i',
			fn($found) => $this->_preparsecode_nobbc_callback($found),
			$this->message
		);

		// No carriage returns
		$this->message = strtr($this->message, ["\r" => '']);

		// Collapse runs of 100 or more periods
		$this->message = preg_replace('~\.{100,}~', '...', $this->message);

		// Dangling quote tags at either end
		$this->_trimTrailingQuotes();

		// Make sure code and icode tags are balanced
		$this->_validateCodeBlocks();
		$this->_validateICodeBlocks();

		// Hide CODE blocks behind tokens for the remaining steps
		$this->message = $this->tokenizeCodeBlocks($this->message);

		// With code out of the way, fix the img and url tags
		$this->_fixTags();

		// Turn /me lines into [me=name]...[/me]
		$this->_itsAllAbout();

		// List and table tags must be lowercase
		$this->message = preg_replace_callback(
			'~\[([/]?)(list|li|table|tr|td|th)((\s[^\]]+)*)\]~i',
			fn($found) => $this->_preparsecode_lowertags_callback($found),
			$this->message
		);

		// Balance list open / close tags
		$this->_validateLists();

		// Repair common BBC input mistakes
		$this->_fixMistakes();

		// Remove empty bbc tags: b/i/s/u, then quote, then the many forms of color (applied in this order)
		$this->message = preg_replace(
			[
				'~\[[bisu]]\s*\[/[bisu]]~i',
				'~\[quote]\s*\[/quote]~i',
				'~\[color=(?:#[\da-fA-F]{3}|#[\da-fA-F]{6}|[A-Za-z]{1,20}|rgb\(\d{1,3}, ?\d{1,3}, ?\d{1,3}\))\]\s*\[/color\]~',
			],
			'',
			$this->message
		);

		// Font tags listing several fonts (copy&paste in the WYSIWYG by some browsers)
		$this->message = preg_replace_callback(
			'~\[font=([^]]*)](.*?(?:\[/font\]))~s',
			fn($found) => $this->_preparsecode_font_callback($found),
			$this->message
		);

		// Rel follow links need permission
		$this->_validateLinks();

		// Integration may process the message while the code blocks are still tokens
		call_integration_hook('integrate_preparse_tokenized_code', [&$this->message, $previewing, $this->code_blocks]);

		// Swap the tokens back for the real code blocks
		$this->message = $this->restoreCodeBlocks($this->message);

		// Integration may process the complete message
		call_integration_hook('integrate_preparse_code', [&$this->message, 0, $previewing]);

		// Safe spacing, line breaks are left alone while previewing
		$spacing = ['  ' => '&nbsp; ', "\xC2\xA0" => '&nbsp;'];
		if (!$previewing)
		{
			$spacing["\n"] = '<br />';
		}

		$this->message = strtr($this->message, $spacing);

		// Full scale table checking
		$this->_preparseTable();

		// Clean up things that slow the parser (common in posted code) and hand the result back
		$message = strtr($this->message, ['[]' => '&#91;]', '[&#039;' => '&#91;&#039;']);
	}
165
166
	/**
	 * Strips an opening [quote] left at the very end of the message and a
	 * closing [/quote] left at the very start, repeating until none remain.
	 *
	 * The message is trimmed of surrounding whitespace after each removal.
	 */
	private function _trimTrailingQuotes()
	{
		$opener = '[quote]';
		$closer = '[/quote]';
		$opener_length = strlen($opener);
		$closer_length = strlen($closer);

		// An opening quote at the end can never be closed
		while (substr($this->message, -$opener_length) === $opener)
		{
			$this->message = trim(substr($this->message, 0, -$opener_length));
		}

		// A closing quote at the start has nothing to close
		while (substr($this->message, 0, $closer_length) === $closer)
		{
			$this->message = trim(substr($this->message, $closer_length));
		}
	}
183 18
184
	/**
	 * Balances [code] tags in the message.
	 *
	 * - Appends [/code] when the last code tag seen left a block open
	 * - Prepends [code] when a closing tag had no opener and the message
	 *   contains no opening code tag at all
	 */
	private function _validateCodeBlocks()
	{
		$inside = false;
		$saw_opening = false;
		$stray_close = false;

		$found = [];
		if (preg_match_all('~(\[/?code(?:=[^]]+)?])~i', $this->message, $found))
		{
			foreach ($found[0] as $tag)
			{
				// Opening tag, we are now inside a block
				if ($tag[1] !== '/')
				{
					$saw_opening = true;
					$inside = true;
					continue;
				}

				// Closing tag with nothing open needs an opener
				if (!$inside)
				{
					$stray_close = true;
				}

				$inside = false;
			}
		}

		// Close a block that was left open
		if ($inside)
		{
			$this->message .= '[/code]';
		}

		// Only open one when there never was an opening tag
		if ($stray_close && !$saw_opening)
		{
			$this->message = '[code]' . $this->message;
		}
	}
240 18
241
	/**
	 * Balances [icode] tags one line at a time, so a block never spans lines.
	 *
	 * A line with more openers than closers gets one [/icode] appended, a line
	 * with more closers than openers gets one [icode] prepended. Empty
	 * [icode][/icode] pairs left behind are removed afterwards.
	 */
	private function _validateICodeBlocks()
	{
		$rows = explode("\n", $this->message);
		foreach ($rows as $index => $row)
		{
			preg_match_all('~(\[\/?icode(?:=[^\]]+)?\])~i', $row, $tags);

			// +1 for every opener, -1 for every closer
			$balance = 0;
			foreach ($tags[0] as $tag)
			{
				$balance += $tag[1] === '/' ? -1 : 1;
			}

			if ($balance > 0)
			{
				$rows[$index] = $row . '[/icode]';
			}
			elseif ($balance < 0)
			{
				$rows[$index] = '[icode]' . $row;
			}
		}

		// Rejoin, clearing the empty ones caused by linebreaks inside of icode tags
		$this->message = preg_replace('~(?<!\[icode\])\[icode\]\s*\[\/icode\]~i', '', implode("\n", $rows));
	}
276
277
	/**
	 * Protects code / icode blocks from preparse by replacing them with %%token%% values
	 *
	 * Each complete block (open tag, content, close tag) is stored in
	 * $this->code_blocks under its %%token%% key. A trailing open tag without
	 * a matching close is left in the message untouched.
	 *
	 * @param string $message
	 * @param bool $html when true the <code> / <icode> HTML tags are matched instead of the BBC ones
	 * @return string the message with complete blocks replaced by tokens
	 */
	public function tokenizeCodeBlocks($message, $html = false)
	{
		// The start/end tags to split on, code first and icode second
		if ($html)
		{
			$patterns = ['~(</code>|<code(?:[^>]+)?>)~', '~(</icode>|<icode(?:[^>]+)?>)~'];
		}
		else
		{
			$patterns = ['~(\[\/code\]|\[code(?:=[^\]]+)?\])~i', '~(\[\/icode\]|\[icode(?:=[^\]]+)?\])~i'];
		}

		// Token generator
		$tokenizer = new TokenHash();

		foreach ($patterns as $pattern)
		{
			$pieces = preg_split($pattern, $message, -1, PREG_SPLIT_DELIM_CAPTURE);

			// Pieces repeat as 0 = outside, 1 = begin tag, 2 = inside, 3 = close tag
			for ($outside = 0; isset($pieces[$outside + 3]); $outside += 4)
			{
				// A unique key to stand in for the block
				$key = $tokenizer->generate_hash(8);

				// Remember the full [code]stuff[/code]
				$this->code_blocks['%%' . $key . '%%'] = $pieces[$outside + 1] . $pieces[$outside + 2] . $pieces[$outside + 3];

				// The three pieces now spell %%$key%%
				$pieces[$outside + 1] = '%%';
				$pieces[$outside + 2] = $key;
				$pieces[$outside + 3] = '%%';
			}

			// The message with these blocks as %%tokens%%
			$message = implode('', $pieces);
		}

		return $message;
	}
320
321 18
	/**
	 * Fix any URLs posted - ie. remove 'javascript:'.
	 *
	 * - Runs _fixTag() for img, url and iurl tags (plus any added by integration)
	 * - Defuses action= links inside img tags
	 * - Resizes images when a maximum width or height is configured
	 * - Works on $this->message and returns nothing.
	 */
	private function _fixTags()
	{
		global $modSettings;

		// WARNING: Editing the below can cause large security holes in your forum.
		// Edit only if you are sure you know what you are doing.
		$web_protocols = ['http', 'https'];

		// [img]http://...[/img] or [img width=1]http://...[/img]
		$fixArray = [
			[
				'tag' => 'img',
				'protocols' => $web_protocols,
				'embeddedUrl' => false,
				'hasEqualSign' => false,
				'hasExtra' => true,
			],
		];

		// [url]http://...[/url], [url=http://...]name[/url] and the same two forms for iurl
		foreach (['url', 'iurl'] as $link_tag)
		{
			foreach ([false, true] as $with_equals)
			{
				$fixArray[] = [
					'tag' => $link_tag,
					'protocols' => $web_protocols,
					'embeddedUrl' => true,
					'hasEqualSign' => $with_equals,
				];
			}
		}

		// Integration may want to add to this array
		call_integration_hook('integrate_fixtags', [&$fixArray, &$this->message]);

		// Fix each type of tag.
		foreach ($fixArray as $rule)
		{
			$this->_fixTag(
				$rule['tag'],
				$rule['protocols'],
				$rule['embeddedUrl'],
				$rule['hasEqualSign'],
				!empty($rule['hasExtra'])
			);
		}

		// Now fix possible security problems with images loading links automatically...
		$this->message = preg_replace_callback(
			'~(\[img.*?\])(.+?)\[/img\]~is',
			fn($found) => $this->_fixTags_img_callback($found),
			$this->message
		);

		// Limit the size of images posted?
		$limit_width = !empty($modSettings['max_image_width']);
		$limit_height = !empty($modSettings['max_image_height']);
		if ($limit_width || $limit_height)
		{
			$this->resizeBBCImages();
		}
	}
392
393
	/**
	 * Fix a specific class of tag - ie. url with =.
	 *
	 * - Used by fixTags, fixes a specific tag's links.
	 * - Links that do not start with one of the protocols are completed using the
	 *   first protocol, the board domain (leading /) or the script url (leading ?)
	 * - A link starting with # in an embeddable tag is turned into an iurl anchor
	 *
	 * @param string $myTag - the tag
	 * @param string[] $protocols - http, https or ftp
	 * @param bool $embeddedUrl = false - whether it *can* be set to something
	 * @param bool $hasEqualSign = false, whether it *is* set to something
	 * @param bool $hasExtra = false - whether it can have extra cruft after the begin tag.
	 */
	private function _fixTag($myTag, $protocols, $embeddedUrl = false, $hasEqualSign = false, $hasExtra = false)
	{
		global $boardurl, $scripturl;

		// scheme://host of the board, used to complete links that start with a single /
		$domain_url = preg_match('~^([^:]+://[^/]+)~', $boardurl, $match) != 0 ? $match[1] : $boardurl . '/';

		// [tag=link]text[/tag] (close optional) or [tag]link[/tag]
		$pattern = $hasEqualSign
			? '~\[(' . $myTag . ')=([^\]]*?)\](?:(.+?)\[/(' . $myTag . ')\])?~is'
			: '~\[(' . $myTag . ($hasExtra ? '(?:[^\]]*?)' : '') . ')\](.+?)\[/(' . $myTag . ')\]~is';
		preg_match_all($pattern, $this->message, $matches);

		$replaces = [];
		foreach (array_keys($matches[0]) as $k)
		{
			// The link, without leading and trailing whitespace.
			$replace = trim($matches[2][$k]);
			$this_tag = $matches[1][$k];
			if ($hasEqualSign)
			{
				$this_close = empty($matches[4][$k]) ? '' : $matches[4][$k];
			}
			else
			{
				$this_close = $matches[3][$k];
			}

			// Does it already start with an accepted protocol?
			$found = false;
			foreach ($protocols as $protocol)
			{
				if (strncasecmp($replace, $protocol . '://', strlen($protocol) + 3) === 0)
				{
					$found = true;
					break;
				}
			}

			if (!$found)
			{
				$primary = $protocols[0];
				$first_char = substr($replace, 0, 1);
				$double_slash = strpos($replace, '//') === 0;

				// Http url checking?
				if ($primary === 'http')
				{
					if ($first_char === '/' && !$double_slash)
					{
						$replace = $domain_url . $replace;
					}
					elseif ($first_char === '?')
					{
						$replace = $scripturl . $replace;
					}
					elseif ($first_char === '#' && $embeddedUrl)
					{
						$replace = '#' . preg_replace('~[^A-Za-z0-9_\-#]~', '', substr($replace, 1));
						$this_tag = 'iurl';
						$this_close = 'iurl';
					}
					elseif ($double_slash)
					{
						$replace = $primary . ':' . $replace;
					}
					else
					{
						$replace = $primary . '://' . $replace;
					}
				}
				// FTP URL Checking
				elseif ($primary === 'ftp')
				{
					$replace = $primary . '://' . preg_replace('~^(?!ftps?)[^:]+://~', '', $replace);
				}
				else
				{
					$replace = $primary . '://' . $replace;
				}
			}

			// Build a replacement array that is considered safe and proper
			if ($hasEqualSign && $embeddedUrl)
			{
				$tail = empty($matches[4][$k]) ? '' : $matches[3][$k] . '[/' . $this_close . ']';
				$replaces[$matches[0][$k]] = '[' . $this_tag . '=' . $replace . ']' . $tail;
			}
			elseif ($hasEqualSign)
			{
				$replaces['[' . $matches[1][$k] . '=' . $matches[2][$k] . ']'] = '[' . $this_tag . '=' . $replace . ']';
			}
			else
			{
				$search = '[' . $matches[1][$k] . ']' . $matches[2][$k] . '[/' . $matches[3][$k] . ']';
				$replaces[$search] = $embeddedUrl
					? '[' . $this_tag . '=' . $replace . ']' . $matches[2][$k] . '[/' . $this_close . ']'
					: '[' . $this_tag . ']' . $replace . '[/' . $this_close . ']';
			}
		}

		// Entries that would not change anything are dropped
		$replaces = array_filter($replaces, static fn($to, $from) => $from != $to, ARRAY_FILTER_USE_BOTH);

		// Update as needed
		if (!empty($replaces))
		{
			$this->message = strtr($this->message, $replaces);
		}
	}
507
508
	/**
	 * Updates BBC img tags in a message so that the width / height respect the forum settings.
	 *
	 * - Will add the width/height attrib if needed, or update existing ones if they break the rules
	 * - Reads the limits from $modSettings['max_image_width'] / ['max_image_height'];
	 *   a missing or empty setting is treated as 0 (that dimension is never scaled down)
	 * - Images given without both dimensions are measured with url_image_size()
	 */
	public function resizeBBCImages()
	{
		global $modSettings;

		// We'll need this for image processing
		require_once(SUBSDIR . '/Attachments.subs.php');

		// Read each limit once; callers only guarantee that one of the two is set,
		// so guard the lookup to avoid undefined key warnings.
		$max_width = empty($modSettings['max_image_width']) ? 0 : $modSettings['max_image_width'];
		$max_height = empty($modSettings['max_image_height']) ? 0 : $modSettings['max_image_height'];

		// Find all the img tags - with or without width and height.
		preg_match_all('~\[img(\s+width=\d+)?(\s+height=\d+)?(\s+width=\d+)?](.+?)\[/img]~is', $this->message, $matches, PREG_PATTERN_ORDER);

		$replaces = [];
		foreach (array_keys($matches[0]) as $match)
		{
			// The width may be given before or after the height, the later one wins.
			$width_attr = empty($matches[3][$match]) ? $matches[1][$match] : $matches[3][$match];
			$height_attr = $matches[2][$match];

			// Now figure out if they had a desired height or width (skipping "width=" / "height=")
			$desired_width = empty($width_attr) ? 0 : (int) substr(trim($width_attr), 6);
			$desired_height = empty($height_attr) ? 0 : (int) substr(trim($height_attr), 7);

			// One was omitted, or both.  We'll have to find its real size...
			if (empty($desired_width) || empty($desired_height))
			{
				[$width, $height] = url_image_size(un_htmlspecialchars($matches[4][$match]));

				// They don't have any desired width or height!
				if (empty($desired_width) && empty($desired_height))
				{
					$desired_width = $width;
					$desired_height = $height;
				}
				// Scale it to the width...
				elseif (empty($desired_width) && !empty($height))
				{
					$desired_width = (int) (($desired_height * $width) / $height);
				}
				// Scale it to the height.
				elseif (!empty($width))
				{
					$desired_height = (int) (($desired_width * $height) / $width);
				}
			}

			// If the width and height are fine, just continue along...
			if ($desired_width <= $max_width && $desired_height <= $max_height)
			{
				continue;
			}

			// Too bad, it's too wide.  Make it as wide as the maximum.
			if (!empty($max_width) && $desired_width > $max_width)
			{
				$desired_height = (int) (($max_width * $desired_height) / $desired_width);
				$desired_width = $max_width;
			}

			// Now check the height, as well.  Might have to scale twice, even...
			if (!empty($max_height) && $desired_height > $max_height)
			{
				$desired_width = (int) (($max_height * $desired_width) / $desired_height);
				$desired_height = $max_height;
			}

			$replaces[$matches[0][$match]] = '[img'
				. (empty($desired_width) ? '' : ' width=' . $desired_width)
				. (empty($desired_height) ? '' : ' height=' . $desired_height)
				. ']' . $matches[4][$match] . '[/img]';
		}

		// If any img tags were actually changed...
		if (!empty($replaces))
		{
			$this->message = strtr($this->message, $replaces);
		}
	}
585
586 18
	/**
	 * Replace /me with the users name, including inside footnotes
	 *
	 * A line of the form "/me text" becomes [me=&quot;name&quot;]text[/me]; the same
	 * happens to "/me text" placed directly after an opening [footnote] tag.
	 *
	 * The name is always wrapped in &quot;. The previous check,
	 * preg_match(...) !== false, was true for every name (preg_match only
	 * returns false on a regex error), so the unquoted form was never produced.
	 *
	 * The name is inserted through a callback and not a replacement string, so
	 * "$1" or "\1" inside a user name is kept literally and is not expanded as
	 * a backreference.
	 */
	private function _itsAllAbout()
	{
		$me_regex = '~(\A|\n)/me(?: |&nbsp;)([^\n]*)(?:\z)?~i';
		$footnote_regex = '~(\[footnote\])/me(?: |&nbsp;)([^\n]*?)(\[\/footnote\])~i';

		$open_tag = '[me=&quot;' . $this->user_name . '&quot;]';

		// $found[1] is the line start (or footnote tag), $found[2] the text after /me
		$this->message = preg_replace_callback(
			$me_regex,
			static fn($found) => $found[1] . $open_tag . $found[2] . '[/me]',
			$this->message
		);

		// Inside footnotes, $found[3] is the closing footnote tag
		$this->message = preg_replace_callback(
			$footnote_regex,
			static fn($found) => $found[1] . $open_tag . $found[2] . '[/me]' . $found[3],
			$this->message
		);
	}
605 18
606
	/**
	 * Make sure lists have open and close tags
	 *
	 * Counts [list] / "[list " openers against [/list] closers, then prepends
	 * the missing openers or appends the missing closers.
	 */
	private function _validateLists()
	{
		$opened = substr_count($this->message, '[list]') + substr_count($this->message, '[list ');
		$closed = substr_count($this->message, '[/list]');
		$unclosed = $opened - $closed;

		if ($unclosed < 0)
		{
			// More closers than openers, open them at the start
			$this->message = str_repeat('[list]', -$unclosed) . $this->message;
		}
		elseif ($unclosed > 0)
		{
			// More openers than closers, close them at the end
			$this->message .= str_repeat('[/list]', $unclosed);
		}
	}
624
625
	/**
	 * Repair a few *cough* common mistakes from user input and from wizzy cut/paste
	 *
	 * A fixed, ordered list of regex fixes for table and list markup is applied
	 * to the message three times over. Tags found to be correctly placed are
	 * temporarily renamed to [_tag_] so the later catch-all fixes skip them, and
	 * are renamed back near the end of each pass.
	 */
	private function _fixMistakes()
	{
		// Optional whitespace, non breaking spaces included
		$gap = '[\s' . self::NBS . ']*';

		$mistake_fixes = [
			// Find [table]s not followed by [tr].
			'~\[table\](?!' . $gap . '\[tr\])~su' => '[table][tr]',
			// Find [tr]s not followed by [td] or [th]
			'~\[tr\](?!' . $gap . '\[t[dh]\])~su' => '[tr][td]',
			// Find [/td] and [/th]s not followed by something valid.
			'~\[/t([dh])\](?!' . $gap . '(?:\[t[dh]\]|\[/tr\]|\[/table\]))~su' => '[/t$1][/tr]',
			// Find [/tr]s not followed by something valid.
			'~\[/tr\](?!' . $gap . '(?:\[tr\]|\[/table\]))~su' => '[/tr][/table]',
			// Find [/td] [/th]s incorrectly followed by [/table].
			'~\[/t([dh])\]' . $gap . '\[/table\]~su' => '[/t$1][/tr][/table]',
			// Find [table]s, [tr]s, and [/td]s (possibly correctly) followed by [td].
			'~\[(table|tr|/td)\](' . $gap . ')\[td\]~su' => '[$1]$2[_td_]',
			// Now, any [td]s left should have a [tr] before them.
			'~\[td\]~s' => '[tr][td]',
			// Look for [tr]s which are correctly placed.
			'~\[(table|/tr)\](' . $gap . ')\[tr\]~su' => '[$1]$2[_tr_]',
			// Any remaining [tr]s should have a [table] before them.
			'~\[tr\]~s' => '[table][tr]',
			// Look for [/td]s or [/th]s followed by [/tr].
			'~\[/t([dh])\](' . $gap . ')\[/tr\]~su' => '[/t$1]$2[_/tr_]',
			// Any remaining [/tr]s should have a [/td].
			'~\[/tr\]~s' => '[/td][/tr]',
			// Look for properly opened [li]s which aren't closed.
			'~\[li\]([^\[\]]+?)\[li\]~s' => '[li]$1[_/li_][_li_]',
			'~\[li\]([^\[\]]+?)\[/list\]~s' => '[_li_]$1[_/li_][/list]',
			'~\[li\]([^\[\]]+?)$~s' => '[li]$1[/li]',
			// Lists - find correctly closed items/lists.
			'~\[/li\](' . $gap . ')\[/list\]~su' => '[_/li_]$1[/list]',
			// Find list items closed and then opened.
			'~\[/li\](' . $gap . ')\[li\]~su' => '[_/li_]$1[_li_]',
			// Now, find any [list]s or [/li]s followed by [li].
			'~\[(list(?: [^\]]*?)?|/li)\](' . $gap . ')\[li\]~su' => '[$1]$2[_li_]',
			// Allow for sub lists.
			'~\[/li\](' . $gap . ')\[list\]~u' => '[_/li_]$1[list]',
			'~\[/list\](' . $gap . ')\[li\]~u' => '[/list]$1[_li_]',
			// Any remaining [li]s weren't inside a [list].
			'~\[li\]~' => '[list][li]',
			// Any remaining [/li]s weren't before a [/list].
			'~\[/li\]~' => '[/li][/list]',
			// Put the correct ones back how we found them.
			'~\[_(li|/li|td|tr|/tr)_\]~' => '[$1]',
			// Images with no real url.
			'~\[img\]https?://.{0,7}\[/img\]~' => '',
		];

		$patterns = array_keys($mistake_fixes);
		$replacements = array_values($mistake_fixes);

		// Fix up some use of tables without [tr]s, etc. (it has to be done more than once to catch it all.)
		foreach (range(1, 3) as $pass)
		{
			$this->message = preg_replace($patterns, $replacements, $this->message);
		}
	}
682
683
	/**
	 * Replace our token-ized message with the saved code blocks
	 *
	 * Every %%token%% key held in $this->code_blocks that appears in the
	 * message is swapped for the block saved under it.
	 *
	 * @param string $message
	 * @return string the message, unchanged when no blocks have been saved
	 */
	public function restoreCodeBlocks($message)
	{
		// Nothing was tokenized
		if (empty($this->code_blocks))
		{
			return $message;
		}

		$tokens = array_keys($this->code_blocks);
		$blocks = array_values($this->code_blocks);

		return str_replace($tokens, $blocks, $message);
	}
698
699
	/**
	 * Validates and corrects table structure
	 *
	 * What it does
	 *   - Checks tables for correct tag order / nesting
	 *   - Adds in missing closing tags, removes excess closing tags
	 *   - Although it prevents markup errors, it can mess up the intended (albeit wrong) layout
	 * driving the post author in to a furious rage
	 *
	 */
	private function _preparseTable()
	{
		// The part of the message not yet scanned, and how much of the real message is before it
		$unscanned = $this->message;
		$scanned_length = 0;

		// Currently open table tags, innermost first
		$open_tags = [];

		// Define the allowable tags after a given tag
		$allowed_inside = [
			'table' => ['tr'],
			'tr' => ['td', 'th'],
			'td' => ['table'],
			'th' => [''],
		];

		// Walk every table tag, opening or closing (/table /tr /td etc)
		while (preg_match('~\[(/)*(table|tr|td|th)\]~', $unscanned, $hit) === 1)
		{
			[$tag_text, $slash, $tag_name] = $hit;

			// Keep track of where this is.
			$position = strpos($unscanned, $tag_text);
			$tag_length = strlen($tag_text);
			$innermost = $open_tags[0] ?? null;

			if ($slash !== '/')
			{
				// An opening tag must be allowed inside the innermost open tag, or be a table when nothing is open
				$keep = $innermost === null
					? $tag_name === 'table'
					: in_array($tag_name, $allowed_inside[$innermost]);

				// Record this was the last tag.
				if ($keep)
				{
					array_unshift($open_tags, $tag_name);
				}
			}
			else
			{
				// Only keep a closing tag if it's closing the right thing.
				$keep = $innermost === $tag_name;
				if ($keep)
				{
					array_shift($open_tags);
				}
			}

			// Removing?
			if (!$keep)
			{
				$cut_at = $scanned_length + $position;
				$this->message = substr($this->message, 0, $cut_at) . substr($this->message, $cut_at + $tag_length);

				// We've lost some data.
				$scanned_length -= $tag_length;
			}

			// Remove everything up to here.
			$scanned_length += $position + $tag_length;
			$unscanned = substr($unscanned, $position + $tag_length);
		}

		// Close any remaining table tags.
		foreach ($open_tags as $tag)
		{
			$this->message .= '[/' . $tag . ']';
		}
	}
775
776
	/**
	 * Validates bbc code URL of the form: [url url=123.com follow=true]123[/url]
	 *
	 * Each url tag carrying a follow= attribute is handled on its own, only the
	 * follow term inside that tag is rewritten:
	 *
	 * - With the post_nofollow permission the user's own choice is kept, normalised
	 *   to follow=true (for follow / true / on / yes) or follow=false
	 * - Without the permission, follow=true is only set when the domain passes
	 *   validateURLAllowList(), otherwise follow=false
	 * - A tag with no url= attribute cannot be checked against the allowList and is
	 *   treated as a domain that is not on it
	 */
	private function _validateLinks()
	{
		$allowed = allowedTo('post_nofollow');
		$regexFollow = '~\[url[^]]*(follow=([^] \s]+))[^]]*]~';
		$regexUrl = '~\[url[^]]*(url=([^] \s]+))[^]]*]~';

		// Every [URL] code with follow= in it, captured with offsets so the term can be replaced in place
		$updated = preg_replace_callback(
			$regexFollow,
			static function ($matches) use ($allowed, $regexUrl)
			{
				[$tag, $tagOffset] = $matches[0];
				[$followTerm, $termOffset] = $matches[1];

				// Flush out the actual URL and follow value
				$allowedDomain = preg_match($regexUrl, $tag, $urlMatch) === 1
					&& validateURLAllowList(addProtocol($urlMatch[2]));
				$followChoice = in_array(trim($matches[2][0]), ['follow', 'true', 'on', 'yes'], true);

				// Permission lets the user decide, otherwise the allowList decides
				$follow = $allowed ? $followChoice : $allowedDomain;

				// Offsets are relative to the whole message, make the term offset relative to this tag
				return substr_replace(
					$tag,
					$follow ? 'follow=true' : 'follow=false',
					$termOffset - $tagOffset,
					strlen($followTerm)
				);
			},
			$this->message,
			-1,
			$count,
			PREG_OFFSET_CAPTURE
		);

		// null signals a regex error, keep the message as it was in that case
		if ($updated !== null)
		{
			$this->message = $updated;
		}
	}
817
818
	/**
	 * This is very simple, and just removes things done by preparsecode.
	 *
	 * Code blocks are tokenized while the integrate_unpreparse_code hook runs
	 * and restored afterwards. Finally &nbsp; is turned back into a space and
	 * <br> / <br /> back into a newline.
	 *
	 * @param string $message
	 *
	 * @return null|string|string[]
	 */
	public function un_preparsecode($message)
	{
		// Protect CODE blocks from further processing
		$message = $this->tokenizeCodeBlocks($message);

		// Pass integration the tokenized message and array
		call_integration_hook('integrate_unpreparse_code', [&$message, &$this->code_blocks, 0]);

		// Restore the code blocks
		$message = $this->restoreCodeBlocks($message);

		// Change &nbsp; back to spaces, then breaks back to \n's.
		$message = str_replace('&nbsp;', ' ', $message);

		return preg_replace('~<br( /)?>~', "\n", $message);
	}
839
840
	/**
	 * Ensure tags inside of nobbc do not get parsed by converting the markers to html entities
	 *
	 * The characters [ ] : and @ of the captured content are replaced with
	 * their numeric entities, the surrounding nobbc tags are put back as is.
	 *
	 * @param string[] $matches index 1 holds the content between the nobbc tags
	 *
	 * @return string
	 */
	private function _preparsecode_nobbc_callback($matches)
	{
		$entities = [
			'[' => '&#91;',
			']' => '&#93;',
			':' => '&#58;',
			'@' => '&#64;',
		];

		return '[nobbc]' . strtr($matches[1], $entities) . '[/nobbc]';
	}
851
852
	/**
	 * Use only the primary (first) font face when multiple are supplied
	 *
	 * The font list is cut at the first comma; the remaining name is decoded
	 * with un_htmlspecialchars() and stripped of surrounding spaces and quotes.
	 *
	 * @param string[] $matches index 1 is the font list, index 2 the content up to and including [/font]
	 *
	 * @return string
	 */
	private function _preparsecode_font_callback($matches)
	{
		// Only the part before the first comma matters
		[$primary] = explode(',', $matches[1], 2);
		$primary = trim(un_htmlspecialchars($primary), ' "\'');

		return '[font=' . $primary . ']' . $matches[2];
	}
866
867
	/**
	 * Takes a tag and changes it to lowercase
	 *
	 * Only the tag name is lowercased, the optional slash and any attributes
	 * are passed through unchanged.
	 *
	 * @param string[] $matches index 1 is the optional slash, 2 the tag name, 3 the attributes
	 *
	 * @return string
	 */
	private function _preparsecode_lowertags_callback($matches)
	{
		$slash = $matches[1];
		$tag_name = strtolower($matches[2]);
		$attributes = $matches[3];

		return '[' . $slash . $tag_name . $attributes . ']';
	}
878
879
	/**
	 * Ensure image tags do not load anything by themselves (security)
	 *
	 * Any action= (or action%3d) in the image url that is not followed by
	 * dlattach is rewritten to "action-".
	 *
	 * @param string[] $matches index 1 is the opening img tag, index 2 the image url
	 *
	 * @return string
	 */
	private function _fixTags_img_callback($matches)
	{
		$url = preg_replace('~action(=|%3d)(?!dlattach)~i', 'action-', $matches[2]);

		return $matches[1] . $url . '[/img]';
	}
890
891
	/**
	 * Find and return PreparseCode instance if it exists,
	 * or create a new instance
	 *
	 * The stored instance is replaced by a new one whenever it was created
	 * for a different user name.
	 *
	 * @param string $user the name of the user (mostly used in quote tags)
	 *
	 * @return PreparseCode
	 */
	public static function instance($user)
	{
		// None yet, or one that belongs to another user
		if (self::$instance === null || $user !== self::$instance->user_name)
		{
			self::$instance = new PreparseCode($user);
		}

		return self::$instance;
	}
912
}
913