Passed
Push — development ( 8e8389...e156b5 )
by Spuds
01:10 queued 28s
created

PreparseCode::_fixMistakes()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 52
Code Lines 26

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 2.0011

Importance

Changes 0
Metric Value
cc 2
eloc 26
c 0
b 0
f 0
nc 2
nop 0
dl 0
loc 52
rs 9.504
ccs 14
cts 15
cp 0.9333
crap 2.0011

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * This class contains those functions pertaining to preparsing BBC data
5
 *
6
 * @package   ElkArte Forum
7
 * @copyright ElkArte Forum contributors
8
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
9
 *
10
 * This file contains code covered by:
11
 * copyright: 2011 Simple Machines (http://www.simplemachines.org)
12
 *
13
 * @version 2.0 dev
14
 *
15
 */
16
17
namespace BBC;
18
19
use ElkArte\Helper\TokenHash;
20
21
/**
22
 * Class PreparseCode
23
 *
24
 * @package BBC
25
 */
26
class PreparseCode
27
{
28
	/** The regular expression non breaking space */
29
	public const NBS = '\x{A0}';
30
31
	/** @var string the message to preparse */
32
	public $message = '';
33
34
	/** @var string the username of the current user */
35
	public $user_name = '';
36
37
	/** @var bool if this is just a preview */
38
	protected $previewing = false;
39
40
	/** @var array the code blocks that we want to protect */
41
	public $code_blocks = [];
42
43
	/** @var PreparseCode */
44
	public static $instance;
45
46 2
	/**
	 * Builds a preparser for the given user.
	 *
	 * The constructor is protected; objects are handed out by PreparseCode::instance().
	 *
	 * @param string $user_name the name kept in $this->user_name
	 */
	protected function __construct($user_name)
	{
		// Remember who is posting
		$this->user_name = $user_name;
	}
55
56
	/**
	 * Cleans up a raw message so that it can be stored and later rendered by parse_bbc.
	 *
	 * The prepared text is handed back through the $message reference.
	 *
	 * What it does, in order:
	 *   - Strips control characters, carriage returns and absurdly long runs of periods
	 *   - Neutralises bbc markers inside [nobbc]
	 *   - Trims dangling quote tags and balances [code] / [icode] tags
	 *   - Swaps code blocks for %%token%% placeholders so the later steps leave them alone
	 *   - Cleans up img / url style tags and expands /me
	 *   - Lowercases list and table tags, balances lists, repairs common list / table mistakes
	 *   - Removes empty b/i/s/u, quote and color tags
	 *   - Reduces multi-font [font] tags to the first font and validates follow= on links
	 *   - Puts the code blocks back, converts spacing and validates table nesting
	 *
	 * @param string $message the text to prepare, replaced by the prepared text
	 * @param bool $previewing when true, newlines are not converted to <br />
	 *
	 * @return string|void an empty string when $message is empty, otherwise nothing
	 */
	public function preparsecode(&$message, $previewing = false)
	{
		if (empty($message))
		{
			return '';
		}

		// Work on our own copy, the result is written back at the very end
		$this->message = $message;
		$this->previewing = $previewing;

		// Control characters have no business in a post
		$this->message = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $this->message);

		// Undo the double encoding of numeric entities (helps with mismatched charsets)
		$this->message = preg_replace('~&amp;#(\d{4,5}|[2-9]\d{2,4}|1[2-9]\d);~', '&#$1;', $this->message);

		// Anything inside nobbc must survive as plain text
		$this->message = preg_replace_callback(
			'~\[nobbc\](.+?)\[/nobbc\]~i',
			fn($matches) => $this->_preparsecode_nobbc_callback($matches),
			$this->message
		);

		// No carriage returns, only newlines
		$this->message = str_replace("\r", '', $this->message);

		// A hundred or more periods in a row collapse to an ellipsis
		$this->message = preg_replace('~\.{100,}~', '...', $this->message);

		// Quote tags left hanging at either end
		$this->_trimTrailingQuotes();

		// Every code / icode tag needs its partner
		$this->_validateCodeBlocks();
		$this->_validateICodeBlocks();

		// From here on code blocks are opaque tokens
		$this->message = $this->tokenizeCodeBlocks($this->message);

		// With code out of the way, clean up img and url tags
		$this->_fixTags();

		// /me lines become [me=name]...[/me]
		$this->_itsAllAbout();

		// List and table tags are expected in lowercase
		$this->message = preg_replace_callback(
			'~\[([/]?)(list|li|table|tr|td|th)((\s[^\]]+)*)\]~i',
			fn($matches) => $this->_preparsecode_lowertags_callback($matches),
			$this->message
		);

		// Lists that were never opened or never closed
		$this->_validateLists();

		// Common structural mistakes in lists and tables
		$this->_fixMistakes();

		// Tags that wrap nothing are removed: b/i/s/u, then quote, then color
		$empty_tags = [
			'~\[[bisu]]\s*\[/[bisu]]~i',
			'~\[quote]\s*\[/quote]~i',
			'~\[color=(?:#[\da-fA-F]{3}|#[\da-fA-F]{6}|[A-Za-z]{1,20}|rgb\(\d{1,3}, ?\d{1,3}, ?\d{1,3}\))\]\s*\[/color\]~',
		];
		$this->message = preg_replace($empty_tags, '', $this->message);

		// Font tags listing several fonts (copy & paste from some WYSIWYG editors)
		$this->message = preg_replace_callback(
			'~\[font=([^]]*)](.*?(?:\[/font\]))~s',
			fn($matches) => $this->_preparsecode_font_callback($matches),
			$this->message
		);

		// follow= on links is subject to permissions
		$this->_validateLinks();

		// Integration can work on the message while the code blocks are still tokens
		call_integration_hook('integrate_preparse_tokenized_code', array(&$this->message, $previewing, $this->code_blocks));

		// Code blocks go back in
		$this->message = $this->restoreCodeBlocks($this->message);

		// Integration can work on the complete message
		call_integration_hook('integrate_preparse_code', array(&$this->message, 0, $previewing));

		// Safe spacing, newlines are only converted when this is not a preview
		$spacing = ['  ' => '&nbsp; ', "\xC2\xA0" => '&nbsp;'];
		if (!$previewing)
		{
			$spacing["\n"] = '<br />';
		}

		$this->message = strtr($this->message, $spacing);

		// Full scale table checking
		$this->_preparseTable();

		// Escape a couple of sequences, common in posted code, that slow the parser down
		$message = strtr($this->message, ['[]' => '&#91;]', '[&#039;' => '&#91;&#039;']);
	}
165
166
	/**
	 * Strips quote tags that dangle at the edges of the message.
	 *
	 * An opening [quote] at the very end and a closing [/quote] at the very start
	 * are removed, repeatedly, trimming whitespace after each removal.
	 */
	private function _trimTrailingQuotes()
	{
		$opener = '[quote]';
		$opener_length = strlen($opener);

		$closer = '[/quote]';
		$closer_length = strlen($closer);

		// Opening quotes at the tail, usually there by accident
		while (substr($this->message, -$opener_length) === $opener)
		{
			$this->message = trim(substr($this->message, 0, -$opener_length));
		}

		// Closing quotes at the head
		while (substr($this->message, 0, $closer_length) === $closer)
		{
			$this->message = trim(substr($this->message, $closer_length));
		}
	}
183 18
184
	/**
	 * Walks over every [code] / [/code] tag and makes sure the message ends balanced.
	 *
	 * - A code tag still open at the end of the message gets a [/code] appended.
	 * - A closing tag seen while no block was open gets a [code] prepended, but only
	 *   when the message holds no opening code tag at all.
	 */
	private function _validateCodeBlocks()
	{
		$inside = false;
		$saw_opening = false;
		$stray_closing = false;

		if (preg_match_all('~(\[/?code(?:=[^]]+)?])~i', $this->message, $matches))
		{
			foreach ($matches[0] as $tag)
			{
				// Anything other than a closing tag opens (or continues) a block
				if ($tag[1] !== '/')
				{
					$saw_opening = true;
					$inside = true;
					continue;
				}

				// Closing while not inside a block means an opener is missing
				if (!$inside)
				{
					$stray_closing = true;
				}

				$inside = false;
			}
		}

		// Still open at the end, close it
		if ($inside)
		{
			$this->message .= '[/code]';
		}

		// Supply the missing opener, only if there never was an opening tag
		if ($stray_closing && !$saw_opening)
		{
			$this->message = '[code]' . $this->message;
		}
	}
240 18
241
	/**
	 * Balances [icode] tags one line at a time, as an icode block may not span lines.
	 *
	 * A line with more openers than closers gets one [/icode] appended, a line with
	 * more closers than openers gets one [icode] prepended. Empty [icode][/icode]
	 * pairs left behind by this are removed afterwards.
	 */
	private function _validateICodeBlocks()
	{
		$balanced = [];

		foreach (explode("\n", $this->message) as $line)
		{
			preg_match_all('~(\[\/?icode(?:=[^\]]+)?\])~i', $line, $matches);

			// +1 for every opener, -1 for every closer
			$balance = 0;
			foreach ($matches[0] as $tag)
			{
				$balance += $tag[1] === '/' ? -1 : 1;
			}

			if ($balance > 0)
			{
				// Left open, close it at the end of the line
				$line .= '[/icode]';
			}
			elseif ($balance < 0)
			{
				// Closed without being opened, open it at the start of the line
				$line = '[icode]' . $line;
			}

			$balanced[] = $line;
		}

		$this->message = implode("\n", $balanced);

		// Linebreaks inside icode tags leave empty pairs behind, clear those
		$this->message = preg_replace('~(?<!\[icode\])\[icode\]\s*\[\/icode\]~i', '', $this->message);
	}
276
277
	/**
	 * Shields code and icode blocks from preparsing by swapping each for a %%token%%.
	 *
	 * Code blocks are handled first, icode blocks second. Every block that gets replaced
	 * is stored, tags included, in $this->code_blocks keyed by its %%token%%.
	 *
	 * @param string $message
	 * @param bool $html when true look for <code> / <icode> html tags instead of bbc tags
	 * @return string the message with its code blocks replaced by tokens
	 */
	public function tokenizeCodeBlocks($message, $html = false)
	{
		if ($html)
		{
			$patterns = ['~(</code>|<code(?:[^>]+)?>)~', '~(</icode>|<icode(?:[^>]+)?>)~'];
		}
		else
		{
			$patterns = ['~(\[\/code\]|\[code(?:=[^\]]+)?\])~i', '~(\[\/icode\]|\[icode(?:=[^\]]+)?\])~i'];
		}

		// Supplies the random keys
		$tokenizer = new TokenHash();

		foreach ($patterns as $pattern)
		{
			// Splitting on the tags yields groups of four:
			// 0 = outside, 1 = begin tag, 2 = inside, 3 = close tag, repeat.
			$parts = preg_split($pattern, $message, -1, PREG_SPLIT_DELIM_CAPTURE);

			// Only complete groups, those that do have a close tag, are tokenized
			for ($outside = 0; isset($parts[$outside + 3]); $outside += 4)
			{
				$begin = $outside + 1;
				$inside = $outside + 2;
				$close = $outside + 3;

				$key = $tokenizer->generate_hash(8);

				// Keep the block as it was: [code]stuff[/code]
				$this->code_blocks['%%' . $key . '%%'] = $parts[$begin] . $parts[$inside] . $parts[$close];

				// The three parts now spell %%$key%%
				$parts[$begin] = '%%';
				$parts[$inside] = $key;
				$parts[$close] = '%%';
			}

			$message = implode('', $parts);
		}

		return $message;
	}
320
321 18
	/**
	 * Cleans up the link carrying tags (img, url, iurl) in the message.
	 *
	 * - Runs every tag definition through _fixTag() so links carry an accepted protocol.
	 * - Defuses action= parameters inside img tags.
	 * - Applies the image size limits when one is set in $modSettings.
	 * - Works on $this->message and returns nothing.
	 */
	private function _fixTags()
	{
		global $modSettings;

		// WARNING: Editing the below can cause large security holes in your forum.
		// Edit only if you are sure you know what you are doing.
		$web = ['http', 'https'];

		$fixArray = [
			// [img]http://...[/img] or [img width=1]http://...[/img]
			['tag' => 'img', 'protocols' => $web, 'embeddedUrl' => false, 'hasEqualSign' => false, 'hasExtra' => true],
			// [url]http://...[/url]
			['tag' => 'url', 'protocols' => $web, 'embeddedUrl' => true, 'hasEqualSign' => false],
			// [url=http://...]name[/url]
			['tag' => 'url', 'protocols' => $web, 'embeddedUrl' => true, 'hasEqualSign' => true],
			// [iurl]http://...[/iurl]
			['tag' => 'iurl', 'protocols' => $web, 'embeddedUrl' => true, 'hasEqualSign' => false],
			// [iurl=http://...]name[/iurl]
			['tag' => 'iurl', 'protocols' => $web, 'embeddedUrl' => true, 'hasEqualSign' => true],
		];

		// Integration may want to add to this array
		call_integration_hook('integrate_fixtags', array(&$fixArray, &$this->message));

		// One pass per tag definition
		foreach ($fixArray as $definition)
		{
			$this->_fixTag(
				$definition['tag'],
				$definition['protocols'],
				$definition['embeddedUrl'],
				$definition['hasEqualSign'],
				!empty($definition['hasExtra'])
			);
		}

		// Images load automatically, so they must not be able to trigger an action
		$this->message = preg_replace_callback(
			'~(\[img.*?\])(.+?)\[/img\]~is',
			fn($matches) => $this->_fixTags_img_callback($matches),
			$this->message
		);

		// Is there a limit on the size of posted images?
		$limit_width = !empty($modSettings['max_image_width']);
		$limit_height = !empty($modSettings['max_image_height']);
		if ($limit_width || $limit_height)
		{
			$this->resizeBBCImages();
		}
	}
392
393
	/**
	 * Repairs the links of one kind of tag, e.g. url with an = sign.
	 *
	 * - Used by _fixTags().
	 * - A link that does not start with one of $protocols is completed: root relative
	 *   links get the forum domain, ?queries get $scripturl, //links and bare links
	 *   get the first protocol. #anchors on embedded tags turn the tag into an iurl.
	 * - Updates $this->message and returns nothing.
	 *
	 * @param string $myTag - the tag
	 * @param string[] $protocols - http, https or ftp
	 * @param bool $embeddedUrl = false - whether it *can* be set to something
	 * @param bool $hasEqualSign = false, whether it *is* set to something
	 * @param bool $hasExtra = false - whether it can have extra cruft after the begin tag.
	 */
	private function _fixTag($myTag, $protocols, $embeddedUrl = false, $hasEqualSign = false, $hasExtra = false)
	{
		global $boardurl, $scripturl;

		// scheme://host of the forum, used to complete root relative links
		$domain_url = preg_match('~^([^:]+://[^/]+)~', $boardurl, $match) === 1 ? $match[1] : $boardurl . '/';

		// [tag=link]text[/tag] (the text and close tag are optional) or [tag]link[/tag]
		$pattern = $hasEqualSign
			? '~\[(' . $myTag . ')=([^\]]*?)\](?:(.+?)\[/(' . $myTag . ')\])?~is'
			: '~\[(' . $myTag . ($hasExtra ? '(?:[^\]]*?)' : '') . ')\](.+?)\[/(' . $myTag . ')\]~is';
		preg_match_all($pattern, $this->message, $matches);

		$replaces = [];
		foreach (array_keys($matches[0]) as $k)
		{
			// The link, without leading and trailing whitespace
			$replace = trim($matches[2][$k]);
			$this_tag = $matches[1][$k];

			if ($hasEqualSign)
			{
				$this_close = empty($matches[4][$k]) ? '' : $matches[4][$k];
			}
			else
			{
				$this_close = $matches[3][$k];
			}

			// Does it already start with an accepted protocol?
			$found = false;
			foreach ($protocols as $protocol)
			{
				if (strncasecmp($replace, $protocol . '://', strlen($protocol) + 3) === 0)
				{
					$found = true;
					break;
				}
			}

			if (!$found)
			{
				$default = $protocols[0];
				$lead = substr($replace, 0, 1);
				$protocol_relative = strpos($replace, '//') === 0;

				if ($default === 'http')
				{
					if ($lead === '/' && !$protocol_relative)
					{
						// Relative to the root of this site
						$replace = $domain_url . $replace;
					}
					elseif ($lead === '?')
					{
						// A query for the forum script
						$replace = $scripturl . $replace;
					}
					elseif ($lead === '#' && $embeddedUrl)
					{
						// An anchor on this page, that is an internal link
						$replace = '#' . preg_replace('~[^A-Za-z0-9_\-#]~', '', substr($replace, 1));
						$this_tag = 'iurl';
						$this_close = 'iurl';
					}
					elseif ($protocol_relative)
					{
						$replace = $default . ':' . $replace;
					}
					else
					{
						$replace = $default . '://' . $replace;
					}
				}
				elseif ($default === 'ftp')
				{
					// Any protocol other than ftp / ftps is swapped out
					$replace = $default . '://' . preg_replace('~^(?!ftps?)[^:]+://~', '', $replace);
				}
				else
				{
					$replace = $default . '://' . $replace;
				}
			}

			// Map what was written to what is considered safe and proper
			if ($hasEqualSign && $embeddedUrl)
			{
				$tail = empty($matches[4][$k]) ? '' : $matches[3][$k] . '[/' . $this_close . ']';
				$replaces[$matches[0][$k]] = '[' . $this_tag . '=' . $replace . ']' . $tail;
			}
			elseif ($hasEqualSign)
			{
				$replaces['[' . $matches[1][$k] . '=' . $matches[2][$k] . ']'] = '[' . $this_tag . '=' . $replace . ']';
			}
			else
			{
				$written = '[' . $matches[1][$k] . ']' . $matches[2][$k] . '[/' . $matches[3][$k] . ']';
				$replaces[$written] = $embeddedUrl
					? '[' . $this_tag . '=' . $replace . ']' . $matches[2][$k] . '[/' . $this_close . ']'
					: '[' . $this_tag . ']' . $replace . '[/' . $this_close . ']';
			}
		}

		// Pairs that change nothing are of no use
		$replaces = array_filter($replaces, static fn($to, $from) => $from !== $to, ARRAY_FILTER_USE_BOTH);

		// Update as needed
		if (!empty($replaces))
		{
			$this->message = strtr($this->message, $replaces);
		}
	}
507
508
	/**
	 * Updates BBC img tags in a message so that the width / height respect the forum settings.
	 *
	 * - Will add the width/height attrib if needed, or update existing ones if they break the rules
	 * - A limit that is missing or empty in $modSettings is treated as "no limit" for that
	 *   dimension, images that are within the configured limits are left untouched
	 * - Works on $this->message and returns nothing
	 */
	public function resizeBBCImages()
	{
		global $modSettings;

		// This runs when just one of the two limits is set, so neither can be taken for granted
		$max_width = empty($modSettings['max_image_width']) ? 0 : (int) $modSettings['max_image_width'];
		$max_height = empty($modSettings['max_image_height']) ? 0 : (int) $modSettings['max_image_height'];

		// No limits at all, nothing to enforce
		if ($max_width === 0 && $max_height === 0)
		{
			return;
		}

		// We'll need this for image processing
		require_once(SUBSDIR . '/Attachments.subs.php');

		// Find all the img tags - with or without width and height.
		preg_match_all('~\[img(\s+width=\d+)?(\s+height=\d+)?(\s+width=\d+)?](.+?)\[/img]~is', $this->message, $matches, PREG_PATTERN_ORDER);

		$replaces = [];
		foreach (array_keys($matches[0]) as $match)
		{
			// The width may have been given after the height.
			$width_attribute = empty($matches[3][$match]) ? $matches[1][$match] : $matches[3][$match];
			$height_attribute = $matches[2][$match];

			// Now figure out if they had a desired height or width ("width=" is 6 long, "height=" 7)
			$desired_width = empty($width_attribute) ? 0 : (int) substr(trim($width_attribute), 6);
			$desired_height = empty($height_attribute) ? 0 : (int) substr(trim($height_attribute), 7);

			// One was omitted, or both.  We'll have to find its real size...
			if (empty($desired_width) || empty($desired_height))
			{
				[$width, $height] = url_image_size(un_htmlspecialchars($matches[4][$match]));

				// They don't have any desired width or height!
				if (empty($desired_width) && empty($desired_height))
				{
					$desired_width = $width;
					$desired_height = $height;
				}
				// Scale it to the width...
				elseif (empty($desired_width) && !empty($height))
				{
					$desired_width = (int) (($desired_height * $width) / $height);
				}
				// Scale it to the height.
				elseif (!empty($width))
				{
					$desired_height = (int) (($desired_width * $height) / $width);
				}
			}

			// If the width and height are fine (or unlimited), just continue along...
			$width_fits = $max_width === 0 || $desired_width <= $max_width;
			$height_fits = $max_height === 0 || $desired_height <= $max_height;
			if ($width_fits && $height_fits)
			{
				continue;
			}

			// Too bad, it's too wide.  Make it as wide as the maximum.
			if (!$width_fits)
			{
				$desired_height = (int) (($max_width * $desired_height) / $desired_width);
				$desired_width = $max_width;
			}

			// Now check the height, as well.  Might have to scale twice, even...
			if ($max_height !== 0 && $desired_height > $max_height)
			{
				$desired_width = (int) (($max_height * $desired_width) / $desired_height);
				$desired_height = $max_height;
			}

			$replaces[$matches[0][$match]] = '[img' . (empty($desired_width) ? '' : ' width=' . $desired_width) . (empty($desired_height) ? '' : ' height=' . $desired_height) . ']' . $matches[4][$match] . '[/img]';
		}

		// If any img tags were actually changed...
		if (!empty($replaces))
		{
			$this->message = strtr($this->message, $replaces);
		}
	}
585
586 18
	/**
	 * Replace /me with the users name, including inside footnotes
	 *
	 * - "/me does something" at the start of the message or of a line becomes
	 *   [me=&quot;name&quot;]does something[/me], the same goes for a /me that
	 *   directly follows [footnote].
	 * - The name is always wrapped in &quot;. The check this method used to make,
	 *   preg_match(...) !== false, could only ever be true, so the quoted form is
	 *   what has always been produced and it is kept as is.
	 * - The name is inserted literally. It is not passed through the replacement
	 *   string of preg_replace, where a $1 or \1 in a user name would be expanded
	 *   as a backreference.
	 */
	private function _itsAllAbout()
	{
		$me_regex = '~(\A|\n)/me(?: |&nbsp;)([^\n]*)(?:\z)?~i';
		$footnote_regex = '~(\[footnote\])/me(?: |&nbsp;)([^\n]*?)(\[\/footnote\])~i';

		$me_open = '[me=&quot;' . $this->user_name . '&quot;]';

		// $1 is the start of line, $2 what the user is doing
		$this->message = preg_replace_callback(
			$me_regex,
			static fn($matches) => $matches[1] . $me_open . $matches[2] . '[/me]',
			$this->message
		);

		// $1 and $3 are the footnote tags, $2 what the user is doing
		$this->message = preg_replace_callback(
			$footnote_regex,
			static fn($matches) => $matches[1] . $me_open . $matches[2] . '[/me]' . $matches[3],
			$this->message
		);
	}
605 18
606
	/**
	 * Evens out the number of opening and closing list tags.
	 *
	 * Missing [list] tags are added at the start of the message, missing [/list]
	 * tags at the end. Both [list] and [list ...] count as an opening tag.
	 */
	private function _validateLists()
	{
		$opened = substr_count($this->message, '[list]') + substr_count($this->message, '[list ');
		$closed = substr_count($this->message, '[/list]');

		// Positive: lists left open. Negative: lists closed but never opened.
		$unclosed = $opened - $closed;

		if ($unclosed < 0)
		{
			$this->message = str_repeat('[list]', -$unclosed) . $this->message;
		}
		elseif ($unclosed > 0)
		{
			$this->message .= str_repeat('[/list]', $unclosed);
		}
	}
624
625
	/**
	 * Repairs the usual structural slips in table and list markup, whether typed
	 * by the user or pasted from a wysiwyg editor.
	 *
	 * The rules run in the order listed, and the whole set is applied three times
	 * as one pass does not catch everything. Tags found to be correctly placed are
	 * temporarily renamed to [_tag_] so the catch-all rules that follow skip them,
	 * they are renamed back near the end of each pass.
	 */
	private function _fixMistakes()
	{
		// Optional whitespace, non breaking spaces included, between two tags
		$gap = '[\s' . self::NBS . ']*';

		$mistake_fixes = [
			// Find [table]s not followed by [tr].
			'~\[table\](?!' . $gap . '\[tr\])~su' => '[table][tr]',
			// Find [tr]s not followed by [td] or [th]
			'~\[tr\](?!' . $gap . '\[t[dh]\])~su' => '[tr][td]',
			// Find [/td] and [/th]s not followed by something valid.
			'~\[/t([dh])\](?!' . $gap . '(?:\[t[dh]\]|\[/tr\]|\[/table\]))~su' => '[/t$1][/tr]',
			// Find [/tr]s not followed by something valid.
			'~\[/tr\](?!' . $gap . '(?:\[tr\]|\[/table\]))~su' => '[/tr][/table]',
			// Find [/td] [/th]s incorrectly followed by [/table].
			'~\[/t([dh])\]' . $gap . '\[/table\]~su' => '[/t$1][/tr][/table]',
			// Find [table]s, [tr]s, and [/td]s (possibly correctly) followed by [td].
			'~\[(table|tr|/td)\](' . $gap . ')\[td\]~su' => '[$1]$2[_td_]',
			// Now, any [td]s left should have a [tr] before them.
			'~\[td\]~s' => '[tr][td]',
			// Look for [tr]s which are correctly placed.
			'~\[(table|/tr)\](' . $gap . ')\[tr\]~su' => '[$1]$2[_tr_]',
			// Any remaining [tr]s should have a [table] before them.
			'~\[tr\]~s' => '[table][tr]',
			// Look for [/td]s or [/th]s followed by [/tr].
			'~\[/t([dh])\](' . $gap . ')\[/tr\]~su' => '[/t$1]$2[_/tr_]',
			// Any remaining [/tr]s should have a [/td].
			'~\[/tr\]~s' => '[/td][/tr]',
			// Look for properly opened [li]s which aren't closed.
			'~\[li\]([^\[\]]+?)\[li\]~s' => '[li]$1[_/li_][_li_]',
			'~\[li\]([^\[\]]+?)\[/list\]~s' => '[_li_]$1[_/li_][/list]',
			'~\[li\]([^\[\]]+?)$~s' => '[li]$1[/li]',
			// Lists - find correctly closed items/lists.
			'~\[/li\](' . $gap . ')\[/list\]~su' => '[_/li_]$1[/list]',
			// Find list items closed and then opened.
			'~\[/li\](' . $gap . ')\[li\]~su' => '[_/li_]$1[_li_]',
			// Now, find any [list]s or [/li]s followed by [li].
			'~\[(list(?: [^\]]*?)?|/li)\](' . $gap . ')\[li\]~su' => '[$1]$2[_li_]',
			// Allow for sub lists.
			'~\[/li\](' . $gap . ')\[list\]~u' => '[_/li_]$1[list]',
			'~\[/list\](' . $gap . ')\[li\]~u' => '[/list]$1[_li_]',
			// Any remaining [li]s weren't inside a [list].
			'~\[li\]~' => '[list][li]',
			// Any remaining [/li]s weren't before a [/list].
			'~\[/li\]~' => '[/li][/list]',
			// Put the correct ones back how we found them.
			'~\[_(li|/li|td|tr|/tr)_\]~' => '[$1]',
			// Images with no real url.
			'~\[img\]https?://.{0,7}\[/img\]~' => '',
		];

		$patterns = array_keys($mistake_fixes);
		$replacements = array_values($mistake_fixes);

		// Tables without [tr]s and the like need more than one pass to be caught in full
		$passes = 3;
		while ($passes-- > 0)
		{
			$this->message = preg_replace($patterns, $replacements, $this->message);
		}
	}
682
683
	/**
	 * Replace our token-ized message with the saved code blocks
	 *
	 * Tokens are put back newest first. tokenizeCodeBlocks() handles code before
	 * icode, so a saved icode block can itself hold the token of a code block.
	 * Restoring in creation order would replace the inner token while it is still
	 * hidden inside the outer one, leaving a bare %%token%% in the message.
	 *
	 * @param string $message
	 * @return string the message with every known %%token%% replaced by its code block
	 */
	public function restoreCodeBlocks($message)
	{
		if (empty($this->code_blocks))
		{
			return $message;
		}

		// str_replace works through the pairs in order, so outer (newer) blocks go first
		$blocks = array_reverse($this->code_blocks, true);

		return str_replace(array_keys($blocks), array_values($blocks), $message);
	}
698
699
	/**
	 * Checks the nesting of table tags and repairs it.
	 *
	 * What it does
	 *   - Scans every [table], [tr], [td], [th] tag and its closing form, left to right
	 *   - Removes an opening tag that is not allowed inside the tag currently open
	 *   - Removes a closing tag that does not close the tag currently open
	 *   - Appends closing tags for whatever is still open at the end
	 *   - Although it prevents markup errors, it can mess up the intended (albeit wrong)
	 * layout, driving the post author in to a furious rage
	 */
	private function _preparseTable()
	{
		// What may be opened directly inside an open tag
		$may_contain = [
			'table' => ['tr'],
			'tr' => ['td', 'th'],
			'td' => ['table'],
			'th' => [''],
		];

		// The part of the message still to scan, and where that part starts in $this->message
		$remaining = $this->message;
		$scanned = 0;

		// The tags currently open, innermost first
		$open_tags = [];

		while (preg_match('~\[(/)*(table|tr|td|th)\]~', $remaining, $hit) === 1)
		{
			$tag_text = $hit[0];
			$tag_name = $hit[2];
			$tag_length = strlen($tag_text);
			$position = strpos($remaining, $tag_text);
			$innermost = $open_tags[0] ?? null;
			$drop = false;

			if ($hit[1] !== '/')
			{
				// Opening: outside of any table only [table] is fine, inside it depends on the parent
				$fits = $innermost === null
					? $tag_name === 'table'
					: in_array($tag_name, $may_contain[$innermost]);

				if ($fits)
				{
					array_unshift($open_tags, $tag_name);
				}
				else
				{
					$drop = true;
				}
			}
			elseif ($innermost === $tag_name)
			{
				// Closing exactly what is open
				array_shift($open_tags);
			}
			else
			{
				// Closing something that is not open right now
				$drop = true;
			}

			if ($drop)
			{
				// Cut the tag out, what follows it moves up by the length of the tag
				$this->message = substr_replace($this->message, '', $scanned + $position, $tag_length);
				$scanned += $position;
			}
			else
			{
				$scanned += $position + $tag_length;
			}

			// Carry on behind this tag
			$remaining = substr($remaining, $position + $tag_length);
		}

		// Close what is still open, innermost first
		foreach ($open_tags as $open_tag)
		{
			$this->message .= '[/' . $open_tag . ']';
		}
	}
775
776
	/**
	 * Validates bbc code URL of the form: [url url=123.com follow=true]123[/url]
	 *
	 * - Every [url ...] tag that carries a follow= term is judged on its own and only
	 *   that tag is rewritten. The outcome for one link never leaks into another link
	 *   or into plain text that happens to contain the same follow=... term.
	 * - follow= is turned off for a user who holds the post_nofollow permission and
	 *   did not ask for follow on an allowList domain
	 * - follow= is turned on for an allowList domain, or for a user who holds the
	 *   permission and asked for it
	 * - follow= is turned off in every other case
	 * - A tag without a url= term is treated as not being on the allowList
	 */
	private function _validateLinks()
	{
		$allowed = allowedTo('post_nofollow');
		$regexFollow = '~\[url[^]]*(follow=([^] \s]+))[^]]*]~';
		$regexUrl = '~\[url[^]]*(url=([^] \s]+))[^]]*]~';

		// $tag[0] is the whole opening tag, $tag[1] the follow=value term, $tag[2] the value
		$checked = preg_replace_callback(
			$regexFollow,
			static function ($tag) use ($allowed, $regexUrl) {
				// Flush out the actual URL, if there is one
				$url = preg_match($regexUrl, $tag[0], $found) === 1 ? $found[2] : '';
				$allowedDomain = $url !== '' && validateURLAllowList(addProtocol($url));
				$followChoice = in_array(trim($tag[2]), ['follow', 'true', 'on', 'yes'], true);

				if ($allowedDomain && $allowed && !$followChoice)
				{
					// Allowed domain and purposely turning it off
					$follow = 'follow=false';
				}
				elseif ($allowedDomain || ($allowed && $followChoice))
				{
					// Allowed domain OR you are allowed and asked for it
					$follow = 'follow=true';
				}
				else
				{
					// Not allowed to use the function and the domain is not on the allowList
					$follow = 'follow=false';
				}

				return str_replace($tag[1], $follow, $tag[0]);
			},
			$this->message
		);

		// null signals a regex failure, the message is then left as it was
		if ($checked !== null)
		{
			$this->message = $checked;
		}
	}
817
818
	/**
	 * Undoes the spacing conversions done by preparsecode, e.g. for editing a message.
	 *
	 * Code blocks are tokenized while the integrate_unpreparse_code hook runs and are
	 * restored afterwards. Then &nbsp; becomes a space and <br> / <br /> a newline.
	 *
	 * @param string $message
	 *
	 * @return null|string|string[]
	 */
	public function un_preparsecode($message)
	{
		// Code blocks are hidden from integration behind tokens
		$tokenized = $this->tokenizeCodeBlocks($message);

		// Integration gets the tokenized message and the blocks
		call_integration_hook('integrate_unpreparse_code', [&$tokenized, &$this->code_blocks, 0]);

		// And the code blocks return
		$restored = $this->restoreCodeBlocks($tokenized);

		// &nbsp; back to spaces first, then breaks back to \n's
		$spaced = str_replace('&nbsp;', ' ', $restored);

		return preg_replace('~<br( /)?>~', "\n", $spaced);
	}
839
840
	/**
	 * Keeps the content of a nobbc tag from being parsed by turning the characters
	 * that start tags, smileys and links ( [ ] : @ ) into html entities.
	 *
	 * @param string[] $matches index 1 holds what was found between [nobbc] and [/nobbc]
	 *
	 * @return string the nobbc block with its content made inert
	 */
	private function _preparsecode_nobbc_callback($matches)
	{
		$entities = [
			'[' => '&#91;',
			']' => '&#93;',
			':' => '&#58;',
			'@' => '&#64;',
		];

		$inert = strtr($matches[1], $entities);

		return '[nobbc]' . $inert . '[/nobbc]';
	}
851
852
	/**
	 * Reduces a font tag that lists several font faces to the first one.
	 *
	 * @param string[] $matches index 1 is the comma separated font list, index 2 is
	 * everything after the opening tag, the closing [/font] included
	 *
	 * @return string the font tag carrying just the first font, without quotes or spaces around it
	 */
	private function _preparsecode_font_callback($matches)
	{
		// Only what comes before the first comma counts
		[$primary] = explode(',', $matches[1]);

		// Decode entities, then strip spaces and quotes from both ends
		$primary = trim(un_htmlspecialchars($primary), ' "\'');

		return '[font=' . $primary . ']' . $matches[2];
	}
866
867
	/**
	 * Rebuilds a tag with its name in lowercase.
	 *
	 * @param string[] $matches index 1 is the optional slash, index 2 the tag name,
	 * index 3 whatever follows the name inside the brackets
	 *
	 * @return string the tag, only the name changed
	 */
	private function _preparsecode_lowertags_callback($matches)
	{
		$slash = $matches[1];
		$name = strtolower($matches[2]);
		$rest = $matches[3];

		return '[' . $slash . $name . $rest . ']';
	}
878
879
	/**
	 * Stops an image tag from triggering a forum action just by being loaded (security).
	 *
	 * Any action= (or action%3d) in the image url becomes action-, unless the action
	 * is dlattach.
	 *
	 * @param string[] $matches index 1 is the opening img tag, index 2 the image url
	 *
	 * @return string the complete, defused, img tag
	 */
	private function _fixTags_img_callback($matches)
	{
		$opening_tag = $matches[1];
		$safe_url = preg_replace('~action(=|%3d)(?!dlattach)~i', 'action-', $matches[2]);

		return $opening_tag . $safe_url . '[/img]';
	}
890
891
	/**
	 * Hands out the shared PreparseCode object.
	 *
	 * The object kept in self::$instance is reused as long as it belongs to the same
	 * user name, otherwise a new one is created and kept in its place.
	 *
	 * @param string $user the name of the user (mostly used in quote tags)
	 *
	 * @return PreparseCode
	 */
	public static function instance($user)
	{
		// Nothing yet, or one that was made for somebody else?
		if (self::$instance === null || $user !== self::$instance->user_name)
		{
			self::$instance = new PreparseCode($user);
		}

		return self::$instance;
	}
912
}
913