PreparseCode::_fixMistakes()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 52
Code Lines 26

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 2.0011

Importance

Changes 0
Metric Value
cc 2
eloc 26
c 0
b 0
f 0
nc 2
nop 0
dl 0
loc 52
rs 9.504
ccs 14
cts 15
cp 0.9333
crap 2.0011

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * This class contains those functions pertaining to preparsing BBC data
5
 *
6
 * @package   ElkArte Forum
7
 * @copyright ElkArte Forum contributors
8
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause (see accompanying LICENSE.txt file)
9
 *
10
 * This file contains code covered by:
11
 * copyright: 2011 Simple Machines (http://www.simplemachines.org)
12
 *
13
 * @version 2.0 dev
14
 *
15
 */
16
17
namespace BBC;
18
19
use ElkArte\Helper\TokenHash;
20
21
/**
22
 * Class PreparseCode
23
 *
24
 * @package BBC
25
 */
26
class PreparseCode
27
{
28
	/** The regular expression non breaking space */
29
	public const NBS = '\x{A0}';
30
31
	/** @var string the message to preparse */
32
	public $message = '';
33
34
	/** @var string the username of the current user */
35
	public $user_name = '';
36
37
	/** @var bool if this is just a preview */
38
	protected $previewing = false;
39
40
	/** @var array the code blocks that we want to protect */
41
	public $code_blocks = [];
42
43
	/** @var PreparseCode */
44
	public static $instance;
45
46 2
	/**
	 * Builds a preparser bound to a single user.
	 *
	 * The constructor is protected; instances are handed out by self::instance().
	 *
	 * @param string $user_name name stored on the instance, used when expanding /me lines
	 */
	protected function __construct($user_name)
	{
		$this->user_name = $user_name;
	}
55
56
	/**
	 * Prepares a raw message so that parse_bbc can work with it later.
	 *
	 * The cleaned text is handed back through the $message reference; the return
	 * value only tells an empty input ('') apart from a processed one (null).
	 *
	 * Processing order:
	 *   - Strips control characters, carriage returns and very long runs of periods
	 *   - Neutralises bbc inside [nobbc] and trims dangling quote tags
	 *   - Balances [code] / [icode] tags, then swaps those blocks for tokens
	 *   - Fixes url / img tags, /me lines, list and table tags and other common mistakes
	 *   - Removes empty b/i/s/u, quote and color tags, keeps only the first font of a font list
	 *   - Validates follow= on url tags, restores the code blocks, runs the integration hooks
	 *   - Converts double spaces and non-breaking spaces to &nbsp;, and \n to <br /> unless previewing
	 *   - Repairs table tag nesting
	 *
	 * @param string $message the message to prepare, updated in place
	 * @param bool $previewing when true, newlines are not converted to <br />
	 * @return string|null '' when $message is empty, otherwise null
	 */
	public function preparsecode(&$message, $previewing = false): ?string
	{
		// Nothing to prepare
		if (empty($message))
		{
			return '';
		}

		$this->message = $message;
		$this->previewing = $previewing;

		// Clean out control characters, then make double encoded numeric entities
		// (&amp;#1234;) real entities again so all languages *theoretically* work.
		$this->message = preg_replace(
			['/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '~&amp;#(\d{4,5}|[2-9]\d{2,4}|1[2-9]\d);~'],
			['', '&#$1;'],
			$this->message
		);

		// Clean up after nobbc ;).
		$this->message = preg_replace_callback(
			'~\[nobbc\](.+?)\[/nobbc\]~i',
			fn($nobbc) => $this->_preparsecode_nobbc_callback($nobbc),
			$this->message
		);

		// Carriage returns are not wanted
		$this->message = str_replace("\r", '', $this->message);

		// Too many periods in a row are collapsed (they apparently upset apache)
		$this->message = preg_replace('~\.{100,}~', '...', $this->message);

		// Dangling quote tags at either end
		$this->_trimTrailingQuotes();

		// Make sure code / icode tags are balanced, then hide the blocks from everything below
		$this->_validateCodeBlocks();
		$this->_validateICodeBlocks();
		$this->message = $this->tokenizeCodeBlocks($this->message);

		// With the code tags out of the way, fix the img and url tags...
		$this->_fixTags();

		// /me lines become [me=name]...[/me]
		$this->_itsAllAbout();

		// List and table tags must be lowercase
		$this->message = preg_replace_callback(
			'~\[([/]?)(list|li|table|tr|td|th)((\s[^\]]+)*)\]~i',
			fn($tag) => $this->_preparsecode_lowertags_callback($tag),
			$this->message
		);

		// No lists that were never opened or closed
		$this->_validateLists();

		// Repair common BBC input mistakes
		$this->_fixMistakes();

		// Remove empty formatting, quote and color tags (patterns are applied in order)
		$this->message = preg_replace(
			[
				'~\[[bisu]]\s*\[/[bisu]]~i',
				'~\[quote]\s*\[/quote]~i',
				'~\[color=(?:#[\da-fA-F]{3}|#[\da-fA-F]{6}|[A-Za-z]{1,20}|rgb\(\d{1,3}, ?\d{1,3}, ?\d{1,3}\))\]\s*\[/color\]~',
			],
			'',
			$this->message
		);

		// Font tags with multiple fonts (copy&paste in the WYSIWYG by some browsers).
		$this->message = preg_replace_callback(
			'~\[font=([^]]*)](.*?(?:\[/font\]))~s',
			fn($font) => $this->_preparsecode_font_callback($font),
			$this->message
		);

		// Don't allow rel follow links if they don't have permissions
		$this->_validateLinks();

		// Integration gets a look at the message while the code blocks are still tokens
		call_integration_hook('integrate_preparse_tokenized_code', [&$this->message, $previewing, $this->code_blocks]);

		// Put it back together!
		$this->message = $this->restoreCodeBlocks($this->message);

		// Integration gets another look at the complete message
		call_integration_hook('integrate_preparse_code', [&$this->message, 0, $previewing]);

		// Safe spacing, line breaks are only converted when this is not a preview
		$spacing = ['  ' => '&nbsp; ', "\xC2\xA0" => '&nbsp;'];
		if (!$previewing)
		{
			$spacing["\n"] = '<br />';
		}

		$this->message = strtr($this->message, $spacing);

		// Full scale table checking...
		$this->_preparseTable();

		// Quickly clean up things that will slow our parser (which are common in posted code.)
		$message = strtr($this->message, ['[]' => '&#91;]', '[&#039;' => '&#91;&#039;']);

		return null;
	}
167 18
168
	/**
	 * Removes [quote] tags left open at the very end of the message and
	 * [/quote] tags sitting at the very start, trimming whitespace as it goes.
	 */
	private function _trimTrailingQuotes(): void
	{
		$opening = '[quote]';
		$closing = '[/quote]';

		// An opening tag at the end quotes nothing - these often happen by accident.
		while (str_ends_with($this->message, $opening))
		{
			$this->message = trim(substr($this->message, 0, -strlen($opening)));
		}

		// Same for a closing tag that leads the message
		while (str_starts_with($this->message, $closing))
		{
			$this->message = trim(substr($this->message, strlen($closing)));
		}
	}
185 4
186
	/**
	 * Walks every [code] / [/code] tag in order and makes sure the message
	 * does not end inside an open block.
	 *
	 * - A block still open at the end gets a closing [/code] appended.
	 * - A [code] is prepended only when a close tag showed up outside of a block
	 *   and the message holds no opening tag at all.
	 */
	private function _validateCodeBlocks(): void
	{
		$inside = false;
		$seen_opening = false;
		$stray_close = false;

		if (preg_match_all('~(\[/?code(?:=[^]]+)?])~i', $this->message, $tags))
		{
			foreach ($tags[0] as $tag)
			{
				// Opening tag, nested openers simply keep us inside
				if ($tag[1] !== '/')
				{
					$seen_opening = true;
					$inside = true;
					continue;
				}

				// A close with nothing open means an opener is missing somewhere
				if (!$inside)
				{
					$stray_close = true;
				}

				// Either way we ain't in one any more.
				$inside = false;
			}
		}

		// Left open? close it.
		if ($inside)
		{
			$this->message .= '[/code]';
		}

		// Only open one when we have a stray close and never had an opening tag.
		if ($stray_close && !$seen_opening)
		{
			$this->message = '[code]' . $this->message;
		}
	}
242
243 4
	/**
	 * Balances [icode] tags one line at a time, as icode may not span lines.
	 *
	 * A line with more opening than closing tags gets one [/icode] appended, a line
	 * with more closing than opening tags gets one [icode] prepended. Empty
	 * [icode][/icode] pairs that remain afterwards are removed.
	 */
	private function _validateICodeBlocks(): void
	{
		$rows = explode("\n", $this->message);
		foreach ($rows as $index => $row)
		{
			preg_match_all('~(\[\/?icode(?:=[^\]]+)?\])~i', $row, $tags);

			// Positive: tags left open on this line, negative: tags closed but never opened
			$balance = 0;
			foreach ($tags[0] as $tag)
			{
				$balance += $tag[1] === '/' ? -1 : 1;
			}

			if ($balance > 0)
			{
				$rows[$index] = $row . '[/icode]';
			}
			elseif ($balance < 0)
			{
				$rows[$index] = '[icode]' . $row;
			}
		}

		// Rejoin the lines and clear empty pairs caused by linebreaks inside of icode tags.
		$this->message = preg_replace('~(?<!\[icode\])\[icode\]\s*\[\/icode\]~i', '', implode("\n", $rows));
	}
278
279
	/**
	 * Shields code / icode blocks from further processing by swapping each one
	 * for a %%token%% and remembering the original text in $this->code_blocks.
	 *
	 * Code blocks are tokenized first, icode blocks second.
	 *
	 * @param string $message
	 * @param bool $html when true looks for <code> / <icode> html tags instead of bbc tags
	 * @return string the message with the blocks replaced by tokens
	 */
	public function tokenizeCodeBlocks($message, $html = false): string
	{
		// What the start / end tags look like
		if ($html)
		{
			$patterns = ['~(</code>|<code(?:[^>]+)?>)~', '~(</icode>|<icode(?:[^>]+)?>)~'];
		}
		else
		{
			$patterns = ['~(\[\/code\]|\[code(?:=[^\]]+)?\])~i', '~(\[\/icode\]|\[icode(?:=[^\]]+)?\])~i'];
		}

		$tokenizer = new TokenHash();

		foreach ($patterns as $pattern)
		{
			$pieces = preg_split($pattern, $message, -1, PREG_SPLIT_DELIM_CAPTURE);

			// Pieces repeat as: outside text, begin tag, inside text, close tag.
			// Only complete groups (a close tag exists) are tokenized.
			for ($i = 0; isset($pieces[$i + 3]); $i += 4)
			{
				$token = '%%' . $tokenizer->generate_hash(8) . '%%';

				// Save what is there [code]stuff[/code]
				$this->code_blocks[$token] = $pieces[$i + 1] . $pieces[$i + 2] . $pieces[$i + 3];

				// The token takes the place of the begin tag, the rest of the block goes away
				$pieces[$i + 1] = $token;
				$pieces[$i + 2] = '';
				$pieces[$i + 3] = '';
			}

			// The message with these blocks as %%tokens%%
			$message = implode('', $pieces);
		}

		return $message;
	}
322
323
	/**
	 * Cleans up the urls used in img, url and iurl tags of the message.
	 *
	 * - Runs _fixTag for each tag form, integration can add more via integrate_fixtags
	 * - Defuses action= in image urls so images can not trigger forum actions
	 * - Resizes images when a maximum width or height is set
	 */
	private function _fixTags(): void
	{
		global $modSettings;

		// WARNING: Editing the below can cause large security holes in your forum.
		// Edit only if you are sure you know what you are doing.

		// [img]http://...[/img] or [img width=1]http://...[/img]
		$fixArray = [
			[
				'tag' => 'img',
				'protocols' => ['http', 'https'],
				'embeddedUrl' => false,
				'hasEqualSign' => false,
				'hasExtra' => true,
			],
		];

		// [url]http://...[/url], [url=http://...]name[/url] and the same two forms for iurl
		foreach (['url', 'iurl'] as $tag)
		{
			foreach ([false, true] as $hasEqualSign)
			{
				$fixArray[] = [
					'tag' => $tag,
					'protocols' => ['http', 'https'],
					'embeddedUrl' => true,
					'hasEqualSign' => $hasEqualSign,
				];
			}
		}

		// Integration may want to add to this array
		call_integration_hook('integrate_fixtags', [&$fixArray, &$this->message]);

		// Fix each type of tag.
		foreach ($fixArray as $fix)
		{
			$this->_fixTag($fix['tag'], $fix['protocols'], $fix['embeddedUrl'], $fix['hasEqualSign'], !empty($fix['hasExtra']));
		}

		// Now fix possible security problems with images loading links automatically...
		$this->message = preg_replace_callback(
			'~(\[img.*?\])(.+?)\[/img\]~is',
			fn($img) => $this->_fixTags_img_callback($img),
			$this->message
		);

		// No image size limits, all done
		if (empty($modSettings['max_image_width']) && empty($modSettings['max_image_height']))
		{
			return;
		}

		$this->resizeBBCImages();
	}
394
395 2
	/**
	 * Fix a specific form of a tag - ie. url with =.
	 *
	 * - Used by _fixTags, makes the links of one tag form complete urls.
	 * - Links that already start with one of the protocols are left alone, anything
	 *   else is completed based on the first protocol in the list.
	 *
	 * @param string $myTag - the tag
	 * @param string[] $protocols - http, https or ftp
	 * @param bool $embeddedUrl = false - whether it *can* be set to something
	 * @param bool $hasEqualSign = false, whether it *is* set to something
	 * @param bool $hasExtra = false - whether it can have extra cruft after the begin tag.
	 */
	private function _fixTag($myTag, $protocols, $embeddedUrl = false, $hasEqualSign = false, $hasExtra = false): void
	{
		global $boardurl, $scripturl;

		// The scheme://host part of the board url, used to complete links that start with /
		if (preg_match('~^([^:]+://[^/]+)~', $boardurl, $match) === 1)
		{
			$domain_url = $match[1];
		}
		else
		{
			$domain_url = $boardurl . '/';
		}

		// [tag=link]text[/tag] (closing part optional) or [tag]link[/tag]
		$pattern = $hasEqualSign
			? '~\[(' . $myTag . ')=([^\]]*?)\](?:(.+?)\[/(' . $myTag . ')\])?~is'
			: '~\[(' . $myTag . ($hasExtra ? '(?:[^\]]*?)' : '') . ')\](.+?)\[/(' . $myTag . ')\]~is';
		preg_match_all($pattern, $this->message, $matches);

		$replaces = [];
		foreach ($matches[0] as $k => $whole)
		{
			// Remove all leading and trailing whitespace.
			$replace = trim($matches[2][$k]);
			$this_tag = $matches[1][$k];
			if ($hasEqualSign)
			{
				$this_close = empty($matches[4][$k]) ? '' : $matches[4][$k];
			}
			else
			{
				$this_close = $matches[3][$k];
			}

			// Does it already start with a protocol we accept?
			$found = false;
			foreach ($protocols as $protocol)
			{
				if (strncasecmp($replace, $protocol . '://', strlen($protocol) + 3) === 0)
				{
					$found = true;
					break;
				}
			}

			if (!$found)
			{
				// Http url checking?
				if ($protocols[0] === 'http')
				{
					if (str_starts_with($replace, '/') && !str_starts_with($replace, '//'))
					{
						// Relative to the root of this site
						$replace = $domain_url . $replace;
					}
					elseif (str_starts_with($replace, '?'))
					{
						// Just a query string for the forum script
						$replace = $scripturl . $replace;
					}
					elseif (str_starts_with($replace, '#') && $embeddedUrl)
					{
						// An anchor on this page, that makes it an iurl
						$replace = '#' . preg_replace('~[^A-Za-z0-9_\-#]~', '', substr($replace, 1));
						$this_tag = 'iurl';
						$this_close = 'iurl';
					}
					elseif (str_starts_with($replace, '//'))
					{
						// Protocol relative
						$replace = $protocols[0] . ':' . $replace;
					}
					else
					{
						$replace = $protocols[0] . '://' . $replace;
					}
				}
				// FTP URL Checking
				elseif ($protocols[0] === 'ftp')
				{
					$replace = $protocols[0] . '://' . preg_replace('~^(?!ftps?)[^:]+://~', '', $replace);
				}
				else
				{
					$replace = $protocols[0] . '://' . $replace;
				}
			}

			// Work out what is searched for and what safe and proper text takes its place
			if ($hasEqualSign && $embeddedUrl)
			{
				$search = $whole;
				$fixed = '[' . $this_tag . '=' . $replace . ']' . (empty($matches[4][$k]) ? '' : $matches[3][$k] . '[/' . $this_close . ']');
			}
			elseif ($hasEqualSign)
			{
				$search = '[' . $matches[1][$k] . '=' . $matches[2][$k] . ']';
				$fixed = '[' . $this_tag . '=' . $replace . ']';
			}
			elseif ($embeddedUrl)
			{
				$search = '[' . $matches[1][$k] . ']' . $matches[2][$k] . '[/' . $matches[3][$k] . ']';
				$fixed = '[' . $this_tag . '=' . $replace . ']' . $matches[2][$k] . '[/' . $this_close . ']';
			}
			else
			{
				$search = '[' . $matches[1][$k] . ']' . $matches[2][$k] . '[/' . $matches[3][$k] . ']';
				$fixed = '[' . $this_tag . ']' . $replace . '[/' . $this_close . ']';
			}

			// Only tags that actually change need replacing
			if ($search !== $fixed)
			{
				$replaces[$search] = $fixed;
			}
		}

		// Update as needed
		if (!empty($replaces))
		{
			$this->message = strtr($this->message, $replaces);
		}
	}
509
510
	/**
	 * Updates BBC img tags in a message so that the width / height respect the forum settings.
	 *
	 * - Reads $modSettings['max_image_width'] and $modSettings['max_image_height'];
	 *   a limit that is not set or is zero leaves that dimension unrestricted
	 * - Images without a complete width/height have their size looked up with url_image_size()
	 * - Only img tags that exceed a limit are rewritten, scaled so both limits are respected
	 */
	public function resizeBBCImages(): void
	{
		global $modSettings;

		// Normalise the limits, zero means no limit for that dimension
		$max_width = (int) ($modSettings['max_image_width'] ?? 0);
		$max_height = (int) ($modSettings['max_image_height'] ?? 0);

		// No limits at all, no image can break the rules
		if ($max_width <= 0 && $max_height <= 0)
		{
			return;
		}

		// We'll need this for image processing
		require_once(SUBSDIR . '/Attachments.subs.php');

		// Find all the img tags - with or without width and height.
		preg_match_all('~\[img(\s+width=\d+)?(\s+height=\d+)?(\s+width=\d+)?](.+?)\[/img]~is', $this->message, $matches, PREG_PATTERN_ORDER);

		$replaces = [];
		foreach (array_keys($matches[0]) as $match)
		{
			// If the width was after the height, handle it.
			$matches[1][$match] = empty($matches[3][$match]) ? $matches[1][$match] : $matches[3][$match];

			// Now figure out if they had a desired height or width... (6 = strlen('width='), 7 = strlen('height='))
			$desired_width = empty($matches[1][$match]) ? 0 : (int) substr(trim($matches[1][$match]), 6);
			$desired_height = empty($matches[2][$match]) ? 0 : (int) substr(trim($matches[2][$match]), 7);

			// One was omitted, or both.  We'll have to find its real size...
			if (empty($desired_width) || empty($desired_height))
			{
				// NOTE(review): assumes url_image_size() gives [width, height], confirm what it returns on failure
				[$width, $height] = url_image_size(un_htmlspecialchars($matches[4][$match]));

				// They don't have any desired width or height!
				if (empty($desired_width) && empty($desired_height))
				{
					$desired_width = $width;
					$desired_height = $height;
				}
				// Scale it to the width...
				elseif (empty($desired_width) && !empty($height))
				{
					$desired_width = (int) (($desired_height * $width) / $height);
				}
				// Scale it to the height.
				elseif (!empty($width))
				{
					$desired_height = (int) (($desired_width * $height) / $width);
				}
			}

			// If the width and height are fine, just continue along...
			$too_wide = $max_width > 0 && $desired_width > $max_width;
			$too_tall = $max_height > 0 && $desired_height > $max_height;
			if (!$too_wide && !$too_tall)
			{
				continue;
			}

			// Too bad, it's too wide.  Make it as wide as the maximum.
			if ($too_wide)
			{
				$desired_height = (int) (($max_width * $desired_height) / $desired_width);
				$desired_width = $max_width;
			}

			// Now check the height, as well.  Might have to scale twice, even...
			if ($max_height > 0 && $desired_height > $max_height)
			{
				$desired_width = (int) (($max_height * $desired_width) / $desired_height);
				$desired_height = $max_height;
			}

			$replaces[$matches[0][$match]] = '[img' . (empty($desired_width) ? '' : ' width=' . $desired_width) . (empty($desired_height) ? '' : ' height=' . $desired_height) . ']' . $matches[4][$match] . '[/img]';
		}

		// If any img tags were actually changed...
		if (!empty($replaces))
		{
			$this->message = strtr($this->message, $replaces);
		}
	}
587
588 18
	/**
	 * Replace /me lines with [me=name]...[/me], including inside footnotes.
	 *
	 * - A line qualifies when it starts with "/me" followed by a space or &nbsp;
	 * - The user name is wrapped in &quot; only when it contains [, ] or "
	 * - The name is inserted literally, so $ or \ in a name are never read as
	 *   regular expression back references
	 */
	private function _itsAllAbout(): void
	{
		$me_regex = '~(\A|\n)/me(?: |&nbsp;)([^\n]*)(?:\z)?~i';
		$footnote_regex = '~(\[footnote\])/me(?: |&nbsp;)([^\n]*?)(\[\/footnote\])~i';

		// preg_match returns 1 on a match, 0 on none (and false on error), so compare against 1
		$needs_quotes = preg_match('~[\[\]\\"]~', $this->user_name) === 1;
		$name = $needs_quotes ? '&quot;' . $this->user_name . '&quot;' : $this->user_name;
		$open_tag = '[me=' . $name . ']';

		// Whole line /me, group 1 is the line start, group 2 the text
		$this->message = preg_replace_callback(
			$me_regex,
			static fn($found) => $found[1] . $open_tag . $found[2] . '[/me]',
			$this->message
		);

		// /me as the full content of a footnote, groups 1 and 3 are the footnote tags
		$this->message = preg_replace_callback(
			$footnote_regex,
			static fn($found) => $found[1] . $open_tag . $found[2] . '[/me]' . $found[3],
			$this->message
		);
	}
607 18
608
	/**
	 * Evens out the number of list open and close tags.
	 *
	 * Missing [list] tags are added to the start of the message, missing
	 * [/list] tags to the end. Only the tag counts are compared.
	 */
	private function _validateLists(): void
	{
		// Openers come as [list] or as [list with attributes
		$opened = substr_count($this->message, '[list]') + substr_count($this->message, '[list ');
		$closed = substr_count($this->message, '[/list]');

		// Positive: closers without an opener, negative: openers without a closer
		$surplus_closers = $closed - $opened;

		if ($surplus_closers > 0)
		{
			$this->message = str_repeat('[list]', $surplus_closers) . $this->message;
		}
		elseif ($surplus_closers < 0)
		{
			$this->message .= str_repeat('[/list]', -$surplus_closers);
		}
	}
626 18
627
	/**
	 * Repair a few *cough* common mistakes from user input and from wizzy cut/paste
	 *
	 * - Adds table, tr, td, list and li tags that are missing around the ones that were used
	 * - Tags found in a valid position are temporarily renamed to [_tag_] so the
	 *   "anything left is wrong" rules that follow do not touch them, and are put back at the end
	 * - Removes img tags whose url has at most 7 characters after the protocol
	 * - The rules are run up to three times, as one fix can expose another problem
	 * - If the regular expression engine fails (preg_replace returns null) the
	 *   message is left as it was instead of being emptied
	 */
	private function _fixMistakes(): void
	{
		$mistake_fixes = [
			// Find [table]s not followed by [tr].
			'~\[table\](?![\s' . self::NBS . ']*\[tr\])~su' => '[table][tr]',
			// Find [tr]s not followed by [td] or [th]
			'~\[tr\](?![\s' . self::NBS . ']*\[t[dh]\])~su' => '[tr][td]',
			// Find [/td] and [/th]s not followed by something valid.
			'~\[/t([dh])\](?![\s' . self::NBS . ']*(?:\[t[dh]\]|\[/tr\]|\[/table\]))~su' => '[/t$1][/tr]',
			// Find [/tr]s not followed by something valid.
			'~\[/tr\](?![\s' . self::NBS . ']*(?:\[tr\]|\[/table\]))~su' => '[/tr][/table]',
			// Find [/td] [/th]s incorrectly followed by [/table].
			'~\[/t([dh])\][\s' . self::NBS . ']*\[/table\]~su' => '[/t$1][/tr][/table]',
			// Find [table]s, [tr]s, and [/td]s (possibly correctly) followed by [td].
			'~\[(table|tr|/td)\]([\s' . self::NBS . ']*)\[td\]~su' => '[$1]$2[_td_]',
			// Now, any [td]s left should have a [tr] before them.
			'~\[td\]~s' => '[tr][td]',
			// Look for [tr]s which are correctly placed.
			'~\[(table|/tr)\]([\s' . self::NBS . ']*)\[tr\]~su' => '[$1]$2[_tr_]',
			// Any remaining [tr]s should have a [table] before them.
			'~\[tr\]~s' => '[table][tr]',
			// Look for [/td]s or [/th]s followed by [/tr].
			'~\[/t([dh])\]([\s' . self::NBS . ']*)\[/tr\]~su' => '[/t$1]$2[_/tr_]',
			// Any remaining [/tr]s should have a [/td].
			'~\[/tr\]~s' => '[/td][/tr]',
			// Look for properly opened [li]s which aren't closed.
			'~\[li\]([^\[\]]+?)\[li\]~s' => '[li]$1[_/li_][_li_]',
			'~\[li\]([^\[\]]+?)\[/list\]~s' => '[_li_]$1[_/li_][/list]',
			'~\[li\]([^\[\]]+?)$~s' => '[li]$1[/li]',
			// Lists - find correctly closed items/lists.
			'~\[/li\]([\s' . self::NBS . ']*)\[/list\]~su' => '[_/li_]$1[/list]',
			// Find list items closed and then opened.
			'~\[/li\]([\s' . self::NBS . ']*)\[li\]~su' => '[_/li_]$1[_li_]',
			// Now, find any [list]s or [/li]s followed by [li].
			'~\[(list(?: [^\]]*?)?|/li)\]([\s' . self::NBS . ']*)\[li\]~su' => '[$1]$2[_li_]',
			// Allow for sub lists.
			'~\[/li\]([\s' . self::NBS . ']*)\[list\]~u' => '[_/li_]$1[list]',
			'~\[/list\]([\s' . self::NBS . ']*)\[li\]~u' => '[/list]$1[_li_]',
			// Any remaining [li]s weren't inside a [list].
			'~\[li\]~' => '[list][li]',
			// Any remaining [/li]s weren't before a [/list].
			'~\[/li\]~' => '[/li][/list]',
			// Put the correct ones back how we found them.
			'~\[_(li|/li|td|tr|/tr)_\]~' => '[$1]',
			// Images with no real url.
			'~\[img\]https?://.{0,7}\[/img\]~' => '',
		];

		$patterns = array_keys($mistake_fixes);
		$replacements = array_values($mistake_fixes);

		// Fix up some use of tables without [tr]s, etc. (it has to be done more than once to catch it all.)
		for ($pass = 0; $pass < 3; $pass++)
		{
			$fixed = preg_replace($patterns, $replacements, $this->message);

			// null signals a PCRE failure (the /u patterns reject invalid UTF-8 for one),
			// keep the message we have rather than losing the whole post.
			if ($fixed === null)
			{
				break;
			}

			// Nothing changed, so another pass over the same text would not either
			if ($fixed === $this->message)
			{
				break;
			}

			$this->message = $fixed;
		}
	}
684
685
	/**
	 * Puts the saved code blocks back in place of their %%token%% placeholders.
	 *
	 * @param string $message a message holding tokens created by tokenizeCodeBlocks
	 * @return string the message with every known token replaced by its code block
	 */
	public function restoreCodeBlocks($message): string
	{
		// Nothing was tokenized, nothing to restore
		if (empty($this->code_blocks))
		{
			return $message;
		}

		$tokens = array_keys($this->code_blocks);
		$blocks = array_values($this->code_blocks);

		return str_replace($tokens, $blocks, $message);
	}
700 2
701 2
	/**
	 * Validates and corrects table structure
	 *
	 * What it does
	 *   - Walks the table, tr, td and th tags in order, tracking which ones are open
	 *   - Removes opening tags that are not allowed where they appear and closing
	 *     tags that do not close the most recently opened tag
	 *   - Appends closing tags for anything still open at the end
	 *   - Although it prevents markup errors, it can mess-up the intended (albeit wrong) layout
	 */
	private function _preparseTable(): void
	{
		// The tags allowed directly inside a given open tag
		$allowed_inside = [
			'table' => ['tr'],
			'tr' => ['td', 'th'],
			'td' => ['table'],
			'th' => [''],
		];

		// Text still to scan, and how much of $this->message lies before it
		$remaining = $this->message;
		$scanned = 0;

		// Currently open tags, most recently opened first
		$open_tags = [];

		while (preg_match('~\[(/)*(table|tr|td|th)\]~', $remaining, $found) === 1)
		{
			[$tag_text, $slash, $tag] = $found;
			$tag_length = strlen($tag_text);

			// Where the tag sits in the remaining text
			$at = strpos($remaining, $tag_text);

			if ($slash !== '/')
			{
				// Opening: a table may start things off, otherwise it must fit inside the open tag
				$keep = empty($open_tags) ? $tag === 'table' : in_array($tag, $allowed_inside[$open_tags[0]]);
				if ($keep)
				{
					array_unshift($open_tags, $tag);
				}
			}
			else
			{
				// Closing: only keep the tag if it's closing the right thing.
				$keep = !empty($open_tags) && $open_tags[0] === $tag;
				if ($keep)
				{
					array_shift($open_tags);
				}
			}

			// Position of the tag in the full message
			$position = $scanned + $at;
			if ($keep)
			{
				$scanned = $position + $tag_length;
			}
			else
			{
				// Cut the tag out, what followed it now starts where the tag was
				$this->message = substr($this->message, 0, $position) . substr($this->message, $position + $tag_length);
				$scanned = $position;
			}

			// Carry on after this tag
			$remaining = substr($remaining, $at + $tag_length);
		}

		// Close any remaining table tags.
		foreach ($open_tags as $tag)
		{
			$this->message .= '[/' . $tag . ']';
		}
	}
777
778
	/**
	 * Validates bbc code URL of the form: [url url=123.com follow=true]123[/url]
	 *
	 * - Every url tag carrying follow= has that term rewritten to follow=true or follow=false
	 * - follow=true is kept for domains on the allowList, or when the user has the
	 *   post_nofollow permission and asked for it (follow, true, on or yes)
	 * - A user with the permission can turn follow off for an allowList domain
	 * - A tag without a url= term is treated as not being on the allowList
	 * - Each decision is applied to its own tag only, identical follow= text
	 *   elsewhere in the message is not touched
	 */
	private function _validateLinks(): void
	{
		$allowed = allowedTo('post_nofollow');
		$regexFollow = '~\[url[^]]*(follow=([^] \s]+))[^]]*]~';
		$regexUrl = '~\[url[^]]*(url=([^] \s]+))[^]]*]~';

		// $tag[0] is the whole opening tag, $tag[1] the follow=xyz term, $tag[2] its value
		$validated = preg_replace_callback(
			$regexFollow,
			static function ($tag) use ($allowed, $regexUrl) {
				// Flush out the actual URL and follow value
				$allowedDomain = preg_match($regexUrl, $tag[0], $url) === 1
					&& validateURLAllowList(addProtocol($url[2]));
				$followChoice = in_array(trim($tag[2]), ['follow', 'true', 'on', 'yes'], true);

				// Allowed domain and purposely turning it off?
				if ($allowedDomain && $allowed && !$followChoice)
				{
					$follow = 'follow=false';
				}
				// Allowed domain OR you are allowed and already have it on
				elseif ($allowedDomain || ($allowed && $followChoice))
				{
					$follow = 'follow=true';
				}
				// Not allowed to use the function and the domain is not on the allowList
				else
				{
					$follow = 'follow=false';
				}

				return str_replace($tag[1], $follow, $tag[0]);
			},
			$this->message
		);

		// null means the regex engine gave up, in which case the message is left alone
		if ($validated !== null)
		{
			$this->message = $validated;
		}
	}
819
820
	/**
	 * This is very simple, and just removes things done by preparsecode.
	 *
	 * - Lets integration work on the message while code blocks are tokenized
	 * - Turns &nbsp; back in to spaces and <br> / <br /> back in to newlines
	 *
	 * @param string $message
	 *
	 * @return null|string|string[] the result of the final preg_replace
	 */
	public function un_preparsecode($message)
	{
		// Protect CODE blocks while integration has its say
		$message = $this->tokenizeCodeBlocks($message);

		// Pass integration the tokenized message and array
		call_integration_hook('integrate_unpreparse_code', [&$message, &$this->code_blocks, 0]);

		// Restore the code blocks
		$message = $this->restoreCodeBlocks($message);

		// &nbsp; back to spaces first, then breaks back to \n's
		$message = str_replace('&nbsp;', ' ', $message);

		return preg_replace('~<br( /)?>~', "\n", $message);
	}
841
842
	/**
	 * Keeps the content of a nobbc tag from being parsed by turning the
	 * characters [ ] : and @ in to html entities.
	 *
	 * @param string[] $matches index 1 holds the text between the nobbc tags
	 *
	 * @return string the nobbc tag with its content made safe
	 */
	private function _preparsecode_nobbc_callback($matches): string
	{
		// None of the entities contain a character that is searched for later in the
		// list, so replacing them one after the other is safe
		$safe = str_replace(
			['[', ']', ':', '@'],
			['&#91;', '&#93;', '&#58;', '&#64;'],
			$matches[1]
		);

		return '[nobbc]' . $safe . '[/nobbc]';
	}
853
854
	/**
	 * Reduces a font tag that lists several comma separated fonts to the first one.
	 *
	 * @param string[] $matches index 1 holds the font list, index 2 everything
	 * after the opening tag up to and including [/font]
	 *
	 * @return string the font tag using only the primary font, without surrounding quotes
	 */
	private function _preparsecode_font_callback($matches): string
	{
		// Only what comes before the first comma is of interest
		[$primary] = explode(',', $matches[1], 2);
		$face = trim(un_htmlspecialchars($primary), ' "\'');

		return '[font=' . $face . ']' . $matches[2];
	}
868
869
	/**
	 * Rebuilds a tag with its name in lowercase, any attributes are left as found.
	 *
	 * @param string[] $matches index 1 is the optional slash, index 2 the tag name
	 * and index 3 what follows the name
	 *
	 * @return string
	 */
	private function _preparsecode_lowertags_callback($matches): string
	{
		[, $slash, $tag, $attributes] = $matches;

		return '[' . $slash . strtolower($tag) . $attributes . ']';
	}
880
881
	/**
	 * Ensure image tags do not load anything by themselves (security)
	 *
	 * Any "action=" (or its %3d encoded form) in the image url is changed to
	 * "action-", unless it is directly followed by dlattach.
	 *
	 * @param string[] $matches index 1 is the opening img tag, index 2 the url
	 *
	 * @return string the img tag with the url defused
	 */
	private function _fixTags_img_callback($matches): string
	{
		$safe_url = preg_replace('~action(=|%3d)(?!dlattach)~i', 'action-', $matches[2]);

		return $matches[1] . $safe_url . '[/img]';
	}
892
893
	/**
	 * Returns the shared PreparseCode instance, creating a fresh one when
	 * none exists yet or when the existing one belongs to a different user name.
	 *
	 * @param string $user the name of the user (mostly used in quote tags)
	 *
	 * @return PreparseCode
	 */
	public static function instance($user): PreparseCode
	{
		$current = self::$instance;

		// First use, or a different user than last time
		if ($current === null || $user !== $current->user_name)
		{
			self::$instance = new PreparseCode($user);
		}

		return self::$instance;
	}
914
}
915