Completed
Pull Request — development (#3050)
by John
23:37
created

PreparseCode::preparsecode()   B

Complexity

Conditions 2
Paths 2

Size

Total Lines 77
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 27
CRAP Score 2.0001

Importance

Changes 0
Metric Value
cc 2
eloc 28
nc 2
nop 2
dl 0
loc 77
ccs 27
cts 28
cp 0.9643
crap 2.0001
rs 8.9342
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * This class contains those functions pertaining to preparsing BBC data
5
 *
6
 * @name      ElkArte Forum
7
 * @copyright ElkArte Forum contributors
8
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause
9
 *
10
 * This file contains code covered by:
11
 * copyright:    2011 Simple Machines (http://www.simplemachines.org)
12
 * license:    BSD, See included LICENSE.TXT for terms and conditions.
13
 *
14
 * @version 2.0 dev
15
 *
16
 */
17
18
namespace BBC;
19
20
/**
21
 * Class PreparseCode
22
 *
23
 * @package BBC
24
 */
25
class PreparseCode
26
{
27
	/** The regular expression non breaking space */
28
	const NBS = '\x{A0}';
29
	/** @var string the message to preparse */
30
	public $message = '';
31
	/** @var bool if this is just a preview */
32
	protected $previewing = false;
33
	/** @var array the code blocks that we want to protect */
34
	public $code_blocks = array();
35
	/** @var PreparseCode */
36
	public static $instance;
37
38
	/**
39
	 * PreparseCode constructor.
40
	 */
41 2
	public function __construct()
	{
		// Intentionally empty: every property already gets its default from its declaration.
	}
44
45
	/**
46
	 * Takes a message and parses it, returning the prepared message as a reference
47
	 * for use by parse_bbc.
48
	 *
49
	 * What it does:
50
	 *   - Cleans up links (javascript, etc.)
51
	 *   - Fixes improperly constructed lists [lists]
52
	 *   - Repairs improperly constructed tables, row, headers, etc
53
	 *   - Protects code sections
54
	 *   - Checks for proper quote open / closing
55
	 *   - Processes /me tag
56
	 *   - Converts color tags to ones parse_bbc will understand
57
	 *   - Removes empty tags outside of code blocks
58
	 *   - Won't convert \n's and a few other things if previewing is true.
59
	 *
60
	 * @param string $message
61
	 * @param boolean $previewing
62
	 */
63 3
	public function preparsecode(&$message, $previewing = false)
	{
		// Keep the inputs on the instance so the private helpers can work on them
		$this->previewing = $previewing;
		$this->message = $message;

		// Turn double-encoded numeric entities (&amp;#1234;) back into plain entities
		$entity_pattern = '~&amp;#(\d{4,5}|[2-9]\d{2,4}|1[2-9]\d);~';
		$this->message = preg_replace($entity_pattern, '&#$1;', $this->message);

		// Neutralise markup found between [nobbc] tags
		$this->message = preg_replace_callback('~\[nobbc\](.+?)\[/nobbc\]~i', array($this, '_preparsecode_nobbc_callback'), $this->message);

		// Carriage returns are dropped entirely
		$this->message = str_replace("\r", '', $this->message);

		// A run of 100 or more periods is collapsed to an ellipsis
		$this->message = preg_replace('~\.{100,}~', '...', $this->message);

		// Structural clean-up. The order matters: code blocks are closed and
		// swapped for tokens before any of the tag fixers touch the message.
		$this->_trimTrailingQuotes();
		$this->_validateCodeBlocks();
		$this->_tokenizeCodeBlocks();
		$this->_fixTags();
		$this->_itsAllAbout();

		// List and table tag names are forced to lowercase
		$this->message = preg_replace_callback('~\[([/]?)(list|li|table|tr|td|th)((\s[^\]]+)*)\]~i', array($this, '_preparsecode_lowertags_callback'), $this->message);

		// Balance the lists, then repair the usual table / list slip-ups
		$this->_validateLists();
		$this->_fixMistakes();

		// Strip tags that wrap nothing but whitespace: b/i/s/u, quote and color (applied in that order)
		$empty_tags = array(
			'~\[[bisu]\]\s*\[/[bisu]\]~',
			'~\[quote\]\s*\[/quote\]~',
			'~\[color=(?:#[\da-fA-F]{3}|#[\da-fA-F]{6}|[A-Za-z]{1,20}|rgb\(\d{1,3}, ?\d{1,3}, ?\d{1,3}\))\]\s*\[/color\]~',
		);
		$this->message = preg_replace($empty_tags, '', $this->message);

		// Font tags listing several faces are reduced to the first face
		$this->message = preg_replace_callback('~\[font=([^\]]*)\](.*?(?:\[/font\]))~s', array($this, '_preparsecode_font_callback'), $this->message);

		// Integration gets a look at the message while code blocks are still tokens
		call_integration_hook('integrate_preparse_tokenized_code', array(&$this->message, $previewing, $this->code_blocks));

		// Swap the tokens back for the real code blocks
		$this->_restoreCodeBlocks();

		// ... and another look once everything is back in place
		call_integration_hook('integrate_preparse_code', array(&$this->message, 0, $previewing));

		// Safe spacing: double spaces and non-breaking spaces always become
		// entities, newlines become <br /> only when this is not a preview.
		$spacing = array('  ' => '&nbsp; ', "\xC2\xA0" => '&nbsp;');
		if (!$previewing)
		{
			$spacing["\n"] = '<br />';
		}
		$this->message = strtr($this->message, $spacing);

		// Full table structure validation
		$this->_preparseTable();

		// Hand the result back through the reference, escaping the bracket
		// sequences (common in posted code) that would slow the parser down.
		$message = strtr($this->message, array('[]' => '&#91;]', '[&#039;' => '&#91;&#039;'));
	}
140
141
	/**
142
	 * Trim dangling quotes
143
	 */
144 3
	private function _trimTrailingQuotes()
	{
		$opener = '[quote]';
		$closer = '[/quote]';
		$opener_len = strlen($opener);
		$closer_len = strlen($closer);

		// An opening quote tag dangling at the very end quotes nothing, drop it (repeatedly)
		while (substr($this->message, -$opener_len) === $opener)
		{
			$this->message = trim(substr($this->message, 0, -$opener_len));
		}

		// Likewise a closing quote tag at the very start closes nothing
		while (substr($this->message, 0, $closer_len) === $closer)
		{
			$this->message = trim(substr($this->message, $closer_len));
		}
	}
158
159
	/**
160
	 * Find all code blocks, work out whether we'd be parsing them,
161
	 * then ensure they are all closed.
162
	 */
163 3
	private function _validateCodeBlocks()
	{
		// Are we currently between an opening and a closing code tag?
		$inside = false;
		// Did the message contain any opening code tag at all?
		$saw_opener = false;
		// Did we meet a closing tag while no block was open?
		$orphan_closer = false;

		if (preg_match_all('~(\[(/)*code(?:=[^\]]+)?\])~is', $this->message, $found))
		{
			// Group 2 holds the slash for closing tags and is empty for opening ones
			foreach ($found[2] as $slash)
			{
				if (empty($slash))
				{
					// An opener; a nested opener simply leaves us inside
					$saw_opener = true;
					$inside = true;
					continue;
				}

				// A closer with nothing open is remembered for later
				if (!$inside)
				{
					$orphan_closer = true;
				}

				// Whatever the case, we are outside of a block from here
				$inside = false;
			}
		}

		// Ended while still inside a block: close it
		if ($inside)
		{
			$this->message .= '[/code]';
		}

		// Only closers and never an opener: open one at the very start
		if ($orphan_closer && !$saw_opener)
		{
			$this->message = '[code]' . $this->message;
		}
	}
211
212
	/**
213
	 * Protects code blocks from preparse by replacing them with %%token%% values
214
	 */
215 3
	private function _tokenizeCodeBlocks()
	{
		// Start from a clean slate. The object is normally shared through
		// instance(), so without this reset the blocks of every message
		// processed earlier would pile up, be handed to the integration hooks
		// and be searched for again when the tokens are restored.
		$this->code_blocks = array();

		// Split the message on the code start/end tags, keeping the tags themselves
		$parts = preg_split('~(\[/code\]|\[code(?:=[^\]]+)?\])~i', $this->message, -1, PREG_SPLIT_DELIM_CAPTURE);

		// Token generator
		$tokenizer = new \Token_Hash();

		// The parts repeat as: 0 = outside, 1 = begin tag, 2 = inside, 3 = close tag.
		// Walk the "outside" slots and only act when a complete block follows.
		for ($i = 0, $n = count($parts); $i + 3 < $n; $i += 4)
		{
			// Create a unique key to put in place of the code block
			$key = $tokenizer->generate_hash(8);

			// Remember the whole block: [code]stuff[/code]
			$this->code_blocks['%%' . $key . '%%'] = $parts[$i + 1] . $parts[$i + 2] . $parts[$i + 3];

			// Leave %%key%% behind so the block is shielded from the rest of the preparsing
			$parts[$i + 1] = '%%';
			$parts[$i + 2] = $key;
			$parts[$i + 3] = '%%';
		}

		// The message with its code blocks as %%tokens%%
		$this->message = implode('', $parts);
	}
245
246
	/**
247
	 * Fix any URLs posted - ie. remove 'javascript:'.
248
	 *
249
	 * - Fix the img and url tags...
250
	 * - Fixes links in message and returns nothing.
251
	 */
252 3
	private function _fixTags()
	{
		global $modSettings;

		// WARNING: Editing the below can cause large security holes in your forum.
		// Edit only if you are sure you know what you are doing.
		$web_protocols = array('http', 'https');

		// [img]http://...[/img] or [img width=1]http://...[/img]
		$fixArray = array(
			array(
				'tag' => 'img',
				'protocols' => $web_protocols,
				'embeddedUrl' => false,
				'hasEqualSign' => false,
				'hasExtra' => true,
			),
		);

		// [url]http://...[/url], [url=http://...]name[/url] and the same two forms for iurl
		foreach (array('url', 'iurl') as $link_tag)
		{
			foreach (array(false, true) as $with_equal_sign)
			{
				$fixArray[] = array(
					'tag' => $link_tag,
					'protocols' => $web_protocols,
					'embeddedUrl' => true,
					'hasEqualSign' => $with_equal_sign,
				);
			}
		}

		// Integration may want to add to this array
		call_integration_hook('integrate_fixtags', array(&$fixArray, &$this->message));

		// Run the fixer once per tag definition
		foreach ($fixArray as $definition)
		{
			$has_extra = !empty($definition['hasExtra']);
			$this->_fixTag($definition['tag'], $definition['protocols'], $definition['embeddedUrl'], $definition['hasEqualSign'], $has_extra);
		}

		// Images must not be able to trigger forum actions just by being loaded
		$this->message = preg_replace_callback('~(\[img.*?\])(.+?)\[/img\]~is', array($this, '_fixTags_img_callback'), $this->message);

		// Enforce the image size limits when at least one of them is set
		$has_limit = !empty($modSettings['max_image_width']) || !empty($modSettings['max_image_height']);
		if ($has_limit)
		{
			$this->resizeBBCImages();
		}
	}
316
317
	/**
318
	 * Fix a specific class of tag - ie. url with =.
319
	 *
320
	 * - Used by fixTags, fixes a specific tag's links.
321
	 *
322
	 * @param string   $myTag - the tag
323
	 * @param string[] $protocols - http, https or ftp
324
	 * @param bool     $embeddedUrl = false - whether it *can* be set to something
325
	 * @param bool     $hasEqualSign = false, whether it *is* set to something
326
	 * @param bool     $hasExtra = false - whether it can have extra cruft after the begin tag.
327
	 */
328 3
	private function _fixTag($myTag, $protocols, $embeddedUrl = false, $hasEqualSign = false, $hasExtra = false)
	{
		global $boardurl, $scripturl;

		// Scheme + host of the board, used to complete root-relative links;
		// falls back to the board url itself when it has no recognisable scheme.
		$domain_url = preg_match('~^([^:]+://[^/]+)~', $boardurl, $match) != 0 ? $match[1] : $boardurl . '/';

		// Pick the pattern for [tag=url]text[/tag] or [tag]url[/tag]
		if ($hasEqualSign)
		{
			$pattern = '~\[(' . $myTag . ')=([^\]]*?)\](?:(.+?)\[/(' . $myTag . ')\])?~is';
		}
		else
		{
			$pattern = '~\[(' . $myTag . ($hasExtra ? '(?:[^\]]*?)' : '') . ')\](.+?)\[/(' . $myTag . ')\]~is';
		}

		preg_match_all($pattern, $this->message, $matches);

		$replaces = array();
		foreach (array_keys($matches[0]) as $k)
		{
			// The url candidate, without surrounding whitespace
			$url = trim($matches[2][$k]);
			$this_tag = $matches[1][$k];

			if ($hasEqualSign)
			{
				$this_close = empty($matches[4][$k]) ? '' : $matches[4][$k];
			}
			else
			{
				$this_close = $matches[3][$k];
			}

			// Does it already start with one of the accepted protocols?
			$found = false;
			foreach ($protocols as $protocol)
			{
				if (strncasecmp($url, $protocol . '://', strlen($protocol) + 3) === 0)
				{
					$found = true;
					break;
				}
			}

			if (!$found)
			{
				$primary = $protocols[0];

				if ($primary === 'http')
				{
					$first_char = substr($url, 0, 1);
					$first_two = substr($url, 0, 2);

					if ($first_char === '/' && $first_two !== '//')
					{
						// Root relative: /path
						$url = $domain_url . $url;
					}
					elseif ($first_char === '?')
					{
						// Query only: ?action=...
						$url = $scripturl . $url;
					}
					elseif ($first_char === '#' && $embeddedUrl)
					{
						// In-page anchor: keep a safe fragment and make it an internal url
						$url = '#' . preg_replace('~[^A-Za-z0-9_\-#]~', '', substr($url, 1));
						$this_tag = 'iurl';
						$this_close = 'iurl';
					}
					elseif ($first_two === '//')
					{
						// Protocol relative: //host/path
						$url = $primary . ':' . $url;
					}
					else
					{
						$url = $primary . '://' . $url;
					}
				}
				elseif ($primary === 'ftp')
				{
					// Swap any non ftp(s) scheme for the primary one
					$url = $primary . '://' . preg_replace('~^(?!ftps?)[^:]+://~', '', $url);
				}
				else
				{
					$url = $primary . '://' . $url;
				}
			}

			// Map the text as it was found to its safe and proper form
			if ($hasEqualSign)
			{
				if ($embeddedUrl)
				{
					$tail = empty($matches[4][$k]) ? '' : $matches[3][$k] . '[/' . $this_close . ']';
					$replaces[$matches[0][$k]] = '[' . $this_tag . '=' . $url . ']' . $tail;
				}
				else
				{
					$replaces['[' . $matches[1][$k] . '=' . $matches[2][$k] . ']'] = '[' . $this_tag . '=' . $url . ']';
				}
			}
			else
			{
				$as_found = '[' . $matches[1][$k] . ']' . $matches[2][$k] . '[/' . $matches[3][$k] . ']';

				if ($embeddedUrl)
				{
					$replaces[$as_found] = '[' . $this_tag . '=' . $url . ']' . $matches[2][$k] . '[/' . $this_close . ']';
				}
				else
				{
					$replaces[$as_found] = '[' . $this_tag . ']' . $url . '[/' . $this_close . ']';
				}
			}
		}

		// Entries that would not change anything are of no use
		foreach ($replaces as $as_found => $fixed)
		{
			if ($as_found == $fixed)
			{
				unset($replaces[$as_found]);
			}
		}

		// Update as needed
		if (!empty($replaces))
		{
			$this->message = strtr($this->message, $replaces);
		}
	}
439
440
	/**
441
	 * Updates BBC img tags in a message so that the width / height respect the forum settings.
442
	 *
443
	 * - Will add the width/height attrib if needed, or update existing ones if they break the rules
444
	 */
445
	public function resizeBBCImages()
	{
		global $modSettings;

		// A missing or empty setting means "no limit" on that axis. Reading the
		// limits once here also avoids touching array keys that may not exist.
		$max_width = !empty($modSettings['max_image_width']) ? (int) $modSettings['max_image_width'] : 0;
		$max_height = !empty($modSettings['max_image_height']) ? (int) $modSettings['max_image_height'] : 0;

		// No limit at all, so no image can break the rules
		if ($max_width === 0 && $max_height === 0)
		{
			return;
		}

		// We'll need this for image processing
		require_once(SUBSDIR . '/Attachments.subs.php');

		// Find all the img tags - with or without width and height (in either order).
		preg_match_all('~\[img(\s+width=\d+)?(\s+height=\d+)?(\s+width=\d+)?\](.+?)\[/img\]~is', $this->message, $matches, PREG_PATTERN_ORDER);

		$replaces = array();
		foreach ($matches[0] as $match => $dummy)
		{
			// If the width was given after the height, it landed in group 3.
			if (!empty($matches[3][$match]))
			{
				$matches[1][$match] = $matches[3][$match];
			}

			// Strip the "width=" / "height=" prefix to get at the requested numbers
			$desired_width = !empty($matches[1][$match]) ? (int) substr(trim($matches[1][$match]), 6) : 0;
			$desired_height = !empty($matches[2][$match]) ? (int) substr(trim($matches[2][$match]), 7) : 0;

			// One was omitted, or both.  We'll have to find its real size...
			if (empty($desired_width) || empty($desired_height))
			{
				list ($width, $height) = url_image_size(un_htmlspecialchars($matches[4][$match]));

				// They don't have any desired width or height!
				if (empty($desired_width) && empty($desired_height))
				{
					$desired_width = $width;
					$desired_height = $height;
				}
				// Only the height was given: derive the width, keeping the aspect ratio
				elseif (empty($desired_width) && !empty($height))
				{
					$desired_width = (int) (($desired_height * $width) / $height);
				}
				// Only the width was given: derive the height
				elseif (!empty($width))
				{
					$desired_height = (int) (($desired_width * $height) / $width);
				}
			}

			// A disabled limit can not be exceeded; this matches how the scaling
			// steps below already ignore an axis that has no limit.
			$width_ok = $max_width === 0 || $desired_width <= $max_width;
			$height_ok = $max_height === 0 || $desired_height <= $max_height;

			// If the width and height are fine, leave the tag exactly as posted
			if ($width_ok && $height_ok)
			{
				continue;
			}

			// Too wide: shrink to the maximum width, scaling the height along
			if (!$width_ok)
			{
				$desired_height = (int) (($max_width * $desired_height) / $desired_width);
				$desired_width = $max_width;
			}

			// Now check the height, as well.  Might have to scale twice, even...
			if ($max_height !== 0 && $desired_height > $max_height)
			{
				$desired_width = (int) (($max_height * $desired_width) / $desired_height);
				$desired_height = $max_height;
			}

			$replaces[$matches[0][$match]] = '[img' . (!empty($desired_width) ? ' width=' . $desired_width : '') . (!empty($desired_height) ? ' height=' . $desired_height : '') . ']' . $matches[4][$match] . '[/img]';
		}

		// If any img tags were actually changed...
		if (!empty($replaces))
		{
			$this->message = strtr($this->message, $replaces);
		}
	}
517
518
	/**
519
	 * Replace /me with the users name, including inside footnotes
520
	 */
521 3
	private function _itsAllAbout()
	{
		global $user_info;

		// "/me text" at the start of the message or of a line
		$me_regex = '~(\A|\n)/me(?: |&nbsp;)([^\n]*)(?:\z)?~i';
		// "/me text" as the whole content of a footnote
		$footnote_regex = '~(\[footnote\])/me(?: |&nbsp;)([^\n]*?)(\[\/footnote\])~i';

		// The name is always written in its quoted form. This is what the former
		// code produced for every name as well, since its test for special
		// characters (preg_match(...) !== false) could never fail.
		$open_tag = '[me=&quot;' . $user_info['name'] . '&quot;]';

		// Callbacks are used so the name is inserted literally; as part of a
		// replacement string a name holding "$1" or "\1" would be expanded as a
		// back reference and come out mangled.
		$this->message = preg_replace_callback(
			$me_regex,
			function ($found) use ($open_tag) {
				return $found[1] . $open_tag . $found[2] . '[/me]';
			},
			$this->message
		);

		$this->message = preg_replace_callback(
			$footnote_regex,
			function ($found) use ($open_tag) {
				return $found[1] . $open_tag . $found[2] . '[/me]' . $found[3];
			},
			$this->message
		);
	}
539
540
	/**
541
	 * Make sure lists have open and close tags
542
	 */
543 3
	private function _validateLists()
	{
		// Openers come bare ([list]) or with attributes ([list ...])
		$opened = substr_count($this->message, '[list]') + substr_count($this->message, '[list ');
		$closed = substr_count($this->message, '[/list]');

		// Positive: closers are missing, negative: openers are missing
		$unclosed = $opened - $closed;

		if ($unclosed < 0)
		{
			// Lead with as many openers as there are surplus closers
			$this->message = str_repeat('[list]', -$unclosed) . $this->message;
		}
		elseif ($unclosed > 0)
		{
			// Finish with a closer for each list left open
			$this->message = $this->message . str_repeat('[/list]', $unclosed);
		}
	}
558
559
	/**
560
	 * Repair a few *cough* common mistakes from user input and from wizzy cut/paste
561
	 */
562 3
	private function _fixMistakes()
	{
		// Pattern => replacement, applied top to bottom. Tags found to be placed
		// correctly are temporarily renamed to [_tag_] so that the catch-all
		// rules which follow leave them alone; they are renamed back near the end.
		$mistake_fixes = array(
			// Find [table]s not followed by [tr].
			'~\[table\](?![\s' . self::NBS . ']*\[tr\])~su' => '[table][tr]',
			// Find [tr]s not followed by [td] or [th]
			'~\[tr\](?![\s' . self::NBS . ']*\[t[dh]\])~su' => '[tr][td]',
			// Find [/td] and [/th]s not followed by something valid.
			'~\[/t([dh])\](?![\s' . self::NBS . ']*(?:\[t[dh]\]|\[/tr\]|\[/table\]))~su' => '[/t$1][/tr]',
			// Find [/tr]s not followed by something valid.
			'~\[/tr\](?![\s' . self::NBS . ']*(?:\[tr\]|\[/table\]))~su' => '[/tr][/table]',
			// Find [/td] [/th]s incorrectly followed by [/table].
			'~\[/t([dh])\][\s' . self::NBS . ']*\[/table\]~su' => '[/t$1][/tr][/table]',
			// Find [table]s, [tr]s, and [/td]s (possibly correctly) followed by [td].
			'~\[(table|tr|/td)\]([\s' . self::NBS . ']*)\[td\]~su' => '[$1]$2[_td_]',
			// Now, any [td]s left should have a [tr] before them.
			'~\[td\]~s' => '[tr][td]',
			// Look for [tr]s which are correctly placed.
			'~\[(table|/tr)\]([\s' . self::NBS . ']*)\[tr\]~su' => '[$1]$2[_tr_]',
			// Any remaining [tr]s should have a [table] before them.
			'~\[tr\]~s' => '[table][tr]',
			// Look for [/td]s or [/th]s followed by [/tr].
			'~\[/t([dh])\]([\s' . self::NBS . ']*)\[/tr\]~su' => '[/t$1]$2[_/tr_]',
			// Any remaining [/tr]s should have a [/td].
			'~\[/tr\]~s' => '[/td][/tr]',
			// Look for properly opened [li]s which aren't closed.
			'~\[li\]([^\[\]]+?)\[li\]~s' => '[li]$1[_/li_][_li_]',
			'~\[li\]([^\[\]]+?)\[/list\]~s' => '[_li_]$1[_/li_][/list]',
			'~\[li\]([^\[\]]+?)$~s' => '[li]$1[/li]',
			// Lists - find correctly closed items/lists.
			'~\[/li\]([\s' . self::NBS . ']*)\[/list\]~su' => '[_/li_]$1[/list]',
			// Find list items closed and then opened.
			'~\[/li\]([\s' . self::NBS . ']*)\[li\]~su' => '[_/li_]$1[_li_]',
			// Now, find any [list]s or [/li]s followed by [li].
			'~\[(list(?: [^\]]*?)?|/li)\]([\s' . self::NBS . ']*)\[li\]~su' => '[$1]$2[_li_]',
			// Allow for sub lists.
			'~\[/li\]([\s' . self::NBS . ']*)\[list\]~u' => '[_/li_]$1[list]',
			'~\[/list\]([\s' . self::NBS . ']*)\[li\]~u' => '[/list]$1[_li_]',
			// Any remaining [li]s weren't inside a [list].
			'~\[li\]~' => '[list][li]',
			// Any remaining [/li]s weren't before a [/list].
			'~\[/li\]~' => '[/li][/list]',
			// Put the correct ones back how we found them.
			'~\[_(li|/li|td|tr|/tr)_\]~' => '[$1]',
			// Images with no real url.
			'~\[img\]https?://.{0,7}\[/img\]~' => '',
		);

		$patterns = array_keys($mistake_fixes);
		$replacements = array_values($mistake_fixes);

		// Fix up some use of tables without [tr]s, etc. (it has to be done more than once to catch it all.)
		for ($j = 0; $j < 3; $j++)
		{
			$fixed = preg_replace($patterns, $replacements, $this->message);

			// preg_replace() answers null when PCRE fails, e.g. on a message that
			// is not valid UTF-8 with the /u patterns above. Keep the message as
			// it is in that case rather than replacing it with nothing.
			if ($fixed === null)
			{
				break;
			}

			$this->message = $fixed;
		}
	}
616
617
	/**
618
	 * Replace our token-ized message with the saved code blocks
619
	 */
620 3
	private function _restoreCodeBlocks()
	{
		// Nothing was tokenized, nothing to put back
		if (empty($this->code_blocks))
		{
			return;
		}

		// Keys are the %%token%% markers, values the code blocks they stand for
		$tokens = array_keys($this->code_blocks);
		$blocks = array_values($this->code_blocks);

		$this->message = str_replace($tokens, $blocks, $this->message);
	}
627
628
	/**
629
	 * Validates and corrects table structure
630
	 *
631
	 * What it does
632
	 *   - Checks tables for correct tag order / nesting
633
	 *   - Adds in missing closing tags, removes excess closing tags
634
	 *   - Although it prevents markup errors, it can mess up the intended (albeit wrong) layout
635
	 * driving the post author in to a furious rage
636
	 *
637
	 */
638 3
	private function _preparseTable()
	{
		// The part of the message still to be scanned
		$remaining = $this->message;
		// Where $remaining starts inside $this->message
		$base = 0;
		// Stack of the table tags currently open, innermost first
		$open_tags = array();

		// Which tags may be opened directly inside a given tag
		$allowed_children = array(
			'table' => array('tr'),
			'tr' => array('td', 'th'),
			'td' => array('table'),
			'th' => array(''),
		);

		// Walk over every table related tag, opening or closing, left to right
		while (preg_match('~\[(/)*(table|tr|td|th)\]~', $remaining, $matches) === 1)
		{
			$tag_text = $matches[0];
			$tag_name = $matches[2];
			$tag_length = strlen($tag_text);

			// Keep track of where this is.
			$offset = strpos($remaining, $tag_text);

			if ($matches[1] == '/')
			{
				// A closing tag is only kept when it closes the innermost open tag
				$remove_tag = empty($open_tags) || ($open_tags[0] != $tag_name);
				if (!$remove_tag)
				{
					array_shift($open_tags);
				}
			}
			else
			{
				// An opening tag must be a table when nothing is open, otherwise
				// it must be an allowed child of the innermost open tag
				$acceptable = empty($open_tags) ? array('table') : $allowed_children[$open_tags[0]];
				$remove_tag = !in_array($tag_name, $acceptable);
				if (!$remove_tag)
				{
					array_unshift($open_tags, $tag_name);
				}
			}

			// Removing?
			if ($remove_tag)
			{
				$cut_at = $base + $offset;
				$this->message = substr($this->message, 0, $cut_at) . substr($this->message, $cut_at + $tag_length);

				// The message just got shorter, make up for it
				$base -= $tag_length;
			}

			// Carry on right behind this tag
			$base += $offset + $tag_length;
			$remaining = substr($remaining, $offset + $tag_length);
		}

		// Close whatever is still open, innermost first
		foreach ($open_tags as $tag)
		{
			$this->message .= '[/' . $tag . ']';
		}
	}
707
708
	/**
709
	 * This is very simple, and just removes things done by preparsecode.
710
	 *
711
	 * @param string $message
712
	 */
713
	public function un_preparsecode($message)
	{
		// Swap the code blocks for tokens so integration sees them protected
		$this->message = $message;
		$this->_tokenizeCodeBlocks();

		// Integration receives the tokenized message and the saved blocks, both by reference
		call_integration_hook('integrate_unpreparse_code', array(&$this->message, &$this->code_blocks, 0));

		// Put the code blocks back where their tokens are
		$this->_restoreCodeBlocks();

		// &nbsp; goes back to a plain space first, then <br> and <br /> back to \n
		$with_spaces = str_replace('&nbsp;', ' ', $this->message);

		return preg_replace('~<br( /)?' . '>~', "\n", $with_spaces);
	}
728
729
	/**
730
	 * Ensure tags inside of nobbc do not get parsed by converting the markers to html entities
731
	 *
732
	 * @param string[] $matches
733
	 */
734
	private function _preparsecode_nobbc_callback($matches)
	{
		// Characters that could start a tag, a link or an email address are
		// swapped for their numeric entities; $matches[1] is the text captured
		// between the nobbc tags.
		$entities = array(
			'[' => '&#91;',
			']' => '&#93;',
			':' => '&#58;',
			'@' => '&#64;',
		);

		$protected = strtr($matches[1], $entities);

		return '[nobbc]' . $protected . '[/nobbc]';
	}
738
739
	/**
740
	 * Use only the primary (first) font face when multiple are supplied
741
	 *
742
	 * @param string[] $matches
743
	 */
744 2
	private function _preparsecode_font_callback($matches)
	{
		// $matches[1] is the comma separated font list, only its first entry is kept
		list ($primary) = explode(',', $matches[1]);

		// Decode it, then drop surrounding spaces and quotes
		$primary = trim(un_htmlspecialchars($primary), ' "\'');

		// $matches[2] is the rest of the tag: its content up to and including [/font]
		return '[font=' . $primary . ']' . $matches[2];
	}
751
752
	/**
753
	 * Takes a tag and changes it to lowercase
754
	 *
755
	 * @param string[] $matches
756
	 */
757 2
	private function _preparsecode_lowertags_callback($matches)
	{
		// [1] = optional leading slash, [2] = tag name, [3] = any attributes.
		// Only the tag name is lowercased, the rest is put back untouched.
		return sprintf('[%s%s%s]', $matches[1], strtolower($matches[2]), $matches[3]);
	}
761
762
	/**
763
	 * Ensure image tags do not load anything by themselves (security)
764
	 *
765
	 * @param string[] $matches
766
	 */
767
	private function _fixTags_img_callback($matches)
	{
		// [1] = the opening img tag, [2] = the image url.
		// "action=" (also as "action%3d") becomes "action-" unless it is the dlattach action.
		$safe_url = preg_replace('~action(=|%3d)(?!dlattach)~i', 'action-', $matches[2]);

		return $matches[1] . $safe_url . '[/img]';
	}
771
772
	/**
773
	 * Find and return PreparseCode instance if it exists,
774
	 * or create a new instance
775
	 *
776
	 * @return PreparseCode
777
	 */
778 2
	public static function instance()
	{
		// Created on first use, the very same object is handed out from then on
		if (null === self::$instance)
		{
			self::$instance = new self();
		}

		return self::$instance;
	}
787
}
788