Passed
Push — main ( 44ea53...137754 )
by TARIQ
15:15 queued 02:39
created

PucReadmeParser::sanitize_text()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 4
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 5
rs 10
1
<?php
2
3
if ( !class_exists('PucReadmeParser', false) ):
4
5
/**
 * This is a slightly modified version of github.com/markjaquith/WordPress-Plugin-Readme-Parser
 * It uses Parsedown instead of the "Markdown Extra" parser.
 *
 * Turns the text of a WordPress-style readme.txt (title line, "Key: value"
 * headers, short description, "== Section ==" blocks) into an associative array.
 */
class PucReadmeParser {

	public function __construct() {
		// This space intentionally blank
	}

	/**
	 * Read a readme file and parse its contents.
	 *
	 * @param string $file Path (or stream URL) of the readme file.
	 * @return array Parsed readme data, or an empty array when the file cannot
	 *               be read or does not start with a "=== Name ===" title.
	 */
	public function parse_readme( $file ) {
		// The previous file() + implode() pair handed FALSE to implode() when the
		// read failed, which is a TypeError on PHP 8 (and "@" cannot suppress it).
		// Older PHP versions ended up returning an empty array in that case, so
		// that result is kept. "@" stays because a missing file is an expected,
		// best-effort condition here.
		$file_contents = @file_get_contents($file);
		if ( false === $file_contents ) {
			return array();
		}

		return $this->parse_readme_contents( $file_contents );
	}

	/**
	 * Parse the text of a readme file.
	 *
	 * @param string $file_contents Raw readme text.
	 * @return array Empty array when no "=== Name ===" title is found at the very
	 *               start; otherwise an array with the keys 'name', 'tags',
	 *               'requires_at_least', 'tested_up_to', 'requires_php',
	 *               'stable_tag', 'contributors', 'donate_link',
	 *               'short_description', 'screenshots', 'is_excerpt',
	 *               'is_truncated', 'sections', 'remaining_content' and
	 *               'upgrade_notice'.
	 */
	public function parse_readme_contents( $file_contents ) {
		// Normalise line endings, then drop a UTF-8 byte order mark if present.
		$file_contents = str_replace(array("\r\n", "\r"), "\n", $file_contents);
		$file_contents = trim($file_contents);
		if ( 0 === strpos( $file_contents, "\xEF\xBB\xBF" ) ) {
			$file_contents = substr( $file_contents, 3 );
		}

		// Markdown transformations: "# Title" style headings become "=== Title ===" style.
		$file_contents = preg_replace( "|^###([^#]+)#*?\s*?\n|im", '=$1='."\n",     $file_contents );
		$file_contents = preg_replace( "|^##([^#]+)#*?\s*?\n|im",  '==$1=='."\n",   $file_contents );
		$file_contents = preg_replace( "|^#([^#]+)#*?\s*?\n|im",   '===$1==='."\n", $file_contents );

		// === Plugin Name ===
		// Must be the very first thing.
		if ( !preg_match('|^===(.*)===|', $file_contents, $_name) ) {
			return array(); // require a name
		}
		$name = trim($_name[1], '=');
		$name = $this->sanitize_text( $name );

		$file_contents = $this->chop_string( $file_contents, $_name[0] );

		// Requires at least: 1.5
		if ( preg_match('|Requires at least:(.*)|i', $file_contents, $_requires_at_least) ) {
			$requires_at_least = $this->sanitize_text($_requires_at_least[1]);
		} else {
			$requires_at_least = null;
		}

		// Tested up to: 2.1
		if ( preg_match('|Tested up to:(.*)|i', $file_contents, $_tested_up_to) ) {
			$tested_up_to = $this->sanitize_text( $_tested_up_to[1] );
		} else {
			$tested_up_to = null;
		}

		// Requires PHP: 5.2.4
		if ( preg_match('|Requires PHP:(.*)|i', $file_contents, $_requires_php) ) {
			$requires_php = $this->sanitize_text( $_requires_php[1] );
		} else {
			$requires_php = null;
		}

		// Stable tag: 10.4-ride-the-fire-eagle-danger-day
		if ( preg_match('|Stable tag:(.*)|i', $file_contents, $_stable_tag) ) {
			$stable_tag = $this->sanitize_text( $_stable_tag[1] );
		} else {
			$stable_tag = null; // we assume trunk, but don't set it here to tell the difference between specified trunk and default trunk
		}

		// Tags: some tag, another tag, we like tags
		if ( preg_match('|Tags:(.*)|i', $file_contents, $_tags) ) {
			$tags = preg_split('|,[\s]*?|', trim($_tags[1]));
			foreach ( array_keys($tags) as $t ) {
				$tags[$t] = $this->sanitize_text( $tags[$t] );
			}
		} else {
			$tags = array();
		}

		// Contributors: markjaquith, mdawaffe, zefrank
		$contributors = array();
		if ( preg_match('|Contributors:(.*)|i', $file_contents, $_contributors) ) {
			$temp_contributors = preg_split('|,[\s]*|', trim($_contributors[1]));
			foreach ( array_keys($temp_contributors) as $c ) {
				$tmp_sanitized = $this->user_sanitize( $temp_contributors[$c] );
				// Names that sanitize down to nothing are skipped; the others keep their original index.
				if ( strlen(trim($tmp_sanitized)) > 0 ) {
					$contributors[$c] = $tmp_sanitized;
				}
			}
		}

		// Donate Link: URL
		if ( preg_match('|Donate link:(.*)|i', $file_contents, $_donate_link) ) {
			$donate_link = esc_url( $_donate_link[1] );
		} else {
			$donate_link = null;
		}

		// Tags, contributors, etc. are optional and order shouldn't matter. So we chop them only after we've grabbed their values.
		// Each entry pairs a parsed value with its regex match; a header is chopped only when its value is non-empty.
		$known_headers = array(
			array( $tags, $_tags ),
			array( $contributors, $_contributors ),
			array( $requires_at_least, $_requires_at_least ),
			array( $tested_up_to, $_tested_up_to ),
			array( $stable_tag, $_stable_tag ),
			array( $donate_link, $_donate_link ),
		);
		foreach ( $known_headers as $header ) {
			if ( $header[0] ) {
				$file_contents = $this->chop_string( $file_contents, $header[1][0] );
			}
		}

		$file_contents = trim($file_contents);

		// "Requires PHP" used to be left in place, so when it followed the last
		// header above it leaked into the short description. It is removed only
		// when it is the very first thing left: chop_string() discards everything
		// before its match, and the header regex can also match body text.
		if ( $requires_php && 0 === strpos( $file_contents, $_requires_php[0] ) ) {
			$file_contents = $this->chop_string( $file_contents, $_requires_php[0] );
		}

		// short-description fu
		if ( !preg_match('/(^(.*?))^[\s]*=+?[\s]*.+?[\s]*=+?/ms', $file_contents, $_short_description) ) {
			// No heading found: both slots alias $file_contents (by reference), so they follow later changes to it.
			$_short_description = array( 1 => &$file_contents, 2 => &$file_contents );
		}
		$short_desc_filtered = $this->sanitize_text( $_short_description[2] );
		$short_desc_length = strlen($short_desc_filtered);
		$short_description = substr($short_desc_filtered, 0, 150);
		$truncated = ( $short_desc_length > strlen($short_description) );
		if ( $_short_description[1] ) {
			$file_contents = $this->chop_string( $file_contents, $_short_description[1] ); // yes, the [1] is intentional
		}

		// == Section ==
		// Break into sections
		// $_sections[0] will be the title of the first section, $_sections[1] will be the content of the first section
		// the array alternates from there:  title2, content2, title3, content3... and so forth
		$_sections = preg_split('/^[\s]*==[\s]*(.+?)[\s]*==/m', $file_contents, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

		$sections = array();
		for ( $i = 0, $section_count = count($_sections); $i < $section_count; $i += 2 ) {
			$title = $this->sanitize_text( $_sections[$i] );
			if ( isset($_sections[$i+1]) ) {
				// "= Sub heading =" lines inside a section become <h4> elements.
				$content = preg_replace('/(^[\s]*)=[\s]+(.+?)[\s]+=/m', '$1<h4>$2</h4>', $_sections[$i+1]);
				$content = $this->filter_text( $content, true );
			} else {
				$content = '';
			}
			$sections[str_replace(' ', '_', strtolower($title))] = array('title' => $title, 'content' => $content);
		}

		// Special sections
		// This is where we nab our special sections, so we can enforce their order and treat them differently, if needed
		// upgrade_notice is not a section, but parse it like it is for now
		$final_sections = array();
		foreach ( array('description', 'installation', 'frequently_asked_questions', 'screenshots', 'changelog', 'change_log', 'upgrade_notice') as $special_section ) {
			if ( isset($sections[$special_section]) ) {
				$final_sections[$special_section] = $sections[$special_section]['content'];
				unset($sections[$special_section]);
			}
		}
		if ( isset($final_sections['change_log']) && empty($final_sections['changelog']) ) {
			$final_sections['changelog'] = $final_sections['change_log'];
		}

		// Each <li> of the screenshots section becomes one screenshot caption.
		$final_screenshots = array();
		if ( isset($final_sections['screenshots']) ) {
			preg_match_all('|<li>(.*?)</li>|s', $final_sections['screenshots'], $screenshots, PREG_SET_ORDER);
			if ( $screenshots ) {
				foreach ( (array) $screenshots as $ss ) {
					$final_screenshots[] = $ss[1];
				}
			}
		}

		// Parse the upgrade_notice section specially:
		// 1.0 => blah, 1.1 => fnord
		$upgrade_notice = array();
		if ( isset($final_sections['upgrade_notice']) ) {
			$split = preg_split( '#<h4>(.*?)</h4>#', $final_sections['upgrade_notice'], -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
			$split_count = count($split);
			if ( $split_count >= 2 ) {
				for ( $i = 0; $i < $split_count; $i += 2 ) {
					// An odd number of pieces leaves the last key without a body; use an empty notice
					// instead of reading an undefined offset.
					$notice = isset($split[$i + 1]) ? $split[$i + 1] : '';
					$upgrade_notice[$this->sanitize_text( $split[$i] )] = substr( $this->sanitize_text( $notice ), 0, 300 );
				}
			}
			unset( $final_sections['upgrade_notice'] );
		}

		// No description?
		// No problem... we'll just fall back to the old style of description
		// We'll even let you use markup this time!
		$excerpt = false;
		if ( !isset($final_sections['description']) ) {
			$final_sections = array_merge(array('description' => $this->filter_text( $_short_description[2], true )), $final_sections);
			$excerpt = true;
		}

		// dump the non-special sections into $remaining_content
		// their order will be determined by their original order in the readme.txt
		$remaining_content = '';
		foreach ( $sections as $s_data ) {
			$remaining_content .= "\n<h3>{$s_data['title']}</h3>\n{$s_data['content']}";
		}
		$remaining_content = trim($remaining_content);

		// All done!
		// $r['tags'] and $r['contributors'] are simple arrays
		// $r['sections'] is an array with named elements
		$r = array(
			'name' => $name,
			'tags' => $tags,
			'requires_at_least' => $requires_at_least,
			'tested_up_to' => $tested_up_to,
			'requires_php' => $requires_php,
			'stable_tag' => $stable_tag,
			'contributors' => $contributors,
			'donate_link' => $donate_link,
			'short_description' => $short_description,
			'screenshots' => $final_screenshots,
			'is_excerpt' => $excerpt,
			'is_truncated' => $truncated,
			'sections' => $final_sections,
			'remaining_content' => $remaining_content,
			'upgrade_notice' => $upgrade_notice
		);

		return $r;
	}

	/**
	 * Chop a "prefix" from a string. Aggressive! Uses strstr, not 0 === strpos,
	 * so everything up to and including the first occurrence of $chop is removed
	 * wherever it occurs.
	 *
	 * @param string $string Text to shorten.
	 * @param string $chop   Text to look for.
	 * @return string Trimmed remainder after $chop, or the trimmed input when
	 *                $chop is not found.
	 */
	public function chop_string( $string, $chop ) {
		if ( $_string = strstr($string, $chop) ) {
			$_string = substr($_string, strlen($chop));
			return trim($_string);
		} else {
			return trim($string);
		}
	}

	/**
	 * Reduce a user name to whitelisted characters.
	 *
	 * Defers to a global user_sanitize() function when one exists (bbPress native).
	 *
	 * @param string $text   Raw user name.
	 * @param bool   $strict When true, underscores are removed as well and runs
	 *                       of dashes collapse to a single dash.
	 * @return string
	 */
	public function user_sanitize( $text, $strict = false ) {
		if ( function_exists('user_sanitize') ) { // bbPress native
			return user_sanitize( $text, $strict );
		}

		if ( $strict ) {
			$text = preg_replace('/[^a-z0-9-]/i', '', $text);
			$text = preg_replace('|-+|', '-', $text);
		} else {
			$text = preg_replace('/[^a-z0-9_-]/i', '', $text);
		}
		return $text;
	}

	/**
	 * Plain-text cleanup (not fancy): strip tags, escape for HTML, trim.
	 *
	 * @param string $text
	 * @return string
	 */
	public function sanitize_text( $text ) {
		$text = strip_tags($text);
		$text = esc_html($text);
		$text = trim($text);
		return $text;
	}

	/**
	 * Rich-text cleanup (fancy): optional Markdown rendering followed by tag
	 * balancing and filtering down to a small set of allowed HTML tags.
	 *
	 * @param string $text
	 * @param bool   $markdown Whether to run the text through Parsedown.
	 * @return string
	 */
	public function filter_text( $text, $markdown = false ) {
		$text = trim($text);

		$text = call_user_func( array( __CLASS__, 'code_trick' ), $text, $markdown ); // A better parser than Markdown's for: backticks -> CODE

		if ( $markdown ) { // Parse markdown.
			if ( !class_exists('Parsedown', false) ) {
				// The "Legacy" build is loaded on PHP versions older than 5.3.
				/** @noinspection PhpIncludeInspection */
				require_once(dirname(__FILE__) . '/Parsedown' . (version_compare(PHP_VERSION, '5.3.0', '>=') ? '' : 'Legacy') . '.php');
			}
			$instance = Parsedown::instance();
			$text = $instance->text($text);
		}

		// Tags (and their attributes) that survive wp_kses().
		$allowed = array(
			'a' => array(
				'href' => array(),
				'title' => array(),
				'rel' => array()),
			'blockquote' => array('cite' => array()),
			'br' => array(),
			'p' => array(),
			'code' => array(),
			'pre' => array(),
			'em' => array(),
			'strong' => array(),
			'ul' => array(),
			'ol' => array(),
			'li' => array(),
			'h3' => array(),
			'h4' => array()
		);

		$text = balanceTags($text);

		$text = wp_kses( $text, $allowed );
		$text = trim($text);
		return $text;
	}

	/**
	 * Convert code markup between HTML and backtick form.
	 * Don't use bbPress native function - it's incompatible with Markdown.
	 *
	 * @param string $text
	 * @param bool   $markdown Whether the result will be fed to a Markdown parser.
	 * @return string
	 */
	public function code_trick( $text, $markdown ) {
		// If doing markdown, first take any user formatted code blocks and turn them into backticks so that
		// markdown will preserve things like underscores in code blocks
		if ( $markdown ) {
			$text = preg_replace_callback("!(<pre><code>|<code>)(.*?)(</code></pre>|</code>)!s", array( __CLASS__,'decodeit'), $text);
		}

		$text = str_replace(array("\r\n", "\r"), "\n", $text);
		if ( !$markdown ) {
			// This gets the "inline" code blocks, but can't be used with Markdown.
			$text = preg_replace_callback("|(`)(.*?)`|", array( __CLASS__, 'encodeit'), $text);
			// This gets the "block level" code blocks and converts them to PRE CODE
			$text = preg_replace_callback("!(^|\n)`(.*?)`!s", array( __CLASS__, 'encodeit'), $text);
		} else {
			// Markdown can do inline code, we convert bbPress style block level code to Markdown style
			$text = preg_replace_callback("!(^|\n)([ \t]*?)`(.*?)`!s", array( __CLASS__, 'indent'), $text);
		}
		return $text;
	}

	/**
	 * preg_replace_callback() handler: turn a backtick block into an indented
	 * (Markdown style) code block.
	 *
	 * @param array $matches [1] leading newline or empty string, [2] existing
	 *                       indentation, [3] the code between the backticks.
	 * @return string
	 */
	public function indent( $matches ) {
		$text = $matches[3];
		// Prefix every line with the existing indentation plus four spaces.
		$text = preg_replace('|^|m', $matches[2] . '    ', $text);
		return $matches[1] . $text;
	}

	/**
	 * preg_replace_callback() handler: wrap backtick-quoted text in <code>
	 * (and <pre> for block level matches) with HTML special characters escaped.
	 *
	 * Defers to a global encodeit() function when one exists (bbPress native).
	 *
	 * @param array $matches [1] what preceded the code ("`" for inline matches),
	 *                       [2] the code itself.
	 * @return string
	 */
	public function encodeit( $matches ) {
		if ( function_exists('encodeit') ) { // bbPress native
			return encodeit( $matches );
		}

		$text = trim($matches[2]);
		$text = htmlspecialchars($text, ENT_QUOTES);
		$text = str_replace(array("\r\n", "\r"), "\n", $text);
		$text = preg_replace("|\n\n\n+|", "\n\n", $text);
		// Undo the double escaping of already-escaped angle brackets.
		$text = str_replace('&amp;lt;', '&lt;', $text);
		$text = str_replace('&amp;gt;', '&gt;', $text);
		$text = "<code>$text</code>";
		if ( "`" != $matches[1] ) {
			$text = "<pre>$text</pre>";
		}
		return $text;
	}

	/**
	 * preg_replace_callback() handler: turn an HTML code element back into
	 * backtick-quoted text with its entities decoded.
	 *
	 * Defers to a global decodeit() function when one exists (bbPress native).
	 *
	 * @param array $matches [1] the opening tag(s), [2] the code itself.
	 * @return string
	 */
	public function decodeit( $matches ) {
		if ( function_exists('decodeit') ) { // bbPress native
			return decodeit( $matches );
		}

		$text = $matches[2];
		$trans_table = array_flip(get_html_translation_table(HTML_ENTITIES));
		$text = strtr($text, $trans_table);
		$text = str_replace('<br />', '', $text);
		$text = str_replace('&#38;', '&', $text);
		$text = str_replace('&#39;', "'", $text);
		if ( '<pre><code>' == $matches[1] ) {
			$text = "\n$text\n";
		}
		return "`$text`";
	}

} // end class
347
348
endif;
349