1 | <?php |
||
2 | /** |
||
3 | * This file is part of the O2System Framework package. |
||
4 | * |
||
5 | * For the full copyright and license information, please view the LICENSE |
||
6 | * file that was distributed with this source code. |
||
7 | * |
||
8 | * @author Steeve Andrian Salim |
||
9 | * @copyright Copyright (c) Steeve Andrian Salim |
||
10 | */ |
||
11 | // ------------------------------------------------------------------------ |
||
12 | /** |
||
13 | * Security Helper |
||
14 | * |
||
15 | * A collection of helper functions for security purposes. |
||
16 | */ |
||
17 | // ------------------------------------------------------------------------ |
||
18 | |||
if ( ! function_exists('strip_image_tags')) {
    /**
     * strip_image_tags
     *
     * Replaces each <img> tag found in the given HTML with the value of its
     * src attribute. Quoted attribute values are handled first, unquoted
     * values second.
     *
     * @param string $source_code The string of HTML source code.
     *
     * @return string
     */
    function strip_image_tags($source_code)
    {
        // <img ... src="value" ...> or <img ... src='value' ...>
        $quoted_src_pattern = '#<img[\s/]+.*?src\s*=\s*(["\'])([^\\1]+?)\\1.*?\>#i';

        // <img ... src=value ...> where the value is not quoted
        $bare_src_pattern = '#<img[\s/]+.*?src\s*=\s*?(([^\s"\'=<>`]+)).*?\>#i';

        // In both patterns the src value is capture group 2.
        $without_images = preg_replace(
            [$quoted_src_pattern, $bare_src_pattern],
            '\\2',
            $source_code
        );

        return $without_images;
    }
}
||
41 | |||
42 | //-------------------------------------------------------------------- |
||
43 | |||
if ( ! function_exists('strip_cdata')) {
    /**
     * strip_cdata
     *
     * Unwraps CDATA sections: every "<![CDATA[ ... ]]>" occurrence (matched
     * case-insensitively, across line breaks) is replaced by its inner content.
     *
     * @param string $source_code The string of HTML source code.
     *
     * @return string
     */
    function strip_cdata($source_code)
    {
        $found = [];
        preg_match_all('/<!\[cdata\[(.*?)\]\]>/is', $source_code, $found);

        // Index 0 holds the full wrapped sections, index 1 their inner content.
        $wrapped_sections = $found[ 0 ];
        $inner_contents = $found[ 1 ];

        return str_replace($wrapped_sections, $inner_contents, $source_code);
    }
}
||
61 | // ------------------------------------------------------------------------ |
||
62 | |||
if ( ! function_exists('strips_all_tags')) {
    /**
     * strips_all_tags
     *
     * Strip all HTML tags from string of HTML source code but keep the
     * original text content. The content of <script> and <style> elements is
     * removed together with the elements themselves.
     *
     * @param string $source_code The string of HTML source code.
     *
     * @return string
     */
    function strips_all_tags($source_code)
    {
        // preg_replace() applies the patterns one after another, so the
        // element-with-content patterns (script, style) must run BEFORE the
        // generic tag pattern; otherwise the generic pattern removes the
        // <style> tags first and the CSS text is left behind.
        return preg_replace([
            '@<script[^>]*?>.*?</script>@si',   // Strip out javascript including its content
            '@<style[^>]*?>.*?</style>@si',     // Strip out style blocks including their content (lazy match)
            '@<[\/\!]*?[^<>]*?>@si',            // Strip out remaining HTML tags
            '@<![\s\S]*?--[ \t\n\r]*>@',        // Strip leftover multi-line comments
        ], '', $source_code);
    }
}
||
84 | |||
85 | // ------------------------------------------------------------------------ |
||
86 | |||
if ( ! function_exists('strips_tags')) {
    /**
     * strips_tags
     *
     * Strips HTML from a string in four passes:
     *  1. HTML comments are removed.
     *  2. Every disallowed tag is removed together with the content between
     *     its opening and closing tag (e.g. <script>...</script>).
     *  3. All remaining tags are removed, their content is kept.
     *  4. Allowed tags, which were shielded during passes 1-3, are restored.
     *
     * A comment, disallowed tag or "<" that is never terminated is removed
     * together with everything that follows it.
     *
     * Note: tag names are matched as prefixes by plain string search, so an
     * allowed or disallowed name such as "b" also matches "<br" or "<body".
     *
     * @param string $source_code     The string of HTML source code.
     * @param string $disallowed_tags The list of disallowed HTML tags, separated with |.
     * @param string $allowed_tags    The list of allowed HTML tags, separated with |.
     *
     * @return string
     */
    function strips_tags($source_code, $disallowed_tags = 'script|style|noframes|select|option', $allowed_tags = '')
    {
        $source_code = (string) $source_code;

        $keep_tags = strlen($allowed_tags) > 0 ? explode('|', $allowed_tags) : [];
        $drop_tags = strlen($disallowed_tags) > 0 ? explode('|', $disallowed_tags) : [];

        // Shield allowed tags: swap their "<" for a placeholder so that the
        // removal passes below do not see them.
        foreach ($keep_tags as $tag) {
            $source_code = str_replace('<' . $tag, '[{(' . $tag, $source_code);
            $source_code = str_replace('</' . $tag, '[{(/' . $tag, $source_code);
        }

        // Pass 1: remove comment blocks.
        while (($start = stripos($source_code, '<!--')) !== false) {
            $end = stripos($source_code, '-->', $start);

            if ($end === false) {
                // Unterminated comment: cut at the opener instead of looping forever.
                $source_code = substr($source_code, 0, $start);
                break;
            }

            $comment = substr($source_code, $start, $end - $start + 3);
            $source_code = str_replace($comment, '', $source_code);
        }

        // Pass 2: remove disallowed tags including the content between them.
        foreach ($drop_tags as $tag) {
            $opener = '<' . $tag;
            $closer = $tag . '>';
            $opener_length = strlen($opener);

            while (($start = stripos($source_code, $opener)) !== false) {
                $end = stripos($source_code, $closer, $start + $opener_length);

                if ($end === false) {
                    // No closing tag: cut at the opener instead of looping forever.
                    $source_code = substr($source_code, 0, $start);
                    break;
                }

                // strlen('<tag') === strlen('tag>'), so the span ends right
                // after the ">" of the closing tag.
                $element = substr($source_code, $start, $end - $start + $opener_length);
                $source_code = str_replace($element, '', $source_code);
            }
        }

        // Pass 3: remove every remaining tag, keeping the surrounding text.
        while (($start = strpos($source_code, '<')) !== false) {
            $end = strpos($source_code, '>', $start);

            if ($end === false) {
                // Dangling "<": cut at the opener instead of looping forever.
                $source_code = substr($source_code, 0, $start);
                break;
            }

            $tag_text = substr($source_code, $start, $end - $start + 1);
            $source_code = str_replace($tag_text, '', $source_code);
        }

        // Pass 4: turn the placeholders back into the allowed tags.
        foreach ($keep_tags as $tag) {
            $source_code = str_replace('[{(' . $tag, '<' . $tag, $source_code);
            $source_code = str_replace('[{(/' . $tag, '</' . $tag, $source_code);
        }

        return trim($source_code);
    }
}
||
155 | |||
156 | // ------------------------------------------------------------------------ |
||
157 | |||
if ( ! function_exists('strip_word_doc')) {
    /**
     * strip_word_doc
     *
     * Cleans up markup exported or pasted from MS Word:
     *  - typographic quotes and em dashes are replaced by ASCII characters,
     *  - HTML entities are decoded,
     *  - C style comments are removed,
     *  - tags are stripped with strip_tags(), except the allowed ones,
     *  - whitespace is normalised,
     *  - bold / italic / underline tags are reduced to attribute-less
     *    <b>, <i> and <u>,
     *  - leftover HTML comments are removed.
     *
     * @param string $source_code  The string of HTML source code.
     * @param string $allowed_tags The tags to keep, passed unchanged to
     *                             strip_tags() (e.g. '<p><b><i>').
     *
     * @return string
     */
    function strip_word_doc($source_code, $allowed_tags = '')
    {
        mb_regex_encoding('UTF-8');

        //replace MS special characters first
        $search = [
            '/‘/u',
            '/’/u',
            '/“/u',
            '/”/u',
            '/—/u',
        ];
        $replace = [
            '\'',
            '\'',
            '"',
            '"',
            '-',
        ];
        $source_code = preg_replace($search, $replace, $source_code);

        //make sure _all_ html entities are converted to the plain ascii equivalents - it appears
        //in some MS headers, some html entities are encoded and some aren't
        $source_code = html_entity_decode($source_code, ENT_QUOTES, 'UTF-8');

        //try to strip out any C style comments first, since these, embedded in html comments, seem to
        //prevent strip_tags from removing html comments (MS Word introduced combination)
        if (mb_stripos($source_code, '/*') !== false) {
            // mb_ereg patterns take NO delimiters; the "m" option lets "." match newlines.
            $source_code = mb_ereg_replace('/\*.*?\*/', '', $source_code, 'm');
        }

        //introduce a space into any arithmetic expressions that could be caught by strip_tags so that they won't be
        //'<1' becomes '< 1'(note: somewhat application specific)
        $source_code = preg_replace('/<([0-9]+)/', '< $1', $source_code);
        $source_code = strip_tags($source_code, $allowed_tags);

        //eliminate extraneous whitespace from start and end of line, or anywhere there are two or more spaces, convert it to one
        $source_code = preg_replace(
            [
                '/^\s\s+/',
                '/\s\s+$/',
                '/\s\s+/u',
            ],
            [
                '',
                '',
                ' ',
            ],
            $source_code
        );

        //strip out inline css and simplify style tags
        //the \b keeps the tag names exact, so <br>, <img> or <ul> are not mistaken for <b>, <i> or <u>
        $search = [
            '#<(strong|b)\b[^>]*>(.*?)</(strong|b)>#isu',
            '#<(em|i)\b[^>]*>(.*?)</(em|i)>#isu',
            '#<u\b[^>]*>(.*?)</u>#isu',
        ];
        $replace = [
            '<b>$2</b>',
            '<i>$2</i>',
            '<u>$1</u>',
        ];
        $source_code = preg_replace($search, $replace, $source_code);

        //on some of the newer MS Word exports, where you get conditionals of the form 'if gte mso 9', etc., it appears
        //that whatever is in one of the html comments prevents strip_tags from eradicating the html comment that contains
        //some MS Style Definitions - this last bit gets rid of any leftover comments.
        //The match is lazy so that text between two separate comments is kept.
        if (strpos((string) $source_code, '<!--') !== false) {
            $source_code = preg_replace('/<!--.*?-->/isu', '', $source_code);
        }

        return $source_code;
    }
}
||
252 | |||
253 | //-------------------------------------------------------------------- |
||
254 | |||
if ( ! function_exists('strip_slashes_recursive')) {
    /**
     * strip_slashes_recursive
     *
     * Applies stripslashes() to a value. When the value is an array the
     * function is applied to every element, descending into nested arrays.
     *
     * @link  http://php.net/manual/en/function.stripslashes.php
     *
     * @param string|array $string The input string, or an array of input strings.
     *
     * @return string|array The input with backslashes stripped off
     *                      (\' becomes ' and so on); double backslashes (\\)
     *                      are made into a single backslash (\).
     */
    function strip_slashes_recursive($string)
    {
        if (is_array($string)) {
            // Recurse by name so nested arrays are handled as well.
            return array_map('strip_slashes_recursive', $string);
        }

        return stripslashes($string);
    }
}
||
283 | |||
284 | // ------------------------------------------------------------------------ |
||
285 | |||
if ( ! function_exists('strip_comments')) {
    /**
     * strip_comments
     *
     * Removes every HTML comment ("<!-- ... -->"), including comments that
     * span several lines, from the given source.
     *
     * @param string $source_code HTML Source Code
     *
     * @return string
     */
    function strip_comments($source_code)
    {
        // [\s\S] matches any character including line breaks; the lazy
        // quantifier ends each match at the nearest "-->".
        $comment_pattern = '/<!--[\s\S]*?-->/';
        $without_comments = preg_replace($comment_pattern, '', $source_code);

        return $without_comments;
    }
}
||
301 | |||
302 | // ------------------------------------------------------------------------ |
||
303 | |||
if ( ! function_exists('clean_white_space')) {
    /**
     * clean_white_space
     *
     * Clean HTML Whitespace.
     *
     * Line feeds, carriage returns, tabs and non-breaking spaces (both the
     * "&nbsp;" entity and the literal UTF-8 character) are removed, then every
     * run of ordinary spaces is collapsed into a single space.
     *
     * @param string $source_code HTML Source Code
     *
     * @return string
     */
    function clean_white_space($source_code)
    {
        // Ordinary spaces are deliberately NOT removed here: removing them
        // would glue words together and leave the collapse step below with
        // nothing to do.
        $source_code = str_replace(["\n", "\r", "\t", '&nbsp;', "\xC2\xA0"], '', $source_code);

        return preg_replace('| +|', ' ', $source_code);
    }
}
||
321 | |||
322 | // ------------------------------------------------------------------------ |
||
323 | |||
if ( ! function_exists('encode_php_tags')) {
    /**
     * encode_php_tags
     *
     * Encode PHP tags to entities: "<?" becomes "&lt;?" and "?>" becomes
     * "?&gt;", so that the tags are displayed as text instead of being
     * treated as PHP open / close tags.
     *
     * @param string $string The string to be encoded.
     *
     * @return string
     */
    function encode_php_tags($string)
    {
        // The replacements must be the entity forms; replacing the tags with
        // themselves would leave the string unchanged.
        return str_replace(['<?', '?>'], ['&lt;?', '?&gt;'], $string);
    }
}
||
339 | |||
340 | // ------------------------------------------------------------------------ |
||
341 | |||
if ( ! function_exists('escape_html')) {
    /**
     * escape_html
     *
     * Returns HTML escaped variable. Strings are escaped with
     * htmlspecialchars() using ENT_QUOTES; arrays are escaped element by
     * element (nested arrays included) and keep their keys.
     *
     * @param mixed  $source_code   The input string or array of strings to be escaped.
     * @param string $encoding      The character encoding passed to htmlspecialchars().
     * @param bool   $double_encode $double_encode set to FALSE prevents escaping twice.
     *
     * @return mixed The escaped string or array of strings as a result.
     */
    function escape_html($source_code, $encoding = 'UTF-8', $double_encode = true)
    {
        if (is_array($source_code)) {
            // Forward BOTH optional arguments to every element; array_map()
            // with a single array also preserves the original keys.
            return array_map(
                static function ($item) use ($encoding, $double_encode) {
                    return escape_html($item, $encoding, $double_encode);
                },
                $source_code
            );
        }

        return htmlspecialchars($source_code, ENT_QUOTES, $encoding, $double_encode);
    }
}
If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration: