1 | <?php |
||
2 | /** |
||
3 | * YOURLS modification of a small subset from WordPress' KSES implementation. |
||
4 | * Straight from the Let's Not Reinvent The Wheel department. |
||
5 | */ |
||
6 | |||
7 | /** |
||
8 | * kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes |
||
9 | * Copyright (C) 2002, 2003, 2005 Ulf Harnhammar |
||
10 | * |
||
11 | * This program is free software and open source software; you can redistribute |
||
12 | * it and/or modify it under the terms of the GNU General Public License as |
||
13 | * published by the Free Software Foundation; either version 2 of the License, |
||
14 | * or (at your option) any later version. |
||
15 | * |
||
16 | * This program is distributed in the hope that it will be useful, but WITHOUT |
||
17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
||
18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
||
19 | * more details. |
||
20 | * |
||
21 | * You should have received a copy of the GNU General Public License along |
||
22 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
23 | * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA |
||
24 | * http://www.gnu.org/licenses/gpl.html |
||
25 | * |
||
26 | * [kses strips evil scripts!] |
||
27 | * |
||
28 | * @version 0.2.2 |
||
29 | * @copyright (C) 2002, 2003, 2005 |
||
30 | * @author Ulf Harnhammar <http://advogato.org/person/metaur/> |
||
31 | * |
||
32 | * @package External |
||
33 | * @subpackage KSES |
||
34 | * |
||
35 | */ |
||
36 | |||
37 | /* NOTE ABOUT GLOBALS |
||
38 | * Two globals are defined: $yourls_allowedentitynames and $yourls_allowedprotocols |
||
39 | * - $yourls_allowedentitynames is used internally in KSES functions to sanitize HTML entities |
||
40 | * - $yourls_allowedprotocols is used in various parts of YOURLS, not just in KSES, albeit being defined here |
||
41 | * Two globals are not defined and unused at this moment: $yourls_allowedtags_all and $yourls_allowedtags |
||
42 | * The code for these vars is here and ready for any future use |
||
43 | */ |
||
44 | |||
// Run the KSES setup once plugins have loaded, so user defined values are taken into account
yourls_add_action( 'plugins_loaded', 'yourls_kses_init' );
||
47 | |||
/**
 * Set up the KSES globals, unless they have already been defined (by a plugin)
 *
 * Each of $yourls_allowedentitynames and $yourls_allowedprotocols is populated
 * only when it is currently empty. The default list is passed through its own
 * filter ('kses_allowed_entities' / 'kses_allowed_protocols') before being stored.
 *
 * @since 1.6
 *
 * @return void
 */
function yourls_kses_init() {
    global $yourls_allowedentitynames, $yourls_allowedprotocols;

    if ( empty( $yourls_allowedentitynames ) ) {
        $default_entities          = yourls_kses_allowed_entities();
        $yourls_allowedentitynames = yourls_apply_filter( 'kses_allowed_entities', $default_entities );
    }

    if ( empty( $yourls_allowedprotocols ) ) {
        $default_protocols       = yourls_kses_allowed_protocols();
        $yourls_allowedprotocols = yourls_apply_filter( 'kses_allowed_protocols', $default_protocols );
    }

    // Everything from here to the closing marker is one disabled comment block,
    // kept ready for the two globals that are not in use yet.

    /** See NOTE ABOUT GLOBALS **

    if( ! $yourls_allowedtags_all ) {
        $yourls_allowedtags_all = yourls_kses_allowed_tags_all();
        $yourls_allowedtags_all = array_map( '_yourls_add_global_attributes', $yourls_allowedtags_all );
        $yourls_allowedtags_all = yourls_apply_filter( 'kses_allowed_tags_all', $yourls_allowedtags_all );
    } else {
        // User defined: let's sanitize
        $yourls_allowedtags_all = yourls_kses_array_lc( $yourls_allowedtags_all );
    }

    if( ! $yourls_allowedtags ) {
        $yourls_allowedtags = yourls_kses_allowed_tags();
        $yourls_allowedtags = array_map( '_yourls_add_global_attributes', $yourls_allowedtags );
        $yourls_allowedtags = yourls_apply_filter( 'kses_allowed_tags', $yourls_allowedtags );
    } else {
        // User defined: let's sanitize
        $yourls_allowedtags = yourls_kses_array_lc( $yourls_allowedtags );
    }

    /**/
}
|
87 | |||
/**
 * Kses list of all allowable HTML tags.
 *
 * Complete (?) list of HTML tags, each mapped to the attributes it may carry.
 * Keep this function available for any plugin or future feature that will want
 * to display lots of HTML.
 *
 * @since 1.6
 *
 * @return array All tags, as tag name => array( attribute name => true )
 */
function yourls_kses_allowed_tags_all() {
    // Attribute sets that several tags have in common
    $none       = [];
    $align_only = [ 'align' => true ];
    $text_block = [ 'align' => true, 'dir' => true, 'lang' => true, 'xml:lang' => true ];
    $row_group  = [ 'align' => true, 'char' => true, 'charoff' => true, 'valign' => true ];

    return [
        'address'    => $none,
        'a'          => [ 'href' => true, 'rel' => true, 'rev' => true, 'name' => true, 'target' => true ],
        'abbr'       => $none,
        'acronym'    => $none,
        'area'       => [
            'alt' => true, 'coords' => true, 'href' => true,
            'nohref' => true, 'shape' => true, 'target' => true,
        ],
        'article'    => $text_block,
        'aside'      => $text_block,
        'b'          => $none,
        'big'        => $none,
        'blockquote' => [ 'cite' => true, 'lang' => true, 'xml:lang' => true ],
        'br'         => $none,
        'button'     => [ 'disabled' => true, 'name' => true, 'type' => true, 'value' => true ],
        'caption'    => $align_only,
        'cite'       => [ 'dir' => true, 'lang' => true ],
        'code'       => $none,
        'col'        => [
            'align' => true, 'char' => true, 'charoff' => true, 'span' => true,
            'dir' => true, 'valign' => true, 'width' => true,
        ],
        'del'        => [ 'datetime' => true ],
        'dd'         => $none,
        'details'    => [ 'align' => true, 'dir' => true, 'lang' => true, 'open' => true, 'xml:lang' => true ],
        'div'        => $text_block,
        'dl'         => $none,
        'dt'         => $none,
        'em'         => $none,
        'fieldset'   => $none,
        'figure'     => $text_block,
        'figcaption' => $text_block,
        'font'       => [ 'color' => true, 'face' => true, 'size' => true ],
        'footer'     => $text_block,
        'form'       => [
            'action' => true, 'accept' => true, 'accept-charset' => true, 'enctype' => true,
            'method' => true, 'name' => true, 'target' => true,
        ],
        'h1'         => $align_only,
        'h2'         => $align_only,
        'h3'         => $align_only,
        'h4'         => $align_only,
        'h5'         => $align_only,
        'h6'         => $align_only,
        'header'     => $text_block,
        'hgroup'     => $text_block,
        'hr'         => [ 'align' => true, 'noshade' => true, 'size' => true, 'width' => true ],
        'i'          => $none,
        'img'        => [
            'alt' => true, 'align' => true, 'border' => true, 'height' => true, 'hspace' => true,
            'longdesc' => true, 'vspace' => true, 'src' => true, 'usemap' => true, 'width' => true,
        ],
        'ins'        => [ 'datetime' => true, 'cite' => true ],
        'kbd'        => $none,
        'label'      => [ 'for' => true ],
        'legend'     => $align_only,
        'li'         => $align_only,
        'map'        => [ 'name' => true ],
        'menu'       => [ 'type' => true ],
        'nav'        => $text_block,
        'p'          => $text_block,
        'pre'        => [ 'width' => true ],
        'q'          => [ 'cite' => true ],
        's'          => $none,
        // Same attributes as $text_block, but 'dir' comes first here
        'span'       => [ 'dir' => true, 'align' => true, 'lang' => true, 'xml:lang' => true ],
        'section'    => $text_block,
        'small'      => $none,
        'strike'     => $none,
        'strong'     => $none,
        'sub'        => $none,
        'summary'    => $text_block,
        'sup'        => $none,
        'table'      => [
            'align' => true, 'bgcolor' => true, 'border' => true, 'cellpadding' => true,
            'cellspacing' => true, 'dir' => true, 'rules' => true, 'summary' => true, 'width' => true,
        ],
        'tbody'      => $row_group,
        'td'         => [
            'abbr' => true, 'align' => true, 'axis' => true, 'bgcolor' => true, 'char' => true,
            'charoff' => true, 'colspan' => true, 'dir' => true, 'headers' => true, 'height' => true,
            'nowrap' => true, 'rowspan' => true, 'scope' => true, 'valign' => true, 'width' => true,
        ],
        'textarea'   => [ 'cols' => true, 'rows' => true, 'disabled' => true, 'name' => true, 'readonly' => true ],
        'tfoot'      => $row_group,
        // Like 'td', minus 'dir'
        'th'         => [
            'abbr' => true, 'align' => true, 'axis' => true, 'bgcolor' => true, 'char' => true,
            'charoff' => true, 'colspan' => true, 'headers' => true, 'height' => true,
            'nowrap' => true, 'rowspan' => true, 'scope' => true, 'valign' => true, 'width' => true,
        ],
        'thead'      => $row_group,
        'title'      => $none,
        'tr'         => [ 'align' => true, 'bgcolor' => true, 'char' => true, 'charoff' => true, 'valign' => true ],
        'tt'         => $none,
        'u'          => $none,
        'ul'         => [ 'type' => true ],
        'ol'         => [ 'start' => true, 'type' => true ],
        'var'        => $none,
    ];
}
||
414 | |||
/**
 * Kses list of default allowable HTML tags. TODO: trim down to necessary only.
 *
 * Short list of HTML tags used in YOURLS core for display, each mapped to the
 * attributes it may carry.
 *
 * @since 1.6
 *
 * @return array Allowed tags, as tag name => array( attribute name => true )
 */
function yourls_kses_allowed_tags() {
    // Attribute sets used by more than one tag
    $none       = [];
    $title_only = [ 'title' => true ];
    $cite_only  = [ 'cite' => true ];

    return [
        'a'          => [ 'href' => true, 'title' => true ],
        'abbr'       => $title_only,
        'acronym'    => $title_only,
        'b'          => $none,
        'blockquote' => $cite_only,
        'cite'       => $none,
        'code'       => $none,
        'del'        => [ 'datetime' => true ],
        'em'         => $none,
        'i'          => $none,
        'q'          => $cite_only,
        'strike'     => $none,
        'strong'     => $none,
    ];
}
||
454 | |||
/**
 * Kses list of allowable HTML entity names.
 *
 * Names are listed without the leading '&' and the trailing ';'.
 *
 * @since 1.6
 *
 * @return array Allowed entities
 */
function yourls_kses_allowed_entities() {
    return [
        // Spacing, currency, signs and punctuation
        'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen', 'brvbar', 'sect',
        'uml', 'copy', 'ordf', 'laquo', 'not', 'shy', 'reg', 'macr',
        'deg', 'plusmn', 'acute', 'micro', 'para', 'middot', 'cedil', 'ordm',
        'raquo', 'iquest',

        // Accented capital letters (with 'times' and 'szlig' in between)
        'Agrave', 'Aacute', 'Acirc', 'Atilde', 'Auml', 'Aring', 'AElig', 'Ccedil',
        'Egrave', 'Eacute', 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
        'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde', 'Ouml', 'times',
        'Oslash', 'Ugrave', 'Uacute', 'Ucirc', 'Uuml', 'Yacute', 'THORN', 'szlig',

        // Accented small letters (with 'divide' in between)
        'agrave', 'aacute', 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
        'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute', 'icirc', 'iuml',
        'eth', 'ntilde', 'ograve', 'oacute', 'ocirc', 'otilde', 'ouml', 'divide',
        'oslash', 'ugrave', 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml',

        // Markup characters, ligatures, spaces, dashes and quotes
        'quot', 'amp', 'lt', 'gt', 'apos',
        'OElig', 'oelig', 'Scaron', 'scaron', 'Yuml', 'circ', 'tilde',
        'ensp', 'emsp', 'thinsp', 'zwnj', 'zwj', 'lrm', 'rlm',
        'ndash', 'mdash', 'lsquo', 'rsquo', 'sbquo', 'ldquo', 'rdquo', 'bdquo',
        'dagger', 'Dagger', 'permil', 'lsaquo', 'rsaquo', 'euro', 'fnof',

        // Greek capital letters
        'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta',
        'Iota', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Xi', 'Omicron', 'Pi',
        'Rho', 'Sigma', 'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega',

        // Greek small letters and variants
        'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 'eta', 'theta',
        'iota', 'kappa', 'lambda', 'mu', 'nu', 'xi', 'omicron', 'pi',
        'rho', 'sigmaf', 'sigma', 'tau', 'upsilon', 'phi', 'chi', 'psi',
        'omega', 'thetasym', 'upsih', 'piv',

        // General symbols and arrows
        'bull', 'hellip', 'prime', 'Prime', 'oline', 'frasl',
        'weierp', 'image', 'real', 'trade', 'alefsym',
        'larr', 'uarr', 'rarr', 'darr', 'harr', 'crarr',
        'lArr', 'uArr', 'rArr', 'dArr', 'hArr',

        // Mathematical operators and brackets
        'forall', 'part', 'exist', 'empty', 'nabla', 'isin', 'notin', 'ni',
        'prod', 'sum', 'minus', 'lowast', 'radic', 'prop', 'infin', 'ang',
        'and', 'or', 'cap', 'cup', 'int', 'sim', 'cong', 'asymp',
        'ne', 'equiv', 'le', 'ge', 'sub', 'sup', 'nsub', 'sube',
        'supe', 'oplus', 'otimes', 'perp', 'sdot',
        'lceil', 'rceil', 'lfloor', 'rfloor', 'lang', 'rang',

        // Shapes and card suits
        'loz', 'spades', 'clubs', 'hearts', 'diams',
    ];
}
||
507 | |||
/**
 * Kses list of allowable protocols.
 *
 * Every entry is a URI scheme including its separator: either 'scheme://' or
 * 'scheme:'. Each scheme is listed once.
 *
 * @since 1.6
 *
 * @return array Allowed protocols
 */
function yourls_kses_allowed_protocols() {
    // More or less common stuff in links. From http://en.wikipedia.org/wiki/URI_scheme
    return array(
        // Common
        'http://', 'https://', 'ftp://',
        'file://', 'smb://',
        'sftp://',
        'feed:', 'feed://',
        'mailto:',
        'news:', 'nntp://',

        // Old school bearded geek
        'gopher://', 'telnet://', 'finger://',
        'worldwind://', // 'nntp://' is already listed under "Common"

        // Dev
        'ssh://', 'svn://', 'svn+ssh://', 'git://', 'cvs://',
        'apt:',
        'market://', // Google Play
        'view-source:',

        // P2P
        'ed2k://', 'magnet:', 'udp://',

        // Streaming stuff
        'mms://', 'lastfm://', 'spotify:', 'rtsp://',

        // Text & voice
        'aim:', 'facetime://', 'gtalk:', 'xmpp:',
        'irc://', 'ircs://', 'mumble://',
        'callto:', 'skype:', 'sip:',
        'teamspeak://', 'tel:', 'ventrilo://', 'xfire:',
        'ymsgr:', 'tg://', 'whatsapp://',

        // Misc
        'steam:', 'steam://',
        'bitcoin:',
        'ldap://', 'ldaps://',

        // Purposely removed for security
        /*
        'about:', 'chrome://', 'chrome-extension://',
        'javascript:',
        'data:',
        */
    );
}
||
562 | |||
563 | |||
/**
 * Converts and fixes HTML entities.
 *
 * This function normalizes HTML entities. It first disarms every entity by
 * escaping all ampersands, then hands each candidate back to a callback that
 * decides whether to re-arm it. For instance a bare ampersand, as in "AT&T",
 * comes out as "AT&amp;T", and something that only looks like a numeric entity,
 * such as "&#XYZZY;", comes out as "&amp;#XYZZY;".
 *
 * Candidates are: named entities of 2 to 8 letters, decimal entities of 1 to 7
 * digits and hexadecimal entities of 1 to 6 digits (leading zeros allowed for
 * the numeric forms).
 *
 * @since 1.6
 *
 * @param string $string Content to normalize entities
 * @return string Content with normalized entities
 */
function yourls_kses_normalize_entities($string) {
    // Disarm all entities by converting & to &amp;
    $string = str_replace('&', '&amp;', $string);

    // Change back the allowed entities in our entity whitelist.
    // The patterns look for '&amp;' because every '&' was just escaped above.
    $string = preg_replace_callback('/&amp;([A-Za-z]{2,8});/', 'yourls_kses_named_entities', $string);
    $string = preg_replace_callback('/&amp;#(0*[0-9]{1,7});/', 'yourls_kses_normalize_entities2', $string);
    $string = preg_replace_callback('/&amp;#[Xx](0*[0-9A-Fa-f]{1,6});/', 'yourls_kses_normalize_entities3', $string);

    return $string;
}
||
588 | |||
/**
 * Callback for yourls_kses_normalize_entities() regular expression.
 *
 * This function only accepts valid named entity references, which are finite,
 * case-sensitive, and highly scrutinized by HTML and XML validators.
 *
 * A name found in the global $yourls_allowedentitynames list is returned as a
 * live entity ("&name;"). Any other name is returned with its ampersand escaped
 * ("&amp;name;") so that it displays as plain text.
 *
 * @since 1.6
 *
 * @param array $matches preg_replace_callback() matches array, entity name in $matches[1]
 * @return string Correctly encoded entity, or an empty string if no name was captured
 */
function yourls_kses_named_entities($matches) {
    global $yourls_allowedentitynames;

    if ( empty($matches[1]) )
        return '';

    $i = $matches[1];

    // The (array) cast means an unset whitelist allows nothing, instead of making in_array() fail.
    // The strict comparison keeps the match exact and case-sensitive.
    $allowed = in_array($i, (array) $yourls_allowedentitynames, true);

    return $allowed ? "&$i;" : "&amp;$i;";
}
||
609 | |||
/**
 * Callback for yourls_kses_normalize_entities() regular expression.
 *
 * This function helps yourls_kses_normalize_entities() to only accept decimal
 * &#number; entities whose value passes yourls_valid_unicode().
 *
 * An accepted number is stripped of its leading zeros, left-padded with zeros to
 * at least 3 digits and returned as a live entity. A rejected number is returned
 * with its ampersand escaped ("&amp;#number;") so that it displays as plain text.
 *
 * @access private
 * @since 1.6
 *
 * @param array $matches preg_replace_callback() matches array, digits in $matches[1]
 * @return string Correctly encoded entity, or an empty string if $matches[1] is empty (this includes "0")
 */
function yourls_kses_normalize_entities2($matches) {
    if ( empty($matches[1]) )
        return '';

    $i = $matches[1];

    if (yourls_valid_unicode($i)) {
        $i = str_pad(ltrim($i,'0'), 3, '0', STR_PAD_LEFT);
        return "&#$i;";
    }

    // Not a valid code point: keep the reference disarmed
    return "&amp;#$i;";
}
||
636 | |||
/**
 * Callback for yourls_kses_normalize_entities() for regular expression.
 *
 * This function helps yourls_kses_normalize_entities() to only accept valid Unicode
 * numeric entities in hex form.
 *
 * An accepted value is returned as a live "&#x...;" entity without its leading
 * zeros. A rejected value is returned untouched but with its ampersand escaped
 * ("&amp;#x...;") so that it displays as plain text.
 *
 * @access private
 * @since 1.6
 *
 * @param array $matches preg_replace_callback() matches array, hex digits in $matches[1]
 * @return string Correctly encoded entity, or an empty string if $matches[1] is empty (this includes "0")
 */
function yourls_kses_normalize_entities3($matches) {
    if ( empty($matches[1]) )
        return '';

    $hexchars = $matches[1];

    if ( ! yourls_valid_unicode(hexdec($hexchars)) ) {
        // Not a valid code point: keep the reference disarmed
        return "&amp;#x$hexchars;";
    }

    return '&#x'.ltrim($hexchars,'0').';';
}
||
656 | |||
/**
 * Helper function to add global attributes to a tag in the allowed html list.
 *
 * The global attributes are 'class', 'id', 'style' and 'title'. A value of true
 * is treated as an empty attribute list. Anything that is neither true nor an
 * array is returned unchanged.
 *
 * @since 1.6
 * @access private
 *
 * @param array $value An array of attributes.
 * @return array The array of attributes with global attributes added.
 */
function _yourls_add_global_attributes( $value ) {
    // A bare true stands for a tag with no attributes of its own
    $attributes = ( true === $value ) ? [] : $value;

    if ( ! is_array( $attributes ) ) {
        return $attributes;
    }

    // On a key clash, the global attribute wins
    return array_merge(
        $attributes,
        [
            'class' => true,
            'id'    => true,
            'style' => true,
            'title' => true,
        ]
    );
}
||
682 | |||
/**
 * Helper function to determine if a Unicode value is valid.
 *
 * Accepted values are 0x9, 0xa, 0xd and anything within 0x20-0xd7ff,
 * 0xe000-0xfffd or 0x10000-0x10ffff (bounds included).
 *
 * @since 1.6
 *
 * @param int $i Unicode value
 * @return bool True if the value was a valid Unicode number
 */
function yourls_valid_unicode($i) {
    // The three single values accepted below 0x20
    if ( $i == 0x9 || $i == 0xa || $i == 0xd ) {
        return true;
    }

    // Inclusive ranges; 0xd800-0xdfff and 0xfffe-0xffff fall in the gaps between them
    $ranges = [
        [ 0x20,    0xd7ff ],
        [ 0xe000,  0xfffd ],
        [ 0x10000, 0x10ffff ],
    ];

    foreach ( $ranges as $range ) {
        if ( $i >= $range[0] && $i <= $range[1] ) {
            return true;
        }
    }

    return false;
}
||
697 | |||
/**
 * Goes through an array and changes the keys to all lower case.
 *
 * Keys are lowercased on two levels: the keys of $inarray itself and the keys
 * of each of its values. Every value is cast to an array first.
 *
 * @since 1.6
 *
 * @param array $inarray Unfiltered array
 * @return array Fixed array with all lowercase keys
 */
function yourls_kses_array_lc($inarray) {
    $lowercased = [];

    foreach ( (array) $inarray as $tag => $attributes ) {
        // Rebuild the inner list with lowercase keys
        $inner = [];
        foreach ( (array) $attributes as $attribute => $setting ) {
            $inner[ strtolower($attribute) ] = $setting;
        }

        $lowercased[ strtolower($tag) ] = $inner;
    }

    return $lowercased;
}
||
721 | |||
/**
 * Convert numeric entities to their character counterparts.
 *
 * This function decodes numeric HTML entities, in decimal (&#65;) and in
 * hexadecimal (&#x41;) form. It doesn't do anything with other entities like
 * &auml;, but we don't need them in the URL protocol whitelisting system anyway.
 *
 * @since 1.6
 *
 * @param string $string Content to change entities
 * @return string Content after decoded entities
 */
function yourls_kses_decode_entities($string) {
    // Pattern => callback, applied in this order: decimal first, then hexadecimal
    $decoders = [
        '/&#([0-9]+);/'           => '_yourls_kses_decode_entities_chr',
        '/&#[Xx]([0-9A-Fa-f]+);/' => '_yourls_kses_decode_entities_chr_hexdec',
    ];

    foreach ( $decoders as $pattern => $callback ) {
        $string = preg_replace_callback( $pattern, $callback, $string );
    }

    return $string;
}
||
740 | |||
/**
 * Regex callback for yourls_kses_decode_entities()
 *
 * Turns the decimal number captured in $match[1] into a character with chr().
 *
 * @since 1.6
 *
 * @param array $match preg match
 * @return string
 */
function _yourls_kses_decode_entities_chr( $match ) {
    $decimal = $match[1];

    return chr( $decimal );
}
||
752 | |||
/**
 * Regex callback for yourls_kses_decode_entities()
 *
 * Reads $match[1] as a hexadecimal number and turns it into a character with chr().
 *
 * @since 1.6
 *
 * @param array $match preg match
 * @return string
 */
function _yourls_kses_decode_entities_chr_hexdec( $match ) {
    $code = hexdec( $match[1] );

    return chr( $code );
}
||
764 | |||
/**
 * Removes any null characters in $string.
 *
 * Two forms are stripped, in this order: actual NUL bytes, then the two
 * character sequence made of a backslash followed by a zero.
 *
 * @since 1.6
 *
 * @param string $string
 * @return string
 */
function yourls_kses_no_null($string) {
    // preg_replace() runs the patterns one after the other, each on the result of the previous one
    $null_patterns = [
        '/\0+/',      // NUL bytes
        '/(\\\\0)+/', // literal backslash + 0
    ];

    return preg_replace( $null_patterns, '', $string );
}
||
779 |