Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like AntiXSS often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use AntiXSS and, based on these observations, to apply Extract Interface as well.
1 | <?php |
||
23 | final class AntiXSS |
||
24 | { |
||
25 | |||
26 | /** |
||
27 | * @var array |
||
28 | */ |
||
29 | private static $entitiesFallback = array( |
||
30 | "\t" => '	', |
||
31 | "\n" => '
', |
||
32 | '!' => '!', |
||
33 | '"' => '"', |
||
34 | '#' => '#', |
||
35 | '$' => '$', |
||
36 | '%' => '%', |
||
37 | '&' => '&', |
||
38 | "'" => ''', |
||
39 | '(' => '(', |
||
40 | ')' => ')', |
||
41 | '*' => '*', |
||
42 | '+' => '+', |
||
43 | ',' => ',', |
||
44 | '.' => '.', |
||
45 | '/' => '/', |
||
46 | ':' => ':', |
||
47 | ';' => ';', |
||
48 | '<' => '<', |
||
49 | '<⃒' => '<⃒', |
||
50 | '=' => '=', |
||
51 | '=⃥' => '=⃥', |
||
52 | '>' => '>', |
||
53 | '>⃒' => '&nvgt', |
||
54 | '?' => '?', |
||
55 | '@' => '@', |
||
56 | '[' => '[', |
||
57 | ']' => ']', |
||
58 | '^' => '^', |
||
59 | '_' => '_', |
||
60 | '`' => '`', |
||
61 | 'fj' => 'fj', |
||
62 | '{' => '{', |
||
63 | '|' => '|', |
||
64 | '}' => '}', |
||
65 | ' ' => ' ', |
||
66 | '¡' => '¡', |
||
67 | '¢' => '¢', |
||
68 | '£' => '£', |
||
69 | '¤' => '¤', |
||
70 | '¥' => '¥', |
||
71 | '¦' => '¦', |
||
72 | '§' => '§', |
||
73 | '¨' => '¨', |
||
74 | '©' => '©', |
||
75 | 'ª' => 'ª', |
||
76 | '«' => '«', |
||
77 | '¬' => '¬', |
||
78 | '' => '­', |
||
79 | '®' => '®', |
||
80 | '¯' => '¯', |
||
81 | '°' => '°', |
||
82 | '±' => '±', |
||
83 | '²' => '²', |
||
84 | '³' => '³', |
||
85 | '´' => '´', |
||
86 | 'µ' => 'µ', |
||
87 | '¶' => '¶', |
||
88 | '·' => '·', |
||
89 | '¸' => '¸', |
||
90 | '¹' => '¹', |
||
91 | 'º' => 'º', |
||
92 | '»' => '»', |
||
93 | '¼' => '¼', |
||
94 | '½' => '½', |
||
95 | '¾' => '¾', |
||
96 | '¿' => '¿', |
||
97 | 'À' => 'À', |
||
98 | 'Á' => 'Á', |
||
99 | 'Â' => 'Â', |
||
100 | 'Ã' => 'Ã', |
||
101 | 'Ä' => 'Ä', |
||
102 | 'Å' => 'Å', |
||
103 | 'Æ' => 'Æ', |
||
104 | 'Ç' => 'Ç', |
||
105 | 'È' => 'È', |
||
106 | 'É' => 'É', |
||
107 | 'Ê' => 'Ê', |
||
108 | 'Ë' => 'Ë', |
||
109 | 'Ì' => 'Ì', |
||
110 | 'Í' => 'Í', |
||
111 | 'Î' => 'Î', |
||
112 | 'Ï' => 'Ï', |
||
113 | 'Ð' => 'Ð', |
||
114 | 'Ñ' => 'Ñ', |
||
115 | 'Ò' => 'Ò', |
||
116 | 'Ó' => 'Ó', |
||
117 | 'Ô' => 'Ô', |
||
118 | 'Õ' => 'Õ', |
||
119 | 'Ö' => 'Ö', |
||
120 | '×' => '×', |
||
121 | 'Ø' => 'Ø', |
||
122 | 'Ù' => 'Ù', |
||
123 | 'Ú' => 'Ú', |
||
124 | 'Û' => 'Û', |
||
125 | 'Ü' => 'Ü', |
||
126 | 'Ý' => 'Ý', |
||
127 | 'Þ' => 'Þ', |
||
128 | 'ß' => 'ß', |
||
129 | 'à' => 'à', |
||
130 | 'á' => 'á', |
||
131 | 'â' => 'â', |
||
132 | 'ã' => 'ã', |
||
133 | 'ä' => 'ä', |
||
134 | 'å' => 'å', |
||
135 | 'æ' => 'æ', |
||
136 | 'ç' => 'ç', |
||
137 | 'è' => 'è', |
||
138 | 'é' => 'é', |
||
139 | 'ê' => 'ê', |
||
140 | 'ë' => 'ë', |
||
141 | 'ì' => 'ì', |
||
142 | 'í' => 'í', |
||
143 | 'î' => 'î', |
||
144 | 'ï' => 'ï', |
||
145 | 'ð' => 'ð', |
||
146 | 'ñ' => 'ñ', |
||
147 | 'ò' => 'ò', |
||
148 | 'ó' => 'ó', |
||
149 | 'ô' => 'ô', |
||
150 | 'õ' => 'õ', |
||
151 | 'ö' => 'ö', |
||
152 | '÷' => '÷', |
||
153 | 'ø' => 'ø', |
||
154 | 'ù' => 'ù', |
||
155 | 'ú' => 'ú', |
||
156 | 'û' => 'û', |
||
157 | 'ü' => 'ü', |
||
158 | 'ý' => 'ý', |
||
159 | 'þ' => 'þ', |
||
160 | 'ÿ' => 'ÿ', |
||
161 | 'Ā' => 'Ā', |
||
162 | 'ā' => 'ā', |
||
163 | 'Ă' => 'Ă', |
||
164 | 'ă' => 'ă', |
||
165 | 'Ą' => 'Ą', |
||
166 | 'ą' => 'ą', |
||
167 | 'Ć' => 'Ć', |
||
168 | 'ć' => 'ć', |
||
169 | 'Ĉ' => 'Ĉ', |
||
170 | 'ĉ' => 'ĉ', |
||
171 | 'Ċ' => 'Ċ', |
||
172 | 'ċ' => 'ċ', |
||
173 | 'Č' => 'Č', |
||
174 | 'č' => 'č', |
||
175 | 'Ď' => 'Ď', |
||
176 | 'ď' => 'ď', |
||
177 | 'Đ' => 'Đ', |
||
178 | 'đ' => 'đ', |
||
179 | 'Ē' => 'Ē', |
||
180 | 'ē' => 'ē', |
||
181 | 'Ė' => 'Ė', |
||
182 | 'ė' => 'ė', |
||
183 | 'Ę' => 'Ę', |
||
184 | 'ę' => 'ę', |
||
185 | 'Ě' => 'Ě', |
||
186 | 'ě' => 'ě', |
||
187 | 'Ĝ' => 'Ĝ', |
||
188 | 'ĝ' => 'ĝ', |
||
189 | 'Ğ' => 'Ğ', |
||
190 | 'ğ' => 'ğ', |
||
191 | 'Ġ' => 'Ġ', |
||
192 | 'ġ' => 'ġ', |
||
193 | 'Ģ' => 'Ģ', |
||
194 | 'Ĥ' => 'Ĥ', |
||
195 | 'ĥ' => 'ĥ', |
||
196 | 'Ħ' => 'Ħ', |
||
197 | 'ħ' => 'ħ', |
||
198 | 'Ĩ' => 'Ĩ', |
||
199 | 'ĩ' => 'ĩ', |
||
200 | 'Ī' => 'Ī', |
||
201 | 'ī' => 'ī', |
||
202 | 'Į' => 'Į', |
||
203 | 'į' => 'į', |
||
204 | 'İ' => 'İ', |
||
205 | 'ı' => 'ı', |
||
206 | 'IJ' => 'IJ', |
||
207 | 'ij' => 'ij', |
||
208 | 'Ĵ' => 'Ĵ', |
||
209 | 'ĵ' => 'ĵ', |
||
210 | 'Ķ' => 'Ķ', |
||
211 | 'ķ' => 'ķ', |
||
212 | 'ĸ' => 'ĸ', |
||
213 | 'Ĺ' => 'Ĺ', |
||
214 | 'ĺ' => 'ĺ', |
||
215 | 'Ļ' => 'Ļ', |
||
216 | 'ļ' => 'ļ', |
||
217 | 'Ľ' => 'Ľ', |
||
218 | 'ľ' => 'ľ', |
||
219 | 'Ŀ' => 'Ŀ', |
||
220 | 'ŀ' => 'ŀ', |
||
221 | 'Ł' => 'Ł', |
||
222 | 'ł' => 'ł', |
||
223 | 'Ń' => 'Ń', |
||
224 | 'ń' => 'ń', |
||
225 | 'Ņ' => 'Ņ', |
||
226 | 'ņ' => 'ņ', |
||
227 | 'Ň' => 'Ň', |
||
228 | 'ň' => 'ň', |
||
229 | 'ʼn' => 'ʼn', |
||
230 | 'Ŋ' => 'Ŋ', |
||
231 | 'ŋ' => 'ŋ', |
||
232 | 'Ō' => 'Ō', |
||
233 | 'ō' => 'ō', |
||
234 | 'Ő' => 'Ő', |
||
235 | 'ő' => 'ő', |
||
236 | 'Œ' => 'Œ', |
||
237 | 'œ' => 'œ', |
||
238 | 'Ŕ' => 'Ŕ', |
||
239 | 'ŕ' => 'ŕ', |
||
240 | 'Ŗ' => 'Ŗ', |
||
241 | 'ŗ' => 'ŗ', |
||
242 | 'Ř' => 'Ř', |
||
243 | 'ř' => 'ř', |
||
244 | 'Ś' => 'Ś', |
||
245 | 'ś' => 'ś', |
||
246 | 'Ŝ' => 'Ŝ', |
||
247 | 'ŝ' => 'ŝ', |
||
248 | 'Ş' => 'Ş', |
||
249 | 'ş' => 'ş', |
||
250 | 'Š' => 'Š', |
||
251 | 'š' => 'š', |
||
252 | 'Ţ' => 'Ţ', |
||
253 | 'ţ' => 'ţ', |
||
254 | 'Ť' => 'Ť', |
||
255 | 'ť' => 'ť', |
||
256 | 'Ŧ' => 'Ŧ', |
||
257 | 'ŧ' => 'ŧ', |
||
258 | 'Ũ' => 'Ũ', |
||
259 | 'ũ' => 'ũ', |
||
260 | 'Ū' => 'Ū', |
||
261 | 'ū' => 'ū', |
||
262 | 'Ŭ' => 'Ŭ', |
||
263 | 'ŭ' => 'ŭ', |
||
264 | 'Ů' => 'Ů', |
||
265 | 'ů' => 'ů', |
||
266 | 'Ű' => 'Ű', |
||
267 | 'ű' => 'ű', |
||
268 | 'Ų' => 'Ų', |
||
269 | 'ų' => 'ų', |
||
270 | 'Ŵ' => 'Ŵ', |
||
271 | 'ŵ' => 'ŵ', |
||
272 | 'Ŷ' => 'Ŷ', |
||
273 | 'ŷ' => 'ŷ', |
||
274 | 'Ÿ' => 'Ÿ', |
||
275 | 'Ź' => 'Ź', |
||
276 | 'ź' => 'ź', |
||
277 | 'Ż' => 'Ż', |
||
278 | 'ż' => 'ż', |
||
279 | 'Ž' => 'Ž', |
||
280 | 'ž' => 'ž', |
||
281 | 'ƒ' => 'ƒ', |
||
282 | 'Ƶ' => 'Ƶ', |
||
283 | 'ǵ' => 'ǵ', |
||
284 | 'ȷ' => 'ȷ', |
||
285 | 'ˆ' => 'ˆ', |
||
286 | 'ˇ' => 'ˇ', |
||
287 | '˘' => '˘', |
||
288 | '˙' => '˙', |
||
289 | '˚' => '˚', |
||
290 | '˛' => '˛', |
||
291 | '˜' => '˜', |
||
292 | '˝' => '˝', |
||
293 | '̑' => '̑', |
||
294 | 'Α' => 'Α', |
||
295 | 'Β' => 'Β', |
||
296 | 'Γ' => 'Γ', |
||
297 | 'Δ' => 'Δ', |
||
298 | 'Ε' => 'Ε', |
||
299 | 'Ζ' => 'Ζ', |
||
300 | 'Η' => 'Η', |
||
301 | 'Θ' => 'Θ', |
||
302 | 'Ι' => 'Ι', |
||
303 | 'Κ' => 'Κ', |
||
304 | 'Λ' => 'Λ', |
||
305 | 'Μ' => 'Μ', |
||
306 | 'Ν' => 'Ν', |
||
307 | 'Ξ' => 'Ξ', |
||
308 | 'Ο' => 'Ο', |
||
309 | 'Π' => 'Π', |
||
310 | 'Ρ' => 'Ρ', |
||
311 | 'Σ' => 'Σ', |
||
312 | 'Τ' => 'Τ', |
||
313 | 'Υ' => 'Υ', |
||
314 | 'Φ' => 'Φ', |
||
315 | 'Χ' => 'Χ', |
||
316 | 'Ψ' => 'Ψ', |
||
317 | 'Ω' => 'Ω', |
||
318 | 'α' => 'α', |
||
319 | 'β' => 'β', |
||
320 | 'γ' => 'γ', |
||
321 | 'δ' => 'δ', |
||
322 | 'ε' => 'ε', |
||
323 | 'ζ' => 'ζ', |
||
324 | 'η' => 'η', |
||
325 | 'θ' => 'θ', |
||
326 | 'ι' => 'ι', |
||
327 | 'κ' => 'κ', |
||
328 | 'λ' => 'λ', |
||
329 | 'μ' => 'μ', |
||
330 | 'ν' => 'ν', |
||
331 | 'ξ' => 'ξ', |
||
332 | 'ο' => 'ο', |
||
333 | 'π' => 'π', |
||
334 | 'ρ' => 'ρ', |
||
335 | 'ς' => 'ς', |
||
336 | 'σ' => 'σ', |
||
337 | 'τ' => 'τ', |
||
338 | 'υ' => 'υ', |
||
339 | 'φ' => 'φ', |
||
340 | 'χ' => 'χ', |
||
341 | 'ψ' => 'ψ', |
||
342 | 'ω' => 'ω', |
||
343 | 'ϑ' => 'ϑ', |
||
344 | 'ϒ' => 'ϒ', |
||
345 | 'ϕ' => 'ϕ', |
||
346 | 'ϖ' => 'ϖ', |
||
347 | 'Ϝ' => 'Ϝ', |
||
348 | 'ϝ' => 'ϝ', |
||
349 | 'ϰ' => 'ϰ', |
||
350 | 'ϱ' => 'ϱ', |
||
351 | 'ϵ' => 'ϵ', |
||
352 | '϶' => '϶', |
||
353 | 'Ё' => 'Ё', |
||
354 | 'Ђ' => 'Ђ', |
||
355 | 'Ѓ' => 'Ѓ', |
||
356 | 'Є' => 'Є', |
||
357 | 'Ѕ' => 'Ѕ', |
||
358 | 'І' => 'І', |
||
359 | 'Ї' => 'Ї', |
||
360 | 'Ј' => 'Ј', |
||
361 | 'Љ' => 'Љ', |
||
362 | 'Њ' => 'Њ', |
||
363 | 'Ћ' => 'Ћ', |
||
364 | 'Ќ' => 'Ќ', |
||
365 | 'Ў' => 'Ў', |
||
366 | 'Џ' => 'Џ', |
||
367 | 'А' => 'А', |
||
368 | 'Б' => 'Б', |
||
369 | 'В' => 'В', |
||
370 | 'Г' => 'Г', |
||
371 | 'Д' => 'Д', |
||
372 | 'Е' => 'Е', |
||
373 | 'Ж' => 'Ж', |
||
374 | 'З' => 'З', |
||
375 | 'И' => 'И', |
||
376 | 'Й' => 'Й', |
||
377 | 'К' => 'К', |
||
378 | 'Л' => 'Л', |
||
379 | 'М' => 'М', |
||
380 | 'Н' => 'Н', |
||
381 | 'О' => 'О', |
||
382 | 'П' => 'П', |
||
383 | 'Р' => 'Р', |
||
384 | 'С' => 'С', |
||
385 | 'Т' => 'Т', |
||
386 | 'У' => 'У', |
||
387 | 'Ф' => 'Ф', |
||
388 | 'Х' => 'Х', |
||
389 | 'Ц' => 'Ц', |
||
390 | 'Ч' => 'Ч', |
||
391 | 'Ш' => 'Ш', |
||
392 | 'Щ' => 'Щ', |
||
393 | 'Ъ' => 'Ъ', |
||
394 | 'Ы' => 'Ы', |
||
395 | 'Ь' => 'Ь', |
||
396 | 'Э' => 'Э', |
||
397 | 'Ю' => 'Ю', |
||
398 | 'Я' => 'Я', |
||
399 | 'а' => 'а', |
||
400 | 'б' => 'б', |
||
401 | 'в' => 'в', |
||
402 | 'г' => 'г', |
||
403 | 'д' => 'д', |
||
404 | 'е' => 'е', |
||
405 | 'ж' => 'ж', |
||
406 | 'з' => 'з', |
||
407 | 'и' => 'и', |
||
408 | 'й' => 'й', |
||
409 | 'к' => 'к', |
||
410 | 'л' => 'л', |
||
411 | 'м' => 'м', |
||
412 | 'н' => 'н', |
||
413 | 'о' => 'о', |
||
414 | 'п' => 'п', |
||
415 | 'р' => 'р', |
||
416 | 'с' => 'с', |
||
417 | 'т' => 'т', |
||
418 | 'у' => 'у', |
||
419 | 'ф' => 'ф', |
||
420 | 'х' => 'х', |
||
421 | 'ц' => 'ц', |
||
422 | 'ч' => 'ч', |
||
423 | 'ш' => 'ш', |
||
424 | 'щ' => 'щ', |
||
425 | 'ъ' => 'ъ', |
||
426 | 'ы' => 'ы', |
||
427 | 'ь' => 'ь', |
||
428 | 'э' => 'э', |
||
429 | 'ю' => 'ю', |
||
430 | 'я' => 'я', |
||
431 | 'ё' => 'ё', |
||
432 | 'ђ' => 'ђ', |
||
433 | 'ѓ' => 'ѓ', |
||
434 | 'є' => 'є', |
||
435 | 'ѕ' => 'ѕ', |
||
436 | 'і' => 'і', |
||
437 | 'ї' => 'ї', |
||
438 | 'ј' => 'ј', |
||
439 | 'љ' => 'љ', |
||
440 | 'њ' => 'њ', |
||
441 | 'ћ' => 'ћ', |
||
442 | 'ќ' => 'ќ', |
||
443 | 'ў' => 'ў', |
||
444 | 'џ' => 'џ', |
||
445 | ' ' => ' ', |
||
446 | ' ' => ' ', |
||
447 | ' ' => ' ', |
||
448 | ' ' => ' ', |
||
449 | ' ' => ' ', |
||
450 | ' ' => ' ', |
||
451 | ' ' => ' ', |
||
452 | ' ' => ' ', |
||
453 | '' => '​', |
||
454 | '' => '‌', |
||
455 | '' => '‍', |
||
456 | '' => '‎', |
||
457 | '' => '‏', |
||
458 | '‐' => '‐', |
||
459 | '–' => '–', |
||
460 | '—' => '—', |
||
461 | '―' => '―', |
||
462 | '‖' => '‖', |
||
463 | '‘' => '‘', |
||
464 | '’' => '’', |
||
465 | '‚' => '‚', |
||
466 | '“' => '“', |
||
467 | '”' => '”', |
||
468 | '„' => '„', |
||
469 | '†' => '†', |
||
470 | '‡' => '‡', |
||
471 | '•' => '•', |
||
472 | '‥' => '‥', |
||
473 | '…' => '…', |
||
474 | '‰' => '‰', |
||
475 | '‱' => '‱', |
||
476 | '′' => '′', |
||
477 | '″' => '″', |
||
478 | '‴' => '‴', |
||
479 | '‵' => '‵', |
||
480 | '‹' => '‹', |
||
481 | '›' => '›', |
||
482 | '‾' => '‾', |
||
483 | '⁁' => '⁁', |
||
484 | '⁃' => '⁃', |
||
485 | '⁄' => '⁄', |
||
486 | '⁏' => '⁏', |
||
487 | '⁗' => '⁗', |
||
488 | ' ' => ' ', |
||
489 | ' ' => '  ', |
||
490 | '' => '⁠', |
||
491 | '' => '⁡', |
||
492 | '' => '⁢', |
||
493 | '' => '⁣', |
||
494 | '€' => '€', |
||
495 | '⃛' => '⃛', |
||
496 | '⃜' => '⃜', |
||
497 | 'ℂ' => 'ℂ', |
||
498 | '℅' => '℅', |
||
499 | 'ℊ' => 'ℊ', |
||
500 | 'ℋ' => 'ℋ', |
||
501 | 'ℌ' => 'ℌ', |
||
502 | 'ℍ' => 'ℍ', |
||
503 | 'ℎ' => 'ℎ', |
||
504 | 'ℏ' => 'ℏ', |
||
505 | 'ℐ' => 'ℐ', |
||
506 | 'ℑ' => 'ℑ', |
||
507 | 'ℒ' => 'ℒ', |
||
508 | 'ℓ' => 'ℓ', |
||
509 | 'ℕ' => 'ℕ', |
||
510 | '№' => '№', |
||
511 | '℗' => '℗', |
||
512 | '℘' => '℘', |
||
513 | 'ℙ' => 'ℙ', |
||
514 | 'ℚ' => 'ℚ', |
||
515 | 'ℛ' => 'ℛ', |
||
516 | 'ℜ' => 'ℜ', |
||
517 | 'ℝ' => 'ℝ', |
||
518 | '℞' => '℞', |
||
519 | '™' => '™', |
||
520 | 'ℤ' => 'ℤ', |
||
521 | '℧' => '℧', |
||
522 | 'ℨ' => 'ℨ', |
||
523 | '℩' => '℩', |
||
524 | 'ℬ' => 'ℬ', |
||
525 | 'ℭ' => 'ℭ', |
||
526 | 'ℯ' => 'ℯ', |
||
527 | 'ℰ' => 'ℰ', |
||
528 | 'ℱ' => 'ℱ', |
||
529 | 'ℳ' => 'ℳ', |
||
530 | 'ℴ' => 'ℴ', |
||
531 | 'ℵ' => 'ℵ', |
||
532 | 'ℶ' => 'ℶ', |
||
533 | 'ℷ' => 'ℷ', |
||
534 | 'ℸ' => 'ℸ', |
||
535 | 'ⅅ' => 'ⅅ', |
||
536 | 'ⅆ' => 'ⅆ', |
||
537 | 'ⅇ' => 'ⅇ', |
||
538 | 'ⅈ' => 'ⅈ', |
||
539 | '⅓' => '⅓', |
||
540 | '⅔' => '⅔', |
||
541 | '⅕' => '⅕', |
||
542 | '⅖' => '⅖', |
||
543 | '⅗' => '⅗', |
||
544 | '⅘' => '⅘', |
||
545 | '⅙' => '⅙', |
||
546 | '⅚' => '⅚', |
||
547 | '⅛' => '⅛', |
||
548 | '⅜' => '⅜', |
||
549 | '⅝' => '⅝', |
||
550 | '⅞' => '⅞', |
||
551 | '←' => '←', |
||
552 | '↑' => '↑', |
||
553 | '→' => '→', |
||
554 | '↓' => '↓', |
||
555 | '↔' => '↔', |
||
556 | '↕' => '↕', |
||
557 | '↖' => '↖', |
||
558 | '↗' => '↗', |
||
559 | '↘' => '↘', |
||
560 | '↙' => '↙', |
||
561 | '↚' => '↚', |
||
562 | '↛' => '↛', |
||
563 | '↝' => '↝', |
||
564 | '↝̸' => '↝̸', |
||
565 | '↞' => '↞', |
||
566 | '↟' => '↟', |
||
567 | '↠' => '↠', |
||
568 | '↡' => '↡', |
||
569 | '↢' => '↢', |
||
570 | '↣' => '↣', |
||
571 | '↤' => '↤', |
||
572 | '↥' => '↥', |
||
573 | '↦' => '↦', |
||
574 | '↧' => '↧', |
||
575 | '↩' => '↩', |
||
576 | '↪' => '↪', |
||
577 | '↫' => '↫', |
||
578 | '↬' => '↬', |
||
579 | '↭' => '↭', |
||
580 | '↮' => '↮', |
||
581 | '↰' => '↰', |
||
582 | '↱' => '↱', |
||
583 | '↲' => '↲', |
||
584 | '↳' => '↳', |
||
585 | '↵' => '↵', |
||
586 | '↶' => '↶', |
||
587 | '↷' => '↷', |
||
588 | '↺' => '↺', |
||
589 | '↻' => '↻', |
||
590 | '↼' => '↼', |
||
591 | '↽' => '↽', |
||
592 | '↾' => '↾', |
||
593 | '↿' => '↿', |
||
594 | '⇀' => '⇀', |
||
595 | '⇁' => '⇁', |
||
596 | '⇂' => '⇂', |
||
597 | '⇃' => '⇃', |
||
598 | '⇄' => '⇄', |
||
599 | '⇅' => '⇅', |
||
600 | '⇆' => '⇆', |
||
601 | '⇇' => '⇇', |
||
602 | '⇈' => '⇈', |
||
603 | '⇉' => '⇉', |
||
604 | '⇊' => '⇊', |
||
605 | '⇋' => '⇋', |
||
606 | '⇌' => '⇌', |
||
607 | '⇍' => '⇍', |
||
608 | '⇎' => '⇎', |
||
609 | '⇏' => '⇏', |
||
610 | '⇐' => '⇐', |
||
611 | '⇑' => '⇑', |
||
612 | '⇒' => '⇒', |
||
613 | '⇓' => '⇓', |
||
614 | '⇔' => '⇔', |
||
615 | '⇕' => '⇕', |
||
616 | '⇖' => '⇖', |
||
617 | '⇗' => '⇗', |
||
618 | '⇘' => '⇘', |
||
619 | '⇙' => '⇙', |
||
620 | '⇚' => '⇚', |
||
621 | '⇛' => '⇛', |
||
622 | '⇝' => '⇝', |
||
623 | '⇤' => '⇤', |
||
624 | '⇥' => '⇥', |
||
625 | '⇵' => '⇵', |
||
626 | '⇽' => '⇽', |
||
627 | '⇾' => '⇾', |
||
628 | '⇿' => '⇿', |
||
629 | '∀' => '∀', |
||
630 | '∁' => '∁', |
||
631 | '∂' => '∂', |
||
632 | '∂̸' => '∂̸', |
||
633 | '∃' => '∃', |
||
634 | '∄' => '∄', |
||
635 | '∅' => '∅', |
||
636 | '∇' => '∇', |
||
637 | '∈' => '∈', |
||
638 | '∉' => '∉', |
||
639 | '∋' => '∋', |
||
640 | '∌' => '∌', |
||
641 | '∏' => '∏', |
||
642 | '∐' => '∐', |
||
643 | '∑' => '∑', |
||
644 | '−' => '−', |
||
645 | '∓' => '∓', |
||
646 | '∔' => '∔', |
||
647 | '∖' => '∖', |
||
648 | '∗' => '∗', |
||
649 | '∘' => '∘', |
||
650 | '√' => '√', |
||
651 | '∝' => '∝', |
||
652 | '∞' => '∞', |
||
653 | '∟' => '∟', |
||
654 | '∠' => '∠', |
||
655 | '∠⃒' => '∠⃒', |
||
656 | '∡' => '∡', |
||
657 | '∢' => '∢', |
||
658 | '∣' => '∣', |
||
659 | '∤' => '∤', |
||
660 | '∥' => '∥', |
||
661 | '∦' => '∦', |
||
662 | '∧' => '∧', |
||
663 | '∨' => '∨', |
||
664 | '∩' => '∩', |
||
665 | '∩︀' => '∩︀', |
||
666 | '∪' => '∪', |
||
667 | '∪︀' => '&cups', |
||
668 | '∫' => '∫', |
||
669 | '∬' => '∬', |
||
670 | '∭' => '∭', |
||
671 | '∮' => '∮', |
||
672 | '∯' => '∯', |
||
673 | '∰' => '∰', |
||
674 | '∱' => '∱', |
||
675 | '∲' => '∲', |
||
676 | '∳' => '∳', |
||
677 | '∴' => '∴', |
||
678 | '∵' => '∵', |
||
679 | '∶' => '∶', |
||
680 | '∷' => '∷', |
||
681 | '∸' => '∸', |
||
682 | '∺' => '∺', |
||
683 | '∻' => '∻', |
||
684 | '∼' => '∼', |
||
685 | '∼⃒' => '∼⃒', |
||
686 | '∽' => '∽', |
||
687 | '∽̱' => '∽̱', |
||
688 | '∾' => '∾', |
||
689 | '∾̳' => '∾̳', |
||
690 | '∿' => '∿', |
||
691 | '≀' => '≀', |
||
692 | '≁' => '≁', |
||
693 | '≂' => '≂', |
||
694 | '≂̸' => '≂̸', |
||
695 | '≃' => '≃', |
||
696 | '≄' => '≄', |
||
697 | '≅' => '≅', |
||
698 | '≆' => '≆', |
||
699 | '≇' => '≇', |
||
700 | '≈' => '≈', |
||
701 | '≉' => '≉', |
||
702 | '≊' => '≊', |
||
703 | '≋' => '≋', |
||
704 | '≋̸' => '≋̸', |
||
705 | '≌' => '≌', |
||
706 | '≍' => '≍', |
||
707 | '≍⃒' => '≍⃒', |
||
708 | '≎' => '≎', |
||
709 | '≎̸' => '≎̸', |
||
710 | '≏' => '≏', |
||
711 | '≏̸' => '≏̸', |
||
712 | '≐' => '≐', |
||
713 | '≐̸' => '≐̸', |
||
714 | '≑' => '≑', |
||
715 | '≒' => '≒', |
||
716 | '≓' => '≓', |
||
717 | '≔' => '≔', |
||
718 | '≕' => '≕', |
||
719 | '≖' => '≖', |
||
720 | '≗' => '≗', |
||
721 | '≙' => '≙', |
||
722 | '≚' => '≚', |
||
723 | '≜' => '≜', |
||
724 | '≟' => '≟', |
||
725 | '≠' => '≠', |
||
726 | '≡' => '≡', |
||
727 | '≡⃥' => '≡⃥', |
||
728 | '≢' => '≢', |
||
729 | '≤' => '≤', |
||
730 | '≤⃒' => '≤⃒', |
||
731 | '≥' => '≥', |
||
732 | '≥⃒' => '≥⃒', |
||
733 | '≦' => '≦', |
||
734 | '≦̸' => '≦̸', |
||
735 | '≧' => '≧', |
||
736 | '≧̸' => '≧̸', |
||
737 | '≨' => '≨', |
||
738 | '≨︀' => '≨︀', |
||
739 | '≩' => '≩', |
||
740 | '≩︀' => '≩︀', |
||
741 | '≪' => '≪', |
||
742 | '≪̸' => '≪̸', |
||
743 | '≪⃒' => '≪⃒', |
||
744 | '≫' => '≫', |
||
745 | '≫̸' => '≫̸', |
||
746 | '≫⃒' => '≫⃒', |
||
747 | '≬' => '≬', |
||
748 | '≭' => '≭', |
||
749 | '≮' => '≮', |
||
750 | '≯' => '≯', |
||
751 | '≰' => '≰', |
||
752 | '≱' => '≱', |
||
753 | '≲' => '≲', |
||
754 | '≳' => '≳', |
||
755 | '≴' => '≴', |
||
756 | '≵' => '≵', |
||
757 | '≶' => '≶', |
||
758 | '≷' => '≷', |
||
759 | '≸' => '≸', |
||
760 | '≹' => '≹', |
||
761 | '≺' => '≺', |
||
762 | '≻' => '≻', |
||
763 | '≼' => '≼', |
||
764 | '≽' => '≽', |
||
765 | '≾' => '≾', |
||
766 | '≿' => '≿', |
||
767 | '≿̸' => '≿̸', |
||
768 | '⊀' => '⊀', |
||
769 | '⊁' => '⊁', |
||
770 | '⊂' => '⊂', |
||
771 | '⊂⃒' => '⊂⃒', |
||
772 | '⊃' => '⊃', |
||
773 | '⊃⃒' => '⊃⃒', |
||
774 | '⊄' => '⊄', |
||
775 | '⊅' => '⊅', |
||
776 | '⊆' => '⊆', |
||
777 | '⊇' => '⊇', |
||
778 | '⊈' => '⊈', |
||
779 | '⊉' => '⊉', |
||
780 | '⊊' => '⊊', |
||
781 | '⊊︀' => '⊊︀', |
||
782 | '⊋' => '⊋', |
||
783 | '⊋︀' => '⊋︀', |
||
784 | '⊍' => '⊍', |
||
785 | '⊎' => '⊎', |
||
786 | '⊏' => '⊏', |
||
787 | '⊏̸' => '⊏̸', |
||
788 | '⊐' => '⊐', |
||
789 | '⊐̸' => '⊐̸', |
||
790 | '⊑' => '⊑', |
||
791 | '⊒' => '⊒', |
||
792 | '⊓' => '⊓', |
||
793 | '⊓︀' => '⊓︀', |
||
794 | '⊔' => '⊔', |
||
795 | '⊔︀' => '⊔︀', |
||
796 | '⊕' => '⊕', |
||
797 | '⊖' => '⊖', |
||
798 | '⊗' => '⊗', |
||
799 | '⊘' => '⊘', |
||
800 | '⊙' => '⊙', |
||
801 | '⊚' => '⊚', |
||
802 | '⊛' => '⊛', |
||
803 | '⊝' => '⊝', |
||
804 | '⊞' => '⊞', |
||
805 | '⊟' => '⊟', |
||
806 | '⊠' => '⊠', |
||
807 | '⊡' => '⊡', |
||
808 | '⊢' => '⊢', |
||
809 | '⊣' => '⊣', |
||
810 | '⊤' => '⊤', |
||
811 | '⊥' => '⊥', |
||
812 | '⊧' => '⊧', |
||
813 | '⊨' => '⊨', |
||
814 | '⊩' => '⊩', |
||
815 | '⊪' => '⊪', |
||
816 | '⊫' => '⊫', |
||
817 | '⊬' => '⊬', |
||
818 | '⊭' => '⊭', |
||
819 | '⊮' => '⊮', |
||
820 | '⊯' => '⊯', |
||
821 | '⊰' => '⊰', |
||
822 | '⊲' => '⊲', |
||
823 | '⊳' => '⊳', |
||
824 | '⊴' => '⊴', |
||
825 | '⊴⃒' => '⊴⃒', |
||
826 | '⊵' => '⊵', |
||
827 | '⊵⃒' => '⊵⃒', |
||
828 | '⊶' => '⊶', |
||
829 | '⊷' => '⊷', |
||
830 | '⊸' => '⊸', |
||
831 | '⊹' => '⊹', |
||
832 | '⊺' => '⊺', |
||
833 | '⊻' => '⊻', |
||
834 | '⊽' => '⊽', |
||
835 | '⊾' => '⊾', |
||
836 | '⊿' => '⊿', |
||
837 | '⋀' => '⋀', |
||
838 | '⋁' => '⋁', |
||
839 | '⋂' => '⋂', |
||
840 | '⋃' => '⋃', |
||
841 | '⋄' => '⋄', |
||
842 | '⋅' => '⋅', |
||
843 | '⋆' => '⋆', |
||
844 | '⋇' => '⋇', |
||
845 | '⋈' => '⋈', |
||
846 | '⋉' => '⋉', |
||
847 | '⋊' => '⋊', |
||
848 | '⋋' => '⋋', |
||
849 | '⋌' => '⋌', |
||
850 | '⋍' => '⋍', |
||
851 | '⋎' => '⋎', |
||
852 | '⋏' => '⋏', |
||
853 | '⋐' => '⋐', |
||
854 | '⋑' => '⋑', |
||
855 | '⋒' => '⋒', |
||
856 | '⋓' => '⋓', |
||
857 | '⋔' => '⋔', |
||
858 | '⋕' => '⋕', |
||
859 | '⋖' => '⋖', |
||
860 | '⋗' => '⋗', |
||
861 | '⋘' => '⋘', |
||
862 | '⋘̸' => '⋘̸', |
||
863 | '⋙' => '⋙', |
||
864 | '⋙̸' => '⋙̸', |
||
865 | '⋚' => '⋚', |
||
866 | '⋚︀' => '⋚︀', |
||
867 | '⋛' => '⋛', |
||
868 | '⋛︀' => '⋛︀', |
||
869 | '⋞' => '⋞', |
||
870 | '⋟' => '⋟', |
||
871 | '⋠' => '⋠', |
||
872 | '⋡' => '⋡', |
||
873 | '⋢' => '⋢', |
||
874 | '⋣' => '⋣', |
||
875 | '⋦' => '⋦', |
||
876 | '⋧' => '⋧', |
||
877 | '⋨' => '⋨', |
||
878 | '⋩' => '⋩', |
||
879 | '⋪' => '⋪', |
||
880 | '⋫' => '⋫', |
||
881 | '⋬' => '⋬', |
||
882 | '⋭' => '⋭', |
||
883 | '⋮' => '⋮', |
||
884 | '⋯' => '⋯', |
||
885 | '⋰' => '⋰', |
||
886 | '⋱' => '⋱', |
||
887 | '⋲' => '⋲', |
||
888 | '⋳' => '⋳', |
||
889 | '⋴' => '⋴', |
||
890 | '⋵' => '⋵', |
||
891 | '⋵̸' => '⋵̸', |
||
892 | '⋶' => '⋶', |
||
893 | '⋷' => '⋷', |
||
894 | '⋹' => '⋹', |
||
895 | '⋹̸' => '⋹̸', |
||
896 | '⋺' => '⋺', |
||
897 | '⋻' => '⋻', |
||
898 | '⋼' => '⋼', |
||
899 | '⋽' => '⋽', |
||
900 | '⋾' => '⋾', |
||
901 | '⌅' => '⌅', |
||
902 | '⌆' => '⌆', |
||
903 | '⌈' => '⌈', |
||
904 | '⌉' => '⌉', |
||
905 | '⌊' => '⌊', |
||
906 | '⌋' => '⌋', |
||
907 | '⌌' => '⌌', |
||
908 | '⌍' => '⌍', |
||
909 | '⌎' => '⌎', |
||
910 | '⌏' => '⌏', |
||
911 | '⌐' => '⌐', |
||
912 | '⌒' => '⌒', |
||
913 | '⌓' => '⌓', |
||
914 | '⌕' => '⌕', |
||
915 | '⌖' => '⌖', |
||
916 | '⌜' => '⌜', |
||
917 | '⌝' => '⌝', |
||
918 | '⌞' => '⌞', |
||
919 | '⌟' => '⌟', |
||
920 | '⌢' => '⌢', |
||
921 | '⌣' => '⌣', |
||
922 | '⌭' => '⌭', |
||
923 | '⌮' => '⌮', |
||
924 | '⌶' => '⌶', |
||
925 | '⌽' => '⌽', |
||
926 | '⌿' => '⌿', |
||
927 | '⍼' => '⍼', |
||
928 | '⎰' => '⎰', |
||
929 | '⎱' => '⎱', |
||
930 | '⎴' => '⎴', |
||
931 | '⎵' => '⎵', |
||
932 | '⎶' => '⎶', |
||
933 | '⏜' => '⏜', |
||
934 | '⏝' => '⏝', |
||
935 | '⏞' => '⏞', |
||
936 | '⏟' => '⏟', |
||
937 | '⏢' => '⏢', |
||
938 | '⏧' => '⏧', |
||
939 | '␣' => '␣', |
||
940 | 'Ⓢ' => 'Ⓢ', |
||
941 | '─' => '─', |
||
942 | '│' => '│', |
||
943 | '┌' => '┌', |
||
944 | '┐' => '┐', |
||
945 | '└' => '└', |
||
946 | '┘' => '┘', |
||
947 | '├' => '├', |
||
948 | '┤' => '┤', |
||
949 | '┬' => '┬', |
||
950 | '┴' => '┴', |
||
951 | '┼' => '┼', |
||
952 | '═' => '═', |
||
953 | '║' => '║', |
||
954 | '╒' => '╒', |
||
955 | '╓' => '╓', |
||
956 | '╔' => '╔', |
||
957 | '╕' => '╕', |
||
958 | '╖' => '╖', |
||
959 | '╗' => '╗', |
||
960 | '╘' => '╘', |
||
961 | '╙' => '╙', |
||
962 | '╚' => '╚', |
||
963 | '╛' => '╛', |
||
964 | '╜' => '╜', |
||
965 | '╝' => '╝', |
||
966 | '╞' => '╞', |
||
967 | '╟' => '╟', |
||
968 | '╠' => '╠', |
||
969 | '╡' => '╡', |
||
970 | '╢' => '╢', |
||
971 | '╣' => '╣', |
||
972 | '╤' => '╤', |
||
973 | '╥' => '╥', |
||
974 | '╦' => '╦', |
||
975 | '╧' => '╧', |
||
976 | '╨' => '╨', |
||
977 | '╩' => '╩', |
||
978 | '╪' => '╪', |
||
979 | '╫' => '╫', |
||
980 | '╬' => '╬', |
||
981 | '▀' => '▀', |
||
982 | '▄' => '▄', |
||
983 | '█' => '█', |
||
984 | '░' => '░', |
||
985 | '▒' => '▒', |
||
986 | '▓' => '▓', |
||
987 | '□' => '□', |
||
988 | '▪' => '▪', |
||
989 | '▫' => '▫', |
||
990 | '▭' => '▭', |
||
991 | '▮' => '▮', |
||
992 | '▱' => '▱', |
||
993 | '△' => '△', |
||
994 | '▴' => '▴', |
||
995 | '▵' => '▵', |
||
996 | '▸' => '▸', |
||
997 | '▹' => '▹', |
||
998 | '▽' => '▽', |
||
999 | '▾' => '▾', |
||
1000 | '▿' => '▿', |
||
1001 | '◂' => '◂', |
||
1002 | '◃' => '◃', |
||
1003 | '◊' => '◊', |
||
1004 | '○' => '○', |
||
1005 | '◬' => '◬', |
||
1006 | '◯' => '◯', |
||
1007 | '◸' => '◸', |
||
1008 | '◹' => '◹', |
||
1009 | '◺' => '◺', |
||
1010 | '◻' => '◻', |
||
1011 | '◼' => '◼', |
||
1012 | '★' => '★', |
||
1013 | '☆' => '☆', |
||
1014 | '☎' => '☎', |
||
1015 | '♀' => '♀', |
||
1016 | '♂' => '♂', |
||
1017 | '♠' => '♠', |
||
1018 | '♣' => '♣', |
||
1019 | '♥' => '♥', |
||
1020 | '♦' => '♦', |
||
1021 | '♪' => '♪', |
||
1022 | '♭' => '♭', |
||
1023 | '♮' => '♮', |
||
1024 | '♯' => '♯', |
||
1025 | '✓' => '✓', |
||
1026 | '✗' => '✗', |
||
1027 | '✠' => '✠', |
||
1028 | '✶' => '✶', |
||
1029 | '❘' => '❘', |
||
1030 | '❲' => '❲', |
||
1031 | '❳' => '❳', |
||
1032 | '⟈' => '⟈', |
||
1033 | '⟉' => '⟉', |
||
1034 | '⟦' => '⟦', |
||
1035 | '⟧' => '⟧', |
||
1036 | '⟨' => '⟨', |
||
1037 | '⟩' => '⟩', |
||
1038 | '⟪' => '⟪', |
||
1039 | '⟫' => '⟫', |
||
1040 | '⟬' => '⟬', |
||
1041 | '⟭' => '⟭', |
||
1042 | '⟵' => '⟵', |
||
1043 | '⟶' => '⟶', |
||
1044 | '⟷' => '⟷', |
||
1045 | '⟸' => '⟸', |
||
1046 | '⟹' => '⟹', |
||
1047 | '⟺' => '⟺', |
||
1048 | '⟼' => '⟼', |
||
1049 | '⟿' => '⟿', |
||
1050 | '⤂' => '⤂', |
||
1051 | '⤃' => '⤃', |
||
1052 | '⤄' => '⤄', |
||
1053 | '⤅' => '⤅', |
||
1054 | '⤌' => '⤌', |
||
1055 | '⤍' => '⤍', |
||
1056 | '⤎' => '⤎', |
||
1057 | '⤏' => '⤏', |
||
1058 | '⤐' => '⤐', |
||
1059 | '⤑' => '⤑', |
||
1060 | '⤒' => '⤒', |
||
1061 | '⤓' => '⤓', |
||
1062 | '⤖' => '⤖', |
||
1063 | '⤙' => '⤙', |
||
1064 | '⤚' => '⤚', |
||
1065 | '⤛' => '⤛', |
||
1066 | '⤜' => '⤜', |
||
1067 | '⤝' => '⤝', |
||
1068 | '⤞' => '⤞', |
||
1069 | '⤟' => '⤟', |
||
1070 | '⤠' => '⤠', |
||
1071 | '⤣' => '⤣', |
||
1072 | '⤤' => '⤤', |
||
1073 | '⤥' => '⤥', |
||
1074 | '⤦' => '⤦', |
||
1075 | '⤧' => '⤧', |
||
1076 | '⤨' => '⤨', |
||
1077 | '⤩' => '⤩', |
||
1078 | '⤪' => '⤪', |
||
1079 | '⤳' => '⤳', |
||
1080 | '⤳̸' => '⤳̸', |
||
1081 | '⤵' => '⤵', |
||
1082 | '⤶' => '⤶', |
||
1083 | '⤷' => '⤷', |
||
1084 | '⤸' => '⤸', |
||
1085 | '⤹' => '⤹', |
||
1086 | '⤼' => '⤼', |
||
1087 | '⤽' => '⤽', |
||
1088 | '⥅' => '⥅', |
||
1089 | '⥈' => '⥈', |
||
1090 | '⥉' => '⥉', |
||
1091 | '⥊' => '⥊', |
||
1092 | '⥋' => '⥋', |
||
1093 | '⥎' => '⥎', |
||
1094 | '⥏' => '⥏', |
||
1095 | '⥐' => '⥐', |
||
1096 | '⥑' => '⥑', |
||
1097 | '⥒' => '⥒', |
||
1098 | '⥓' => '⥓', |
||
1099 | '⥔' => '⥔', |
||
1100 | '⥕' => '⥕', |
||
1101 | '⥖' => '⥖', |
||
1102 | '⥗' => '⥗', |
||
1103 | '⥘' => '⥘', |
||
1104 | '⥙' => '⥙', |
||
1105 | '⥚' => '⥚', |
||
1106 | '⥛' => '⥛', |
||
1107 | '⥜' => '⥜', |
||
1108 | '⥝' => '⥝', |
||
1109 | '⥞' => '⥞', |
||
1110 | '⥟' => '⥟', |
||
1111 | '⥠' => '⥠', |
||
1112 | '⥡' => '⥡', |
||
1113 | '⥢' => '⥢', |
||
1114 | '⥣' => '⥣', |
||
1115 | '⥤' => '⥤', |
||
1116 | '⥥' => '⥥', |
||
1117 | '⥦' => '⥦', |
||
1118 | '⥧' => '⥧', |
||
1119 | '⥨' => '⥨', |
||
1120 | '⥩' => '⥩', |
||
1121 | '⥪' => '⥪', |
||
1122 | '⥫' => '⥫', |
||
1123 | '⥬' => '⥬', |
||
1124 | '⥭' => '⥭', |
||
1125 | '⥮' => '⥮', |
||
1126 | '⥯' => '⥯', |
||
1127 | '⥰' => '⥰', |
||
1128 | '⥱' => '⥱', |
||
1129 | '⥲' => '⥲', |
||
1130 | '⥳' => '⥳', |
||
1131 | '⥴' => '⥴', |
||
1132 | '⥵' => '⥵', |
||
1133 | '⥶' => '⥶', |
||
1134 | '⥸' => '⥸', |
||
1135 | '⥹' => '⥹', |
||
1136 | '⥻' => '⥻', |
||
1137 | '⥼' => '⥼', |
||
1138 | '⥽' => '⥽', |
||
1139 | '⥾' => '⥾', |
||
1140 | '⥿' => '⥿', |
||
1141 | '⦅' => '⦅', |
||
1142 | '⦆' => '⦆', |
||
1143 | '⦋' => '⦋', |
||
1144 | '⦌' => '⦌', |
||
1145 | '⦍' => '⦍', |
||
1146 | '⦎' => '⦎', |
||
1147 | '⦏' => '⦏', |
||
1148 | '⦐' => '⦐', |
||
1149 | '⦑' => '⦑', |
||
1150 | '⦒' => '⦒', |
||
1151 | '⦓' => '⦓', |
||
1152 | '⦔' => '⦔', |
||
1153 | '⦕' => '⦕', |
||
1154 | '⦖' => '⦖', |
||
1155 | '⦚' => '⦚', |
||
1156 | '⦜' => '⦜', |
||
1157 | '⦝' => '⦝', |
||
1158 | '⦤' => '⦤', |
||
1159 | '⦥' => '⦥', |
||
1160 | '⦦' => '⦦', |
||
1161 | '⦧' => '⦧', |
||
1162 | '⦨' => '⦨', |
||
1163 | '⦩' => '⦩', |
||
1164 | '⦪' => '⦪', |
||
1165 | '⦫' => '⦫', |
||
1166 | '⦬' => '⦬', |
||
1167 | '⦭' => '⦭', |
||
1168 | '⦮' => '⦮', |
||
1169 | '⦯' => '⦯', |
||
1170 | '⦰' => '⦰', |
||
1171 | '⦱' => '⦱', |
||
1172 | '⦲' => '⦲', |
||
1173 | '⦳' => '⦳', |
||
1174 | '⦴' => '⦴', |
||
1175 | '⦵' => '⦵', |
||
1176 | '⦶' => '⦶', |
||
1177 | '⦷' => '⦷', |
||
1178 | '⦹' => '⦹', |
||
1179 | '⦻' => '⦻', |
||
1180 | '⦼' => '⦼', |
||
1181 | '⦾' => '⦾', |
||
1182 | '⦿' => '⦿', |
||
1183 | '⧀' => '⧀', |
||
1184 | '⧁' => '⧁', |
||
1185 | '⧂' => '⧂', |
||
1186 | '⧃' => '⧃', |
||
1187 | '⧄' => '⧄', |
||
1188 | '⧅' => '⧅', |
||
1189 | '⧉' => '⧉', |
||
1190 | '⧍' => '⧍', |
||
1191 | '⧎' => '⧎', |
||
1192 | '⧏' => '⧏', |
||
1193 | '⧏̸' => '⧏̸', |
||
1194 | '⧐' => '⧐', |
||
1195 | '⧐̸' => '⧐̸', |
||
1196 | '⧜' => '⧜', |
||
1197 | '⧝' => '⧝', |
||
1198 | '⧞' => '⧞', |
||
1199 | '⧣' => '⧣', |
||
1200 | '⧤' => '⧤', |
||
1201 | '⧥' => '⧥', |
||
1202 | '⧫' => '⧫', |
||
1203 | '⧴' => '⧴', |
||
1204 | '⧶' => '⧶', |
||
1205 | '⨀' => '⨀', |
||
1206 | '⨁' => '⨁', |
||
1207 | '⨂' => '⨂', |
||
1208 | '⨄' => '⨄', |
||
1209 | '⨆' => '⨆', |
||
1210 | '⨌' => '⨌', |
||
1211 | '⨍' => '⨍', |
||
1212 | '⨐' => '⨐', |
||
1213 | '⨑' => '⨑', |
||
1214 | '⨒' => '⨒', |
||
1215 | '⨓' => '⨓', |
||
1216 | '⨔' => '⨔', |
||
1217 | '⨕' => '⨕', |
||
1218 | '⨖' => '⨖', |
||
1219 | '⨗' => '⨗', |
||
1220 | '⨢' => '⨢', |
||
1221 | '⨣' => '⨣', |
||
1222 | '⨤' => '⨤', |
||
1223 | '⨥' => '⨥', |
||
1224 | '⨦' => '⨦', |
||
1225 | '⨧' => '⨧', |
||
1226 | '⨩' => '⨩', |
||
1227 | '⨪' => '⨪', |
||
1228 | '⨭' => '⨭', |
||
1229 | '⨮' => '⨮', |
||
1230 | '⨯' => '⨯', |
||
1231 | '⨰' => '⨰', |
||
1232 | '⨱' => '⨱', |
||
1233 | '⨳' => '⨳', |
||
1234 | '⨴' => '⨴', |
||
1235 | '⨵' => '⨵', |
||
1236 | '⨶' => '⨶', |
||
1237 | '⨷' => '⨷', |
||
1238 | '⨸' => '⨸', |
||
1239 | '⨹' => '⨹', |
||
1240 | '⨺' => '⨺', |
||
1241 | '⨻' => '⨻', |
||
1242 | '⨼' => '⨼', |
||
1243 | '⨿' => '⨿', |
||
1244 | '⩀' => '⩀', |
||
1245 | '⩂' => '⩂', |
||
1246 | '⩃' => '⩃', |
||
1247 | '⩄' => '⩄', |
||
1248 | '⩅' => '⩅', |
||
1249 | '⩆' => '⩆', |
||
1250 | '⩇' => '⩇', |
||
1251 | '⩈' => '⩈', |
||
1252 | '⩉' => '⩉', |
||
1253 | '⩊' => '⩊', |
||
1254 | '⩋' => '⩋', |
||
1255 | '⩌' => '⩌', |
||
1256 | '⩍' => '⩍', |
||
1257 | '⩐' => '⩐', |
||
1258 | '⩓' => '⩓', |
||
1259 | '⩔' => '⩔', |
||
1260 | '⩕' => '⩕', |
||
1261 | '⩖' => '⩖', |
||
1262 | '⩗' => '⩗', |
||
1263 | '⩘' => '⩘', |
||
1264 | '⩚' => '⩚', |
||
1265 | '⩛' => '⩛', |
||
1266 | '⩜' => '⩜', |
||
1267 | '⩝' => '⩝', |
||
1268 | '⩟' => '⩟', |
||
1269 | '⩦' => '⩦', |
||
1270 | '⩪' => '⩪', |
||
1271 | '⩭' => '⩭', |
||
1272 | '⩭̸' => '⩭̸', |
||
1273 | '⩮' => '⩮', |
||
1274 | '⩯' => '⩯', |
||
1275 | '⩰' => '⩰', |
||
1276 | '⩰̸' => '⩰̸', |
||
1277 | '⩱' => '⩱', |
||
1278 | '⩲' => '⩲', |
||
1279 | '⩳' => '⩳', |
||
1280 | '⩴' => '⩴', |
||
1281 | '⩵' => '⩵', |
||
1282 | '⩷' => '⩷', |
||
1283 | '⩸' => '⩸', |
||
1284 | '⩹' => '⩹', |
||
1285 | '⩺' => '⩺', |
||
1286 | '⩻' => '⩻', |
||
1287 | '⩼' => '⩼', |
||
1288 | '⩽' => '⩽', |
||
1289 | '⩽̸' => '⩽̸', |
||
1290 | '⩾' => '⩾', |
||
1291 | '⩾̸' => '⩾̸', |
||
1292 | '⩿' => '⩿', |
||
1293 | '⪀' => '⪀', |
||
1294 | '⪁' => '⪁', |
||
1295 | '⪂' => '⪂', |
||
1296 | '⪃' => '⪃', |
||
1297 | '⪄' => '⪄', |
||
1298 | '⪅' => '⪅', |
||
1299 | '⪆' => '⪆', |
||
1300 | '⪇' => '⪇', |
||
1301 | '⪈' => '⪈', |
||
1302 | '⪉' => '⪉', |
||
1303 | '⪊' => '⪊', |
||
1304 | '⪋' => '⪋', |
||
1305 | '⪌' => '⪌', |
||
1306 | '⪍' => '⪍', |
||
1307 | '⪎' => '⪎', |
||
1308 | '⪏' => '⪏', |
||
1309 | '⪐' => '⪐', |
||
1310 | '⪑' => '⪑', |
||
1311 | '⪒' => '⪒', |
||
1312 | '⪓' => '⪓', |
||
1313 | '⪔' => '⪔', |
||
1314 | '⪕' => '⪕', |
||
1315 | '⪖' => '⪖', |
||
1316 | '⪗' => '⪗', |
||
1317 | '⪘' => '⪘', |
||
1318 | '⪙' => '⪙', |
||
1319 | '⪚' => '⪚', |
||
1320 | '⪝' => '⪝', |
||
1321 | '⪞' => '⪞', |
||
1322 | '⪟' => '⪟', |
||
1323 | '⪠' => '⪠', |
||
1324 | '⪡' => '⪡', |
||
1325 | '⪡̸' => '⪡̸', |
||
1326 | '⪢' => '⪢', |
||
1327 | '⪢̸' => '⪢̸', |
||
1328 | '⪤' => '⪤', |
||
1329 | '⪥' => '⪥', |
||
1330 | '⪦' => '⪦', |
||
1331 | '⪧' => '⪧', |
||
1332 | '⪨' => '⪨', |
||
1333 | '⪩' => '⪩', |
||
1334 | '⪪' => '⪪', |
||
1335 | '⪫' => '⪫', |
||
1336 | '⪬' => '⪬', |
||
1337 | '⪬︀' => '⪬︀', |
||
1338 | '⪭' => '⪭', |
||
1339 | '⪭︀' => '⪭︀', |
||
1340 | '⪮' => '⪮', |
||
1341 | '⪯' => '⪯', |
||
1342 | '⪯̸' => '⪯̸', |
||
1343 | '⪰' => '⪰', |
||
1344 | '⪰̸' => '⪰̸', |
||
1345 | '⪳' => '⪳', |
||
1346 | '⪴' => '⪴', |
||
1347 | '⪵' => '⪵', |
||
1348 | '⪶' => '⪶', |
||
1349 | '⪷' => '⪷', |
||
1350 | '⪸' => '⪸', |
||
1351 | '⪹' => '⪹', |
||
1352 | '⪺' => '⪺', |
||
1353 | '⪻' => '⪻', |
||
1354 | '⪼' => '⪼', |
||
1355 | '⪽' => '⪽', |
||
1356 | '⪾' => '⪾', |
||
1357 | '⪿' => '⪿', |
||
1358 | '⫀' => '⫀', |
||
1359 | '⫁' => '⫁', |
||
1360 | '⫂' => '⫂', |
||
1361 | '⫃' => '⫃', |
||
1362 | '⫄' => '⫄', |
||
1363 | '⫅' => '⫅', |
||
1364 | '⫅̸' => '⫅̸', |
||
1365 | '⫆' => '⫆', |
||
1366 | '⫆̸' => '⫆̸', |
||
1367 | '⫇' => '⫇', |
||
1368 | '⫈' => '⫈', |
||
1369 | '⫋' => '⫋', |
||
1370 | '⫋︀' => '⫋︀', |
||
1371 | '⫌' => '⫌', |
||
1372 | '⫌︀' => '⫌︀', |
||
1373 | '⫏' => '⫏', |
||
1374 | '⫐' => '⫐', |
||
1375 | '⫑' => '⫑', |
||
1376 | '⫒' => '⫒', |
||
1377 | '⫓' => '⫓', |
||
1378 | '⫔' => '⫔', |
||
1379 | '⫕' => '⫕', |
||
1380 | '⫖' => '⫖', |
||
1381 | '⫗' => '⫗', |
||
1382 | '⫘' => '⫘', |
||
1383 | '⫙' => '⫙', |
||
1384 | '⫚' => '⫚', |
||
1385 | '⫛' => '⫛', |
||
1386 | '⫤' => '⫤', |
||
1387 | '⫦' => '⫦', |
||
1388 | '⫧' => '⫧', |
||
1389 | '⫨' => '⫨', |
||
1390 | '⫩' => '⫩', |
||
1391 | '⫫' => '⫫', |
||
1392 | '⫬' => '⫬', |
||
1393 | '⫭' => '⫭', |
||
1394 | '⫮' => '⫮', |
||
1395 | '⫯' => '⫯', |
||
1396 | '⫰' => '⫰', |
||
1397 | '⫱' => '⫱', |
||
1398 | '⫲' => '⫲', |
||
1399 | '⫳' => '⫳', |
||
1400 | '⫽' => '⫽', |
||
1401 | '⫽⃥' => '⫽⃥', |
||
1402 | 'ff' => 'ff', |
||
1403 | 'fi' => 'fi', |
||
1404 | 'fl' => 'fl', |
||
1405 | 'ffi' => 'ffi', |
||
1406 | 'ffl' => 'ffl', |
||
1407 | '𝒜' => '𝒜', |
||
1408 | '𝒞' => '𝒞', |
||
1409 | '𝒟' => '𝒟', |
||
1410 | '𝒢' => '𝒢', |
||
1411 | '𝒥' => '𝒥', |
||
1412 | '𝒦' => '𝒦', |
||
1413 | '𝒩' => '𝒩', |
||
1414 | '𝒪' => '𝒪', |
||
1415 | '𝒫' => '𝒫', |
||
1416 | '𝒬' => '𝒬', |
||
1417 | '𝒮' => '𝒮', |
||
1418 | '𝒯' => '𝒯', |
||
1419 | '𝒰' => '𝒰', |
||
1420 | '𝒱' => '𝒱', |
||
1421 | '𝒲' => '𝒲', |
||
1422 | '𝒳' => '𝒳', |
||
1423 | '𝒴' => '𝒴', |
||
1424 | '𝒵' => '𝒵', |
||
1425 | '𝒶' => '𝒶', |
||
1426 | '𝒷' => '𝒷', |
||
1427 | '𝒸' => '𝒸', |
||
1428 | '𝒹' => '𝒹', |
||
1429 | '𝒻' => '𝒻', |
||
1430 | '𝒽' => '𝒽', |
||
1431 | '𝒾' => '𝒾', |
||
1432 | '𝒿' => '𝒿', |
||
1433 | '𝓀' => '𝓀', |
||
1434 | '𝓁' => '𝓁', |
||
1435 | '𝓂' => '𝓂', |
||
1436 | '𝓃' => '𝓃', |
||
1437 | '𝓅' => '𝓅', |
||
1438 | '𝓆' => '𝓆', |
||
1439 | '𝓇' => '𝓇', |
||
1440 | '𝓈' => '𝓈', |
||
1441 | '𝓉' => '𝓉', |
||
1442 | '𝓊' => '𝓊', |
||
1443 | '𝓋' => '𝓋', |
||
1444 | '𝓌' => '𝓌', |
||
1445 | '𝓍' => '𝓍', |
||
1446 | '𝓎' => '𝓎', |
||
1447 | '𝓏' => '𝓏', |
||
1448 | '𝔄' => '𝔄', |
||
1449 | '𝔅' => '𝔅', |
||
1450 | '𝔇' => '𝔇', |
||
1451 | '𝔈' => '𝔈', |
||
1452 | '𝔉' => '𝔉', |
||
1453 | '𝔊' => '𝔊', |
||
1454 | '𝔍' => '𝔍', |
||
1455 | '𝔎' => '𝔎', |
||
1456 | '𝔏' => '𝔏', |
||
1457 | '𝔐' => '𝔐', |
||
1458 | '𝔑' => '𝔑', |
||
1459 | '𝔒' => '𝔒', |
||
1460 | '𝔓' => '𝔓', |
||
1461 | '𝔔' => '𝔔', |
||
1462 | '𝔖' => '𝔖', |
||
1463 | '𝔗' => '𝔗', |
||
1464 | '𝔘' => '𝔘', |
||
1465 | '𝔙' => '𝔙', |
||
1466 | '𝔚' => '𝔚', |
||
1467 | '𝔛' => '𝔛', |
||
1468 | '𝔜' => '𝔜', |
||
1469 | '𝔞' => '𝔞', |
||
1470 | '𝔟' => '𝔟', |
||
1471 | '𝔠' => '𝔠', |
||
1472 | '𝔡' => '𝔡', |
||
1473 | '𝔢' => '𝔢', |
||
1474 | '𝔣' => '𝔣', |
||
1475 | '𝔤' => '𝔤', |
||
1476 | '𝔥' => '𝔥', |
||
1477 | '𝔦' => '𝔦', |
||
1478 | '𝔧' => '𝔧', |
||
1479 | '𝔨' => '𝔨', |
||
1480 | '𝔩' => '𝔩', |
||
1481 | '𝔪' => '𝔪', |
||
1482 | '𝔫' => '𝔫', |
||
1483 | '𝔬' => '𝔬', |
||
1484 | '𝔭' => '𝔭', |
||
1485 | '𝔮' => '𝔮', |
||
1486 | '𝔯' => '𝔯', |
||
1487 | '𝔰' => '𝔰', |
||
1488 | '𝔱' => '𝔱', |
||
1489 | '𝔲' => '𝔲', |
||
1490 | '𝔳' => '𝔳', |
||
1491 | '𝔴' => '𝔴', |
||
1492 | '𝔵' => '𝔵', |
||
1493 | '𝔶' => '𝔶', |
||
1494 | '𝔷' => '𝔷', |
||
1495 | '𝔸' => '𝔸', |
||
1496 | '𝔹' => '𝔹', |
||
1497 | '𝔻' => '𝔻', |
||
1498 | '𝔼' => '𝔼', |
||
1499 | '𝔽' => '𝔽', |
||
1500 | '𝔾' => '𝔾', |
||
1501 | '𝕀' => '𝕀', |
||
1502 | '𝕁' => '𝕁', |
||
1503 | '𝕂' => '𝕂', |
||
1504 | '𝕃' => '𝕃', |
||
1505 | '𝕄' => '𝕄', |
||
1506 | '𝕆' => '𝕆', |
||
1507 | '𝕊' => '𝕊', |
||
1508 | '𝕋' => '𝕋', |
||
1509 | '𝕌' => '𝕌', |
||
1510 | '𝕍' => '𝕍', |
||
1511 | '𝕎' => '𝕎', |
||
1512 | '𝕏' => '𝕏', |
||
1513 | '𝕐' => '𝕐', |
||
1514 | '𝕒' => '𝕒', |
||
1515 | '𝕓' => '𝕓', |
||
1516 | '𝕔' => '𝕔', |
||
1517 | '𝕕' => '𝕕', |
||
1518 | '𝕖' => '𝕖', |
||
1519 | '𝕗' => '𝕗', |
||
1520 | '𝕘' => '𝕘', |
||
1521 | '𝕙' => '𝕙', |
||
1522 | '𝕚' => '𝕚', |
||
1523 | '𝕛' => '𝕛', |
||
1524 | '𝕜' => '𝕜', |
||
1525 | '𝕝' => '𝕝', |
||
1526 | '𝕞' => '𝕞', |
||
1527 | '𝕟' => '𝕟', |
||
1528 | '𝕠' => '𝕠', |
||
1529 | '𝕡' => '𝕡', |
||
1530 | '𝕢' => '𝕢', |
||
1531 | '𝕣' => '𝕣', |
||
1532 | '𝕤' => '𝕤', |
||
1533 | '𝕥' => '𝕥', |
||
1534 | '𝕦' => '𝕦', |
||
1535 | '𝕧' => '𝕧', |
||
1536 | '𝕨' => '𝕨', |
||
1537 | '𝕩' => '𝕩', |
||
1538 | '𝕪' => '𝕪', |
||
1539 | '𝕫' => '𝕫', |
||
1540 | ); |
||
1541 | |||
1542 | /** |
||
1543 | * List of never allowed regex replacements. |
||
1544 | * |
||
1545 | * @var array |
||
1546 | */ |
||
1547 | private static $_never_allowed_regex = array( |
||
1548 | // default javascript |
||
1549 | 'javascript\s*:', |
||
1550 | // default javascript |
||
1551 | '(document|(document\.)?window)\.(location|on\w*)', |
||
1552 | // Java: jar-protocol is an XSS hazard |
||
1553 | 'jar\s*:', |
||
1554 | // Mac (will not run the script, but open it in AppleScript Editor) |
||
1555 | 'applescript\s*:', |
||
1556 | // IE: https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#VBscript_in_an_image |
||
1557 | 'vbscript\s*:', |
||
1558 | // IE, surprise! |
||
1559 | 'wscript\s*:', |
||
1560 | // IE |
||
1561 | 'jscript\s*:', |
||
1562 | // IE: https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#VBscript_in_an_image |
||
1563 | 'vbs\s*:', |
||
1564 | // https://html5sec.org/#behavior |
||
1565 | 'behavior\s:', |
||
1566 | // ? |
||
1567 | 'Redirect\s+30\d', |
||
1568 | // data-attribute + base64 |
||
1569 | "([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?", |
||
1570 | // remove Netscape 4 JS entities |
||
1571 | '&\s*\{[^}]*(\}\s*;?|$)', |
||
1572 | // old IE, old Netscape |
||
1573 | 'expression\s*(\(|&\#40;)', |
||
1574 | // old Netscape |
||
1575 | 'mocha\s*:', |
||
1576 | // old Netscape |
||
1577 | 'livescript\s*:', |
||
1578 | // default view source |
||
1579 | 'view-source\s*:', |
||
1580 | ); |
||
1581 | |||
1582 | /** |
||
1583 | * List of never allowed strings, afterwards. |
||
1584 | * |
||
1585 | * @var array |
||
1586 | */ |
||
1587 | private static $_never_allowed_str_afterwards = array( |
||
1588 | 'FSCommand', |
||
1589 | 'onAbort', |
||
1590 | 'onActivate', |
||
1591 | 'onAttribute', |
||
1592 | 'onAfterPrint', |
||
1593 | 'onAfterScriptExecute', |
||
1594 | 'onAfterUpdate', |
||
1595 | 'onAnimationEnd', |
||
1596 | 'onAnimationIteration', |
||
1597 | 'onAnimationStart', |
||
1598 | 'onAriaRequest', |
||
1599 | 'onAutoComplete', |
||
1600 | 'onAutoCompleteError', |
||
1601 | 'onBeforeActivate', |
||
1602 | 'onBeforeCopy', |
||
1603 | 'onBeforeCut', |
||
1604 | 'onBeforeDeactivate', |
||
1605 | 'onBeforeEditFocus', |
||
1606 | 'onBeforePaste', |
||
1607 | 'onBeforePrint', |
||
1608 | 'onBeforeScriptExecute', |
||
1609 | 'onBeforeUnload', |
||
1610 | 'onBeforeUpdate', |
||
1611 | 'onBegin', |
||
1612 | 'onBlur', |
||
1613 | 'onBounce', |
||
1614 | 'onCancel', |
||
1615 | 'onCanPlay', |
||
1616 | 'onCanPlayThrough', |
||
1617 | 'onCellChange', |
||
1618 | 'onChange', |
||
1619 | 'onClick', |
||
1620 | 'onClose', |
||
1621 | 'onCommand', |
||
1622 | 'onCompassNeedsCalibration', |
||
1623 | 'onContextMenu', |
||
1624 | 'onControlSelect', |
||
1625 | 'onCopy', |
||
1626 | 'onCueChange', |
||
1627 | 'onCut', |
||
1628 | 'onDataAvailable', |
||
1629 | 'onDataSetChanged', |
||
1630 | 'onDataSetComplete', |
||
1631 | 'onDblClick', |
||
1632 | 'onDeactivate', |
||
1633 | 'onDeviceLight', |
||
1634 | 'onDeviceMotion', |
||
1635 | 'onDeviceOrientation', |
||
1636 | 'onDeviceProximity', |
||
1637 | 'onDrag', |
||
1638 | 'onDragDrop', |
||
1639 | 'onDragEnd', |
||
1640 | 'onDragEnter', |
||
1641 | 'onDragLeave', |
||
1642 | 'onDragOver', |
||
1643 | 'onDragStart', |
||
1644 | 'onDrop', |
||
1645 | 'onDurationChange', |
||
1646 | 'onEmptied', |
||
1647 | 'onEnd', |
||
1648 | 'onEnded', |
||
1649 | 'onError', |
||
1650 | 'onErrorUpdate', |
||
1651 | 'onExit', |
||
1652 | 'onFilterChange', |
||
1653 | 'onFinish', |
||
1654 | 'onFocus', |
||
1655 | 'onFocusIn', |
||
1656 | 'onFocusOut', |
||
1657 | 'onFormChange', |
||
1658 | 'onFormInput', |
||
1659 | 'onFullScreenChange', |
||
1660 | 'onFullScreenError', |
||
1661 | 'onGotPointerCapture', |
||
1662 | 'onHashChange', |
||
1663 | 'onHelp', |
||
1664 | 'onInput', |
||
1665 | 'onInvalid', |
||
1666 | 'onKeyDown', |
||
1667 | 'onKeyPress', |
||
1668 | 'onKeyUp', |
||
1669 | 'onLanguageChange', |
||
1670 | 'onLayoutComplete', |
||
1671 | 'onLoad', |
||
1672 | 'onLoadedData', |
||
1673 | 'onLoadedMetaData', |
||
1674 | 'onLoadStart', |
||
1675 | 'onLoseCapture', |
||
1676 | 'onLostPointerCapture', |
||
1677 | 'onMediaComplete', |
||
1678 | 'onMediaError', |
||
1679 | 'onMessage', |
||
1680 | 'onMouseDown', |
||
1681 | 'onMouseEnter', |
||
1682 | 'onMouseLeave', |
||
1683 | 'onMouseMove', |
||
1684 | 'onMouseOut', |
||
1685 | 'onMouseOver', |
||
1686 | 'onMouseUp', |
||
1687 | 'onMouseWheel', |
||
1688 | 'onMove', |
||
1689 | 'onMoveEnd', |
||
1690 | 'onMoveStart', |
||
1691 | 'onMozFullScreenChange', |
||
1692 | 'onMozFullScreenError', |
||
1693 | 'onMozPointerLockChange', |
||
1694 | 'onMozPointerLockError', |
||
1695 | 'onMsContentZoom', |
||
1696 | 'onMsFullScreenChange', |
||
1697 | 'onMsFullScreenError', |
||
1698 | 'onMsGestureChange', |
||
1699 | 'onMsGestureDoubleTap', |
||
1700 | 'onMsGestureEnd', |
||
1701 | 'onMsGestureHold', |
||
1702 | 'onMsGestureStart', |
||
1703 | 'onMsGestureTap', |
||
1704 | 'onMsGotPointerCapture', |
||
1705 | 'onMsInertiaStart', |
||
1706 | 'onMsLostPointerCapture', |
||
1707 | 'onMsManipulationStateChanged', |
||
1708 | 'onMsPointerCancel', |
||
1709 | 'onMsPointerDown', |
||
1710 | 'onMsPointerEnter', |
||
1711 | 'onMsPointerLeave', |
||
1712 | 'onMsPointerMove', |
||
1713 | 'onMsPointerOut', |
||
1714 | 'onMsPointerOver', |
||
1715 | 'onMsPointerUp', |
||
1716 | 'onMsSiteModeJumpListItemRemoved', |
||
1717 | 'onMsThumbnailClick', |
||
1718 | 'onOffline', |
||
1719 | 'onOnline', |
||
1720 | 'onOutOfSync', |
||
1721 | 'onPage', |
||
1722 | 'onPageHide', |
||
1723 | 'onPageShow', |
||
1724 | 'onPaste', |
||
1725 | 'onPause', |
||
1726 | 'onPlay', |
||
1727 | 'onPlaying', |
||
1728 | 'onPointerCancel', |
||
1729 | 'onPointerDown', |
||
1730 | 'onPointerEnter', |
||
1731 | 'onPointerLeave', |
||
1732 | 'onPointerLockChange', |
||
1733 | 'onPointerLockError', |
||
1734 | 'onPointerMove', |
||
1735 | 'onPointerOut', |
||
1736 | 'onPointerOver', |
||
1737 | 'onPointerUp', |
||
1738 | 'onPopState', |
||
1739 | 'onProgress', |
||
1740 | 'onPropertyChange', |
||
1741 | 'onRateChange', |
||
1742 | 'onReadyStateChange', |
||
1743 | 'onReceived', |
||
1744 | 'onRepeat', |
||
1745 | 'onReset', |
||
1746 | 'onResize', |
||
1747 | 'onResizeEnd', |
||
1748 | 'onResizeStart', |
||
1749 | 'onResume', |
||
1750 | 'onReverse', |
||
1751 | 'onRowDelete', |
||
1752 | 'onRowEnter', |
||
1753 | 'onRowExit', |
||
1754 | 'onRowInserted', |
||
1755 | 'onRowsDelete', |
||
1756 | 'onRowsEnter', |
||
1757 | 'onRowsExit', |
||
1758 | 'onRowsInserted', |
||
1759 | 'onScroll', |
||
1760 | 'onSearch', |
||
1761 | 'onSeek', |
||
1762 | 'onSeeked', |
||
1763 | 'onSeeking', |
||
1764 | 'onSelect', |
||
1765 | 'onSelectionChange', |
||
1766 | 'onSelectStart', |
||
1767 | 'onStalled', |
||
1768 | 'onStorage', |
||
1769 | 'onStorageCommit', |
||
1770 | 'onStart', |
||
1771 | 'onStop', |
||
1772 | 'onShow', |
||
1773 | 'onSyncRestored', |
||
1774 | 'onSubmit', |
||
1775 | 'onSuspend', |
||
1776 | 'onSynchRestored', |
||
1777 | 'onTimeError', |
||
1778 | 'onTimeUpdate', |
||
1779 | 'onTrackChange', |
||
1780 | 'onTransitionEnd', |
||
1781 | 'onToggle', |
||
1782 | 'onUnload', |
||
1783 | 'onURLFlip', |
||
1784 | 'onUserProximity', |
||
1785 | 'onVolumeChange', |
||
1786 | 'onWaiting', |
||
1787 | 'onWebKitAnimationEnd', |
||
1788 | 'onWebKitAnimationIteration', |
||
1789 | 'onWebKitAnimationStart', |
||
1790 | 'onWebKitFullScreenChange', |
||
1791 | 'onWebKitFullScreenError', |
||
1792 | 'onWebKitTransitionEnd', |
||
1793 | 'onWheel', |
||
1794 | 'seekSegmentTime', |
||
1795 | 'userid', |
||
1796 | 'datasrc', |
||
1797 | 'datafld', |
||
1798 | 'dataformatas', |
||
1799 | 'ev:handler', |
||
1800 | 'ev:event', |
||
1801 | '0;url', |
||
1802 | ); |
||
1803 | |||
1804 | /** |
||
1805 | * https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Event_Handlers |
||
1806 | * |
||
1807 | * @var array |
||
1808 | */ |
||
1809 | private $_evil_attributes = array( |
||
1810 | 'on\w*', |
||
1811 | 'style', |
||
1812 | 'xmlns', |
||
1813 | 'formaction', |
||
1814 | 'form', |
||
1815 | 'xlink:href', |
||
1816 | 'seekSegmentTime', |
||
1817 | 'FSCommand', |
||
1818 | 'eval', |
||
1819 | ); |
||
1820 | |||
1821 | /** |
||
1822 | * XSS Hash - random Hash for protecting URLs. |
||
1823 | * |
||
1824 | * @var string |
||
1825 | */ |
||
1826 | private $_xss_hash; |
||
1827 | |||
1828 | /** |
||
1829 | * The replacement-string for not allowed strings. |
||
1830 | * |
||
1831 | * @var string |
||
1832 | */ |
||
1833 | private $_replacement = ''; |
||
1834 | |||
1835 | /** |
||
1836 | * List of never allowed strings. |
||
1837 | * |
||
1838 | * @var array |
||
1839 | */ |
||
1840 | private $_never_allowed_str = array(); |
||
1841 | |||
1842 | /** |
||
1843 | * If your DB (MySQL) encoding is "utf8" and not "utf8mb4", then |
||
1844 | * you can't save 4-Bytes chars from UTF-8 and someone can create stored XSS-attacks. |
||
1845 | * |
||
1846 | * @var bool |
||
1847 | */ |
||
1848 | private $_stripe_4byte_chars = false; |
||
1849 | |||
1850 | /** |
||
1851 | * @var bool|null |
||
1852 | */ |
||
1853 | private $xss_found = null; |
||
1854 | |||
/**
 * __construct()
 *
 * <p>
 * <br />
 * INFO: Fills "$this->_never_allowed_str" by calling "_initNeverAllowedStr()".
 * </p>
 */
public function __construct()
{
    $this->_initNeverAllowedStr();
}
||
1862 | |||
/**
 * Collapse an "exploded" keyword back into its compact spelling.
 *
 * <p>
 * <br />
 * INFO: preg_replace_callback() handler used to turn things like
 * 'j a v a s c r i p t' back into 'javascript'.
 * </p>
 *
 * @param array $matches <p>[1] = the exploded word, [2] = the text captured right after it.</p>
 *
 * @return string <p>The compacted word followed by the untouched second capture.</p>
 */
private function _compact_exploded_words_callback($matches)
{
    // drop whitespace, quotes (literal or octal-escaped) and "+" signs from the word
    $compactWord = preg_replace('/(?:\s+|"|\042|\'|\047|\+)*+/', '', $matches[1]);

    return $compactWord . $matches[2];
}
||
1879 | |||
/**
 * HTML-Entity decode callback.
 *
 * <p>
 * <br />
 * INFO: The "?key=value" / "&key=value" parts of the matched text are swapped for
 * hash-prefixed markers before "_entity_decode()" runs and are turned back into
 * "?" / "&" afterwards, so the decoder never sees them as entities.
 * </p>
 *
 * @param array $match <p>Only $match[0] (the full match) is used.</p>
 *
 * @return string
 */
private function _decode_entity($match)
{
    // init (presumably fills "$this->_xss_hash" - the method is defined outside this section)
    $this->_xss_hash();

    $markerFirst = $this->_xss_hash . '::GET_FIRST';
    $markerNext = $this->_xss_hash . '::GET_NEXT';

    $text = $match[0];

    // protect GET variables in URLs
    $text = preg_replace('|\?([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', $markerFirst . '\\1=\\2', $text);
    $text = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', $markerNext . '\\1=\\2', $text);

    $decoded = $this->_entity_decode($text);

    // un-protect URL GET vars
    return str_replace(
        array($markerFirst, $markerNext),
        array('?', '&'),
        $decoded
    );
}
||
1911 | |||
/**
 * Run the complete filter pipeline on a single value.
 *
 * <p>
 * <br />
 * INFO: Falsy values and values that compare equal to their own int / float
 * representation are returned right away; everything else is cleaned, decoded
 * and passed through each removal / sanitizing step in a fixed order.
 * "$this->xss_found" is updated unless it is already true.
 * </p>
 *
 * @param string $str
 *
 * @return mixed <p>The (string-casted) input, filtered if needed.</p>
 */
private function _do($str)
{
    $str = (string)$str;
    $asInt = (int)$str;
    $asFloat = (float)$str;

    $nothingToFilter = !$str || "$asInt" == $str || "$asFloat" == $str;
    if ($nothingToFilter) {
        // no xss found
        if ($this->xss_found !== true) {
            $this->xss_found = false;
        }

        return $str;
    }

    // removes all non-UTF-8 characters
    // &&
    // remove NULL characters (ignored by some browsers)
    $str = UTF8::clean($str, true, true, false);

    // decode UTF-7 characters, then decode the string
    $str = $this->_decode_string($this->_repack_utf7($str));

    // remove all >= 4-Byte chars if needed
    if ($this->_stripe_4byte_chars === true) {
        $str = preg_replace('/[\x{10000}-\x{10FFFF}]/u', '', $str);
    }

    // backup the string (for later comparison)
    $beforeFiltering = $str;

    // the order of these steps matters, each one works on the output of the previous one
    $steps = array(
        '_do_never_allowed',             // remove strings that are never allowed
        '_compact_exploded_javascript',  // corrects words before the browser will do it
        '_remove_disallowed_javascript', // remove disallowed javascript calls in links, images etc.
        '_remove_evil_attributes',       // remove evil attributes such as style, onclick and xmlns
        '_sanitize_naughty_html',        // sanitize naughty HTML elements
        '_sanitize_naughty_javascript',  // sanitize naughty JavaScript elements
        '_do_never_allowed_afterwards',  // final clean up, in case something got through the above filters
    );
    foreach ($steps as $step) {
        $str = $this->$step($str);
    }

    // check for xss: any difference to the backup means something was removed or rewritten
    if ($this->xss_found !== true) {
        $this->xss_found = ($beforeFiltering !== $str);
    }

    return $str;
}
||
1987 | |||
/**
 * Remove never allowed strings.
 *
 * <p>
 * <br />
 * INFO: First the case-insensitive string map "$this->_never_allowed_str" is applied,
 * then everything matching one of the "self::$_never_allowed_regex" patterns is
 * replaced by "$this->_replacement".
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _do_never_allowed($str)
{
    // Only the combined regex is cached: it is built from a private static list.
    // The string map is instance data (it holds "$this->_replacement"), so it is read on
    // every call instead of being shared between instances through a static variable.
    // (The previous cache was reset to null on every call and therefore never cached anything.)
    static $NEVER_ALLOWED_REGEX_CACHE = null;

    $str = str_ireplace(array_keys($this->_never_allowed_str), $this->_never_allowed_str, $str);

    if (null === $NEVER_ALLOWED_REGEX_CACHE) {
        $NEVER_ALLOWED_REGEX_CACHE = '#' . implode('|', self::$_never_allowed_regex) . '#is';
    }
    $str = preg_replace($NEVER_ALLOWED_REGEX_CACHE, $this->_replacement, $str);

    // preg_replace() returns null on a PCRE error, the cast turns that into an empty string
    return (string)$str;
}
||
2013 | |||
/**
 * Remove never allowed string, afterwards.
 *
 * <p>
 * <br />
 * INFO: clean-up also some string, if there is no html-tag
 * </p>
 *
 * <p>
 * <br />
 * Every entry of "self::$_never_allowed_str_afterwards" is turned into the
 * alternative "<entry>.*=" and matched case-insensitively, ungreedy and across
 * line breaks (modifiers "isU"); each hit is replaced by "$this->_replacement".
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _do_never_allowed_afterwards($str)
{
    static $NEVER_ALLOWED_STR_AFTERWARDS_CACHE;

    if (null === $NEVER_ALLOWED_STR_AFTERWARDS_CACHE) {
        // Build the alternatives on a local list: the shared static property stays untouched
        // and no by-reference loop variable is left dangling.
        $alternatives = array();
        foreach (self::$_never_allowed_str_afterwards as $neverAllowedStr) {
            $alternatives[] = $neverAllowedStr . '.*=';
        }

        $NEVER_ALLOWED_STR_AFTERWARDS_CACHE = implode('|', $alternatives);
    }

    $str = preg_replace('#' . $NEVER_ALLOWED_STR_AFTERWARDS_CACHE . '#isU', $this->_replacement, $str);

    // preg_replace() returns null on a PCRE error, the cast turns that into an empty string
    return (string)$str;
}
||
2042 | |||
/**
 * Entity-decoding.
 *
 * <p>
 * <br />
 * INFO: Strings containing "$this->_xss_hash" are decoded via "UTF8::html_entity_decode()",
 * all other strings via "UTF8::rawurldecode()". Afterwards named entities that are
 * still present (with or without the trailing ";") are looked up in a merged table
 * and replaced if they are written with the ";".
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _entity_decode($str)
{
    static $HTML_ENTITIES_CACHE;

    /** @noinspection UsageOfSilenceOperatorInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    // HHVM doesn't support "ENT_DISALLOWED" && "ENT_SUBSTITUTE"
    $flags = Bootup::is_php('5.4') ?
        ENT_QUOTES | ENT_HTML5 | @ENT_DISALLOWED | @ENT_SUBSTITUTE :
        ENT_QUOTES;

    // decode
    if (strpos($str, $this->_xss_hash) !== false) {
        $str = UTF8::html_entity_decode($str, $flags);
    } else {
        $str = UTF8::rawurldecode($str);
    }

    // decode-again, for e.g. HHVM, PHP 5.3, miss configured applications ...
    //
    // -> the pattern only matches "&" followed by at least two letters, the ";" is not part of the match
    if (preg_match_all('/&[A-Za-z]{2,}[;]{0}/', $str, $matches)) {

        if (null === $HTML_ENTITIES_CACHE) {

            // links:
            // - http://dev.w3.org/html5/html-author/charref
            // - http://www.w3schools.com/charsets/ref_html_entities_n.asp
            //
            // The keys have to be the *encoded* entities; with already decoded characters as keys
            // the table contains duplicates and can never match an entity.
            //
            // NOTE(review): the numeric keys cannot be reached by the letters-only lookup below,
            //               they are kept for completeness - confirm the exact spellings against upstream.
            // NOTE(review): "&bsol;" maps to a single quote (not a backslash), as in the reviewed code - confirm.
            $entitiesSecurity = array(
                '&#x00000;'          => '',
                '&#0;'               => '',
                '&#x00001;'          => '',
                '&#1;'               => '',
                '&nvgt;'             => '',
                '&#61253;'           => '',
                '&#x0EF45;'          => '',
                '&shy;'              => '',
                '&#x000AD;'          => '',
                '&#173;'             => '',
                '&colon;'            => ':',
                '&#x0003A;'          => ':',
                '&#58;'              => ':',
                '&lpar;'             => '(',
                '&#x00028;'          => '(',
                '&#40;'              => '(',
                '&rpar;'             => ')',
                '&#x00029;'          => ')',
                '&#41;'              => ')',
                '&quest;'            => '?',
                '&#x0003F;'          => '?',
                '&#63;'              => '?',
                '&sol;'              => '/',
                '&#x0002F;'          => '/',
                '&#47;'              => '/',
                '&apos;'             => '\'',
                '&#x00027;'          => '\'',
                '&#039;'             => '\'',
                '&#39;'              => '\'',
                '&#x27;'             => '\'',
                '&bsol;'             => '\'',
                '&#x0005C;'          => '\\',
                '&#92;'              => '\\',
                '&comma;'            => ',',
                '&#x0002C;'          => ',',
                '&#44;'              => ',',
                '&period;'           => '.',
                '&#x0002E;'          => '.',
                '&quot;'             => '"',
                '&QUOT;'             => '"',
                '&#x00022;'          => '"',
                '&#34;'              => '"',
                '&grave;'            => '`',
                '&DiacriticalGrave;' => '`',
                '&#x00060;'          => '`',
                '&#96;'              => '`',
                '&#46;'              => '.',
                '&equals;'           => '=',
                '&#x0003D;'          => '=',
                '&#61;'              => '=',
                '&newline;'          => "\n",
                '&#x0000A;'          => "\n",
                '&#10;'              => "\n",
                '&tab;'              => "\t",
                '&#x00009;'          => "\t",
                '&#9;'               => "\t",
            );

            // later arrays win for identical keys
            $HTML_ENTITIES_CACHE = array_merge(
                $entitiesSecurity,
                array_flip(get_html_translation_table(HTML_ENTITIES, $flags)),
                array_flip(self::$entitiesFallback)
            );
        }

        $replace = array();
        foreach ($matches[0] as $match) {
            // the table keys end with ";", the match never does
            $match .= ';';
            if (isset($HTML_ENTITIES_CACHE[$match])) {
                $replace[$match] = $HTML_ENTITIES_CACHE[$match];
            }
        }

        if (count($replace) > 0) {
            $str = str_replace(array_keys($replace), array_values($replace), $str);
        }
    }

    return $str;
}
||
2157 | |||
/**
 * Filters tag attributes for consistency and safety.
 *
 * <p>
 * <br />
 * INFO: Only attributes of the form name="value" / name='value' are kept. If none is
 * found and a replacement-string is set, a trailing name=[replacement] is kept instead.
 * Everything else is dropped.
 * </p>
 *
 * @param string $str
 *
 * @return string <p>The kept attributes, concatenated in the order they were found.</p>
 */
private function _filter_attributes($str)
{
    if ($str === '') {
        return '';
    }

    $matches = array();

    // 1. try: quoted attributes
    $found = preg_match_all('#\s*[A-Za-z\-]+\s*=\s*("|\042|\'|\047)([^\\1]*?)\\1#', $str, $matches);

    // 2. try: an attribute whose value was already swapped for the replacement-string
    if (!$found && $this->_replacement) {
        $found = preg_match_all('#\s*[a-zA-Z\-]+\s*=' . preg_quote($this->_replacement, '#') . '$#', $str, $matches);
    }

    if (!$found) {
        return '';
    }

    return implode('', $matches[0]);
}
||
2188 | |||
/**
 * initialize "$this->_never_allowed_str"
 *
 * <p>
 * <br />
 * INFO: The script related strings are mapped to "$this->_replacement"; the markup
 * related strings (comments, processing instructions, CDATA, DTD declarations) are
 * mapped to their entity-encoded form, so they show up as text instead of being parsed.
 * </p>
 */
private function _initNeverAllowedStr()
{
    $this->_never_allowed_str = array(
        'document.cookie' => $this->_replacement,
        'document.write'  => $this->_replacement,
        '.parentNode'     => $this->_replacement,
        '.innerHTML'      => $this->_replacement,
        '.appendChild'    => $this->_replacement,
        '-moz-binding'    => $this->_replacement,
        // the replacements must be encoded, an identity mapping would not neutralise anything
        '<!--'            => '&lt;!--',
        '-->'             => '--&gt;',
        '<?'              => '&lt;?',
        '?>'              => '?&gt;',
        '<![CDATA['       => '&lt;![CDATA[',
        '<!ENTITY'        => '&lt;!ENTITY',
        '<!DOCTYPE'       => '&lt;!DOCTYPE',
        '<!ATTLIST'       => '&lt;!ATTLIST',
        '<comment>'       => '&lt;comment&gt;',
    );
}
||
2212 | |||
/**
 * Callback method for xss_clean() to sanitize links.
 *
 * <p>
 * <br />
 * INFO: This limits the PCRE backtracks, making it more performance friendly
 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on link-heavy strings.
 * </p>
 *
 * <p>
 * <br />
 * Delegates to "_js_removal_calback()" with the attribute-name "href".
 * </p>
 *
 * @param array $match
 *
 * @return string
 */
private function _js_link_removal_callback($match)
{
    return $this->_js_removal_calback($match, 'href');
}
||
2231 | |||
/**
 * Callback method for xss_clean() to sanitize tags.
 *
 * <p>
 * <br />
 * INFO: This limits the PCRE backtracks, making it more performance friendly
 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on image tag heavy strings.
 * </p>
 *
 * <p>
 * <br />
 * The attribute part of the tag ($match[1]) is reduced to its well-formed attributes;
 * if the given attribute carries something script-like, it is rewritten to
 * [search]="[replacement]" before the attribute part is put back into the tag.
 * </p>
 *
 * @param array  $match  <p>[0] = the whole tag, [1] = its attribute part.</p>
 * @param string $search <p>The attribute-name to check, e.g. "href" or "src".</p>
 *
 * @return string
 */
private function _js_removal_calback($match, $search)
{
    if (!$match[0]) {
        return '';
    }

    $attributePart = $match[1];
    $pattern = '#' . $search . '=.*(?:\(.+([^\)]*?)(?:\)|$)|javascript:|view-source:|livescript:|wscript:|vbscript:|mocha:|charset=|window\.|document\.|\.cookie|<script|d\s*a\s*t\s*a\s*:)#is';

    // keep only the well-formed attributes, without any angle brackets
    $replacer = $this->_filter_attributes(str_replace(array('<', '>'), '', $attributePart));

    // the check runs on the raw attribute part, the rewrite on the filtered one
    if (preg_match($pattern, $attributePart) === 1) {
        $replacer = (string)preg_replace(
            $pattern,
            $search . '="' . $this->_replacement . '"',
            $replacer
        );
    }

    return str_ireplace($attributePart, $replacer, $match[0]);
}
||
2269 | |||
/**
 * Callback method for xss_clean() to sanitize image tags.
 *
 * <p>
 * <br />
 * INFO: This limits the PCRE backtracks, making it more performance friendly
 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on image tag heavy strings.
 * </p>
 *
 * <p>
 * <br />
 * Delegates to "_js_removal_calback()" with the attribute-name "src".
 * </p>
 *
 * @param array $match
 *
 * @return string
 */
private function _js_src_removal_callback($match)
{
    return $this->_js_removal_calback($match, 'src');
}
||
2288 | |||
/**
 * Sanitize naughty HTML.
 *
 * <p>
 * <br />
 * Callback method for AntiXSS->sanitize_naughty_html() to remove naughty HTML elements.
 * </p>
 *
 * <p>
 * <br />
 * INFO: The tag is not deleted but defused: its opening brace and every captured
 * trailing brace are entity-encoded, the captures [1], [2] and [3] are kept as they are.
 * </p>
 *
 * @param array $matches <p>[1], [2], [3] = parts of the tag, [4] = the trailing brace(s).</p>
 *
 * @return string
 */
private function _sanitize_naughty_html_callback($matches)
{
    // encode captured opening or closing brace to prevent recursive vectors
    $encodedBraces = str_replace(
        array(
            '>',
            '<',
        ),
        array(
            '&gt;',
            '&lt;',
        ),
        $matches[4]
    );

    // encode opening brace
    return '&lt;' . $matches[1] . $matches[2] . $matches[3] . $encodedBraces;
}
||
2317 | |||
/**
 * Add some strings to the "_evil_attributes"-array.
 *
 * <p>
 * <br />
 * INFO: The given strings are put in front of the existing entries (array_merge()),
 * duplicates are not removed.
 * </p>
 *
 * @param array $strings
 *
 * @return $this
 */
public function addEvilAttributes(array $strings)
{
    $this->_evil_attributes = array_merge($strings, $this->_evil_attributes);

    return $this;
}
||
2331 | |||
/**
 * Compact any exploded words.
 *
 * <p>
 * <br />
 * INFO: This corrects words like: j a v a s c r i p t
 * <br />
 * These words are compacted back to their correct state.
 * </p>
 *
 * <p>
 * <br />
 * For every keyword a pattern is built that allows whitespace, "+" and quotes
 * between its letters; the patterns are cached per keyword in a static variable.
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _compact_exploded_javascript($str)
{
    static $WORDS_CACHE;

    // what may sit between two letters of an exploded keyword
    $separator = '(?:\s|\+|"|\042|\'|\047)*';

    $keywords = array(
        'javascript',
        'expression',
        'view-source',
        'vbscript',
        'jscript',
        'wscript',
        'vbs',
        'script',
        'base64',
        'applet',
        'alert',
        'document',
        'write',
        'cookie',
        'window',
        'confirm',
        'prompt',
        'eval',
    );

    foreach ($keywords as $keyword) {

        if (!isset($WORDS_CACHE[$keyword])) {
            // "abc" => "a" . $separator . "b" . $separator . "c"
            $WORDS_CACHE[$keyword] = implode($separator, str_split($keyword));
        }

        // We only want to do this when it is followed by a non-word character
        // That way valid stuff like "dealer to" does not become "dealerto".
        $str = preg_replace_callback(
            '#(' . $WORDS_CACHE[$keyword] . ')(\W)#is',
            array($this, '_compact_exploded_words_callback'),
            $str
        );
    }

    return (string)$str;
}
||
2398 | |||
/**
 * Decode the html-tags via "UTF8::html_entity_decode()" or the string via "UTF8::rawurldecode()".
 *
 * <p>
 * <br />
 * INFO: If the string contains something that looks like the start of a html-tag,
 * everything from that tag on is handed to "_decode_entity()"; otherwise the
 * whole string is raw-url-decoded.
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _decode_string($str)
{
    // "<" + word characters + the rest of the string
    $htmlTagPattern = '/<\w+.*+/si';

    if (preg_match($htmlTagPattern, $str) !== 1) {
        return UTF8::rawurldecode($str);
    }

    return preg_replace_callback(
        $htmlTagPattern,
        array($this, '_decode_entity'),
        $str
    );
}
||
2426 | |||
/**
 * Check if the "AntiXSS->xss_clean()"-method found an XSS attack in the last run.
 *
 * @return bool|null <p>Will return null if the "xss_clean()" wasn't running at all.</p>
 */
public function isXssFound()
{
    return $this->xss_found;
}
||
2436 | |||
/**
 * Remove some strings from the "_evil_attributes"-array.
 *
 * <p>
 * <br />
 * WARNING: Use this method only if you have a really good reason.
 * </p>
 *
 * <p>
 * <br />
 * INFO: Only the given strings are removed, strings that are not in the array are
 * ignored. The remaining entries keep their order and are re-indexed.
 * </p>
 *
 * @param array $strings
 *
 * @return $this
 */
public function removeEvilAttributes(array $strings)
{
    // keep every evil attribute that was NOT requested for removal
    // (diffing the other way round always gives an empty array and would disable the whole check)
    $this->_evil_attributes = array_values(
        array_diff($this->_evil_attributes, $strings)
    );

    return $this;
}
||
2458 | |||
/**
 * Remove disallowed Javascript in links, media tags and script tags.
 *
 * <p>
 * <br />
 * A cheap "stripos()" check guards every regular expression, so the more
 * expensive pattern only runs when the tag name occurs in the string at all.
 * The whole pass is repeated until the string no longer changes.
 * </p>
 *
 * <p>
 * <br />
 * Note: It was reported that not only space characters, but all in
 * the following pattern can be parsed as separators between a tag name
 * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
 * ... however, UTF8::clean() above already strips the
 * hex-encoded ones, so we'll skip them below.
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _remove_disallowed_javascript($str)
{
    // tags that are all processed by "_js_src_removal_callback()", in this order
    $srcTags = array('img', 'audio', 'video', 'source');

    do {
        $before = $str;

        if (stripos($str, '<a') !== false) {
            $str = preg_replace_callback(
                '#<a[^a-z0-9>]+([^>]*?)(?:>|$)#i',
                array($this, '_js_link_removal_callback'),
                $str
            );
        }

        foreach ($srcTags as $tag) {
            if (stripos($str, '<' . $tag) === false) {
                continue;
            }

            $str = preg_replace_callback(
                '#<' . $tag . '[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#i',
                array($this, '_js_src_removal_callback'),
                $str
            );
        }

        if (stripos($str, 'script') !== false) {
            // US-ASCII: ¼ === <
            $str = preg_replace('#(?:¼|<)/*(?:script).*(?:¾|>)#isuU', $this->_replacement, $str);
        }
    } while ($before !== $str);

    return (string)$str;
}
||
2550 | |||
/**
 * Remove Evil HTML Attributes (like event handlers and style).
 *
 * It removes the evil attribute and either:
 *
 * - Everything up until a space. For example, everything between the pipes:
 *
 * <code>
 * <a |style=document.write('hello');alert('world');| class=link>
 * </code>
 *
 * - Everything inside the quotes. For example, everything between the pipes:
 *
 * <code>
 * <a |style="document.write('hello'); alert('world');"| class="link">
 * </code>
 *
 * The entries of "_evil_attributes" are used as regular-expression fragments
 * (they are joined with "|" and are not quoted), and every removed attribute is
 * replaced by the configured replacement string.
 *
 * @param string $str <p>The string to check.</p>
 *
 * @return string <p>The string with the evil attributes removed.</p>
 */
private function _remove_evil_attributes($str)
{
    // Nothing registered: an empty alternation would match every "=" inside a
    // tag and strip harmless attribute values, so there is nothing to do here.
    if (count($this->_evil_attributes) === 0) {
        return (string)$str;
    }

    $evil_attributes_string = implode('|', $this->_evil_attributes);

    // replace style-attribute, first (if needed)
    if (in_array('style', $this->_evil_attributes, true)) {
        do {
            $count = 0;

            $str = preg_replace(
                '/(<[^>]+)(?<!\w)(style="(?:[^"]*?)"|style=\'(?:[^\']*?)\')/i',
                '$1' . $this->_replacement,
                $str,
                -1,
                $count
            );
        } while ($count);
    }

    // Repeat until nothing matches anymore, because one pass removes at most
    // one attribute per "<..." prefix.
    do {
        $count = 0;

        // Find occurrences of illegal attributes with a double-quoted, a
        // single-quoted or an unquoted value. The quoted alternatives consume
        // the complete value up to the matching quote; an unterminated quote
        // falls through to the "unquoted" alternative (everything up to the
        // next whitespace or ">").
        $str = preg_replace(
            '/(<[^>]+)(?<!\w)(' . $evil_attributes_string . ')\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]*)/is',
            '$1' . $this->_replacement,
            $str,
            -1,
            $count
        );
    } while ($count);

    return (string)$str;
}
||
2598 | |||
/**
 * UTF-7 decoding function.
 *
 * <p>
 * <br />
 * Every sequence of the form "+<base64 characters>-" found in the input is
 * passed to "_repack_utf7_callback()" and replaced by its return value.
 * </p>
 *
 * @param string $str <p>HTML document for recode ASCII part of UTF-7 back to ASCII.</p>
 *
 * @return string
 */
private function _repack_utf7($str)
{
    $callback = array($this, '_repack_utf7_callback');

    return preg_replace_callback('#\+([0-9a-zA-Z/]+)\-#', $callback, $str);
}
||
2614 | |||
/**
 * Additional UTF-7 decoding function (callback for "_repack_utf7()").
 *
 * <p>
 * <br />
 * The base64 payload of the match is decoded. Leading runs of byte pairs that
 * do not start with a NUL byte are re-encoded via
 * "_repack_utf7_callback_back()", and afterwards the NUL byte is dropped from
 * every remaining "NUL + character" pair.
 * </p>
 *
 * @param array $str <p>Match array from "preg_replace_callback()": index 0 is the full match, index 1
 *                   the base64 payload.</p>
 *
 * @return string
 */
private function _repack_utf7_callback($str)
{
    $decoded = base64_decode($str[1]);

    if ($decoded === false) {
        // Leave the sequence untouched. A preg callback has to return a string;
        // returning the whole match array would be converted to "Array".
        return $str[0];
    }

    $repacked = preg_replace_callback(
        '/^((?:\x00.)*?)((?:[^\x00].)+)/us',
        array($this, '_repack_utf7_callback_back'),
        $decoded
    );

    // "$repacked" is null when the decoded bytes are rejected by the "u"
    // modifier; the cast keeps the previous outcome (an empty string) without
    // handing null to preg_replace().
    return (string)preg_replace('/\x00(.)/us', '$1', (string)$repacked);
}
||
2638 | |||
/**
 * Additional UTF-7 encoding function (callback used by "_repack_utf7_callback()").
 *
 * <p>
 * <br />
 * Keeps the first capture group as it is and wraps the second one as
 * "+<base64 without padding>-".
 * </p>
 *
 * @param array $str <p>Match array from "preg_replace_callback()"; the indexes 1 and 2 are used.</p>
 *
 * @return string
 */
private function _repack_utf7_callback_back($str)
{
    $encoded = rtrim(base64_encode($str[2]), '=');

    return $str[1] . '+' . $encoded . '-';
}
||
2650 | |||
/**
 * Sanitize naughty HTML elements.
 *
 * <p>
 * <br />
 *
 * Every tag (opening or closing) whose name starts with one of the words
 * in the list below is passed to "_sanitize_naughty_html_callback()",
 * which converts the tag to entities.
 *
 * <br /><br />
 *
 * So this: <blink>
 * <br />
 * Becomes: &lt;blink&gt;
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _sanitize_naughty_html($str)
{
    // the order matters for the alternation (e.g. "basefont" before "base")
    $naughtyTags = array(
        'alert', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior',
        'bgsound', 'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame',
        'head', 'html', 'ilayer', 'iframe', 'input', 'button', 'select', 'isindex',
        'layer', 'link', 'meta', 'keygen', 'object', 'plaintext', 'style', 'script',
        'textarea', 'title', 'math', 'video', 'source', 'svg', 'xml', 'xss', 'eval',
    );

    $pattern = '#<(/*\s*)(' . implode('|', $naughtyTags) . ')([^><]*)([><]*)#i';

    $result = preg_replace_callback(
        $pattern,
        array($this, '_sanitize_naughty_html_callback'),
        $str
    );

    return (string)$result;
}
||
2685 | |||
/**
 * Sanitize naughty scripting elements
 *
 * <p>
 * <br />
 *
 * Similar to "_sanitize_naughty_html()", only instead of looking for
 * tags it looks for PHP and JavaScript commands
 * that are disallowed. Rather than removing the
 * code, it simply converts the parenthesis to entities
 * rendering the code un-executable.
 *
 * <br /><br />
 *
 * For example: <pre>eval('some code')</pre>
 * <br />
 * Becomes: <pre>eval&#40;'some code'&#41;</pre>
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _sanitize_naughty_javascript($str)
{
    // The parentheses of the call are written back as numeric entities
    // ("&#40;" / "&#41;"); re-inserting literal parentheses would leave the
    // input unchanged. The match is ungreedy, so the argument list ends at
    // the first closing parenthesis.
    $str = preg_replace(
        '#(alert|eval|prompt|confirm|cmd|passthru|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*)\)#siU',
        '\\1\\2&#40;\\3&#41;',
        $str
    );

    return (string)$str;
}
||
2719 | |||
/**
 * Set the replacement-string for not allowed strings.
 *
 * <p>
 * <br />
 * The value is cast to string and "_initNeverAllowedStr()" is called
 * afterwards.
 * </p>
 *
 * @param string $string
 *
 * @return $this
 */
public function setReplacement($string)
{
    $replacement = (string)$string;

    $this->_replacement = $replacement;
    $this->_initNeverAllowedStr();

    return $this;
}
||
2735 | |||
/**
 * Set the option to strip 4-Byte chars.
 *
 * <p>
 * <br />
 * INFO: use it if your DB (MySQL) can't use "utf8mb4" -> preventing stored XSS-attacks
 * </p>
 *
 * @param mixed $bool <p>Any value; it is cast to a boolean.</p>
 *
 * @return $this
 */
public function setStripe4byteChars($bool)
{
    $enabled = (bool)$bool;

    $this->_stripe_4byte_chars = $enabled;

    return $this;
}
||
2754 | |||
/**
 * XSS Clean
 *
 * <p>
 * <br />
 * Sanitizes data so that "Cross Site Scripting" hacks can be
 * prevented. This method does a fair amount of work but
 * it is extremely thorough, designed to prevent even the
 * most obscure XSS attempts. But keep in mind that nothing
 * is ever 100% foolproof...
 * </p>
 *
 * <p>
 * <br />
 * <strong>Note:</strong> Should only be used to deal with data upon submission.
 * It's not something that should be used for general
 * runtime processing.
 * </p>
 *
 * <p>
 * <br />
 * Arrays are processed recursively (keys are preserved). A single value is
 * passed through "_do()" again and again until the result no longer changes.
 * </p>
 *
 * <p>
 * <br />
 * NOTE(review): the "xss_found" flag is reset at the start of every call,
 * including the recursive calls for array elements, so after cleaning an array
 * "isXssFound()" presumably only reflects the last element. Confirm whether
 * that is intended.
 * </p>
 *
 * @link http://channel.bitflux.ch/wiki/XSS_Prevention
 *       Based in part on some code and ideas from Bitflux.
 *
 * @link http://ha.ckers.org/xss.html
 *       To help develop this script I used this great list of
 *       vulnerabilities along with a few other hacks I've
 *       harvested from examining vulnerabilities in other programs.
 *
 * @param string|array $str <p>input data e.g. string or array</p>
 *
 * @return string|array <p>
 *                      string: will return a string, if the input is a string<br />
 *                      array: will return a array, if the input is a array<br />
 *                      NOTE(review): the previous documentation also listed "boolean" for an
 *                      "is_image"-parameter, which this method does not have.
 *                      </p>
 */
public function xss_clean($str)
{
    // reset
    $this->xss_found = null;

    // check for an array of strings
    if (is_array($str)) {
        // iterate by value: every element is written back through its key,
        // so no reference is needed (and none is left dangling afterwards)
        foreach ($str as $key => $value) {
            $str[$key] = $this->xss_clean($value);
        }

        return $str;
    }

    // process until the output is stable
    do {
        $old_str = $str;
        $str = $this->_do($str);
    } while ($old_str !== $str);

    return $str;
}
||
2812 | |||
/**
 * Generates the XSS hash if needed and returns it.
 *
 * <p>
 * <br />
 * The hash is created once per instance and cached in "$this->_xss_hash":
 * the hex representation of "Bootup::get_random_bytes(16)" is used, or - if
 * that call yields an empty / falsy value - an md5 of "uniqid(mt_rand(), true)".
 * </p>
 *
 * @return string <p>XSS hash, prefixed with "voku::anti-xss::"</p>
 */
private function _xss_hash()
{
    if ($this->_xss_hash === null) {
        $randomBytes = Bootup::get_random_bytes(16);

        $this->_xss_hash = $randomBytes
            ? bin2hex($randomBytes)
            : md5(uniqid(mt_rand(), true));
    }

    return 'voku::anti-xss::' . $this->_xss_hash;
}
||
2832 | |||
2833 | } |