Completed
Push — development ( 1943e4...0844f9 )
by Nils
06:58
created

AntiXSS::_entity_decode()   C

Complexity

Conditions 8
Paths 20

Size

Total Lines 107
Code Lines 80

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
eloc 80
nc 20
nop 1
dl 0
loc 107
rs 5.2676
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign that the commented part should be extracted into a new method, with the comment serving as a starting point for a good name for that new method.

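Commonly applied refactorings include:

- Extract Method
- If many parameters or temporary variables are present: Replace Temp with Query, Introduce Parameter Object, Preserve Whole Object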

1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * Anti XSS library
7
 *
8
 * ported from "CodeIgniter"
9
 *
10
 * @author      EllisLab Dev Team
11
 * @author      Lars Moelleken
12
 * @copyright   Copyright (c) 2008 - 2014, EllisLab, Inc. (http://ellislab.com/)
13
 * @copyright   Copyright (c) 2014 - 2015, British Columbia Institute of Technology (http://bcit.ca/)
14
 * @copyright   Copyright (c) 2015 - 2017, Lars Moelleken (https://moelleken.org/)
15
 *
16
 * @license     http://opensource.org/licenses/MIT	MIT License
17
 */
18
final class AntiXSS
19
{
20
21
  /**
22
   * @var array
23
   */
24
  private static $entitiesFallback = array(
25
      "\t" => '&Tab;',
26
      "\n" => '&NewLine;',
27
      '!'  => '&excl;',
28
      '"'  => '&quot;',
29
      '#'  => '&num;',
30
      '$'  => '&dollar;',
31
      '%'  => '&percnt;',
32
      '&'  => '&amp;',
33
      "'"  => '&apos;',
34
      '('  => '&lpar;',
35
      ')'  => '&rpar;',
36
      '*'  => '&ast;',
37
      '+'  => '&plus;',
38
      ','  => '&comma;',
39
      '.'  => '&period;',
40
      '/'  => '&sol;',
41
      ':'  => '&colon;',
42
      ';'  => '&semi;',
43
      '<'  => '&lt;',
44
      '<⃒' => '&nvlt;',
45
      '='  => '&equals;',
46
      '=⃥' => '&bne;',
47
      '>'  => '&gt;',
48
      '>⃒' => '&nvgt',
49
      '?'  => '&quest;',
50
      '@'  => '&commat;',
51
      '['  => '&lbrack;',
52
      ']'  => '&rsqb;',
53
      '^'  => '&Hat;',
54
      '_'  => '&lowbar;',
55
      '`'  => '&grave;',
56
      'fj' => '&fjlig;',
57
      '{'  => '&lbrace;',
58
      '|'  => '&vert;',
59
      '}'  => '&rcub;',
60
      ' '  => '&nbsp;',
61
      '¡'  => '&iexcl;',
62
      '¢'  => '&cent;',
63
      '£'  => '&pound;',
64
      '¤'  => '&curren;',
65
      '¥'  => '&yen;',
66
      '¦'  => '&brvbar;',
67
      '§'  => '&sect;',
68
      '¨'  => '&DoubleDot;',
69
      '©'  => '&copy;',
70
      'ª'  => '&ordf;',
71
      '«'  => '&laquo;',
72
      '¬'  => '&not;',
73
      '­'  => '&shy;',
74
      '®'  => '&reg;',
75
      '¯'  => '&macr;',
76
      '°'  => '&deg;',
77
      '±'  => '&plusmn;',
78
      '²'  => '&sup2;',
79
      '³'  => '&sup3;',
80
      '´'  => '&DiacriticalAcute;',
81
      'µ'  => '&micro;',
82
      '¶'  => '&para;',
83
      '·'  => '&CenterDot;',
84
      '¸'  => '&Cedilla;',
85
      '¹'  => '&sup1;',
86
      'º'  => '&ordm;',
87
      '»'  => '&raquo;',
88
      '¼'  => '&frac14;',
89
      '½'  => '&half;',
90
      '¾'  => '&frac34;',
91
      '¿'  => '&iquest;',
92
      'À'  => '&Agrave;',
93
      'Á'  => '&Aacute;',
94
      'Â'  => '&Acirc;',
95
      'Ã'  => '&Atilde;',
96
      'Ä'  => '&Auml;',
97
      'Å'  => '&Aring;',
98
      'Æ'  => '&AElig;',
99
      'Ç'  => '&Ccedil;',
100
      'È'  => '&Egrave;',
101
      'É'  => '&Eacute;',
102
      'Ê'  => '&Ecirc;',
103
      'Ë'  => '&Euml;',
104
      'Ì'  => '&Igrave;',
105
      'Í'  => '&Iacute;',
106
      'Î'  => '&Icirc;',
107
      'Ï'  => '&Iuml;',
108
      'Ð'  => '&ETH;',
109
      'Ñ'  => '&Ntilde;',
110
      'Ò'  => '&Ograve;',
111
      'Ó'  => '&Oacute;',
112
      'Ô'  => '&Ocirc;',
113
      'Õ'  => '&Otilde;',
114
      'Ö'  => '&Ouml;',
115
      '×'  => '&times;',
116
      'Ø'  => '&Oslash;',
117
      'Ù'  => '&Ugrave;',
118
      'Ú'  => '&Uacute;',
119
      'Û'  => '&Ucirc;',
120
      'Ü'  => '&Uuml;',
121
      'Ý'  => '&Yacute;',
122
      'Þ'  => '&THORN;',
123
      'ß'  => '&szlig;',
124
      'à'  => '&agrave;',
125
      'á'  => '&aacute;',
126
      'â'  => '&acirc;',
127
      'ã'  => '&atilde;',
128
      'ä'  => '&auml;',
129
      'å'  => '&aring;',
130
      'æ'  => '&aelig;',
131
      'ç'  => '&ccedil;',
132
      'è'  => '&egrave;',
133
      'é'  => '&eacute;',
134
      'ê'  => '&ecirc;',
135
      'ë'  => '&euml;',
136
      'ì'  => '&igrave;',
137
      'í'  => '&iacute;',
138
      'î'  => '&icirc;',
139
      'ï'  => '&iuml;',
140
      'ð'  => '&eth;',
141
      'ñ'  => '&ntilde;',
142
      'ò'  => '&ograve;',
143
      'ó'  => '&oacute;',
144
      'ô'  => '&ocirc;',
145
      'õ'  => '&otilde;',
146
      'ö'  => '&ouml;',
147
      '÷'  => '&divide;',
148
      'ø'  => '&oslash;',
149
      'ù'  => '&ugrave;',
150
      'ú'  => '&uacute;',
151
      'û'  => '&ucirc;',
152
      'ü'  => '&uuml;',
153
      'ý'  => '&yacute;',
154
      'þ'  => '&thorn;',
155
      'ÿ'  => '&yuml;',
156
      'Ā'  => '&Amacr;',
157
      'ā'  => '&amacr;',
158
      'Ă'  => '&Abreve;',
159
      'ă'  => '&abreve;',
160
      'Ą'  => '&Aogon;',
161
      'ą'  => '&aogon;',
162
      'Ć'  => '&Cacute;',
163
      'ć'  => '&cacute;',
164
      'Ĉ'  => '&Ccirc;',
165
      'ĉ'  => '&ccirc;',
166
      'Ċ'  => '&Cdot;',
167
      'ċ'  => '&cdot;',
168
      'Č'  => '&Ccaron;',
169
      'č'  => '&ccaron;',
170
      'Ď'  => '&Dcaron;',
171
      'ď'  => '&dcaron;',
172
      'Đ'  => '&Dstrok;',
173
      'đ'  => '&dstrok;',
174
      'Ē'  => '&Emacr;',
175
      'ē'  => '&emacr;',
176
      'Ė'  => '&Edot;',
177
      'ė'  => '&edot;',
178
      'Ę'  => '&Eogon;',
179
      'ę'  => '&eogon;',
180
      'Ě'  => '&Ecaron;',
181
      'ě'  => '&ecaron;',
182
      'Ĝ'  => '&Gcirc;',
183
      'ĝ'  => '&gcirc;',
184
      'Ğ'  => '&Gbreve;',
185
      'ğ'  => '&gbreve;',
186
      'Ġ'  => '&Gdot;',
187
      'ġ'  => '&gdot;',
188
      'Ģ'  => '&Gcedil;',
189
      'Ĥ'  => '&Hcirc;',
190
      'ĥ'  => '&hcirc;',
191
      'Ħ'  => '&Hstrok;',
192
      'ħ'  => '&hstrok;',
193
      'Ĩ'  => '&Itilde;',
194
      'ĩ'  => '&itilde;',
195
      'Ī'  => '&Imacr;',
196
      'ī'  => '&imacr;',
197
      'Į'  => '&Iogon;',
198
      'į'  => '&iogon;',
199
      'İ'  => '&Idot;',
200
      'ı'  => '&inodot;',
201
      'IJ'  => '&IJlig;',
202
      'ij'  => '&ijlig;',
203
      'Ĵ'  => '&Jcirc;',
204
      'ĵ'  => '&jcirc;',
205
      'Ķ'  => '&Kcedil;',
206
      'ķ'  => '&kcedil;',
207
      'ĸ'  => '&kgreen;',
208
      'Ĺ'  => '&Lacute;',
209
      'ĺ'  => '&lacute;',
210
      'Ļ'  => '&Lcedil;',
211
      'ļ'  => '&lcedil;',
212
      'Ľ'  => '&Lcaron;',
213
      'ľ'  => '&lcaron;',
214
      'Ŀ'  => '&Lmidot;',
215
      'ŀ'  => '&lmidot;',
216
      'Ł'  => '&Lstrok;',
217
      'ł'  => '&lstrok;',
218
      'Ń'  => '&Nacute;',
219
      'ń'  => '&nacute;',
220
      'Ņ'  => '&Ncedil;',
221
      'ņ'  => '&ncedil;',
222
      'Ň'  => '&Ncaron;',
223
      'ň'  => '&ncaron;',
224
      'ʼn'  => '&napos;',
225
      'Ŋ'  => '&ENG;',
226
      'ŋ'  => '&eng;',
227
      'Ō'  => '&Omacr;',
228
      'ō'  => '&omacr;',
229
      'Ő'  => '&Odblac;',
230
      'ő'  => '&odblac;',
231
      'Œ'  => '&OElig;',
232
      'œ'  => '&oelig;',
233
      'Ŕ'  => '&Racute;',
234
      'ŕ'  => '&racute;',
235
      'Ŗ'  => '&Rcedil;',
236
      'ŗ'  => '&rcedil;',
237
      'Ř'  => '&Rcaron;',
238
      'ř'  => '&rcaron;',
239
      'Ś'  => '&Sacute;',
240
      'ś'  => '&sacute;',
241
      'Ŝ'  => '&Scirc;',
242
      'ŝ'  => '&scirc;',
243
      'Ş'  => '&Scedil;',
244
      'ş'  => '&scedil;',
245
      'Š'  => '&Scaron;',
246
      'š'  => '&scaron;',
247
      'Ţ'  => '&Tcedil;',
248
      'ţ'  => '&tcedil;',
249
      'Ť'  => '&Tcaron;',
250
      'ť'  => '&tcaron;',
251
      'Ŧ'  => '&Tstrok;',
252
      'ŧ'  => '&tstrok;',
253
      'Ũ'  => '&Utilde;',
254
      'ũ'  => '&utilde;',
255
      'Ū'  => '&Umacr;',
256
      'ū'  => '&umacr;',
257
      'Ŭ'  => '&Ubreve;',
258
      'ŭ'  => '&ubreve;',
259
      'Ů'  => '&Uring;',
260
      'ů'  => '&uring;',
261
      'Ű'  => '&Udblac;',
262
      'ű'  => '&udblac;',
263
      'Ų'  => '&Uogon;',
264
      'ų'  => '&uogon;',
265
      'Ŵ'  => '&Wcirc;',
266
      'ŵ'  => '&wcirc;',
267
      'Ŷ'  => '&Ycirc;',
268
      'ŷ'  => '&ycirc;',
269
      'Ÿ'  => '&Yuml;',
270
      'Ź'  => '&Zacute;',
271
      'ź'  => '&zacute;',
272
      'Ż'  => '&Zdot;',
273
      'ż'  => '&zdot;',
274
      'Ž'  => '&Zcaron;',
275
      'ž'  => '&zcaron;',
276
      'ƒ'  => '&fnof;',
277
      'Ƶ'  => '&imped;',
278
      'ǵ'  => '&gacute;',
279
      'ȷ'  => '&jmath;',
280
      'ˆ'  => '&circ;',
281
      'ˇ'  => '&Hacek;',
282
      '˘'  => '&Breve;',
283
      '˙'  => '&dot;',
284
      '˚'  => '&ring;',
285
      '˛'  => '&ogon;',
286
      '˜'  => '&DiacriticalTilde;',
287
      '˝'  => '&DiacriticalDoubleAcute;',
288
      '̑'  => '&DownBreve;',
289
      'Α'  => '&Alpha;',
290
      'Β'  => '&Beta;',
291
      'Γ'  => '&Gamma;',
292
      'Δ'  => '&Delta;',
293
      'Ε'  => '&Epsilon;',
294
      'Ζ'  => '&Zeta;',
295
      'Η'  => '&Eta;',
296
      'Θ'  => '&Theta;',
297
      'Ι'  => '&Iota;',
298
      'Κ'  => '&Kappa;',
299
      'Λ'  => '&Lambda;',
300
      'Μ'  => '&Mu;',
301
      'Ν'  => '&Nu;',
302
      'Ξ'  => '&Xi;',
303
      'Ο'  => '&Omicron;',
304
      'Π'  => '&Pi;',
305
      'Ρ'  => '&Rho;',
306
      'Σ'  => '&Sigma;',
307
      'Τ'  => '&Tau;',
308
      'Υ'  => '&Upsilon;',
309
      'Φ'  => '&Phi;',
310
      'Χ'  => '&Chi;',
311
      'Ψ'  => '&Psi;',
312
      'Ω'  => '&Omega;',
313
      'α'  => '&alpha;',
314
      'β'  => '&beta;',
315
      'γ'  => '&gamma;',
316
      'δ'  => '&delta;',
317
      'ε'  => '&epsi;',
318
      'ζ'  => '&zeta;',
319
      'η'  => '&eta;',
320
      'θ'  => '&theta;',
321
      'ι'  => '&iota;',
322
      'κ'  => '&kappa;',
323
      'λ'  => '&lambda;',
324
      'μ'  => '&mu;',
325
      'ν'  => '&nu;',
326
      'ξ'  => '&xi;',
327
      'ο'  => '&omicron;',
328
      'π'  => '&pi;',
329
      'ρ'  => '&rho;',
330
      'ς'  => '&sigmav;',
331
      'σ'  => '&sigma;',
332
      'τ'  => '&tau;',
333
      'υ'  => '&upsi;',
334
      'φ'  => '&phi;',
335
      'χ'  => '&chi;',
336
      'ψ'  => '&psi;',
337
      'ω'  => '&omega;',
338
      'ϑ'  => '&thetasym;',
339
      'ϒ'  => '&upsih;',
340
      'ϕ'  => '&straightphi;',
341
      'ϖ'  => '&piv;',
342
      'Ϝ'  => '&Gammad;',
343
      'ϝ'  => '&gammad;',
344
      'ϰ'  => '&varkappa;',
345
      'ϱ'  => '&rhov;',
346
      'ϵ'  => '&straightepsilon;',
347
      '϶'  => '&backepsilon;',
348
      'Ё'  => '&IOcy;',
349
      'Ђ'  => '&DJcy;',
350
      'Ѓ'  => '&GJcy;',
351
      'Є'  => '&Jukcy;',
352
      'Ѕ'  => '&DScy;',
353
      'І'  => '&Iukcy;',
354
      'Ї'  => '&YIcy;',
355
      'Ј'  => '&Jsercy;',
356
      'Љ'  => '&LJcy;',
357
      'Њ'  => '&NJcy;',
358
      'Ћ'  => '&TSHcy;',
359
      'Ќ'  => '&KJcy;',
360
      'Ў'  => '&Ubrcy;',
361
      'Џ'  => '&DZcy;',
362
      'А'  => '&Acy;',
363
      'Б'  => '&Bcy;',
364
      'В'  => '&Vcy;',
365
      'Г'  => '&Gcy;',
366
      'Д'  => '&Dcy;',
367
      'Е'  => '&IEcy;',
368
      'Ж'  => '&ZHcy;',
369
      'З'  => '&Zcy;',
370
      'И'  => '&Icy;',
371
      'Й'  => '&Jcy;',
372
      'К'  => '&Kcy;',
373
      'Л'  => '&Lcy;',
374
      'М'  => '&Mcy;',
375
      'Н'  => '&Ncy;',
376
      'О'  => '&Ocy;',
377
      'П'  => '&Pcy;',
378
      'Р'  => '&Rcy;',
379
      'С'  => '&Scy;',
380
      'Т'  => '&Tcy;',
381
      'У'  => '&Ucy;',
382
      'Ф'  => '&Fcy;',
383
      'Х'  => '&KHcy;',
384
      'Ц'  => '&TScy;',
385
      'Ч'  => '&CHcy;',
386
      'Ш'  => '&SHcy;',
387
      'Щ'  => '&SHCHcy;',
388
      'Ъ'  => '&HARDcy;',
389
      'Ы'  => '&Ycy;',
390
      'Ь'  => '&SOFTcy;',
391
      'Э'  => '&Ecy;',
392
      'Ю'  => '&YUcy;',
393
      'Я'  => '&YAcy;',
394
      'а'  => '&acy;',
395
      'б'  => '&bcy;',
396
      'в'  => '&vcy;',
397
      'г'  => '&gcy;',
398
      'д'  => '&dcy;',
399
      'е'  => '&iecy;',
400
      'ж'  => '&zhcy;',
401
      'з'  => '&zcy;',
402
      'и'  => '&icy;',
403
      'й'  => '&jcy;',
404
      'к'  => '&kcy;',
405
      'л'  => '&lcy;',
406
      'м'  => '&mcy;',
407
      'н'  => '&ncy;',
408
      'о'  => '&ocy;',
409
      'п'  => '&pcy;',
410
      'р'  => '&rcy;',
411
      'с'  => '&scy;',
412
      'т'  => '&tcy;',
413
      'у'  => '&ucy;',
414
      'ф'  => '&fcy;',
415
      'х'  => '&khcy;',
416
      'ц'  => '&tscy;',
417
      'ч'  => '&chcy;',
418
      'ш'  => '&shcy;',
419
      'щ'  => '&shchcy;',
420
      'ъ'  => '&hardcy;',
421
      'ы'  => '&ycy;',
422
      'ь'  => '&softcy;',
423
      'э'  => '&ecy;',
424
      'ю'  => '&yucy;',
425
      'я'  => '&yacy;',
426
      'ё'  => '&iocy;',
427
      'ђ'  => '&djcy;',
428
      'ѓ'  => '&gjcy;',
429
      'є'  => '&jukcy;',
430
      'ѕ'  => '&dscy;',
431
      'і'  => '&iukcy;',
432
      'ї'  => '&yicy;',
433
      'ј'  => '&jsercy;',
434
      'љ'  => '&ljcy;',
435
      'њ'  => '&njcy;',
436
      'ћ'  => '&tshcy;',
437
      'ќ'  => '&kjcy;',
438
      'ў'  => '&ubrcy;',
439
      'џ'  => '&dzcy;',
440
      ' '  => '&ensp;',
441
      ' '  => '&emsp;',
442
      ' '  => '&emsp13;',
443
      ' '  => '&emsp14;',
444
      ' '  => '&numsp;',
445
      ' '  => '&puncsp;',
446
      ' '  => '&ThinSpace;',
447
      ' '  => '&hairsp;',
448
      '​'  => '&ZeroWidthSpace;',
449
      '‌'  => '&zwnj;',
450
      '‍'  => '&zwj;',
451
      '‎'  => '&lrm;',
452
      '‏'  => '&rlm;',
453
      '‐'  => '&hyphen;',
454
      '–'  => '&ndash;',
455
      '—'  => '&mdash;',
456
      '―'  => '&horbar;',
457
      '‖'  => '&Verbar;',
458
      '‘'  => '&OpenCurlyQuote;',
459
      '’'  => '&rsquo;',
460
      '‚'  => '&sbquo;',
461
      '“'  => '&OpenCurlyDoubleQuote;',
462
      '”'  => '&rdquo;',
463
      '„'  => '&bdquo;',
464
      '†'  => '&dagger;',
465
      '‡'  => '&Dagger;',
466
      '•'  => '&bull;',
467
      '‥'  => '&nldr;',
468
      '…'  => '&hellip;',
469
      '‰'  => '&permil;',
470
      '‱'  => '&pertenk;',
471
      '′'  => '&prime;',
472
      '″'  => '&Prime;',
473
      '‴'  => '&tprime;',
474
      '‵'  => '&backprime;',
475
      '‹'  => '&lsaquo;',
476
      '›'  => '&rsaquo;',
477
      '‾'  => '&oline;',
478
      '⁁'  => '&caret;',
479
      '⁃'  => '&hybull;',
480
      '⁄'  => '&frasl;',
481
      '⁏'  => '&bsemi;',
482
      '⁗'  => '&qprime;',
483
      ' '  => '&MediumSpace;',
484
      '  ' => '&ThickSpace;',
485
      '⁠'  => '&NoBreak;',
486
      '⁡'  => '&af;',
487
      '⁢'  => '&InvisibleTimes;',
488
      '⁣'  => '&ic;',
489
      '€'  => '&euro;',
490
      '⃛'  => '&TripleDot;',
491
      '⃜'  => '&DotDot;',
492
      'ℂ'  => '&complexes;',
493
      '℅'  => '&incare;',
494
      'ℊ'  => '&gscr;',
495
      'ℋ'  => '&HilbertSpace;',
496
      'ℌ'  => '&Hfr;',
497
      'ℍ'  => '&Hopf;',
498
      'ℎ'  => '&planckh;',
499
      'ℏ'  => '&planck;',
500
      'ℐ'  => '&imagline;',
501
      'ℑ'  => '&Ifr;',
502
      'ℒ'  => '&lagran;',
503
      'ℓ'  => '&ell;',
504
      'ℕ'  => '&naturals;',
505
      '№'  => '&numero;',
506
      '℗'  => '&copysr;',
507
      '℘'  => '&wp;',
508
      'ℙ'  => '&primes;',
509
      'ℚ'  => '&rationals;',
510
      'ℛ'  => '&realine;',
511
      'ℜ'  => '&Rfr;',
512
      'ℝ'  => '&Ropf;',
513
      '℞'  => '&rx;',
514
      '™'  => '&trade;',
515
      'ℤ'  => '&Zopf;',
516
      '℧'  => '&mho;',
517
      'ℨ'  => '&Zfr;',
518
      '℩'  => '&iiota;',
519
      'ℬ'  => '&Bscr;',
520
      'ℭ'  => '&Cfr;',
521
      'ℯ'  => '&escr;',
522
      'ℰ'  => '&expectation;',
523
      'ℱ'  => '&Fouriertrf;',
524
      'ℳ'  => '&Mellintrf;',
525
      'ℴ'  => '&orderof;',
526
      'ℵ'  => '&aleph;',
527
      'ℶ'  => '&beth;',
528
      'ℷ'  => '&gimel;',
529
      'ℸ'  => '&daleth;',
530
      'ⅅ'  => '&CapitalDifferentialD;',
531
      'ⅆ'  => '&DifferentialD;',
532
      'ⅇ'  => '&exponentiale;',
533
      'ⅈ'  => '&ImaginaryI;',
534
      '⅓'  => '&frac13;',
535
      '⅔'  => '&frac23;',
536
      '⅕'  => '&frac15;',
537
      '⅖'  => '&frac25;',
538
      '⅗'  => '&frac35;',
539
      '⅘'  => '&frac45;',
540
      '⅙'  => '&frac16;',
541
      '⅚'  => '&frac56;',
542
      '⅛'  => '&frac18;',
543
      '⅜'  => '&frac38;',
544
      '⅝'  => '&frac58;',
545
      '⅞'  => '&frac78;',
546
      '←'  => '&larr;',
547
      '↑'  => '&uarr;',
548
      '→'  => '&srarr;',
549
      '↓'  => '&darr;',
550
      '↔'  => '&harr;',
551
      '↕'  => '&UpDownArrow;',
552
      '↖'  => '&nwarrow;',
553
      '↗'  => '&UpperRightArrow;',
554
      '↘'  => '&LowerRightArrow;',
555
      '↙'  => '&swarr;',
556
      '↚'  => '&nleftarrow;',
557
      '↛'  => '&nrarr;',
558
      '↝'  => '&rarrw;',
559
      '↝̸' => '&nrarrw;',
560
      '↞'  => '&Larr;',
561
      '↟'  => '&Uarr;',
562
      '↠'  => '&twoheadrightarrow;',
563
      '↡'  => '&Darr;',
564
      '↢'  => '&larrtl;',
565
      '↣'  => '&rarrtl;',
566
      '↤'  => '&LeftTeeArrow;',
567
      '↥'  => '&UpTeeArrow;',
568
      '↦'  => '&map;',
569
      '↧'  => '&DownTeeArrow;',
570
      '↩'  => '&larrhk;',
571
      '↪'  => '&rarrhk;',
572
      '↫'  => '&larrlp;',
573
      '↬'  => '&looparrowright;',
574
      '↭'  => '&harrw;',
575
      '↮'  => '&nleftrightarrow;',
576
      '↰'  => '&Lsh;',
577
      '↱'  => '&rsh;',
578
      '↲'  => '&ldsh;',
579
      '↳'  => '&rdsh;',
580
      '↵'  => '&crarr;',
581
      '↶'  => '&curvearrowleft;',
582
      '↷'  => '&curarr;',
583
      '↺'  => '&olarr;',
584
      '↻'  => '&orarr;',
585
      '↼'  => '&leftharpoonup;',
586
      '↽'  => '&leftharpoondown;',
587
      '↾'  => '&RightUpVector;',
588
      '↿'  => '&uharl;',
589
      '⇀'  => '&rharu;',
590
      '⇁'  => '&rhard;',
591
      '⇂'  => '&RightDownVector;',
592
      '⇃'  => '&dharl;',
593
      '⇄'  => '&rightleftarrows;',
594
      '⇅'  => '&udarr;',
595
      '⇆'  => '&lrarr;',
596
      '⇇'  => '&llarr;',
597
      '⇈'  => '&upuparrows;',
598
      '⇉'  => '&rrarr;',
599
      '⇊'  => '&downdownarrows;',
600
      '⇋'  => '&leftrightharpoons;',
601
      '⇌'  => '&rightleftharpoons;',
602
      '⇍'  => '&nLeftarrow;',
603
      '⇎'  => '&nhArr;',
604
      '⇏'  => '&nrArr;',
605
      '⇐'  => '&DoubleLeftArrow;',
606
      '⇑'  => '&DoubleUpArrow;',
607
      '⇒'  => '&Implies;',
608
      '⇓'  => '&Downarrow;',
609
      '⇔'  => '&hArr;',
610
      '⇕'  => '&Updownarrow;',
611
      '⇖'  => '&nwArr;',
612
      '⇗'  => '&neArr;',
613
      '⇘'  => '&seArr;',
614
      '⇙'  => '&swArr;',
615
      '⇚'  => '&lAarr;',
616
      '⇛'  => '&rAarr;',
617
      '⇝'  => '&zigrarr;',
618
      '⇤'  => '&LeftArrowBar;',
619
      '⇥'  => '&RightArrowBar;',
620
      '⇵'  => '&DownArrowUpArrow;',
621
      '⇽'  => '&loarr;',
622
      '⇾'  => '&roarr;',
623
      '⇿'  => '&hoarr;',
624
      '∀'  => '&forall;',
625
      '∁'  => '&comp;',
626
      '∂'  => '&part;',
627
      '∂̸' => '&npart;',
628
      '∃'  => '&Exists;',
629
      '∄'  => '&nexist;',
630
      '∅'  => '&empty;',
631
      '∇'  => '&nabla;',
632
      '∈'  => '&isinv;',
633
      '∉'  => '&notin;',
634
      '∋'  => '&ReverseElement;',
635
      '∌'  => '&notniva;',
636
      '∏'  => '&prod;',
637
      '∐'  => '&Coproduct;',
638
      '∑'  => '&sum;',
639
      '−'  => '&minus;',
640
      '∓'  => '&MinusPlus;',
641
      '∔'  => '&plusdo;',
642
      '∖'  => '&ssetmn;',
643
      '∗'  => '&lowast;',
644
      '∘'  => '&compfn;',
645
      '√'  => '&Sqrt;',
646
      '∝'  => '&prop;',
647
      '∞'  => '&infin;',
648
      '∟'  => '&angrt;',
649
      '∠'  => '&angle;',
650
      '∠⃒' => '&nang;',
651
      '∡'  => '&angmsd;',
652
      '∢'  => '&angsph;',
653
      '∣'  => '&mid;',
654
      '∤'  => '&nshortmid;',
655
      '∥'  => '&shortparallel;',
656
      '∦'  => '&nparallel;',
657
      '∧'  => '&and;',
658
      '∨'  => '&or;',
659
      '∩'  => '&cap;',
660
      '∩︀' => '&caps;',
661
      '∪'  => '&cup;',
662
      '∪︀' => '&cups',
663
      '∫'  => '&Integral;',
664
      '∬'  => '&Int;',
665
      '∭'  => '&tint;',
666
      '∮'  => '&ContourIntegral;',
667
      '∯'  => '&DoubleContourIntegral;',
668
      '∰'  => '&Cconint;',
669
      '∱'  => '&cwint;',
670
      '∲'  => '&cwconint;',
671
      '∳'  => '&awconint;',
672
      '∴'  => '&there4;',
673
      '∵'  => '&Because;',
674
      '∶'  => '&ratio;',
675
      '∷'  => '&Colon;',
676
      '∸'  => '&minusd;',
677
      '∺'  => '&mDDot;',
678
      '∻'  => '&homtht;',
679
      '∼'  => '&sim;',
680
      '∼⃒' => '&nvsim;',
681
      '∽'  => '&bsim;',
682
      '∽̱' => '&race;',
683
      '∾'  => '&ac;',
684
      '∾̳' => '&acE;',
685
      '∿'  => '&acd;',
686
      '≀'  => '&wr;',
687
      '≁'  => '&NotTilde;',
688
      '≂'  => '&esim;',
689
      '≂̸' => '&nesim;',
690
      '≃'  => '&simeq;',
691
      '≄'  => '&nsime;',
692
      '≅'  => '&TildeFullEqual;',
693
      '≆'  => '&simne;',
694
      '≇'  => '&ncong;',
695
      '≈'  => '&approx;',
696
      '≉'  => '&napprox;',
697
      '≊'  => '&ape;',
698
      '≋'  => '&apid;',
699
      '≋̸' => '&napid;',
700
      '≌'  => '&bcong;',
701
      '≍'  => '&CupCap;',
702
      '≍⃒' => '&nvap;',
703
      '≎'  => '&bump;',
704
      '≎̸' => '&nbump;',
705
      '≏'  => '&HumpEqual;',
706
      '≏̸' => '&nbumpe;',
707
      '≐'  => '&esdot;',
708
      '≐̸' => '&nedot;',
709
      '≑'  => '&doteqdot;',
710
      '≒'  => '&fallingdotseq;',
711
      '≓'  => '&risingdotseq;',
712
      '≔'  => '&coloneq;',
713
      '≕'  => '&eqcolon;',
714
      '≖'  => '&ecir;',
715
      '≗'  => '&circeq;',
716
      '≙'  => '&wedgeq;',
717
      '≚'  => '&veeeq;',
718
      '≜'  => '&triangleq;',
719
      '≟'  => '&equest;',
720
      '≠'  => '&NotEqual;',
721
      '≡'  => '&Congruent;',
722
      '≡⃥' => '&bnequiv;',
723
      '≢'  => '&NotCongruent;',
724
      '≤'  => '&leq;',
725
      '≤⃒' => '&nvle;',
726
      '≥'  => '&ge;',
727
      '≥⃒' => '&nvge;',
728
      '≦'  => '&lE;',
729
      '≦̸' => '&nlE;',
730
      '≧'  => '&geqq;',
731
      '≧̸' => '&NotGreaterFullEqual;',
732
      '≨'  => '&lneqq;',
733
      '≨︀' => '&lvertneqq;',
734
      '≩'  => '&gneqq;',
735
      '≩︀' => '&gvertneqq;',
736
      '≪'  => '&ll;',
737
      '≪̸' => '&nLtv;',
738
      '≪⃒' => '&nLt;',
739
      '≫'  => '&gg;',
740
      '≫̸' => '&NotGreaterGreater;',
741
      '≫⃒' => '&nGt;',
742
      '≬'  => '&between;',
743
      '≭'  => '&NotCupCap;',
744
      '≮'  => '&NotLess;',
745
      '≯'  => '&ngtr;',
746
      '≰'  => '&NotLessEqual;',
747
      '≱'  => '&ngeq;',
748
      '≲'  => '&LessTilde;',
749
      '≳'  => '&GreaterTilde;',
750
      '≴'  => '&nlsim;',
751
      '≵'  => '&ngsim;',
752
      '≶'  => '&lessgtr;',
753
      '≷'  => '&gl;',
754
      '≸'  => '&ntlg;',
755
      '≹'  => '&NotGreaterLess;',
756
      '≺'  => '&prec;',
757
      '≻'  => '&succ;',
758
      '≼'  => '&PrecedesSlantEqual;',
759
      '≽'  => '&succcurlyeq;',
760
      '≾'  => '&precsim;',
761
      '≿'  => '&SucceedsTilde;',
762
      '≿̸' => '&NotSucceedsTilde;',
763
      '⊀'  => '&npr;',
764
      '⊁'  => '&NotSucceeds;',
765
      '⊂'  => '&sub;',
766
      '⊂⃒' => '&vnsub;',
767
      '⊃'  => '&sup;',
768
      '⊃⃒' => '&nsupset;',
769
      '⊄'  => '&nsub;',
770
      '⊅'  => '&nsup;',
771
      '⊆'  => '&SubsetEqual;',
772
      '⊇'  => '&supe;',
773
      '⊈'  => '&NotSubsetEqual;',
774
      '⊉'  => '&NotSupersetEqual;',
775
      '⊊'  => '&subsetneq;',
776
      '⊊︀' => '&vsubne;',
777
      '⊋'  => '&supsetneq;',
778
      '⊋︀' => '&vsupne;',
779
      '⊍'  => '&cupdot;',
780
      '⊎'  => '&UnionPlus;',
781
      '⊏'  => '&sqsub;',
782
      '⊏̸' => '&NotSquareSubset;',
783
      '⊐'  => '&sqsupset;',
784
      '⊐̸' => '&NotSquareSuperset;',
785
      '⊑'  => '&SquareSubsetEqual;',
786
      '⊒'  => '&SquareSupersetEqual;',
787
      '⊓'  => '&sqcap;',
788
      '⊓︀' => '&sqcaps;',
789
      '⊔'  => '&sqcup;',
790
      '⊔︀' => '&sqcups;',
791
      '⊕'  => '&CirclePlus;',
792
      '⊖'  => '&ominus;',
793
      '⊗'  => '&CircleTimes;',
794
      '⊘'  => '&osol;',
795
      '⊙'  => '&CircleDot;',
796
      '⊚'  => '&ocir;',
797
      '⊛'  => '&oast;',
798
      '⊝'  => '&odash;',
799
      '⊞'  => '&boxplus;',
800
      '⊟'  => '&boxminus;',
801
      '⊠'  => '&timesb;',
802
      '⊡'  => '&sdotb;',
803
      '⊢'  => '&vdash;',
804
      '⊣'  => '&dashv;',
805
      '⊤'  => '&DownTee;',
806
      '⊥'  => '&perp;',
807
      '⊧'  => '&models;',
808
      '⊨'  => '&DoubleRightTee;',
809
      '⊩'  => '&Vdash;',
810
      '⊪'  => '&Vvdash;',
811
      '⊫'  => '&VDash;',
812
      '⊬'  => '&nvdash;',
813
      '⊭'  => '&nvDash;',
814
      '⊮'  => '&nVdash;',
815
      '⊯'  => '&nVDash;',
816
      '⊰'  => '&prurel;',
817
      '⊲'  => '&vartriangleleft;',
818
      '⊳'  => '&vrtri;',
819
      '⊴'  => '&LeftTriangleEqual;',
820
      '⊴⃒' => '&nvltrie;',
821
      '⊵'  => '&RightTriangleEqual;',
822
      '⊵⃒' => '&nvrtrie;',
823
      '⊶'  => '&origof;',
824
      '⊷'  => '&imof;',
825
      '⊸'  => '&mumap;',
826
      '⊹'  => '&hercon;',
827
      '⊺'  => '&intcal;',
828
      '⊻'  => '&veebar;',
829
      '⊽'  => '&barvee;',
830
      '⊾'  => '&angrtvb;',
831
      '⊿'  => '&lrtri;',
832
      '⋀'  => '&xwedge;',
833
      '⋁'  => '&xvee;',
834
      '⋂'  => '&bigcap;',
835
      '⋃'  => '&bigcup;',
836
      '⋄'  => '&diamond;',
837
      '⋅'  => '&sdot;',
838
      '⋆'  => '&Star;',
839
      '⋇'  => '&divonx;',
840
      '⋈'  => '&bowtie;',
841
      '⋉'  => '&ltimes;',
842
      '⋊'  => '&rtimes;',
843
      '⋋'  => '&lthree;',
844
      '⋌'  => '&rthree;',
845
      '⋍'  => '&backsimeq;',
846
      '⋎'  => '&curlyvee;',
847
      '⋏'  => '&curlywedge;',
848
      '⋐'  => '&Sub;',
849
      '⋑'  => '&Supset;',
850
      '⋒'  => '&Cap;',
851
      '⋓'  => '&Cup;',
852
      '⋔'  => '&pitchfork;',
853
      '⋕'  => '&epar;',
854
      '⋖'  => '&lessdot;',
855
      '⋗'  => '&gtrdot;',
856
      '⋘'  => '&Ll;',
857
      '⋘̸' => '&nLl;',
858
      '⋙'  => '&Gg;',
859
      '⋙̸' => '&nGg;',
860
      '⋚'  => '&lesseqgtr;',
861
      '⋚︀' => '&lesg;',
862
      '⋛'  => '&gtreqless;',
863
      '⋛︀' => '&gesl;',
864
      '⋞'  => '&curlyeqprec;',
865
      '⋟'  => '&cuesc;',
866
      '⋠'  => '&NotPrecedesSlantEqual;',
867
      '⋡'  => '&NotSucceedsSlantEqual;',
868
      '⋢'  => '&NotSquareSubsetEqual;',
869
      '⋣'  => '&NotSquareSupersetEqual;',
870
      '⋦'  => '&lnsim;',
871
      '⋧'  => '&gnsim;',
872
      '⋨'  => '&precnsim;',
873
      '⋩'  => '&scnsim;',
874
      '⋪'  => '&nltri;',
875
      '⋫'  => '&ntriangleright;',
876
      '⋬'  => '&nltrie;',
877
      '⋭'  => '&NotRightTriangleEqual;',
878
      '⋮'  => '&vellip;',
879
      '⋯'  => '&ctdot;',
880
      '⋰'  => '&utdot;',
881
      '⋱'  => '&dtdot;',
882
      '⋲'  => '&disin;',
883
      '⋳'  => '&isinsv;',
884
      '⋴'  => '&isins;',
885
      '⋵'  => '&isindot;',
886
      '⋵̸' => '&notindot;',
887
      '⋶'  => '&notinvc;',
888
      '⋷'  => '&notinvb;',
889
      '⋹'  => '&isinE;',
890
      '⋹̸' => '&notinE;',
891
      '⋺'  => '&nisd;',
892
      '⋻'  => '&xnis;',
893
      '⋼'  => '&nis;',
894
      '⋽'  => '&notnivc;',
895
      '⋾'  => '&notnivb;',
896
      '⌅'  => '&barwed;',
897
      '⌆'  => '&doublebarwedge;',
898
      '⌈'  => '&lceil;',
899
      '⌉'  => '&RightCeiling;',
900
      '⌊'  => '&LeftFloor;',
901
      '⌋'  => '&RightFloor;',
902
      '⌌'  => '&drcrop;',
903
      '⌍'  => '&dlcrop;',
904
      '⌎'  => '&urcrop;',
905
      '⌏'  => '&ulcrop;',
906
      '⌐'  => '&bnot;',
907
      '⌒'  => '&profline;',
908
      '⌓'  => '&profsurf;',
909
      '⌕'  => '&telrec;',
910
      '⌖'  => '&target;',
911
      '⌜'  => '&ulcorner;',
912
      '⌝'  => '&urcorner;',
913
      '⌞'  => '&llcorner;',
914
      '⌟'  => '&drcorn;',
915
      '⌢'  => '&frown;',
916
      '⌣'  => '&smile;',
917
      '⌭'  => '&cylcty;',
918
      '⌮'  => '&profalar;',
919
      '⌶'  => '&topbot;',
920
      '⌽'  => '&ovbar;',
921
      '⌿'  => '&solbar;',
922
      '⍼'  => '&angzarr;',
923
      '⎰'  => '&lmoust;',
924
      '⎱'  => '&rmoust;',
925
      '⎴'  => '&OverBracket;',
926
      '⎵'  => '&bbrk;',
927
      '⎶'  => '&bbrktbrk;',
928
      '⏜'  => '&OverParenthesis;',
929
      '⏝'  => '&UnderParenthesis;',
930
      '⏞'  => '&OverBrace;',
931
      '⏟'  => '&UnderBrace;',
932
      '⏢'  => '&trpezium;',
933
      '⏧'  => '&elinters;',
934
      '␣'  => '&blank;',
935
      'Ⓢ'  => '&oS;',
936
      '─'  => '&HorizontalLine;',
937
      '│'  => '&boxv;',
938
      '┌'  => '&boxdr;',
939
      '┐'  => '&boxdl;',
940
      '└'  => '&boxur;',
941
      '┘'  => '&boxul;',
942
      '├'  => '&boxvr;',
943
      '┤'  => '&boxvl;',
944
      '┬'  => '&boxhd;',
945
      '┴'  => '&boxhu;',
946
      '┼'  => '&boxvh;',
947
      '═'  => '&boxH;',
948
      '║'  => '&boxV;',
949
      '╒'  => '&boxdR;',
950
      '╓'  => '&boxDr;',
951
      '╔'  => '&boxDR;',
952
      '╕'  => '&boxdL;',
953
      '╖'  => '&boxDl;',
954
      '╗'  => '&boxDL;',
955
      '╘'  => '&boxuR;',
956
      '╙'  => '&boxUr;',
957
      '╚'  => '&boxUR;',
958
      '╛'  => '&boxuL;',
959
      '╜'  => '&boxUl;',
960
      '╝'  => '&boxUL;',
961
      '╞'  => '&boxvR;',
962
      '╟'  => '&boxVr;',
963
      '╠'  => '&boxVR;',
964
      '╡'  => '&boxvL;',
965
      '╢'  => '&boxVl;',
966
      '╣'  => '&boxVL;',
967
      '╤'  => '&boxHd;',
968
      '╥'  => '&boxhD;',
969
      '╦'  => '&boxHD;',
970
      '╧'  => '&boxHu;',
971
      '╨'  => '&boxhU;',
972
      '╩'  => '&boxHU;',
973
      '╪'  => '&boxvH;',
974
      '╫'  => '&boxVh;',
975
      '╬'  => '&boxVH;',
976
      '▀'  => '&uhblk;',
977
      '▄'  => '&lhblk;',
978
      '█'  => '&block;',
979
      '░'  => '&blk14;',
980
      '▒'  => '&blk12;',
981
      '▓'  => '&blk34;',
982
      '□'  => '&Square;',
983
      '▪'  => '&squarf;',
984
      '▫'  => '&EmptyVerySmallSquare;',
985
      '▭'  => '&rect;',
986
      '▮'  => '&marker;',
987
      '▱'  => '&fltns;',
988
      '△'  => '&bigtriangleup;',
989
      '▴'  => '&blacktriangle;',
990
      '▵'  => '&triangle;',
991
      '▸'  => '&blacktriangleright;',
992
      '▹'  => '&rtri;',
993
      '▽'  => '&bigtriangledown;',
994
      '▾'  => '&blacktriangledown;',
995
      '▿'  => '&triangledown;',
996
      '◂'  => '&blacktriangleleft;',
997
      '◃'  => '&ltri;',
998
      '◊'  => '&lozenge;',
999
      '○'  => '&cir;',
1000
      '◬'  => '&tridot;',
1001
      '◯'  => '&bigcirc;',
1002
      '◸'  => '&ultri;',
1003
      '◹'  => '&urtri;',
1004
      '◺'  => '&lltri;',
1005
      '◻'  => '&EmptySmallSquare;',
1006
      '◼'  => '&FilledSmallSquare;',
1007
      '★'  => '&starf;',
1008
      '☆'  => '&star;',
1009
      '☎'  => '&phone;',
1010
      '♀'  => '&female;',
1011
      '♂'  => '&male;',
1012
      '♠'  => '&spadesuit;',
1013
      '♣'  => '&clubs;',
1014
      '♥'  => '&hearts;',
1015
      '♦'  => '&diamondsuit;',
1016
      '♪'  => '&sung;',
1017
      '♭'  => '&flat;',
1018
      '♮'  => '&natur;',
1019
      '♯'  => '&sharp;',
1020
      '✓'  => '&check;',
1021
      '✗'  => '&cross;',
1022
      '✠'  => '&maltese;',
1023
      '✶'  => '&sext;',
1024
      '❘'  => '&VerticalSeparator;',
1025
      '❲'  => '&lbbrk;',
1026
      '❳'  => '&rbbrk;',
1027
      '⟈'  => '&bsolhsub;',
1028
      '⟉'  => '&suphsol;',
1029
      '⟦'  => '&LeftDoubleBracket;',
1030
      '⟧'  => '&RightDoubleBracket;',
1031
      '⟨'  => '&langle;',
1032
      '⟩'  => '&RightAngleBracket;',
1033
      '⟪'  => '&Lang;',
1034
      '⟫'  => '&Rang;',
1035
      '⟬'  => '&loang;',
1036
      '⟭'  => '&roang;',
1037
      '⟵'  => '&longleftarrow;',
1038
      '⟶'  => '&LongRightArrow;',
1039
      '⟷'  => '&LongLeftRightArrow;',
1040
      '⟸'  => '&xlArr;',
1041
      '⟹'  => '&DoubleLongRightArrow;',
1042
      '⟺'  => '&xhArr;',
1043
      '⟼'  => '&xmap;',
1044
      '⟿'  => '&dzigrarr;',
1045
      '⤂'  => '&nvlArr;',
1046
      '⤃'  => '&nvrArr;',
1047
      '⤄'  => '&nvHarr;',
1048
      '⤅'  => '&Map;',
1049
      '⤌'  => '&lbarr;',
1050
      '⤍'  => '&bkarow;',
1051
      '⤎'  => '&lBarr;',
1052
      '⤏'  => '&dbkarow;',
1053
      '⤐'  => '&drbkarow;',
1054
      '⤑'  => '&DDotrahd;',
1055
      '⤒'  => '&UpArrowBar;',
1056
      '⤓'  => '&DownArrowBar;',
1057
      '⤖'  => '&Rarrtl;',
1058
      '⤙'  => '&latail;',
1059
      '⤚'  => '&ratail;',
1060
      '⤛'  => '&lAtail;',
1061
      '⤜'  => '&rAtail;',
1062
      '⤝'  => '&larrfs;',
1063
      '⤞'  => '&rarrfs;',
1064
      '⤟'  => '&larrbfs;',
1065
      '⤠'  => '&rarrbfs;',
1066
      '⤣'  => '&nwarhk;',
1067
      '⤤'  => '&nearhk;',
1068
      '⤥'  => '&searhk;',
1069
      '⤦'  => '&swarhk;',
1070
      '⤧'  => '&nwnear;',
1071
      '⤨'  => '&toea;',
1072
      '⤩'  => '&seswar;',
1073
      '⤪'  => '&swnwar;',
1074
      '⤳'  => '&rarrc;',
1075
      '⤳̸' => '&nrarrc;',
1076
      '⤵'  => '&cudarrr;',
1077
      '⤶'  => '&ldca;',
1078
      '⤷'  => '&rdca;',
1079
      '⤸'  => '&cudarrl;',
1080
      '⤹'  => '&larrpl;',
1081
      '⤼'  => '&curarrm;',
1082
      '⤽'  => '&cularrp;',
1083
      '⥅'  => '&rarrpl;',
1084
      '⥈'  => '&harrcir;',
1085
      '⥉'  => '&Uarrocir;',
1086
      '⥊'  => '&lurdshar;',
1087
      '⥋'  => '&ldrushar;',
1088
      '⥎'  => '&LeftRightVector;',
1089
      '⥏'  => '&RightUpDownVector;',
1090
      '⥐'  => '&DownLeftRightVector;',
1091
      '⥑'  => '&LeftUpDownVector;',
1092
      '⥒'  => '&LeftVectorBar;',
1093
      '⥓'  => '&RightVectorBar;',
1094
      '⥔'  => '&RightUpVectorBar;',
1095
      '⥕'  => '&RightDownVectorBar;',
1096
      '⥖'  => '&DownLeftVectorBar;',
1097
      '⥗'  => '&DownRightVectorBar;',
1098
      '⥘'  => '&LeftUpVectorBar;',
1099
      '⥙'  => '&LeftDownVectorBar;',
1100
      '⥚'  => '&LeftTeeVector;',
1101
      '⥛'  => '&RightTeeVector;',
1102
      '⥜'  => '&RightUpTeeVector;',
1103
      '⥝'  => '&RightDownTeeVector;',
1104
      '⥞'  => '&DownLeftTeeVector;',
1105
      '⥟'  => '&DownRightTeeVector;',
1106
      '⥠'  => '&LeftUpTeeVector;',
1107
      '⥡'  => '&LeftDownTeeVector;',
1108
      '⥢'  => '&lHar;',
1109
      '⥣'  => '&uHar;',
1110
      '⥤'  => '&rHar;',
1111
      '⥥'  => '&dHar;',
1112
      '⥦'  => '&luruhar;',
1113
      '⥧'  => '&ldrdhar;',
1114
      '⥨'  => '&ruluhar;',
1115
      '⥩'  => '&rdldhar;',
1116
      '⥪'  => '&lharul;',
1117
      '⥫'  => '&llhard;',
1118
      '⥬'  => '&rharul;',
1119
      '⥭'  => '&lrhard;',
1120
      '⥮'  => '&udhar;',
1121
      '⥯'  => '&ReverseUpEquilibrium;',
1122
      '⥰'  => '&RoundImplies;',
1123
      '⥱'  => '&erarr;',
1124
      '⥲'  => '&simrarr;',
1125
      '⥳'  => '&larrsim;',
1126
      '⥴'  => '&rarrsim;',
1127
      '⥵'  => '&rarrap;',
1128
      '⥶'  => '&ltlarr;',
1129
      '⥸'  => '&gtrarr;',
1130
      '⥹'  => '&subrarr;',
1131
      '⥻'  => '&suplarr;',
1132
      '⥼'  => '&lfisht;',
1133
      '⥽'  => '&rfisht;',
1134
      '⥾'  => '&ufisht;',
1135
      '⥿'  => '&dfisht;',
1136
      '⦅'  => '&lopar;',
1137
      '⦆'  => '&ropar;',
1138
      '⦋'  => '&lbrke;',
1139
      '⦌'  => '&rbrke;',
1140
      '⦍'  => '&lbrkslu;',
1141
      '⦎'  => '&rbrksld;',
1142
      '⦏'  => '&lbrksld;',
1143
      '⦐'  => '&rbrkslu;',
1144
      '⦑'  => '&langd;',
1145
      '⦒'  => '&rangd;',
1146
      '⦓'  => '&lparlt;',
1147
      '⦔'  => '&rpargt;',
1148
      '⦕'  => '&gtlPar;',
1149
      '⦖'  => '&ltrPar;',
1150
      '⦚'  => '&vzigzag;',
1151
      '⦜'  => '&vangrt;',
1152
      '⦝'  => '&angrtvbd;',
1153
      '⦤'  => '&ange;',
1154
      '⦥'  => '&range;',
1155
      '⦦'  => '&dwangle;',
1156
      '⦧'  => '&uwangle;',
1157
      '⦨'  => '&angmsdaa;',
1158
      '⦩'  => '&angmsdab;',
1159
      '⦪'  => '&angmsdac;',
1160
      '⦫'  => '&angmsdad;',
1161
      '⦬'  => '&angmsdae;',
1162
      '⦭'  => '&angmsdaf;',
1163
      '⦮'  => '&angmsdag;',
1164
      '⦯'  => '&angmsdah;',
1165
      '⦰'  => '&bemptyv;',
1166
      '⦱'  => '&demptyv;',
1167
      '⦲'  => '&cemptyv;',
1168
      '⦳'  => '&raemptyv;',
1169
      '⦴'  => '&laemptyv;',
1170
      '⦵'  => '&ohbar;',
1171
      '⦶'  => '&omid;',
1172
      '⦷'  => '&opar;',
1173
      '⦹'  => '&operp;',
1174
      '⦻'  => '&olcross;',
1175
      '⦼'  => '&odsold;',
1176
      '⦾'  => '&olcir;',
1177
      '⦿'  => '&ofcir;',
1178
      '⧀'  => '&olt;',
1179
      '⧁'  => '&ogt;',
1180
      '⧂'  => '&cirscir;',
1181
      '⧃'  => '&cirE;',
1182
      '⧄'  => '&solb;',
1183
      '⧅'  => '&bsolb;',
1184
      '⧉'  => '&boxbox;',
1185
      '⧍'  => '&trisb;',
1186
      '⧎'  => '&rtriltri;',
1187
      '⧏'  => '&LeftTriangleBar;',
1188
      '⧏̸' => '&NotLeftTriangleBar;',
1189
      '⧐'  => '&RightTriangleBar;',
1190
      '⧐̸' => '&NotRightTriangleBar;',
1191
      '⧜'  => '&iinfin;',
1192
      '⧝'  => '&infintie;',
1193
      '⧞'  => '&nvinfin;',
1194
      '⧣'  => '&eparsl;',
1195
      '⧤'  => '&smeparsl;',
1196
      '⧥'  => '&eqvparsl;',
1197
      '⧫'  => '&lozf;',
1198
      '⧴'  => '&RuleDelayed;',
1199
      '⧶'  => '&dsol;',
1200
      '⨀'  => '&xodot;',
1201
      '⨁'  => '&bigoplus;',
1202
      '⨂'  => '&bigotimes;',
1203
      '⨄'  => '&biguplus;',
1204
      '⨆'  => '&bigsqcup;',
1205
      '⨌'  => '&iiiint;',
1206
      '⨍'  => '&fpartint;',
1207
      '⨐'  => '&cirfnint;',
1208
      '⨑'  => '&awint;',
1209
      '⨒'  => '&rppolint;',
1210
      '⨓'  => '&scpolint;',
1211
      '⨔'  => '&npolint;',
1212
      '⨕'  => '&pointint;',
1213
      '⨖'  => '&quatint;',
1214
      '⨗'  => '&intlarhk;',
1215
      '⨢'  => '&pluscir;',
1216
      '⨣'  => '&plusacir;',
1217
      '⨤'  => '&simplus;',
1218
      '⨥'  => '&plusdu;',
1219
      '⨦'  => '&plussim;',
1220
      '⨧'  => '&plustwo;',
1221
      '⨩'  => '&mcomma;',
1222
      '⨪'  => '&minusdu;',
1223
      '⨭'  => '&loplus;',
1224
      '⨮'  => '&roplus;',
1225
      '⨯'  => '&Cross;',
1226
      '⨰'  => '&timesd;',
1227
      '⨱'  => '&timesbar;',
1228
      '⨳'  => '&smashp;',
1229
      '⨴'  => '&lotimes;',
1230
      '⨵'  => '&rotimes;',
1231
      '⨶'  => '&otimesas;',
1232
      '⨷'  => '&Otimes;',
1233
      '⨸'  => '&odiv;',
1234
      '⨹'  => '&triplus;',
1235
      '⨺'  => '&triminus;',
1236
      '⨻'  => '&tritime;',
1237
      '⨼'  => '&iprod;',
1238
      '⨿'  => '&amalg;',
1239
      '⩀'  => '&capdot;',
1240
      '⩂'  => '&ncup;',
1241
      '⩃'  => '&ncap;',
1242
      '⩄'  => '&capand;',
1243
      '⩅'  => '&cupor;',
1244
      '⩆'  => '&cupcap;',
1245
      '⩇'  => '&capcup;',
1246
      '⩈'  => '&cupbrcap;',
1247
      '⩉'  => '&capbrcup;',
1248
      '⩊'  => '&cupcup;',
1249
      '⩋'  => '&capcap;',
1250
      '⩌'  => '&ccups;',
1251
      '⩍'  => '&ccaps;',
1252
      '⩐'  => '&ccupssm;',
1253
      '⩓'  => '&And;',
1254
      '⩔'  => '&Or;',
1255
      '⩕'  => '&andand;',
1256
      '⩖'  => '&oror;',
1257
      '⩗'  => '&orslope;',
1258
      '⩘'  => '&andslope;',
1259
      '⩚'  => '&andv;',
1260
      '⩛'  => '&orv;',
1261
      '⩜'  => '&andd;',
1262
      '⩝'  => '&ord;',
1263
      '⩟'  => '&wedbar;',
1264
      '⩦'  => '&sdote;',
1265
      '⩪'  => '&simdot;',
1266
      '⩭'  => '&congdot;',
1267
      '⩭̸' => '&ncongdot;',
1268
      '⩮'  => '&easter;',
1269
      '⩯'  => '&apacir;',
1270
      '⩰'  => '&apE;',
1271
      '⩰̸' => '&napE;',
1272
      '⩱'  => '&eplus;',
1273
      '⩲'  => '&pluse;',
1274
      '⩳'  => '&Esim;',
1275
      '⩴'  => '&Colone;',
1276
      '⩵'  => '&Equal;',
1277
      '⩷'  => '&ddotseq;',
1278
      '⩸'  => '&equivDD;',
1279
      '⩹'  => '&ltcir;',
1280
      '⩺'  => '&gtcir;',
1281
      '⩻'  => '&ltquest;',
1282
      '⩼'  => '&gtquest;',
1283
      '⩽'  => '&les;',
1284
      '⩽̸' => '&nles;',
1285
      '⩾'  => '&ges;',
1286
      '⩾̸' => '&nges;',
1287
      '⩿'  => '&lesdot;',
1288
      '⪀'  => '&gesdot;',
1289
      '⪁'  => '&lesdoto;',
1290
      '⪂'  => '&gesdoto;',
1291
      '⪃'  => '&lesdotor;',
1292
      '⪄'  => '&gesdotol;',
1293
      '⪅'  => '&lap;',
1294
      '⪆'  => '&gap;',
1295
      '⪇'  => '&lne;',
1296
      '⪈'  => '&gne;',
1297
      '⪉'  => '&lnap;',
1298
      '⪊'  => '&gnap;',
1299
      '⪋'  => '&lesseqqgtr;',
1300
      '⪌'  => '&gEl;',
1301
      '⪍'  => '&lsime;',
1302
      '⪎'  => '&gsime;',
1303
      '⪏'  => '&lsimg;',
1304
      '⪐'  => '&gsiml;',
1305
      '⪑'  => '&lgE;',
1306
      '⪒'  => '&glE;',
1307
      '⪓'  => '&lesges;',
1308
      '⪔'  => '&gesles;',
1309
      '⪕'  => '&els;',
1310
      '⪖'  => '&egs;',
1311
      '⪗'  => '&elsdot;',
1312
      '⪘'  => '&egsdot;',
1313
      '⪙'  => '&el;',
1314
      '⪚'  => '&eg;',
1315
      '⪝'  => '&siml;',
1316
      '⪞'  => '&simg;',
1317
      '⪟'  => '&simlE;',
1318
      '⪠'  => '&simgE;',
1319
      '⪡'  => '&LessLess;',
1320
      '⪡̸' => '&NotNestedLessLess;',
1321
      '⪢'  => '&GreaterGreater;',
1322
      '⪢̸' => '&NotNestedGreaterGreater;',
1323
      '⪤'  => '&glj;',
1324
      '⪥'  => '&gla;',
1325
      '⪦'  => '&ltcc;',
1326
      '⪧'  => '&gtcc;',
1327
      '⪨'  => '&lescc;',
1328
      '⪩'  => '&gescc;',
1329
      '⪪'  => '&smt;',
1330
      '⪫'  => '&lat;',
1331
      '⪬'  => '&smte;',
1332
      '⪬︀' => '&smtes;',
1333
      '⪭'  => '&late;',
1334
      '⪭︀' => '&lates;',
1335
      '⪮'  => '&bumpE;',
1336
      '⪯'  => '&preceq;',
1337
      '⪯̸' => '&NotPrecedesEqual;',
1338
      '⪰'  => '&SucceedsEqual;',
1339
      '⪰̸' => '&NotSucceedsEqual;',
1340
      '⪳'  => '&prE;',
1341
      '⪴'  => '&scE;',
1342
      '⪵'  => '&precneqq;',
1343
      '⪶'  => '&scnE;',
1344
      '⪷'  => '&precapprox;',
1345
      '⪸'  => '&succapprox;',
1346
      '⪹'  => '&precnapprox;',
1347
      '⪺'  => '&succnapprox;',
1348
      '⪻'  => '&Pr;',
1349
      '⪼'  => '&Sc;',
1350
      '⪽'  => '&subdot;',
1351
      '⪾'  => '&supdot;',
1352
      '⪿'  => '&subplus;',
1353
      '⫀'  => '&supplus;',
1354
      '⫁'  => '&submult;',
1355
      '⫂'  => '&supmult;',
1356
      '⫃'  => '&subedot;',
1357
      '⫄'  => '&supedot;',
1358
      '⫅'  => '&subE;',
1359
      '⫅̸' => '&nsubE;',
1360
      '⫆'  => '&supseteqq;',
1361
      '⫆̸' => '&nsupseteqq;',
1362
      '⫇'  => '&subsim;',
1363
      '⫈'  => '&supsim;',
1364
      '⫋'  => '&subsetneqq;',
1365
      '⫋︀' => '&vsubnE;',
1366
      '⫌'  => '&supnE;',
1367
      '⫌︀' => '&varsupsetneqq;',
1368
      '⫏'  => '&csub;',
1369
      '⫐'  => '&csup;',
1370
      '⫑'  => '&csube;',
1371
      '⫒'  => '&csupe;',
1372
      '⫓'  => '&subsup;',
1373
      '⫔'  => '&supsub;',
1374
      '⫕'  => '&subsub;',
1375
      '⫖'  => '&supsup;',
1376
      '⫗'  => '&suphsub;',
1377
      '⫘'  => '&supdsub;',
1378
      '⫙'  => '&forkv;',
1379
      '⫚'  => '&topfork;',
1380
      '⫛'  => '&mlcp;',
1381
      '⫤'  => '&Dashv;',
1382
      '⫦'  => '&Vdashl;',
1383
      '⫧'  => '&Barv;',
1384
      '⫨'  => '&vBar;',
1385
      '⫩'  => '&vBarv;',
1386
      '⫫'  => '&Vbar;',
1387
      '⫬'  => '&Not;',
1388
      '⫭'  => '&bNot;',
1389
      '⫮'  => '&rnmid;',
1390
      '⫯'  => '&cirmid;',
1391
      '⫰'  => '&midcir;',
1392
      '⫱'  => '&topcir;',
1393
      '⫲'  => '&nhpar;',
1394
      '⫳'  => '&parsim;',
1395
      '⫽'  => '&parsl;',
1396
      '⫽⃥' => '&nparsl;',
1397
      'ff'  => '&fflig;',
1398
      'fi'  => '&filig;',
1399
      'fl'  => '&fllig;',
1400
      'ffi'  => '&ffilig;',
1401
      'ffl'  => '&ffllig;',
1402
      '𝒜' => '&Ascr;',
1403
      '𝒞' => '&Cscr;',
1404
      '𝒟' => '&Dscr;',
1405
      '𝒢' => '&Gscr;',
1406
      '𝒥' => '&Jscr;',
1407
      '𝒦' => '&Kscr;',
1408
      '𝒩' => '&Nscr;',
1409
      '𝒪' => '&Oscr;',
1410
      '𝒫' => '&Pscr;',
1411
      '𝒬' => '&Qscr;',
1412
      '𝒮' => '&Sscr;',
1413
      '𝒯' => '&Tscr;',
1414
      '𝒰' => '&Uscr;',
1415
      '𝒱' => '&Vscr;',
1416
      '𝒲' => '&Wscr;',
1417
      '𝒳' => '&Xscr;',
1418
      '𝒴' => '&Yscr;',
1419
      '𝒵' => '&Zscr;',
1420
      '𝒶' => '&ascr;',
1421
      '𝒷' => '&bscr;',
1422
      '𝒸' => '&cscr;',
1423
      '𝒹' => '&dscr;',
1424
      '𝒻' => '&fscr;',
1425
      '𝒽' => '&hscr;',
1426
      '𝒾' => '&iscr;',
1427
      '𝒿' => '&jscr;',
1428
      '𝓀' => '&kscr;',
1429
      '𝓁' => '&lscr;',
1430
      '𝓂' => '&mscr;',
1431
      '𝓃' => '&nscr;',
1432
      '𝓅' => '&pscr;',
1433
      '𝓆' => '&qscr;',
1434
      '𝓇' => '&rscr;',
1435
      '𝓈' => '&sscr;',
1436
      '𝓉' => '&tscr;',
1437
      '𝓊' => '&uscr;',
1438
      '𝓋' => '&vscr;',
1439
      '𝓌' => '&wscr;',
1440
      '𝓍' => '&xscr;',
1441
      '𝓎' => '&yscr;',
1442
      '𝓏' => '&zscr;',
1443
      '𝔄' => '&Afr;',
1444
      '𝔅' => '&Bfr;',
1445
      '𝔇' => '&Dfr;',
1446
      '𝔈' => '&Efr;',
1447
      '𝔉' => '&Ffr;',
1448
      '𝔊' => '&Gfr;',
1449
      '𝔍' => '&Jfr;',
1450
      '𝔎' => '&Kfr;',
1451
      '𝔏' => '&Lfr;',
1452
      '𝔐' => '&Mfr;',
1453
      '𝔑' => '&Nfr;',
1454
      '𝔒' => '&Ofr;',
1455
      '𝔓' => '&Pfr;',
1456
      '𝔔' => '&Qfr;',
1457
      '𝔖' => '&Sfr;',
1458
      '𝔗' => '&Tfr;',
1459
      '𝔘' => '&Ufr;',
1460
      '𝔙' => '&Vfr;',
1461
      '𝔚' => '&Wfr;',
1462
      '𝔛' => '&Xfr;',
1463
      '𝔜' => '&Yfr;',
1464
      '𝔞' => '&afr;',
1465
      '𝔟' => '&bfr;',
1466
      '𝔠' => '&cfr;',
1467
      '𝔡' => '&dfr;',
1468
      '𝔢' => '&efr;',
1469
      '𝔣' => '&ffr;',
1470
      '𝔤' => '&gfr;',
1471
      '𝔥' => '&hfr;',
1472
      '𝔦' => '&ifr;',
1473
      '𝔧' => '&jfr;',
1474
      '𝔨' => '&kfr;',
1475
      '𝔩' => '&lfr;',
1476
      '𝔪' => '&mfr;',
1477
      '𝔫' => '&nfr;',
1478
      '𝔬' => '&ofr;',
1479
      '𝔭' => '&pfr;',
1480
      '𝔮' => '&qfr;',
1481
      '𝔯' => '&rfr;',
1482
      '𝔰' => '&sfr;',
1483
      '𝔱' => '&tfr;',
1484
      '𝔲' => '&ufr;',
1485
      '𝔳' => '&vfr;',
1486
      '𝔴' => '&wfr;',
1487
      '𝔵' => '&xfr;',
1488
      '𝔶' => '&yfr;',
1489
      '𝔷' => '&zfr;',
1490
      '𝔸' => '&Aopf;',
1491
      '𝔹' => '&Bopf;',
1492
      '𝔻' => '&Dopf;',
1493
      '𝔼' => '&Eopf;',
1494
      '𝔽' => '&Fopf;',
1495
      '𝔾' => '&Gopf;',
1496
      '𝕀' => '&Iopf;',
1497
      '𝕁' => '&Jopf;',
1498
      '𝕂' => '&Kopf;',
1499
      '𝕃' => '&Lopf;',
1500
      '𝕄' => '&Mopf;',
1501
      '𝕆' => '&Oopf;',
1502
      '𝕊' => '&Sopf;',
1503
      '𝕋' => '&Topf;',
1504
      '𝕌' => '&Uopf;',
1505
      '𝕍' => '&Vopf;',
1506
      '𝕎' => '&Wopf;',
1507
      '𝕏' => '&Xopf;',
1508
      '𝕐' => '&Yopf;',
1509
      '𝕒' => '&aopf;',
1510
      '𝕓' => '&bopf;',
1511
      '𝕔' => '&copf;',
1512
      '𝕕' => '&dopf;',
1513
      '𝕖' => '&eopf;',
1514
      '𝕗' => '&fopf;',
1515
      '𝕘' => '&gopf;',
1516
      '𝕙' => '&hopf;',
1517
      '𝕚' => '&iopf;',
1518
      '𝕛' => '&jopf;',
1519
      '𝕜' => '&kopf;',
1520
      '𝕝' => '&lopf;',
1521
      '𝕞' => '&mopf;',
1522
      '𝕟' => '&nopf;',
1523
      '𝕠' => '&oopf;',
1524
      '𝕡' => '&popf;',
1525
      '𝕢' => '&qopf;',
1526
      '𝕣' => '&ropf;',
1527
      '𝕤' => '&sopf;',
1528
      '𝕥' => '&topf;',
1529
      '𝕦' => '&uopf;',
1530
      '𝕧' => '&vopf;',
1531
      '𝕨' => '&wopf;',
1532
      '𝕩' => '&xopf;',
1533
      '𝕪' => '&yopf;',
1534
      '𝕫' => '&zopf;',
1535
  );
1536
1537
  /**
   * List of never allowed regex replacements.
   *
   * <p>
   * <br />
   * INFO: the fragments are joined with "|", wrapped into "#...#is" and removed from the input
   * by _do_never_allowed(), so a literal "#" needs to be escaped inside of a fragment.
   * </p>
   *
   * @var string[]
   */
  private static $_never_allowed_regex = array(
    // default javascript
    'javascript\s*:',
    // direct access to "location" or to event-handlers of "document" / "window"
    '(document|(document\.)?window)\.(location|on\w*)',
    // Java: jar-protocol is an XSS hazard
    'jar\s*:',
    // Mac (will not run the script, but open it in AppleScript Editor)
    'applescript\s*:',
    // IE: https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#VBscript_in_an_image
    'vbscript\s*:',
    // IE, surprise!
    'wscript\s*:',
    // IE
    'jscript\s*:',
    // IE: https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#VBscript_in_an_image
    'vbs\s*:',
    // https://html5sec.org/#behavior
    //
    // -> optional whitespace before the colon, like all other "name:" fragments in this list
    'behavior\s*:',
    // ?
    'Redirect\s+30\d',
    // data-attribute + base64
    //
    // NOTE(review): inside of a character class "\1" is read as the character with code 1 and not as
    // a back-reference, so "[^\1]" matches nearly everything -- confirm that this is intended
    "([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?",
    // remove Netscape 4 JS entities
    '&\s*\{[^}]*(\}\s*;?|$)',
    // old IE, old Netscape
    'expression\s*(\(|&\#40;)',
    // old Netscape
    'mocha\s*:',
    // old Netscape
    'livescript\s*:',
    // default view source
    'view-source\s*:',
  );
1576
1577
  /**
   * Names that are removed in the final clean-up step.
   *
   * <p>
   * <br />
   * INFO: _do_never_allowed_afterwards() appends ".*=" to every entry and applies the result as
   * case-insensitive, ungreedy regex, so the name is removed together with everything up to the next "=".
   * </p>
   *
   * @var string[]
   */
  private static $_never_allowed_str_afterwards = array(
      'FSCommand',
      'onAbort',
      'onActivate',
      'onAttribute',
      'onAfterPrint',
      'onAfterScriptExecute',
      'onAfterUpdate',
      'onAnimationEnd',
      'onAnimationIteration',
      'onAnimationStart',
      'onAriaRequest',
      'onAutoComplete',
      'onAutoCompleteError',
      'onBeforeActivate',
      'onBeforeCopy',
      'onBeforeCut',
      'onBeforeDeactivate',
      'onBeforeEditFocus',
      'onBeforePaste',
      'onBeforePrint',
      'onBeforeScriptExecute',
      'onBeforeUnload',
      'onBeforeUpdate',
      'onBegin',
      'onBlur',
      'onBounce',
      'onCancel',
      'onCanPlay',
      'onCanPlayThrough',
      'onCellChange',
      'onChange',
      'onClick',
      'onClose',
      'onCommand',
      'onCompassNeedsCalibration',
      'onContextMenu',
      'onControlSelect',
      'onCopy',
      'onCueChange',
      'onCut',
      'onDataAvailable',
      'onDataSetChanged',
      'onDataSetComplete',
      'onDblClick',
      'onDeactivate',
      'onDeviceLight',
      'onDeviceMotion',
      'onDeviceOrientation',
      'onDeviceProximity',
      'onDrag',
      'onDragDrop',
      'onDragEnd',
      'onDragEnter',
      'onDragLeave',
      'onDragOver',
      'onDragStart',
      'onDrop',
      'onDurationChange',
      'onEmptied',
      'onEnd',
      'onEnded',
      'onError',
      'onErrorUpdate',
      'onExit',
      'onFilterChange',
      'onFinish',
      'onFocus',
      'onFocusIn',
      'onFocusOut',
      'onFormChange',
      'onFormInput',
      'onFullScreenChange',
      'onFullScreenError',
      'onGotPointerCapture',
      'onHashChange',
      'onHelp',
      'onInput',
      'onInvalid',
      'onKeyDown',
      'onKeyPress',
      'onKeyUp',
      'onLanguageChange',
      'onLayoutComplete',
      'onLoad',
      'onLoadedData',
      'onLoadedMetaData',
      'onLoadStart',
      'onLoseCapture',
      'onLostPointerCapture',
      'onMediaComplete',
      'onMediaError',
      'onMessage',
      'onMouseDown',
      'onMouseEnter',
      'onMouseLeave',
      'onMouseMove',
      'onMouseOut',
      'onMouseOver',
      'onMouseUp',
      'onMouseWheel',
      'onMove',
      'onMoveEnd',
      'onMoveStart',
      'onMozFullScreenChange',
      'onMozFullScreenError',
      'onMozPointerLockChange',
      'onMozPointerLockError',
      'onMsContentZoom',
      'onMsFullScreenChange',
      'onMsFullScreenError',
      'onMsGestureChange',
      'onMsGestureDoubleTap',
      'onMsGestureEnd',
      'onMsGestureHold',
      'onMsGestureStart',
      'onMsGestureTap',
      'onMsGotPointerCapture',
      'onMsInertiaStart',
      'onMsLostPointerCapture',
      'onMsManipulationStateChanged',
      'onMsPointerCancel',
      'onMsPointerDown',
      'onMsPointerEnter',
      'onMsPointerLeave',
      'onMsPointerMove',
      'onMsPointerOut',
      'onMsPointerOver',
      'onMsPointerUp',
      'onMsSiteModeJumpListItemRemoved',
      'onMsThumbnailClick',
      'onOffline',
      'onOnline',
      'onOutOfSync',
      'onPage',
      'onPageHide',
      'onPageShow',
      'onPaste',
      'onPause',
      'onPlay',
      'onPlaying',
      'onPointerCancel',
      'onPointerDown',
      'onPointerEnter',
      'onPointerLeave',
      'onPointerLockChange',
      'onPointerLockError',
      'onPointerMove',
      'onPointerOut',
      'onPointerOver',
      'onPointerUp',
      'onPopState',
      'onProgress',
      'onPropertyChange',
      'onRateChange',
      'onReadyStateChange',
      'onReceived',
      'onRepeat',
      'onReset',
      'onResize',
      'onResizeEnd',
      'onResizeStart',
      'onResume',
      'onReverse',
      'onRowDelete',
      'onRowEnter',
      'onRowExit',
      'onRowInserted',
      'onRowsDelete',
      'onRowsEnter',
      'onRowsExit',
      'onRowsInserted',
      'onScroll',
      'onSearch',
      'onSeek',
      'onSeeked',
      'onSeeking',
      'onSelect',
      'onSelectionChange',
      'onSelectStart',
      'onStalled',
      'onStorage',
      'onStorageCommit',
      'onStart',
      'onStop',
      'onShow',
      'onSyncRestored',
      'onSubmit',
      'onSuspend',
      'onSynchRestored',
      'onTimeError',
      'onTimeUpdate',
      'onTrackChange',
      'onTransitionEnd',
      'onToggle',
      'onUnload',
      'onURLFlip',
      'onUserProximity',
      'onVolumeChange',
      'onWaiting',
      'onWebKitAnimationEnd',
      'onWebKitAnimationIteration',
      'onWebKitAnimationStart',
      'onWebKitFullScreenChange',
      'onWebKitFullScreenError',
      'onWebKitTransitionEnd',
      'onWheel',
      // non event-handler names
      'seekSegmentTime',
      'userid',
      'datasrc',
      'datafld',
      'dataformatas',
      'ev:handler',
      'ev:event',
      '0;url',
  );
1798
1799
  /**
   * Attribute names that are considered evil, e.g. event-handlers, "style" or "xmlns".
   *
   * <p>
   * <br />
   * INFO: an entry may be a regex fragment, e.g. 'on\w*'
   * </p>
   *
   * @see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Event_Handlers
   *
   * @var string[]
   */
  private $_evil_attributes = array(
      'on\w*',
      'style',
      'xmlns',
      'formaction',
      'form',
      'xlink:href',
      'seekSegmentTime',
      'FSCommand',
      'eval',
  );
1815
1816
  /**
   * XSS Hash - random Hash, used as marker while protecting URLs.
   *
   * <p>
   * <br />
   * INFO: _decode_entity() calls _xss_hash() as "init" before this value is read
   * </p>
   *
   * @var string
   */
  private $_xss_hash;
1822
1823
  /**
   * The string that is inserted in place of not allowed strings (empty by default).
   *
   * @var string
   */
  private $_replacement = '';
1829
1830
  /**
   * List of never allowed strings.
   *
   * <p>
   * <br />
   * INFO: _do_never_allowed() uses the keys as search-strings and the values as their replacements
   * </p>
   *
   * @var  array
   */
  private $_never_allowed_str = array();
1836
1837
  /**
   * Whether all chars from U+10000 up to U+10FFFF (4-Byte chars in UTF-8) are removed by _do().
   *
   * <p>
   * <br />
   * INFO: If your DB (MySQL) encoding is "utf8" and not "utf8mb4", then you can't save 4-Bytes chars
   * from UTF-8 and someone can create stored XSS-attacks.
   * </p>
   *
   * @var bool
   */
  private $_stripe_4byte_chars = false;
1844
1845
  /**
   * Result of the XSS-check: "null" until _do() was called, "true" once _do() has changed a string
   * and "false" otherwise.
   *
   * @var bool|null
   */
  private $xss_found = null;
1849
1850
  /**
   * Creates the filter and initializes the list of never allowed strings.
   */
  public function __construct()
  {
    $this->_initNeverAllowedStr();
  }
1857
1858
  /**
   * Callback which compacts an exploded word.
   *
   * <p>
   * <br />
   * INFO: used by xss_clean() to remove whitespace from things like 'j a v a s c r i p t'.
   * </p>
   *
   * @param  array $matches <p>[1] is the exploded word, [2] is the part that follows the word</p>
   *
   * @return  string
   */
  private function _compact_exploded_words_callback($matches)
  {
    // drop whitespace, quotes (written literally and as octal escape) and plus-signs from the word
    $compactedWord = preg_replace('/(?:\s+|"|\042|\'|\047|\+)*+/', '', $matches[1]);

    // the part after the word is kept untouched
    return $compactedWord . $matches[2];
  }
1874
1875
  /**
   * Callback which decodes the HTML-Entities of one match.
   *
   * <p>
   * <br />
   * INFO: "?key=value" and "&key=value" are replaced by hash-based markers before the decoding
   * and restored afterwards, so that GET variables in URLs are protected.
   * </p>
   *
   * @param array $match <p>only the full match ([0]) is used</p>
   *
   * @return string
   */
  private function _decode_entity($match)
  {
    // init the hash, before it is used in the markers
    $this->_xss_hash();

    $markerGetFirst = $this->_xss_hash . '::GET_FIRST';
    $markerGetNext = $this->_xss_hash . '::GET_NEXT';

    $protected = $match[0];

    // "?key=value" -> "[marker]key=value"
    $protected = preg_replace(
        '|\?([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i',
        $markerGetFirst . '\\1=\\2',
        $protected
    );

    // "&key=value" -> "[marker]key=value"
    $protected = preg_replace(
        '|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i',
        $markerGetNext . '\\1=\\2',
        $protected
    );

    $decoded = $this->_entity_decode($protected);

    // put "?" and "&" back in place of the markers
    return str_replace(
        array($markerGetFirst, $markerGetNext),
        array('?', '&'),
        $decoded
    );
  }
1906
1907
  /**
   * Runs the whole clean-up pipeline for one string and keeps track of "xss_found".
   *
   * <p>
   * <br />
   * INFO: "xss_found" is never reset from "true" in here, it is set to "true" as soon as
   * the filters have changed the (already decoded) string.
   * </p>
   *
   * @param string $str
   *
   * @return string
   */
  private function _do($str)
  {
    $str = (string)$str;

    // nothing to do for empty values and for values which are equal to their int- / float-representation
    $asInt = (int)$str;
    $asFloat = (float)$str;
    $isEmptyOrNumeric = !$str || "$asInt" == $str || "$asFloat" == $str;

    if ($isEmptyOrNumeric) {

      // no xss found
      if ($this->xss_found !== true) {
        $this->xss_found = false;
      }

      return $str;
    }

    // step 1: normalize the input

    // removes all non-UTF-8 characters && remove NULL characters (ignored by some browsers)
    $str = UTF8::clean($str, true, true, false);

    // decode UTF-7 characters
    $str = $this->_repack_utf7($str);

    // decode the string
    $str = $this->_decode_string($str);

    // remove all >= 4-Byte chars if needed
    if ($this->_stripe_4byte_chars === true) {
      $str = preg_replace('/[\x{10000}-\x{10FFFF}]/u', '', $str);
    }

    // snapshot of the normalized string, needed for the xss-check at the end
    $strBeforeFilters = $str;

    // step 2: apply the filters, the order is important

    // remove strings that are never allowed
    $str = $this->_do_never_allowed($str);

    // corrects words before the browser will do it
    $str = $this->_compact_exploded_javascript($str);

    // remove disallowed javascript calls in links, images etc.
    $str = $this->_remove_disallowed_javascript($str);

    // remove evil attributes such as style, onclick and xmlns
    $str = $this->_remove_evil_attributes($str);

    // sanitize naughty HTML elements
    $str = $this->_sanitize_naughty_html($str);

    // sanitize naughty JavaScript elements
    $str = $this->_sanitize_naughty_javascript($str);

    // final clean up
    //
    // -> extra precaution in case something got through the filters above
    $str = $this->_do_never_allowed_afterwards($str);

    // step 3: xss was found, if one of the filters has changed the string
    if ($this->xss_found !== true) {
      $this->xss_found = $strBeforeFilters !== $str;
    }

    return $str;
  }
1982
1983
  /**
   * Remove never allowed strings.
   *
   * <p>
   * <br />
   * INFO: the search-list and the regex are built on every call, because the former static cache
   * was reset on every call anyway and therefore never saved any work.
   * </p>
   *
   * @param string $str
   *
   * @return string
   */
  private function _do_never_allowed($str)
  {
    // plain strings: the keys are the (case-insensitive) search-strings, the values are the replacements
    $str = str_ireplace(array_keys($this->_never_allowed_str), $this->_never_allowed_str, $str);

    // regex fragments: all alternatives in one case-insensitive pass, "." matches also newlines
    $regex = '#' . implode('|', self::$_never_allowed_regex) . '#is';
    $str = preg_replace($regex, $this->_replacement, $str);

    // "preg_replace()" returns null on error, the cast turns that into an empty string
    return (string)$str;
  }
2008
2009
  /**
   * Remove never allowed string, afterwards.
   *
   * <p>
   * <br />
   * INFO: clean-up also some string, if there is no html-tag
   * </p>
   *
   * @param string $str
   *
   * @return  string
   */
  private function _do_never_allowed_afterwards($str)
  {
    static $NEVER_ALLOWED_STR_AFTERWARDS_CACHE;

    if (null === $NEVER_ALLOWED_STR_AFTERWARDS_CACHE) {
      // build the alternatives in a local array, so that the shared static list is not modified
      // (and no reference to its last element is left behind)
      $patterns = array();
      foreach (self::$_never_allowed_str_afterwards as $neverAllowedStr) {
        // the name plus everything up to the next "="
        $patterns[] = $neverAllowedStr . '.*=';
      }

      $NEVER_ALLOWED_STR_AFTERWARDS_CACHE = implode('|', $patterns);
    }

    // "i": case-insensitive, "s": "." matches also newlines, "U": ungreedy
    $str = preg_replace('#' . $NEVER_ALLOWED_STR_AFTERWARDS_CACHE . '#isU', $this->_replacement, $str);

    // "preg_replace()" returns null on error, the cast turns that into an empty string
    return (string)$str;
  }
2037
2038
  /**
   * Entity-decoding.
   *
   * <p>
   * <br />
   * INFO: the string is decoded via the "UTF8"-class first, afterwards named entities which are
   * still part of the string are replaced via a lookup-table (see _entity_decode_table()).
   * </p>
   *
   * @param string $str
   *
   * @return string
   */
  private function _entity_decode($str)
  {
    // HHVM doesn't support "ENT_DISALLOWED" && "ENT_SUBSTITUTE", so they are only used if they are defined
    // (an undefined constant can't be silenced via "@" anymore, it is an Error since PHP 8)
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
      $flags |= defined('ENT_DISALLOWED') ? ENT_DISALLOWED : 0;
      $flags |= defined('ENT_SUBSTITUTE') ? ENT_SUBSTITUTE : 0;
    } else {
      $flags = ENT_QUOTES;
    }

    // decode
    //
    // -> strings with the hash-marker (see _decode_entity()) are only entity-decoded
    if (strpos($str, $this->_xss_hash) !== false) {
      $str = UTF8::html_entity_decode($str, $flags);
    } else {
      $str = UTF8::rawurldecode($str);
    }

    // decode-again, for e.g. HHVM, PHP 5.3, miss configured applications ...
    //
    // -> the names are matched without the ";", digits are allowed after the first letter (e.g. "&blk12")
    if (!preg_match_all('/&[A-Za-z][A-Za-z0-9]+/', $str, $matches)) {
      return $str;
    }

    $entities = $this->_entity_decode_table($flags);

    $replace = array();
    foreach ($matches[0] as $match) {
      // only the complete entity (with ";") is a key of the table
      $match .= ';';
      if (isset($entities[$match])) {
        $replace[$match] = $entities[$match];
      }
    }

    if (count($replace) > 0) {
      $str = str_replace(array_keys($replace), array_values($replace), $str);
    }

    return $str;
  }

  /**
   * Get the lookup-table "entity => replacement" for _entity_decode().
   *
   * <p>
   * <br />
   * INFO: the table is built on the first call only and re-used afterwards,
   * so the flags of later calls are ignored.
   * </p>
   *
   * @param int $flags <p>flags for "get_html_translation_table()"</p>
   *
   * @return array
   */
  private function _entity_decode_table($flags)
  {
    static $HTML_ENTITIES_CACHE;

    if (null === $HTML_ENTITIES_CACHE) {

      // links:
      // - http://dev.w3.org/html5/html-author/charref
      // - http://www.w3schools.com/charsets/ref_html_entities_n.asp
      $entitiesSecurity = array(
          '&#x00000;'          => '',
          '&#0;'               => '',
          '&#x00001;'          => '',
          '&#1;'               => '',
          '&nvgt;'             => '',
          '&#61253;'           => '',
          '&#x0EF45;'          => '',
          '&shy;'              => '',
          '&#x000AD;'          => '',
          '&#173;'             => '',
          '&colon;'            => ':',
          '&#x0003A;'          => ':',
          '&#58;'              => ':',
          '&lpar;'             => '(',
          '&#x00028;'          => '(',
          '&#40;'              => '(',
          '&rpar;'             => ')',
          '&#x00029;'          => ')',
          '&#41;'              => ')',
          '&quest;'            => '?',
          '&#x0003F;'          => '?',
          '&#63;'              => '?',
          '&sol;'              => '/',
          '&#x0002F;'          => '/',
          '&#47;'              => '/',
          '&apos;'             => '\'',
          '&#x00027;'          => '\'',
          '&#039;'             => '\'',
          '&#39;'              => '\'',
          '&#x27;'             => '\'',
          // "&bsol;" is the backslash (U+005C), like the two numeric variants below
          '&bsol;'             => '\\',
          '&#x0005C;'          => '\\',
          '&#92;'              => '\\',
          '&comma;'            => ',',
          '&#x0002C;'          => ',',
          '&#44;'              => ',',
          '&period;'           => '.',
          '&#x0002E;'          => '.',
          '&quot;'             => '"',
          '&QUOT;'             => '"',
          '&#x00022;'          => '"',
          '&#34;'              => '"',
          '&grave;'            => '`',
          '&DiacriticalGrave;' => '`',
          '&#x00060;'          => '`',
          '&#96;'              => '`',
          '&#46;'              => '.',
          '&equals;'           => '=',
          '&#x0003D;'          => '=',
          '&#61;'              => '=',
          '&newline;'          => "\n",
          '&#x0000A;'          => "\n",
          '&#10;'              => "\n",
          '&tab;'              => "\t",
          '&#x00009;'          => "\t",
          '&#9;'               => "\t",
      );

      // NOTE(review): "array_merge()" lets later arrays overwrite equal string-keys, so an entry from
      // "$entitiesSecurity" is only used if the entity is missing in both tables below -- confirm that
      // this precedence is intended
      $HTML_ENTITIES_CACHE = array_merge(
          $entitiesSecurity,
          array_flip(get_html_translation_table(HTML_ENTITIES, $flags)),
          array_flip(self::$entitiesFallback)
      );
    }

    return $HTML_ENTITIES_CACHE;
  }
2152
2153
  /**
   * Reduce the attribute part of a tag to its well-formed attributes.
   *
   * <p>
   * <br />
   * Every quoted `name="value"` / `name='value'` pair is copied to the output verbatim,
   * everything between those pairs is dropped. Only if no quoted pair was found and a
   * non-empty replacement-string is configured, attributes of the form `name=<replacement>`
   * (anchored to the end of the input) are collected instead.
   * </p>
   *
   * @param string $str <p>The raw attribute part of a tag.</p>
   *
   * @return string <p>The concatenated matches, or an empty string if nothing matched.</p>
   */
  private function _filter_attributes($str)
  {
    if ($str === '') {
      return '';
    }

    // INFO: \042 and \047 are the octal notations of " and '
    $quotedAttributes = '#\s*[A-Za-z\-]+\s*=\s*("|\042|\'|\047)([^\\1]*?)\\1#';
    $found = preg_match_all($quotedAttributes, $str, $matches);

    // fallback: attributes whose value was already swapped for the replacement-string
    if (!$found && $this->_replacement) {
      $replacedAttributes = '#\s*[a-zA-Z\-]+\s*=' . preg_quote($this->_replacement, '#') . '$#';
      $found = preg_match_all($replacedAttributes, $str, $matches);
    }

    if (!$found) {
      return '';
    }

    return implode('', $matches[0]);
  }
2183
2184
  /**
   * (Re-)build "$this->_never_allowed_str".
   *
   * <p>
   * <br />
   * The map consists of two groups: strings that are swapped for the current
   * replacement-string and markup openers / closers whose angle brackets are entity-encoded.
   * </p>
   */
  private function _initNeverAllowedStr()
  {
    // group 1: replaced by "$this->_replacement"
    $replaced = array_fill_keys(
        array(
            'document.cookie',
            'document.write',
            '.parentNode',
            '.innerHTML',
            '.appendChild',
            '-moz-binding',
        ),
        $this->_replacement
    );

    // group 2: neutralised by encoding the angle brackets
    $encoded = array(
        '<!--'      => '&lt;!--',
        '-->'       => '--&gt;',
        '<?'        => '&lt;?',
        '?>'        => '?&gt;',
        '<![CDATA[' => '&lt;![CDATA[',
        '<!ENTITY'  => '&lt;!ENTITY',
        '<!DOCTYPE' => '&lt;!DOCTYPE',
        '<!ATTLIST' => '&lt;!ATTLIST',
        '<comment>' => '&lt;comment&gt;',
    );

    // the union keeps the insertion order: group 1 first, then group 2
    $this->_never_allowed_str = $replaced + $encoded;
  }
2207
2208
  /**
   * Regex-callback that sanitizes the "href"-attribute of a matched link tag.
   *
   * <p>
   * <br />
   * INFO: The work is delegated to "_js_removal_calback()"; according to the original note,
   * handling the tags via callback limits the PCRE backtracks and prevents
   * PREG_BACKTRACK_LIMIT_ERROR from being triggered on link-heavy strings.
   * </p>
   *
   * @param array $match <p>The regex match: [0] is the whole tag, [1] its attribute part.</p>
   *
   * @return string
   */
  private function _js_link_removal_callback($match)
  {
    $attribute = 'href';

    return $this->_js_removal_calback($match, $attribute);
  }
2226
2227
  /**
   * Shared regex-callback that neutralises script-like values of one attribute inside a matched tag.
   *
   * <p>
   * <br />
   * The attribute part of the tag is first reduced to its well-formed attributes. If the
   * unfiltered attribute part contains "$search=" followed by something that looks like a
   * function call or one of the blacklisted schemes / keywords, that part of the filtered
   * attributes is swapped for `$search="<replacement>"`.
   * </p>
   *
   * @param array  $match  <p>The regex match: [0] is the whole tag, [1] its attribute part.</p>
   * @param string $search <p>The attribute name to inspect, e.g. "href" or "src".</p>
   *
   * @return string <p>The tag with its attribute part exchanged, or an empty string for an empty match.</p>
   */
  private function _js_removal_calback($match, $search)
  {
    if (!$match[0]) {
      return '';
    }

    $attributes = $match[1];

    $pattern = '#' . $search . '=.*(?:\(.+([^\)]*?)(?:\)|$)|javascript:|view-source:|livescript:|wscript:|vbscript:|mocha:|charset=|window\.|document\.|\.cookie|<script|d\s*a\s*t\s*a\s*:)#is';

    // strip angle brackets, then keep the well-formed attributes only
    $replacer = $this->_filter_attributes(str_replace(array('<', '>'), '', $attributes));

    // the check runs on the raw attributes, the replacement on the filtered ones
    if (preg_match($pattern, $attributes) === 1) {
      $replacer = (string)preg_replace($pattern, $search . '="' . $this->_replacement . '"', $replacer);
    }

    return str_ireplace($attributes, $replacer, $match[0]);
  }
2264
2265
  /**
   * Regex-callback that sanitizes the "src"-attribute of a matched tag (e.g. an image tag).
   *
   * <p>
   * <br />
   * INFO: The work is delegated to "_js_removal_calback()"; according to the original note,
   * handling the tags via callback limits the PCRE backtracks and prevents
   * PREG_BACKTRACK_LIMIT_ERROR from being triggered on image tag heavy strings.
   * </p>
   *
   * @param array $match <p>The regex match: [0] is the whole tag, [1] its attribute part.</p>
   *
   * @return string
   */
  private function _js_src_removal_callback($match)
  {
    $attribute = 'src';

    return $this->_js_removal_calback($match, $attribute);
  }
2283
2284
  /**
   * Regex-callback that turns a matched naughty tag into harmless text.
   *
   * <p>
   * <br />
   * The opening bracket of the tag is always entity-encoded; the brackets captured
   * behind the tag are encoded as well, to prevent recursive vectors.
   * </p>
   *
   * @param array $matches <p>
   *                       The regex match: [1] leading slashes / whitespace, [2] the tag name,
   *                       [3] the rest of the tag, [4] the trailing angle brackets.
   *                       </p>
   *
   * @return string
   */
  private function _sanitize_naughty_html_callback($matches)
  {
    // encode the opening bracket
    $tag = '&lt;' . $matches[1] . $matches[2] . $matches[3];

    // encode captured opening or closing brackets
    $trailing = strtr(
        $matches[4],
        array(
            '>' => '&gt;',
            '<' => '&lt;',
        )
    );

    return $tag . $trailing;
  }
2312
2313
  /**
   * Register additional entries for the "_evil_attributes"-array.
   *
   * <p>
   * <br />
   * INFO: The new entries are placed in front of the existing ones.
   * </p>
   *
   * @param array $strings <p>The attribute names (or patterns) to add.</p>
   *
   * @return $this
   */
  public function addEvilAttributes(array $strings)
  {
    $merged = array_merge($strings, $this->_evil_attributes);
    $this->_evil_attributes = $merged;

    return $this;
  }
2326
2327
  /**
   * Find "exploded" script-keywords and hand them to the compacting callback.
   *
   * <p>
   * <br />
   * INFO: This targets words like:  j a v a s c r i p t
   * <br />
   * Each keyword is matched with optional whitespace, plus-signs or quotes between its
   * letters; every hit is passed to "_compact_exploded_words_callback()".
   * </p>
   *
   * @param string $str
   *
   * @return string
   */
  private function _compact_exploded_javascript($str)
  {
    // keyword => regex-fragment, shared between all calls
    static $WORDS_CACHE;

    // what may sit between two letters of a keyword (\042 and \047 are the octal quotes)
    $separator = '(?:\s|\+|"|\042|\'|\047)*';

    $words = array(
        'javascript',
        'expression',
        'view-source',
        'vbscript',
        'jscript',
        'wscript',
        'vbs',
        'script',
        'base64',
        'applet',
        'alert',
        'document',
        'write',
        'cookie',
        'window',
        'confirm',
        'prompt',
        'eval',
    );

    foreach ($words as $word) {

      if (!isset($WORDS_CACHE[$word])) {
        // "abc" => "a<sep>b<sep>c": chunk_split() also appends a trailing separator, which is cut off again
        $WORDS_CACHE[$word] = substr(
            chunk_split($word, 1, $separator),
            0,
            -strlen($separator)
        );
      }

      // Only match when a non-word character follows,
      // that way valid stuff like "dealer to" does not become "dealerto".
      $str = preg_replace_callback(
          '#(' . $WORDS_CACHE[$word] . ')(\W)#is',
          array($this, '_compact_exploded_words_callback'),
          $str
      );
    }

    return (string)$str;
  }
2393
2394
  /**
   * Decode the input: html-tags go through the "_decode_entity()"-callback,
   * strings without any html-tag go through "UTF8::rawurldecode()".
   *
   * @param string $str
   *
   * @return string
   */
  private function _decode_string($str)
  {
    // matches from the first "<tagname" up to the end of the string
    $regExForHtmlTags = '/<\w+.*+/si';

    // no html-tag at all: url-decode the whole string
    if (preg_match($regExForHtmlTags, $str) !== 1) {
      return UTF8::rawurldecode($str);
    }

    return preg_replace_callback(
        $regExForHtmlTags,
        array($this, '_decode_entity'),
        $str
    );
  }
2421
2422
  /**
   * Tell whether the last run of "AntiXSS->xss_clean()" found an XSS attack.
   *
   * @return bool|null <p>Will return null if "xss_clean()" wasn't running at all.</p>
   */
  public function isXssFound()
  {
    $found = $this->xss_found;

    return $found;
  }
2431
2432
  /**
   * Remove some strings from the "_evil_attributes"-array.
   *
   * <p>
   * <br />
   * WARNING: Use this method only if you have a really good reason.
   * </p>
   *
   * <p>
   * <br />
   * INFO: Only the given strings are removed, every other registered attribute stays
   * active; strings that are not registered are ignored.
   * </p>
   *
   * @param array $strings <p>The attribute names (or patterns) to remove.</p>
   *
   * @return $this
   */
  public function removeEvilAttributes(array $strings)
  {
    // Keep everything that was NOT requested for removal.
    // (The former "array_diff(array_intersect($strings, X), X)" is always empty
    // and therefore disabled the whole attribute-filter.)
    $this->_evil_attributes = array_values(
        array_diff($this->_evil_attributes, $strings)
    );

    return $this;
  }
2453
2454
  /**
   * Remove disallowed Javascript in links, media tags and script tags.
   *
   * <p>
   * <br />
   * The link-, image-, audio-, video- and source-tags are handed to the href- / src-callbacks,
   * afterwards everything from a script-opener up to the next closing bracket is swapped
   * for the replacement-string. All passes are repeated until the string no longer changes.
   * </p>
   *
   * <p>
   * <br />
   * INFO: A cheap "stripos()" guards every regex, so the more expensive pattern
   * only runs if the tag exists in the string at all.
   * </p>
   *
   * <p>
   * <br />
   * Note: It was reported that not only space characters, but all in
   * the following pattern can be parsed as separators between a tag name
   * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
   * ... the original note states that the hex-encoded ones are already stripped
   * before this method runs, so they are skipped here.
   * </p>
   *
   * @param string $str
   *
   * @return string
   */
  private function _remove_disallowed_javascript($str)
  {
    // needle => array(pattern, callback-method), processed in exactly this order
    $tagFilters = array(
        '<a'      => array('#<a[^a-z0-9>]+([^>]*?)(?:>|$)#i', '_js_link_removal_callback'),
        '<img'    => array('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#i', '_js_src_removal_callback'),
        '<audio'  => array('#<audio[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#i', '_js_src_removal_callback'),
        '<video'  => array('#<video[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#i', '_js_src_removal_callback'),
        '<source' => array('#<source[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#i', '_js_src_removal_callback'),
    );

    do {
      $original = $str;

      foreach ($tagFilters as $needle => $filter) {
        if (stripos($str, $needle) === false) {
          continue;
        }

        $str = preg_replace_callback($filter[0], array($this, $filter[1]), $str);
      }

      if (stripos($str, 'script') !== false) {
        // US-ASCII: ¼ === <
        $str = preg_replace('#(?:¼|<)/*(?:script).*(?:¾|>)#isuU', $this->_replacement, $str);
      }
    } while ($original !== $str);

    return (string)$str;
  }
2545
2546
  /**
   * Remove Evil HTML Attributes (like event handlers and style).
   *
   * It removes the evil attribute and either:
   *
   *  - Everything up until a space. For example, everything between the pipes:
   *
   * <code>
   *   <a |style=document.write('hello');alert('world');| class=link>
   * </code>
   *
   *  - Everything inside the quotes. For example, everything between the pipes:
   *
   * <code>
   *   <a |style="document.write('hello'); alert('world');"| class="link">
   * </code>
   *
   * <p>
   * <br />
   * INFO: If no evil attribute is registered at all, the string is returned untouched.
   * </p>
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return string <p>The string with the evil attributes removed.</p>
   */
  private function _remove_evil_attributes($str)
  {
    $evil_attributes_string = implode('|', $this->_evil_attributes);

    // Nothing registered: an empty alternation would let the generic pattern below match
    // any "= value" that follows a non-word character and mangle harmless attributes.
    if ($evil_attributes_string === '') {
      return (string)$str;
    }

    // replace style-attribute, first (if needed)
    if (in_array('style', $this->_evil_attributes, true)) {
      do {
        $count = 0;
        $str = preg_replace('/(<[^>]+)(?<!\w)(style="(:?[^"]*?)"|style=\'(:?[^\']*?)\')/i', '$1' . $this->_replacement, $str, -1, $count);
      } while ($count);
    }

    // Repeat until nothing is replaced anymore, one pass only strips one attribute per tag.
    do {
      $count = 0;

      // find occurrences of illegal attribute strings with and without quotes (042 ["] and 047 ['] are octal quotes)
      // NOTE(review): "\\2" refers to the attribute-name group, not to the opening quote - confirm that this is intended.
      $str = preg_replace('/(<[^>]+)(?<!\w)(' . $evil_attributes_string . ')\s*=\s*(?:(?:"|\042|\'|\047)(?:[^\\2]*?)(?:\\2)|[^\s>]*)/is', '$1' . $this->_replacement, $str, -1, $count);
    } while ($count);

    return (string)$str;
  }
2593
2594
  /**
   * UTF-7 decoding function.
   *
   * <p>
   * <br />
   * INFO: Every "+<base64-chars>-" sequence is handed to "_repack_utf7_callback()".
   * </p>
   *
   * @param string $str <p>HTML document for recode ASCII part of UTF-7 back to ASCII.</p>
   *
   * @return string
   */
  private function _repack_utf7($str)
  {
    $callback = array($this, '_repack_utf7_callback');

    return preg_replace_callback('#\+([0-9a-zA-Z/]+)\-#', $callback, $str);
  }
2609
2610
  /**
   * Additional UTF-7 decoding function (regex-callback of "_repack_utf7()").
   *
   * <p>
   * <br />
   * The base64-payload is decoded, a leading run of non-ASCII pairs is re-encoded via
   * "_repack_utf7_callback_back()" and the null-byte in front of the remaining
   * characters is dropped.
   * </p>
   *
   * @param array $str <p>The regex match: [0] is the whole "+...-" sequence, [1] its base64-payload.</p>
   *
   * @return string|null <p>The recoded string; null if a regex fails (e.g. on invalid UTF-8).</p>
   */
  private function _repack_utf7_callback($str)
  {
    $strTmp = base64_decode($str[1]);

    if ($strTmp === false) {
      // a regex-callback has to return a string: hand back the untouched match, not the match-array
      return $str[0];
    }

    $strTmp = preg_replace_callback(
        '/^((?:\x00.)*?)((?:[^\x00].)+)/us',
        array($this, '_repack_utf7_callback_back'),
        $strTmp
    );

    return preg_replace('/\x00(.)/us', '$1', $strTmp);
  }
2633
2634
  /**
   * Additional UTF-7 encoding function (regex-callback of "_repack_utf7_callback()").
   *
   * <p>
   * <br />
   * INFO: The second capture is wrapped into a "+<base64>-" sequence again (without padding),
   * the first capture is kept as it is.
   * </p>
   *
   * @param array $str <p>The regex match: [1] is kept verbatim, [2] is base64-encoded.</p>
   *
   * @return string
   */
  private function _repack_utf7_callback_back($str)
  {
    $encoded = rtrim(base64_encode($str[2]), '=');

    return $str[1] . '+' . $encoded . '-';
  }
2645
2646
  /**
   * Sanitize naughty HTML elements.
   *
   * <p>
   * <br />
   *
   * If a tag starting with any of the words in the list
   * below is found, the tag gets converted to entities.
   *
   * <br /><br />
   *
   * So this: <blink>
   * <br />
   * Becomes: &lt;blink&gt;
   * </p>
   *
   * @param string $str
   *
   * @return string
   */
  private function _sanitize_naughty_html($str)
  {
    // INFO: the order of the list is kept as it is, it is part of the regex
    $naughty = implode(
        '|',
        array(
            'alert', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior',
            'bgsound', 'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame',
            'head', 'html', 'ilayer', 'iframe', 'input', 'button', 'select', 'isindex',
            'layer', 'link', 'meta', 'keygen', 'object', 'plaintext', 'style', 'script',
            'textarea', 'title', 'math', 'video', 'source', 'svg', 'xml', 'xss', 'eval',
        )
    );

    $pattern = '#<(/*\s*)(' . $naughty . ')([^><]*)([><]*)#i';
    $callback = array($this, '_sanitize_naughty_html_callback');

    return (string)preg_replace_callback($pattern, $callback, $str);
  }
2680
2681
  /**
   * Sanitize naughty scripting elements.
   *
   * <p>
   * <br />
   *
   * Instead of looking for tags, this looks for calls of PHP and JavaScript
   * commands that are disallowed. Rather than removing the code, it simply
   * converts the parenthesis to entities, rendering the code un-executable.
   *
   * <br /><br />
   *
   * For example:  <pre>eval('some code')</pre>
   * <br />
   * Becomes:      <pre>eval&#40;'some code'&#41;</pre>
   * </p>
   *
   * @param string $str
   *
   * @return string
   */
  private function _sanitize_naughty_javascript($str)
  {
    // INFO: "U" makes the quantifiers ungreedy, so every call is closed at its first ")"
    $pattern = '#(alert|eval|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*)\)#siU';

    // keep the command, the whitespace and the arguments, but encode the parenthesis
    $encodedCall = '\\1\\2&#40;\\3&#41;';

    return (string)preg_replace($pattern, $encodedCall, $str);
  }
2714
2715
  /**
   * Set the replacement-string for not allowed strings.
   *
   * <p>
   * <br />
   * INFO: The "_never_allowed_str"-map is rebuilt afterwards, because it contains the replacement-string.
   * </p>
   *
   * @param string $string <p>The new replacement, it is cast to a string.</p>
   *
   * @return $this
   */
  public function setReplacement($string)
  {
    $replacement = (string)$string;
    $this->_replacement = $replacement;

    $this->_initNeverAllowedStr();

    return $this;
  }
2730
2731
  /**
   * Set the option to strip 4-Byte chars.
   *
   * <p>
   * <br />
   * INFO: use it if your DB (MySQL) can't use "utf8mb4" -> preventing stored XSS-attacks
   * </p>
   *
   * @param bool $bool <p>The new state of the option, it is cast to a boolean.</p>
   *
   * @return $this
   */
  public function setStripe4byteChars($bool)
  {
    $enabled = (bool)$bool;
    $this->_stripe_4byte_chars = $enabled;

    return $this;
  }
2749
2750
  /**
   * XSS Clean
   *
   * <p>
   * <br />
   * Sanitizes data so that "Cross Site Scripting" hacks can be
   * prevented. This method does a fair amount of work but
   * it is extremely thorough, designed to prevent even the
   * most obscure XSS attempts. But keep in mind that nothing
   * is ever 100% foolproof...
   * </p>
   *
   * <p>
   * <br />
   * <strong>Note:</strong> Should only be used to deal with data upon submission.
   *   It's not something that should be used for general
   *   runtime processing.
   * </p>
   *
   * <p>
   * <br />
   * INFO: Arrays are cleaned element by element (recursively). Afterwards "isXssFound()"
   * reports true if any element reported true, and null if no element set the flag.
   * </p>
   *
   * @link http://channel.bitflux.ch/wiki/XSS_Prevention
   *    Based in part on some code and ideas from Bitflux.
   *
   * @link http://ha.ckers.org/xss.html
   *    To help develop this script I used this great list of
   *    vulnerabilities along with a few other hacks I've
   *    harvested from examining vulnerabilities in other programs.
   *
   * @param string|array $str <p>input data e.g. string or array</p>
   *
   * @return string|array <p>
   *                      string: will return a string, if the input is a string<br />
   *                      array: will return a array, if the input is a array<br />
   *                      </p>
   */
  public function xss_clean($str)
  {
    // reset
    $this->xss_found = null;

    // check for an array of strings
    if (is_array($str) === true) {
      $found = null;

      foreach ($str as $key => $value) {
        $str[$key] = $this->xss_clean($value);

        // Every recursive call resets the flag, so a hit of an earlier element
        // would be lost without remembering it here.
        if ($found !== true && $this->xss_found !== null) {
          $found = $this->xss_found;
        }
      }

      $this->xss_found = $found;

      return $str;
    }

    // process: repeat until the string is stable
    do {
      $old_str = $str;
      $str = $this->_do($str);
    } while ($old_str !== $str);

    return $str;
  }
2807
2808
  /**
   * Generates the XSS hash if needed and returns it.
   *
   * <p>
   * <br />
   * INFO: The random part is created once per instance and is built from 16 random bytes;
   * "md5(uniqid())" is only the fallback if no random bytes are available.
   * </p>
   *
   * @return string <p>XSS hash, prefixed with "voku::anti-xss::"</p>
   */
  private function _xss_hash()
  {
    if ($this->_xss_hash === null) {
      $rand = Bootup::get_random_bytes(16);

      $this->_xss_hash = $rand ? bin2hex($rand) : md5(uniqid(mt_rand(), true));
    }

    return 'voku::anti-xss::' . $this->_xss_hash;
  }
2827
2828
}