1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/* |
4
|
|
|
htmLawed 1.2.4.2, 16 May 2019 |
5
|
|
|
Copyright Santosh Patnaik |
6
|
|
|
Dual licensed with LGPL 3 and GPL 2+ |
7
|
|
|
A PHP Labware internal utility - www.bioinformatics.org/phplabware/internal_utilities/htmLawed |
8
|
|
|
|
9
|
|
|
See htmLawed_README.txt/htm |
10
|
|
|
*/ |
11
|
|
|
|
12
|
|
|
/**
 * Filter a string of HTML according to a configuration and an attribute spec.
 *
 * The function normalises the configuration array (filling in defaults for
 * every key it later reads), publishes the configuration and spec through
 * $GLOBALS['C'] and $GLOBALS['S'] for the callback helpers, runs the text
 * through the comment/CDATA, entity, tag and (optionally) balancing and tidy
 * passes, and finally restores whatever $GLOBALS['C'] / $GLOBALS['S'] held
 * before the call.
 *
 * @param string $t Input text.
 * @param mixed  $C Configuration array; any non-array value means "all defaults".
 * @param mixed  $S Spec; an array is used as-is, anything else is passed to hl_spec().
 *
 * @return string Filtered text.
 */
function htmLawed($t, $C = 1, $S = array()) {
    $C = is_array($C) ? $C : array();
    if (!empty($C['valid_xhtml'])) {
        $C['elements'] = empty($C['elements']) ? '*-acronym-big-center-dir-font-isindex-s-strike-tt' : $C['elements'];
        $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 2;
        $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 2;
    }

    // config eles
    $e = array('a' => 1, 'abbr' => 1, 'acronym' => 1, 'address' => 1, 'applet' => 1, 'area' => 1, 'article' => 1, 'aside' => 1, 'audio' => 1, 'b' => 1, 'bdi' => 1, 'bdo' => 1, 'big' => 1, 'blockquote' => 1, 'br' => 1, 'button' => 1, 'canvas' => 1, 'caption' => 1, 'center' => 1, 'cite' => 1, 'code' => 1, 'col' => 1, 'colgroup' => 1, 'command' => 1, 'data' => 1, 'datalist' => 1, 'dd' => 1, 'del' => 1, 'details' => 1, 'dfn' => 1, 'dir' => 1, 'div' => 1, 'dl' => 1, 'dt' => 1, 'em' => 1, 'embed' => 1, 'fieldset' => 1, 'figcaption' => 1, 'figure' => 1, 'font' => 1, 'footer' => 1, 'form' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'header' => 1, 'hgroup' => 1, 'hr' => 1, 'i' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'ins' => 1, 'isindex' => 1, 'kbd' => 1, 'keygen' => 1, 'label' => 1, 'legend' => 1, 'li' => 1, 'link' => 1, 'main' => 1, 'map' => 1, 'mark' => 1, 'menu' => 1, 'meta' => 1, 'meter' => 1, 'nav' => 1, 'noscript' => 1, 'object' => 1, 'ol' => 1, 'optgroup' => 1, 'option' => 1, 'output' => 1, 'p' => 1, 'param' => 1, 'pre' => 1, 'progress' => 1, 'q' => 1, 'rb' => 1, 'rbc' => 1, 'rp' => 1, 'rt' => 1, 'rtc' => 1, 'ruby' => 1, 's' => 1, 'samp' => 1, 'script' => 1, 'section' => 1, 'select' => 1, 'small' => 1, 'source' => 1, 'span' => 1, 'strike' => 1, 'strong' => 1, 'style' => 1, 'sub' => 1, 'summary' => 1, 'sup' => 1, 'table' => 1, 'tbody' => 1, 'td' => 1, 'textarea' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'time' => 1, 'tr' => 1, 'track' => 1, 'tt' => 1, 'u' => 1, 'ul' => 1, 'var' => 1, 'video' => 1, 'wbr' => 1); // 118 incl. deprecated & some Ruby

    if (!empty($C['safe'])) {
        unset($e['applet'], $e['audio'], $e['canvas'], $e['embed'], $e['iframe'], $e['object'], $e['script'], $e['video']);
    }
    // 'elements' is either '-*' (none), a comma list (only these), or '*' followed by +name / -name adjustments
    $x = !empty($C['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['elements']) : '*';
    if ('-*' == $x) {
        $e = array();
    } elseif (false === strpos($x, '*')) {
        $e = array_flip(explode(',', $x));
    } else {
        if (isset($x[1])) {
            preg_match_all('`(?:^|-|\+)[^\-+]+?(?=-|\+|$)`', $x, $m, PREG_SET_ORDER);
            // flatten the PREG_SET_ORDER result to a plain list of '+name' / '-name' tokens
            for ($i = count($m); --$i >= 0;) {
                $m[$i] = $m[$i][0];
            }
            foreach ($m as $v) {
                if ('+' == $v[0]) {
                    $e[substr($v, 1)] = 1;
                }
                // a '-name' is ignored when the same list also carries '+name'
                if ('-' == $v[0] && isset($e[($v = substr($v, 1))]) && !in_array('+'.$v, $m)) {
                    unset($e[$v]);
                }
            }
        }
    }
    $C['elements'] = &$e;

    // config attrs
    $x = !empty($C['deny_attribute']) ? strtolower(str_replace(array("\n", "\r", "\t", ' '), '', $C['deny_attribute'])) : '';
    // 'data-' is swapped for '/' around the explode so the hyphen in data-* names is not taken as a separator
    $x = array_flip((isset($x[0]) && '*' == $x[0]) ? str_replace('/', 'data-', explode('-', str_replace('data-', '/', $x))) : explode(',', $x.(!empty($C['safe']) ? ',on*' : '')));
    $C['deny_attribute'] = $x;

    // config URLs
    $x = (isset($C['schemes'][2]) && strpos($C['schemes'], ':')) ? strtolower($C['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, tel, telnet'.(empty($C['safe']) ? ', app, javascript; *: data, javascript, ' : '; *:').'file, http, https';
    $C['schemes'] = array();
    foreach (explode(';', trim(str_replace(array(' ', "\t", "\r", "\n"), '', $x), ';')) as $v) {
        $x = $x2 = null;
        // pad so that a segment without ':' yields a null scheme list instead of reading a missing index
        list($x, $x2) = array_pad(explode(':', $v, 2), 2, null);
        if ($x2) {
            $C['schemes'][$x] = array_flip(explode(',', $x2));
        }
    }
    if (!isset($C['schemes']['*'])) {
        $C['schemes']['*'] = array('file' => 1, 'http' => 1, 'https' => 1);
        if (empty($C['safe'])) {
            $C['schemes']['*'] += array('data' => 1, 'javascript' => 1);
        }
    }
    if (!empty($C['safe']) && empty($C['schemes']['style'])) {
        $C['schemes']['style'] = array('!' => 1);
    }
    $C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0;
    if (!isset($C['base_url']) or !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])) {
        $C['base_url'] = $C['abs_url'] = 0;
    }

    // config rest
    $C['and_mark'] = empty($C['and_mark']) ? 0 : 1;
    $C['anti_link_spam'] = (isset($C['anti_link_spam']) && is_array($C['anti_link_spam']) && 2 == count($C['anti_link_spam']) && (empty($C['anti_link_spam'][0]) or hl_regex($C['anti_link_spam'][0])) && (empty($C['anti_link_spam'][1]) or hl_regex($C['anti_link_spam'][1]))) ? $C['anti_link_spam'] : 0;
    $C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0;
    $C['balance'] = isset($C['balance']) ? (bool) $C['balance'] : 1;
    $C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0);
    $C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char'];
    $C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0);
    $C['css_expression'] = empty($C['css_expression']) ? 0 : 1;
    $C['direct_list_nest'] = empty($C['direct_list_nest']) ? 0 : 1;
    $C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1;
    $C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0;
    $C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0;
    $C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6;
    $C['lc_std_val'] = isset($C['lc_std_val']) ? (bool) $C['lc_std_val'] : 1;
    $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1;
    $C['named_entity'] = isset($C['named_entity']) ? (bool) $C['named_entity'] : 1;
    $C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1;
    $C['parent'] = isset($C['parent'][0]) ? strtolower($C['parent']) : 'body';
    $C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0;
    $C['style_pass'] = empty($C['style_pass']) ? 0 : 1;
    $C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy'];
    $C['unique_ids'] = isset($C['unique_ids']) && (!preg_match('`\W`', $C['unique_ids'])) ? $C['unique_ids'] : 1;
    $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0;

    // remember any pre-existing globals so they can be put back before returning
    if (isset($GLOBALS['C'])) {
        $reC = $GLOBALS['C'];
    }
    $GLOBALS['C'] = $C;
    $S = is_array($S) ? $S : hl_spec($S);
    if (isset($GLOBALS['S'])) {
        $reS = $GLOBALS['S'];
    }
    $GLOBALS['S'] = $S;

    // strip control characters other than tab, LF and CR
    $t = preg_replace('`[\x00-\x08\x0b-\x0c\x0e-\x1f]`', '', $t);
    if ($C['clean_ms_char']) {
        // Replacements are ASCII numeric character references so the result does not
        // depend on the output encoding; they are validated by the entity pass below.
        $x = array("\x7f" => '', "\x80" => '&#8364;', "\x81" => '', "\x83" => '&#402;', "\x85" => '&#8230;', "\x86" => '&#8224;', "\x87" => '&#8225;', "\x88" => '&#710;', "\x89" => '&#8240;', "\x8a" => '&#352;', "\x8b" => '&#8249;', "\x8c" => '&#338;', "\x8d" => '', "\x8e" => '&#381;', "\x8f" => '', "\x90" => '', "\x95" => '&#8226;', "\x96" => '&#8211;', "\x97" => '&#8212;', "\x98" => '&#732;', "\x99" => '&#8482;', "\x9a" => '&#353;', "\x9b" => '&#8250;', "\x9c" => '&#339;', "\x9d" => '', "\x9e" => '&#382;', "\x9f" => '&#376;');
        // setting 1 keeps typographic quotes (as references); any other truthy setting flattens them to plain quotes
        $x = $x + (1 == $C['clean_ms_char'] ? array("\x82" => '&#8218;', "\x84" => '&#8222;', "\x91" => '&#8216;', "\x92" => '&#8217;', "\x93" => '&#8220;', "\x94" => '&#8221;') : array("\x82" => '\'', "\x84" => '"', "\x91" => '\'', "\x92" => '\'', "\x93" => '"', "\x94" => '"'));
        $t = strtr($t, $x);
    }
    if ($C['cdata'] or $C['comment']) {
        $t = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\[CDATA\[.*?\]\]))>`sm', 'hl_cmtcd', $t);
    }
    // Escape every ampersand first, then let hl_ent() re-validate whatever looks like an
    // entity; the pattern therefore has to start with the already-escaped '&amp;'.
    $t = preg_replace_callback('`&amp;([a-zA-Z][a-zA-Z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'hl_ent', str_replace('&', '&amp;', $t));
    if ($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])) {
        $GLOBALS['hl_Ids'] = array();
    }
    if ($C['hook']) {
        $t = $C['hook']($t, $C, $S);
    }
    if ($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])) {
        $GLOBALS[$C['show_setting']] = array('config' => $C, 'spec' => $S, 'time' => microtime());
    }

    // main
    $t = preg_replace_callback('`<(?:(?:\s|$)|(?:[^>]*(?:>|$)))|>`m', 'hl_tag', $t);
    $t = $C['balance'] ? hl_bal($t, $C['keep_bad'], $C['parent']) : $t;
    // turn the control-byte placeholders left by the comment/CDATA handler back into markup
    $t = (($C['cdata'] or $C['comment']) && false !== strpos($t, "\x01")) ? str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $t) : $t;
    $t = $C['tidy'] ? hl_tidy($t, $C['tidy'], $C['parent']) : $t;
    unset($C, $e);
    if (isset($reC)) {
        $GLOBALS['C'] = $reC;
    }
    if (isset($reS)) {
        $GLOBALS['S'] = $reS;
    }

    return $t;
}
144
|
|
|
|
145
|
|
|
/**
 * Check an attribute value against a set of rules.
 *
 * Values of 'accesskey', 'class', 'itemtype' and 'rel' are split on spaces and
 * 'srcset' values on commas; each part is trimmed and tested on its own, and
 * only the parts that pass every rule are kept. Empty parts are not tested.
 *
 * Recognised rule keys: maxlen, minlen, maxval, minval, match, nomatch,
 * oneof, noneof ('|'-separated lists). Any other key (e.g. 'default') is
 * ignored while testing.
 *
 * @param string $a Attribute name.
 * @param string $t Attribute value.
 * @param array  $p Rules, keyed by rule name.
 *
 * @return string|int The surviving value, or $p['default'] if nothing survived
 *                    and a default is given, or 0 otherwise.
 */
function hl_attrval($a, $t, $p) {
    // check attr val against $S
    static $ma = array('accesskey', 'class', 'itemtype', 'rel');
    $s = in_array($a, $ma, true) ? ' ' : ('srcset' == $a ? ',' : '');
    $r = array();
    $t = !empty($s) ? explode($s, $t) : array($t);
    foreach ($t as $tk => $tv) {
        $o = 1;
        $tv = trim($tv);
        $l = strlen($tv);
        foreach ($p as $k => $v) {
            if (!$l) {
                continue;
            }
            switch ($k) {
                case 'maxlen':
                    if ($l > $v) {
                        $o = 0;
                    }
                    break;
                case 'minlen':
                    if ($l < $v) {
                        $o = 0;
                    }
                    break;
                case 'maxval':
                    if ((float) ($tv) > $v) {
                        $o = 0;
                    }
                    break;
                case 'minval':
                    if ((float) ($tv) < $v) {
                        $o = 0;
                    }
                    break;
                case 'match':
                    if (!preg_match($v, $tv)) {
                        $o = 0;
                    }
                    break;
                case 'nomatch':
                    if (preg_match($v, $tv)) {
                        $o = 0;
                    }
                    break;
                case 'oneof':
                    // Strict comparison: with '==' two numeric strings are compared as
                    // numbers, so e.g. '1e1' or '010' would be accepted for '10'.
                    $m = 0;
                    foreach (explode('|', $v) as $n) {
                        if ($tv === $n) {
                            $m = 1;
                            break;
                        }
                    }
                    $o = $m;
                    break;
                case 'noneof':
                    // Strict for the same reason: '01' is not the listed value '1'.
                    $m = 1;
                    foreach (explode('|', $v) as $n) {
                        if ($tv === $n) {
                            $m = 0;
                            break;
                        }
                    }
                    $o = $m;
                    break;
                default:
                    break;
            }
            // first failed rule rejects this part; no need to test the rest
            if (!$o) {
                break;
            }
        }
        if ($o) {
            $r[] = $tv;
        }
    }
    // comma-separated lists are re-joined with a comma followed by a space
    if (',' == $s) {
        $s = ', ';
    }
    $r = implode($s, $r);

    return isset($r[0]) ? $r : (isset($p['default']) ? $p['default'] : 0);
}
225
|
|
|
|
226
|
|
|
/**
 * Balance tags and enforce element nesting rules.
 *
 * The text is split on '<' and walked piece by piece while a stack ($q) of
 * open, non-empty elements is maintained. Closing tags are added where
 * needed, disallowed opening tags are dropped or kept in escaped form, and
 * text is wrapped in <div> inside parents that take only block children.
 * Output is collected through an output buffer.
 *
 * Reads $GLOBALS['C']['direct_list_nest'].
 *
 * @param string $t  Text; the parsing below expects every '<' to start a tag
 *                   (presumably guaranteed by an earlier tag pass - confirm with caller).
 * @param int    $do How to treat disallowed tags and text, as used below:
 *                   1 keeps a bad tag with its angle brackets escaped; 3 and 5
 *                   do so only where the parent allows text; below 3 text is
 *                   always kept; above 4 the white-space of dropped text is kept.
 * @param string $in Name of the element the text will be placed in; unknown
 *                   names fall back to 'div'.
 *
 * @return string Balanced text.
 */
function hl_bal($t, $do = 1, $in = 'div') {
    // balance tags
    // by content
    $cB = array('blockquote' => 1, 'form' => 1, 'map' => 1, 'noscript' => 1); // Block
    $cE = array('area' => 1, 'br' => 1, 'col' => 1, 'command' => 1, 'embed' => 1, 'hr' => 1, 'img' => 1, 'input' => 1, 'isindex' => 1, 'keygen' => 1, 'link' => 1, 'meta' => 1, 'param' => 1, 'source' => 1, 'track' => 1, 'wbr' => 1); // Empty
    $cF = array('a' => 1, 'article' => 1, 'aside' => 1, 'audio' => 1, 'button' => 1, 'canvas' => 1, 'del' => 1, 'details' => 1, 'div' => 1, 'dd' => 1, 'fieldset' => 1, 'figure' => 1, 'footer' => 1, 'header' => 1, 'iframe' => 1, 'ins' => 1, 'li' => 1, 'main' => 1, 'menu' => 1, 'nav' => 1, 'noscript' => 1, 'object' => 1, 'section' => 1, 'style' => 1, 'td' => 1, 'th' => 1, 'video' => 1); // Flow; later context-wise dynamic move of ins & del to $cI
    $cI = array('abbr' => 1, 'acronym' => 1, 'address' => 1, 'b' => 1, 'bdi' => 1, 'bdo' => 1, 'big' => 1, 'caption' => 1, 'cite' => 1, 'code' => 1, 'data' => 1, 'datalist' => 1, 'dfn' => 1, 'dt' => 1, 'em' => 1, 'figcaption' => 1, 'font' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hgroup' => 1, 'i' => 1, 'kbd' => 1, 'label' => 1, 'legend' => 1, 'mark' => 1, 'meter' => 1, 'output' => 1, 'p' => 1, 'pre' => 1, 'progress' => 1, 'q' => 1, 'rb' => 1, 'rt' => 1, 's' => 1, 'samp' => 1, 'small' => 1, 'span' => 1, 'strike' => 1, 'strong' => 1, 'sub' => 1, 'summary' => 1, 'sup' => 1, 'time' => 1, 'tt' => 1, 'u' => 1, 'var' => 1); // Inline
    $cN = array('a' => array('a' => 1, 'address' => 1, 'button' => 1, 'details' => 1, 'embed' => 1, 'keygen' => 1, 'label' => 1, 'select' => 1, 'textarea' => 1), 'address' => array('address' => 1, 'article' => 1, 'aside' => 1, 'header' => 1, 'keygen' => 1, 'footer' => 1, 'nav' => 1, 'section' => 1), 'button' => array('a' => 1, 'address' => 1, 'button' => 1, 'details' => 1, 'embed' => 1, 'fieldset' => 1, 'form' => 1, 'iframe' => 1, 'input' => 1, 'keygen' => 1, 'label' => 1, 'select' => 1, 'textarea' => 1), 'fieldset' => array('fieldset' => 1), 'footer' => array('header' => 1, 'footer' => 1), 'form' => array('form' => 1), 'header' => array('header' => 1, 'footer' => 1), 'label' => array('label' => 1), 'main' => array('main' => 1), 'meter' => array('meter' => 1), 'noscript' => array('script' => 1), 'pre' => array('big' => 1, 'font' => 1, 'img' => 1, 'object' => 1, 'script' => 1, 'small' => 1, 'sub' => 1, 'sup' => 1), 'progress' => array('progress' => 1), 'rb' => array('ruby' => 1), 'rt' => array('ruby' => 1), 'time' => array('time' => 1)); // Illegal
    $cN2 = array_keys($cN);
    $cS = array('colgroup' => array('col' => 1), 'datalist' => array('option' => 1), 'dir' => array('li' => 1), 'dl' => array('dd' => 1, 'dt' => 1), 'hgroup' => array('h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1), 'menu' => array('li' => 1), 'ol' => array('li' => 1), 'optgroup' => array('option' => 1), 'option' => array('#pcdata' => 1), 'rbc' => array('rb' => 1), 'rp' => array('#pcdata' => 1), 'rtc' => array('rt' => 1), 'ruby' => array('rb' => 1, 'rbc' => 1, 'rp' => 1, 'rt' => 1, 'rtc' => 1), 'select' => array('optgroup' => 1, 'option' => 1), 'script' => array('#pcdata' => 1), 'table' => array('caption' => 1, 'col' => 1, 'colgroup' => 1, 'tfoot' => 1, 'tbody' => 1, 'tr' => 1, 'thead' => 1), 'tbody' => array('tr' => 1), 'tfoot' => array('tr' => 1), 'textarea' => array('#pcdata' => 1), 'thead' => array('tr' => 1), 'tr' => array('td' => 1, 'th' => 1), 'ul' => array('li' => 1)); // Specific - immediate parent-child
    if ($GLOBALS['C']['direct_list_nest']) {
        $cS['ol'] = $cS['ul'] = $cS['menu'] += array('menu' => 1, 'ol' => 1, 'ul' => 1);
    }
    $cO = array('address' => array('p' => 1), 'applet' => array('param' => 1), 'audio' => array('source' => 1, 'track' => 1), 'blockquote' => array('script' => 1), 'details' => array('summary' => 1), 'fieldset' => array('legend' => 1, '#pcdata' => 1), 'figure' => array('figcaption' => 1), 'form' => array('script' => 1), 'map' => array('area' => 1), 'object' => array('param' => 1, 'embed' => 1), 'video' => array('source' => 1, 'track' => 1)); // Other
    $cT = array('colgroup' => 1, 'dd' => 1, 'dt' => 1, 'li' => 1, 'option' => 1, 'p' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1); // Omitable closing
    // block/inline type; a/ins/del both type; #pcdata: text
    $eB = array('a' => 1, 'address' => 1, 'article' => 1, 'aside' => 1, 'blockquote' => 1, 'center' => 1, 'del' => 1, 'details' => 1, 'dir' => 1, 'dl' => 1, 'div' => 1, 'fieldset' => 1, 'figure' => 1, 'footer' => 1, 'form' => 1, 'ins' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'header' => 1, 'hr' => 1, 'isindex' => 1, 'main' => 1, 'menu' => 1, 'nav' => 1, 'noscript' => 1, 'ol' => 1, 'p' => 1, 'pre' => 1, 'section' => 1, 'style' => 1, 'table' => 1, 'ul' => 1);
    $eI = array('#pcdata' => 1, 'a' => 1, 'abbr' => 1, 'acronym' => 1, 'applet' => 1, 'audio' => 1, 'b' => 1, 'bdi' => 1, 'bdo' => 1, 'big' => 1, 'br' => 1, 'button' => 1, 'canvas' => 1, 'cite' => 1, 'code' => 1, 'command' => 1, 'data' => 1, 'datalist' => 1, 'del' => 1, 'dfn' => 1, 'em' => 1, 'embed' => 1, 'figcaption' => 1, 'font' => 1, 'i' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'ins' => 1, 'kbd' => 1, 'label' => 1, 'link' => 1, 'map' => 1, 'mark' => 1, 'meta' => 1, 'meter' => 1, 'object' => 1, 'output' => 1, 'progress' => 1, 'q' => 1, 'ruby' => 1, 's' => 1, 'samp' => 1, 'select' => 1, 'script' => 1, 'small' => 1, 'span' => 1, 'strike' => 1, 'strong' => 1, 'sub' => 1, 'summary' => 1, 'sup' => 1, 'textarea' => 1, 'time' => 1, 'tt' => 1, 'u' => 1, 'var' => 1, 'video' => 1, 'wbr' => 1);
    $eN = array('a' => 1, 'address' => 1, 'article' => 1, 'aside' => 1, 'big' => 1, 'button' => 1, 'details' => 1, 'embed' => 1, 'fieldset' => 1, 'font' => 1, 'footer' => 1, 'form' => 1, 'header' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'keygen' => 1, 'label' => 1, 'meter' => 1, 'nav' => 1, 'object' => 1, 'progress' => 1, 'ruby' => 1, 'script' => 1, 'select' => 1, 'small' => 1, 'sub' => 1, 'sup' => 1, 'textarea' => 1, 'time' => 1); // Exclude from specific ele; $cN values
    $eO = array('area' => 1, 'caption' => 1, 'col' => 1, 'colgroup' => 1, 'command' => 1, 'dd' => 1, 'dt' => 1, 'hgroup' => 1, 'keygen' => 1, 'legend' => 1, 'li' => 1, 'optgroup' => 1, 'option' => 1, 'param' => 1, 'rb' => 1, 'rbc' => 1, 'rp' => 1, 'rt' => 1, 'rtc' => 1, 'script' => 1, 'source' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'thead' => 1, 'th' => 1, 'tr' => 1, 'track' => 1); // Missing in $eB & $eI
    $eF = $eB + $eI;

    // $in sets allowed child
    $in = ((isset($eF[$in]) && '#pcdata' != $in) or isset($eO[$in])) ? $in : 'div';
    if (isset($cE[$in])) {
        // an empty element cannot hold markup: drop everything, or keep it as escaped text
        return !$do ? '' : str_replace(array('<', '>'), array('&lt;', '&gt;'), $t);
    }
    // Start from an empty set so a parent listed in none of the tables below
    // (e.g. 'applet', 'center') does not leave $inOk undefined.
    $inOk = array();
    if (isset($cS[$in])) {
        $inOk = $cS[$in];
    } elseif (isset($cI[$in])) {
        $inOk = $eI;
        $cI['del'] = 1;
        $cI['ins'] = 1;
    } elseif (isset($cF[$in])) {
        $inOk = $eF;
        unset($cI['del'], $cI['ins']);
    } elseif (isset($cB[$in])) {
        $inOk = $eB;
        unset($cI['del'], $cI['ins']);
    }
    if (isset($cO[$in])) {
        $inOk = $inOk + $cO[$in];
    }
    if (isset($cN[$in])) {
        $inOk = array_diff_assoc($inOk, $cN[$in]);
    }

    $t = explode('<', $t);
    $ok = $q = array(); // $q seq list of open non-empty ele
    ob_start();

    for ($i = -1, $ci = count($t); ++$i < $ci;) {
        // allowed $ok in parent $p
        if ($ql = count($q)) {
            // peek at the innermost open element
            $p = array_pop($q);
            $q[] = $p;
            if (isset($cS[$p])) {
                $ok = $cS[$p];
            } elseif (isset($cI[$p])) {
                $ok = $eI;
                $cI['del'] = 1;
                $cI['ins'] = 1;
            } elseif (isset($cF[$p])) {
                $ok = $eF;
                unset($cI['del'], $cI['ins']);
            } elseif (isset($cB[$p])) {
                $ok = $eB;
                unset($cI['del'], $cI['ins']);
            }
            if (isset($cO[$p])) {
                $ok = $ok + $cO[$p];
            }
            if (isset($cN[$p])) {
                $ok = array_diff_assoc($ok, $cN[$p]);
            }
        } else {
            $ok = $inOk;
            unset($cI['del'], $cI['ins']);
        }
        // bad tags, & ele content
        // $e still being set here means the previous piece's tag was rejected;
        // it is only ever written out with its angle brackets escaped
        if (isset($e) && (1 == $do or (isset($ok['#pcdata']) && (3 == $do or 5 == $do)))) {
            echo '&lt;', $s, $e, $a, '&gt;';
        }
        if (isset($x[0])) {
            if (strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))) {
                echo '<div>', $x, '</div>';
            } elseif ($do < 3 or isset($ok['#pcdata'])) {
                echo $x;
            } elseif (strpos($x, "\x02\x04")) {
                // text not allowed here: keep only the \x01\x02...\x02\x01 delimited chunks
                foreach (preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v) {
                    echo "\x01\x02" == substr($v, 0, 2) ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : '');
                }
            } elseif ($do > 4) {
                echo preg_replace('`\S`', '', $x);
            }
        }
        // get markup
        if (!preg_match('`^(/?)([a-z1-6]+)([^>]*)>(.*)`sm', $t[$i], $r)) {
            $x = $t[$i];
            continue;
        }
        $s = null;
        $e = null;
        $a = null;
        $x = null;
        // $s: '/' for a closing tag, $e: element name, $a: attribute string, $x: text after the tag
        list($all, $s, $e, $a, $x) = $r;
        // close tag
        if ($s) {
            if (isset($cE[$e]) or !in_array($e, $q)) {
                continue;
            } // Empty/unopen
            if ($p == $e) {
                array_pop($q);
                echo '</', $e, '>';
                unset($e);
                continue;
            } // Last open
            $add = ''; // Nesting - close open tags that need to be
            for ($j = -1, $cj = count($q); ++$j < $cj;) {
                if (($d = array_pop($q)) == $e) {
                    break;
                } else {
                    $add .= "</{$d}>";
                }
            }
            echo $add, '</', $e, '>';
            unset($e);
            continue;
        }
        // open tag
        // $cB ele needs $eB ele as child
        if (isset($cB[$e]) && strlen(trim($x))) {
            // re-queue the tag without its text and insert a <div> piece to carry the text
            $t[$i] = "{$e}{$a}>";
            array_splice($t, $i + 1, 0, 'div>'.$x);
            unset($e, $x);
            ++$ci;
            --$i;
            continue;
        }
        if ((($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql)) && !isset($eB[$e]) && !isset($ok[$e])) {
            // insert an opening <div> piece ahead of this one and process it first
            array_splice($t, $i, 0, 'div>');
            unset($e, $x);
            ++$ci;
            --$i;
            continue;
        }
        // if no open ele, $in = parent; mostly immediate parent-child relation should hold
        if (!$ql or !isset($eN[$e]) or !array_intersect($q, $cN2)) {
            if (!isset($ok[$e])) {
                if ($ql && isset($cT[$p])) {
                    // parent's closing tag is omissible: close it and retry this piece
                    echo '</', array_pop($q), '>';
                    unset($e, $x);
                    --$i;
                }
                continue;
            }
            if (!isset($cE[$e])) {
                $q[] = $e;
            }
            echo '<', $e, $a, '>';
            unset($e);
            continue;
        }
        // specific parent-child
        if (isset($cS[$p][$e])) {
            if (!isset($cE[$e])) {
                $q[] = $e;
            }
            echo '<', $e, $a, '>';
            unset($e);
            continue;
        }
        // nesting
        $add = '';
        $q2 = array();
        for ($k = -1, $kc = count($q); ++$k < $kc;) {
            $d = $q[$k];
            $ok2 = array();
            if (isset($cS[$d])) {
                $q2[] = $d;
                continue;
            }
            $ok2 = isset($cI[$d]) ? $eI : $eF;
            if (isset($cO[$d])) {
                $ok2 = $ok2 + $cO[$d];
            }
            if (isset($cN[$d])) {
                $ok2 = array_diff_assoc($ok2, $cN[$d]);
            }
            if (!isset($ok2[$e])) {
                if (!$k && !isset($inOk[$e])) {
                    continue 2;
                }
                // close this ancestor and everything opened after it, innermost first
                $add = "</{$d}>";
                for (; ++$k < $kc;) {
                    $add = "</{$q[$k]}>{$add}";
                }
                break;
            } else {
                $q2[] = $d;
            }
        }
        $q = $q2;
        if (!isset($cE[$e])) {
            $q[] = $e;
        }
        echo $add, '<', $e, $a, '>';
        unset($e);
        continue;
    }

    // end
    // flush whatever the last piece left pending, using the same rules as the loop head
    if ($ql = count($q)) {
        $p = array_pop($q);
        $q[] = $p;
        if (isset($cS[$p])) {
            $ok = $cS[$p];
        } elseif (isset($cI[$p])) {
            $ok = $eI;
            $cI['del'] = 1;
            $cI['ins'] = 1;
        } elseif (isset($cF[$p])) {
            $ok = $eF;
            unset($cI['del'], $cI['ins']);
        } elseif (isset($cB[$p])) {
            $ok = $eB;
            unset($cI['del'], $cI['ins']);
        }
        if (isset($cO[$p])) {
            $ok = $ok + $cO[$p];
        }
        if (isset($cN[$p])) {
            $ok = array_diff_assoc($ok, $cN[$p]);
        }
    } else {
        $ok = $inOk;
        unset($cI['del'], $cI['ins']);
    }
    if (isset($e) && (1 == $do or (isset($ok['#pcdata']) && (3 == $do or 5 == $do)))) {
        echo '&lt;', $s, $e, $a, '&gt;';
    }
    if (isset($x[0])) {
        if (strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))) {
            echo '<div>', $x, '</div>';
        } elseif ($do < 3 or isset($ok['#pcdata'])) {
            echo $x;
        } elseif (strpos($x, "\x02\x04")) {
            foreach (preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v) {
                echo "\x01\x02" == substr($v, 0, 2) ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : '');
            }
        } elseif ($do > 4) {
            echo preg_replace('`\S`', '', $x);
        }
    }
    // close everything still open, innermost first
    while (!empty($q) && ($e = array_pop($q))) {
        echo '</', $e, '>';
    }
    $o = ob_get_contents();
    ob_end_clean();

    return $o;
}
489
|
|
|
|
490
|
|
|
/**
 * Callback for comments and CDATA sections found by a regular-expression match.
 *
 * The global configuration decides the outcome per kind ('comment' or 'cdata'):
 * a falsy setting returns the match untouched, 1 removes it, 2 additionally
 * escapes '&', '<' and '>' in the content, and other values keep the content.
 * For comments with a setting below 4, runs of hyphens are collapsed to one and
 * a trailing space is ensured before the closing '--'.
 *
 * What is kept is returned wrapped in \x01/\x02 marker bytes with '&', '<' and
 * '>' replaced by \x03, \x04 and \x05 (presumably so later passes do not treat
 * it as markup; the caller is expected to map the bytes back - confirm there).
 *
 * @param array $t Match array; only element 0 (the full match) is used.
 *
 * @return string Replacement text.
 */
function hl_cmtcd($t) {
    // comment/CDATA sec handler
    $t = $t[0];
    global $C;
    // '<!--' has '-' at offset 3, '<![CDATA[' has 'C'
    $n = ('-' == $t[3]) ? 'comment' : 'cdata';
    $v = $C[$n];
    if (!$v) {
        return $t;
    }
    if (1 == $v) {
        return '';
    }
    if ('comment' == $n && $v < 4) {
        // strip '<!--' and '-->', then collapse hyphen runs
        $t = preg_replace('`--+`', '-', substr($t, 4, -3));
        if (' ' != substr($t, -1)) {
            $t .= ' ';
        }
    } else {
        // strip only the outer '<' and '>'
        $t = substr($t, 1, -1);
    }
    if (2 == $v) {
        // neutralise markup characters inside the content
        $t = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $t);
    }

    return str_replace(array('&', '<', '>'), array("\x03", "\x04", "\x05"), ('comment' == $n ? "\x01\x02\x04!--$t--\x05\x02\x01" : "\x01\x01\x04$t\x05\x01\x01"));
}
511
|
|
|
|
512
|
|
|
function hl_ent($t) {
    // entity handler
    //
    // $t is a regex match array; element 1 is the entity body without the
    // leading '&' and trailing ';' (e.g. 'amp', 'nbsp', '#160', '#xA0').
    // Returns the entity rewritten per $C['named_entity'] / $C['hexdec_entity'].
    // Unknown names and disallowed code points come back with the ampersand
    // itself escaped ('&amp;...'). When $C['and_mark'] is set, the leading '&'
    // is emitted as the \x06 marker byte instead.
    global $C;
    static $U = array('quot' => 1, 'amp' => 1, 'lt' => 1, 'gt' => 1);
    static $N = array('fnof' => '402', 'Alpha' => '913', 'Beta' => '914', 'Gamma' => '915', 'Delta' => '916', 'Epsilon' => '917', 'Zeta' => '918', 'Eta' => '919', 'Theta' => '920', 'Iota' => '921', 'Kappa' => '922', 'Lambda' => '923', 'Mu' => '924', 'Nu' => '925', 'Xi' => '926', 'Omicron' => '927', 'Pi' => '928', 'Rho' => '929', 'Sigma' => '931', 'Tau' => '932', 'Upsilon' => '933', 'Phi' => '934', 'Chi' => '935', 'Psi' => '936', 'Omega' => '937', 'alpha' => '945', 'beta' => '946', 'gamma' => '947', 'delta' => '948', 'epsilon' => '949', 'zeta' => '950', 'eta' => '951', 'theta' => '952', 'iota' => '953', 'kappa' => '954', 'lambda' => '955', 'mu' => '956', 'nu' => '957', 'xi' => '958', 'omicron' => '959', 'pi' => '960', 'rho' => '961', 'sigmaf' => '962', 'sigma' => '963', 'tau' => '964', 'upsilon' => '965', 'phi' => '966', 'chi' => '967', 'psi' => '968', 'omega' => '969', 'thetasym' => '977', 'upsih' => '978', 'piv' => '982', 'bull' => '8226', 'hellip' => '8230', 'prime' => '8242', 'Prime' => '8243', 'oline' => '8254', 'frasl' => '8260', 'weierp' => '8472', 'image' => '8465', 'real' => '8476', 'trade' => '8482', 'alefsym' => '8501', 'larr' => '8592', 'uarr' => '8593', 'rarr' => '8594', 'darr' => '8595', 'harr' => '8596', 'crarr' => '8629', 'lArr' => '8656', 'uArr' => '8657', 'rArr' => '8658', 'dArr' => '8659', 'hArr' => '8660', 'forall' => '8704', 'part' => '8706', 'exist' => '8707', 'empty' => '8709', 'nabla' => '8711', 'isin' => '8712', 'notin' => '8713', 'ni' => '8715', 'prod' => '8719', 'sum' => '8721', 'minus' => '8722', 'lowast' => '8727', 'radic' => '8730', 'prop' => '8733', 'infin' => '8734', 'ang' => '8736', 'and' => '8743', 'or' => '8744', 'cap' => '8745', 'cup' => '8746', 'int' => '8747', 'there4' => '8756', 'sim' => '8764', 'cong' => '8773', 'asymp' => '8776', 'ne' => '8800', 'equiv' => '8801', 'le' => '8804', 'ge' => '8805', 'sub' => '8834', 'sup' => '8835', 'nsub' => '8836', 'sube' => '8838', 'supe' => '8839', 'oplus' => '8853', 'otimes' => '8855', 'perp' => '8869', 'sdot' => '8901', 'lceil' => '8968', 'rceil' => '8969', 'lfloor' => '8970', 'rfloor' => '8971', 'lang' => '9001', 'rang' => '9002', 'loz' => '9674', 'spades' => '9824', 'clubs' => '9827', 'hearts' => '9829', 'diams' => '9830', 'apos' => '39', 'OElig' => '338', 'oelig' => '339', 'Scaron' => '352', 'scaron' => '353', 'Yuml' => '376', 'circ' => '710', 'tilde' => '732', 'ensp' => '8194', 'emsp' => '8195', 'thinsp' => '8201', 'zwnj' => '8204', 'zwj' => '8205', 'lrm' => '8206', 'rlm' => '8207', 'ndash' => '8211', 'mdash' => '8212', 'lsquo' => '8216', 'rsquo' => '8217', 'sbquo' => '8218', 'ldquo' => '8220', 'rdquo' => '8221', 'bdquo' => '8222', 'dagger' => '8224', 'Dagger' => '8225', 'permil' => '8240', 'lsaquo' => '8249', 'rsaquo' => '8250', 'euro' => '8364', 'nbsp' => '160', 'iexcl' => '161', 'cent' => '162', 'pound' => '163', 'curren' => '164', 'yen' => '165', 'brvbar' => '166', 'sect' => '167', 'uml' => '168', 'copy' => '169', 'ordf' => '170', 'laquo' => '171', 'not' => '172', 'shy' => '173', 'reg' => '174', 'macr' => '175', 'deg' => '176', 'plusmn' => '177', 'sup2' => '178', 'sup3' => '179', 'acute' => '180', 'micro' => '181', 'para' => '182', 'middot' => '183', 'cedil' => '184', 'sup1' => '185', 'ordm' => '186', 'raquo' => '187', 'frac14' => '188', 'frac12' => '189', 'frac34' => '190', 'iquest' => '191', 'Agrave' => '192', 'Aacute' => '193', 'Acirc' => '194', 'Atilde' => '195', 'Auml' => '196', 'Aring' => '197', 'AElig' => '198', 'Ccedil' => '199', 'Egrave' => '200', 'Eacute' => '201', 'Ecirc' => '202', 'Euml' => '203', 'Igrave' => '204', 'Iacute' => '205', 'Icirc' => '206', 'Iuml' => '207', 'ETH' => '208', 'Ntilde' => '209', 'Ograve' => '210', 'Oacute' => '211', 'Ocirc' => '212', 'Otilde' => '213', 'Ouml' => '214', 'times' => '215', 'Oslash' => '216', 'Ugrave' => '217', 'Uacute' => '218', 'Ucirc' => '219', 'Uuml' => '220', 'Yacute' => '221', 'THORN' => '222', 'szlig' => '223', 'agrave' => '224', 'aacute' => '225', 'acirc' => '226', 'atilde' => '227', 'auml' => '228', 'aring' => '229', 'aelig' => '230', 'ccedil' => '231', 'egrave' => '232', 'eacute' => '233', 'ecirc' => '234', 'euml' => '235', 'igrave' => '236', 'iacute' => '237', 'icirc' => '238', 'iuml' => '239', 'eth' => '240', 'ntilde' => '241', 'ograve' => '242', 'oacute' => '243', 'ocirc' => '244', 'otilde' => '245', 'ouml' => '246', 'divide' => '247', 'oslash' => '248', 'ugrave' => '249', 'uacute' => '250', 'ucirc' => '251', 'uuml' => '252', 'yacute' => '253', 'thorn' => '254', 'yuml' => '255');

    $name = $t[1];
    $amp = $C['and_mark'] ? "\x06" : '&';

    // Named entity
    if ('#' != $name[0]) {
        if (isset($U[$name])) {
            // quot/amp/lt/gt always stay named
            $body = $name;
        } elseif (!isset($N[$name])) {
            // Not in either table: escape the ampersand itself
            $body = 'amp;'.$name;
        } elseif ($C['named_entity']) {
            $body = $name;
        } elseif ($C['hexdec_entity'] > 1) {
            $body = '#x'.dechex($N[$name]);
        } else {
            $body = '#'.$N[$name];
        }

        return $amp.$body.';';
    }

    // Numeric entity: $ref is the digits, or 'x' followed by hex digits
    $ref = substr($name, 1);
    $isDecimal = ctype_digit($ref);
    $n = $isDecimal ? intval($ref) : hexdec(substr($ref, 1));

    // Code points that are not let through as character references
    $rejected = $n < 9
        || ($n > 13 && $n < 32)
        || 11 == $n
        || 12 == $n
        || ($n > 126 && $n < 160 && 133 != $n)
        || ($n > 55295 && ($n < 57344 || ($n > 64975 && $n < 64992) || 65534 == $n || 65535 == $n || $n > 1114111));
    if ($rejected) {
        return $amp."amp;#{$ref};";
    }

    // Keep decimal notation unless the config asks for hex
    $useDecimal = ($isDecimal && $C['hexdec_entity'] < 2) || !$C['hexdec_entity'];

    return $amp.'#'.($useDecimal ? $n : 'x'.dechex($n)).';';
}
527
|
|
|
|
528
|
|
|
function hl_prot($p, $c = null) {
    // check URL scheme
    //
    // Two calling forms:
    //   hl_prot($url, $attr) - check a single URL for attribute $attr
    //   hl_prot($match)      - regex callback; $match[1] and $match[3] are the
    //                          text around the URL, $match[2] the URL itself;
    //                          the 'style' scheme list is used
    // The allowed schemes come from $C['schemes'][$attr], falling back to
    // $C['schemes']['*']. URLs with a scheme that is not listed get a
    // 'denied:' prefix. With $C['abs_url'] set, accepted URLs are additionally
    // made relative (-1) or absolute against $C['base_url'].
    global $C;
    $denied = 'denied:';
    $pre = '';
    $post = '';
    if (null == $c) {
        $c = 'style';
        $pre = $p[1];
        $post = $p[3];
        $url = trim($p[2]);
    } else {
        $url = $p;
    }

    $allowed = isset($C['schemes'][$c]) ? $C['schemes'][$c] : $C['schemes']['*'];

    // A '!' entry in the scheme list denies every URL
    if (isset($allowed['!']) && substr($url, 0, 7) != $denied) {
        $url = $denied.$url;
    }

    // All ok, frag, query, param
    if (isset($allowed['*']) or !strcspn($url, '#?;') or (substr($url, 0, 7) == $denied)) {
        return $pre.$url.$post;
    }

    // Denied prot
    $hasScheme = preg_match('`^([^:?[@!$()*,=/\'\]]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $url, $m);
    if ($hasScheme && !isset($allowed[strtolower($m[1])])) {
        return $pre.$denied.$url.$post;
    }

    if (!$C['abs_url']) {
        return $pre.$url.$post;
    }

    // Make url rel
    if (-1 == $C['abs_url'] && 0 === strpos($url, $C['base_url'])) {
        return $pre.substr($url, strlen($C['base_url'])).$post;
    }

    // Already carries a scheme: nothing to resolve
    if (!empty($m[1])) {
        return $pre.$url.$post;
    }

    // Make URL abs
    if ('//' == substr($url, 0, 2)) {
        // Scheme-relative: borrow the scheme of the base URL
        $url = substr($C['base_url'], 0, strpos($C['base_url'], ':') + 1).$url;
    } elseif ('/' == $url[0]) {
        // Host-relative: prepend scheme and host of the base URL
        $url = preg_replace('`(^.+?://[^/]+)(.*)`', '$1', $C['base_url']).$url;
    } elseif (strcspn($url, './')) {
        // Plain relative path
        $url = $C['base_url'].$url;
    } else {
        // Starts with '.': resolve './' and '../' against the base path
        preg_match('`^([a-zA-Z\d\-+.]+://[^/]+)(.*)`', $C['base_url'], $m);
        $url = preg_replace('`(?<=/)\./`', '', $m[2].$url);
        $parentRef = '`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`';
        while (preg_match($parentRef, $url)) {
            $url = preg_replace($parentRef, '', $url);
        }
        $url = $m[1].$url;
    }

    return $pre.$url.$post;
}
572
|
|
|
|
573
|
|
|
function hl_regex($p) {
    // check regex
    //
    // Returns 1 when $p is usable as a PCRE pattern, otherwise 0 (also for an
    // empty value). The pattern is tried against an empty subject with error
    // display switched off. It counts as invalid when preg_match() itself
    // reports failure or when an error was recorded during the call.
    if (empty($p)) {
        return 0;
    }

    // Newer PHP: error_clear_last()/error_get_last(); older: $php_errormsg
    $v = function_exists('error_clear_last') && function_exists('error_get_last');
    if ($v) {
        error_clear_last();
    } else {
        if ($t = ini_get('track_errors')) {
            // Remember any pending message so it can be put back afterwards
            $o = isset($php_errormsg) ? $php_errormsg : null;
        } else {
            ini_set('track_errors', 1);
        }
        unset($php_errormsg);
    }

    $d = ini_get('display_errors');
    if ($d) {
        ini_set('display_errors', 0);
    }

    // preg_match() returns false on failure; check it directly so the verdict
    // does not hinge solely on a warning having been recorded
    $matched = false !== preg_match($p, '');

    if ($v) {
        $r = ($matched && null == error_get_last()) ? 1 : 0;
    } else {
        $r = ($matched && !isset($php_errormsg)) ? 1 : 0;
        if ($t) {
            $php_errormsg = isset($o) ? $o : null;
        } else {
            ini_set('track_errors', 0);
        }
    }

    if ($d) {
        // Put back the original setting (it may be e.g. 'stderr', not just 1)
        ini_set('display_errors', $d);
    }

    return $r;
}
608
|
|
|
|
609
|
|
|
function hl_spec($t) {
    // final $spec
    //
    // Parses a spec string into a per-element rule array. The string is a
    // ';'-separated list of 'elements=attributes' items; both sides are
    // ','-separated. Each attribute is one of:
    //   name            - listed without rules   => $s[element][name] = 1
    //   name(k=v/k=v)   - listed with rules      => $s[element][name][k] = v
    //   -name           - denied                 => $s[element]['n'][name] = 1
    //   -*              - all denied             => $s[element]['n']['*'] = 1
    // Rule values may be double-quoted to carry otherwise special characters;
    // inside quotes a backtick escapes a double quote.
    $s = array();
    if (!function_exists('hl_aux1')) {
        // Masks the special characters inside one quoted value with control
        // bytes and strips the surrounding quotes
        function hl_aux1($m) {
            return substr(str_replace(array(';', '|', '~', ' ', ',', '/', '(', ')', '`"'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", '"'), $m[0]), 1, -1);
        }
    }
    // Mask quoted values first, then remove all remaining whitespace
    $t = str_replace(array("\t", "\r", "\n", ' '), '', preg_replace_callback('/"(?>(`.|[^"])*)"/sm', 'hl_aux1', trim($t)));
    // Items are processed from last to first
    for ($i = count(($t = explode(';', $t))); --$i >= 0;) {
        $w = $t[$i];
        // Skip items without '=' or with nothing after it
        if (empty($w) or false === ($e = strpos($w, '=')) or !strlen(($a = substr($w, $e + 1)))) {
            continue;
        }
        // $y: attributes listed (with or without rules); $n: attributes denied
        $y = $n = array();
        foreach (explode(',', $a) as $v) {
            if (!preg_match('`^([a-z:\-\*]+)(?:\((.*?)\))?`i', $v, $m)) {
                continue;
            }
            if ('-*' == ($x = strtolower($m[1]))) {
                $n['*'] = 1;
                continue;
            }
            if ('-' == $x[0]) {
                $n[substr($x, 1)] = 1;
                continue;
            }
            if (!isset($m[2])) {
                $y[$x] = 1;
                continue;
            }
            foreach (explode('/', $m[2]) as $m) {
                // Rule keys are at least 5 characters long, so '=' must sit at
                // offset 5 or later
                if (empty($m) or 0 == ($p = strpos($m, '=')) or $p < 5) {
                    // Malformed rule segment: list the attribute without
                    // rules and stop. Carrying on would attempt
                    // $y[$x][..] = .. on the integer 1, which only warns on
                    // older PHP (leaving the value at 1) but throws an Error
                    // on PHP 8.
                    $y[$x] = 1;
                    break;
                }
                // Store the rule with the masked characters restored
                $y[$x][strtolower(substr($m, 0, $p))] = str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08"), array(';', '|', '~', ' ', ',', '/', '(', ')'), substr($m, $p + 1));
            }
            // Discard regex rules that hl_regex() rejects
            if (isset($y[$x]['match']) && !hl_regex($y[$x]['match'])) {
                unset($y[$x]['match']);
            }
            if (isset($y[$x]['nomatch']) && !hl_regex($y[$x]['nomatch'])) {
                unset($y[$x]['nomatch']);
            }
        }
        if (!count($y) && !count($n)) {
            continue;
        }
        // Attach the collected rules to every element named on the left side
        foreach (explode(',', substr($w, 0, $e)) as $v) {
            if (!strlen(($v = strtolower($v)))) {
                continue;
            }
            if (count($y)) {
                if (!isset($s[$v])) {
                    $s[$v] = $y;
                } else {
                    $s[$v] = array_merge($s[$v], $y);
                }
            }
            if (count($n)) {
                if (!isset($s[$v]['n'])) {
                    $s[$v]['n'] = $n;
                } else {
                    $s[$v]['n'] = array_merge($s[$v]['n'], $n);
                }
            }
        }
    }

    return $s;
}
680
|
|
|
|
681
|
|
|
function hl_tag($t) { |
682
|
|
|
// tag/attribute handler |
683
|
1637 |
|
global $C; |
684
|
1637 |
|
$t = $t[0]; |
685
|
|
|
// invalid < > |
686
|
1637 |
|
if ('< ' == $t) { |
687
|
|
|
return '< '; |
688
|
|
|
} |
689
|
1637 |
|
if ('>' == $t) { |
690
|
200 |
|
return '>'; |
691
|
|
|
} |
692
|
1637 |
|
if (!preg_match('`^<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>$`m', $t, $m)) { |
693
|
91 |
|
return str_replace(array('<', '>'), array('<', '>'), $t); |
694
|
1583 |
|
} elseif (!isset($C['elements'][($e = strtolower($m[2]))])) { |
695
|
721 |
|
return ($C['keep_bad'] % 2) ? str_replace(array('<', '>'), array('<', '>'), $t) : ''; |
696
|
|
|
} |
697
|
|
|
// attr string |
698
|
946 |
|
$a = str_replace(array("\n", "\r", "\t"), ' ', trim($m[3])); |
699
|
|
|
// tag transform |
700
|
946 |
|
static $eD = array('acronym' => 1, 'applet' => 1, 'big' => 1, 'center' => 1, 'dir' => 1, 'font' => 1, 'isindex' => 1, 's' => 1, 'strike' => 1, 'tt' => 1); // Deprecated |
701
|
946 |
|
if ($C['make_tag_strict'] && isset($eD[$e])) { |
702
|
1 |
|
$trt = hl_tag2($e, $a, $C['make_tag_strict']); |
703
|
1 |
|
if (!$e) { |
704
|
|
|
return ($C['keep_bad'] % 2) ? str_replace(array('<', '>'), array('<', '>'), $t) : ''; |
705
|
|
|
} |
706
|
|
|
} |
707
|
|
|
// close tag |
708
|
946 |
|
static $eE = array('area' => 1, 'br' => 1, 'col' => 1, 'command' => 1, 'embed' => 1, 'hr' => 1, 'img' => 1, 'input' => 1, 'isindex' => 1, 'keygen' => 1, 'link' => 1, 'meta' => 1, 'param' => 1, 'source' => 1, 'track' => 1, 'wbr' => 1); // Empty ele |
709
|
946 |
|
if (!empty($m[1])) { |
710
|
311 |
|
return !isset($eE[$e]) ? (empty($C['hook_tag']) ? "</$e>" : $C['hook_tag']($e)) : (($C['keep_bad']) % 2 ? str_replace(array('<', '>'), array('<', '>'), $t) : ''); |
711
|
|
|
} |
712
|
|
|
|
713
|
|
|
// open tag & attr |
714
|
946 |
|
static $aN = array('abbr' => array('td' => 1, 'th' => 1), 'accept' => array('form' => 1, 'input' => 1), 'accept-charset' => array('form' => 1), 'action' => array('form' => 1), 'align' => array('applet' => 1, 'caption' => 1, 'col' => 1, 'colgroup' => 1, 'div' => 1, 'embed' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hr' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'legend' => 1, 'object' => 1, 'p' => 1, 'table' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1), 'allowfullscreen' => array('iframe' => 1), 'alt' => array('applet' => 1, 'area' => 1, 'img' => 1, 'input' => 1), 'archive' => array('applet' => 1, 'object' => 1), 'async' => array('script' => 1), 'autocomplete' => array('form' => 1, 'input' => 1), 'autofocus' => array('button' => 1, 'input' => 1, 'keygen' => 1, 'select' => 1, 'textarea' => 1), 'autoplay' => array('audio' => 1, 'video' => 1), 'axis' => array('td' => 1, 'th' => 1), 'bgcolor' => array('embed' => 1, 'table' => 1, 'td' => 1, 'th' => 1, 'tr' => 1), 'border' => array('img' => 1, 'object' => 1, 'table' => 1), 'bordercolor' => array('table' => 1, 'td' => 1, 'tr' => 1), 'cellpadding' => array('table' => 1), 'cellspacing' => array('table' => 1), 'challenge' => array('keygen' => 1), 'char' => array('col' => 1, 'colgroup' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1), 'charoff' => array('col' => 1, 'colgroup' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1), 'charset' => array('a' => 1, 'script' => 1), 'checked' => array('command' => 1, 'input' => 1), 'cite' => array('blockquote' => 1, 'del' => 1, 'ins' => 1, 'q' => 1), 'classid' => array('object' => 1), 'clear' => array('br' => 1), 'code' => array('applet' => 1), 'codebase' => array('applet' => 1, 'object' => 1), 'codetype' => array('object' => 1), 'color' => array('font' => 1), 'cols' => array('textarea' => 1), 'colspan' => array('td' => 1, 'th' => 1), 'compact' => array('dir' 
=> 1, 'dl' => 1, 'menu' => 1, 'ol' => 1, 'ul' => 1), 'content' => array('meta' => 1), 'controls' => array('audio' => 1, 'video' => 1), 'coords' => array('a' => 1, 'area' => 1), 'crossorigin' => array('img' => 1), 'data' => array('object' => 1), 'datetime' => array('del' => 1, 'ins' => 1, 'time' => 1), 'declare' => array('object' => 1), 'default' => array('track' => 1), 'defer' => array('script' => 1), 'dirname' => array('input' => 1, 'textarea' => 1), 'disabled' => array('button' => 1, 'command' => 1, 'fieldset' => 1, 'input' => 1, 'keygen' => 1, 'optgroup' => 1, 'option' => 1, 'select' => 1, 'textarea' => 1), 'download' => array('a' => 1), 'enctype' => array('form' => 1), 'face' => array('font' => 1), 'flashvars' => array('embed' => 1), 'for' => array('label' => 1, 'output' => 1), 'form' => array('button' => 1, 'fieldset' => 1, 'input' => 1, 'keygen' => 1, 'label' => 1, 'object' => 1, 'output' => 1, 'select' => 1, 'textarea' => 1), 'formaction' => array('button' => 1, 'input' => 1), 'formenctype' => array('button' => 1, 'input' => 1), 'formmethod' => array('button' => 1, 'input' => 1), 'formnovalidate' => array('button' => 1, 'input' => 1), 'formtarget' => array('button' => 1, 'input' => 1), 'frame' => array('table' => 1), 'frameborder' => array('iframe' => 1), 'headers' => array('td' => 1, 'th' => 1), 'height' => array('applet' => 1, 'canvas' => 1, 'embed' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'object' => 1, 'td' => 1, 'th' => 1, 'video' => 1), 'high' => array('meter' => 1), 'href' => array('a' => 1, 'area' => 1, 'link' => 1), 'hreflang' => array('a' => 1, 'area' => 1, 'link' => 1), 'hspace' => array('applet' => 1, 'embed' => 1, 'img' => 1, 'object' => 1), 'icon' => array('command' => 1), 'ismap' => array('img' => 1, 'input' => 1), 'keyparams' => array('keygen' => 1), 'keytype' => array('keygen' => 1), 'kind' => array('track' => 1), 'label' => array('command' => 1, 'menu' => 1, 'option' => 1, 'optgroup' => 1, 'track' => 1), 'language' => array('script' 
=> 1), 'list' => array('input' => 1), 'longdesc' => array('img' => 1, 'iframe' => 1), 'loop' => array('audio' => 1, 'video' => 1), 'low' => array('meter' => 1), 'marginheight' => array('iframe' => 1), 'marginwidth' => array('iframe' => 1), 'max' => array('input' => 1, 'meter' => 1, 'progress' => 1), 'maxlength' => array('input' => 1, 'textarea' => 1), 'media' => array('a' => 1, 'area' => 1, 'link' => 1, 'source' => 1, 'style' => 1), 'mediagroup' => array('audio' => 1, 'video' => 1), 'method' => array('form' => 1), 'min' => array('input' => 1, 'meter' => 1), 'model' => array('embed' => 1), 'multiple' => array('input' => 1, 'select' => 1), 'muted' => array('audio' => 1, 'video' => 1), 'name' => array('a' => 1, 'applet' => 1, 'button' => 1, 'embed' => 1, 'fieldset' => 1, 'form' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'keygen' => 1, 'map' => 1, 'object' => 1, 'output' => 1, 'param' => 1, 'select' => 1, 'textarea' => 1), 'nohref' => array('area' => 1), 'noshade' => array('hr' => 1), 'novalidate' => array('form' => 1), 'nowrap' => array('td' => 1, 'th' => 1), 'object' => array('applet' => 1), 'open' => array('details' => 1), 'optimum' => array('meter' => 1), 'pattern' => array('input' => 1), 'ping' => array('a' => 1, 'area' => 1), 'placeholder' => array('input' => 1, 'textarea' => 1), 'pluginspage' => array('embed' => 1), 'pluginurl' => array('embed' => 1), 'poster' => array('video' => 1), 'pqg' => array('keygen' => 1), 'preload' => array('audio' => 1, 'video' => 1), 'prompt' => array('isindex' => 1), 'pubdate' => array('time' => 1), 'radiogroup' => array('command' => 1), 'readonly' => array('input' => 1, 'textarea' => 1), 'rel' => array('a' => 1, 'area' => 1, 'link' => 1), 'required' => array('input' => 1, 'select' => 1, 'textarea' => 1), 'rev' => array('a' => 1), 'reversed' => array('ol' => 1), 'rows' => array('textarea' => 1), 'rowspan' => array('td' => 1, 'th' => 1), 'rules' => array('table' => 1), 'sandbox' => array('iframe' => 1), 'scope' => array('td' => 1, 
'th' => 1), 'scoped' => array('style' => 1), 'scrolling' => array('iframe' => 1), 'seamless' => array('iframe' => 1), 'selected' => array('option' => 1), 'shape' => array('a' => 1, 'area' => 1), 'size' => array('font' => 1, 'hr' => 1, 'input' => 1, 'select' => 1), 'sizes' => array('link' => 1), 'span' => array('col' => 1, 'colgroup' => 1), 'src' => array('audio' => 1, 'embed' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'script' => 1, 'source' => 1, 'track' => 1, 'video' => 1), 'srcdoc' => array('iframe' => 1), 'srclang' => array('track' => 1), 'srcset' => array('img' => 1), 'standby' => array('object' => 1), 'start' => array('ol' => 1), 'step' => array('input' => 1), 'summary' => array('table' => 1), 'target' => array('a' => 1, 'area' => 1, 'form' => 1), 'type' => array('a' => 1, 'area' => 1, 'button' => 1, 'command' => 1, 'embed' => 1, 'input' => 1, 'li' => 1, 'link' => 1, 'menu' => 1, 'object' => 1, 'ol' => 1, 'param' => 1, 'script' => 1, 'source' => 1, 'style' => 1, 'ul' => 1), 'typemustmatch' => array('object' => 1), 'usemap' => array('img' => 1, 'input' => 1, 'object' => 1), 'valign' => array('col' => 1, 'colgroup' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1), 'value' => array('button' => 1, 'data' => 1, 'input' => 1, 'li' => 1, 'meter' => 1, 'option' => 1, 'param' => 1, 'progress' => 1), 'valuetype' => array('param' => 1), 'vspace' => array('applet' => 1, 'embed' => 1, 'img' => 1, 'object' => 1), 'width' => array('applet' => 1, 'canvas' => 1, 'col' => 1, 'colgroup' => 1, 'embed' => 1, 'hr' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'object' => 1, 'pre' => 1, 'table' => 1, 'td' => 1, 'th' => 1, 'video' => 1), 'wmode' => array('embed' => 1), 'wrap' => array('textarea' => 1)); // Ele-specific |
715
|
946 |
|
static $aNA = array('aria-activedescendant' => 1, 'aria-atomic' => 1, 'aria-autocomplete' => 1, 'aria-busy' => 1, 'aria-checked' => 1, 'aria-controls' => 1, 'aria-describedby' => 1, 'aria-disabled' => 1, 'aria-dropeffect' => 1, 'aria-expanded' => 1, 'aria-flowto' => 1, 'aria-grabbed' => 1, 'aria-haspopup' => 1, 'aria-hidden' => 1, 'aria-invalid' => 1, 'aria-label' => 1, 'aria-labelledby' => 1, 'aria-level' => 1, 'aria-live' => 1, 'aria-multiline' => 1, 'aria-multiselectable' => 1, 'aria-orientation' => 1, 'aria-owns' => 1, 'aria-posinset' => 1, 'aria-pressed' => 1, 'aria-readonly' => 1, 'aria-relevant' => 1, 'aria-required' => 1, 'aria-selected' => 1, 'aria-setsize' => 1, 'aria-sort' => 1, 'aria-valuemax' => 1, 'aria-valuemin' => 1, 'aria-valuenow' => 1, 'aria-valuetext' => 1); // ARIA |
716
|
946 |
|
static $aNE = array('allowfullscreen' => 1, 'checkbox' => 1, 'checked' => 1, 'command' => 1, 'compact' => 1, 'declare' => 1, 'defer' => 1, 'default' => 1, 'disabled' => 1, 'hidden' => 1, 'inert' => 1, 'ismap' => 1, 'itemscope' => 1, 'multiple' => 1, 'nohref' => 1, 'noresize' => 1, 'noshade' => 1, 'nowrap' => 1, 'open' => 1, 'radio' => 1, 'readonly' => 1, 'required' => 1, 'reversed' => 1, 'selected' => 1); // Empty |
717
|
946 |
|
static $aNO = array('onabort' => 1, 'onblur' => 1, 'oncanplay' => 1, 'oncanplaythrough' => 1, 'onchange' => 1, 'onclick' => 1, 'oncontextmenu' => 1, 'oncopy' => 1, 'oncuechange' => 1, 'oncut' => 1, 'ondblclick' => 1, 'ondrag' => 1, 'ondragend' => 1, 'ondragenter' => 1, 'ondragleave' => 1, 'ondragover' => 1, 'ondragstart' => 1, 'ondrop' => 1, 'ondurationchange' => 1, 'onemptied' => 1, 'onended' => 1, 'onerror' => 1, 'onfocus' => 1, 'onformchange' => 1, 'onforminput' => 1, 'oninput' => 1, 'oninvalid' => 1, 'onkeydown' => 1, 'onkeypress' => 1, 'onkeyup' => 1, 'onload' => 1, 'onloadeddata' => 1, 'onloadedmetadata' => 1, 'onloadstart' => 1, 'onlostpointercapture' => 1, 'onmousedown' => 1, 'onmousemove' => 1, 'onmouseout' => 1, 'onmouseover' => 1, 'onmouseup' => 1, 'onmousewheel' => 1, 'onpaste' => 1, 'onpause' => 1, 'onplay' => 1, 'onplaying' => 1, 'onpointercancel' => 1, 'ongotpointercapture' => 1, 'onpointerdown' => 1, 'onpointerenter' => 1, 'onpointerleave' => 1, 'onpointermove' => 1, 'onpointerout' => 1, 'onpointerover' => 1, 'onpointerup' => 1, 'onprogress' => 1, 'onratechange' => 1, 'onreadystatechange' => 1, 'onreset' => 1, 'onsearch' => 1, 'onscroll' => 1, 'onseeked' => 1, 'onseeking' => 1, 'onselect' => 1, 'onshow' => 1, 'onstalled' => 1, 'onsubmit' => 1, 'onsuspend' => 1, 'ontimeupdate' => 1, 'ontoggle' => 1, 'ontouchcancel' => 1, 'ontouchend' => 1, 'ontouchmove' => 1, 'ontouchstart' => 1, 'onvolumechange' => 1, 'onwaiting' => 1, 'onwheel' => 1); // Event |
718
|
946 |
|
static $aNP = array('action' => 1, 'cite' => 1, 'classid' => 1, 'codebase' => 1, 'data' => 1, 'href' => 1, 'itemtype' => 1, 'longdesc' => 1, 'model' => 1, 'pluginspage' => 1, 'pluginurl' => 1, 'src' => 1, 'srcset' => 1, 'usemap' => 1); // Need scheme check; excludes style, on* |
719
|
946 |
|
static $aNU = array('accesskey' => 1, 'class' => 1, 'contenteditable' => 1, 'contextmenu' => 1, 'dir' => 1, 'draggable' => 1, 'dropzone' => 1, 'hidden' => 1, 'id' => 1, 'inert' => 1, 'itemid' => 1, 'itemprop' => 1, 'itemref' => 1, 'itemscope' => 1, 'itemtype' => 1, 'lang' => 1, 'role' => 1, 'spellcheck' => 1, 'style' => 1, 'tabindex' => 1, 'title' => 1, 'translate' => 1, 'xmlns' => 1, 'xml:base' => 1, 'xml:lang' => 1, 'xml:space' => 1); // Univ; excludes on*, aria* |
720
|
|
|
|
721
|
946 |
|
if ($C['lc_std_val']) { |
722
|
|
|
// predef attr vals for $eAL & $aNE ele |
723
|
946 |
|
static $aNL = array('all' => 1, 'auto' => 1, 'baseline' => 1, 'bottom' => 1, 'button' => 1, 'captions' => 1, 'center' => 1, 'chapters' => 1, 'char' => 1, 'checkbox' => 1, 'circle' => 1, 'col' => 1, 'colgroup' => 1, 'color' => 1, 'cols' => 1, 'data' => 1, 'date' => 1, 'datetime' => 1, 'datetime-local' => 1, 'default' => 1, 'descriptions' => 1, 'email' => 1, 'file' => 1, 'get' => 1, 'groups' => 1, 'hidden' => 1, 'image' => 1, 'justify' => 1, 'left' => 1, 'ltr' => 1, 'metadata' => 1, 'middle' => 1, 'month' => 1, 'none' => 1, 'number' => 1, 'object' => 1, 'password' => 1, 'poly' => 1, 'post' => 1, 'preserve' => 1, 'radio' => 1, 'range' => 1, 'rect' => 1, 'ref' => 1, 'reset' => 1, 'right' => 1, 'row' => 1, 'rowgroup' => 1, 'rows' => 1, 'rtl' => 1, 'search' => 1, 'submit' => 1, 'subtitles' => 1, 'tel' => 1, 'text' => 1, 'time' => 1, 'top' => 1, 'url' => 1, 'week' => 1); |
724
|
946 |
|
static $eAL = array('a' => 1, 'area' => 1, 'bdo' => 1, 'button' => 1, 'col' => 1, 'fieldset' => 1, 'form' => 1, 'img' => 1, 'input' => 1, 'object' => 1, 'ol' => 1, 'optgroup' => 1, 'option' => 1, 'param' => 1, 'script' => 1, 'select' => 1, 'table' => 1, 'td' => 1, 'textarea' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1, 'track' => 1, 'xml:space' => 1); |
725
|
946 |
|
$lcase = isset($eAL[$e]) ? 1 : 0; |
726
|
|
|
} |
727
|
|
|
|
728
|
946 |
|
$depTr = 0; |
729
|
946 |
|
if ($C['no_deprecated_attr']) { |
730
|
|
|
// depr attr:applicable ele |
731
|
946 |
|
static $aND = array('align' => array('caption' => 1, 'div' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hr' => 1, 'img' => 1, 'input' => 1, 'legend' => 1, 'object' => 1, 'p' => 1, 'table' => 1), 'bgcolor' => array('table' => 1, 'td' => 1, 'th' => 1, 'tr' => 1), 'border' => array('object' => 1), 'bordercolor' => array('table' => 1, 'td' => 1, 'tr' => 1), 'cellspacing' => array('table' => 1), 'clear' => array('br' => 1), 'compact' => array('dl' => 1, 'ol' => 1, 'ul' => 1), 'height' => array('td' => 1, 'th' => 1), 'hspace' => array('img' => 1, 'object' => 1), 'language' => array('script' => 1), 'name' => array('a' => 1, 'form' => 1, 'iframe' => 1, 'img' => 1, 'map' => 1), 'noshade' => array('hr' => 1), 'nowrap' => array('td' => 1, 'th' => 1), 'size' => array('hr' => 1), 'vspace' => array('img' => 1, 'object' => 1), 'width' => array('hr' => 1, 'pre' => 1, 'table' => 1, 'td' => 1, 'th' => 1)); |
732
|
946 |
|
static $eAD = array('a' => 1, 'br' => 1, 'caption' => 1, 'div' => 1, 'dl' => 1, 'form' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hr' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'legend' => 1, 'map' => 1, 'object' => 1, 'ol' => 1, 'p' => 1, 'pre' => 1, 'script' => 1, 'table' => 1, 'td' => 1, 'th' => 1, 'tr' => 1, 'ul' => 1); |
733
|
946 |
|
$depTr = isset($eAD[$e]) ? 1 : 0; |
734
|
|
|
} |
735
|
|
|
|
736
|
|
|
// attr name-vals |
737
|
946 |
|
if (false !== strpos($a, "\x01")) { |
738
|
|
|
$a = preg_replace('`\x01[^\x01]*\x01`', '', $a); |
739
|
|
|
} // No comment/CDATA sec |
740
|
946 |
|
$mode = 0; |
741
|
946 |
|
$a = trim($a, ' /'); |
742
|
946 |
|
$aA = array(); |
743
|
946 |
|
while (strlen($a)) { |
744
|
909 |
|
$w = 0; |
745
|
|
|
switch ($mode) { |
746
|
909 |
|
case 0: // Name |
747
|
909 |
|
if (preg_match('`^[a-zA-Z][^\s=/]+`', $a, $m)) { |
748
|
891 |
|
$nm = strtolower($m[0]); |
749
|
891 |
|
$w = $mode = 1; |
750
|
891 |
|
$a = ltrim(substr_replace($a, '', 0, strlen($m[0]))); |
751
|
|
|
} |
752
|
909 |
|
break; case 1: |
753
|
891 |
|
if ('=' == $a[0]) { // = |
754
|
891 |
|
$w = 1; |
755
|
891 |
|
$mode = 2; |
756
|
891 |
|
$a = ltrim($a, '= '); |
757
|
|
|
} else { // No val |
758
|
|
|
$w = 1; |
759
|
|
|
$mode = 0; |
760
|
|
|
$a = ltrim($a); |
761
|
|
|
$aA[$nm] = ''; |
|
|
|
|
762
|
|
|
} |
763
|
891 |
|
break; case 2: // Val |
764
|
891 |
|
if (preg_match('`^((?:"[^"]*")|(?:\'[^\']*\')|(?:\s*[^\s"\']+))(.*)`', $a, $m)) { |
765
|
891 |
|
$a = ltrim($m[2]); |
766
|
891 |
|
$m = $m[1]; |
767
|
891 |
|
$w = 1; |
768
|
891 |
|
$mode = 0; |
769
|
891 |
|
$aA[$nm] = trim(str_replace('<', '<', ('"' == $m[0] or '\'' == $m[0]) ? substr($m, 1, -1) : $m)); |
770
|
|
|
} |
771
|
891 |
|
break; |
772
|
|
|
} |
773
|
909 |
|
if (0 == $w) { // Parse errs, deal with space, " & ' |
774
|
173 |
|
$a = preg_replace('`^(?:"[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*`', '', $a); |
775
|
173 |
|
$mode = 0; |
776
|
|
|
} |
777
|
|
|
} |
778
|
946 |
|
if (1 == $mode) { |
779
|
1 |
|
$aA[$nm] = ''; |
780
|
|
|
} |
781
|
|
|
|
782
|
|
|
// clean attrs |
783
|
946 |
|
global $S; |
784
|
946 |
|
$rl = isset($S[$e]) ? $S[$e] : array(); |
785
|
946 |
|
$a = array(); |
786
|
946 |
|
$nfr = 0; |
787
|
946 |
|
$d = $C['deny_attribute']; |
788
|
946 |
|
foreach ($aA as $k => $v) { |
789
|
891 |
|
if (((isset($d['*']) ? isset($d[$k]) : !isset($d[$k])) && (isset($aN[$k][$e]) or isset($aNU[$k]) or (isset($aNO[$k]) && !isset($d['on*'])) or (isset($aNA[$k]) && !isset($d['aria*'])) or (!isset($d['data*']) && preg_match('`data-((?!xml)[^:]+$)`', $k))) && !isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])) { |
790
|
699 |
|
if (isset($aNE[$k])) { |
791
|
3 |
|
$v = $k; |
792
|
699 |
|
} elseif (!empty($lcase) && (('button' != $e or 'input' != $e) or 'type' == $k)) { // Rather loose but ?not cause issues |
793
|
574 |
|
$v = (isset($aNL[($v2 = strtolower($v))])) ? $v2 : $v; |
794
|
|
|
} |
795
|
699 |
|
if ('style' == $k && !$C['style_pass']) { |
796
|
40 |
|
if (false !== strpos($v, '&#')) { |
797
|
3 |
|
static $sC = array(' ' => ' ', ' ' => ' ', 'E' => 'e', 'E' => 'e', 'e' => 'e', 'e' => 'e', 'X' => 'x', 'X' => 'x', 'x' => 'x', 'x' => 'x', 'P' => 'p', 'P' => 'p', 'p' => 'p', 'p' => 'p', 'S' => 's', 'S' => 's', 's' => 's', 's' => 's', 'I' => 'i', 'I' => 'i', 'i' => 'i', 'i' => 'i', 'O' => 'o', 'O' => 'o', 'o' => 'o', 'o' => 'o', 'N' => 'n', 'N' => 'n', 'n' => 'n', 'n' => 'n', 'U' => 'u', 'U' => 'u', 'u' => 'u', 'u' => 'u', 'R' => 'r', 'R' => 'r', 'r' => 'r', 'r' => 'r', 'L' => 'l', 'L' => 'l', 'l' => 'l', 'l' => 'l', '(' => '(', '(' => '(', ')' => ')', ')' => ')', ' ' => ':', ' ' => ':', '"' => '"', '"' => '"', ''' => "'", ''' => "'", '/' => '/', '/' => '/', '*' => '*', '*' => '*', '\' => '\\', '\' => '\\'); |
798
|
3 |
|
$v = strtr($v, $sC); |
799
|
|
|
} |
800
|
40 |
|
$v = preg_replace_callback('`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+?)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS', 'hl_prot', $v); |
801
|
40 |
|
$v = !$C['css_expression'] ? preg_replace('`expression`i', ' ', preg_replace('`\\\\\S|(/|(%2f))(\*|(%2a))`i', ' ', $v)) : $v; |
802
|
664 |
|
} elseif (isset($aNP[$k]) or isset($aNO[$k])) { |
803
|
550 |
|
$v = str_replace('', ' ', (false !== strpos($v, '&') ? str_replace(array('­', '­', '­'), ' ', $v) : $v)); // double-quoted char: soft-hyphen; appears here as "" or hyphen or something else depending on viewing software |
804
|
550 |
|
if ('srcset' == $k) { |
805
|
|
|
$v2 = ''; |
806
|
|
|
foreach (explode(',', $v) as $k1 => $v1) { |
807
|
|
|
$v1 = explode(' ', ltrim($v1), 2); |
808
|
|
|
$k1 = isset($v1[1]) ? trim($v1[1]) : ''; |
809
|
|
|
$v1 = trim($v1[0]); |
810
|
|
|
if (isset($v1[0])) { |
811
|
|
|
$v2 .= hl_prot($v1, $k).(empty($k1) ? '' : ' '.$k1).', '; |
812
|
|
|
} |
813
|
|
|
} |
814
|
|
|
$v = trim($v2, ', '); |
815
|
|
|
} |
816
|
550 |
|
if ('itemtype' == $k) { |
817
|
|
|
$v2 = ''; |
818
|
|
|
foreach (explode(' ', $v) as $v1) { |
819
|
|
|
if (isset($v1[0])) { |
820
|
|
|
$v2 .= hl_prot($v1, $k).' '; |
821
|
|
|
} |
822
|
|
|
} |
823
|
|
|
$v = trim($v2, ' '); |
824
|
|
|
} else { |
825
|
550 |
|
$v = hl_prot($v, $k); |
826
|
|
|
} |
827
|
550 |
|
if ('href' == $k) { // X-spam |
828
|
161 |
|
if ($C['anti_mail_spam'] && 0 === strpos($v, 'mailto:')) { |
829
|
|
|
$v = str_replace('@', htmlspecialchars($C['anti_mail_spam']), $v); |
830
|
161 |
|
} elseif ($C['anti_link_spam']) { |
831
|
101 |
|
$r1 = $C['anti_link_spam'][1]; |
832
|
101 |
|
if (!empty($r1) && preg_match($r1, $v)) { |
833
|
|
|
continue; |
834
|
|
|
} |
835
|
101 |
|
$r0 = $C['anti_link_spam'][0]; |
836
|
101 |
|
if (!empty($r0) && preg_match($r0, $v)) { |
837
|
101 |
|
if (isset($a['rel'])) { |
838
|
42 |
|
if (!preg_match('`\bnofollow\b`i', $a['rel'])) { |
839
|
42 |
|
$a['rel'] .= ' nofollow'; |
840
|
|
|
} |
841
|
89 |
|
} elseif (isset($aA['rel'])) { |
842
|
1 |
|
if (!preg_match('`\bnofollow\b`i', $aA['rel'])) { |
843
|
1 |
|
$nfr = 1; |
844
|
|
|
} |
845
|
|
|
} else { |
846
|
88 |
|
$a['rel'] = 'nofollow'; |
847
|
|
|
} |
848
|
|
|
} |
849
|
|
|
} |
850
|
|
|
} |
851
|
|
|
} |
852
|
699 |
|
if (isset($rl[$k]) && is_array($rl[$k]) && 0 === ($v = hl_attrval($k, $v, $rl[$k]))) { |
853
|
|
|
continue; |
854
|
|
|
} |
855
|
699 |
|
$a[$k] = str_replace('"', '"', $v); |
856
|
|
|
} |
857
|
|
|
} |
858
|
946 |
|
if ($nfr) { |
859
|
|
|
$a['rel'] = isset($a['rel']) ? $a['rel'].' nofollow' : 'nofollow'; |
860
|
|
|
} |
861
|
|
|
|
862
|
|
|
// rqd attr |
863
|
946 |
|
static $eAR = array('area' => array('alt' => 'area'), 'bdo' => array('dir' => 'ltr'), 'command' => array('label' => ''), 'form' => array('action' => ''), 'img' => array('src' => '', 'alt' => 'image'), 'map' => array('name' => ''), 'optgroup' => array('label' => ''), 'param' => array('name' => ''), 'style' => array('scoped' => ''), 'textarea' => array('rows' => '10', 'cols' => '50')); |
864
|
946 |
|
if (isset($eAR[$e])) { |
865
|
497 |
|
foreach ($eAR[$e] as $k => $v) { |
866
|
497 |
|
if (!isset($a[$k])) { |
867
|
488 |
|
$a[$k] = isset($v[0]) ? $v : $k; |
868
|
|
|
} |
869
|
|
|
} |
870
|
|
|
} |
871
|
|
|
|
872
|
|
|
// depr attr |
873
|
946 |
|
if ($depTr) { |
874
|
790 |
|
$c = array(); |
875
|
790 |
|
foreach ($a as $k => $v) { |
876
|
650 |
|
if ('style' == $k or !isset($aND[$k][$e])) { |
877
|
649 |
|
continue; |
878
|
|
|
} |
879
|
4 |
|
$v = str_replace(array('\\', ':', ';', '&#'), '', $v); |
880
|
4 |
|
if ('align' == $k) { |
881
|
1 |
|
unset($a['align']); |
882
|
1 |
|
if ('img' == $e && ('left' == $v or 'right' == $v)) { |
883
|
|
|
$c[] = 'float: '.$v; |
884
|
1 |
|
} elseif (('div' == $e or 'table' == $e) && 'center' == $v) { |
885
|
|
|
$c[] = 'margin: auto'; |
886
|
|
|
} else { |
887
|
1 |
|
$c[] = 'text-align: '.$v; |
888
|
|
|
} |
889
|
3 |
|
} elseif ('bgcolor' == $k) { |
890
|
|
|
unset($a['bgcolor']); |
891
|
|
|
$c[] = 'background-color: '.$v; |
892
|
3 |
|
} elseif ('border' == $k) { |
893
|
|
|
unset($a['border']); |
894
|
|
|
$c[] = "border: {$v}px"; |
895
|
3 |
|
} elseif ('bordercolor' == $k) { |
896
|
|
|
unset($a['bordercolor']); |
897
|
|
|
$c[] = 'border-color: '.$v; |
898
|
3 |
|
} elseif ('cellspacing' == $k) { |
899
|
|
|
unset($a['cellspacing']); |
900
|
|
|
$c[] = "border-spacing: {$v}px"; |
901
|
3 |
View Code Duplication |
} elseif ('clear' == $k) { |
|
|
|
|
902
|
|
|
unset($a['clear']); |
903
|
|
|
$c[] = 'clear: '.('all' != $v ? $v : 'both'); |
904
|
3 |
|
} elseif ('compact' == $k) { |
905
|
|
|
unset($a['compact']); |
906
|
|
|
$c[] = 'font-size: 85%'; |
907
|
3 |
|
} elseif ('height' == $k or 'width' == $k) { |
908
|
|
|
unset($a[$k]); |
909
|
|
|
$c[] = $k.': '.('*' != $v[0] ? $v.(ctype_digit($v) ? 'px' : '') : 'auto'); |
910
|
3 |
|
} elseif ('hspace' == $k) { |
911
|
|
|
unset($a['hspace']); |
912
|
|
|
$c[] = "margin-left: {$v}px; margin-right: {$v}px"; |
913
|
3 |
|
} elseif ('language' == $k && !isset($a['type'])) { |
914
|
|
|
unset($a['language']); |
915
|
|
|
$a['type'] = 'text/'.strtolower($v); |
916
|
3 |
|
} elseif ('name' == $k) { |
917
|
3 |
|
if (2 == $C['no_deprecated_attr'] or ('a' != $e && 'map' != $e)) { |
918
|
|
|
unset($a['name']); |
919
|
|
|
} |
920
|
3 |
|
if (!isset($a['id']) && !preg_match('`\W`', $v)) { |
921
|
3 |
|
$a['id'] = $v; |
922
|
|
|
} |
923
|
|
|
} elseif ('noshade' == $k) { |
924
|
|
|
unset($a['noshade']); |
925
|
|
|
$c[] = 'border-style: none; border: 0; background-color: gray; color: gray'; |
926
|
|
|
} elseif ('nowrap' == $k) { |
927
|
|
|
unset($a['nowrap']); |
928
|
|
|
$c[] = 'white-space: nowrap'; |
929
|
|
View Code Duplication |
} elseif ('size' == $k) { |
|
|
|
|
930
|
|
|
unset($a['size']); |
931
|
|
|
$c[] = 'size: '.$v.'px'; |
932
|
|
|
} elseif ('vspace' == $k) { |
933
|
|
|
unset($a['vspace']); |
934
|
|
|
$c[] = "margin-top: {$v}px; margin-bottom: {$v}px"; |
935
|
|
|
} |
936
|
|
|
} |
937
|
790 |
|
if (count($c)) { |
938
|
1 |
|
$c = implode('; ', $c); |
939
|
1 |
|
$a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;').'; '.$c.';' : $c.';'; |
940
|
|
|
} |
941
|
|
|
} |
942
|
|
|
// unique ID |
943
|
946 |
|
if ($C['unique_ids'] && isset($a['id'])) { |
944
|
|
|
if (preg_match('`\s`', ($id = $a['id'])) or (isset($GLOBALS['hl_Ids'][$id]) && 1 == $C['unique_ids'])) { |
945
|
|
|
unset($a['id']); |
946
|
|
|
} else { |
947
|
|
|
while (isset($GLOBALS['hl_Ids'][$id])) { |
948
|
|
|
$id = $C['unique_ids'].$id; |
949
|
|
|
} |
950
|
|
|
$GLOBALS['hl_Ids'][($a['id'] = $id)] = 1; |
951
|
|
|
} |
952
|
|
|
} |
953
|
|
|
// xml:lang |
954
|
946 |
|
if ($C['xml:lang'] && isset($a['lang'])) { |
955
|
|
|
$a['xml:lang'] = isset($a['xml:lang']) ? $a['xml:lang'] : $a['lang']; |
956
|
|
|
if (2 == $C['xml:lang']) { |
957
|
|
|
unset($a['lang']); |
958
|
|
|
} |
959
|
|
|
} |
960
|
|
|
// for transformed tag |
961
|
946 |
|
if (!empty($trt)) { |
962
|
1 |
|
$a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;').'; '.$trt : $trt; |
963
|
|
|
} |
964
|
|
|
// return with empty ele / |
965
|
946 |
|
if (empty($C['hook_tag'])) { |
966
|
946 |
|
$aA = ''; |
967
|
946 |
|
foreach ($a as $k => $v) { |
968
|
772 |
|
$aA .= " {$k}=\"{$v}\""; |
969
|
|
|
} |
970
|
|
|
|
971
|
946 |
|
return "<{$e}{$aA}".(isset($eE[$e]) ? ' /' : '').'>'; |
972
|
|
|
} else { |
973
|
|
|
return $C['hook_tag']($e, $a); |
974
|
|
|
} |
975
|
|
|
} |
976
|
|
|
|
977
|
|
|
/**
 * Transform a deprecated element into a current equivalent.
 *
 * The element name is rewritten in place and the CSS declarations that
 * reproduce the old element's presentation are returned.
 *
 * @param string $e Element name; overwritten with the replacement name, or
 *                  with 0 when $t is 2 and no transformation is defined.
 * @param string $a Raw attribute string. Only used for 'font': each
 *                  color/size/face attribute that is converted is replaced
 *                  by a single space in this string.
 * @param int    $t Mode; with 2, an element without a transformation makes
 *                  both $e and the return value 0.
 *
 * @return string|int CSS declarations (possibly ''), or 0 (see $t).
 */
function hl_tag2(&$e, &$a, $t = 1) {
    // Plain renames: old element => array(replacement element, CSS)
    static $swap = array(
        'big' => array('span', 'font-size: larger;'),
        's' => array('span', 'text-decoration: line-through;'),
        'strike' => array('span', 'text-decoration: line-through;'),
        'tt' => array('code', ''),
        'center' => array('div', 'text-align: center;'),
        'acronym' => array('abbr', ''),
        'dir' => array('ul', ''),
    );
    // <font size="..."> value => CSS font-size value
    static $fs = array('0' => 'xx-small', '1' => 'xx-small', '2' => 'small', '3' => 'medium', '4' => 'large', '5' => 'x-large', '6' => 'xx-large', '7' => '300%', '-1' => 'smaller', '-2' => '60%', '+1' => 'larger', '+2' => '150%', '+3' => '200%', '+4' => '300%');

    if (is_string($e) && isset($swap[$e])) {
        $hit = $swap[$e];
        $e = $hit[0];

        return $hit[1];
    }

    if ('font' === $e) {
        $css = '';
        // color / size: consume one attribute per iteration
        while (preg_match('`(^|\s)(color|size)\s*=\s*(\'|")?(.+?)(\\3|\s|$)`i', $a, $m)) {
            $a = str_replace($m[0], ' ', $a);
            $val = trim($m[4]);
            if ('color' == strtolower($m[2])) {
                $css .= ' color: '.str_replace('"', '\'', $val).';';
            } elseif (isset($fs[$val])) {
                // Append exactly once; the former nested `.=` doubled the
                // whole accumulated style for every size attribute.
                $css .= ' font-size: '.str_replace('"', '\'', $fs[$val]).';';
            }
        }
        // face: quoted form first, then the unquoted fallback
        while (preg_match('`(^|\s)face\s*=\s*(\'|")?([^=]+?)\\2`i', $a, $m) or preg_match('`(^|\s)face\s*=(\s*)(\S+)`i', $a, $m)) {
            $a = str_replace($m[0], ' ', $a);
            $css .= ' font-family: '.str_replace('"', '\'', trim($m[3])).';';
        }
        $e = 'span';

        return ltrim(str_replace('<', '', $css));
    }

    // No transformation known for this element
    if (2 == $t) {
        $e = 0;

        return 0;
    }

    return '';
}
1032
|
|
|
|
1033
|
|
|
/**
 * Compact or pretty-print (indent) HTML markup.
 *
 * CDATA sections, comments and the content of pre/script/textarea elements
 * are masked with control characters before whitespace is touched and are
 * restored unchanged at the end.
 *
 * @param string     $t Markup to tidy.
 * @param string|int $w Tidy spec. -1 only collapses whitespace. Otherwise:
 *                      a 't' selects tabs (else spaces) for indenting, the
 *                      first digit is the number of indent characters per
 *                      level, a digit 1-9 directly after 't' or 's' is the
 *                      starting indent level, and 'r' (optionally with 'n')
 *                      switches line breaks to "\r" ("\r\n").
 * @param string     $p Name of the parent element of the markup.
 *
 * @return string Tidied markup; $t unchanged when $p is pre, script or
 *                textarea.
 */
function hl_tidy($t, $w, $p) {
    // Whitespace-sensitive parents: leave content untouched. Exact match is
    // used; the former substring test never matched 'textarea' and matched
    // fragments such as '' or 'e'.
    if (in_array($p, array('pre', 'script', 'textarea'), true)) {
        return $t;
    }
    if (!function_exists('hl_aux2')) {
        // Mask markup/whitespace characters inside protected sections
        function hl_aux2($m) {
            return $m[1].str_replace(array('<', '>', "\n", "\r", "\t", ' '), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]).$m[4];
        }
    }
    $masked = array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07");
    $plain = array('<', '>', "\n", "\r", "\t", ' ');

    // Mask protected sections, then collapse whitespace runs
    $t = preg_replace(
        array('`(<\w[^>]*(?<!/)>)\s+`', '`\s+`', '`(<\w[^>]*(?<!/)>) `'),
        array(' $1', ' ', '$1'),
        preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)(</\2>)`sm'), 'hl_aux2', $t)
    );
    $w = strtolower((string) $w);
    if (-1 == $w) {
        return str_replace($masked, $plain, $t);
    }

    // Indent unit and starting indent level
    $s = strpos(" $w", 't') ? "\t" : ' ';
    $s = preg_match('`\d`', $w, $m) ? str_repeat($s, (int) $m[0]) : str_repeat($s, ("\t" == $s ? 1 : 2));
    $N = preg_match('`[ts]([1-9])`', $w, $m) ? (int) $m[1] : 0;

    // Element groups, by where line breaks go
    $a = array('br' => 1); // break after the tag
    $b = array('button' => 1, 'command' => 1, 'input' => 1, 'option' => 1, 'param' => 1, 'track' => 1); // break before the tag
    $c = array('audio' => 1, 'canvas' => 1, 'caption' => 1, 'dd' => 1, 'dt' => 1, 'figcaption' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'isindex' => 1, 'label' => 1, 'legend' => 1, 'li' => 1, 'object' => 1, 'p' => 1, 'pre' => 1, 'style' => 1, 'summary' => 1, 'td' => 1, 'textarea' => 1, 'th' => 1, 'video' => 1); // break before opening, after closing
    $d = array('address' => 1, 'article' => 1, 'aside' => 1, 'blockquote' => 1, 'center' => 1, 'colgroup' => 1, 'datalist' => 1, 'details' => 1, 'dir' => 1, 'div' => 1, 'dl' => 1, 'fieldset' => 1, 'figure' => 1, 'footer' => 1, 'form' => 1, 'header' => 1, 'hgroup' => 1, 'hr' => 1, 'iframe' => 1, 'main' => 1, 'map' => 1, 'menu' => 1, 'nav' => 1, 'noscript' => 1, 'ol' => 1, 'optgroup' => 1, 'rbc' => 1, 'rtc' => 1, 'ruby' => 1, 'script' => 1, 'section' => 1, 'select' => 1, 'table' => 1, 'tbody' => 1, 'tfoot' => 1, 'thead' => 1, 'tr' => 1, 'ul' => 1); // own line, changes indent level

    $T = explode('<', $t);
    $X = 1;
    while ($X) {
        $n = $N;
        $t = $T;
        ob_start();
        if (isset($d[$p])) {
            echo str_repeat($s, ++$n);
        }
        echo ltrim(array_shift($t));
        for ($i = -1, $j = count($t); ++$i < $j;) {
            // $e: tag content, $r: text after the tag ('' if there is no '>')
            list($e, $r) = array_pad(explode('>', $t[$i]), 2, '');
            $first = isset($e[0]) ? $e[0] : '';
            // $x: 0 closing tag, 1 self-closed, 2 opening, -1 declaration/comment
            $x = '/' == $first ? 0 : ('/' == substr($e, -1) ? 1 : ('!' != $first ? 2 : -1));
            // $y: element name (0 for declaration/comment)
            $y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0);
            $e = "<$e>";
            if (isset($d[$y])) {
                if (!$x) {
                    if ($n) {
                        echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);
                    } else {
                        // More closing than opening block tags at this start
                        // level: discard output and redo one level deeper.
                        ++$N;
                        ob_end_clean();
                        continue 2;
                    }
                } else {
                    echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, (1 != $x ? ++$n : $n));
                }
                echo $r;
                continue;
            }
            $f = "\n".str_repeat($s, $n);
            if (isset($c[$y])) {
                if (!$x) {
                    echo $e, $f, $r;
                } else {
                    echo $f, $e, $r;
                }
            } elseif (isset($b[$y])) {
                echo $f, $e, $r;
            } elseif (isset($a[$y])) {
                echo $e, $f, $r;
            } elseif (!$y) {
                echo $f, $e, $f, $r;
            } else {
                echo $e, $r;
            }
        }
        $X = 0;
    }
    // Drop blank lines and whitespace hugging line breaks
    $t = str_replace(array("\n ", " \n"), "\n", preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_clean()));
    if (($l = strpos(" $w", 'r') ? (strpos(" $w", 'n') ? "\r\n" : "\r") : 0)) {
        $t = str_replace("\n", $l, $t);
    }

    return str_replace($masked, $plain, $t);
}
1112
|
|
|
|
1113
|
|
|
/**
 * Report the htmLawed version implemented by this file.
 *
 * @return string Version number, e.g. '1.2.4.2'.
 */
function hl_version() {
    return '1.2.4.2';
}
1117
|
|
|
|
There are several ways to fix this problem.
If you want to be on the safe side, you can add an additional type-check:
If you are sure that the expression is traversable, you might want to add a doc comment cast to improve IDE auto-completion and static analysis:
Mark the issue as a false positive: hover over the remove button in the top-right corner of this issue for more options.