1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/* |
4
|
|
|
htmLawed 1.1.17, 11 March 2014 |
5
|
|
|
Copyright Santosh Patnaik |
6
|
|
|
Dual licensed with LGPL 3 and GPL 2+ |
7
|
|
|
A PHP Labware internal utility; www.bioinformatics.org/phplabware/internal_utilities/htmLawed |
8
|
|
|
|
9
|
|
|
See htmLawed_README.txt/htm |
10
|
|
|
*/ |
11
|
|
|
|
12
|
|
|
/**
 * Filter/purify HTML markup according to a configuration and an element-attribute spec.
 *
 * Steps, in order: normalize $C (elements, denied attributes, URL schemes, remaining
 * flags), publish $C and $S through $GLOBALS for the hl_* helpers, strip control
 * characters, optionally map MS-Windows characters, protect comments/CDATA, normalize
 * entities, run the optional hook, filter tags (hl_tag), balance (hl_bal), restore
 * protected characters, optionally tidy (hl_tidy), and finally restore any
 * pre-existing $GLOBALS['C'] / $GLOBALS['S'].
 *
 * @param string       $t Input markup.
 * @param array|mixed  $C Configuration array; any non-array value means "all defaults".
 * @param array|string $S Element-attribute spec; a non-array value is parsed by hl_spec().
 * @return string Filtered markup.
 */
function htmLawed($t, $C=1, $S=array())
{
    $C = is_array($C) ? $C : array();
    if (!empty($C['valid_xhtml'])) {
        $C['elements'] = empty($C['elements']) ? '*-center-dir-font-isindex-menu-s-strike-u' : $C['elements'];
        $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 2;
        $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 2;
    }
    // config eles
    $e = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'applet'=>1, 'area'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'blockquote'=>1, 'br'=>1, 'button'=>1, 'caption'=>1, 'center'=>1, 'cite'=>1, 'code'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'del'=>1, 'dfn'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'dt'=>1, 'em'=>1, 'embed'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'isindex'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'object'=>1, 'ol'=>1, 'optgroup'=>1, 'option'=>1, 'p'=>1, 'param'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'table'=>1, 'tbody'=>1, 'td'=>1, 'textarea'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'tt'=>1, 'u'=>1, 'ul'=>1, 'var'=>1); // 86/deprecated+embed+ruby
    if (!empty($C['safe'])) {
        unset($e['applet'], $e['embed'], $e['iframe'], $e['object'], $e['script']);
    }
    // 'elements' forms: '-*' (none), a comma list without '*' (exactly these),
    // or '*' followed by +name / -name adjustments to the default set
    $x = !empty($C['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['elements']) : '*';
    if ($x == '-*') {
        $e = array();
    } elseif (strpos($x, '*') === false) {
        $e = array_flip(explode(',', $x));
    } else {
        if (isset($x[1])) {
            preg_match_all('`(?:^|-|\+)[^\-+]+?(?=-|\+|$)`', $x, $m, PREG_SET_ORDER);
            for ($i=count($m); --$i>=0;) {
                $m[$i] = $m[$i][0];
            }
            foreach ($m as $v) {
                if ($v[0] == '+') {
                    $e[substr($v, 1)] = 1;
                }
                // a '-name' is ignored when the same name is also listed as '+name'
                if ($v[0] == '-' && isset($e[($v = substr($v, 1))]) && !in_array('+'. $v, $m)) {
                    unset($e[$v]);
                }
            }
        }
    }
    $C['elements'] =& $e;
    // config attrs
    $x = !empty($C['deny_attribute']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['deny_attribute']) : '';
    $x = array_flip((isset($x[0]) && $x[0] == '*') ? explode('-', $x) : explode(',', $x. (!empty($C['safe']) ? ',on*' : '')));
    if (isset($x['on*'])) {
        // 'on*' is shorthand for the event-handler attributes listed here
        unset($x['on*']);
        $x += array('onblur'=>1, 'onchange'=>1, 'onclick'=>1, 'ondblclick'=>1, 'onfocus'=>1, 'onkeydown'=>1, 'onkeypress'=>1, 'onkeyup'=>1, 'onmousedown'=>1, 'onmousemove'=>1, 'onmouseout'=>1, 'onmouseover'=>1, 'onmouseup'=>1, 'onreset'=>1, 'onselect'=>1, 'onsubmit'=>1);
    }
    $C['deny_attribute'] = $x;
    // config URL
    $x = (isset($C['schemes'][2]) && strpos($C['schemes'], ':')) ? strtolower($C['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https';
    $C['schemes'] = array();
    foreach (explode(';', str_replace(array(' ', "\t", "\r", "\n"), '', $x)) as $v) {
        // pad to two parts so an entry without ':' yields a null scheme list
        // instead of an undefined-offset notice
        list($x, $x2) = array_pad(explode(':', $v, 2), 2, null);
        if ($x2) {
            $C['schemes'][$x] = array_flip(explode(',', $x2));
        }
    }
    if (!isset($C['schemes']['*'])) {
        $C['schemes']['*'] = array('file'=>1, 'http'=>1, 'https'=>1,);
    }
    if (!empty($C['safe']) && empty($C['schemes']['style'])) {
        // '!' makes hl_prot() prefix every style URL with 'denied:'
        $C['schemes']['style'] = array('!'=>1);
    }
    $C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0;
    if (!isset($C['base_url']) or !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])) {
        $C['base_url'] = $C['abs_url'] = 0;
    }
    // config rest
    $C['and_mark'] = empty($C['and_mark']) ? 0 : 1;
    $C['anti_link_spam'] = (isset($C['anti_link_spam']) && is_array($C['anti_link_spam']) && count($C['anti_link_spam']) == 2 && (empty($C['anti_link_spam'][0]) or hl_regex($C['anti_link_spam'][0])) && (empty($C['anti_link_spam'][1]) or hl_regex($C['anti_link_spam'][1]))) ? $C['anti_link_spam'] : 0;
    $C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0;
    $C['balance'] = isset($C['balance']) ? (bool)$C['balance'] : 1;
    $C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0);
    $C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char'];
    $C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0);
    $C['css_expression'] = empty($C['css_expression']) ? 0 : 1;
    $C['direct_list_nest'] = empty($C['direct_list_nest']) ? 0 : 1;
    $C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1;
    $C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0;
    $C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0;
    $C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6;
    $C['lc_std_val'] = isset($C['lc_std_val']) ? (bool)$C['lc_std_val'] : 1;
    $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1;
    $C['named_entity'] = isset($C['named_entity']) ? (bool)$C['named_entity'] : 1;
    $C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1;
    $C['parent'] = isset($C['parent'][0]) ? strtolower($C['parent']) : 'body';
    $C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0;
    $C['style_pass'] = empty($C['style_pass']) ? 0 : 1;
    $C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy'];
    $C['unique_ids'] = isset($C['unique_ids']) ? $C['unique_ids'] : 1;
    $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0;

    // the hl_* helpers read config/spec from $GLOBALS; remember any existing
    // values so they can be put back before returning
    if (isset($GLOBALS['C'])) {
        $reC = $GLOBALS['C'];
    }
    $GLOBALS['C'] = $C;
    $S = is_array($S) ? $S : hl_spec($S);
    if (isset($GLOBALS['S'])) {
        $reS = $GLOBALS['S'];
    }
    $GLOBALS['S'] = $S;

    // drop control characters; \x01-\x06 are used below as internal markers
    $t = preg_replace('`[\x00-\x08\x0b-\x0c\x0e-\x1f]`', '', $t);
    if ($C['clean_ms_char']) {
        // single-byte \x7f-\x9f characters become numeric entities (or are removed)
        $x = array("\x7f"=>'', "\x80"=>'&#8364;', "\x81"=>'', "\x83"=>'&#402;', "\x85"=>'&#8230;', "\x86"=>'&#8224;', "\x87"=>'&#8225;', "\x88"=>'&#710;', "\x89"=>'&#8240;', "\x8a"=>'&#352;', "\x8b"=>'&#8249;', "\x8c"=>'&#338;', "\x8d"=>'', "\x8e"=>'&#381;', "\x8f"=>'', "\x90"=>'', "\x95"=>'&#8226;', "\x96"=>'&#8211;', "\x97"=>'&#8212;', "\x98"=>'&#732;', "\x99"=>'&#8482;', "\x9a"=>'&#353;', "\x9b"=>'&#8250;', "\x9c"=>'&#339;', "\x9d"=>'', "\x9e"=>'&#382;', "\x9f"=>'&#376;');
        // value 1: curly quotes as entities; any other truthy value: plain ASCII quotes
        $x = $x + ($C['clean_ms_char'] == 1 ? array("\x82"=>'&#8218;', "\x84"=>'&#8222;', "\x91"=>'&#8216;', "\x92"=>'&#8217;', "\x93"=>'&#8220;', "\x94"=>'&#8221;') : array("\x82"=>'\'', "\x84"=>'"', "\x91"=>'\'', "\x92"=>'\'', "\x93"=>'"', "\x94"=>'"'));
        $t = strtr($t, $x);
    }
    if ($C['cdata'] or $C['comment']) {
        $t = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\[CDATA\[.*?\]\]))>`sm', 'hl_cmtcd', $t);
    }
    // collapse '&amp;' to '&' first so every entity-like sequence goes through hl_ent() once
    $t = preg_replace_callback('`&([A-Za-z][A-Za-z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'hl_ent', str_replace('&amp;', '&', $t));
    if ($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])) {
        $GLOBALS['hl_Ids'] = array();
    }
    if ($C['hook']) {
        $t = $C['hook']($t, $C, $S);
    }
    if ($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])) {
        // expose the finalized settings in a global variable named by 'show_setting'
        $GLOBALS[$C['show_setting']] = array('config'=>$C, 'spec'=>$S, 'time'=>microtime());
    }
    // main
    $t = preg_replace_callback('`<(?:(?:\s|$)|(?:[^>]*(?:>|$)))|>`m', 'hl_tag', $t);
    $t = $C['balance'] ? hl_bal($t, $C['keep_bad'], $C['parent']) : $t;
    // remove the \x01/\x02 markers and restore \x03-\x05 to & < > (set by hl_cmtcd)
    $t = (($C['cdata'] or $C['comment']) && strpos($t, "\x01") !== false) ? str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $t) : $t;
    $t = $C['tidy'] ? hl_tidy($t, $C['tidy'], $C['parent']) : $t;
    unset($C, $e);
    if (isset($reC)) {
        $GLOBALS['C'] = $reC;
    }
    if (isset($reS)) {
        $GLOBALS['S'] = $reS;
    }
    return $t;
    // eof
}
144
|
|
|
|
145
|
|
|
/**
 * Check an attribute value against a set of rules.
 *
 * Recognized rule keys: maxlen, minlen (byte length), maxval, minval (value cast to
 * float), match, nomatch (PCRE patterns), oneof, noneof ('|'-separated choices).
 * Other keys, including 'default', are ignored while checking. Checking stops at the
 * first rule that fails.
 *
 * @param string $t Attribute value.
 * @param array  $p Rules, keyed by rule name.
 * @return mixed $t when every rule passes; otherwise $p['default'] if set, else 0.
 */
function hl_attrval($t, $p)
{
    // check attr val against $S
    $o = 1;
    $l = strlen($t);
    foreach ($p as $k=>$v) {
        switch ($k) {
            case 'maxlen':
                $o = ($l > $v) ? 0 : 1;
                break;
            case 'minlen':
                $o = ($l < $v) ? 0 : 1;
                break;
            case 'maxval':
                $o = ((float)($t) > $v) ? 0 : 1;
                break;
            case 'minval':
                $o = ((float)($t) < $v) ? 0 : 1;
                break;
            case 'match':
                $o = preg_match($v, $t) ? 1 : 0;
                break;
            case 'nomatch':
                $o = preg_match($v, $t) ? 0 : 1;
                break;
            case 'oneof':
                // strict string comparison: with '==' numeric-looking strings such
                // as '1e1' and '10' would be treated as the same choice
                $o = in_array((string)$t, explode('|', $v), true) ? 1 : 0;
                break;
            case 'noneof':
                $o = in_array((string)$t, explode('|', $v), true) ? 0 : 1;
                break;
            default:
                break;
        }
        if (!$o) {
            break;
        }
    }
    return ($o ? $t : (isset($p['default']) ? $p['default'] : 0));
    // eof
}
198
|
|
|
|
199
|
|
|
/**
 * Balance tags and enforce element nesting/content rules.
 *
 * The input is split on '<' and walked piece by piece while $q tracks the currently
 * open non-empty elements. Output is collected through an output buffer.
 *
 * Reads $GLOBALS['C']['direct_list_nest'].
 *
 * @param string $t  Markup to balance.
 * @param int    $do Handling of disallowed tags/content, as tested in the code:
 *                   1 echoes a rejected tag entity-escaped; 3 and 5 do so only where
 *                   text (#pcdata) is allowed; values below 3 always keep text;
 *                   values above 4 keep only the whitespace of disallowed text.
 * @param string $in Name of the element assumed to contain $t; falls back to 'div'
 *                   when it is not a known element.
 * @return string Balanced markup.
 */
function hl_bal($t, $do=1, $in='div')
{
    // balance tags
    // by content
    $cB = array('blockquote'=>1, 'form'=>1, 'map'=>1, 'noscript'=>1); // Block
    $cE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty
    $cF = array('button'=>1, 'del'=>1, 'div'=>1, 'dd'=>1, 'fieldset'=>1, 'iframe'=>1, 'ins'=>1, 'li'=>1, 'noscript'=>1, 'object'=>1, 'td'=>1, 'th'=>1); // Flow; later context-wise dynamic move of ins & del to $cI
    $cI = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'caption'=>1, 'cite'=>1, 'code'=>1, 'dfn'=>1, 'dt'=>1, 'em'=>1, 'font'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'i'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'p'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rt'=>1, 's'=>1, 'samp'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); // Inline
    $cN = array('a'=>array('a'=>1), 'button'=>array('a'=>1, 'button'=>1, 'fieldset'=>1, 'form'=>1, 'iframe'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'fieldset'=>array('fieldset'=>1), 'form'=>array('form'=>1), 'label'=>array('label'=>1), 'noscript'=>array('script'=>1), 'pre'=>array('big'=>1, 'font'=>1, 'img'=>1, 'object'=>1, 'script'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1), 'rb'=>array('ruby'=>1), 'rt'=>array('ruby'=>1)); // Illegal
    $cN2 = array_keys($cN);
    $cS = array('colgroup'=>array('col'=>1), 'dir'=>array('li'=>1), 'dl'=>array('dd'=>1, 'dt'=>1), 'menu'=>array('li'=>1), 'ol'=>array('li'=>1), 'optgroup'=>array('option'=>1), 'option'=>array('#pcdata'=>1), 'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'tfoot'=>1, 'tbody'=>1, 'tr'=>1, 'thead'=>1), 'tbody'=>array('tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('tr'=>1), 'tr'=>array('td'=>1, 'th'=>1), 'ul'=>array('li'=>1)); // Specific - immediate parent-child
    if ($GLOBALS['C']['direct_list_nest']) {
        $cS['ol'] = $cS['ul'] += array('ol'=>1, 'ul'=>1);
    }
    $cO = array('address'=>array('p'=>1), 'applet'=>array('param'=>1), 'blockquote'=>array('script'=>1), 'fieldset'=>array('legend'=>1, '#pcdata'=>1), 'form'=>array('script'=>1), 'map'=>array('area'=>1), 'object'=>array('param'=>1, 'embed'=>1)); // Other
    $cT = array('colgroup'=>1, 'dd'=>1, 'dt'=>1, 'li'=>1, 'option'=>1, 'p'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1); // Omitable closing
    // block/inline type; ins & del both type; #pcdata: text
    $eB = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'del'=>1, 'dir'=>1, 'dl'=>1, 'div'=>1, 'fieldset'=>1, 'form'=>1, 'ins'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'isindex'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'table'=>1, 'ul'=>1);
    $eI = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'cite'=>1, 'code'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 'embed'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'map'=>1, 'object'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1, 'tt'=>1, 'u'=>1, 'var'=>1);
    $eN = array('a'=>1, 'big'=>1, 'button'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'label'=>1, 'object'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1); // Exclude from specific ele; $cN values
    $eO = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'dt'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1); // Missing in $eB & $eI
    $eF = $eB + $eI;

    // $in sets allowed child
    $in = ((isset($eF[$in]) && $in != '#pcdata') or isset($eO[$in])) ? $in : 'div';
    if (isset($cE[$in])) {
        // an empty element cannot hold markup: drop everything, or neutralize the brackets
        return (!$do ? '' : str_replace(array('<', '>'), array('&lt;', '&gt;'), $t));
    }
    // start from an empty set so a parent in none of the content classes below
    // (e.g. 'center', 'applet') does not leave $inOk undefined
    $inOk = array();
    if (isset($cS[$in])) {
        $inOk = $cS[$in];
    } elseif (isset($cI[$in])) {
        $inOk = $eI;
        $cI['del'] = 1;
        $cI['ins'] = 1;
    } elseif (isset($cF[$in])) {
        $inOk = $eF;
        unset($cI['del'], $cI['ins']);
    } elseif (isset($cB[$in])) {
        $inOk = $eB;
        unset($cI['del'], $cI['ins']);
    }
    if (isset($cO[$in])) {
        $inOk = $inOk + $cO[$in];
    }
    if (isset($cN[$in])) {
        $inOk = array_diff_assoc($inOk, $cN[$in]);
    }

    // each piece after the first begins right after a '<' of the input
    $t = explode('<', $t);
    $ok = $q = array(); // $q seq list of open non-empty ele
    ob_start();

    for ($i=-1, $ci=count($t); ++$i<$ci;) {
        // allowed $ok in parent $p
        if ($ql = count($q)) {
            $p = array_pop($q);
            $q[] = $p;
            if (isset($cS[$p])) {
                $ok = $cS[$p];
            } elseif (isset($cI[$p])) {
                $ok = $eI;
                $cI['del'] = 1;
                $cI['ins'] = 1;
            } elseif (isset($cF[$p])) {
                $ok = $eF;
                unset($cI['del'], $cI['ins']);
            } elseif (isset($cB[$p])) {
                $ok = $eB;
                unset($cI['del'], $cI['ins']);
            }
            if (isset($cO[$p])) {
                $ok = $ok + $cO[$p];
            }
            if (isset($cN[$p])) {
                $ok = array_diff_assoc($ok, $cN[$p]);
            }
        } else {
            $ok = $inOk;
            unset($cI['del'], $cI['ins']);
        }
        // bad tags, & ele content
        // $e still set here means the previous piece's tag was rejected;
        // it is shown entity-escaped so it cannot act as markup
        if (isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))) {
            echo '&lt;', $s, $e, $a, '&gt;';
        }
        if (isset($x[0])) {
            if (strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))) {
                // block-only parent: wrap loose text in a div
                echo '<div>', $x, '</div>';
            } elseif ($do < 3 or isset($ok['#pcdata'])) {
                echo $x;
            } elseif (strpos($x, "\x02\x04")) {
                // text not allowed here: keep the "\x01\x02"-delimited segments
                // (as produced by hl_cmtcd for comments) and drop the rest
                foreach (preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v) {
                    echo(substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : ''));
                }
            } elseif ($do > 4) {
                echo preg_replace('`\S`', '', $x);
            }
        }
        // get markup
        if (!preg_match('`^(/?)([a-z1-6]+)([^>]*)>(.*)`sm', $t[$i], $r)) {
            $x = $t[$i];
            continue;
        }
        $s = null;
        $e = null;
        $a = null;
        $x = null;
        // $s: '/' for a closing tag, $e: element name, $a: rest of tag, $x: trailing text
        list($all, $s, $e, $a, $x) = $r;
        // close tag
        if ($s) {
            if (isset($cE[$e]) or !in_array($e, $q)) {
                continue;
            } // Empty/unopen
            if ($p == $e) {
                array_pop($q);
                echo '</', $e, '>';
                unset($e);
                continue;
            } // Last open
            $add = ''; // Nesting - close open tags that need to be
            for ($j=-1, $cj=count($q); ++$j<$cj;) {
                if (($d = array_pop($q)) == $e) {
                    break;
                } else {
                    $add .= "</{$d}>";
                }
            }
            echo $add, '</', $e, '>';
            unset($e);
            continue;
        }
        // open tag
        // $cB ele needs $eB ele as child
        if (isset($cB[$e]) && strlen(trim($x))) {
            // split off the trailing text into an inserted 'div' piece, then redo this piece
            $t[$i] = "{$e}{$a}>";
            array_splice($t, $i+1, 0, 'div>'. $x);
            unset($e, $x);
            ++$ci;
            --$i;
            continue;
        }
        if ((($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql)) && !isset($eB[$e]) && !isset($ok[$e])) {
            // insert a 'div' piece before this one, then redo from the inserted piece
            array_splice($t, $i, 0, 'div>');
            unset($e, $x);
            ++$ci;
            --$i;
            continue;
        }
        // if no open ele, $in = parent; mostly immediate parent-child relation should hold
        if (!$ql or !isset($eN[$e]) or !array_intersect($q, $cN2)) {
            if (!isset($ok[$e])) {
                if ($ql && isset($cT[$p])) {
                    // parent's closing tag is omittable: close it and retry this piece
                    echo '</', array_pop($q), '>';
                    unset($e, $x);
                    --$i;
                }
                continue;
            }
            if (!isset($cE[$e])) {
                $q[] = $e;
            }
            echo '<', $e, $a, '>';
            unset($e);
            continue;
        }
        // specific parent-child
        if (isset($cS[$p][$e])) {
            if (!isset($cE[$e])) {
                $q[] = $e;
            }
            echo '<', $e, $a, '>';
            unset($e);
            continue;
        }
        // nesting
        $add = '';
        $q2 = array();
        for ($k=-1, $kc=count($q); ++$k<$kc;) {
            $d = $q[$k];
            $ok2 = array();
            if (isset($cS[$d])) {
                $q2[] = $d;
                continue;
            }
            $ok2 = isset($cI[$d]) ? $eI : $eF;
            if (isset($cO[$d])) {
                $ok2 = $ok2 + $cO[$d];
            }
            if (isset($cN[$d])) {
                $ok2 = array_diff_assoc($ok2, $cN[$d]);
            }
            if (!isset($ok2[$e])) {
                if (!$k && !isset($inOk[$e])) {
                    continue 2;
                }
                // close this ancestor and every element opened after it
                $add = "</{$d}>";
                for (;++$k<$kc;) {
                    $add = "</{$q[$k]}>{$add}";
                }
                break;
            } else {
                $q2[] = $d;
            }
        }
        $q = $q2;
        if (!isset($cE[$e])) {
            $q[] = $e;
        }
        echo $add, '<', $e, $a, '>';
        unset($e);
        continue;
    }

    // end
    // flush whatever the last piece left pending, using the same rules as in the loop
    if ($ql = count($q)) {
        $p = array_pop($q);
        $q[] = $p;
        if (isset($cS[$p])) {
            $ok = $cS[$p];
        } elseif (isset($cI[$p])) {
            $ok = $eI;
            $cI['del'] = 1;
            $cI['ins'] = 1;
        } elseif (isset($cF[$p])) {
            $ok = $eF;
            unset($cI['del'], $cI['ins']);
        } elseif (isset($cB[$p])) {
            $ok = $eB;
            unset($cI['del'], $cI['ins']);
        }
        if (isset($cO[$p])) {
            $ok = $ok + $cO[$p];
        }
        if (isset($cN[$p])) {
            $ok = array_diff_assoc($ok, $cN[$p]);
        }
    } else {
        $ok = $inOk;
        unset($cI['del'], $cI['ins']);
    }
    if (isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))) {
        echo '&lt;', $s, $e, $a, '&gt;';
    }
    if (isset($x[0])) {
        if (strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))) {
            echo '<div>', $x, '</div>';
        } elseif ($do < 3 or isset($ok['#pcdata'])) {
            echo $x;
        } elseif (strpos($x, "\x02\x04")) {
            foreach (preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v) {
                echo(substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : ''));
            }
        } elseif ($do > 4) {
            echo preg_replace('`\S`', '', $x);
        }
    }
    // close everything still open, innermost first
    while (!empty($q) && ($e = array_pop($q))) {
        echo '</', $e, '>';
    }
    $o = ob_get_contents();
    ob_end_clean();
    return $o;
    // eof
}
463
|
|
|
|
464
|
|
|
/**
 * Comment/CDATA-section handler, used as a preg_replace_callback() callback.
 *
 * Reads the global $C: $C['comment'] applies to '<!-- -->' matches and $C['cdata'] to
 * '<![CDATA[ ]]>' matches. A falsy setting returns the match unchanged, 1 removes it,
 * 2 entity-escapes '&', '<' and '>' in the content, and other values keep the
 * content. For any setting above 1 the result has '&', '<', '>' replaced by
 * \x03, \x04, \x05 and is wrapped in \x01/\x02 markers.
 *
 * @param array $t Regex match array; $t[0] is the whole comment or CDATA section.
 * @return string Replacement text.
 */
function hl_cmtcd($t)
{
    // comment/CDATA sec handler
    global $C;
    $t = $t[0];
    // the 4th character tells the two apart: '<!--' vs '<![C'
    $n = ($t[3] == '-') ? 'comment' : 'cdata';
    $v = $C[$n];
    if (!$v) {
        return $t;
    }
    if ($v == 1) {
        return '';
    }
    if ($n == 'comment') {
        // strip '<!--' and '-->', collapse runs of '-', and make sure the body ends in a space
        $t = preg_replace('`--+`', '-', substr($t, 4, -3));
        if (substr($t, -1) != ' ') {
            $t .= ' ';
        }
    } else {
        // strip only the outer '<' and '>'
        $t = substr($t, 1, -1);
    }
    if ($v == 2) {
        $t = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $t);
    }
    return str_replace(array('&', '<', '>'), array("\x03", "\x04", "\x05"), ($n == 'comment' ? "\x01\x02\x04!--$t--\x05\x02\x01" : "\x01\x01\x04$t\x05\x01\x01"));
    // eof
}
486
|
|
|
|
487
|
|
|
/**
 * Entity handler, used as a preg_replace_callback() callback.
 *
 * Reads the global $C: 'and_mark' (emit \x06 instead of '&'), 'named_entity'
 * (keep known names as names) and 'hexdec_entity' (numeric output form).
 *
 * @param array $t Regex match array; $t[1] is the entity without '&' and ';'.
 * @return string The entity to emit. An unknown name or a disallowed code point is
 *                returned with its ampersand escaped ('amp;' follows the lead character).
 */
function hl_ent($t)
{
    // entity handler
    global $C;
    static $U = array('quot'=>1,'amp'=>1,'lt'=>1,'gt'=>1);
    static $N = array('fnof'=>'402', 'Alpha'=>'913', 'Beta'=>'914', 'Gamma'=>'915', 'Delta'=>'916', 'Epsilon'=>'917', 'Zeta'=>'918', 'Eta'=>'919', 'Theta'=>'920', 'Iota'=>'921', 'Kappa'=>'922', 'Lambda'=>'923', 'Mu'=>'924', 'Nu'=>'925', 'Xi'=>'926', 'Omicron'=>'927', 'Pi'=>'928', 'Rho'=>'929', 'Sigma'=>'931', 'Tau'=>'932', 'Upsilon'=>'933', 'Phi'=>'934', 'Chi'=>'935', 'Psi'=>'936', 'Omega'=>'937', 'alpha'=>'945', 'beta'=>'946', 'gamma'=>'947', 'delta'=>'948', 'epsilon'=>'949', 'zeta'=>'950', 'eta'=>'951', 'theta'=>'952', 'iota'=>'953', 'kappa'=>'954', 'lambda'=>'955', 'mu'=>'956', 'nu'=>'957', 'xi'=>'958', 'omicron'=>'959', 'pi'=>'960', 'rho'=>'961', 'sigmaf'=>'962', 'sigma'=>'963', 'tau'=>'964', 'upsilon'=>'965', 'phi'=>'966', 'chi'=>'967', 'psi'=>'968', 'omega'=>'969', 'thetasym'=>'977', 'upsih'=>'978', 'piv'=>'982', 'bull'=>'8226', 'hellip'=>'8230', 'prime'=>'8242', 'Prime'=>'8243', 'oline'=>'8254', 'frasl'=>'8260', 'weierp'=>'8472', 'image'=>'8465', 'real'=>'8476', 'trade'=>'8482', 'alefsym'=>'8501', 'larr'=>'8592', 'uarr'=>'8593', 'rarr'=>'8594', 'darr'=>'8595', 'harr'=>'8596', 'crarr'=>'8629', 'lArr'=>'8656', 'uArr'=>'8657', 'rArr'=>'8658', 'dArr'=>'8659', 'hArr'=>'8660', 'forall'=>'8704', 'part'=>'8706', 'exist'=>'8707', 'empty'=>'8709', 'nabla'=>'8711', 'isin'=>'8712', 'notin'=>'8713', 'ni'=>'8715', 'prod'=>'8719', 'sum'=>'8721', 'minus'=>'8722', 'lowast'=>'8727', 'radic'=>'8730', 'prop'=>'8733', 'infin'=>'8734', 'ang'=>'8736', 'and'=>'8743', 'or'=>'8744', 'cap'=>'8745', 'cup'=>'8746', 'int'=>'8747', 'there4'=>'8756', 'sim'=>'8764', 'cong'=>'8773', 'asymp'=>'8776', 'ne'=>'8800', 'equiv'=>'8801', 'le'=>'8804', 'ge'=>'8805', 'sub'=>'8834', 'sup'=>'8835', 'nsub'=>'8836', 'sube'=>'8838', 'supe'=>'8839', 'oplus'=>'8853', 'otimes'=>'8855', 'perp'=>'8869', 'sdot'=>'8901', 'lceil'=>'8968', 'rceil'=>'8969', 'lfloor'=>'8970', 'rfloor'=>'8971', 'lang'=>'9001', 'rang'=>'9002', 'loz'=>'9674', 'spades'=>'9824', 'clubs'=>'9827', 'hearts'=>'9829', 'diams'=>'9830',
        'apos'=>'39', 'OElig'=>'338', 'oelig'=>'339', 'Scaron'=>'352', 'scaron'=>'353', 'Yuml'=>'376', 'circ'=>'710', 'tilde'=>'732', 'ensp'=>'8194', 'emsp'=>'8195', 'thinsp'=>'8201', 'zwnj'=>'8204', 'zwj'=>'8205', 'lrm'=>'8206', 'rlm'=>'8207', 'ndash'=>'8211', 'mdash'=>'8212', 'lsquo'=>'8216', 'rsquo'=>'8217', 'sbquo'=>'8218', 'ldquo'=>'8220', 'rdquo'=>'8221', 'bdquo'=>'8222', 'dagger'=>'8224', 'Dagger'=>'8225', 'permil'=>'8240', 'lsaquo'=>'8249', 'rsaquo'=>'8250', 'euro'=>'8364', 'nbsp'=>'160', 'iexcl'=>'161', 'cent'=>'162', 'pound'=>'163', 'curren'=>'164', 'yen'=>'165', 'brvbar'=>'166', 'sect'=>'167', 'uml'=>'168', 'copy'=>'169', 'ordf'=>'170', 'laquo'=>'171', 'not'=>'172', 'shy'=>'173', 'reg'=>'174', 'macr'=>'175', 'deg'=>'176', 'plusmn'=>'177', 'sup2'=>'178', 'sup3'=>'179', 'acute'=>'180', 'micro'=>'181', 'para'=>'182', 'middot'=>'183', 'cedil'=>'184', 'sup1'=>'185', 'ordm'=>'186', 'raquo'=>'187', 'frac14'=>'188', 'frac12'=>'189', 'frac34'=>'190', 'iquest'=>'191', 'Agrave'=>'192', 'Aacute'=>'193', 'Acirc'=>'194', 'Atilde'=>'195', 'Auml'=>'196', 'Aring'=>'197', 'AElig'=>'198', 'Ccedil'=>'199', 'Egrave'=>'200', 'Eacute'=>'201', 'Ecirc'=>'202', 'Euml'=>'203', 'Igrave'=>'204', 'Iacute'=>'205', 'Icirc'=>'206', 'Iuml'=>'207', 'ETH'=>'208', 'Ntilde'=>'209', 'Ograve'=>'210', 'Oacute'=>'211', 'Ocirc'=>'212', 'Otilde'=>'213', 'Ouml'=>'214', 'times'=>'215', 'Oslash'=>'216', 'Ugrave'=>'217', 'Uacute'=>'218', 'Ucirc'=>'219', 'Uuml'=>'220', 'Yacute'=>'221', 'THORN'=>'222', 'szlig'=>'223', 'agrave'=>'224', 'aacute'=>'225', 'acirc'=>'226', 'atilde'=>'227', 'auml'=>'228', 'aring'=>'229', 'aelig'=>'230', 'ccedil'=>'231', 'egrave'=>'232', 'eacute'=>'233', 'ecirc'=>'234', 'euml'=>'235', 'igrave'=>'236', 'iacute'=>'237', 'icirc'=>'238', 'iuml'=>'239', 'eth'=>'240', 'ntilde'=>'241', 'ograve'=>'242', 'oacute'=>'243', 'ocirc'=>'244', 'otilde'=>'245', 'ouml'=>'246', 'divide'=>'247', 'oslash'=>'248', 'ugrave'=>'249', 'uacute'=>'250', 'ucirc'=>'251', 'uuml'=>'252', 'yacute'=>'253',
        'thorn'=>'254', 'yuml'=>'255');

    $ent = $t[1];
    // lead character for everything returned below
    $lead = $C['and_mark'] ? "\x06" : '&';

    // named entity
    if ($ent[0] != '#') {
        if (isset($U[$ent])) {
            return $lead. $ent. ';';
        }
        if (!isset($N[$ent])) {
            // unknown name: escape the ampersand
            return $lead. 'amp;'. $ent. ';';
        }
        if ($C['named_entity']) {
            return $lead. $ent. ';';
        }
        // convert the name to its numeric form
        $code = $N[$ent];
        return $lead. '#'. ($C['hexdec_entity'] > 1 ? 'x'. dechex($code) : $code). ';';
    }

    // numeric entity: decimal digits, or 'x'/'X' followed by hex digits
    $num = substr($ent, 1);
    $isDec = ctype_digit($num);
    $n = $isDec ? intval($num) : hexdec(substr($num, 1));
    $rejected = $n < 9
        or ($n > 13 && $n < 32)
        or $n == 11
        or $n == 12
        or ($n > 126 && $n < 160 && $n != 133)
        or ($n > 55295 && ($n < 57344 or ($n > 64975 && $n < 64992) or $n == 65534 or $n == 65535 or $n > 1114111));
    // 'or' binds looser than '=', so fold the remaining terms in explicitly
    if ($rejected or ($n > 13 && $n < 32) or $n == 11 or $n == 12 or ($n > 126 && $n < 160 && $n != 133) or ($n > 55295 && ($n < 57344 or ($n > 64975 && $n < 64992) or $n == 65534 or $n == 65535 or $n > 1114111))) {
        return $lead. "amp;#{$num};";
    }
    // decimal output when the input was decimal and hex is not forced, or when hex is disabled
    $asDec = ($isDec && $C['hexdec_entity'] < 2) or !$C['hexdec_entity'];
    if ($asDec or !$C['hexdec_entity']) {
        return $lead. '#'. $n. ';';
    }
    return $lead. '#'. 'x'. dechex($n). ';';
    // eof
}
503
|
|
|
|
504
|
|
|
/**
 * Check a URL's scheme against the allowed list and optionally rewrite the URL
 * as absolute or relative.
 *
 * Reads the global $C: 'schemes', 'abs_url' and 'base_url'.
 *
 * @param string|array $p URL to check. When $c is null, $p is instead an array whose
 *                        elements 1, 2 and 3 are a prefix, the URL and a suffix
 *                        (presumably a regex match from style-value processing - confirm
 *                        against the caller).
 * @param string|null  $c Attribute name selecting the scheme list; null means 'style'.
 *                        Names without their own list use the '*' list.
 * @return string The URL (with prefix/suffix, if any); a URL with a disallowed
 *                scheme is prefixed with 'denied:'.
 */
function hl_prot($p, $c=null)
{
    // check URL scheme
    global $C;
    static $d = 'denied:';
    $pre = '';
    $post = '';
    if ($c == null) {
        $c = 'style';
        $pre = $p[1];
        $post = $p[3];
        $p = trim($p[2]);
    }
    $allowed = isset($C['schemes'][$c]) ? $C['schemes'][$c] : $C['schemes']['*'];

    // '!' in the list denies every URL
    if (isset($allowed['!']) && substr($p, 0, 7) != $d) {
        $p = "$d$p";
    }
    // All ok, frag, query, param
    if (isset($allowed['*']) or !strcspn($p, '#?;') or (substr($p, 0, 7) == $d)) {
        return "{$pre}{$p}{$post}";
    }
    // Denied prot; the separator may be ':' or one of its encoded forms
    $hasScheme = preg_match('`^([^:?[@!$()*,=/\'\]]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m);
    if ($hasScheme && !isset($allowed[strtolower($m[1])])) {
        return "{$pre}{$d}{$p}{$post}";
    }
    if (!$C['abs_url']) {
        return "{$pre}{$p}{$post}";
    }

    if ($C['abs_url'] == -1 && strpos($p, $C['base_url']) === 0) {
        // Make url rel
        $p = substr($p, strlen($C['base_url']));
    } elseif (empty($m[1])) {
        // Make URL abs
        if (substr($p, 0, 2) == '//') {
            // scheme-relative: borrow the base URL's 'scheme:'
            $p = substr($C['base_url'], 0, strpos($C['base_url'], ':')+1). $p;
        } elseif ($p[0] == '/') {
            // host-relative: prepend 'scheme://host' of the base URL
            $p = preg_replace('`(^.+?://[^/]+)(.*)`', '$1', $C['base_url']). $p;
        } elseif (strcspn($p, './')) {
            // plain relative path
            $p = $C['base_url']. $p;
        } else {
            // starts with '.' (or '/'): resolve './' and '../' against the base path
            preg_match('`^([a-zA-Z\d\-+.]+://[^/]+)(.*)`', $C['base_url'], $base);
            $p = preg_replace('`(?<=/)\./`', '', $base[2]. $p);
            $dotdot = '`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`';
            while (preg_match($dotdot, $p)) {
                $p = preg_replace($dotdot, '', $p);
            }
            $p = $base[1]. $p;
        }
    }
    return "{$pre}{$p}{$post}";
    // eof
}
549
|
|
|
|
550
|
|
|
/**
 * Tell whether a value is a usable PCRE pattern (delimiters included).
 *
 * The pattern is test-compiled by matching it against an empty string;
 * preg_match() returns false when the pattern is invalid. This does not depend on
 * the 'track_errors' setting / $php_errormsg, which are unavailable in PHP 8 and
 * would make every pattern look valid there.
 *
 * @param mixed $p Candidate pattern.
 * @return int 1 if $p is a non-empty string that compiles as a pattern, otherwise 0.
 */
function hl_regex($p)
{
    // ?regex
    if (empty($p) or !is_string($p)) {
        return 0;
    }
    // '@' keeps the compile warning of an invalid pattern out of the output
    // without touching the display_errors setting
    return (@preg_match($p, '') === false) ? 0 : 1;
    // eof
}
578
|
|
|
|
579
|
|
|
/**
 * Parse a `$spec` string into the per-element attribute rule array.
 *
 * Rules are separated by `;`, each being `elements=attributes`. Both sides
 * are comma-separated lists. An attribute prefixed with `-` is recorded under
 * the `n` (denied) key; an attribute may carry `(key=value/key=value)` rules.
 *
 * @param string $t Raw spec string.
 * @return array Map of lower-cased element name => attribute rules.
 */
function hl_spec($t)
{
    // final $spec
    $s = array();
    // Inside double-quoted values, characters that are syntax elsewhere in the
    // spec are swapped for control characters (and the enclosing quotes are
    // dropped) so the explode() calls below do not split on them.
    $protect = function ($m) {
        return substr(str_replace(array(';', '|', '~', ' ', ',', '/', '(', ')', '`"'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", '"'), $m[0]), 1, -1);
    };
    $t = str_replace(array("\t", "\r", "\n", ' '), '', preg_replace_callback('/"(?>(`.|[^"])*)"/sm', $protect, trim($t)));
    for ($i = count(($t = explode(';', $t))); --$i >= 0;) {
        $w = $t[$i];
        if (empty($w) or ($e = strpos($w, '=')) === false or !strlen(($a = substr($w, $e + 1)))) {
            continue;
        }
        $y = $n = array(); // $y: allowed attributes/rules, $n: denied attributes
        foreach (explode(',', $a) as $v) {
            if (!preg_match('`^([a-z:\-\*]+)(?:\((.*?)\))?`i', $v, $m)) {
                continue;
            }
            if (($x = strtolower($m[1])) == '-*') {
                $n['*'] = 1;
                continue;
            }
            if ($x[0] == '-') {
                $n[substr($x, 1)] = 1;
                continue;
            }
            if (!isset($m[2])) {
                $y[$x] = 1;
                continue;
            }
            foreach (explode('/', $m[2]) as $m) {
                // A rule needs a key of at least five characters before '='.
                if (empty($m) or ($p = strpos($m, '=')) == 0 or $p < 5) {
                    $y[$x] = 1;
                    continue;
                }
                // Once a malformed rule has reset the attribute to plain "allowed",
                // later rules are dropped rather than written into a scalar.
                if (isset($y[$x]) && !is_array($y[$x])) {
                    continue;
                }
                // Restore the characters that were protected inside quotes.
                $y[$x][strtolower(substr($m, 0, $p))] = str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08"), array(";", "|", "~", " ", ",", "/", "(", ")"), substr($m, $p + 1));
            }
            // Discard regex rules that do not compile.
            if (isset($y[$x]['match']) && !hl_regex($y[$x]['match'])) {
                unset($y[$x]['match']);
            }
            if (isset($y[$x]['nomatch']) && !hl_regex($y[$x]['nomatch'])) {
                unset($y[$x]['nomatch']);
            }
        }
        if (!count($y) && !count($n)) {
            continue;
        }
        foreach (explode(',', substr($w, 0, $e)) as $v) {
            if (!strlen(($v = strtolower($v)))) {
                continue;
            }
            if (count($y)) {
                $s[$v] = $y;
            }
            if (count($n)) {
                $s[$v]['n'] = $n;
            }
        }
    }
    return $s;
    // eof
}
638
|
|
|
|
639
|
|
|
/**
 * Callback that validates and rewrites a single tag (or stray `<`/`>`).
 *
 * Reads the global configuration `$C` and the global spec `$S`. Elements not
 * listed in `$C['elements']` are removed or entity-escaped depending on
 * `$C['keep_bad']`. Attributes are parsed, filtered against the per-element
 * tables and `$S`, URL/style values are passed through hl_prot(), required
 * attributes are added, and deprecated attributes are optionally converted
 * to CSS.
 *
 * @param array $t Match array; only element 0 (the matched text) is used.
 * @return string Replacement text for the match.
 */
function hl_tag($t)
{
    // tag/attribute handler
    global $C;
    $t = $t[0];
    // invalid < >
    if ($t == '< ') {
        return '&lt; ';
    }
    if ($t == '>') {
        return '&gt;';
    }
    if (!preg_match('`^<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>$`m', $t, $m)) {
        return str_replace(array('<', '>'), array('&lt;', '&gt;'), $t);
    } elseif (!isset($C['elements'][($e = strtolower($m[2]))])) {
        return (($C['keep_bad']%2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : '');
    }
    // attr string
    $a = str_replace(array("\n", "\r", "\t"), ' ', trim($m[3]));
    // tag transform
    static $eD = array('applet'=>1, 'center'=>1, 'dir'=>1, 'embed'=>1, 'font'=>1, 'isindex'=>1, 'menu'=>1, 's'=>1, 'strike'=>1, 'u'=>1); // Deprecated
    if ($C['make_tag_strict'] && isset($eD[$e])) {
        // hl_tag2() may rename $e (by reference) and returns CSS to append.
        $trt = hl_tag2($e, $a, $C['make_tag_strict']);
        if (!$e) {
            return (($C['keep_bad']%2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : '');
        }
    }
    // close tag
    static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty ele
    if (!empty($m[1])) {
        return (!isset($eE[$e]) ? (empty($C['hook_tag']) ? "</$e>" : $C['hook_tag']($e)) : (($C['keep_bad'])%2 ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : ''));
    }

    // open tag & attr
    static $aN = array('abbr'=>array('td'=>1, 'th'=>1), 'accept-charset'=>array('form'=>1), 'accept'=>array('form'=>1, 'input'=>1), 'accesskey'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'legend'=>1, 'textarea'=>1), 'action'=>array('form'=>1), 'align'=>array('caption'=>1, 'embed'=>1, 'applet'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'legend'=>1, 'table'=>1, 'hr'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'p'=>1, 'col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'alt'=>array('applet'=>1, 'area'=>1, 'img'=>1, 'input'=>1), 'archive'=>array('applet'=>1, 'object'=>1), 'axis'=>array('td'=>1, 'th'=>1), 'bgcolor'=>array('embed'=>1, 'table'=>1, 'tr'=>1, 'td'=>1, 'th'=>1), 'border'=>array('table'=>1, 'img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'cellpadding'=>array('table'=>1), 'cellspacing'=>array('table'=>1), 'char'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charoff'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charset'=>array('a'=>1, 'script'=>1), 'checked'=>array('input'=>1), 'cite'=>array('blockquote'=>1, 'q'=>1, 'del'=>1, 'ins'=>1), 'classid'=>array('object'=>1), 'clear'=>array('br'=>1), 'code'=>array('applet'=>1), 'codebase'=>array('object'=>1, 'applet'=>1), 'codetype'=>array('object'=>1), 'color'=>array('font'=>1), 'cols'=>array('textarea'=>1), 'colspan'=>array('td'=>1, 'th'=>1), 'compact'=>array('dir'=>1, 'dl'=>1, 'menu'=>1, 'ol'=>1, 'ul'=>1), 'coords'=>array('area'=>1, 'a'=>1), 'data'=>array('object'=>1), 'datetime'=>array('del'=>1, 'ins'=>1), 'declare'=>array('object'=>1), 'defer'=>array('script'=>1), 'dir'=>array('bdo'=>1), 'disabled'=>array('button'=>1, 'input'=>1, 'optgroup'=>1, 'option'=>1, 'select'=>1, 'textarea'=>1), 'enctype'=>array('form'=>1), 'face'=>array('font'=>1), 'flashvars'=>array('embed'=>1), 'for'=>array('label'=>1),
        'frame'=>array('table'=>1), 'frameborder'=>array('iframe'=>1), 'headers'=>array('td'=>1, 'th'=>1), 'height'=>array('embed'=>1, 'iframe'=>1, 'td'=>1, 'th'=>1, 'img'=>1, 'object'=>1, 'applet'=>1), 'href'=>array('a'=>1, 'area'=>1), 'hreflang'=>array('a'=>1), 'hspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'ismap'=>array('img'=>1, 'input'=>1), 'label'=>array('option'=>1, 'optgroup'=>1), 'language'=>array('script'=>1), 'longdesc'=>array('img'=>1, 'iframe'=>1), 'marginheight'=>array('iframe'=>1), 'marginwidth'=>array('iframe'=>1), 'maxlength'=>array('input'=>1), 'method'=>array('form'=>1), 'model'=>array('embed'=>1), 'multiple'=>array('select'=>1), 'name'=>array('button'=>1, 'embed'=>1, 'textarea'=>1, 'applet'=>1, 'select'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'a'=>1, 'input'=>1, 'object'=>1, 'map'=>1, 'param'=>1), 'nohref'=>array('area'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'object'=>array('applet'=>1), 'onblur'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onchange'=>array('input'=>1, 'select'=>1, 'textarea'=>1), 'onfocus'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onreset'=>array('form'=>1), 'onselect'=>array('input'=>1, 'textarea'=>1), 'onsubmit'=>array('form'=>1), 'pluginspage'=>array('embed'=>1), 'pluginurl'=>array('embed'=>1), 'prompt'=>array('isindex'=>1), 'readonly'=>array('textarea'=>1, 'input'=>1), 'rel'=>array('a'=>1), 'rev'=>array('a'=>1), 'rows'=>array('textarea'=>1), 'rowspan'=>array('td'=>1, 'th'=>1), 'rules'=>array('table'=>1), 'scope'=>array('td'=>1, 'th'=>1), 'scrolling'=>array('iframe'=>1), 'selected'=>array('option'=>1), 'shape'=>array('area'=>1, 'a'=>1), 'size'=>array('hr'=>1, 'font'=>1, 'input'=>1, 'select'=>1), 'span'=>array('col'=>1, 'colgroup'=>1), 'src'=>array('embed'=>1, 'script'=>1, 'input'=>1, 'iframe'=>1, 'img'=>1), 'standby'=>array('object'=>1), 'start'=>array('ol'=>1), 'summary'=>array('table'=>1),
        'tabindex'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'object'=>1, 'select'=>1, 'textarea'=>1), 'target'=>array('a'=>1, 'area'=>1, 'form'=>1), 'type'=>array('a'=>1, 'embed'=>1, 'object'=>1, 'param'=>1, 'script'=>1, 'input'=>1, 'li'=>1, 'ol'=>1, 'ul'=>1, 'button'=>1), 'usemap'=>array('img'=>1, 'input'=>1, 'object'=>1), 'valign'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'value'=>array('input'=>1, 'option'=>1, 'param'=>1, 'button'=>1, 'li'=>1), 'valuetype'=>array('param'=>1), 'vspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'width'=>array('embed'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'object'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'applet'=>1, 'col'=>1, 'colgroup'=>1, 'pre'=>1), 'wmode'=>array('embed'=>1), 'xml:space'=>array('pre'=>1, 'script'=>1, 'style'=>1)); // Ele-specific
    static $aNE = array('checked'=>1, 'compact'=>1, 'declare'=>1, 'defer'=>1, 'disabled'=>1, 'ismap'=>1, 'multiple'=>1, 'nohref'=>1, 'noresize'=>1, 'noshade'=>1, 'nowrap'=>1, 'readonly'=>1, 'selected'=>1); // Empty
    static $aNP = array('action'=>1, 'cite'=>1, 'classid'=>1, 'codebase'=>1, 'data'=>1, 'href'=>1, 'longdesc'=>1, 'model'=>1, 'pluginspage'=>1, 'pluginurl'=>1, 'usemap'=>1); // Need scheme check; excludes style, on* & src
    static $aNU = array('class'=>array('param'=>1, 'script'=>1), 'dir'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'id'=>array('script'=>1), 'lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'xml:lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'onclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'ondblclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeydown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeypress'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeyup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousedown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousemove'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseout'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseover'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'style'=>array('param'=>1, 'script'=>1), 'title'=>array('param'=>1, 'script'=>1)); // Univ & exceptions

    if ($C['lc_std_val']) {
        // predef attr vals for $eAL & $aNE ele
        static $aNL = array('all'=>1, 'baseline'=>1, 'bottom'=>1, 'button'=>1, 'center'=>1, 'char'=>1, 'checkbox'=>1, 'circle'=>1, 'col'=>1, 'colgroup'=>1, 'cols'=>1, 'data'=>1, 'default'=>1, 'file'=>1, 'get'=>1, 'groups'=>1, 'hidden'=>1, 'image'=>1, 'justify'=>1, 'left'=>1, 'ltr'=>1, 'middle'=>1, 'none'=>1, 'object'=>1, 'password'=>1, 'poly'=>1, 'post'=>1, 'preserve'=>1, 'radio'=>1, 'rect'=>1, 'ref'=>1, 'reset'=>1, 'right'=>1, 'row'=>1, 'rowgroup'=>1, 'rows'=>1, 'rtl'=>1, 'submit'=>1, 'text'=>1, 'top'=>1);
        static $eAL = array('a'=>1, 'area'=>1, 'bdo'=>1, 'button'=>1, 'col'=>1, 'form'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'xml:space'=>1);
        $lcase = isset($eAL[$e]) ? 1 : 0;
    }

    $depTr = 0;
    if ($C['no_deprecated_attr']) {
        // dep attr:applicable ele
        static $aND = array('align'=>array('caption'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'object'=>1, 'p'=>1, 'table'=>1), 'bgcolor'=>array('table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1), 'border'=>array('img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'clear'=>array('br'=>1), 'compact'=>array('dl'=>1, 'ol'=>1, 'ul'=>1), 'height'=>array('td'=>1, 'th'=>1), 'hspace'=>array('img'=>1, 'object'=>1), 'language'=>array('script'=>1), 'name'=>array('a'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'map'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'size'=>array('hr'=>1), 'start'=>array('ol'=>1), 'type'=>array('li'=>1, 'ol'=>1, 'ul'=>1), 'value'=>array('li'=>1), 'vspace'=>array('img'=>1, 'object'=>1), 'width'=>array('hr'=>1, 'pre'=>1, 'td'=>1, 'th'=>1));
        static $eAD = array('a'=>1, 'br'=>1, 'caption'=>1, 'div'=>1, 'dl'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'object'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'script'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1, 'ul'=>1);
        $depTr = isset($eAD[$e]) ? 1 : 0;
    }

    // attr name-vals
    if (strpos($a, "\x01") !== false) {
        $a = preg_replace('`\x01[^\x01]*\x01`', '', $a);
    } // No comment/CDATA sec
    // Small state machine: 0 = expect name, 1 = expect '=' or next name, 2 = expect value.
    $mode = 0;
    $a = trim($a, ' /');
    $aA = array();
    while (strlen($a)) {
        $w = 0; // set to 1 when this pass consumed something
        switch ($mode) {
            case 0: // Name
                if (preg_match('`^[a-zA-Z][\-a-zA-Z:]+`', $a, $m)) {
                    $nm = strtolower($m[0]);
                    $w = $mode = 1;
                    $a = ltrim(substr_replace($a, '', 0, strlen($m[0])));
                }
                break;
            case 1:
                if ($a[0] == '=') { // =
                    $w = 1;
                    $mode = 2;
                    $a = ltrim($a, '= ');
                } else { // No val
                    $w = 1;
                    $mode = 0;
                    $a = ltrim($a);
                    $aA[$nm] = '';
                }
                break;
            case 2: // Val
                if (preg_match('`^((?:"[^"]*")|(?:\'[^\']*\')|(?:\s*[^\s"\']+))(.*)`', $a, $m)) {
                    $a = ltrim($m[2]);
                    $m = $m[1];
                    $w = 1;
                    $mode = 0;
                    $aA[$nm] = trim(($m[0] == '"' or $m[0] == '\'') ? substr($m, 1, -1) : $m);
                }
                break;
        }
        if ($w == 0) { // Parse errs, deal with space, " & '
            $a = preg_replace('`^(?:"[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*`', '', $a);
            $mode = 0;
        }
    }
    if ($mode == 1) {
        $aA[$nm] = '';
    }

    // clean attrs
    global $S;
    $rl = isset($S[$e]) ? $S[$e] : array();
    $a = array();
    $nfr = 0; // set when 'nofollow' must be appended to a rel that is processed later
    foreach ($aA as $k=>$v) {
        if (((isset($C['deny_attribute']['*']) ? isset($C['deny_attribute'][$k]) : !isset($C['deny_attribute'][$k])) && (isset($aN[$k][$e]) or (isset($aNU[$k]) && !isset($aNU[$k][$e]))) && !isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])) {
            if (isset($aNE[$k])) {
                $v = $k;
            } elseif (!empty($lcase) && (($e != 'button' && $e != 'input') or $k == 'type')) {
                // For button/input only `type` is normalised, so that free-text
                // values such as value="Text" keep their case.
                $v = (isset($aNL[($v2 = strtolower($v))])) ? $v2 : $v;
            }
            if ($k == 'style' && !$C['style_pass']) {
                if (false !== strpos($v, '&#')) {
                    // Decode numeric entities that could be used to disguise
                    // "expression", "url(" and similar CSS constructs.
                    static $sC = array('&#x20;'=>' ', '&#32;'=>' ', '&#x45;'=>'e', '&#69;'=>'e', '&#x65;'=>'e', '&#101;'=>'e', '&#x58;'=>'x', '&#88;'=>'x', '&#x78;'=>'x', '&#120;'=>'x', '&#x50;'=>'p', '&#80;'=>'p', '&#x70;'=>'p', '&#112;'=>'p', '&#x53;'=>'s', '&#83;'=>'s', '&#x73;'=>'s', '&#115;'=>'s', '&#x49;'=>'i', '&#73;'=>'i', '&#x69;'=>'i', '&#105;'=>'i', '&#x4f;'=>'o', '&#79;'=>'o', '&#x6f;'=>'o', '&#111;'=>'o', '&#x4e;'=>'n', '&#78;'=>'n', '&#x6e;'=>'n', '&#110;'=>'n', '&#x55;'=>'u', '&#85;'=>'u', '&#x75;'=>'u', '&#117;'=>'u', '&#x52;'=>'r', '&#82;'=>'r', '&#x72;'=>'r', '&#114;'=>'r', '&#x4c;'=>'l', '&#76;'=>'l', '&#x6c;'=>'l', '&#108;'=>'l', '&#x28;'=>'(', '&#40;'=>'(', '&#x29;'=>')', '&#41;'=>')', '&#x3a;'=>':', '&#58;'=>':', '&#x22;'=>'"', '&#34;'=>'"', '&#x27;'=>"'", '&#39;'=>"'", '&#x2f;'=>'/', '&#47;'=>'/', '&#x2a;'=>'*', '&#42;'=>'*', '&#x5c;'=>'\\', '&#92;'=>'\\');
                    $v = strtr($v, $sC);
                }
                $v = preg_replace_callback('`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+?)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS', 'hl_prot', $v);
                $v = !$C['css_expression'] ? preg_replace('`expression`i', ' ', preg_replace('`\\\\\S|(/|(%2f))(\*|(%2a))`i', ' ', $v)) : $v;
            } elseif (isset($aNP[$k]) or strpos($k, 'src') !== false or $k[0] == 'o') {
                // Soft hyphens are replaced by spaces before the scheme check.
                $v = str_replace("\xad", ' ', (strpos($v, '&') !== false ? str_replace(array('&#xad;', '&#173;', '&shy;'), ' ', $v) : $v));
                $v = hl_prot($v, $k);
                if ($k == 'href') { // X-spam
                    if ($C['anti_mail_spam'] && strpos($v, 'mailto:') === 0) {
                        $v = str_replace('@', htmlspecialchars($C['anti_mail_spam']), $v);
                    } elseif ($C['anti_link_spam']) {
                        $r1 = $C['anti_link_spam'][1];
                        if (!empty($r1) && preg_match($r1, $v)) {
                            continue;
                        }
                        $r0 = $C['anti_link_spam'][0];
                        if (!empty($r0) && preg_match($r0, $v)) {
                            if (isset($a['rel'])) {
                                if (!preg_match('`\bnofollow\b`i', $a['rel'])) {
                                    $a['rel'] .= ' nofollow';
                                }
                            } elseif (isset($aA['rel'])) {
                                if (!preg_match('`\bnofollow\b`i', $aA['rel'])) {
                                    $nfr = 1;
                                }
                            } else {
                                $a['rel'] = 'nofollow';
                            }
                        }
                    }
                }
            }
            if (isset($rl[$k]) && is_array($rl[$k]) && ($v = hl_attrval($v, $rl[$k])) === 0) {
                continue;
            }
            $a[$k] = str_replace('"', '&quot;', $v);
        }
    }
    if ($nfr) {
        $a['rel'] = isset($a['rel']) ? $a['rel']. ' nofollow' : 'nofollow';
    }

    // rqd attr
    static $eAR = array('area'=>array('alt'=>'area'), 'bdo'=>array('dir'=>'ltr'), 'form'=>array('action'=>''), 'img'=>array('src'=>'', 'alt'=>'image'), 'map'=>array('name'=>''), 'optgroup'=>array('label'=>''), 'param'=>array('name'=>''), 'script'=>array('type'=>'text/javascript'), 'textarea'=>array('rows'=>'10', 'cols'=>'50'));
    if (isset($eAR[$e])) {
        foreach ($eAR[$e] as $k=>$v) {
            if (!isset($a[$k])) {
                // An empty default means "use the attribute name as its value".
                $a[$k] = isset($v[0]) ? $v : $k;
            }
        }
    }

    // depr attrs
    if ($depTr) {
        $c = array(); // CSS declarations replacing deprecated attributes
        foreach ($a as $k=>$v) {
            if ($k == 'style' or !isset($aND[$k][$e])) {
                continue;
            }
            if ($k == 'align') {
                unset($a['align']);
                if ($e == 'img' && ($v == 'left' or $v == 'right')) {
                    $c[] = 'float: '. $v;
                } elseif (($e == 'div' or $e == 'table') && $v == 'center') {
                    $c[] = 'margin: auto';
                } else {
                    $c[] = 'text-align: '. $v;
                }
            } elseif ($k == 'bgcolor') {
                unset($a['bgcolor']);
                $c[] = 'background-color: '. $v;
            } elseif ($k == 'border') {
                unset($a['border']);
                $c[] = "border: {$v}px";
            } elseif ($k == 'bordercolor') {
                unset($a['bordercolor']);
                $c[] = 'border-color: '. $v;
            } elseif ($k == 'clear') {
                unset($a['clear']);
                $c[] = 'clear: '. ($v != 'all' ? $v : 'both');
            } elseif ($k == 'compact') {
                unset($a['compact']);
                $c[] = 'font-size: 85%';
            } elseif ($k == 'height' or $k == 'width') {
                unset($a[$k]);
                // substr() instead of $v[0] so an empty value does not raise a warning.
                $c[] = $k. ': '. (substr($v, 0, 1) != '*' ? $v. (ctype_digit($v) ? 'px' : '') : 'auto');
            } elseif ($k == 'hspace') {
                unset($a['hspace']);
                $c[] = "margin-left: {$v}px; margin-right: {$v}px";
            } elseif ($k == 'language' && !isset($a['type'])) {
                unset($a['language']);
                $a['type'] = 'text/'. strtolower($v);
            } elseif ($k == 'name') {
                if ($C['no_deprecated_attr'] == 2 or ($e != 'a' && $e != 'map')) {
                    unset($a['name']);
                }
                // Only a value that is a valid ID as a whole may become the id.
                if (!isset($a['id']) && preg_match('`^[a-zA-Z][a-zA-Z\d.:_\-]*$`', $v)) {
                    $a['id'] = $v;
                }
            } elseif ($k == 'noshade') {
                unset($a['noshade']);
                $c[] = 'border-style: none; border: 0; background-color: gray; color: gray';
            } elseif ($k == 'nowrap') {
                unset($a['nowrap']);
                $c[] = 'white-space: nowrap';
            } elseif ($k == 'size') {
                unset($a['size']);
                $c[] = 'size: '. $v. 'px';
            } elseif ($k == 'start' or $k == 'value') {
                unset($a[$k]);
            } elseif ($k == 'type') {
                unset($a['type']);
                static $ol_type = array('i'=>'lower-roman', 'I'=>'upper-roman', 'a'=>'lower-latin', 'A'=>'upper-latin', '1'=>'decimal');
                $c[] = 'list-style-type: '. (isset($ol_type[$v]) ? $ol_type[$v] : 'decimal');
            } elseif ($k == 'vspace') {
                unset($a['vspace']);
                $c[] = "margin-top: {$v}px; margin-bottom: {$v}px";
            }
        }
        if (count($c)) {
            $c = implode('; ', $c);
            $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $c. ';': $c. ';';
        }
    }
    // unique ID
    if ($C['unique_ids'] && isset($a['id'])) {
        if (!preg_match('`^[A-Za-z][A-Za-z0-9_\-.:]*$`', ($id = $a['id'])) or (isset($GLOBALS['hl_Ids'][$id]) && $C['unique_ids'] == 1)) {
            unset($a['id']);
        } else {
            // Prefix with the configured string until the ID is unused.
            while (isset($GLOBALS['hl_Ids'][$id])) {
                $id = $C['unique_ids']. $id;
            }
            $GLOBALS['hl_Ids'][($a['id'] = $id)] = 1;
        }
    }
    // xml:lang
    if ($C['xml:lang'] && isset($a['lang'])) {
        $a['xml:lang'] = isset($a['xml:lang']) ? $a['xml:lang'] : $a['lang'];
        if ($C['xml:lang'] == 2) {
            unset($a['lang']);
        }
    }
    // for transformed tag
    if (!empty($trt)) {
        $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $trt : $trt;
    }
    // return with empty ele /
    if (empty($C['hook_tag'])) {
        $aA = '';
        foreach ($a as $k=>$v) {
            $aA .= " {$k}=\"{$v}\"";
        }
        return "<{$e}{$aA}". (isset($eE[$e]) ? ' /' : ''). '>';
    } else {
        return $C['hook_tag']($e, $a);
    }
    // eof
}
912
|
|
|
|
913
|
|
|
/**
 * Replace a deprecated element by a non-deprecated one plus equivalent CSS.
 *
 * `$e` is rewritten in place: center => div, dir/menu => ul,
 * s/strike/u/font => span. For any other element `$e` is set to 0 when
 * `$t` is 2, and left untouched otherwise.
 *
 * @param string|int $e Element name; modified by reference.
 * @param string     $a Raw attribute string; read for `font` only.
 * @param int        $t Strictness level; 2 rejects elements with no replacement.
 * @return string|int CSS declarations to add to `style` ('' if none), or 0 when the element is rejected.
 */
function hl_tag2(&$e, &$a, $t=1)
{
    // transform tag
    if ($e == 'center') {
        $e = 'div';
        return 'text-align: center;';
    }
    if ($e == 'dir' or $e == 'menu') {
        $e = 'ul';
        return '';
    }
    if ($e == 's' or $e == 'strike') {
        $e = 'span';
        return 'text-decoration: line-through;';
    }
    if ($e == 'u') {
        $e = 'span';
        return 'text-decoration: underline;';
    }
    // <font size> value => CSS font-size
    static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%');
    if ($e == 'font') {
        $a2 = '';
        // Quoted face value first, then the unquoted form.
        if (preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m) or preg_match('`face\s*=(\s*)(\S+)`i', $a, $m)) {
            $a2 .= ' font-family: '. str_replace('"', '\'', trim($m[2])). ';';
        }
        if (preg_match('`color\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m)) {
            $a2 .= ' color: '. trim($m[2]). ';';
        }
        if (preg_match('`size\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m) && isset($fs[($m = trim($m[2]))])) {
            $a2 .= ' font-size: '. $fs[$m]. ';';
        }
        $e = 'span';
        return ltrim($a2);
    }
    // No replacement exists for the element.
    if ($t == 2) {
        $e = 0;
        return 0;
    }
    return '';
    // eof
}
954
|
|
|
|
955
|
|
|
/**
 * Compact or re-indent markup.
 *
 * Content of CDATA sections, comments and pre/script/textarea elements is
 * shielded from whitespace changes. With `$w` equal to -1 the text is only
 * compacted (whitespace runs collapsed); otherwise it is re-indented.
 *
 * @param string     $t Markup to tidy.
 * @param string|int $w -1 to compact only, or a format string: optional
 *                      leading digit (indent width), `t` (tab) or `s`
 *                      optionally followed by a start depth 1-9, `r`/`n`
 *                      to select "\r" or "\r\n" line endings.
 * @param string     $p Name of the parent element of the markup.
 * @return string Tidied markup; `$t` unchanged when the parent is pre, script or textarea.
 */
function hl_tidy($t, $w, $p)
{
    // Tidy/compact HTM
    // Trailing comma in the haystack so that 'textarea,' can match as well.
    if (strpos(' pre,script,textarea,', "$p,")) {
        return $t;
    }
    // Swap characters that must survive untouched for control characters.
    $shield = function ($m) {
        return $m[1]. str_replace(array('<', '>', "\n", "\r", "\t", ' '), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];
    };
    $t = preg_replace('`\s+`', ' ', preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)(</\2>)`sm'), $shield, $t));
    if (($w = strtolower($w)) == -1) {
        return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t);
    }
    $s = strpos(" $w", 't') ? "\t" : ' '; // indent character
    $s = preg_match('`\d`', $w, $m) ? str_repeat($s, $m[0]) : str_repeat($s, ($s == "\t" ? 1 : 2)); // indent unit
    $N = preg_match('`[ts]([1-9])`', $w, $m) ? $m[1] : 0; // starting depth
    $a = array('br'=>1);
    $b = array('button'=>1, 'input'=>1, 'option'=>1, 'param'=>1);
    $c = array('caption'=>1, 'dd'=>1, 'dt'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'isindex'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'object'=>1, 'p'=>1, 'pre'=>1, 'td'=>1, 'textarea'=>1, 'th'=>1);
    $d = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'colgroup'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'fieldset'=>1, 'form'=>1, 'hr'=>1, 'iframe'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1);
    $T = explode('<', $t);
    $X = 1;
    // The pass is restarted with a larger starting depth whenever a closing
    // tag would take the indent level below zero.
    while ($X) {
        $n = $N;
        $t = $T;
        ob_start();
        if (isset($d[$p])) {
            echo str_repeat($s, ++$n);
        }
        echo ltrim(array_shift($t));
        for ($i=-1, $j=count($t); ++$i<$j;) {
            // Each piece is "tag>text"; the text part may be missing.
            $parts = explode('>', $t[$i]);
            $e = $parts[0];
            $r = isset($parts[1]) ? $parts[1] : '';
            $e0 = substr($e, 0, 1);
            // $x: 0 = closing tag, 1 = self-closed tag, 2 = opening tag, -1 = comment/CDATA
            $x = $e0 == '/' ? 0 : (substr($e, -1) == '/' ? 1 : ($e0 != '!' ? 2 : -1));
            $y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0);
            $e = "<$e>";
            if (isset($d[$y])) {
                if (!$x) {
                    if ($n) {
                        echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);
                    } else {
                        ++$N;
                        ob_end_clean();
                        continue 2;
                    }
                } else {
                    echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, ($x != 1 ? ++$n : $n));
                }
                echo $r;
                continue;
            }
            $f = "\n". str_repeat($s, $n);
            if (isset($c[$y])) {
                if (!$x) {
                    echo $e, $f, $r;
                } else {
                    echo $f, $e, $r;
                }
            } elseif (isset($b[$y])) {
                echo $f, $e, $r;
            } elseif (isset($a[$y])) {
                echo $e, $f, $r;
            } elseif (!$y) {
                echo $f, $e, $f, $r;
            } else {
                echo $e, $r;
            }
        }
        $X = 0;
    }
    $t = str_replace(array("\n ", " \n"), "\n", preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents()));
    ob_end_clean();
    if (($l = strpos(" $w", 'r') ? (strpos(" $w", 'n') ? "\r\n" : "\r") : 0)) {
        $t = str_replace("\n", $l, $t);
    }
    return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t);
    // eof
}
1030
|
|
|
|
1031
|
|
|
/**
 * Report the htmLawed release this file corresponds to.
 *
 * @return string Version number.
 */
function hl_version()
{
    // rel
    return '1.1.17';
    // eof
}
1037
|
|
|
|
1038
|
|
|
/**
 * kses-compatible entry point that delegates to htmLawed().
 *
 * Builds an htmLawed configuration from kses-style arguments: only the
 * elements that are keys of `$h` are permitted, and each of them gets a
 * `n => * ` (deny all other attributes) entry before `$h` is passed on as
 * the spec.
 *
 * @param string $t Input text.
 * @param array  $h Allowed elements, keyed by element name.
 * @param array  $p Allowed URL schemes.
 * @return string Result of htmLawed().
 */
function kses($t, $h, $p=array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto'))
{
    // kses compat
    foreach (array_keys($h) as $k) {
        $h[$k]['n']['*'] = 1;
    }
    $C = array();
    $C['cdata'] = $C['comment'] = $C['make_tag_strict'] = $C['no_deprecated_attr'] = $C['unique_ids'] = 0;
    $C['keep_bad'] = 1;
    $C['elements'] = count($h) ? strtolower(implode(',', array_keys($h))) : '-*';
    $C['hook'] = 'kses_hook';
    $C['schemes'] = '*:'. implode(',', $p);
    return htmLawed($t, $C, $h);
    // eof
}
1052
|
|
|
|
1053
|
|
|
/**
 * Default hook installed by kses(); returns the input unchanged.
 *
 * @param string $t Input text.
 * @param array  $C Configuration, by reference; not modified here.
 * @param array  $S Spec, by reference; not modified here.
 * @return string `$t` as received.
 */
function kses_hook($t, &$C, &$S)
{
    // kses compat
    return $t;
    // eof
}
1059
|
|
|
|