|
1
|
|
|
<?php |
|
2
|
|
|
/* |
|
3
|
|
|
* changed : 10. oct. 03 |
|
4
|
|
|
* author : [email protected] |
|
5
|
|
|
* additional : Martin B. Vestergaard, Adrian Cope |
|
6
|
|
|
* download: http://www.phpclasses.org/browse.html/package/1020.html |
|
7
|
|
|
* |
|
8
|
|
|
* description : |
|
9
|
|
|
* a script aimed at cleaning up after mshtml. use it in your wysiwyg html-editor, |
|
10
|
|
|
* to strip messy code resulting from a copy-paste from word. |
|
11
|
|
|
* this script doesnt come anything near htmltidy, but its pure php. if you have |
|
12
|
|
|
* access to install binaries on your server, you might want to try using htmltidy. |
|
13
|
|
|
* note : |
|
14
|
|
|
* you might want to allow fonttags or even style tags. in that case, modify the |
|
15
|
|
|
* function htmlcleaner::cleanup() |
|
16
|
|
|
* usage : |
|
17
|
|
|
* $body = htmlcleaner::cleanup($_POST['htmlCode']); |
|
18
|
|
|
* |
|
19
|
|
|
* disclaimer : |
|
20
|
|
|
* this piece of code is freely usable by anyone. if it makes your life better, |
|
21
|
|
|
* remember me in your eveningprayer. if it makes your life worse, try doing it any |
|
22
|
|
|
* better yourself. |
|
23
|
|
|
* |
|
24
|
|
|
* todo/bugs : |
|
25
|
|
|
* the script seems to remove textnodes in the root area. (eg. with no enclosing tags) |
|
26
|
|
|
*/ |
|
27
|
|
|
// How a tag is terminated when it is written back out.
const HTML_CLEANER_NODE_CLOSINGSTYLE_NORMAL      = 0;
const HTML_CLEANER_NODE_CLOSINGSTYLE_NONE        = 1;
const HTML_CLEANER_NODE_CLOSINGSTYLE_XHTMLSINGLE = 2; // self-closing, e.g. <br />
const HTML_CLEANER_NODE_CLOSINGSTYLE_HTMLSINGLE  = 3; // void element without slash, e.g. <br>

// What kind of token an htmlcleanertag instance represents.
const HTML_CLEANER_NODE_NODETYPE_NODE        = 0; // opening tag
const HTML_CLEANER_NODE_NODETYPE_CLOSINGNODE = 1; // closing tag
const HTML_CLEANER_NODE_NODETYPE_TEXT        = 2; // plain text between tags
const HTML_CLEANER_NODE_NODETYPE_SPECIAL     = 3; // "<!...>" or "<?...>" constructs, kept verbatim
|
35
|
|
|
/**
 * One token of a tokenised HTML string: an opening tag, a closing tag, a run
 * of text, or a "special" construct starting with "<!" or "<?".
 *
 * Tags are split into a lower-cased node name and an attribute list; text and
 * special tokens keep their raw source in $nodeValue.
 */
class htmlcleanertag {
	/** @var int one of the HTML_CLEANER_NODE_NODETYPE_* constants */
	public $nodeType;
	/** @var string|null lower-cased tag name; not set for text/special tokens */
	public $nodeName;
	/** @var string|null raw source; only set for text/special tokens */
	public $nodeValue;
	/**
	 * @var array attributes of a tag. Attributes with a value are stored as
	 *            lower-cased-name => value; value-less attributes are stored
	 *            under integer keys with the name (original case) as value.
	 */
	public $attributes = array();
	/** @var int|null one of the HTML_CLEANER_NODE_CLOSINGSTYLE_* constants, when known */
	public $closingStyle;

	/**
	 * Classifies $str and, when it is a regular tag, parses it.
	 *
	 * @param string $str a complete tag ("<...>") or a run of text
	 */
	public function __construct($str)
	{
		// isset() guards against the empty string, which has no offset 0.
		if (isset($str[0]) && $str[0] == '<') {
			$this->nodeType = HTML_CLEANER_NODE_NODETYPE_NODE;
			if (isset($str[1]) && ($str[1] == '?' || $str[1] == '!')) {
				// comments, doctypes and processing instructions are kept verbatim
				$this->nodeType  = HTML_CLEANER_NODE_NODETYPE_SPECIAL;
				$this->nodeValue = $str;
			} else {
				$this->parseFromString($str);
			}
		} else {
			$this->nodeType  = HTML_CLEANER_NODE_NODETYPE_TEXT;
			$this->nodeValue = $str;
		}
	}

	/**
	 * Parses a single tag ("<name attr=...>", "</name>" or "<name ... />")
	 * into $nodeType, $nodeName, $closingStyle and $attributes.
	 *
	 * Raises an E_USER_ERROR via trigger_error() when $str is not delimited
	 * by "<" and ">".
	 *
	 * @param string $str the tag source, including the angle brackets
	 */
	public function parseFromString($str)
	{
		$str    = str_replace("\n", " ", $str);
		$offset = 1;                  // index of the first character after "<" (or "</")
		$endset = strlen($str) - 2;   // index of the last character before ">"

		if (substr($str, 0, 1) != '<' || substr($str, -1) !== '>') {
			trigger_error('tag syntax error', E_USER_ERROR);
		}
		if ($endset > 0 && $str[$endset] == '/') {
			// "<br />": leave the slash out of the attribute range
			$endset--;
			$this->closingStyle = HTML_CLEANER_NODE_CLOSINGSTYLE_XHTMLSINGLE;
		}
		if (isset($str[1]) && $str[1] == '/') {
			$offset = 2;
			$this->nodeType = HTML_CLEANER_NODE_NODETYPE_CLOSINGNODE;
		}

		// The name must start right after the bracket. Anchoring keeps $offset
		// in step with the match; without a name the tag name is empty.
		$tagname = '';
		if (preg_match("|^</?([a-zA-Z0-9:-]+)|", $str, $matches)) {
			$tagname = $matches[1];
		}
		$offset += strlen($tagname);

		// the cast covers substr() returning false on older PHP versions
		$tagattr = (string)substr($str, $offset, $endset - $offset + 1);

		$this->nodeName   = strtolower($tagname);
		$this->attributes = $this->parseAttributes($tagattr);
	}

	/**
	 * Parses the attribute part of a tag.
	 *
	 * Values may be double-quoted, single-quoted or unquoted. An unquoted
	 * value runs up to the next whitespace character; an unterminated quoted
	 * value runs to the end of the string. Whitespace around "=" is allowed.
	 * Characters that cannot start an attribute name are skipped.
	 *
	 * @param string $str everything between the tag name and the closing bracket
	 * @return array lower-cased-name => value for attributes with a value,
	 *               integer key => name for value-less attributes
	 */
	public function parseAttributes($str)
	{
		$str    = trim($str);
		$strlen = strlen($str);
		if ($strlen == 0) {
			return array();
		}

		$return = array();
		$i      = 0;
		$_state = 0;   // 0: looking for a name, 2: looking for "=", 3: looking for a value
		$_name  = '';  // name that has been read but not stored yet

		while ($i < $strlen) {
			$chr = $str[$i];

			if ($_state == 0) {
				if (ctype_space($chr)) {
					$i++;
					continue;
				}
				// \G anchors the match at offset $i, so the cursor cannot drift.
				if (!preg_match('/\G[a-zA-Z][a-zA-Z0-9_:.-]*/', $str, $matches, 0, $i)) {
					$i++;   // not the start of a name: skip it
					continue;
				}
				$_name = $matches[0];
				$i    += strlen($_name);
				if ($i < $strlen && $str[$i] == '=') {
					$_state = 3;
					$i++;
				} else {
					$_state = 2;
				}
			} else if ($_state == 2) {
				if (ctype_space($chr)) {
					$i++;
				} else if ($chr == '=') {
					$_state = 3;
					$i++;
				} else {
					// No "=" follows: this was a value-less attribute. Do not
					// advance; this character may start the next attribute.
					$return[] = $_name;
					$_name    = '';
					$_state   = 0;
				}
			} else { // state 3
				if (ctype_space($chr)) {
					$i++;   // whitespace between "=" and the value
					continue;
				}
				if ($chr == '"' || $chr == "'") {
					$close = strpos($str, $chr, $i + 1);
					if ($close === false) {
						// unterminated quote: take the rest of the string
						$_value = (string)substr($str, $i + 1);
						$i      = $strlen;
					} else {
						$_value = substr($str, $i + 1, $close - $i - 1);
						$i      = $close + 1;
					}
				} else {
					// unquoted value: up to the next whitespace character
					$length = strcspn($str, " \t\n\r\x0B\x0C", $i);
					$_value = substr($str, $i, $length);
					$i     += $length;
				}
				$return[strtolower($_name)] = $_value;
				$_name  = '';
				$_state = 0;
			}
		}

		// a name at the very end (with or without a dangling "=") has no value
		if ($_name != '') {
			$return[] = $_name;
		}

		return $return;
	}

	/**
	 * Alias of toString().
	 *
	 * @return string
	 */
	public function _toString() {
		return $this->toString();
	}

	/**
	 * Serialises the token back to HTML.
	 *
	 * Text and special tokens are returned verbatim. Tags are rebuilt from the
	 * node name and attributes, with every value wrapped in double quotes.
	 * As a side effect, link/img/br/hr tags that are not XHTML self-closing
	 * get their $closingStyle set to HTML_CLEANER_NODE_CLOSINGSTYLE_HTMLSINGLE.
	 *
	 * @return string
	 */
	public function toString()
	{
		if (in_array($this->nodeName, array('link', 'img', 'br', 'hr'), true)
			&& $this->closingStyle != HTML_CLEANER_NODE_CLOSINGSTYLE_XHTMLSINGLE
		) {
			$this->closingStyle = HTML_CLEANER_NODE_CLOSINGSTYLE_HTMLSINGLE;
		}

		if ($this->nodeType == HTML_CLEANER_NODE_NODETYPE_TEXT
			|| $this->nodeType == HTML_CLEANER_NODE_NODETYPE_SPECIAL
		) {
			return $this->nodeValue;
		}
		if ($this->nodeType == HTML_CLEANER_NODE_NODETYPE_CLOSINGNODE) {
			return '</'.$this->nodeName.'>';
		}

		$str = '<'.$this->nodeName;
		foreach ($this->attributes as $attkey => $attvalue) {
			if (is_numeric($attkey)) {
				// value-less attribute, stored under a numeric key
				$str .= ' '.$attvalue;
			} else {
				// A literal double quote would end the attribute early, so it
				// is written as an entity.
				$str .= ' '.$attkey.'="'.str_replace('"', '&quot;', $attvalue).'"';
			}
		}
		if ($this->closingStyle == HTML_CLEANER_NODE_CLOSINGSTYLE_XHTMLSINGLE) {
			$str .= ' />';
		} else {
			$str .= '>';
		}
		return $str;
	}

}
|
209
|
|
|
|
|
210
|
|
|
/**
 * Tokenises HTML and cleans it according to a set of rewrite/delete rules.
 */
class htmlcleaner
{
	/**
	 * @return string version banner
	 */
	public static function version()
	{
		return 'mshtml cleanup v.0.9.2 by [email protected]';
	}

	/**
	 * Splits an HTML string into a flat list of htmlcleanertag tokens
	 * (tags, text runs and comments), in document order.
	 *
	 * Quoted attribute values may contain ">" without ending the tag. Text
	 * after the last tag is returned as a final text token. A tag or comment
	 * that is still open at the end of the input is dropped.
	 *
	 * @param string $str HTML source
	 * @return htmlcleanertag[]
	 */
	public static function dessicate($str)
	{
		$str     = (string)$str;
		$str_len = strlen($str);
		$parts   = array();
		$i       = 0;
		$_state  = 0;    // 0: between tags, looking for "<"; 1: inside a tag, looking for ">"
		$_buffer = '';   // source of the tag currently being read

		while ($i < $str_len) {
			if ($_state == 0) {
				$pos = strpos($str, '<', $i);
				if ($pos === false) {
					// no more tags: the remainder is a trailing text node
					array_push($parts, new htmlcleanertag(substr($str, $i)));
					break;
				}
				if ($pos > $i) {
					// text between the previous tag and this one
					array_push($parts, new htmlcleanertag(substr($str, $i, $pos - $i)));
				}
				$i = $pos;

				if (substr($str, $i, 4) === '<!--') {
					// Comments are taken whole, so that a ">" or quote inside
					// them is not interpreted.
					$end = strpos($str, '-->', $i + 3);
					if ($end === false) {
						break;   // unterminated comment: the rest is dropped
					}
					// Remove the next line to leave HTML comments out of the parts.
					array_push($parts, new htmlcleanertag(substr($str, $i, $end - $i + 3)));
					$i = $end + 3;
					continue;
				}

				$_buffer = '<';
				$_state  = 1;
				$i++;
				continue;
			}

			// state 1: inside a tag
			$chr      = $str[$i];
			$_buffer .= $chr;
			if ($chr == '"' || $chr == "'") {
				// Copy a quoted value in one go so a ">" inside it does not
				// end the tag. Without a closing quote the character is kept
				// as an ordinary one.
				$close = strpos($str, $chr, $i + 1);
				if ($close !== false) {
					$_buffer .= substr($str, $i + 1, $close - $i);
					$i = $close;
				}
			} else if ($chr == '>') {
				array_push($parts, new htmlcleanertag($_buffer));
				$_buffer = '';
				$_state  = 0;
			}
			$i++;
		}

		return $parts;
	}

	/**
	 * Removes the worst mess from Word-generated HTML.
	 *
	 * Recognised $config keys, all optional:
	 *  - "rewrite": array of tag-regex => rule. A rule of false drops every
	 *    matching tag; a string renames the tag; an array maps
	 *    attribute-regex => false (remove attribute), string (rename
	 *    attribute) or array of value-regex => false|replacement.
	 *    Only the first tag rule that matches a tag is applied.
	 *  - "delete_emptied": list of tag names that are removed, together with
	 *    their closing tag, once the rewrite rules have stripped all their
	 *    attributes.
	 *  - "delete_empty_containers": list of tag names that are removed when
	 *    immediately followed by their own closing tag (named anchors are
	 *    kept).
	 *
	 * Script blocks are protected from the cleaner and restored verbatim.
	 *
	 * @param string $body   HTML to clean
	 * @param array  $config cleaning rules, see above
	 * @return string cleaned HTML
	 */
	public static function cleanup($body, $config = array())
	{
		$scriptParts = array();

		// pick a placeholder prefix that does not occur in the input
		do {
			$prefix = md5(rand());
		} while (strpos($body, $prefix) !== false);

		$callback = function($matches) use ($prefix, &$scriptParts) {
			$scriptPartKey = '----'.$prefix . '-' . count($scriptParts).'----';
			$scriptParts[$scriptPartKey] = $matches[0];
			return $scriptPartKey;
		};

		// ".*?" with the s flag matches the same text as "(.|[\r\n])*?" but
		// without a capture group per character.
		$newbody = preg_replace_callback('!<script[^>]*>.*?</[^>]*script[^>]*>!is', $callback, $body);
		if (is_string($newbody)) {
			$body = $newbody;
		}

		// The wrapper makes sure root-level text is always enclosed in a tag.
		$body          = "<htmlcleaner>$body</htmlcleaner>";
		$rewrite_rules = isset($config["rewrite"]) ? $config["rewrite"] : null;
		$return        = '';
		$parts         = self::dessicate($body);

		// flip the tag lists so tag names can be used as indexes
		if (isset($config["delete_emptied"]) && is_array($config["delete_emptied"])) {
			$config["delete_emptied"] = array_flip($config["delete_emptied"]);
		}
		if (isset($config["delete_empty_containers"]) && is_array($config["delete_empty_containers"])) {
			$config["delete_empty_containers"] = array_flip($config["delete_empty_containers"]);
		}

		$delete_stack = array();
		$skipNodes    = 0;

		// Rules are used inside "/.../" below, so "/" in a rule is escaped.
		// An escaped rule is re-inserted under its new key.
		if (is_array($rewrite_rules)) {
			foreach ($rewrite_rules as $tag_rule => $attrib_rules) {
				if (strpos((string)$tag_rule, '/') !== false) {
					$escaped_rule = str_replace('/', '\/', $tag_rule);
					$rewrite_rules[$escaped_rule] = $attrib_rules;
					unset($rewrite_rules[$tag_rule]);
					$tag_rule = $escaped_rule;
				}
				if (!is_array($attrib_rules)) {
					continue;
				}
				foreach ($attrib_rules as $attrib_rule => $value_rules) {
					if (strpos((string)$attrib_rule, '/') !== false) {
						$escaped_rule = str_replace('/', '\/', $attrib_rule);
						$rewrite_rules[$tag_rule][$escaped_rule] = $value_rules;
						unset($rewrite_rules[$tag_rule][$attrib_rule]);
						$attrib_rule = $escaped_rule;
					}
					if (!is_array($value_rules)) {
						continue;
					}
					foreach ($value_rules as $value_rule => $value) {
						if (strpos((string)$value_rule, '/') !== false) {
							$escaped_rule = str_replace('/', '\/', $value_rule);
							$rewrite_rules[$tag_rule][$attrib_rule][$escaped_rule] = $value;
							unset($rewrite_rules[$tag_rule][$attrib_rule][$value_rule]);
						}
					}
				}
			}
		}

		foreach ($parts as $i => $part) {
			if ($skipNodes > 0) {
				$skipNodes--;
				continue;
			}

			if ($part->nodeType == HTML_CLEANER_NODE_NODETYPE_CLOSINGNODE) {
				if (isset($config["delete_emptied"][$part->nodeName])
					&& count($delete_stack)) {
					// unwind to the matching opening tag
					do {
						$closed = array_pop($delete_stack);
					} while (!empty($closed["tag"]) && $closed["tag"] != $part->nodeName);
					if (!empty($closed["delete"])) {
						// the opening tag was deleted, so drop its closing tag too
						$part = null;
					}
				}
			} else if ($part->nodeType == HTML_CLEANER_NODE_NODETYPE_NODE) {
				if (isset($config["delete_emptied"][$part->nodeName])
					&& count($delete_stack)) {
					array_push($delete_stack, array("tag" => $part->nodeName));
				} else if (isset($config["delete_empty_containers"][$part->nodeName])) {
					// named anchor objects are not containers
					if ($part->nodeName != 'a' || empty($part->attributes['name'])) {
						if (isset($parts[$i+1])
							&& $parts[$i+1]->nodeName == $part->nodeName
							&& $parts[$i+1]->nodeType == HTML_CLEANER_NODE_NODETYPE_CLOSINGNODE) {
							$skipNodes = 1;   // also skip the closing tag
							continue;
						}
					}
				}
			}

			if ($part !== null && is_array($rewrite_rules)) {
				foreach ($rewrite_rules as $tag_rule => $attrib_rules) {
					if (!preg_match('/'.$tag_rule.'/is', (string)$part->nodeName)) {
						continue;
					}
					if (is_array($attrib_rules)) {
						foreach ($attrib_rules as $attrib_rule => $value_rules) {
							foreach ($part->attributes as $attrib_key => $attrib_val) {
								if (!preg_match('/'.$attrib_rule.'/is', $attrib_key)) {
									continue;
								}
								if (is_array($value_rules)) {
									foreach ($value_rules as $value_rule => $value) {
										if (!preg_match('/'.$value_rule.'/is', $attrib_val)) {
											continue;
										}
										if ($value === false) {
											unset($part->attributes[$attrib_key]);
											if (!count($part->attributes)) {
												if (isset($config["delete_emptied"][$part->nodeName])) {
													// replace the entry pushed for this tag by one marked for deletion
													array_pop($delete_stack);
													array_push($delete_stack, array("tag" => $part->nodeName, "delete" => true));
													$part = null;
												}
												break 3; // no attributes left: done with this tag's attribute rules
											}
											// The attribute is gone; later value rules must not re-create it.
											break;
										} else {
											$part->attributes[$attrib_key] = preg_replace('/^'.$value_rule.'$/is', $value, $part->attributes[$attrib_key]);
										}
									}
								} else if ($value_rules === false) {
									unset($part->attributes[$attrib_key]);
									if (!count($part->attributes)) {
										if (isset($config["delete_emptied"][$part->nodeName])) {
											// replace the entry pushed for this tag by one marked for deletion
											array_pop($delete_stack);
											array_push($delete_stack, array("tag" => $part->nodeName, "delete" => true));
											$part = null;
										}
										break 2; // no attributes left: done with this tag's attribute rules
									}
								} else {
									// rename the attribute
									$part->attributes[preg_replace('/^'.$attrib_rule.'$/is', $value_rules, $attrib_key)] = $part->attributes[$attrib_key];
									unset($part->attributes[$attrib_key]);
								}
							}
						}
					} else if ($attrib_rules === false) {
						$part = null;   // drop the tag
					} else {
						$part->nodeName = $attrib_rules;   // rename the tag
					}
					break; // tag matched, so skip next rules.
				}
			}

			if ($part !== null && strpos((string)$part->nodeValue, '<?xml:namespace') === false) {
				$return .= $part->toString();
			}
		}

		// put the protected script blocks back
		$return = str_replace(array_keys($scriptParts), array_values($scriptParts), $return);

		//FIXME: htmlcleaner removes the '<' in '</htmlcleaner>' if the html code is broken
		// ie: if the last tag in the input isn't properly closed... it should instead
		// close any broken tag properly (add quotes and a '>')

		return str_replace('<htmlcleaner>', '', str_replace('</htmlcleaner>', '', $return));
	}
}
|
451
|
|
|
|
|
452
|
|
|
/**
 * Exposes the htmlcleaner entry points under underscore-prefixed names.
 */
class pinp_htmlcleaner extends htmlcleaner {

	/**
	 * Forwards to htmlcleaner::dessicate().
	 *
	 * @param string $str HTML source
	 * @return mixed whatever htmlcleaner::dessicate() returns
	 */
	public static function _dessicate($str) {
		$parts = parent::dessicate($str);
		return $parts;
	}

	/**
	 * Forwards to htmlcleaner::cleanup().
	 *
	 * @param string $str    HTML source
	 * @param mixed  $config passed through unchanged
	 * @return mixed whatever htmlcleaner::cleanup() returns
	 */
	public static function _cleanup($str,$config) {
		$cleaned = parent::cleanup($str, $config);
		return $cleaned;
	}

}
|
462
|
|
|
|
This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.
Both the `$myVar` assignment in line 1 and the `$higher` assignment in line 2 are dead. The first because `$myVar` is never used and the second because `$higher` is always overwritten for every possible time line.