1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/**
 * Definition for tables.  The general idea is to extract out all of the
 * essential bits, and then reconstruct it later.
 *
 * This is a bit confusing, because the DTDs and the W3C
 * validators seem to disagree on the appropriate definition. The
 * DTD claims:
 *
 *      (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
 *
 * But actually, the HTML4 spec then has this to say:
 *
 *      The TBODY start tag is always required except when the table
 *      contains only one table body and no table head or foot sections.
 *      The TBODY end tag may always be safely omitted.
 *
 * So the DTD is kind of wrong.  The validator is, unfortunately, kind
 * of on crack.
 *
 * The definition changed again in XHTML1.1; and in my opinion, this
 * formulation makes the most sense.
 *
 *      caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
 *
 * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
 * If we encounter a thead, tfoot or tbody, we are placed in the former
 * mode, and we *must* wrap any stray tr segments with a tbody. But if
 * we don't run into any of them, just having tr tags is OK.
 */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    // NOTE(review): the three properties below are presumably read by the
    // HTMLPurifier_ChildDef consumers; their exact use is not visible here.

    /** Declared false for this definition. */
    public $allow_empty = false;

    /** Type tag of this child definition. */
    public $type = 'table';

    /** Lookup of element names this definition handles as table children. */
    public $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
        'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);

    public function __construct() {}

    /**
     * Regroups the direct children of a table into the order
     * caption, col/colgroup, thead, tfoot, then row content.
     *
     * Top-level children are gathered (together with any whitespace that
     * precedes them) into per-element token arrays, sorted into buckets,
     * and then concatenated back in canonical order. Once a thead, tfoot
     * or tbody has been seen, runs of stray tr elements are wrapped in a
     * newly created tbody. A second thead/tfoot is renamed in place to
     * tbody, which mutates the token objects that were passed in.
     *
     * @param array $tokens_of_children Token objects that are children of
     *                                  the table element.
     * @param mixed $config  Not used by this method.
     * @param mixed $context Not used by this method.
     * @return bool|array false when there are no children or no tr/tbody
     *                    content was found; true when the rebuilt list is
     *                    identical to the input; otherwise the rebuilt
     *                    token array.
     */
    public function validateChildren($tokens_of_children, $config, $context) {
        if (empty($tokens_of_children)) return false;

        // this ensures that the loop gets run one last time before closing
        // up. It's a little bit of a hack, but it works! Just make sure you
        // get rid of the token later.
        $tokens_of_children[] = false;

        // only one of these elements is allowed in a table
        $caption = false;
        $thead   = false;
        $tfoot   = false;

        // as many of these as you want
        $cols    = array();
        $content = array();

        $nesting = 0; // current depth so we can determine nodes
        $is_collecting = false; // are we globbing together tokens to package
                                // into one of the collectors?
        $collection = array(); // collected nodes
        $tag_index = 0; // the first node might be whitespace,
                        // so this tells us where the start tag is
        $tbody_mode = false; // if true, then we need to wrap any stray
                             // <tr>s with a <tbody>.

        foreach ($tokens_of_children as $token) {
            // depth is sampled *before* this token adjusts it, so an end
            // tag that closes a top-level child is still "not a child" and
            // gets appended to the running collection
            $is_child = ($nesting == 0);

            // the false sentinel matches neither branch and leaves the
            // depth untouched
            if ($token instanceof HTMLPurifier_Token_Start) {
                $nesting++;
            } elseif ($token instanceof HTMLPurifier_Token_End) {
                $nesting--;
            }

            // handle node collection
            if ($is_collecting) {
                if ($is_child) {
                    // okay, let's stash the tokens away
                    // first token tells us the type of the collection
                    switch ($collection[$tag_index]->name) {
                        case 'tbody':
                            $tbody_mode = true;
                            // intentional fall-through: a tbody is content
                        case 'tr':
                            $content[] = $collection;
                            break;
                        case 'caption':
                            // only the first caption is kept
                            if ($caption !== false) break;
                            $caption = $collection;
                            break;
                        case 'thead':
                        case 'tfoot':
                            $tbody_mode = true;
                            // XXX This breaks rendering properties with
                            // Firefox, which never floats a <thead> to
                            // the top. Ever.  (Our scheme will float the
                            // first <thead> to the top.)  So maybe
                            // <thead>s that are not first should be
                            // turned into <tbody>? Very tricky, indeed.

                            // access the appropriate variable, $thead or $tfoot
                            $var = $collection[$tag_index]->name;
                            if ($$var === false) {
                                $$var = $collection;
                            } else {
                                // Oops, there's a second one! What
                                // should we do?  Current behavior is to
                                // transmutate the first and last entries into
                                // tbody tags, and then put into content.
                                // Maybe a better idea is to *attach
                                // it* to the existing thead or tfoot?
                                // We don't do this, because Firefox
                                // doesn't float an extra tfoot to the
                                // bottom like it does for the first one.
                                $collection[$tag_index]->name = 'tbody';
                                $collection[count($collection)-1]->name = 'tbody';
                                $content[] = $collection;
                            }
                            break;
                        case 'colgroup':
                            $cols[] = $collection;
                            break;
                    }
                    $collection = array();
                    $is_collecting = false;
                    $tag_index = 0;
                } else {
                    // add the node to the collection
                    $collection[] = $token;
                }
            }

            // terminate
            if ($token === false) break;

            if ($is_child) {
                // determine what we're dealing with
                if ($token->name == 'col') {
                    // the only empty tag in the possie, we can handle it
                    // immediately
                    $cols[] = array_merge($collection, array($token));
                    $collection = array();
                    $tag_index = 0;
                    continue;
                }
                // NOTE: this switch ends with `break`, not `continue`.
                // Inside a switch PHP treats `continue` as `break` anyway
                // and warns about it from PHP 7.3 on; nothing follows the
                // switch in the loop body, so behavior is unchanged.
                switch ($token->name) {
                    case 'caption':
                    case 'colgroup':
                    case 'thead':
                    case 'tfoot':
                    case 'tbody':
                    case 'tr':
                        $is_collecting = true;
                        $collection[] = $token;
                        break;
                    default:
                        // keep leading whitespace with the element that
                        // follows it; anything else at top level is dropped
                        if (!empty($token->is_whitespace)) {
                            $collection[] = $token;
                            $tag_index++;
                        }
                        break;
                }
            }
        }

        if (empty($content)) return false;

        $ret = array();
        if ($caption !== false) $ret = array_merge($ret, $caption);
        foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
        if ($thead !== false) $ret = array_merge($ret, $thead);
        if ($tfoot !== false) $ret = array_merge($ret, $tfoot);

        if ($tbody_mode) {
            // a little tricky, since the start of the collection may be
            // whitespace
            $inside_tbody = false;
            foreach ($content as $token_array) {
                // find the starting token
                foreach ($token_array as $t) {
                    if ($t->name === 'tr' || $t->name === 'tbody') {
                        break;
                    }
                } // iterator variable carries over
                if ($t->name === 'tr') {
                    if (!$inside_tbody) {
                        // open a synthetic tbody around this run of rows
                        $ret[] = new HTMLPurifier_Token_Start('tbody');
                        $inside_tbody = true;
                    }
                    $ret = array_merge($ret, $token_array);
                } elseif ($t->name === 'tbody') {
                    if ($inside_tbody) {
                        // close the synthetic tbody before the real one
                        $ret[] = new HTMLPurifier_Token_End('tbody');
                        $inside_tbody = false;
                    }
                    $ret = array_merge($ret, $token_array);
                } else {
                    trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
                }
            }
            if ($inside_tbody) {
                $ret[] = new HTMLPurifier_Token_End('tbody');
            }
        } else {
            foreach ($content as $token_array) {
                // invariant: everything in here is <tr>s
                $ret = array_merge($ret, $token_array);
            }
        }

        if (!empty($collection) && !$is_collecting) {
            // grab the trailing space
            $ret = array_merge($ret, $collection);
        }

        array_pop($tokens_of_children); // remove phantom token

        return ($ret === $tokens_of_children) ? true : $ret;
    }
}
226
|
|
|
|
227
|
|
|
// vim: et sw=4 sts=4 |
228
|
|
|
|
This check looks for the bodies of `if` statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging, or the code may simply be obsolete. These `if` bodies can be removed. If you have an empty `if` but statements in the `else` branch, consider inverting the condition: an empty `if` followed by a populated `else` could be turned into a single `if` on the negated condition. This is much more concise to read.