<?php

// Base class that AMP_Blacklist_Sanitizer (declared below) extends.
require_once( AMP__ROOT__ . '/includes/sanitizers/class-amp-base-sanitizer.php' );

/**
 * Strips blacklisted tags and attributes from content.
 *
 * See following for blacklist:
 *     https://github.com/ampproject/amphtml/blob/master/spec/amp-html-format.md#html-tags
 */
class AMP_Blacklist_Sanitizer extends AMP_Base_Sanitizer {
	/**
	 * Matches `wp-att-<digits>` tokens; used to strip them from the `rel`
	 * attribute of links.
	 */
	const PATTERN_REL_WP_ATTACHMENT = '#wp-att-([\d]+)#';

	/**
	 * Argument keys understood by this sanitizer. Each one holds extra entries
	 * that are appended to the matching built-in blacklist.
	 *
	 * NOTE(review): presumably merged into $this->args by the base class; confirm there.
	 *
	 * @var array
	 */
	protected $DEFAULT_ARGS = array(
		'add_blacklisted_protocols'  => array(),
		'add_blacklisted_tags'       => array(),
		'add_blacklisted_attributes' => array(),
	);

	/**
	 * Removes blacklisted tags from the body node, then walks the remaining
	 * tree stripping blacklisted attributes and unwrapping invalid nodes.
	 */
	public function sanitize() {
		$blacklisted_tags       = $this->get_blacklisted_tags();
		$blacklisted_attributes = $this->get_blacklisted_attributes();
		$blacklisted_protocols  = $this->get_blacklisted_protocols();

		$body = $this->get_body_node();
		$this->strip_tags( $body, $blacklisted_tags );
		$this->strip_attributes_recursive( $body, $blacklisted_attributes, $blacklisted_protocols );
	}

	/**
	 * Sanitizes one element and, recursively, all of its element descendants.
	 *
	 * `font` elements and `a` elements that fail validate_a_node() are replaced
	 * by their (sanitized) children. On every other element the blacklisted
	 * attributes and all `on*` event handlers (but not the bare `on` attribute)
	 * are removed, and `a` elements get their remaining attributes normalised.
	 *
	 * @param DOMNode $node           Node to process; non-element nodes are ignored.
	 * @param array   $bad_attributes Lowercase attribute names to remove.
	 * @param array   $bad_protocols  Not consulted by this class at present; only
	 *                                passed along so the signature stays stable.
	 */
	private function strip_attributes_recursive( $node, $bad_attributes, $bad_protocols ) {
		if ( XML_ELEMENT_NODE !== $node->nodeType ) {
			return;
		}

		$node_name = $node->nodeName;

		// Some nodes may contain valid content but are themselves invalid.
		// Remove the node but preserve the children.
		$is_invalid_wrapper = ( 'font' === $node_name )
			|| ( 'a' === $node_name && false === $this->validate_a_node( $node ) );
		if ( $is_invalid_wrapper ) {
			$this->replace_node_with_children( $node, $bad_attributes, $bad_protocols );
			return;
		}

		if ( $node->hasAttributes() ) {
			// Walk backwards so that removing an attribute never shifts the
			// position of one that still has to be visited.
			for ( $i = $node->attributes->length - 1; $i >= 0; $i-- ) {
				$attribute      = $node->attributes->item( $i );
				$attribute_name = strtolower( $attribute->name );

				if ( in_array( $attribute_name, $bad_attributes, true ) ) {
					$node->removeAttribute( $attribute_name );
					continue;
				}

				// on* attributes (like onclick) are a special case; the bare
				// `on` attribute itself is kept.
				if ( 'on' !== $attribute_name && 0 === strpos( $attribute_name, 'on' ) ) {
					$node->removeAttribute( $attribute_name );
					continue;
				}

				if ( 'a' === $node_name ) {
					$this->sanitize_a_attribute( $node, $attribute );
				}
			}
		}

		// Children are visited last-to-first because a child may be replaced by
		// several nodes (see replace_node_with_children()), which only affects
		// indexes at or after the one being processed.
		for ( $i = $node->childNodes->length - 1; $i >= 0; $i-- ) {
			$this->strip_attributes_recursive( $node->childNodes->item( $i ), $bad_attributes, $bad_protocols );
		}
	}

	/**
	 * Removes every descendant element of $node whose tag name is listed.
	 *
	 * When removing an element leaves its parent empty (as reported by
	 * AMP_DOM_Utils::is_node_empty()) the parent is removed too, unless the
	 * parent is the body element.
	 *
	 * @param DOMElement $node      Root of the subtree to clean.
	 * @param array      $tag_names Tag names to remove.
	 */
	private function strip_tags( $node, $tag_names ) {
		foreach ( $tag_names as $tag_name ) {
			$elements = $node->getElementsByTagName( $tag_name );
			$length   = $elements->length;
			if ( 0 === $length ) {
				continue;
			}

			for ( $i = $length - 1; $i >= 0; $i-- ) {
				$element = $elements->item( $i );

				// The list is live: removing an emptied parent that carries the
				// same tag name shrinks it, so earlier indexes may be gone.
				if ( ! $element || ! $element->parentNode ) {
					continue;
				}

				$parent_node = $element->parentNode;
				$parent_node->removeChild( $element );

				if ( 'body' !== $parent_node->nodeName
					&& $parent_node->parentNode
					&& AMP_DOM_Utils::is_node_empty( $parent_node ) ) {
					$parent_node->parentNode->removeChild( $parent_node );
				}
			}
		}
	}

	/**
	 * Normalises a single attribute of an `a` element.
	 *
	 * - `rel`:    `wp-att-<digits>` tokens are stripped; the attribute is removed
	 *             when nothing is left.
	 * - `rev`:    always removed.
	 * - `target`: `_blank` and `_new` (any case) become `_blank`; every other
	 *             value causes the attribute to be removed.
	 *
	 * @param DOMElement $node      The `a` element owning the attribute.
	 * @param DOMAttr    $attribute The attribute to inspect.
	 */
	private function sanitize_a_attribute( $node, $attribute ) {
		$attribute_name = strtolower( $attribute->name );

		if ( 'rel' === $attribute_name ) {
			$old_value = $attribute->value;
			$new_value = trim( preg_replace( self::PATTERN_REL_WP_ATTACHMENT, '', $old_value ) );
			if ( empty( $new_value ) ) {
				$node->removeAttribute( $attribute_name );
			} elseif ( $old_value !== $new_value ) {
				$node->setAttribute( $attribute_name, $new_value );
			}
			return;
		}

		if ( 'rev' === $attribute_name ) {
			// rev removed from HTML5 spec, which was used by Jetpack Markdown.
			$node->removeAttribute( $attribute_name );
			return;
		}

		if ( 'target' === $attribute_name ) {
			// _blank is the only allowed value and it must be lowercase.
			// Replace _new with _blank; anything else is simply removed.
			$old_value = strtolower( $attribute->value );
			if ( '_blank' === $old_value || '_new' === $old_value ) {
				$node->setAttribute( $attribute_name, '_blank' );
			} else {
				$node->removeAttribute( $attribute_name );
			}
		}
	}

	/**
	 * Decides whether an `a` element may stay in the document.
	 *
	 * Valid links are: named anchors without an href, fragment links (`#...`),
	 * and hrefs whose scheme is whitelisted and which pass FILTER_VALIDATE_URL
	 * (`tel` and `sms` are exempt from that filter). Hrefs starting with `/`
	 * are prefixed with the home URL for the purpose of validation only; the
	 * attribute itself is not modified.
	 *
	 * @param DOMElement $node The `a` element to check.
	 * @return bool True when the link is acceptable, false otherwise.
	 */
	private function validate_a_node( $node ) {
		$href = $node->getAttribute( 'href' );

		// If no href is set and this isn't a named anchor, it's invalid.
		if ( empty( $href ) ) {
			$name_attr = $node->getAttribute( 'name' );
			return ! empty( $name_attr );
		}

		// Fragment links need no further validation.
		if ( 0 === strpos( $href, '#' ) ) {
			return true;
		}

		// If the href starts with a '/', prepend the home_url to it for validation purposes.
		if ( 0 === strpos( $href, '/' ) ) {
			$href = untrailingslashit( get_home_url() ) . $href;
		}

		$valid_protocols   = array( 'http', 'https', 'mailto', 'sms', 'tel', 'viber', 'whatsapp' );
		$special_protocols = array( 'tel', 'sms' ); // These don't validate with `filter_var` + FILTER_VALIDATE_URL.

		// URI schemes are case-insensitive, so compare the lowercase form.
		$protocol = strtolower( strtok( $href, ':' ) );

		if ( ! in_array( $protocol, $valid_protocols, true ) ) {
			return false;
		}

		if ( in_array( $protocol, $special_protocols, true ) ) {
			return true;
		}

		return false !== filter_var( $href, FILTER_VALIDATE_URL );
	}

	/**
	 * Unwraps a node: its children take its place in the parent and the node
	 * itself is removed. Nothing happens when the node has no parent.
	 *
	 * Each child is cloned, attached in front of the node and only then
	 * sanitized. Sanitizing after attaching matters: an invalid child (e.g. a
	 * nested `font`) can only be unwrapped in turn once it has a parent.
	 *
	 * @param DOMNode $node           Node to unwrap.
	 * @param array   $bad_attributes Forwarded to strip_attributes_recursive().
	 * @param array   $bad_protocols  Forwarded to strip_attributes_recursive().
	 */
	private function replace_node_with_children( $node, $bad_attributes, $bad_protocols ) {
		$parent_node = $node->parentNode;
		if ( ! $parent_node ) {
			return;
		}

		if ( $node->hasChildNodes() ) {
			foreach ( $node->childNodes as $child_node ) {
				$new_child = $child_node->cloneNode( true );
				$parent_node->insertBefore( $new_child, $node );
				$this->strip_attributes_recursive( $new_child, $bad_attributes, $bad_protocols );
			}
		}

		$parent_node->removeChild( $node );
	}

	/**
	 * Appends the user supplied list stored under $this->args[ $key ] to the
	 * given defaults. Empty or non-array args leave the defaults untouched.
	 *
	 * @param string $key    Key to look up in $this->args.
	 * @param array  $values Built-in default values.
	 * @return array Defaults followed by any user supplied values.
	 */
	private function merge_defaults_with_args( $key, $values ) {
		$has_extra_values = ! empty( $this->args[ $key ] ) && is_array( $this->args[ $key ] );
		if ( ! $has_extra_values ) {
			return $values;
		}

		return array_merge( $values, $this->args[ $key ] );
	}

	/**
	 * Built-in blacklisted protocols plus any from `add_blacklisted_protocols`.
	 *
	 * @return array
	 */
	private function get_blacklisted_protocols() {
		return $this->merge_defaults_with_args( 'add_blacklisted_protocols', array(
			'javascript',
		) );
	}

	/**
	 * Built-in blacklisted tags plus any from `add_blacklisted_tags`.
	 *
	 * img, video, audio and iframe are deliberately absent: they are converted
	 * into amp-* versions instead of being removed.
	 *
	 * @return array
	 */
	private function get_blacklisted_tags() {
		return $this->merge_defaults_with_args( 'add_blacklisted_tags', array(
			'script',
			'noscript',
			'style',
			'frame',
			'frameset',
			'object',
			'param',
			'applet',
			'form',
			'label',
			'input',
			'textarea',
			'select',
			'option',
			'link',
			'picture',

			// Sanitizers run after embed handlers, so if anything wasn't matched, it needs to be removed.
			'embed',
			'embedvideo',

			// Other weird ones
			'comments-count',
		) );
	}

	/**
	 * Built-in blacklisted attributes plus any from `add_blacklisted_attributes`.
	 *
	 * @return array
	 */
	private function get_blacklisted_attributes() {
		return $this->merge_defaults_with_args( 'add_blacklisted_attributes', array(
			'style',
			'size',
			'clear',
			'align',
			'valign',
		) );
	}
}