1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* @file |
4
|
|
|
* The rules for generating output in the serializer. |
5
|
|
|
* |
6
|
|
|
* These output rules are likely to generate output similar to the document that |
7
|
|
|
* was parsed. It is not intended to output exactly the document that was parsed. |
8
|
|
|
*/ |
9
|
|
|
|
10
|
|
|
namespace Masterminds\HTML5\Serializer; |
11
|
|
|
|
12
|
|
|
use Masterminds\HTML5\Elements; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
* Generate the output html5 based on element rules. |
16
|
|
|
*/ |
17
|
|
|
class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface |
18
|
|
|
{ |
19
|
|
|
/**
 * Namespace URIs known to the serializer.
 *
 * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
 */
const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';

const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';

const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';

const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';

const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';

const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';

/**
 * Namespace URIs that are implicit and therefore never written out.
 *
 * namespaceAttrs() skips every namespace node whose value is in this list.
 *
 * @var array
 */
protected $implicitNamespaces = array(
    self::NAMESPACE_HTML,
    self::NAMESPACE_SVG,
    self::NAMESPACE_MATHML,
    self::NAMESPACE_XML,
    self::NAMESPACE_XMLNS,
);

/**
 * Output modes. element() switches to IM_IN_SVG / IM_IN_MATHML when it meets
 * an svg / math element; the constructor starts in IM_IN_HTML.
 */
const IM_IN_HTML = 1;

const IM_IN_SVG = 2;

const IM_IN_MATHML = 3;

/**
 * Caches whether ENT_HTML5 can be used (computed once in the constructor).
 *
 * @var bool
 */
private $hasHTML5 = false;

/**
 * The traverser driving this rule set; assigned through setTraverser().
 */
protected $traverser;

/**
 * When false, enc() only escapes text; otherwise it converts characters to
 * named character references. Taken from the 'encode_entities' option.
 */
protected $encode = false;

/**
 * The handle that wr() and nl() pass to fwrite().
 */
protected $out;

/**
 * The current output mode, one of the IM_IN_* constants.
 */
protected $outputMode;

/**
 * DOMXPath instance reused between calls.
 */
private $xpath;
69
|
|
|
|
70
|
|
|
/**
 * Rules identifying attributes that are always written as name="value",
 * even when their value is empty.
 *
 * nonBooleanAttribute() walks these rules; addRule() appends to them. A rule
 * may constrain the element ('nodeNamespace', 'nodeName'), the attribute
 * ('attrName'), or supply an 'xpath' expression (optionally with 'prefixes')
 * that is evaluated against the attribute node.
 *
 * @var array
 */
protected $nonBooleanAttributes = array(
    /*
    array(
        'nodeNamespace'=>'http://www.w3.org/1999/xhtml',
        'attrNamespace'=>'http://www.w3.org/1999/xhtml',

        'nodeName'=>'img', 'nodeName'=>array('img', 'a'),
        'attrName'=>'alt', 'attrName'=>array('title', 'alt'),
    ),
    */
    array(
        'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
        'attrName' => array(
            'href', 'hreflang', 'http-equiv', 'icon', 'id', 'keytype', 'kind',
            'label', 'lang', 'language', 'list', 'maxlength', 'media', 'method',
            'name', 'placeholder', 'rel', 'rows', 'rowspan', 'sandbox',
            'spellcheck', 'scope', 'seamless', 'shape', 'size', 'sizes', 'span',
            'src', 'srcdoc', 'srclang', 'srcset', 'start', 'step', 'style',
            'summary', 'tabindex', 'target', 'title', 'type', 'value', 'width',
            'border', 'charset', 'cite', 'class', 'code', 'codebase', 'color',
            'cols', 'colspan', 'content', 'coords', 'data', 'datetime',
            'default', 'dir', 'dirname', 'enctype', 'for', 'form', 'formaction',
            'headers', 'height', 'accept', 'accept-charset', 'accesskey',
            'action', 'align', 'alt', 'bgcolor',
        ),
    ),
    array(
        'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
        'xpath' => 'starts-with(local-name(), \'data-\')',
    ),
);

/**
 * The doctype line written by doctype().
 */
const DOCTYPE = '<!DOCTYPE html>';
161
|
|
|
|
162
|
64 |
|
/**
 * Prepare the rules for writing to the given output.
 *
 * @param resource $output  The handle that wr() and nl() pass to fwrite()
 * @param array    $options Recognised key: 'encode_entities', stored as the
 *                          flag that enc() consults
 */
public function __construct($output, $options = array())
{
    $this->out = $output;
    $this->outputMode = static::IM_IN_HTML;

    if (isset($options['encode_entities'])) {
        $this->encode = $options['encode_entities'];
    }

    // ENT_HTML5 is not relied upon under HHVM,
    // see https://github.com/facebook/hhvm/issues/2727
    $this->hasHTML5 = !defined('HHVM_VERSION') && defined('ENT_HTML5');
}
174
|
|
|
|
175
|
|
|
/**
 * Register one more non-boolean attribute rule.
 *
 * The rule is appended to the list that nonBooleanAttribute() iterates.
 *
 * @param array $rule The rule to append
 */
public function addRule(array $rule)
{
    array_push($this->nonBooleanAttributes, $rule);
}
179
|
|
|
|
180
|
64 |
|
/**
 * Attach the traverser that these rules delegate child traversal to.
 *
 * @param \Masterminds\HTML5\Serializer\Traverser $traverser The traverser to use
 *
 * @return $this so it can be used in chaining
 */
public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser)
{
    $this->traverser = $traverser;

    return $this;
}
186
|
|
|
|
187
|
18 |
|
/**
 * Write a whole document: the doctype, every top-level node, and a final
 * newline.
 *
 * @param \DOMDocument $dom The document to write
 */
public function document($dom)
{
    $this->doctype();

    // Without a root element only the doctype is emitted.
    if (!$dom->documentElement) {
        return;
    }

    foreach ($dom->childNodes as $topLevelNode) {
        $this->traverser->node($topLevelNode);
    }

    $this->nl();
}
197
|
|
|
|
198
|
19 |
|
/**
 * Write the doctype declaration followed by a newline.
 */
protected function doctype()
{
    $this->wr(static::DOCTYPE)->nl();
}
203
|
|
|
|
204
|
27 |
|
/**
 * Write an element: opening tag, content and, unless it is a void tag, the
 * closing tag.
 *
 * An svg or math element switches the output mode for itself and its
 * descendants. The mode that was active before is restored only after the
 * closing tag has been handled. closeTag() must still see the foreign mode,
 * otherwise a childless svg/math element that openTag() already wrote in
 * self-closing form would get a stray end tag as well.
 *
 * @param \DOMElement $ele The element to write
 */
public function element($ele)
{
    $name = $ele->tagName;

    // Per spec:
    // If the element has a declared namespace in the HTML, MathML or
    // SVG namespaces, we use the lname instead of the tagName.
    if ($this->traverser->isLocalElement($ele)) {
        $name = $ele->localName;
    }

    // Remember where we came from so nested foreign content does not
    // clobber the mode of an enclosing svg/math element.
    $previousMode = $this->outputMode;
    $switchedMode = false;

    // If we are in SVG or MathML there is special handling.
    // Using if/elseif instead of switch because it's faster in PHP.
    if ('svg' == $name) {
        $this->outputMode = static::IM_IN_SVG;
        $name = Elements::normalizeSvgElement($name);
        $switchedMode = true;
    } elseif ('math' == $name) {
        $this->outputMode = static::IM_IN_MATHML;
        $switchedMode = true;
    }

    $this->openTag($ele);

    if (Elements::isA($name, Elements::TEXT_RAW)) {
        // Raw text elements: character data is written without escaping,
        // nested elements are written recursively.
        foreach ($ele->childNodes as $child) {
            if ($child instanceof \DOMCharacterData) {
                $this->wr($child->data);
            } elseif ($child instanceof \DOMElement) {
                $this->element($child);
            }
        }
    } elseif ($ele->hasChildNodes()) {
        // Handle children.
        $this->traverser->children($ele->childNodes);
    }

    // If not unary, add a closing tag.
    if (!Elements::isA($name, Elements::VOID_TAG)) {
        $this->closeTag($ele);
    }

    // Close out the SVG or MathML special handling.
    if ($switchedMode) {
        $this->outputMode = $previousMode;
    }
}
250
|
|
|
|
251
|
|
|
/**
 * Write a text node.
 *
 * Text directly inside a raw text element (Elements::TEXT_RAW) is written
 * as is; any other text is passed through enc() first.
 *
 * @param \DOMText $ele The text node to write
 */
public function text($ele)
{
    $insideRawText = isset($ele->parentNode, $ele->parentNode->tagName)
        && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW);

    if ($insideRawText) {
        $this->wr($ele->data);
    } else {
        // FIXME: This probably needs some flags set.
        $this->wr($this->enc($ele->data));
    }
}
267
|
|
|
|
268
|
2 |
|
/**
 * Write a CDATA section.
 *
 * The markup is produced by the owner document's saveXML() for this node.
 *
 * @param \DOMNode $ele The CDATA node to write
 */
public function cdata($ele)
{
    $markup = $ele->ownerDocument->saveXML($ele);
    $this->wr($markup);
}
273
|
|
|
|
274
|
3 |
|
/**
 * Write a comment node.
 *
 * The markup is produced by the owner document's saveXML() for this node,
 * which was noted to give the same output as writing '<!--', the data and
 * '-->' by hand.
 *
 * @param \DOMNode $ele The comment node to write
 */
public function comment($ele)
{
    $markup = $ele->ownerDocument->saveXML($ele);
    $this->wr($markup);
}
280
|
|
|
|
281
|
3 |
|
/**
 * Write a processing instruction as "<?target data?>".
 *
 * @param \DOMNode $ele The processing instruction node to write
 */
public function processorInstruction($ele)
{
    $this->wr('<?');
    $this->wr($ele->target);
    $this->wr(' ');
    $this->wr($ele->data);
    $this->wr('?>');
}
289
|
|
|
|
290
|
|
|
/**
 * Write the namespace attributes.
 *
 * Looks up the namespace nodes of the element whose value is not already
 * among the namespace nodes of its parent, and writes each one that is not
 * an implicit namespace as name="value". The value is escaped for use inside
 * a double-quoted attribute so that a URI containing '"' or '&' cannot break
 * the markup.
 *
 * @param \DOMNode $ele The element being written
 */
protected function namespaceAttrs($ele)
{
    // The cached DOMXPath is bound to one document; rebuild it on a switch.
    if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) {
        $this->xpath = new \DOMXPath($ele->ownerDocument);
    }

    foreach ($this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) {
        if (in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
            continue;
        }

        $this->wr(' ')
            ->wr($nsNode->nodeName)
            ->wr('="')
            ->wr($this->escape($nsNode->nodeValue, true))
            ->wr('"');
    }
}
308
|
|
|
|
309
|
|
|
/**
 * Write the opening tag.
 *
 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
 * qualified name (8.3).
 *
 * Outside HTML mode (SVG, MathML or XML embedded content) an element without
 * children is terminated with " />" instead of ">".
 *
 * @param \DOMNode $ele The element being written
 */
protected function openTag($ele)
{
    $this->wr('<');
    $this->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);

    $this->attrs($ele);
    $this->namespaceAttrs($ele);

    // If there are no children in foreign content this is self closing.
    $selfClosing = static::IM_IN_HTML != $this->outputMode && !$ele->hasChildNodes();

    $this->wr($selfClosing ? ' />' : '>');
}
336
|
|
|
|
337
|
39 |
|
/**
 * Write the attributes of an element.
 *
 * Each attribute is written as ' name="value"'. The '="value"' part is
 * left out when the encoded value is empty and nonBooleanAttribute() does
 * not claim the attribute. In SVG and MathML mode the attribute name is
 * normalized first.
 *
 * @param \DOMNode $ele The element whose attributes are written
 *
 * @return $this on every path, so it can be used in chaining
 */
protected function attrs($ele)
{
    // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
    if (!$ele->hasAttributes()) {
        return $this;
    }

    foreach ($ele->attributes as $node) {
        $val = $this->enc($node->value, true);

        // XXX: The spec says that we need to ensure that anything in
        // the XML, XMLNS, or XLink NS's should use the canonical
        // prefix. It seems that DOM does this for us already, but there
        // may be exceptions.
        $name = $node->nodeName;

        // Special handling for attributes in SVG and MathML.
        // Using if/elseif instead of switch because it's faster in PHP.
        if ($this->outputMode == static::IM_IN_SVG) {
            $name = Elements::normalizeSvgAttribute($name);
        } elseif ($this->outputMode == static::IM_IN_MATHML) {
            $name = Elements::normalizeMathMlAttribute($name);
        }

        $this->wr(' ')->wr($name);

        if ((null !== $val && '' !== $val) || $this->nonBooleanAttribute($node)) {
            $this->wr('="')->wr($val)->wr('"');
        }
    }

    return $this;
}
373
|
|
|
|
374
|
10 |
|
/**
 * Tell whether an attribute must be written with a value even when the
 * value is empty.
 *
 * The attribute qualifies as soon as one rule in $nonBooleanAttributes
 * matches. A rule matches when every constraint it declares holds:
 *  - 'nodeNamespace': namespace URI of the owner element;
 *  - 'attrNamespace': namespace URI of the attribute (the misspelt key
 *    'attNamespace' is honoured as well, since it was the only spelling
 *    that used to be read);
 *  - 'nodeName': local name of the owner element, a string or a list;
 *  - 'attrName': local name of the attribute, a string or a list;
 *  - 'xpath': expression evaluated with the attribute as context node,
 *    after registering the optional 'prefixes' (prefix => namespace URI).
 *
 * @param \DOMAttr $attr The attribute to check
 *
 * @return bool True when some rule matches, false otherwise
 */
protected function nonBooleanAttribute(\DOMAttr $attr)
{
    $ele = $attr->ownerElement;

    foreach ($this->nonBooleanAttributes as $rule) {
        if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
            continue;
        }

        if (isset($rule['attrNamespace']) && $rule['attrNamespace'] !== $attr->namespaceURI) {
            continue;
        }

        // Legacy spelling, kept so existing custom rules keep working.
        if (isset($rule['attNamespace']) && $rule['attNamespace'] !== $attr->namespaceURI) {
            continue;
        }

        // Casting to array lets one check cover both the single-name and
        // the list form; the strict comparison matches the former !==.
        if (isset($rule['nodeName']) && !in_array($ele->localName, (array) $rule['nodeName'], true)) {
            continue;
        }

        if (isset($rule['attrName']) && !in_array($attr->localName, (array) $rule['attrName'], true)) {
            continue;
        }

        if (isset($rule['xpath'])) {
            $xp = $this->getXPath($attr);

            if (isset($rule['prefixes'])) {
                foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                    $xp->registerNamespace($nsPrefix, $ns);
                }
            }

            if (!$xp->evaluate($rule['xpath'], $attr)) {
                continue;
            }
        }

        return true;
    }

    return false;
}
413
|
|
|
|
414
|
9 |
|
/**
 * Return a DOMXPath bound to the document that owns the given node.
 *
 * The instance is cached and reused as long as the owner document stays the
 * same. It is rebuilt when a node of another document comes in, because a
 * DOMXPath only works on nodes of the document it was created for. This
 * mirrors the check made in namespaceAttrs().
 *
 * @param \DOMNode $node A node of the document to query
 *
 * @return \DOMXPath
 */
private function getXPath(\DOMNode $node)
{
    if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
        $this->xpath = new \DOMXPath($node->ownerDocument);
    }

    return $this->xpath;
}
422
|
|
|
|
423
|
|
|
/**
 * Write the closing tag.
 *
 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
 * qualified name (8.3).
 *
 * Nothing is written for a childless element outside HTML mode: openTag()
 * terminates such an element with " />".
 *
 * @param \DOMNode $ele The element being written
 */
protected function closeTag($ele)
{
    if (static::IM_IN_HTML != $this->outputMode && !$ele->hasChildNodes()) {
        return;
    }

    $this->wr('</');
    $this->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);
    $this->wr('>');
}
437
|
|
|
|
438
|
|
|
/**
 * Write to the output.
 *
 * @param string $text The string to put into the output
 *
 * @return $this so it can be used in chaining
 */
protected function wr($text)
{
    fwrite($this->out, $text);

    return $this;
}
451
|
|
|
|
452
|
|
|
/**
 * Write a new line (PHP_EOL) to the output.
 *
 * @return $this so it can be used in chaining
 */
protected function nl()
{
    fwrite($this->out, PHP_EOL);

    return $this;
}
463
|
|
|
|
464
|
|
|
/**
 * Encode text.
 *
 * When encode is set to false, the default value, the text passed in is
 * escaped per section 8.3 of the html5 spec. For details on how text is
 * escaped see the escape() method.
 *
 * When encode is set to true the text is converted to named character
 * references where appropriate. Section 8.1.4 Character references of the
 * html5 spec refers to using named character references. This is useful for
 * characters that can't otherwise legally be used in the text. Note that
 * this turns every character that has a named reference into its entity,
 * including such common characters as +.# and many others.
 *
 * The named character references are listed in section 8.5.
 *
 * When ENT_HTML5 is usable the conversion is done by htmlentities();
 * otherwise the map of the HTML5Entities class is applied.
 *
 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
 *
 * @param string $text      text to encode
 * @param bool   $attribute True if we are encoding an attribute, false otherwise.
 *                          Only consulted when the text is escaped.
 *
 * @return string the encoded text
 */
protected function enc($text, $attribute = false)
{
    // Escape the text rather than convert to named character references.
    if (!$this->encode) {
        return $this->escape($text, $attribute);
    }

    // Without native html5 entity support, fall back to the manual map.
    if (!$this->hasHTML5) {
        return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
    }

    return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
}
509
|
|
|
|
510
|
|
|
/**
 * Escape text.
 *
 * According to the html5 spec section 8.3 Serializing HTML fragments, text
 * within tags that are not style, script, xmp, iframe, noembed, and noframes
 * need to be properly escaped.
 *
 * The & should be converted to &amp;, no breaking space unicode characters
 * converted to &nbsp;, when in attribute mode the " should be converted to
 * &quot;, and when not in attribute mode the < and > should be converted to
 * &lt; and &gt;.
 *
 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
 *
 * @param string $text      text to escape
 * @param bool   $attribute True if we are escaping an attribute, false otherwise
 *
 * @return string the escaped text
 */
protected function escape($text, $attribute = false)
{
    // Not using htmlspecialchars because, while it does escaping, it doesn't
    // match the requirements of section 8.5. For example, it doesn't handle
    // non-breaking spaces.
    if ($attribute) {
        $replace = array(
            '"' => '&quot;',
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );
    } else {
        $replace = array(
            '<' => '&lt;',
            '>' => '&gt;',
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );
    }

    // strtr() with an array never rescans its own output, so the '&' of an
    // inserted entity is not escaped a second time.
    return strtr($text, $replace);
}
549
|
|
|
} |
550
|
|
|
|