1
|
|
|
<?php |
2
|
|
|
/**
 * @file
 * The rules for generating output in the serializer.
 *
 * These output rules are likely to generate output similar to the document that
 * was parsed. They are not intended to output exactly the document that was parsed.
 */
9
|
|
|
namespace Masterminds\HTML5\Serializer; |
10
|
|
|
|
11
|
|
|
use Masterminds\HTML5\Elements; |
12
|
|
|
|
13
|
|
|
/**
 * Generate the output html5 based on element rules.
 *
 * Everything is written to the stream handed to the constructor. Child nodes
 * are visited through the traverser registered with setTraverser().
 */
class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
{
    /**
     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
     */
    const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';

    const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';

    const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';

    const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';

    const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';

    const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';

    /**
     * Namespace URIs that are never written out as namespace declarations.
     *
     * namespaceAttrs() skips every namespace node whose value is in this list.
     *
     * @var array
     */
    protected $implicitNamespaces = array(
        self::NAMESPACE_HTML,
        self::NAMESPACE_SVG,
        self::NAMESPACE_MATHML,
        self::NAMESPACE_XML,
        self::NAMESPACE_XMLNS,
    );

    /**
     * Output modes stored in $outputMode.
     */
    const IM_IN_HTML = 1;

    const IM_IN_SVG = 2;

    const IM_IN_MATHML = 3;

    /**
     * Used as cache to detect if ENT_HTML5 is available.
     *
     * @var boolean
     */
    private $hasHTML5 = false;

    /**
     * The traverser used to visit child nodes; set through setTraverser().
     */
    protected $traverser;

    /**
     * When truthy, enc() converts text to named character references instead
     * of applying the minimal escaping done by escape().
     */
    protected $encode = false;

    /**
     * The stream everything is written to with fwrite().
     */
    protected $out;

    /**
     * One of the IM_IN_* constants.
     */
    protected $outputMode;

    /**
     * Cached XPath evaluator; see getXPath().
     */
    private $xpath;

    /**
     * Rules describing attributes that must keep an explicit value.
     *
     * An attribute whose value is empty is written as a bare name unless one
     * of these rules matches it, in which case it is written as name="".
     *
     * Each rule is an array. Every key is optional and a rule matches when all
     * of the keys it contains match:
     *  - 'nodeNamespace': namespace URI of the element owning the attribute.
     *  - 'attrNamespace': namespace URI of the attribute ('attNamespace' is
     *    accepted as a legacy spelling).
     *  - 'nodeName': an element local name, or an array of local names.
     *  - 'attrName': an attribute local name, or an array of local names.
     *  - 'xpath': an XPath expression evaluated with the attribute as context
     *    node; the rule matches when the result is truthy.
     *  - 'prefixes': map of prefix => namespace URI registered before the
     *    'xpath' expression is evaluated.
     *
     * Example:
     *
     *     array(
     *         'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
     *         'attrNamespace' => 'http://www.w3.org/1999/xhtml',
     *         'nodeName' => array('img', 'a'),
     *         'attrName' => array('title', 'alt'),
     *     )
     *
     * @var array
     */
    protected $nonBooleanAttributes = array(
        array(
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
            'attrName' => array(
                'href',
                'hreflang',
                'http-equiv',
                'icon',
                'id',
                'keytype',
                'kind',
                'label',
                'lang',
                'language',
                'list',
                'maxlength',
                'media',
                'method',
                'name',
                'placeholder',
                'rel',
                'rows',
                'rowspan',
                'sandbox',
                'spellcheck',
                'scope',
                'seamless',
                'shape',
                'size',
                'sizes',
                'span',
                'src',
                'srcdoc',
                'srclang',
                'srcset',
                'start',
                'step',
                'style',
                'summary',
                'tabindex',
                'target',
                'title',
                'type',
                'value',
                'width',
                'border',
                'charset',
                'cite',
                'class',
                'code',
                'codebase',
                'color',
                'cols',
                'colspan',
                'content',
                'coords',
                'data',
                'datetime',
                'default',
                'dir',
                'dirname',
                'enctype',
                'for',
                'form',
                'formaction',
                'headers',
                'height',
                'accept',
                'accept-charset',
                'accesskey',
                'action',
                'align',
                'alt',
                'bgcolor',
            ),
        ),
        array(
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
            'xpath' => 'starts-with(local-name(), \'data-\')',
        ),
    );

    const DOCTYPE = '<!DOCTYPE html>';

    /**
     * Create the output rules.
     *
     * @param resource $output
     *   The stream that wr() and nl() write to.
     * @param array $options
     *   Only 'encode_entities' is read here; when present its value is stored
     *   in $encode and decides how enc() treats text.
     */
    public function __construct($output, $options = array())
    {
        if (isset($options['encode_entities'])) {
            $this->encode = $options['encode_entities'];
        }

        $this->outputMode = static::IM_IN_HTML;
        $this->out = $output;

        // If HHVM, see https://github.com/facebook/hhvm/issues/2727
        $this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION');
    }

    /**
     * Append a rule to $nonBooleanAttributes.
     *
     * @param array $rule
     *   A rule in the format described on $nonBooleanAttributes.
     */
    public function addRule(array $rule)
    {
        $this->nonBooleanAttributes[] = $rule;
    }

    /**
     * Register the traverser used to visit child nodes.
     *
     * @param \Masterminds\HTML5\Serializer\Traverser $traverser
     *
     * @return $this
     */
    public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser)
    {
        $this->traverser = $traverser;

        return $this;
    }

    /**
     * Write a whole document: the doctype, then every top-level node.
     *
     * Nothing but the doctype is written when the document has no document
     * element.
     *
     * @param \DOMDocument $dom
     */
    public function document($dom)
    {
        $this->doctype();
        if ($dom->documentElement) {
            foreach ($dom->childNodes as $node) {
                $this->traverser->node($node);
            }
            $this->nl();
        }
    }

    /**
     * Write the doctype followed by a new line.
     */
    protected function doctype()
    {
        $this->wr(static::DOCTYPE);
        $this->nl();
    }

    /**
     * Write an element: opening tag, content and (unless void) closing tag.
     *
     * @param \DOMElement $ele
     */
    public function element($ele)
    {
        $name = $ele->tagName;

        // Per spec:
        // If the element has a declared namespace in the HTML, MathML or
        // SVG namespaces, we use the lname instead of the tagName.
        if ($this->traverser->isLocalElement($ele)) {
            $name = $ele->localName;
        }

        // Remember the mode we came from so that it can be restored once this
        // element is completely written (including its closing tag).
        $previousMode = $this->outputMode;
        $switchesMode = false;

        // If we are in SVG or MathML there is special handling.
        if ($name === 'svg') {
            $switchesMode = true;
            $this->outputMode = static::IM_IN_SVG;
            $name = Elements::normalizeSvgElement($name);
        } elseif ($name === 'math') {
            $switchesMode = true;
            $this->outputMode = static::IM_IN_MATHML;
        }

        $this->openTag($ele);
        if (Elements::isA($name, Elements::TEXT_RAW)) {
            // Raw text elements: character data is written without escaping.
            foreach ($ele->childNodes as $child) {
                if ($child instanceof \DOMCharacterData) {
                    $this->wr($child->data);
                } elseif ($child instanceof \DOMElement) {
                    $this->element($child);
                }
            }
        } elseif ($ele->hasChildNodes()) {
            $this->traverser->children($ele->childNodes);
        }

        // If not unary, add a closing tag.
        if (!Elements::isA($name, Elements::VOID_TAG)) {
            $this->closeTag($ele);
        }

        // Close out the SVG or MathML special handling. This must happen after
        // closeTag(): openTag() writes " />" for a childless element outside
        // HTML mode, and closeTag() only stays silent for it while the foreign
        // mode is still active.
        if ($switchesMode) {
            $this->outputMode = $previousMode;
        }
    }

    /**
     * Write a text node.
     *
     * Text whose parent is a raw text element is written as is; any other
     * text goes through enc().
     *
     * @param \DOMText $ele
     *   The text node to write.
     */
    public function text($ele)
    {
        if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) {
            $this->wr($ele->data);

            return;
        }

        // FIXME: This probably needs some flags set.
        $this->wr($this->enc($ele->data));
    }

    /**
     * Write a CDATA section using the owner document's XML serialization.
     *
     * @param \DOMNode $ele
     */
    public function cdata($ele)
    {
        // This encodes CDATA.
        $this->wr($ele->ownerDocument->saveXML($ele));
    }

    /**
     * Write a comment using the owner document's XML serialization.
     *
     * @param \DOMNode $ele
     */
    public function comment($ele)
    {
        $this->wr($ele->ownerDocument->saveXML($ele));
    }

    /**
     * Write a processing instruction as "<?target data?>".
     *
     * @param \DOMProcessingInstruction $ele
     */
    public function processorInstruction($ele)
    {
        $this->wr('<?')
            ->wr($ele->target)
            ->wr(' ')
            ->wr($ele->data)
            ->wr('?>');
    }

    /**
     * Write the namespace attributes.
     *
     * Only namespaces that are not already in scope on the parent element and
     * that are not listed in $implicitNamespaces are written.
     *
     * @param \DOMNode $ele
     *   The element being written.
     */
    protected function namespaceAttrs($ele)
    {
        $xpath = $this->getXPath($ele);

        foreach ($xpath->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) {
            if (!in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
                // The URI ends up inside a quoted attribute value, so it is
                // encoded the same way as any other attribute value.
                $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($this->enc($nsNode->nodeValue, true))->wr('"');
            }
        }
    }

    /**
     * Write the opening tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * @param \DOMNode $ele
     *   The element being written.
     */
    protected function openTag($ele)
    {
        $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);

        $this->attrs($ele);
        $this->namespaceAttrs($ele);

        if ($this->outputMode === static::IM_IN_HTML || $ele->hasChildNodes()) {
            $this->wr('>');
        } else {
            // Not in HTML mode (SVG, MathML or XML embedded content) and no
            // children: the tag is self closing.
            $this->wr(' />');
        }
    }

    /**
     * Write the attributes of an element.
     *
     * An attribute with an empty value is written as a bare name unless
     * nonBooleanAttribute() reports that it needs an explicit value.
     *
     * @param \DOMNode $ele
     *   The element whose attributes are written.
     *
     * @return $this
     */
    protected function attrs($ele)
    {
        // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
        if (!$ele->hasAttributes()) {
            return $this;
        }

        $map = $ele->attributes;
        $len = $map->length;
        for ($i = 0; $i < $len; ++$i) {
            $node = $map->item($i);
            $val = $this->enc($node->value, true);

            // XXX: The spec says that we need to ensure that anything in
            // the XML, XMLNS, or XLink NS's should use the canonical
            // prefix. It seems that DOM does this for us already, but there
            // may be exceptions.
            $name = $node->nodeName;

            // Special handling for attributes in SVG and MathML.
            if ($this->outputMode === static::IM_IN_SVG) {
                $name = Elements::normalizeSvgAttribute($name);
            } elseif ($this->outputMode === static::IM_IN_MATHML) {
                $name = Elements::normalizeMathMlAttribute($name);
            }

            $this->wr(' ')->wr($name);

            if ((isset($val) && $val !== '') || $this->nonBooleanAttribute($node)) {
                $this->wr('="')->wr($val)->wr('"');
            }
        }

        return $this;
    }

    /**
     * Tell whether an attribute matches one of the $nonBooleanAttributes rules.
     *
     * @param \DOMAttr $attr
     *   The attribute to check.
     *
     * @return boolean
     *   True as soon as one rule matches, false when none does.
     */
    protected function nonBooleanAttribute(\DOMAttr $attr)
    {
        $ele = $attr->ownerElement;
        foreach ($this->nonBooleanAttributes as $rule) {
            if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
                continue;
            }

            // 'attrNamespace' is the documented key; 'attNamespace' is the
            // spelling this method historically read, so both are checked.
            if (isset($rule['attrNamespace']) && $rule['attrNamespace'] !== $attr->namespaceURI) {
                continue;
            }
            if (isset($rule['attNamespace']) && $rule['attNamespace'] !== $attr->namespaceURI) {
                continue;
            }

            if (isset($rule['nodeName']) && !$this->ruleNameMatches($rule['nodeName'], $ele->localName)) {
                continue;
            }
            if (isset($rule['attrName']) && !$this->ruleNameMatches($rule['attrName'], $attr->localName)) {
                continue;
            }

            if (isset($rule['xpath'])) {
                $xp = $this->getXPath($attr);
                if (isset($rule['prefixes'])) {
                    foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                        $xp->registerNamespace($nsPrefix, $ns);
                    }
                }
                if (!$xp->evaluate($rule['xpath'], $attr)) {
                    continue;
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Compare a local name with the name(s) a rule expects.
     *
     * @param string|array $expected
     *   A single name, or a list of accepted names.
     * @param string $actual
     *   The local name found in the document.
     *
     * @return boolean
     */
    private function ruleNameMatches($expected, $actual)
    {
        if (is_array($expected)) {
            return in_array($actual, $expected, true);
        }

        return $expected === $actual;
    }

    /**
     * Get an XPath evaluator bound to the document owning the given node.
     *
     * The evaluator is cached and rebuilt whenever the node belongs to a
     * different document than the cached one.
     *
     * @param \DOMNode $node
     *
     * @return \DOMXPath
     */
    private function getXPath(\DOMNode $node)
    {
        if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
            $this->xpath = new \DOMXPath($node->ownerDocument);
        }

        return $this->xpath;
    }

    /**
     * Write the closing tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * Nothing is written for a childless element outside HTML mode, because
     * openTag() already wrote it as self closing.
     *
     * @param \DOMNode $ele
     *   The element being written.
     */
    protected function closeTag($ele)
    {
        if ($this->outputMode === static::IM_IN_HTML || $ele->hasChildNodes()) {
            $this->wr('</')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName)->wr('>');
        }
    }

    /**
     * Write to the output.
     *
     * @param string $text
     *   The string to put into the output.
     *
     * @return $this
     *   So it can be used in chaining.
     */
    protected function wr($text)
    {
        fwrite($this->out, $text);

        return $this;
    }

    /**
     * Write a new line character.
     *
     * @return $this
     *   So it can be used in chaining.
     */
    protected function nl()
    {
        fwrite($this->out, PHP_EOL);

        return $this;
    }

    /**
     * Encode text.
     *
     * When encode is set to false, the default value, the text passed in is
     * escaped per section 8.3 of the html5 spec. For details on how text is
     * escaped see the escape() method.
     *
     * When encoding is set to true the text is converted to named character
     * references where appropriate. Section 8.1.4 Character references of the
     * html5 spec refers to using named character references. This is useful for
     * characters that can't otherwise legally be used in the text.
     *
     * The named character references are listed in section 8.5. True encoding
     * will turn all named character references into their entities. This
     * includes such characters as +.# and many other common ones. By default
     * encoding here will just escape &'<>".
     *
     * Note, PHP 5.4+ has better html5 encoding.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
     *
     * @todo Use the Entities class in php 5.3 to have html5 entities.
     *
     * @param string $text
     *   Text to encode.
     * @param boolean $attribute
     *   True if we are encoding an attribute, false otherwise.
     *
     * @return string The encoded text.
     */
    protected function enc($text, $attribute = false)
    {
        // Escape the text rather than convert to named character references.
        if (!$this->encode) {
            return $this->escape($text, $attribute);
        }

        // If ENT_HTML5 is usable (PHP 5.4+, not HHVM) we can use the native
        // html5 entity functionality to convert the named character references.
        if ($this->hasHTML5) {
            return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
        }

        // Otherwise html5 entities are not entirely handled natively.
        // This manually handles them.
        return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
    }

    /**
     * Escape text.
     *
     * According to the html5 spec section 8.3 Serializing HTML fragments, text
     * within tags that are not style, script, xmp, iframe, noembed, and noframes
     * need to be properly escaped.
     *
     * The & should be converted to &amp;, no breaking space unicode characters
     * converted to &nbsp;, when in attribute mode the " should be converted to
     * &quot;, and when not in attribute mode the < and > should be converted to
     * &lt; and &gt;.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
     *
     * @param string $text
     *   Text to escape.
     * @param boolean $attribute
     *   True if we are escaping an attribute, false otherwise.
     *
     * @return string The escaped text.
     */
    protected function escape($text, $attribute = false)
    {
        // Not using htmlspecialchars because, while it does escaping, it doesn't
        // match the requirements of section 8.5. For example, it doesn't handle
        // non-breaking spaces.
        if ($attribute) {
            $replace = array(
                '"' => '&quot;',
                '&' => '&amp;',
                "\xc2\xa0" => '&nbsp;',
            );
        } else {
            $replace = array(
                '<' => '&lt;',
                '>' => '&gt;',
                '&' => '&amp;',
                "\xc2\xa0" => '&nbsp;',
            );
        }

        // strtr() with an array replaces in a single pass, so the "&" that
        // starts an inserted entity is never escaped a second time.
        return strtr($text, $replace);
    }
}
557
|
|
|
|