1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* @file |
4
|
|
|
* The rules for generating output in the serializer. |
5
|
|
|
* |
6
|
|
|
* These output rules are likely to generate output similar to the document that |
7
|
|
|
* was parsed. It is not intended to output exactly the document that was parsed. |
8
|
|
|
*/ |
9
|
|
|
|
10
|
|
|
namespace Masterminds\HTML5\Serializer; |
11
|
|
|
|
12
|
|
|
use Masterminds\HTML5\Elements; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
* Generate the output html5 based on element rules. |
16
|
|
|
*/ |
17
|
|
|
class OutputRules implements RulesInterface |
18
|
|
|
{ |
19
|
|
|
/** |
20
|
|
|
* Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0. |
21
|
|
|
*/ |
22
|
|
|
const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml'; |
23
|
|
|
|
24
|
|
|
const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML'; |
25
|
|
|
|
26
|
|
|
const NAMESPACE_SVG = 'http://www.w3.org/2000/svg'; |
27
|
|
|
|
28
|
|
|
const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink'; |
29
|
|
|
|
30
|
|
|
const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace'; |
31
|
|
|
|
32
|
|
|
const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/'; |
33
|
|
|
|
34
|
|
|
/** |
35
|
|
|
* Holds the namespace URIs that are implicit in the output and are therefore never written out as namespace attributes. |
36
|
|
|
* |
37
|
|
|
* @var array |
38
|
|
|
*/ |
39
|
|
|
protected $implicitNamespaces = array( |
40
|
|
|
self::NAMESPACE_HTML, |
41
|
|
|
self::NAMESPACE_SVG, |
42
|
|
|
self::NAMESPACE_MATHML, |
43
|
|
|
self::NAMESPACE_XML, |
44
|
|
|
self::NAMESPACE_XMLNS, |
45
|
|
|
); |
46
|
|
|
|
47
|
|
|
const IM_IN_HTML = 1; |
48
|
|
|
|
49
|
|
|
const IM_IN_SVG = 2; |
50
|
|
|
|
51
|
|
|
const IM_IN_MATHML = 3; |
52
|
|
|
|
53
|
|
|
/** |
54
|
|
|
* Caches whether the ENT_HTML5 constant is available. |
55
|
|
|
* |
56
|
|
|
* @var bool |
57
|
|
|
*/ |
58
|
|
|
private $hasHTML5 = false; |
59
|
|
|
|
60
|
|
|
protected $traverser; |
61
|
|
|
|
62
|
|
|
protected $encode = false; |
63
|
|
|
|
64
|
|
|
protected $out; |
65
|
|
|
|
66
|
|
|
protected $outputMode; |
67
|
|
|
|
68
|
|
|
private $xpath; |
69
|
|
|
|
70
|
|
|
protected $nonBooleanAttributes = array( |
71
|
|
|
/* |
72
|
|
|
array( |
73
|
|
|
'nodeNamespace'=>'http://www.w3.org/1999/xhtml', |
74
|
|
|
'attrNamespace'=>'http://www.w3.org/1999/xhtml', |
75
|
|
|
|
76
|
|
|
'nodeName'=>'img', 'nodeName'=>array('img', 'a'), |
77
|
|
|
'attrName'=>'alt', 'attrName'=>array('title', 'alt'), |
78
|
|
|
), |
79
|
|
|
*/ |
80
|
|
|
array( |
81
|
|
|
'nodeNamespace' => 'http://www.w3.org/1999/xhtml', |
82
|
|
|
'attrName' => array('href', |
83
|
|
|
'hreflang', |
84
|
|
|
'http-equiv', |
85
|
|
|
'icon', |
86
|
|
|
'id', |
87
|
|
|
'keytype', |
88
|
|
|
'kind', |
89
|
|
|
'label', |
90
|
|
|
'lang', |
91
|
|
|
'language', |
92
|
|
|
'list', |
93
|
|
|
'maxlength', |
94
|
|
|
'media', |
95
|
|
|
'method', |
96
|
|
|
'name', |
97
|
|
|
'placeholder', |
98
|
|
|
'rel', |
99
|
|
|
'rows', |
100
|
|
|
'rowspan', |
101
|
|
|
'sandbox', |
102
|
|
|
'spellcheck', |
103
|
|
|
'scope', |
104
|
|
|
'seamless', |
105
|
|
|
'shape', |
106
|
|
|
'size', |
107
|
|
|
'sizes', |
108
|
|
|
'span', |
109
|
|
|
'src', |
110
|
|
|
'srcdoc', |
111
|
|
|
'srclang', |
112
|
|
|
'srcset', |
113
|
|
|
'start', |
114
|
|
|
'step', |
115
|
|
|
'style', |
116
|
|
|
'summary', |
117
|
|
|
'tabindex', |
118
|
|
|
'target', |
119
|
|
|
'title', |
120
|
|
|
'type', |
121
|
|
|
'value', |
122
|
|
|
'width', |
123
|
|
|
'border', |
124
|
|
|
'charset', |
125
|
|
|
'cite', |
126
|
|
|
'class', |
127
|
|
|
'code', |
128
|
|
|
'codebase', |
129
|
|
|
'color', |
130
|
|
|
'cols', |
131
|
|
|
'colspan', |
132
|
|
|
'content', |
133
|
|
|
'coords', |
134
|
|
|
'data', |
135
|
|
|
'datetime', |
136
|
|
|
'default', |
137
|
|
|
'dir', |
138
|
|
|
'dirname', |
139
|
|
|
'enctype', |
140
|
|
|
'for', |
141
|
|
|
'form', |
142
|
|
|
'formaction', |
143
|
|
|
'headers', |
144
|
|
|
'height', |
145
|
|
|
'accept', |
146
|
|
|
'accept-charset', |
147
|
|
|
'accesskey', |
148
|
|
|
'action', |
149
|
|
|
'align', |
150
|
|
|
'alt', |
151
|
|
|
'bgcolor', |
152
|
|
|
), |
153
|
|
|
), |
154
|
|
|
array( |
155
|
|
|
'nodeNamespace' => 'http://www.w3.org/1999/xhtml', |
156
|
|
|
'xpath' => 'starts-with(local-name(), \'data-\')', |
157
|
|
|
), |
158
|
|
|
); |
159
|
|
|
|
160
|
|
|
const DOCTYPE = '<!DOCTYPE html>'; |
161
|
|
|
|
162
|
64 |
|
/**
 * Set up the rules with an output stream and serializer options.
 *
 * @param resource $output  Stream handle the generated markup is written to with fwrite().
 * @param array    $options Optional settings. Only 'encode_entities' is read here.
 */
public function __construct($output, $options = array())
{
    $this->out = $output;
    $this->outputMode = static::IM_IN_HTML;

    // Remember once whether ENT_HTML5 is defined so enc() does not have to re-check.
    $this->hasHTML5 = defined('ENT_HTML5');

    if (isset($options['encode_entities'])) {
        $this->encode = $options['encode_entities'];
    }
}
172
|
|
|
|
173
|
|
|
/**
 * Register an additional rule consulted by nonBooleanAttribute().
 *
 * @param array $rule Rule definition appended to the existing list.
 */
public function addRule(array $rule)
{
    array_push($this->nonBooleanAttributes, $rule);
}
177
|
|
|
|
178
|
64 |
|
/**
 * Attach the traverser used to walk child nodes.
 *
 * @param Traverser $traverser The traverser to delegate to.
 *
 * @return $this
 */
public function setTraverser(Traverser $traverser)
{
    $rules = $this;
    $rules->traverser = $traverser;

    return $rules;
}
184
|
|
|
|
185
|
17 |
|
/**
 * Drop the reference to the traverser.
 *
 * @return $this
 */
public function unsetTraverser()
{
    $rules = $this;
    $rules->traverser = null;

    return $rules;
}
191
|
|
|
|
192
|
18 |
|
/**
 * Write a whole document: the doctype, then every top-level node.
 *
 * Nothing beyond the doctype is written when the document has no
 * document element.
 *
 * @param \DOMDocument $dom The document to write.
 */
public function document($dom)
{
    $this->doctype();

    if (!$dom->documentElement) {
        return;
    }

    foreach ($dom->childNodes as $child) {
        $this->traverser->node($child);
    }

    $this->nl();
}
202
|
|
|
|
203
|
19 |
|
/**
 * Write the doctype declaration followed by a line break.
 */
protected function doctype()
{
    $declaration = static::DOCTYPE;

    $this->wr($declaration);
    $this->nl();
}
208
|
|
|
|
209
|
27 |
|
/**
 * Write an element: opening tag, content, and (unless void) closing tag.
 *
 * Entering an "svg" or "math" element switches the output mode, which
 * changes how attributes are normalized and whether childless elements
 * are written in self-closing form. The mode that was active before the
 * element is restored only after the closing tag has been handled, so
 * that closeTag() still sees the mode openTag() used. Resetting earlier
 * made a childless svg/math element come out as a self-closing tag
 * followed by a stray end tag. Always resetting to HTML also lost the
 * mode of an enclosing svg/math element.
 *
 * @param \DOMElement $ele The element to write.
 */
public function element($ele)
{
    $name = $ele->tagName;

    // Per spec:
    // If the element has a declared namespace in the HTML, MathML or
    // SVG namespaces, we use the lname instead of the tagName.
    if ($this->traverser->isLocalElement($ele)) {
        $name = $ele->localName;
    }

    // If we are in SVG or MathML there is special handling.
    // Using if/elseif instead of switch because it's faster in PHP.
    $enclosingMode = $this->outputMode;
    $switchedMode = false;
    if ('svg' == $name) {
        $this->outputMode = static::IM_IN_SVG;
        $name = Elements::normalizeSvgElement($name);
        $switchedMode = true;
    } elseif ('math' == $name) {
        $this->outputMode = static::IM_IN_MATHML;
        $switchedMode = true;
    }

    $this->openTag($ele);
    if (Elements::isA($name, Elements::TEXT_RAW)) {
        // Raw text elements: character data is written without escaping.
        foreach ($ele->childNodes as $child) {
            if ($child instanceof \DOMCharacterData) {
                $this->wr($child->data);
            } elseif ($child instanceof \DOMElement) {
                $this->element($child);
            }
        }
    } elseif ($ele->hasChildNodes()) {
        // Handle children.
        $this->traverser->children($ele->childNodes);
    }

    // If not unary, add a closing tag.
    if (!Elements::isA($name, Elements::VOID_TAG)) {
        $this->closeTag($ele);
    }

    // Close out the SVG or MathML special handling.
    if ($switchedMode) {
        $this->outputMode = $enclosingMode;
    }
}
255
|
|
|
|
256
|
|
|
/**
 * Write a text node.
 *
 * Text whose parent is a raw-text element (Elements::TEXT_RAW) is written
 * as-is; all other text goes through enc() first.
 *
 * @param \DOMText $ele The text node to write.
 */
public function text($ele)
{
    $insideRawText = isset($ele->parentNode)
        && isset($ele->parentNode->tagName)
        && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW);

    if ($insideRawText) {
        $this->wr($ele->data);
    } else {
        // FIXME: This probably needs some flags set.
        $this->wr($this->enc($ele->data));
    }
}
272
|
|
|
|
273
|
2 |
|
/**
 * Write a CDATA section using the owner document's XML serialization.
 *
 * @param \DOMNode $ele The CDATA node to write.
 */
public function cdata($ele)
{
    // This encodes CDATA.
    $markup = $ele->ownerDocument->saveXML($ele);
    $this->wr($markup);
}
278
|
|
|
|
279
|
3 |
|
/**
 * Write a comment node using the owner document's XML serialization.
 *
 * @param \DOMNode $ele The comment node to write.
 */
public function comment($ele)
{
    $markup = $ele->ownerDocument->saveXML($ele);
    $this->wr($markup);
}
285
|
|
|
|
286
|
3 |
|
/**
 * Write a processing instruction: opening marker, target, a space, the
 * data, and the closing marker.
 *
 * @param \DOMProcessingInstruction $ele The processing instruction to write.
 */
public function processorInstruction($ele)
{
    $pieces = array('<?', $ele->target, ' ', $ele->data, '?>');

    foreach ($pieces as $piece) {
        $this->wr($piece);
    }
}
294
|
|
|
|
295
|
|
|
/**
 * Write the namespace attributes.
 *
 * Queries the namespace nodes of the element that differ from those of
 * its parent and writes each one as an attribute, skipping any namespace
 * URI listed in $implicitNamespaces.
 *
 * The URI is passed through escape() in attribute mode before being
 * written, so it is escaped the same way as other attribute values
 * rather than emitted verbatim between the double quotes.
 *
 * @param \DOMNode $ele The element being written.
 */
protected function namespaceAttrs($ele)
{
    // Rebuild the evaluator when there is none yet or it belongs to another document.
    if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) {
        $this->xpath = new \DOMXPath($ele->ownerDocument);
    }

    foreach ($this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) {
        // Both sides are strings, so a strict comparison is safe.
        if (in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
            continue;
        }

        $this->wr(' ')
            ->wr($nsNode->nodeName)
            ->wr('="')
            ->wr($this->escape($nsNode->nodeValue, true))
            ->wr('"');
    }
}
312
|
|
|
|
313
|
|
|
/**
 * Write the opening tag.
 *
 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
 * qualified name (8.3).
 *
 * Outside HTML mode (SVG, MathML, or XML embedded content) an element
 * without children is written in self-closing form.
 *
 * @param \DOMNode $ele The element being written.
 */
protected function openTag($ele)
{
    $this->wr('<');
    $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
    $this->wr($tagName);

    $this->attrs($ele);
    $this->namespaceAttrs($ele);

    $selfClosing = $this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes();
    $this->wr($selfClosing ? ' />' : '>');
}
340
|
|
|
|
341
|
39 |
|
/**
 * Write the attributes of an element.
 *
 * Each attribute name is written, followed by ="value" when the encoded
 * value is non-empty or when nonBooleanAttribute() says the attribute must
 * always carry a value. Other empty attributes are written as a bare name.
 *
 * @param \DOMNode $ele The element whose attributes are written.
 *
 * @return $this Returned on every path; previously only the early exit
 *               for attribute-less elements returned a value.
 */
protected function attrs($ele)
{
    // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
    if (!$ele->hasAttributes()) {
        return $this;
    }

    $map = $ele->attributes;
    $len = $map->length;
    for ($i = 0; $i < $len; ++$i) {
        $node = $map->item($i);
        $val = $this->enc($node->value, true);

        // XXX: The spec says that we need to ensure that anything in
        // the XML, XMLNS, or XLink NS's should use the canonical
        // prefix. It seems that DOM does this for us already, but there
        // may be exceptions.
        $name = $node->nodeName;

        // Special handling for attributes in SVG and MathML.
        // Using if/elseif instead of switch because it's faster in PHP.
        if ($this->outputMode == static::IM_IN_SVG) {
            $name = Elements::normalizeSvgAttribute($name);
        } elseif ($this->outputMode == static::IM_IN_MATHML) {
            $name = Elements::normalizeMathMlAttribute($name);
        }

        $this->wr(' ')->wr($name);

        // enc() always returns a string, so only emptiness needs checking.
        if ('' !== $val || $this->nonBooleanAttribute($node)) {
            $this->wr('="')->wr($val)->wr('"');
        }
    }

    return $this;
}
377
|
|
|
|
378
|
10 |
|
/**
 * Decide whether an attribute must always be written with a value.
 *
 * Every rule in $nonBooleanAttributes is tried in order. A rule matches
 * when all of the constraints it declares hold:
 *  - 'nodeNamespace': namespace URI of the owning element.
 *  - 'attrNamespace': namespace URI of the attribute. The historical
 *    spelling 'attNamespace' is still accepted; 'attrNamespace' wins when
 *    both are present.
 *  - 'nodeName': local name of the owning element (string or array).
 *  - 'attrName': local name of the attribute (string or array).
 *  - 'xpath': expression evaluated with the attribute as context node;
 *    'prefixes' (prefix => namespace URI) are registered beforehand.
 *
 * @param \DOMAttr $attr The attribute being written.
 *
 * @return bool True when at least one rule matches, false otherwise.
 */
protected function nonBooleanAttribute(\DOMAttr $attr)
{
    $ele = $attr->ownerElement;
    foreach ($this->nonBooleanAttributes as $rule) {
        if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
            continue;
        }

        // Only 'attNamespace' used to be read, so a rule written with the
        // 'attrName'-style spelling 'attrNamespace' was silently ignored.
        $attrNamespace = null;
        if (isset($rule['attrNamespace'])) {
            $attrNamespace = $rule['attrNamespace'];
        } elseif (isset($rule['attNamespace'])) {
            $attrNamespace = $rule['attNamespace'];
        }
        if (null !== $attrNamespace && $attrNamespace !== $attr->namespaceURI) {
            continue;
        }

        // Casting to array lets a single name and a list of names share one strict check.
        if (isset($rule['nodeName']) && !in_array($ele->localName, (array) $rule['nodeName'], true)) {
            continue;
        }
        if (isset($rule['attrName']) && !in_array($attr->localName, (array) $rule['attrName'], true)) {
            continue;
        }

        if (isset($rule['xpath'])) {
            $xp = $this->getXPath($attr);
            if (isset($rule['prefixes'])) {
                foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                    $xp->registerNamespace($nsPrefix, $ns);
                }
            }
            if (!$xp->evaluate($rule['xpath'], $attr)) {
                continue;
            }
        }

        return true;
    }

    return false;
}
417
|
|
|
|
418
|
9 |
|
/**
 * Return an XPath evaluator bound to the document that owns the node.
 *
 * The evaluator is cached, but it is rebuilt when the cached one belongs
 * to a different document. This is the same check namespaceAttrs()
 * performs, and it avoids evaluating expressions for a node against an
 * evaluator created for another document.
 *
 * @param \DOMNode $node Node whose owner document the evaluator must match.
 *
 * @return \DOMXPath
 */
private function getXPath(\DOMNode $node)
{
    if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
        $this->xpath = new \DOMXPath($node->ownerDocument);
    }

    return $this->xpath;
}
426
|
|
|
|
427
|
|
|
/**
 * Write the closing tag.
 *
 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
 * qualified name (8.3).
 *
 * Nothing is written for a childless element outside HTML mode, because
 * openTag() writes such an element in self-closing form.
 *
 * @param \DOMNode $ele The element being written.
 */
protected function closeTag($ele)
{
    if ($this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes()) {
        return;
    }

    $this->wr('</');
    $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
    $this->wr($tagName);
    $this->wr('>');
}
441
|
|
|
|
442
|
|
|
/**
 * Write to the output.
 *
 * @param string $text The string to append to the output stream.
 *
 * @return $this
 */
protected function wr($text)
{
    $stream = $this->out;
    fwrite($stream, $text);

    return $this;
}
455
|
|
|
|
456
|
|
|
/**
 * Write the platform line ending (PHP_EOL) to the output.
 *
 * @return $this
 */
protected function nl()
{
    $stream = $this->out;
    fwrite($stream, PHP_EOL);

    return $this;
}
467
|
|
|
|
468
|
|
|
/**
 * Encode text.
 *
 * When the encode option is false (the default), the text is handed to
 * escape(), which escapes it per section 8.3 of the html5 spec.
 *
 * When the encode option is true, the text is converted to named character
 * references instead (section 8.1.4; the references are listed in section
 * 8.5). If ENT_HTML5 is available, htmlentities() does the conversion and
 * existing entities are not double-encoded; otherwise the HTML5Entities map
 * is applied with strtr(). The $attribute flag is not used on this path.
 *
 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
 *
 * @todo Use the Entities class in php 5.3 to have html5 entities.
 *
 * @param string $text      Text to encode.
 * @param bool   $attribute True if we are encoding an attribute, false otherwise.
 *
 * @return string The encoded text.
 */
protected function enc($text, $attribute = false)
{
    if ($this->encode) {
        // Prefer the native HTML5 entity table; fall back to the bundled map.
        return $this->hasHTML5
            ? htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false)
            : strtr($text, HTML5Entities::$map);
    }

    // Escape the text rather than convert to named character references.
    return $this->escape($text, $attribute);
}
513
|
|
|
|
514
|
|
|
/**
 * Escape text.
 *
 * According to the html5 spec section 8.3 Serializing HTML fragments, text
 * within tags that are not style, script, xmp, iframe, noembed, and noframes
 * needs to be properly escaped.
 *
 * The "&" is converted to "&amp;" and the no-break space (U+00A0, the
 * bytes C2 A0 in UTF-8) to "&nbsp;". In attribute mode the double quote
 * is converted to "&quot;". Outside attribute mode "<" and ">" are
 * converted to "&lt;" and "&gt;".
 *
 * The replacement table must contain the character references themselves.
 * A table that maps each character to itself escapes nothing.
 *
 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
 *
 * @param string $text      Text to escape.
 * @param bool   $attribute True if we are escaping an attribute, false otherwise.
 *
 * @return string The escaped text.
 */
protected function escape($text, $attribute = false)
{
    // Not using htmlspecialchars because, while it does escaping, it doesn't
    // match the requirements of section 8.5. For example, it doesn't handle
    // non-breaking spaces.
    if ($attribute) {
        $replace = array(
            '"' => '&quot;',
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );
    } else {
        $replace = array(
            '<' => '&lt;',
            '>' => '&gt;',
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );
    }

    // strtr() with an array never re-scans its own replacements, so the
    // "&" introduced by one reference is not escaped a second time.
    return strtr($text, $replace);
}
553
|
|
|
} |
554
|
|
|
|