|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* @file |
|
4
|
|
|
* The rules for generating output in the serializer. |
|
5
|
|
|
* |
|
6
|
|
|
* These output rules are likely to generate output similar to the document that |
|
7
|
|
|
* was parsed. It is not intended to output exactly the document that was parsed. |
|
8
|
|
|
*/ |
|
9
|
|
|
|
|
10
|
|
|
namespace Masterminds\HTML5\Serializer; |
|
11
|
|
|
|
|
12
|
|
|
use Masterminds\HTML5\Elements; |
|
13
|
|
|
|
|
14
|
|
|
/**
 * Generate the output html5 based on element rules.
 *
 * The rules write directly to the stream handed to the constructor. A
 * traverser must be attached with setTraverser() before any node is written,
 * because child traversal and the local-element test are delegated to it.
 */
class OutputRules implements RulesInterface
{
    /**
     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
     */
    const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';

    const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';

    const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';

    const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';

    const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';

    const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';

    /**
     * Namespace URIs that are never written out as xmlns attributes.
     *
     * @var array
     */
    protected $implicitNamespaces = array(
        self::NAMESPACE_HTML,
        self::NAMESPACE_SVG,
        self::NAMESPACE_MATHML,
        self::NAMESPACE_XML,
        self::NAMESPACE_XMLNS,
    );

    const IM_IN_HTML = 1;

    const IM_IN_SVG = 2;

    const IM_IN_MATHML = 3;

    /**
     * Used as cache to detect if is available ENT_HTML5.
     *
     * @var bool
     */
    private $hasHTML5 = false;

    /**
     * Traverser used to walk child nodes; set through setTraverser().
     */
    protected $traverser;

    /**
     * When true, enc() converts text to named character references instead
     * of applying the minimal escape().
     *
     * @var bool
     */
    protected $encode = false;

    /**
     * Stream resource that all output is written to.
     */
    protected $out;

    /**
     * One of the IM_IN_* constants; selects tag and attribute handling.
     *
     * @var int
     */
    protected $outputMode;

    /**
     * Lazily created XPath helper, bound to the document being written.
     *
     * @var \DOMXPath|null
     */
    private $xpath;

    /**
     * Rules describing attributes that must keep an explicit value (name="")
     * even when the value is empty, i.e. attributes that are not boolean.
     *
     * Every key in a rule is optional; a rule applies when all of the keys it
     * does contain match:
     *  - 'nodeNamespace': namespace URI of the owning element.
     *  - 'attrNamespace': namespace URI of the attribute (the historical
     *    spelling 'attNamespace' is accepted as well).
     *  - 'nodeName': element local name, or a list of local names.
     *  - 'attrName': attribute local name, or a list of local names.
     *  - 'xpath': expression evaluated with the attribute as context node.
     *  - 'prefixes': prefix => namespace URI map registered before 'xpath'
     *    is evaluated.
     *
     * @var array
     */
    protected $nonBooleanAttributes = array(
        array(
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
            'attrName' => array('href',
                'hreflang',
                'http-equiv',
                'icon',
                'id',
                'keytype',
                'kind',
                'label',
                'lang',
                'language',
                'list',
                'maxlength',
                'media',
                'method',
                'name',
                'placeholder',
                'rel',
                'rows',
                'rowspan',
                'sandbox',
                'spellcheck',
                'scope',
                'seamless',
                'shape',
                'size',
                'sizes',
                'span',
                'src',
                'srcdoc',
                'srclang',
                'srcset',
                'start',
                'step',
                'style',
                'summary',
                'tabindex',
                'target',
                'title',
                'type',
                'value',
                'width',
                'border',
                'charset',
                'cite',
                'class',
                'code',
                'codebase',
                'color',
                'cols',
                'colspan',
                'content',
                'coords',
                'data',
                'datetime',
                'default',
                'dir',
                'dirname',
                'enctype',
                'for',
                'form',
                'formaction',
                'headers',
                'height',
                'accept',
                'accept-charset',
                'accesskey',
                'action',
                'align',
                'alt',
                'bgcolor',
            ),
        ),
        array(
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
            'xpath' => 'starts-with(local-name(), \'data-\')',
        ),
    );

    const DOCTYPE = '<!DOCTYPE html>';

    /**
     * @param resource $output  Stream that the serialized markup is written to.
     * @param array    $options Supported key: 'encode_entities' (bool), which
     *                          switches enc() to named character references.
     */
    public function __construct($output, $options = array())
    {
        if (isset($options['encode_entities'])) {
            $this->encode = $options['encode_entities'];
        }

        $this->outputMode = static::IM_IN_HTML;
        $this->out = $output;
        $this->hasHTML5 = defined('ENT_HTML5');
    }

    /**
     * Register an additional non-boolean attribute rule.
     *
     * @param array $rule Rule in the format described on $nonBooleanAttributes.
     */
    public function addRule(array $rule)
    {
        $this->nonBooleanAttributes[] = $rule;
    }

    /**
     * Attach the traverser used to walk child nodes.
     *
     * @param Traverser $traverser
     *
     * @return $this
     */
    public function setTraverser(Traverser $traverser)
    {
        $this->traverser = $traverser;

        return $this;
    }

    /**
     * Detach the traverser.
     *
     * @return $this
     */
    public function unsetTraverser()
    {
        $this->traverser = null;

        return $this;
    }

    /**
     * Write a whole document: the doctype first, then every top-level node.
     *
     * The doctype is always written. The child nodes and the trailing newline
     * are only written when the document has a document element.
     *
     * @param \DOMDocument $dom The document to write.
     */
    public function document($dom)
    {
        $this->doctype();

        if ($dom->documentElement) {
            foreach ($dom->childNodes as $node) {
                $this->traverser->node($node);
            }
            $this->nl();
        }
    }

    /**
     * Write the doctype declaration followed by a newline.
     */
    protected function doctype()
    {
        $this->wr(static::DOCTYPE);
        $this->nl();
    }

    /**
     * Write an element: opening tag, content, and closing tag when required.
     *
     * @param \DOMElement $ele The element to write.
     */
    public function element($ele)
    {
        // Per spec: if the element has a declared namespace in the HTML,
        // MathML or SVG namespaces, we use the local name instead of the
        // tagName.
        $name = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;

        // Entering <svg> or <math> switches the output mode for the whole
        // subtree. Remember the mode we came from so it can be restored.
        $previousMode = $this->outputMode;
        $entersForeignContent = false;
        if ('svg' === $name) {
            $this->outputMode = static::IM_IN_SVG;
            $name = Elements::normalizeSvgElement($name);
            $entersForeignContent = true;
        } elseif ('math' === $name) {
            $this->outputMode = static::IM_IN_MATHML;
            $entersForeignContent = true;
        }

        $this->openTag($ele);

        if (Elements::isA($name, Elements::TEXT_RAW)) {
            // Raw text elements: character data is written without escaping.
            foreach ($ele->childNodes as $child) {
                if ($child instanceof \DOMCharacterData) {
                    $this->wr($child->data);
                } elseif ($child instanceof \DOMElement) {
                    $this->element($child);
                }
            }
        } elseif ($ele->hasChildNodes()) {
            $this->traverser->children($ele->childNodes);
        }

        // If not unary, add a closing tag.
        if (!Elements::isA($name, Elements::VOID_TAG)) {
            $this->closeTag($ele);
        }

        // Leave the SVG/MathML mode only after the closing tag was handled:
        // closeTag() consults the mode to decide whether openTag() already
        // emitted a self-closing tag. Resetting earlier produced
        // "<svg /></svg>" for an empty element. Restoring the previous mode
        // (instead of forcing HTML) keeps an enclosing foreign element intact.
        if ($entersForeignContent) {
            $this->outputMode = $previousMode;
        }
    }

    /**
     * Write a text node.
     *
     * Text whose parent is a raw text element is written verbatim; any other
     * text is passed through enc() first.
     *
     * @param \DOMText $ele The text node to write.
     */
    public function text($ele)
    {
        $insideRawText = isset($ele->parentNode)
            && isset($ele->parentNode->tagName)
            && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW);

        if ($insideRawText) {
            $this->wr($ele->data);

            return;
        }

        // FIXME: This probably needs some flags set.
        $this->wr($this->enc($ele->data));
    }

    /**
     * Write a CDATA section, serialized by the owning document.
     *
     * @param \DOMNode $ele The CDATA node to write.
     */
    public function cdata($ele)
    {
        // This encodes CDATA.
        $this->wr($ele->ownerDocument->saveXML($ele));
    }

    /**
     * Write a comment node, serialized by the owning document.
     *
     * @param \DOMNode $ele The comment node to write.
     */
    public function comment($ele)
    {
        $this->wr($ele->ownerDocument->saveXML($ele));
    }

    /**
     * Write a processing instruction as "<?target data?>".
     *
     * @param \DOMProcessingInstruction $ele The instruction to write.
     */
    public function processorInstruction($ele)
    {
        $this->wr('<?');
        $this->wr($ele->target);
        $this->wr(' ');
        $this->wr($ele->data);
        $this->wr('?>');
    }

    /**
     * Write the namespace attributes.
     *
     * Only namespaces that are not already declared two levels up and that
     * are not listed in $implicitNamespaces are written.
     *
     * @param \DOMNode $ele The element being written.
     */
    protected function namespaceAttrs($ele)
    {
        $declared = $this->getXPath($ele)->query('namespace::*[not(.=../../namespace::*)]', $ele);

        foreach ($declared as $nsNode) {
            if (in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
                continue;
            }

            $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"');
        }
    }

    /**
     * Write the opening tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * @param \DOMNode $ele The element being written.
     */
    protected function openTag($ele)
    {
        $this->wr('<')->wr($this->serializedTagName($ele));

        $this->attrs($ele);
        $this->namespaceAttrs($ele);

        // Outside HTML mode (SVG, MathML, or XML embedded content) an element
        // without children is written as a self-closing tag.
        $selfClosing = $this->outputMode !== static::IM_IN_HTML && !$ele->hasChildNodes();

        $this->wr($selfClosing ? ' />' : '>');
    }

    /**
     * Write the attributes of an element.
     *
     * The value part (="...") is omitted only when the value is empty and no
     * non-boolean attribute rule applies to the attribute.
     *
     * @param \DOMNode $ele The element whose attributes are written.
     *
     * @return $this
     */
    protected function attrs($ele)
    {
        // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
        if (!$ele->hasAttributes()) {
            return $this;
        }

        $map = $ele->attributes;
        $len = $map->length;
        for ($i = 0; $i < $len; ++$i) {
            $node = $map->item($i);
            $val = $this->enc($node->value, true);

            // XXX: The spec says that we need to ensure that anything in
            // the XML, XMLNS, or XLink NS's should use the canonical
            // prefix. It seems that DOM does this for us already, but there
            // may be exceptions.
            $name = $node->nodeName;

            // Special handling for attributes in SVG and MathML.
            if ($this->outputMode === static::IM_IN_SVG) {
                $name = Elements::normalizeSvgAttribute($name);
            } elseif ($this->outputMode === static::IM_IN_MATHML) {
                $name = Elements::normalizeMathMlAttribute($name);
            }

            $this->wr(' ')->wr($name);

            if ((null !== $val && '' !== $val) || $this->nonBooleanAttribute($node)) {
                $this->wr('="')->wr($val)->wr('"');
            }
        }

        return $this;
    }

    /**
     * Tell whether an attribute must always be written with a value.
     *
     * @param \DOMAttr $attr The attribute to test.
     *
     * @return bool True when at least one rule in $nonBooleanAttributes
     *              matches the attribute.
     */
    protected function nonBooleanAttribute(\DOMAttr $attr)
    {
        $ele = $attr->ownerElement;

        foreach ($this->nonBooleanAttributes as $rule) {
            if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
                continue;
            }

            // 'attrNamespace' is the documented key; 'attNamespace' is the
            // spelling this method historically read. Honour both.
            $attrNamespace = null;
            if (isset($rule['attrNamespace'])) {
                $attrNamespace = $rule['attrNamespace'];
            } elseif (isset($rule['attNamespace'])) {
                $attrNamespace = $rule['attNamespace'];
            }
            if (null !== $attrNamespace && $attrNamespace !== $attr->namespaceURI) {
                continue;
            }

            if (!$this->ruleNameMatches($rule, 'nodeName', $ele->localName)) {
                continue;
            }
            if (!$this->ruleNameMatches($rule, 'attrName', $attr->localName)) {
                continue;
            }

            if (isset($rule['xpath'])) {
                $xp = $this->getXPath($attr);
                if (isset($rule['prefixes'])) {
                    foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                        $xp->registerNamespace($nsPrefix, $ns);
                    }
                }
                if (!$xp->evaluate($rule['xpath'], $attr)) {
                    continue;
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Check one name constraint of a non-boolean attribute rule.
     *
     * @param array  $rule   The rule being tested.
     * @param string $key    Constraint key ('nodeName' or 'attrName').
     * @param string $actual Local name found in the DOM.
     *
     * @return bool True when the rule has no such constraint, or when the
     *              constraint (a single name or a list of names) includes
     *              $actual.
     */
    private function ruleNameMatches(array $rule, $key, $actual)
    {
        if (!isset($rule[$key])) {
            return true;
        }

        $expected = $rule[$key];
        if (is_array($expected)) {
            return in_array($actual, $expected, true);
        }

        return $expected === $actual;
    }

    /**
     * Get an XPath helper bound to the document that owns the given node.
     *
     * The helper is cached, and rebuilt whenever a node from a different
     * document is passed in, so the same rules object can serialize several
     * documents.
     *
     * @param \DOMNode $node Any node of the document being written.
     *
     * @return \DOMXPath
     */
    private function getXPath(\DOMNode $node)
    {
        if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
            $this->xpath = new \DOMXPath($node->ownerDocument);
        }

        return $this->xpath;
    }

    /**
     * Write the closing tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * Nothing is written for a childless element outside HTML mode, because
     * openTag() already wrote it as a self-closing tag.
     *
     * @param \DOMNode $ele The element being written.
     */
    protected function closeTag($ele)
    {
        if ($this->outputMode !== static::IM_IN_HTML && !$ele->hasChildNodes()) {
            return;
        }

        $this->wr('</')->wr($this->serializedTagName($ele))->wr('>');
    }

    /**
     * Pick the name used in the opening and closing tags of an element.
     *
     * @param \DOMNode $ele The element being written.
     *
     * @return string The local name when the traverser reports the element as
     *                local, the qualified tag name otherwise.
     */
    private function serializedTagName($ele)
    {
        return $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
    }

    /**
     * Write to the output.
     *
     * @param string $text The string to put into the output
     *
     * @return $this
     */
    protected function wr($text)
    {
        fwrite($this->out, $text);

        return $this;
    }

    /**
     * Write a new line character.
     *
     * @return $this
     */
    protected function nl()
    {
        fwrite($this->out, PHP_EOL);

        return $this;
    }

    /**
     * Encode text.
     *
     * When encode is set to false, the default value, the text passed in is
     * escaped per section 8.3 of the html5 spec. For details on how text is
     * escaped see the escape() method.
     *
     * When encoding is set to true the text is converted to named character
     * references where appropriate. Section 8.1.4 Character references of the
     * html5 spec refers to using named character references. This is useful for
     * characters that can't otherwise legally be used in the text.
     *
     * The named character references are listed in section 8.5. True encoding
     * will turn all named character references into their entities. This
     * includes such characters as +.# and many other common ones. By default
     * encoding here will just escape &'<>".
     *
     * Note, PHP 5.4+ has better html5 encoding.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
     *
     * @todo Use the Entities class in php 5.3 to have html5 entities.
     *
     * @param string $text      Text to encode.
     * @param bool   $attribute True if we are encoding an attribute, false otherwise.
     *
     * @return string The encoded text.
     */
    protected function enc($text, $attribute = false)
    {
        // Escape the text rather than convert to named character references.
        if (!$this->encode) {
            return $this->escape($text, $attribute);
        }

        // If we are in PHP 5.4+ we can use the native html5 entity
        // functionality to convert the named character references.
        if ($this->hasHTML5) {
            return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
        }

        // Before PHP 5.4 html5 entities are not entirely handled natively, so
        // they are mapped manually.
        return strtr($text, HTML5Entities::$map);
    }

    /**
     * Escape text.
     *
     * According to the html5 spec section 8.3 Serializing HTML fragments, text
     * within tags that are not style, script, xmp, iframe, noembed, and noframes
     * need to be properly escaped.
     *
     * The & should be converted to &amp;, no breaking space unicode characters
     * converted to &nbsp;, when in attribute mode the " should be converted to
     * &quot;, and when not in attribute mode the < and > should be converted to
     * &lt; and &gt;.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
     *
     * @param string $text      Text to escape.
     * @param bool   $attribute True if we are escaping an attribute, false otherwise.
     *
     * @return string The escaped text.
     */
    protected function escape($text, $attribute = false)
    {
        // Not using htmlspecialchars because, while it does escaping, it doesn't
        // match the requirements of section 8.5. For example, it doesn't handle
        // non-breaking spaces.
        if ($attribute) {
            $replace = array(
                '"' => '&quot;',
                '&' => '&amp;',
                "\xc2\xa0" => '&nbsp;',
            );
        } else {
            $replace = array(
                '<' => '&lt;',
                '>' => '&gt;',
                '&' => '&amp;',
                "\xc2\xa0" => '&nbsp;',
            );
        }

        // strtr() replaces in a single pass and never rescans its own output,
        // so the ampersands of the inserted entities are not escaped again.
        return strtr($text, $replace);
    }
}
|
554
|
|
|
|