Completed
Push — master ( 6c5dea...5376d4 )
by Asmir
20s queued 12s
created

OutputRules::unsetTraverser()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 3
cts 3
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
/**
3
 * @file
4
 * The rules for generating output in the serializer.
5
 *
6
 * These output rules are likely to generate output similar to the document that
7
 * was parsed. It is not intended to output exactly the document that was parsed.
8
 */
9
10
namespace Masterminds\HTML5\Serializer;
11
12
use Masterminds\HTML5\Elements;
13
14
/**
15
 * Generate the output html5 based on element rules.
16
 */
17
class OutputRules implements RulesInterface
18
{
19
    /**
20
     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
21
     */
22
    const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
23
24
    const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';
25
26
    const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';
27
28
    const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
29
30
    const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';
31
32
    const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';
33
34
    /**
35
     * Holds the namespace URIs that are implicit in HTML5 output; namespaceAttrs()
     * does not write xmlns declarations for them.
36
     *
37
     * @var array
38
     */
39
    protected $implicitNamespaces = array(
40
        self::NAMESPACE_HTML,
41
        self::NAMESPACE_SVG,
42
        self::NAMESPACE_MATHML,
43
        self::NAMESPACE_XML,
44
        self::NAMESPACE_XMLNS,
45
    );
46
47
    const IM_IN_HTML = 1;
48
49
    const IM_IN_SVG = 2;
50
51
    const IM_IN_MATHML = 3;
52
53
    /**
54
     * Caches whether the ENT_HTML5 constant is available.
55
     *
56
     * @var bool
57
     */
58
    private $hasHTML5 = false;
59
60
    protected $traverser;
61
62
    protected $encode = false;
63
64
    protected $out;
65
66
    protected $outputMode;
67
68
    private $xpath;
69
70
    protected $nonBooleanAttributes = array(
71
        /*
72
        array(
73
            'nodeNamespace'=>'http://www.w3.org/1999/xhtml',
74
            'attrNamespace'=>'http://www.w3.org/1999/xhtml',
75
76
            'nodeName'=>'img', 'nodeName'=>array('img', 'a'),
77
            'attrName'=>'alt', 'attrName'=>array('title', 'alt'),
78
        ),
79
        */
80
        array(
81
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
82
            'attrName' => array('href',
83
                'hreflang',
84
                'http-equiv',
85
                'icon',
86
                'id',
87
                'keytype',
88
                'kind',
89
                'label',
90
                'lang',
91
                'language',
92
                'list',
93
                'maxlength',
94
                'media',
95
                'method',
96
                'name',
97
                'placeholder',
98
                'rel',
99
                'rows',
100
                'rowspan',
101
                'sandbox',
102
                'spellcheck',
103
                'scope',
104
                'seamless',
105
                'shape',
106
                'size',
107
                'sizes',
108
                'span',
109
                'src',
110
                'srcdoc',
111
                'srclang',
112
                'srcset',
113
                'start',
114
                'step',
115
                'style',
116
                'summary',
117
                'tabindex',
118
                'target',
119
                'title',
120
                'type',
121
                'value',
122
                'width',
123
                'border',
124
                'charset',
125
                'cite',
126
                'class',
127
                'code',
128
                'codebase',
129
                'color',
130
                'cols',
131
                'colspan',
132
                'content',
133
                'coords',
134
                'data',
135
                'datetime',
136
                'default',
137
                'dir',
138
                'dirname',
139
                'enctype',
140
                'for',
141
                'form',
142
                'formaction',
143
                'headers',
144
                'height',
145
                'accept',
146
                'accept-charset',
147
                'accesskey',
148
                'action',
149
                'align',
150
                'alt',
151
                'bgcolor',
152
            ),
153
        ),
154
        array(
155
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
156
            'xpath' => 'starts-with(local-name(), \'data-\')',
157
        ),
158
    );
159
160
    const DOCTYPE = '<!DOCTYPE html>';
161
162 64
    /**
     * Set up the rules with an output stream and serializer options.
     *
     * @param resource $output  Stream handed to fwrite() by wr() and nl().
     * @param array    $options Optional settings; only 'encode_entities' is read here.
     */
    public function __construct($output, $options = array())
    {
        $this->out = $output;
        $this->outputMode = static::IM_IN_HTML;

        // Remember once whether this PHP build defines ENT_HTML5; enc() consults it.
        $this->hasHTML5 = defined('ENT_HTML5');

        // Keep the default (false) unless the caller supplied the option.
        if (isset($options['encode_entities'])) {
            $this->encode = $options['encode_entities'];
        }
    }
172
173
    /**
     * Append a rule to the list consulted by nonBooleanAttribute().
     *
     * @param array $rule Rule definition; nonBooleanAttribute() reads keys such as
     *                    'nodeNamespace', 'nodeName', 'attrName', 'xpath' and 'prefixes'.
     */
    public function addRule(array $rule)
    {
        array_push($this->nonBooleanAttributes, $rule);
    }
177
178 64
    /**
     * Attach the traverser that element(), document() and the tag writers call back into.
     *
     * @param Traverser $traverser The traverser to store.
     *
     * @return $this
     */
    public function setTraverser(Traverser $traverser)
    {
        $rules = $this;
        $rules->traverser = $traverser;

        return $rules;
    }
184
185 17
    /**
     * Drop the reference to the traverser by setting it to null.
     *
     * @return $this
     */
    public function unsetTraverser()
    {
        $rules = $this;
        $rules->traverser = null;

        return $rules;
    }
191
192 18
    /**
     * Write a whole document: the doctype, then every top-level child node.
     *
     * @param \DOMDocument $dom Document to serialize (presumably a DOMDocument; only
     *                          documentElement and childNodes are read here).
     */
    public function document($dom)
    {
        $this->doctype();

        // Without a root element only the doctype is emitted.
        if (!$dom->documentElement) {
            return;
        }

        foreach ($dom->childNodes as $child) {
            $this->traverser->node($child);
        }

        $this->nl();
    }
202
203 19
    /**
     * Write the DOCTYPE constant followed by a line break.
     */
    protected function doctype()
    {
        // wr() returns $this, so the newline can be chained directly.
        $this->wr(static::DOCTYPE)->nl();
    }
208
209 27
    /**
     * Write an element: opening tag, children, and (unless void) closing tag.
     *
     * Entering an svg or math element switches the output mode so that tags and
     * attributes are written with the SVG/MathML rules. When that element has been
     * processed the mode that was active on entry is restored, so a foreign root
     * nested inside another foreign root does not drop the outer one back to HTML.
     *
     * @param \DOMElement $ele The element to write.
     */
    public function element($ele)
    {
        $name = $ele->tagName;

        // Per spec:
        // If the element has a declared namespace in the HTML, MathML or
        // SVG namespaces, we use the lname instead of the tagName.
        if ($this->traverser->isLocalElement($ele)) {
            $name = $ele->localName;
        }

        // Remember the mode we came from so it can be restored afterwards.
        $previousMode = $this->outputMode;

        // If we are in SVG or MathML there is special handling.
        // Using if/elseif instead of switch because it's faster in PHP.
        if ('svg' === $name) {
            $this->outputMode = static::IM_IN_SVG;
            $name = Elements::normalizeSvgElement($name);
        } elseif ('math' === $name) {
            $this->outputMode = static::IM_IN_MATHML;
        }

        $this->openTag($ele);

        if (Elements::isA($name, Elements::TEXT_RAW)) {
            // Raw-text elements: character data is written as-is, without escaping.
            foreach ($ele->childNodes as $child) {
                if ($child instanceof \DOMCharacterData) {
                    $this->wr($child->data);
                } elseif ($child instanceof \DOMElement) {
                    $this->element($child);
                }
            }
        } else {
            // Handle children.
            if ($ele->hasChildNodes()) {
                $this->traverser->children($ele->childNodes);
            }

            // Close out the SVG or MathML special handling by going back to
            // whatever mode enclosed this element (HTML at the top level).
            if ('svg' === $name || 'math' === $name) {
                $this->outputMode = $previousMode;
            }
        }

        // If not unary, add a closing tag.
        if (!Elements::isA($name, Elements::VOID_TAG)) {
            $this->closeTag($ele);
        }
    }
255
256
    /**
257
     * Write a text node.
258
     *
259
     * @param \DOMText $ele The text node to write.
260
     */
261 24
    public function text($ele)
    {
        $parent = $ele->parentNode;

        // Text directly inside a raw-text element is written without escaping.
        $insideRawText = isset($parent, $parent->tagName)
            && Elements::isA($parent->localName, Elements::TEXT_RAW);

        if ($insideRawText) {
            $this->wr($ele->data);

            return;
        }

        // FIXME: This probably needs some flags set.
        $encoded = $this->enc($ele->data);
        $this->wr($encoded);
    }
272
273 2
    /**
     * Write a CDATA node using the owner document's saveXML() output for it.
     *
     * @param \DOMNode $ele The node to write (presumably a DOMCdataSection).
     */
    public function cdata($ele)
    {
        // This encodes CDATA.
        $markup = $ele->ownerDocument->saveXML($ele);
        $this->wr($markup);
    }
278
279 3
    /**
     * Write a comment node using the owner document's saveXML() output for it.
     *
     * @param \DOMNode $ele The node to write (presumably a DOMComment).
     */
    public function comment($ele)
    {
        // Equivalent to writing '<!--', the comment data and '-->' by hand.
        $markup = $ele->ownerDocument->saveXML($ele);
        $this->wr($markup);
    }
285
286 3
    /**
     * Write a processing instruction as "<?target data?>".
     *
     * @param \DOMNode $ele The node to write; its target and data properties are read
     *                      (presumably a DOMProcessingInstruction).
     */
    public function processorInstruction($ele)
    {
        $pieces = array('<?', $ele->target, ' ', $ele->data, '?>');

        foreach ($pieces as $piece) {
            $this->wr($piece);
        }
    }
294
295
    /**
296
     * Write the namespace attributes.
297
     *
298
     * @param \DOMNode $ele The element being written.
299
     */
300 28
    protected function namespaceAttrs($ele)
    {
        $doc = $ele->ownerDocument;

        // The cached DOMXPath is only valid for the document it was created for.
        if (!$this->xpath || $this->xpath->document !== $doc) {
            $this->xpath = new \DOMXPath($doc);
        }

        // Select the namespace nodes on this element whose value does not already
        // appear among the parent's namespace nodes.
        $declared = $this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele);

        foreach ($declared as $nsNode) {
            $uri = $nsNode->nodeValue;

            // Implicit namespaces are never written out; compare strictly so
            // only an exact string match is skipped.
            if (in_array($uri, $this->implicitNamespaces, true)) {
                continue;
            }

            // The URI lands inside a double-quoted attribute value, so escape it
            // the same way other attribute values are escaped; a raw '"' or '&'
            // would otherwise produce broken markup.
            $this->wr(' ')
                ->wr($nsNode->nodeName)
                ->wr('="')
                ->wr($this->escape($uri, true))
                ->wr('"');
        }
    }
312
313
    /**
314
     * Write the opening tag.
315
     *
316
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
317
     * qualified name (8.3).
318
     *
319
     * @param \DOMNode $ele The element being written.
320
     */
321 28
    protected function openTag($ele)
    {
        $tag = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
        $this->wr('<')->wr($tag);

        $this->attrs($ele);
        $this->namespaceAttrs($ele);

        // Outside HTML mode (SVG, MathML, or XML embedded content) an element
        // without children is written as self-closing.
        $selfClosing = $this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes();

        $this->wr($selfClosing ? ' />' : '>');
    }
340
341 39
    /**
     * Write the attributes of an element.
     *
     * Each attribute is written as ` name`; `="value"` is appended when the
     * encoded value is non-empty or nonBooleanAttribute() reports that the
     * attribute must always carry a value.
     *
     * @param \DOMNode $ele The element whose attributes are written.
     *
     * @return $this Returned on every path (previously only the early exit did).
     */
    protected function attrs($ele)
    {
        // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
        if (!$ele->hasAttributes()) {
            return $this;
        }

        // TODO: Currently, this always writes name="value", and does not do
        // value-less attributes.
        foreach ($ele->attributes as $node) {
            $val = $this->enc($node->value, true);

            // XXX: The spec says that we need to ensure that anything in
            // the XML, XMLNS, or XLink NS's should use the canonical
            // prefix. It seems that DOM does this for us already, but there
            // may be exceptions.
            $name = $node->nodeName;

            // Special handling for attributes in SVG and MathML.
            // Using if/elseif instead of switch because it's faster in PHP.
            if ($this->outputMode == static::IM_IN_SVG) {
                $name = Elements::normalizeSvgAttribute($name);
            } elseif ($this->outputMode == static::IM_IN_MATHML) {
                $name = Elements::normalizeMathMlAttribute($name);
            }

            $this->wr(' ')->wr($name);

            // An empty value is omitted unless a rule says the attribute is
            // not a boolean attribute.
            if ((isset($val) && '' !== $val) || $this->nonBooleanAttribute($node)) {
                $this->wr('="')->wr($val)->wr('"');
            }
        }

        return $this;
    }
377
378 10
    /**
     * Tell whether an attribute matches one of the non-boolean attribute rules.
     *
     * A rule matches when every criterion it defines is satisfied:
     *  - 'nodeNamespace': namespace URI of the owning element;
     *  - 'attrNamespace': namespace URI of the attribute (the misspelled key
     *    'attNamespace' is honoured as well, because that is the key this
     *    method historically read);
     *  - 'nodeName': local name of the owning element (string or array of strings);
     *  - 'attrName': local name of the attribute (string or array of strings);
     *  - 'xpath': expression evaluated with the attribute as context node, with
     *    the optional 'prefixes' (prefix => namespace URI) registered first.
     *
     * @param \DOMAttr $attr The attribute to test.
     *
     * @return bool True as soon as one rule matches, false if none does.
     */
    protected function nonBooleanAttribute(\DOMAttr $attr)
    {
        $ele = $attr->ownerElement;

        foreach ($this->nonBooleanAttributes as $rule) {
            if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
                continue;
            }

            // Accept both the spelling used in the example rule on
            // $nonBooleanAttributes ('attrNamespace') and the legacy misspelling.
            foreach (array('attrNamespace', 'attNamespace') as $nsKey) {
                if (isset($rule[$nsKey]) && $rule[$nsKey] !== $attr->namespaceURI) {
                    continue 2;
                }
            }

            if (isset($rule['nodeName'])) {
                $nodeNames = is_array($rule['nodeName']) ? $rule['nodeName'] : array($rule['nodeName']);
                if (!in_array($ele->localName, $nodeNames, true)) {
                    continue;
                }
            }

            if (isset($rule['attrName'])) {
                $attrNames = is_array($rule['attrName']) ? $rule['attrName'] : array($rule['attrName']);
                if (!in_array($attr->localName, $attrNames, true)) {
                    continue;
                }
            }

            if (isset($rule['xpath'])) {
                $xp = $this->getXPath($attr);

                if (isset($rule['prefixes'])) {
                    foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                        $xp->registerNamespace($nsPrefix, $ns);
                    }
                }

                // A falsy evaluation result means the rule does not apply.
                if (!$xp->evaluate($rule['xpath'], $attr)) {
                    continue;
                }
            }

            return true;
        }

        return false;
    }
417
418 9
    /**
     * Return a DOMXPath bound to the document that owns the given node.
     *
     * The instance is cached on the object and rebuilt whenever the node belongs
     * to a different document than the cached one, matching the check done in
     * namespaceAttrs(); reusing an XPath created for another document would
     * evaluate against the wrong tree.
     *
     * @param \DOMNode $node Node whose owner document the XPath must target.
     *
     * @return \DOMXPath
     */
    private function getXPath(\DOMNode $node)
    {
        $doc = $node->ownerDocument;

        if (!$this->xpath || $this->xpath->document !== $doc) {
            $this->xpath = new \DOMXPath($doc);
        }

        return $this->xpath;
    }
426
427
    /**
428
     * Write the closing tag.
429
     *
430
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
431
     * qualified name (8.3).
432
     *
433
     * @param \DOMNode $ele The element being written.
434
     */
435 27
    protected function closeTag($ele)
    {
        // Outside HTML mode a childless element was already written as
        // self-closing by openTag(), so there is nothing to close.
        $needsClosing = $this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes();
        if (!$needsClosing) {
            return;
        }

        $tag = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
        $this->wr('</')->wr($tag)->wr('>');
    }
441
442
    /**
443
     * Write to the output.
444
     *
445
     * @param string $text The string to put into the output
446
     *
447
     * @return $this
448
     */
449 48
    protected function wr($text)
    {
        $stream = $this->out;
        fwrite($stream, $text);

        // Returning the object lets callers chain several writes.
        return $this;
    }
455
456
    /**
457
     * Write a new line character.
458
     *
459
     * @return $this
460
     */
461 20
    protected function nl()
    {
        $stream = $this->out;
        // PHP_EOL is the platform's line ending.
        fwrite($stream, PHP_EOL);

        return $this;
    }
467
468
    /**
469
     * Encode text.
470
     *
471
     * When encode is set to false, the default value, the text passed in is
472
     * escaped per section 8.3 of the html5 spec. For details on how text is
473
     * escaped see the escape() method.
474
     *
475
     * When encoding is set to true the text is converted to named character
476
     * references where appropriate. Section 8.1.4 Character references of the
477
     * html5 spec refers to using named character references. This is useful for
478
     * characters that can't otherwise legally be used in the text.
479
     *
480
     * The named character references are listed in section 8.5.
481
     *
482
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references True encoding will turn all named character references into their entities.
483
     *      This includes such characters as +.# and many other common ones. By default
484
     *      encoding here will just escape &'<>".
485
     *
486
     *      Note, PHP 5.4+ has better html5 encoding.
487
     *
488
     * @todo Use the Entities class in php 5.3 to have html5 entities.
489
     *
490
     * @param string $text      Text to encode.
491
     * @param bool   $attribute True if we are encoding an attribute, false otherwise.
492
     *
493
     * @return string The encoded text.
494
     */
495 44
    protected function enc($text, $attribute = false)
    {
        // Entity encoding disabled: escape the text rather than convert it to
        // named character references.
        if (!$this->encode) {
            return $this->escape($text, $attribute);
        }

        // Without ENT_HTML5 (PHP earlier than 5.4) html5 entities are not
        // entirely handled natively, so map them manually.
        if (!$this->hasHTML5) {
            return strtr($text, HTML5Entities::$map);
        }

        // PHP 5.4+ can convert to the named character references natively.
        $flags = ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES;

        return htmlentities($text, $flags, 'UTF-8', false);
    }
513
514
    /**
515
     * Escape text.
516
     *
517
     * According to the html5 spec section 8.3 Serializing HTML fragments, text
518
     * within tags that are not style, script, xmp, iframe, noembed, and noframes
519
     * need to be properly escaped.
520
     *
521
     * The & should be converted to &amp;, no breaking space unicode characters
522
     * converted to &nbsp;, when in attribute mode the " should be converted to
523
     * &quot;, and when not in attribute mode the < and > should be converted to
524
     * &lt; and &gt;.
525
     *
526
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
527
     *
528
     * @param string $text      Text to escape.
529
     * @param bool   $attribute True if we are escaping an attribute, false otherwise.
530
     */
531 51
    protected function escape($text, $attribute = false)
    {
        // Not using htmlspecialchars because, while it does escaping, it doesn't
        // cover everything required here. For example, it doesn't handle
        // non-breaking spaces.

        // Replacements shared by attribute values and text content.
        $replace = array(
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );

        if ($attribute) {
            // Attribute values are written inside double quotes.
            $replace['"'] = '&quot;';
        } else {
            // Text content must not open or close tags.
            $replace['<'] = '&lt;';
            $replace['>'] = '&gt;';
        }

        return strtr($text, $replace);
    }
553
}
554