Completed
Pull Request — master (#160)
by
unknown
01:39
created

OutputRules::getXPath()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 8
ccs 5
cts 5
cp 1
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 1
crap 2
1
<?php
2
/**
3
 * @file
4
 * The rules for generating output in the serializer.
5
 *
6
 * These output rules are likely to generate output similar to the document that
7
 * was parsed. It is not intended to output exactly the document that was parsed.
8
 */
9
10
namespace Masterminds\HTML5\Serializer;
11
12
use Masterminds\HTML5\Elements;
13
14
/**
15
 * Generate the output html5 based on element rules.
16
 */
17
class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
18
{
19
    /**
20
     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
21
     */
22
    const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
23
24
    const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';
25
26
    const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';
27
28
    const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
29
30
    const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';
31
32
    const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';
33
34
    /**
35
     * Holds the namespace URIs that are implicit and therefore not written out as namespace attributes.
36
     *
37
     * @var array
38
     */
39
    protected $implicitNamespaces = array(
40
        self::NAMESPACE_HTML,
41
        self::NAMESPACE_SVG,
42
        self::NAMESPACE_MATHML,
43
        self::NAMESPACE_XML,
44
        self::NAMESPACE_XMLNS,
45
    );
46
47
    const IM_IN_HTML = 1;
48
49
    const IM_IN_SVG = 2;
50
51
    const IM_IN_MATHML = 3;
52
53
    /**
54
     * Caches whether ENT_HTML5 is available for entity encoding.
55
     *
56
     * @var bool
57
     */
58
    private $hasHTML5 = false;
59
60
    protected $traverser;
61
62
    protected $encode = false;
63
64
    protected $out;
65
66
    protected $outputMode;
67
68
    private $xpath;
69
70
    protected $nonBooleanAttributes = array(
71
        /*
72
        array(
73
            'nodeNamespace'=>'http://www.w3.org/1999/xhtml',
74
            'attrNamespace'=>'http://www.w3.org/1999/xhtml',
75
76
            'nodeName'=>'img', 'nodeName'=>array('img', 'a'),
77
            'attrName'=>'alt', 'attrName'=>array('title', 'alt'),
78
        ),
79
        */
80
        array(
81
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
82
            'attrName' => array('href',
83
                'hreflang',
84
                'http-equiv',
85
                'icon',
86
                'id',
87
                'keytype',
88
                'kind',
89
                'label',
90
                'lang',
91
                'language',
92
                'list',
93
                'maxlength',
94
                'media',
95
                'method',
96
                'name',
97
                'placeholder',
98
                'rel',
99
                'rows',
100
                'rowspan',
101
                'sandbox',
102
                'spellcheck',
103
                'scope',
104
                'seamless',
105
                'shape',
106
                'size',
107
                'sizes',
108
                'span',
109
                'src',
110
                'srcdoc',
111
                'srclang',
112
                'srcset',
113
                'start',
114
                'step',
115
                'style',
116
                'summary',
117
                'tabindex',
118
                'target',
119
                'title',
120
                'type',
121
                'value',
122
                'width',
123
                'border',
124
                'charset',
125
                'cite',
126
                'class',
127
                'code',
128
                'codebase',
129
                'color',
130
                'cols',
131
                'colspan',
132
                'content',
133
                'coords',
134
                'data',
135
                'datetime',
136
                'default',
137
                'dir',
138
                'dirname',
139
                'enctype',
140
                'for',
141
                'form',
142
                'formaction',
143
                'headers',
144
                'height',
145
                'accept',
146
                'accept-charset',
147
                'accesskey',
148
                'action',
149
                'align',
150
                'alt',
151
                'bgcolor',
152
            ),
153
        ),
154
        array(
155
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
156
            'xpath' => 'starts-with(local-name(), \'data-\')',
157
        ),
158
    );
159
160
    const DOCTYPE = '<!DOCTYPE html>';
161
162 64
    /**
     * Set up the rules with an output stream and the serializer options.
     *
     * @param resource $output  Stream handle that all output is written to
     * @param array    $options Serializer options; only 'encode_entities' is read here
     */
    public function __construct($output, $options = array())
    {
        $this->out = $output;
        $this->outputMode = static::IM_IN_HTML;

        if (isset($options['encode_entities'])) {
            $this->encode = $options['encode_entities'];
        }

        // ENT_HTML5 is not relied upon under HHVM,
        // see https://github.com/facebook/hhvm/issues/2727
        $runningOnHhvm = defined('HHVM_VERSION');
        $this->hasHTML5 = defined('ENT_HTML5') && !$runningOnHhvm;
    }
174
175
    /**
     * Register an additional rule describing attributes that must always be
     * written with a value, even when that value is empty.
     *
     * @param array $rule Rule description, in the same shape as the entries
     *                    of $nonBooleanAttributes
     */
    public function addRule(array $rule)
    {
        array_push($this->nonBooleanAttributes, $rule);
    }
179
180 64
    /**
     * Attach the traverser that is used to walk child nodes.
     *
     * @param \Masterminds\HTML5\Serializer\Traverser $traverser The traverser driving the serialization
     *
     * @return $this so it can be used in chaining
     */
    public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser)
    {
        $this->traverser = $traverser;

        return $this;
    }
186
187 18
    /**
     * Write a whole document: the doctype followed by every top-level node.
     *
     * When the document has no document element only the doctype is written.
     *
     * @param \DOMDocument $dom The document to write
     */
    public function document($dom)
    {
        $this->doctype();

        if (!$dom->documentElement) {
            return;
        }

        foreach ($dom->childNodes as $topLevelNode) {
            $this->traverser->node($topLevelNode);
        }

        $this->nl();
    }
197
198 19
    /**
     * Write the doctype declaration followed by a new line.
     */
    protected function doctype()
    {
        $this->wr(static::DOCTYPE)->nl();
    }
203
204 27
    /**
     * Write an element: its opening tag, its content and, unless it is a void
     * element, its closing tag.
     *
     * Entering an svg or math element switches the output mode, which changes
     * how tags and attributes are written. The mode that was active before the
     * element is restored only after the closing tag has been handled, so that
     * an empty svg/math element is written as a single self-closing tag and
     * nested svg/math elements return to the mode of their parent.
     *
     * @param \DOMElement $ele The element to write
     */
    public function element($ele)
    {
        $name = $ele->tagName;

        // Per spec:
        // If the element has a declared namespace in the HTML, MathML or
        // SVG namespaces, we use the lname instead of the tagName.
        if ($this->traverser->isLocalElement($ele)) {
            $name = $ele->localName;
        }

        // Remember the surrounding mode so it can be restored once this
        // element has been written completely.
        $previousMode = $this->outputMode;
        $switchedMode = false;

        // If we are in SVG or MathML there is special handling.
        // Using if/elseif instead of switch because it's faster in PHP.
        if ('svg' === $name) {
            $this->outputMode = static::IM_IN_SVG;
            $name = Elements::normalizeSvgElement($name);
            $switchedMode = true;
        } elseif ('math' === $name) {
            $this->outputMode = static::IM_IN_MATHML;
            $switchedMode = true;
        }

        $this->openTag($ele);
        if (Elements::isA($name, Elements::TEXT_RAW)) {
            // Raw text elements: character data is written without escaping.
            foreach ($ele->childNodes as $child) {
                if ($child instanceof \DOMCharacterData) {
                    $this->wr($child->data);
                } elseif ($child instanceof \DOMElement) {
                    $this->element($child);
                }
            }
        } elseif ($ele->hasChildNodes()) {
            // Handle children.
            $this->traverser->children($ele->childNodes);
        }

        // If not unary, add a closing tag.
        if (!Elements::isA($name, Elements::VOID_TAG)) {
            $this->closeTag($ele);
        }

        // Close out the SVG or MathML special handling. This has to happen
        // after closeTag(): resetting the mode earlier made closeTag() emit an
        // end tag for an element that openTag() had already self-closed.
        if ($switchedMode) {
            $this->outputMode = $previousMode;
        }
    }
250
251
    /**
     * Write a text node.
     *
     * Text whose parent is a raw text element is written verbatim; any other
     * text is passed through enc() first.
     *
     * @param \DOMText $ele The text node to write
     */
    public function text($ele)
    {
        $parent = isset($ele->parentNode) ? $ele->parentNode : null;
        $insideRawText = null !== $parent
            && isset($parent->tagName)
            && Elements::isA($parent->localName, Elements::TEXT_RAW);

        if ($insideRawText) {
            $this->wr($ele->data);

            return;
        }

        // FIXME: This probably needs some flags set.
        $encoded = $this->enc($ele->data);
        $this->wr($encoded);
    }
267
268 2
    /**
     * Write a CDATA section using the owner document's XML serialization of
     * the node.
     *
     * @param \DOMNode $ele The CDATA node to write
     */
    public function cdata($ele)
    {
        // This encodes CDATA.
        $serialized = $ele->ownerDocument->saveXML($ele);
        $this->wr($serialized);
    }
273
274 3
    /**
     * Write a comment node using the owner document's XML serialization of
     * the node.
     *
     * @param \DOMNode $ele The comment node to write
     */
    public function comment($ele)
    {
        // Produces the same output as writing '<!--', the data and '-->' by hand.
        $serialized = $ele->ownerDocument->saveXML($ele);
        $this->wr($serialized);
    }
280
281 3
    /**
     * Write a processing instruction as "<?target data?>".
     *
     * @param \DOMNode $ele The processing instruction node to write
     */
    public function processorInstruction($ele)
    {
        $this->wr('<?');
        $this->wr($ele->target);
        $this->wr(' ');
        $this->wr($ele->data);
        $this->wr('?>');
    }
289
290
    /**
     * Write the namespace attributes.
     *
     * Only namespaces that are not already in scope on the parent are
     * considered, and namespaces listed in $implicitNamespaces are never
     * written. The namespace URI is escaped like any other attribute value so
     * that a quote or ampersand in it cannot break the generated markup.
     *
     * @param \DOMNode $ele The element being written
     */
    protected function namespaceAttrs($ele)
    {
        // The cached XPath object is bound to one document; rebuild it when
        // the element belongs to a different one.
        if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) {
            $this->xpath = new \DOMXPath($ele->ownerDocument);
        }

        $declared = $this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele);
        foreach ($declared as $nsNode) {
            if (in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
                continue;
            }

            $this->wr(' ')
                ->wr($nsNode->nodeName)
                ->wr('="')
                ->wr($this->escape($nsNode->nodeValue, true))
                ->wr('"');
        }
    }
308
309
    /**
     * Write the opening tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * Outside of HTML mode (SVG, MathML, or XML embedded content) an element
     * without children is written as a self-closing tag.
     *
     * @param \DOMNode $ele The element being written
     */
    protected function openTag($ele)
    {
        $this->wr('<');
        $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
        $this->wr($tagName);

        $this->attrs($ele);
        $this->namespaceAttrs($ele);

        // If there are no children outside of HTML mode this is self closing.
        $selfClosing = $this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes();
        $this->wr($selfClosing ? ' />' : '>');
    }
336
337 39
    /**
     * Write the attributes of an element.
     *
     * Each attribute is written as name="value". The value part is omitted
     * when the value is empty and no rule in $nonBooleanAttributes matches
     * the attribute.
     *
     * @param \DOMNode $ele The element whose attributes are written
     *
     * @return $this so it can be used in chaining
     */
    protected function attrs($ele)
    {
        // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
        if (!$ele->hasAttributes()) {
            return $this;
        }

        $map = $ele->attributes;
        $len = $map->length;
        for ($i = 0; $i < $len; ++$i) {
            $node = $map->item($i);
            $val = $this->enc($node->value, true);

            // XXX: The spec says that we need to ensure that anything in
            // the XML, XMLNS, or XLink NS's should use the canonical
            // prefix. It seems that DOM does this for us already, but there
            // may be exceptions.
            $name = $node->nodeName;

            // Special handling for attributes in SVG and MathML.
            // Using if/elseif instead of switch because it's faster in PHP.
            if ($this->outputMode == static::IM_IN_SVG) {
                $name = Elements::normalizeSvgAttribute($name);
            } elseif ($this->outputMode == static::IM_IN_MATHML) {
                $name = Elements::normalizeMathMlAttribute($name);
            }

            $this->wr(' ')->wr($name);

            // An empty value is only written out for attributes that are
            // known not to be boolean attributes.
            if ((null !== $val && '' !== $val) || $this->nonBooleanAttribute($node)) {
                $this->wr('="')->wr($val)->wr('"');
            }
        }

        // Return $this on every path, matching the early return above.
        return $this;
    }
373
374 10
    /**
     * Tell whether an attribute must be written with a value even when that
     * value is empty.
     *
     * The attribute is checked against every rule in $nonBooleanAttributes. A
     * rule matches when all of the conditions it defines hold:
     *  - 'nodeNamespace': namespace URI of the owner element;
     *  - 'attrNamespace': namespace URI of the attribute (the older
     *    'attNamespace' spelling is still accepted);
     *  - 'nodeName': local name of the owner element, string or list of strings;
     *  - 'attrName': local name of the attribute, string or list of strings;
     *  - 'xpath': expression evaluated with the attribute as context node,
     *    optionally with the 'prefixes' (prefix => namespace URI) registered.
     *
     * @param \DOMAttr $attr The attribute to check
     *
     * @return bool True when at least one rule matches
     */
    protected function nonBooleanAttribute(\DOMAttr $attr)
    {
        $ele = $attr->ownerElement;
        foreach ($this->nonBooleanAttributes as $rule) {
            if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
                continue;
            }

            // The documented key is 'attrNamespace'; 'attNamespace' is what
            // this method used to read, so keep honouring it as a fallback.
            $attrNamespace = null;
            if (isset($rule['attrNamespace'])) {
                $attrNamespace = $rule['attrNamespace'];
            } elseif (isset($rule['attNamespace'])) {
                $attrNamespace = $rule['attNamespace'];
            }
            if (null !== $attrNamespace && $attrNamespace !== $attr->namespaceURI) {
                continue;
            }

            if (isset($rule['nodeName']) && !is_array($rule['nodeName']) && $rule['nodeName'] !== $ele->localName) {
                continue;
            }
            if (isset($rule['nodeName']) && is_array($rule['nodeName']) && !in_array($ele->localName, $rule['nodeName'], true)) {
                continue;
            }
            if (isset($rule['attrName']) && !is_array($rule['attrName']) && $rule['attrName'] !== $attr->localName) {
                continue;
            }
            if (isset($rule['attrName']) && is_array($rule['attrName']) && !in_array($attr->localName, $rule['attrName'], true)) {
                continue;
            }

            if (isset($rule['xpath'])) {
                $xp = $this->getXPath($attr);
                if (isset($rule['prefixes'])) {
                    foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                        $xp->registerNamespace($nsPrefix, $ns);
                    }
                }
                if (!$xp->evaluate($rule['xpath'], $attr)) {
                    continue;
                }
            }

            return true;
        }

        return false;
    }
413
414 9
    /**
     * Get an XPath evaluator bound to the document the given node belongs to.
     *
     * The evaluator is cached, but a DOMXPath object only works for the
     * document it was created for. It is therefore rebuilt whenever the node
     * belongs to another document than the cached evaluator.
     *
     * @param \DOMNode $node Node whose document the evaluator must be bound to
     *
     * @return \DOMXPath
     */
    private function getXPath(\DOMNode $node)
    {
        // A document node has no owner document; it is the document itself.
        $document = $node instanceof \DOMDocument ? $node : $node->ownerDocument;

        if (!$this->xpath || $this->xpath->document !== $document) {
            $this->xpath = new \DOMXPath($document);
        }

        return $this->xpath;
    }
422
423
    /**
     * Write the closing tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * Nothing is written for a childless element outside of HTML mode.
     *
     * @param \DOMNode $ele The element being written
     */
    protected function closeTag($ele)
    {
        if ($this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes()) {
            return;
        }

        $this->wr('</');
        $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
        $this->wr($tagName)->wr('>');
    }
437
438
    /**
     * Write to the output.
     *
     * @param string $text The string to put into the output
     *
     * @return $this so it can be used in chaining
     */
    protected function wr($text)
    {
        $stream = $this->out;
        fwrite($stream, $text);

        return $this;
    }
451
452
    /**
     * Write a new line character (PHP_EOL) to the output.
     *
     * @return $this so it can be used in chaining
     */
    protected function nl()
    {
        $stream = $this->out;
        fwrite($stream, PHP_EOL);

        return $this;
    }
463
464
    /**
     * Encode text.
     *
     * When encode is set to false, the default value, the text passed in is
     * escaped per section 8.3 of the html5 spec. For details on how text is
     * escaped see the escape() method.
     *
     * When encoding is set to true the text is converted to named character
     * references where appropriate. Section 8.1.4 Character references of the
     * html5 spec refers to using named character references. This is useful for
     * characters that can't otherwise legally be used in the text.
     *
     * The named character references are listed in section 8.5. True encoding
     * will turn all named character references into their entities. This
     * includes such characters as +.# and many other common ones. By default
     * encoding here will just escape &'<>".
     *
     * Note, PHP 5.4+ has better html5 encoding.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
     *
     * @todo Use the Entities class in php 5.3 to have html5 entities.
     *
     * @param string $text      text to encode
     * @param bool   $attribute True if we are encoding an attribute, false otherwise
     *
     * @return string the encoded text
     */
    protected function enc($text, $attribute = false)
    {
        // Escape the text rather than convert to named character references.
        if (!$this->encode) {
            return $this->escape($text, $attribute);
        }

        // Without native html5 entity support (versions earlier than 5.4) the
        // entities are handled manually through the entity map.
        if (!$this->hasHTML5) {
            return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
        }

        // PHP 5.4+ can convert the named character references natively.
        $flags = ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES;

        return htmlentities($text, $flags, 'UTF-8', false);
    }
509
510
    /**
     * Escape text.
     *
     * According to the html5 spec section 8.3 Serializing HTML fragments, text
     * within tags that are not style, script, xmp, iframe, noembed, and noframes
     * need to be properly escaped.
     *
     * The & should be converted to &amp;, no breaking space unicode characters
     * converted to &nbsp;, when in attribute mode the " should be converted to
     * &quot;, and when not in attribute mode the < and > should be converted to
     * &lt; and &gt;.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
     *
     * @param string $text      text to escape
     * @param bool   $attribute True if we are escaping an attribute, false otherwise
     *
     * @return string the escaped text
     */
    protected function escape($text, $attribute = false)
    {
        // Not using htmlspecialchars because, while it does escaping, it doesn't
        // match the requirements of section 8.5. For example, it doesn't handle
        // non-breaking spaces.

        // Replacements shared by attribute values and text content.
        $replace = array(
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );

        // Replacements that depend on where the text ends up.
        if ($attribute) {
            $replace['"'] = '&quot;';
        } else {
            $replace['<'] = '&lt;';
            $replace['>'] = '&gt;';
        }

        return strtr($text, $replace);
    }
549
}
550