Completed
Push — master ( c025ac...30aab1 )
by Asmir
05:47
created

OutputRules::element()   C

Complexity

Conditions 12
Paths 60

Size

Total Lines 46
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 31
CRAP Score 12

Importance

Changes 0
Metric Value
dl 0
loc 46
ccs 31
cts 31
cp 1
rs 5.15
c 0
b 0
f 0
cc 12
eloc 23
nc 60
nop 1
crap 12

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * @file
4
 * The rules for generating output in the serializer.
5
 *
6
 * These output rules are likely to generate output similar to the document that
7
 * was parsed. It is not intended to output exactly the document that was parsed.
8
 */
9
namespace Masterminds\Html5\Serializer;
10
11
use Masterminds\Html5\Elements;
12
13
/**
14
 * Generate the output html5 based on element rules.
15
 */
16
class OutputRules implements \Masterminds\Html5\Serializer\RulesInterface
17
{
18
    /**
19
     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
20
     */
21
    const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
22
23
    const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';
24
25
    const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';
26
27
    const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
28
29
    const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';
30
31
    const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';
32
33
    /**
34
     * Holds the namespace URIs that are treated as implicit: namespaceAttrs()
     * never writes a namespace declaration for any of them.
35
     *
36
     * @var array
37
     */
38
    protected $implicitNamespaces = array(
39
        self::NAMESPACE_HTML,
40
        self::NAMESPACE_SVG,
41
        self::NAMESPACE_MATHML,
42
        self::NAMESPACE_XML,
43
        self::NAMESPACE_XMLNS,
44
    );
45
46
    const IM_IN_HTML = 1;
47
48
    const IM_IN_SVG = 2;
49
50
    const IM_IN_MATHML = 3;
51
52
    /**
53
     * Caches whether ENT_HTML5 is available (always false when running on HHVM).
54
     * @var boolean
55
     */
56
    private $hasHTML5 = false;
57
58
    protected $traverser;
59
60
    protected $encode = false;
61
62
    protected $out;
63
64
    protected $outputMode;
65
66
    private $xpath;
67
68
    protected $nonBooleanAttributes = array(
69
        /*
70
        array(
71
            'nodeNamespace'=>'http://www.w3.org/1999/xhtml',
72
            'attrNamespace'=>'http://www.w3.org/1999/xhtml',
73
74
            'nodeName'=>'img', 'nodeName'=>array('img', 'a'),
75
            'attrName'=>'alt', 'attrName'=>array('title', 'alt'),
76
        ),
77
        */
78
        array(
79
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
80
            'attrName' => array('href',
81
                'hreflang',
82
                'http-equiv',
83
                'icon',
84
                'id',
85
                'keytype',
86
                'kind',
87
                'label',
88
                'lang',
89
                'language',
90
                'list',
91
                'maxlength',
92
                'media',
93
                'method',
94
                'name',
95
                'placeholder',
96
                'rel',
97
                'rows',
98
                'rowspan',
99
                'sandbox',
100
                'spellcheck',
101
                'scope',
102
                'seamless',
103
                'shape',
104
                'size',
105
                'sizes',
106
                'span',
107
                'src',
108
                'srcdoc',
109
                'srclang',
110
                'srcset',
111
                'start',
112
                'step',
113
                'style',
114
                'summary',
115
                'tabindex',
116
                'target',
117
                'title',
118
                'type',
119
                'value',
120
                'width',
121
                'border',
122
                'charset',
123
                'cite',
124
                'class',
125
                'code',
126
                'codebase',
127
                'color',
128
                'cols',
129
                'colspan',
130
                'content',
131
                'coords',
132
                'data',
133
                'datetime',
134
                'default',
135
                'dir',
136
                'dirname',
137
                'enctype',
138
                'for',
139
                'form',
140
                'formaction',
141
                'headers',
142
                'height',
143
                'accept',
144
                'accept-charset',
145
                'accesskey',
146
                'action',
147
                'align',
148
                'alt',
149
                'bgcolor',
150
            ),
151
        ),
152
        array(
153
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
154
            'xpath' => 'starts-with(local-name(), \'data-\')',
155
        ),
156
    );
157
158
    const DOCTYPE = '<!DOCTYPE html>';
159
160 62
    /**
     * Set up the rules with an output target and serialization options.
     *
     * @param resource $output
     *            Handle that wr() and nl() hand to fwrite().
     * @param array $options
     *            Optional settings; only 'encode_entities' is read here.
     */
    public function __construct($output, $options = array())
    {
        $this->out = $output;
        $this->outputMode = static::IM_IN_HTML;

        // Leave the default (false) in place unless the caller set the option.
        if (isset($options['encode_entities'])) {
            $this->encode = $options['encode_entities'];
        }

        // HHVM is excluded on purpose, see https://github.com/facebook/hhvm/issues/2727
        $this->hasHTML5 = !defined('HHVM_VERSION') && defined('ENT_HTML5');
    }
172
    /**
     * Register one more rule to be consulted by nonBooleanAttribute().
     *
     * @param array $rule
     *            Rule definition using the same keys as the built-in entries
     *            of $nonBooleanAttributes.
     */
    public function addRule(array $rule)
    {
        array_push($this->nonBooleanAttributes, $rule);
    }
176
177 62
    /**
     * Store the traverser that is used to walk child nodes and to decide
     * whether an element is written with its local name.
     *
     * @param \Masterminds\Html5\Serializer\Traverser $traverser
     *
     * @return $this so it can be used in chaining.
     */
    public function setTraverser(\Masterminds\Html5\Serializer\Traverser $traverser)
    {
        $this->traverser = $traverser;

        return $this;
    }
183
184 18
    /**
     * Write a whole document: the doctype, then every top-level node.
     *
     * @param \DOMDocument $dom
     *            The document to write (presumably a DOMDocument; only
     *            documentElement and childNodes are read).
     */
    public function document($dom)
    {
        $this->doctype();

        // Without a root element only the doctype line is produced.
        if (!$dom->documentElement) {
            return;
        }

        foreach ($dom->childNodes as $child) {
            $this->traverser->node($child);
        }

        $this->nl();
    }
194
195 19
    /**
     * Write the doctype declaration followed by a line break.
     */
    protected function doctype()
    {
        // wr() returns $this, so the newline can be chained directly.
        $this->wr(static::DOCTYPE)->nl();
    }
200
201 26
    /**
     * Write an element: its opening tag, its children and, unless it is a
     * void element, its closing tag.
     *
     * Entering an svg or math element switches the output mode for everything
     * inside it. The mode that was active before is restored only after the
     * closing tag has been handled, so that
     *  - an empty svg/math element is written once as a self-closing tag
     *    instead of being followed by a stray closing tag, and
     *  - an svg/math element nested in another foreign element hands the
     *    outer mode back to its following siblings instead of forcing HTML.
     *
     * @param \DOMElement $ele
     *            The element to write.
     */
    public function element($ele)
    {
        // Per spec:
        // If the element has a declared namespace in the HTML, MathML or
        // SVG namespaces, we use the lname instead of the tagName.
        $name = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;

        // If we are in SVG or MathML there is special handling.
        // Using if/elseif instead of switch because it's faster in PHP.
        $previousMode = $this->outputMode;
        $switchesMode = true;
        if ($name == 'svg') {
            $this->outputMode = static::IM_IN_SVG;
            $name = Elements::normalizeSvgElement($name);
        } elseif ($name == 'math') {
            $this->outputMode = static::IM_IN_MATHML;
        } else {
            $switchesMode = false;
        }

        $this->openTag($ele);

        if (Elements::isA($name, Elements::TEXT_RAW)) {
            // Raw text elements: character data is written verbatim, nested
            // elements are serialized recursively, other node types are skipped.
            foreach ($ele->childNodes as $child) {
                if ($child instanceof \DOMCharacterData) {
                    $this->wr($child->data);
                } elseif ($child instanceof \DOMElement) {
                    $this->element($child);
                }
            }
        } elseif ($ele->hasChildNodes()) {
            // Handle children.
            $this->traverser->children($ele->childNodes);
        }

        // If not unary, add a closing tag.
        if (! Elements::isA($name, Elements::VOID_TAG)) {
            $this->closeTag($ele);
        }

        // Close out the SVG or MathML special handling. This has to happen
        // after closeTag(), which depends on the mode used by openTag().
        if ($switchesMode) {
            $this->outputMode = $previousMode;
        }
    }
247
248
    /**
249
     * Write a text node.
250
     *
251
     * @param \DOMText $ele
252
     *            The text node to write.
253
     */
254 23
    public function text($ele)
    {
        // Text whose parent is a raw text element is written untouched;
        // everything else goes through enc().
        $parent = isset($ele->parentNode) ? $ele->parentNode : null;
        $insideRawText = $parent !== null
            && isset($parent->tagName)
            && Elements::isA($parent->localName, Elements::TEXT_RAW);

        // FIXME: This probably needs some flags set.
        $this->wr($insideRawText ? $ele->data : $this->enc($ele->data));
    }
264
265 2
    /**
     * Write a CDATA section.
     *
     * @param \DOMNode $ele
     *            The CDATA node to write.
     */
    public function cdata($ele)
    {
        // The owning document serializes the node itself; this encodes CDATA.
        $markup = $ele->ownerDocument->saveXML($ele);

        $this->wr($markup);
    }
270
271 3
    /**
     * Write a comment node.
     *
     * @param \DOMNode $ele
     *            The comment node to write.
     */
    public function comment($ele)
    {
        // The owning document serializes the node itself. According to the
        // original author this matches writing '<!--', the data and '-->' by hand.
        $markup = $ele->ownerDocument->saveXML($ele);

        $this->wr($markup);
    }
277
278 2
    /**
     * Write a processing instruction: the opening marker, the target, a
     * single space, the data and the closing marker.
     *
     * @param \DOMNode $ele
     *            The processing instruction node (target and data are read).
     */
    public function processorInstruction($ele)
    {
        $pieces = array('<?', $ele->target, ' ', $ele->data, '?>');

        // One write per piece, in order.
        foreach ($pieces as $piece) {
            $this->wr($piece);
        }
    }
286
    /**
287
     * Write the namespace attributes
288
     *
289
     *
290
     * @param \DOMNode $ele
291
     *            The element being written.
292
     */
293 27
    protected function namespaceAttrs($ele)
    {
        // Reuse the cached XPath helper only while it belongs to this document.
        if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) {
            $this->xpath = new \DOMXPath($ele->ownerDocument);
        }

        $declared = $this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele);

        foreach ($declared as $nsNode) {
            // Implicit namespaces are never written out. Both sides are
            // strings, so the comparison can be strict.
            if (in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
                continue;
            }

            // The URI ends up inside a double-quoted attribute value, so it is
            // escaped in attribute mode. escape() is used rather than enc() so
            // that the 'encode_entities' option does not rewrite the URI.
            $this->wr(' ')
                ->wr($nsNode->nodeName)
                ->wr('="')
                ->wr($this->escape($nsNode->nodeValue, true))
                ->wr('"');
        }
    }
305
306
    /**
307
     * Write the opening tag.
308
     *
309
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
310
     * qualified name (8.3).
311
     *
312
     * @param \DOMNode $ele
313
     *            The element being written.
314
     */
315 27
    protected function openTag($ele)
    {
        $this->wr('<');
        $this->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);

        $this->attrs($ele);
        $this->namespaceAttrs($ele);

        // In HTML mode the tag always ends with '>'. Outside HTML mode we are
        // in SVG, MathML, or XML embedded content, where an element without
        // children is written as self closing.
        $selfClosing = $this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes();

        $this->wr($selfClosing ? ' />' : '>');
    }
336
337 38
    /**
     * Write the attributes of an element.
     *
     * Every attribute is written as ' name'. The '="value"' part is added when
     * the encoded value is not empty or when nonBooleanAttribute() reports that
     * the attribute must always carry a value.
     *
     * @param \DOMNode $ele
     *            The element whose attributes are written.
     *
     * @return $this so it can be used in chaining (on every path).
     */
    protected function attrs($ele)
    {
        // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
        if (! $ele->hasAttributes()) {
            return $this;
        }

        foreach ($ele->attributes as $node) {
            $val = $this->enc($node->value, true);

            // XXX: The spec says that we need to ensure that anything in
            // the XML, XMLNS, or XLink NS's should use the canonical
            // prefix. It seems that DOM does this for us already, but there
            // may be exceptions.
            $name = $node->nodeName;

            // Special handling for attributes in SVG and MathML.
            // Using if/elseif instead of switch because it's faster in PHP.
            if ($this->outputMode == static::IM_IN_SVG) {
                $name = Elements::normalizeSvgAttribute($name);
            } elseif ($this->outputMode == static::IM_IN_MATHML) {
                $name = Elements::normalizeMathMlAttribute($name);
            }

            $this->wr(' ')->wr($name);

            // An empty value is only written for attributes that are known
            // not to be boolean; otherwise the bare name is enough.
            if ($val !== '' || $this->nonBooleanAttribute($node)) {
                $this->wr('="')->wr($val)->wr('"');
            }
        }

        return $this;
    }
373
374
375 10
    /**
     * Tell whether an attribute must be written with a value even when that
     * value is empty.
     *
     * Each entry of $nonBooleanAttributes is a set of constraints; the first
     * rule whose constraints all hold makes the attribute non-boolean.
     * Supported keys: 'nodeNamespace', 'attrNamespace' (the legacy spelling
     * 'attNamespace' is still honoured), 'nodeName' and 'attrName' (a string or
     * a list of strings), 'xpath' (evaluated with the attribute as context
     * node) and 'prefixes' (prefix => namespace map registered before the
     * expression is evaluated).
     *
     * @param \DOMAttr $attr
     *            The attribute to check.
     *
     * @return boolean True if a matching rule was found, false otherwise.
     */
    protected function nonBooleanAttribute(\DOMAttr $attr)
    {
        $ele = $attr->ownerElement;

        foreach ($this->nonBooleanAttributes as $rule) {
            if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
                continue;
            }

            // The documented key is 'attrNamespace'; 'attNamespace' is the
            // spelling this method used to read and is kept as a fallback.
            $attrNamespace = null;
            if (isset($rule['attrNamespace'])) {
                $attrNamespace = $rule['attrNamespace'];
            } elseif (isset($rule['attNamespace'])) {
                $attrNamespace = $rule['attNamespace'];
            }
            if ($attrNamespace !== null && $attrNamespace !== $attr->namespaceURI) {
                continue;
            }

            if (isset($rule['nodeName'])) {
                $nodeNames = is_array($rule['nodeName']) ? $rule['nodeName'] : array($rule['nodeName']);
                if (!in_array($ele->localName, $nodeNames, true)) {
                    continue;
                }
            }

            if (isset($rule['attrName'])) {
                $attrNames = is_array($rule['attrName']) ? $rule['attrName'] : array($rule['attrName']);
                if (!in_array($attr->localName, $attrNames, true)) {
                    continue;
                }
            }

            if (isset($rule['xpath'])) {
                $xp = $this->getXPath($attr);

                if (isset($rule['prefixes'])) {
                    foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                        $xp->registerNamespace($nsPrefix, $ns);
                    }
                }

                if (!$xp->evaluate($rule['xpath'], $attr)) {
                    continue;
                }
            }

            return true;
        }

        return false;
    }
416
417 9
    /**
     * Return an XPath helper bound to the document that owns the given node.
     *
     * The helper is cached, but only reused while it belongs to the same
     * document as the node. This is the same check namespaceAttrs() performs;
     * without it a helper created for an earlier document would be used to
     * evaluate expressions against nodes of another document.
     *
     * @param \DOMNode $node
     *            A node of the document to query.
     *
     * @return \DOMXPath
     */
    private function getXPath(\DOMNode $node)
    {
        if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
            $this->xpath = new \DOMXPath($node->ownerDocument);
        }

        return $this->xpath;
    }
423
424
    /**
425
     * Write the closing tag.
426
     *
427
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
428
     * qualified name (8.3).
429
     *
430
     * @param \DOMNode $ele
431
     *            The element being written.
432
     */
433 26
    protected function closeTag($ele)
    {
        // Outside HTML mode an element without children was already written as
        // a self-closing tag by openTag(), so there is nothing left to close.
        if ($this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes()) {
            return;
        }

        $this->wr('</');
        $this->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);
        $this->wr('>');
    }
439
440
    /**
441
     * Write to the output.
442
     *
443
     * @param string $text
444
     *            The string to put into the output.
445
     *
446
     * @return $this The rules object itself, so calls can be chained.
447
     */
448 46
    protected function wr($text)
    {
        // Append the text to the output handle; the result of fwrite() is not checked.
        fwrite($this->out, $text);

        return $this;
    }
453
454
    /**
455
     * Write a new line character.
456
     *
457
     * @return $this The rules object itself, so calls can be chained.
458
     */
459 20
    protected function nl()
    {
        // The platform line ending is written straight to the output handle.
        fwrite($this->out, PHP_EOL);

        return $this;
    }
464
465
    /**
466
     * Encode text.
467
     *
468
     * When encode is set to false, the default value, the text passed in is
469
     * escaped per section 8.3 of the html5 spec. For details on how text is
470
     * escaped see the escape() method.
471
     *
472
     * When encoding is set to true the text is converted to named character
473
     * references where appropriate. Section 8.1.4 Character references of the
474
     * html5 spec refers to using named character references. This is useful for
475
     * characters that can't otherwise legally be used in the text.
476
     *
477
     * The named character references are listed in section 8.5.
478
     *
479
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references True encoding will turn all named character references into their entities.
480
     *      This includes such characters as +.# and many other common ones. By default
481
     *      encoding here will just escape &'<>".
482
     *
483
     *      Note, PHP 5.4+ has better html5 encoding.
484
     *
485
     * @todo Use the Entities class in php 5.3 to have html5 entities.
486
     *
487
     * @param string $text
488
     *            text to encode.
489
     * @param boolean $attribute
490
     *            True if we are encoding an attribute, false otherwise
491
     *
492
     * @return string The encoded text.
493
     */
494 43
    protected function enc($text, $attribute = false)
    {
        // Escape the text rather than convert to named character references.
        if (! $this->encode) {
            return $this->escape($text, $attribute);
        }

        // Without usable native html5 entity support (versions earlier than
        // PHP 5.4, or HHVM) the entities are replaced manually from the map.
        if (! $this->hasHTML5) {
            return strtr($text, \Masterminds\Html5\Serializer\Html5Entities::$map);
        }

        // PHP 5.4+ can convert to the html5 named character references natively.
        return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
    }
513
514
    /**
515
     * Escape text.
516
     *
517
     * According to the html5 spec section 8.3 Serializing HTML fragments, text
518
     * within tags that are not style, script, xmp, iframe, noembed, and noframes
519
     * need to be properly escaped.
520
     *
521
     * The & should be converted to &amp;, no breaking space unicode characters
522
     * converted to &nbsp;, when in attribute mode the " should be converted to
523
     * &quot;, and when not in attribute mode the < and > should be converted to
524
     * &lt; and &gt;.
525
     *
526
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
527
     *
528
     * @param string $text
529
     *            text to escape.
530
     * @param boolean $attribute
531
     *            True if we are escaping an attribute, false otherwise
532
     */
533 50
    protected function escape($text, $attribute = false)
    {
        // Not using htmlspecialchars because, while it does escaping, it doesn't
        // match the requirements of section 8.5. For example, it doesn't handle
        // non-breaking spaces.

        // Replacements shared by attribute values and text content.
        $replace = array(
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );

        if ($attribute) {
            // Attribute values additionally escape the double quote.
            $replace['"'] = '&quot;';
        } else {
            // Text content additionally escapes the angle brackets.
            $replace['<'] = '&lt;';
            $replace['>'] = '&gt;';
        }

        return strtr($text, $replace);
    }
556
}
557