Completed
Push — master ( 30aab1...cadcfa )
by Asmir
07:00
created

OutputRules::nonBooleanAttribute()   D

Complexity

Conditions 22
Paths 12

Size

Total Lines 41

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 38.0687

Importance

Changes 0
Metric Value
dl 0
loc 41
ccs 19
cts 28
cp 0.6786
rs 4.1666
c 0
b 0
f 0
cc 22
nc 12
nop 1
crap 38.0687

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * @file
4
 * The rules for generating output in the serializer.
5
 *
6
 * These output rules are likely to generate output similar to the document that
7
 * was parsed. It is not intended to output exactly the document that was parsed.
8
 */
9
namespace Masterminds\HTML5\Serializer;
10
11
use Masterminds\HTML5\Elements;
12
13
/**
14
 * Generate the output html5 based on element rules.
15
 */
16
class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
17
{
18
    /**
19
     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
20
     */
21
    const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
22
23
    const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';
24
25
    const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';
26
27
    const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
28
29
    const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';
30
31
    const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';
32
33
    /**
34
     * Holds the namespace URIs that are implicit in the output; declarations for them are never written as attributes.
35
     *
36
     * @var array
37
     */
38
    protected $implicitNamespaces = array(
39
        self::NAMESPACE_HTML,
40
        self::NAMESPACE_SVG,
41
        self::NAMESPACE_MATHML,
42
        self::NAMESPACE_XML,
43
        self::NAMESPACE_XMLNS,
44
    );
45
46
    const IM_IN_HTML = 1;
47
48
    const IM_IN_SVG = 2;
49
50
    const IM_IN_MATHML = 3;
51
52
    /**
53
     * Caches whether native ENT_HTML5 entity encoding can be used (constant defined and not running on HHVM).
54
     * @var boolean
55
     */
56
    private $hasHTML5 = false;
57
58
    protected $traverser;
59
60
    protected $encode = false;
61
62
    protected $out;
63
64
    protected $outputMode;
65
66
    private $xpath;
67
68
    protected $nonBooleanAttributes = array(
69
        /*
70
        array(
71
            'nodeNamespace'=>'http://www.w3.org/1999/xhtml',
72
            'attrNamespace'=>'http://www.w3.org/1999/xhtml',
73
74
            'nodeName'=>'img', 'nodeName'=>array('img', 'a'),
75
            'attrName'=>'alt', 'attrName'=>array('title', 'alt'),
76
        ),
77
        */
78
        array(
79
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
80
            'attrName' => array('href',
81
                'hreflang',
82
                'http-equiv',
83
                'icon',
84
                'id',
85
                'keytype',
86
                'kind',
87
                'label',
88
                'lang',
89
                'language',
90
                'list',
91
                'maxlength',
92
                'media',
93
                'method',
94
                'name',
95
                'placeholder',
96
                'rel',
97
                'rows',
98
                'rowspan',
99
                'sandbox',
100
                'spellcheck',
101
                'scope',
102
                'seamless',
103
                'shape',
104
                'size',
105
                'sizes',
106
                'span',
107
                'src',
108
                'srcdoc',
109
                'srclang',
110
                'srcset',
111
                'start',
112
                'step',
113
                'style',
114
                'summary',
115
                'tabindex',
116
                'target',
117
                'title',
118
                'type',
119
                'value',
120
                'width',
121
                'border',
122
                'charset',
123
                'cite',
124
                'class',
125
                'code',
126
                'codebase',
127
                'color',
128
                'cols',
129
                'colspan',
130
                'content',
131
                'coords',
132
                'data',
133
                'datetime',
134
                'default',
135
                'dir',
136
                'dirname',
137
                'enctype',
138
                'for',
139
                'form',
140
                'formaction',
141
                'headers',
142
                'height',
143
                'accept',
144
                'accept-charset',
145
                'accesskey',
146
                'action',
147
                'align',
148
                'alt',
149
                'bgcolor',
150
            ),
151
        ),
152
        array(
153
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
154
            'xpath' => 'starts-with(local-name(), \'data-\')',
155
        ),
156
    );
157
158
    const DOCTYPE = '<!DOCTYPE html>';
159
160 64
    /**
     * Set up the rules with an output target and serializer options.
     *
     * @param resource $output
     *            Handle that the write helpers pass to fwrite().
     * @param array $options
     *            Only 'encode_entities' is read here; when set it replaces the
     *            default value of the encode flag.
     */
    public function __construct($output, $options = array())
    {
        $this->out = $output;
        $this->outputMode = static::IM_IN_HTML;

        // Native HTML5 entity handling is only trusted outside of HHVM,
        // see https://github.com/facebook/hhvm/issues/2727
        $nativeEntities = defined('ENT_HTML5');
        $runningOnHhvm = defined('HHVM_VERSION');
        $this->hasHTML5 = $nativeEntities && !$runningOnHhvm;

        if (isset($options['encode_entities'])) {
            $this->encode = $options['encode_entities'];
        }
    }
172
    /**
     * Append a rule to the list consulted when deciding whether an attribute
     * with an empty value must still be written with a value.
     *
     * @param array $rule
     *            Rule description; the keys that are understood are the ones
     *            read by nonBooleanAttribute().
     */
    public function addRule(array $rule)
    {
        $rules = $this->nonBooleanAttributes;
        $rules[] = $rule;

        $this->nonBooleanAttributes = $rules;
    }
176
177 64
    /**
     * Attach the traverser that is used to walk child nodes and to decide
     * whether an element is written with its local name.
     *
     * @param \Masterminds\HTML5\Serializer\Traverser $traverser
     *            The traverser to delegate to.
     *
     * @return $this so the call can be chained.
     */
    public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser)
    {
        $this->traverser = $traverser;

        return $this;
    }
183
184 18
    /**
     * Write a whole document: the doctype first, then every child node of the
     * document followed by a newline.
     *
     * Nothing but the doctype is written when the document has no document
     * element.
     *
     * @param \DOMDocument $dom
     *            The document to write (presumably a \DOMDocument; only
     *            documentElement and childNodes are read).
     */
    public function document($dom)
    {
        $this->doctype();

        if (!$dom->documentElement) {
            return;
        }

        foreach ($dom->childNodes as $child) {
            $this->traverser->node($child);
        }

        $this->nl();
    }
194
195 19
    /**
     * Write the doctype declaration followed by a newline.
     */
    protected function doctype()
    {
        $this->wr(static::DOCTYPE)->nl();
    }
200
201 27
    /**
     * Write an element: its opening tag, its children and, unless it is a
     * void element, its closing tag.
     *
     * An element named "svg" or "math" switches the output mode for its
     * subtree. The mode that was active before is restored only after the
     * closing tag has been handled. This keeps an empty foreign element, which
     * openTag() already wrote as self-closing, from also receiving an end tag,
     * and it keeps a nested svg/math element from dropping the enclosing one
     * back into HTML mode when it ends.
     *
     * @param \DOMElement $ele
     *            The element to write.
     */
    public function element($ele)
    {
        $name = $ele->tagName;

        // Per spec:
        // If the element has a declared namespace in the HTML, MathML or
        // SVG namespaces, we use the lname instead of the tagName.
        if ($this->traverser->isLocalElement($ele)) {
            $name = $ele->localName;
        }

        // If we are in SVG or MathML there is special handling.
        // Using if/elseif instead of switch because it's faster in PHP.
        $previousMode = $this->outputMode;
        $switchedMode = false;
        if ($name == 'svg') {
            $this->outputMode = static::IM_IN_SVG;
            $name = Elements::normalizeSvgElement($name);
            $switchedMode = true;
        } elseif ($name == 'math') {
            $this->outputMode = static::IM_IN_MATHML;
            $switchedMode = true;
        }

        $this->openTag($ele);
        if (Elements::isA($name, Elements::TEXT_RAW)) {
            // Raw text elements: character data is written as-is, without
            // going through the encoder.
            foreach ($ele->childNodes as $child) {
                if ($child instanceof \DOMCharacterData) {
                    $this->wr($child->data);
                } elseif ($child instanceof \DOMElement) {
                    $this->element($child);
                }
            }
        } elseif ($ele->hasChildNodes()) {
            // Handle children.
            $this->traverser->children($ele->childNodes);
        }

        // If not unary, add a closing tag. This must happen while the
        // element's own output mode is still active: closeTag() skips the end
        // tag of a childless foreign element because openTag() self-closed it.
        if (! Elements::isA($name, Elements::VOID_TAG)) {
            $this->closeTag($ele);
        }

        // Close out the SVG or MathML special handling.
        if ($switchedMode) {
            $this->outputMode = $previousMode;
        }
    }
247
248
    /**
     * Write a text node.
     *
     * Text whose parent is a raw text element (per Elements::TEXT_RAW) is
     * written unchanged; any other text goes through enc() first.
     *
     * @param \DOMText $ele
     *            The text node to write.
     */
    public function text($ele)
    {
        $parent = isset($ele->parentNode) ? $ele->parentNode : null;

        $insideRawText = $parent !== null
            && isset($parent->tagName)
            && Elements::isA($parent->localName, Elements::TEXT_RAW);

        // FIXME: This probably needs some flags set.
        $this->wr($insideRawText ? $ele->data : $this->enc($ele->data));
    }
264
265 2
    /**
     * Write a CDATA section using the XML serialization produced by the
     * owning document.
     *
     * @param \DOMNode $ele
     *            The CDATA node to write.
     */
    public function cdata($ele)
    {
        // saveXML() takes care of the CDATA markup for this node.
        $serialized = $ele->ownerDocument->saveXML($ele);

        $this->wr($serialized);
    }
270
271 3
    /**
     * Write a comment node using the XML serialization produced by the owning
     * document.
     *
     * @param \DOMNode $ele
     *            The comment node to write.
     */
    public function comment($ele)
    {
        // Writing the comment delimiters around $ele->data by hand would
        // produce identical output.
        $serialized = $ele->ownerDocument->saveXML($ele);

        $this->wr($serialized);
    }
277
278 3
    /**
     * Write a processing instruction: the opening delimiter, the target, a
     * single space, the data and the closing delimiter.
     *
     * @param \DOMNode $ele
     *            The processing instruction node; its target and data
     *            properties are written unescaped.
     */
    public function processorInstruction($ele)
    {
        $pieces = array('<?', $ele->target, ' ', $ele->data, '?>');

        foreach ($pieces as $piece) {
            $this->wr($piece);
        }
    }
286
    /**
     * Write the namespace attributes.
     *
     * Queries the namespace nodes of the element that are not already in
     * scope on its parent and writes each one as an attribute, skipping the
     * namespaces listed in $implicitNamespaces. The namespace URI is passed
     * through enc() in attribute mode, like any other attribute value, so that
     * a URI containing `"` or `&` cannot break out of the quoted value.
     *
     * @param \DOMNode $ele
     *            The element being written.
     */
    protected function namespaceAttrs($ele)
    {
        // The cached XPath object is only valid for the document it was
        // created for.
        if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) {
            $this->xpath = new \DOMXPath($ele->ownerDocument);
        }

        foreach ($this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) {
            if (in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
                continue;
            }

            $this->wr(' ')
                ->wr($nsNode->nodeName)
                ->wr('="')
                ->wr($this->enc($nsNode->nodeValue, true))
                ->wr('"');
        }
    }
305
306
    /**
     * Write the opening tag, including attributes and namespace declarations.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * Outside of HTML mode (SVG, MathML or XML embedded content) an element
     * without child nodes is written as self-closing.
     *
     * @param \DOMNode $ele
     *            The element being written.
     */
    protected function openTag($ele)
    {
        $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
        $this->wr('<')->wr($tagName);

        $this->attrs($ele);
        $this->namespaceAttrs($ele);

        // Only foreign content without children is self-closing.
        $selfClosing = $this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes();

        $this->wr($selfClosing ? ' />' : '>');
    }
336
337 39
    /**
     * Write the attributes of an element.
     *
     * Each attribute is written as a space followed by its name. The
     * `="value"` part follows when the encoded value is non-empty, or when
     * nonBooleanAttribute() reports that the attribute must always carry a
     * value; otherwise the attribute is written value-less. Attribute names
     * are normalized through Elements while the output mode is SVG or MathML.
     *
     * @param \DOMNode $ele
     *            The element whose attributes are written.
     *
     * @return $this so the call can be chained, whether or not the element
     *         had any attributes.
     */
    protected function attrs($ele)
    {
        // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
        if (! $ele->hasAttributes()) {
            return $this;
        }

        $map = $ele->attributes;
        $len = $map->length;
        for ($i = 0; $i < $len; ++ $i) {
            $node = $map->item($i);
            $val = $this->enc($node->value, true);

            // XXX: The spec says that we need to ensure that anything in
            // the XML, XMLNS, or XLink NS's should use the canonical
            // prefix. It seems that DOM does this for us already, but there
            // may be exceptions.
            $name = $node->nodeName;

            // Special handling for attributes in SVG and MathML.
            // Using if/elseif instead of switch because it's faster in PHP.
            if ($this->outputMode == static::IM_IN_SVG) {
                $name = Elements::normalizeSvgAttribute($name);
            } elseif ($this->outputMode == static::IM_IN_MATHML) {
                $name = Elements::normalizeMathMlAttribute($name);
            }

            $this->wr(' ')->wr($name);

            // An empty value is dropped unless a rule says this attribute is
            // not a boolean attribute.
            $hasValue = $val !== null && $val !== '';
            if ($hasValue || $this->nonBooleanAttribute($node)) {
                $this->wr('="')->wr($val)->wr('"');
            }
        }

        return $this;
    }
373
374
375 10
    /**
     * Tell whether an attribute must be written with a value even when that
     * value is empty.
     *
     * The attribute is checked against every rule in $nonBooleanAttributes; the
     * first rule whose constraints all hold makes the method return true. A
     * rule may constrain:
     *  - 'nodeNamespace': namespace URI of the owner element;
     *  - 'attrNamespace': namespace URI of the attribute (the misspelled key
     *    'attNamespace' is still honoured for backward compatibility);
     *  - 'nodeName': local name of the owner element, a string or a list;
     *  - 'attrName': local name of the attribute, a string or a list;
     *  - 'xpath': expression evaluated with the attribute as context node,
     *    optionally with the 'prefixes' (prefix => namespace URI) registered.
     *
     * @param \DOMAttr $attr
     *            The attribute to check.
     *
     * @return boolean True when a rule matches, false otherwise.
     */
    protected function nonBooleanAttribute(\DOMAttr $attr)
    {
        $ele = $attr->ownerElement;
        foreach ($this->nonBooleanAttributes as $rule) {
            if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
                continue;
            }

            // 'attrNamespace' is the documented spelling; 'attNamespace' is
            // the key that was historically read here.
            foreach (array('attrNamespace', 'attNamespace') as $nsKey) {
                if (isset($rule[$nsKey]) && $rule[$nsKey] !== $attr->namespaceURI) {
                    continue 2;
                }
            }

            if (isset($rule['nodeName']) && !$this->ruleNameMatches($rule['nodeName'], $ele->localName)) {
                continue;
            }
            if (isset($rule['attrName']) && !$this->ruleNameMatches($rule['attrName'], $attr->localName)) {
                continue;
            }

            if (isset($rule['xpath'])) {
                $xp = $this->getXPath($attr);
                if (isset($rule['prefixes'])) {
                    foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                        $xp->registerNamespace($nsPrefix, $ns);
                    }
                }
                if (!$xp->evaluate($rule['xpath'], $attr)) {
                    continue;
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Compare a local name with a rule constraint that is either a single
     * name or a list of names; the comparison is strict in both cases.
     *
     * @param string|array $expected
     *            The name, or the list of names, accepted by the rule.
     * @param string $actual
     *            The local name found in the document.
     *
     * @return boolean True when $actual satisfies the constraint.
     */
    private function ruleNameMatches($expected, $actual)
    {
        if (is_array($expected)) {
            return in_array($actual, $expected, true);
        }

        return $expected === $actual;
    }
416
417 9
    /**
     * Return an XPath object bound to the document that owns the given node.
     *
     * The object is cached, but it is replaced whenever the node belongs to a
     * different document than the cached one, so that one OutputRules
     * instance can serialize several documents without evaluating
     * expressions against the wrong document.
     *
     * @param \DOMNode $node
     *            A node of the document the expressions will run against.
     *
     * @return \DOMXPath
     */
    private function getXPath(\DOMNode $node)
    {
        if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
            $this->xpath = new \DOMXPath($node->ownerDocument);
        }

        return $this->xpath;
    }
423
424
    /**
     * Write the closing tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * Nothing is written for an element without child nodes while the output
     * mode is not HTML.
     *
     * @param \DOMNode $ele
     *            The element being written.
     */
    protected function closeTag($ele)
    {
        if ($this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes()) {
            return;
        }

        $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;

        $this->wr('</')->wr($tagName)->wr('>');
    }
439
440
    /**
     * Send a piece of text to the output handle.
     *
     * @param string $text
     *            The string to put into the output.
     *
     * @return $this This rules object, so that writes can be chained.
     */
    protected function wr($text)
    {
        fwrite($this->out, $text);

        return $this;
    }
453
454
    /**
     * Send the platform end-of-line sequence (PHP_EOL) to the output handle.
     *
     * @return $this This rules object, so that writes can be chained.
     */
    protected function nl()
    {
        fwrite($this->out, PHP_EOL);

        return $this;
    }
464
465
    /**
     * Encode text.
     *
     * With the encode option off (the default) the text is only escaped as
     * described in section 8.3 of the html5 spec; see escape() for the exact
     * replacements.
     *
     * With the encode option on, characters are converted to named character
     * references where appropriate (section 8.1.4 of the html5 spec; the
     * references themselves are listed in section 8.5). This is useful for
     * characters that can't otherwise legally be used in the text, but it also
     * converts many common characters that have a named reference.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
     *
     * @todo Use the Entities class in php 5.3 to have html5 entities.
     *
     * @param string $text
     *            text to encode.
     * @param boolean $attribute
     *            True if we are encoding an attribute, false otherwise. Only
     *            used when the text is escaped rather than encoded.
     *
     * @return string The encoded text.
     */
    protected function enc($text, $attribute = false)
    {
        if ($this->encode) {
            if (!$this->hasHTML5) {
                // No usable native html5 entity table (PHP before 5.4, or
                // HHVM): fall back to the bundled entity map.
                return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
            }

            // PHP 5.4+ can convert to html5 named character references natively.
            return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
        }

        // Escape the text rather than convert to named character references.
        return $this->escape($text, $attribute);
    }
513
514
    /**
     * Escape text.
     *
     * According to the html5 spec section 8.3 Serializing HTML fragments, text
     * within tags that are not style, script, xmp, iframe, noembed, and noframes
     * needs to be properly escaped.
     *
     * In every mode & becomes &amp; and the UTF-8 no-break space becomes
     * &nbsp;. In attribute mode " additionally becomes &quot;; outside of
     * attribute mode < and > additionally become &lt; and &gt;.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
     *
     * @param string $text
     *            text to escape.
     * @param boolean $attribute
     *            True if we are escaping an attribute, false otherwise
     *
     * @return string The escaped text.
     */
    protected function escape($text, $attribute = false)
    {
        // Not using htmlspecialchars because, while it does escaping, it doesn't
        // match the requirements of section 8.5. For example, it doesn't handle
        // non-breaking spaces.
        $replacements = array(
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );

        if ($attribute) {
            $replacements['"'] = '&quot;';
        } else {
            $replacements['<'] = '&lt;';
            $replacements['>'] = '&gt;';
        }

        return strtr($text, $replacements);
    }
556
}
557