Completed
Pull Request — master (#160)
by
unknown
03:26 queued 01:05
created

OutputRules   F

Complexity

Total Complexity 80

Size/Duplication

Total Lines 532
Duplicated Lines 0 %

Coupling/Cohesion

Dependencies 3

Test Coverage

Coverage 92.86%

Importance

Changes 0
Metric Value
wmc 80
cbo 3
dl 0
loc 532
ccs 169
cts 182
cp 0.9286
rs 2
c 0
b 0
f 0

20 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 12 3
A addRule() 0 4 1
A setTraverser() 0 6 1
A document() 0 10 3
A doctype() 0 5 1
C element() 0 46 12
A text() 0 11 4
A cdata() 0 5 1
A comment() 0 6 1
A processorInstruction() 0 8 1
A namespaceAttrs() 0 12 5
A openTag() 0 19 4
B attrs() 0 36 8
D nonBooleanAttribute() 0 39 22
A getXPath() 0 8 2
A closeTag() 0 6 4
A wr() 0 6 1
A nl() 0 6 1
A enc() 0 18 3
A escape() 0 22 2

How to fix   Complexity   

Complex Class

Complex classes like OutputRules often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use OutputRules, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * @file
4
 * The rules for generating output in the serializer.
5
 *
6
 * These output rules are likely to generate output similar to the document that
7
 * was parsed. It is not intended to output exactly the document that was parsed.
8
 */
9
10
namespace Masterminds\HTML5\Serializer;
11
12
use Masterminds\HTML5\Elements;
13
14
/**
15
 * Generate the output html5 based on element rules.
16
 */
17
class OutputRules implements RulesInterface
18
{
19
    /**
20
     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
21
     */
22
    const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
23
24
    const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';
25
26
    const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';
27
28
    const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
29
30
    const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';
31
32
    const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';
33
34
    /**
35
     * Holds the namespace URIs that are implicit in the output and are therefore never written as namespace attributes.
36
     *
37
     * @var array
38
     */
39
    protected $implicitNamespaces = array(
40
        self::NAMESPACE_HTML,
41
        self::NAMESPACE_SVG,
42
        self::NAMESPACE_MATHML,
43
        self::NAMESPACE_XML,
44
        self::NAMESPACE_XMLNS,
45
    );
46
47
    const IM_IN_HTML = 1;
48
49
    const IM_IN_SVG = 2;
50
51
    const IM_IN_MATHML = 3;
52
53
    /**
54
     * Caches whether ENT_HTML5 is available (determined once in the constructor).
55
     *
56
     * @var bool
57
     */
58
    private $hasHTML5 = false;
59
60
    protected $traverser;
61
62
    protected $encode = false;
63
64
    protected $out;
65
66
    protected $outputMode;
67
68
    private $xpath;
69
70
    protected $nonBooleanAttributes = array(
71
        /*
72
        array(
73
            'nodeNamespace'=>'http://www.w3.org/1999/xhtml',
74
            'attrNamespace'=>'http://www.w3.org/1999/xhtml',
75
76
            'nodeName'=>'img', 'nodeName'=>array('img', 'a'),
77
            'attrName'=>'alt', 'attrName'=>array('title', 'alt'),
78
        ),
79
        */
80
        array(
81
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
82
            'attrName' => array('href',
83
                'hreflang',
84
                'http-equiv',
85
                'icon',
86
                'id',
87
                'keytype',
88
                'kind',
89
                'label',
90
                'lang',
91
                'language',
92
                'list',
93
                'maxlength',
94
                'media',
95
                'method',
96
                'name',
97
                'placeholder',
98
                'rel',
99
                'rows',
100
                'rowspan',
101
                'sandbox',
102
                'spellcheck',
103
                'scope',
104
                'seamless',
105
                'shape',
106
                'size',
107
                'sizes',
108
                'span',
109
                'src',
110
                'srcdoc',
111
                'srclang',
112
                'srcset',
113
                'start',
114
                'step',
115
                'style',
116
                'summary',
117
                'tabindex',
118
                'target',
119
                'title',
120
                'type',
121
                'value',
122
                'width',
123
                'border',
124
                'charset',
125
                'cite',
126
                'class',
127
                'code',
128
                'codebase',
129
                'color',
130
                'cols',
131
                'colspan',
132
                'content',
133
                'coords',
134
                'data',
135
                'datetime',
136
                'default',
137
                'dir',
138
                'dirname',
139
                'enctype',
140
                'for',
141
                'form',
142
                'formaction',
143
                'headers',
144
                'height',
145
                'accept',
146
                'accept-charset',
147
                'accesskey',
148
                'action',
149
                'align',
150
                'alt',
151
                'bgcolor',
152
            ),
153
        ),
154
        array(
155
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
156
            'xpath' => 'starts-with(local-name(), \'data-\')',
157
        ),
158
    );
159
160
    const DOCTYPE = '<!DOCTYPE html>';
161
162 64
    /**
     * Set up the rules with an output stream and optional settings.
     *
     * @param resource $output  Stream that the writer methods pass to fwrite().
     * @param array    $options Optional settings. When 'encode_entities' is set
     *                          it replaces the default (false) encode flag.
     */
    public function __construct($output, $options = array())
    {
        $this->out = $output;
        $this->outputMode = static::IM_IN_HTML;

        // ENT_HTML5 is not trusted under HHVM,
        // see https://github.com/facebook/hhvm/issues/2727
        $this->hasHTML5 = !defined('HHVM_VERSION') && defined('ENT_HTML5');

        if (isset($options['encode_entities'])) {
            $this->encode = $options['encode_entities'];
        }
    }
174
175
    /**
     * Append a rule to the list consulted by nonBooleanAttribute().
     *
     * @param array $rule The rule definition to append.
     */
    public function addRule(array $rule)
    {
        array_push($this->nonBooleanAttributes, $rule);
    }
179
180 64
    /**
     * Store the traverser that is used to walk child nodes.
     *
     * @param Traverser $traverser The traverser driving this rule set.
     *
     * @return $this
     */
    public function setTraverser(Traverser $traverser)
    {
        $this->traverser = $traverser;

        return $this;
    }
186
187 18
    /**
     * Write a whole document: the doctype, then every top-level node.
     *
     * Nothing beyond the doctype is written when the document has no
     * document element.
     *
     * @param \DOMDocument $dom The document to write.
     */
    public function document($dom)
    {
        $this->doctype();

        if (!$dom->documentElement) {
            return;
        }

        foreach ($dom->childNodes as $child) {
            $this->traverser->node($child);
        }

        $this->nl();
    }
197
198 19
    /**
     * Write the doctype declaration followed by a line break.
     */
    protected function doctype()
    {
        $this->wr(static::DOCTYPE)->nl();
    }
203
204 27
    /**
     * Write an element: opening tag, children and, unless void, the closing tag.
     *
     * Entering an svg or math element switches the output mode to SVG or
     * MathML. The mode that was active before the element is restored only
     * after the closing tag has been handled. This keeps the outer mode intact
     * for nested svg/math elements, and it prevents a childless svg/math
     * element (already written as self-closing by openTag()) from receiving an
     * additional end tag.
     *
     * @param \DOMElement $ele The element to write.
     */
    public function element($ele)
    {
        $name = $ele->tagName;

        // Per spec:
        // If the element has a declared namespace in the HTML, MathML or
        // SVG namespaces, we use the lname instead of the tagName.
        if ($this->traverser->isLocalElement($ele)) {
            $name = $ele->localName;
        }

        // Remember the current mode so it can be restored once this element
        // has been completely written.
        $previousMode = $this->outputMode;
        $switchedMode = false;

        if ('svg' === $name) {
            $this->outputMode = static::IM_IN_SVG;
            $name = Elements::normalizeSvgElement($name);
            $switchedMode = true;
        } elseif ('math' === $name) {
            $this->outputMode = static::IM_IN_MATHML;
            $switchedMode = true;
        }

        $this->openTag($ele);

        if (Elements::isA($name, Elements::TEXT_RAW)) {
            // Raw text elements: character data is written without escaping.
            foreach ($ele->childNodes as $child) {
                if ($child instanceof \DOMCharacterData) {
                    $this->wr($child->data);
                } elseif ($child instanceof \DOMElement) {
                    $this->element($child);
                }
            }
        } elseif ($ele->hasChildNodes()) {
            $this->traverser->children($ele->childNodes);
        }

        // If not unary, add a closing tag.
        if (!Elements::isA($name, Elements::VOID_TAG)) {
            $this->closeTag($ele);
        }

        // Leave SVG/MathML handling only after the closing tag decision was
        // made with the element's own mode still active.
        if ($switchedMode) {
            $this->outputMode = $previousMode;
        }
    }
250
251
    /**
     * Write a text node.
     *
     * Text whose parent is a raw text element is written verbatim; any other
     * text goes through enc() first.
     *
     * @param \DOMText $ele The text node to write.
     */
    public function text($ele)
    {
        $insideRawText = isset($ele->parentNode)
            && isset($ele->parentNode->tagName)
            && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW);

        if ($insideRawText) {
            $this->wr($ele->data);
        } else {
            // FIXME: This probably needs some flags set.
            $this->wr($this->enc($ele->data));
        }
    }
267
268 2
    /**
     * Write a CDATA section using the owner document's XML serialization.
     *
     * @param \DOMNode $ele The CDATA node to write.
     */
    public function cdata($ele)
    {
        $markup = $ele->ownerDocument->saveXML($ele);

        $this->wr($markup);
    }
273
274 3
    /**
     * Write a comment node using the owner document's XML serialization.
     *
     * @param \DOMNode $ele The comment node to write.
     */
    public function comment($ele)
    {
        $markup = $ele->ownerDocument->saveXML($ele);

        $this->wr($markup);
    }
280
281 3
    /**
     * Write a processing instruction as "<?target data?>".
     *
     * @param \DOMNode $ele The processing instruction; its target and data
     *                      properties are written as-is.
     */
    public function processorInstruction($ele)
    {
        $this->wr('<?');
        $this->wr($ele->target);
        $this->wr(' ');
        $this->wr($ele->data);
        $this->wr('?>');
    }
289
290
    /**
     * Write the namespace attributes.
     *
     * Queries the namespace nodes that are new on this element (not already
     * present on its parent) and writes each one that is not listed in
     * $implicitNamespaces. The namespace URI is escaped like any other
     * attribute value so that quotes or ampersands in it cannot break the
     * generated markup.
     *
     * @param \DOMNode $ele The element being written.
     */
    protected function namespaceAttrs($ele)
    {
        // The cached XPath object is bound to one document; rebuild it when
        // the element belongs to a different one.
        if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) {
            $this->xpath = new \DOMXPath($ele->ownerDocument);
        }

        foreach ($this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) {
            if (in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
                continue;
            }

            $this->wr(' ')
                ->wr($nsNode->nodeName)
                ->wr('="')
                ->wr($this->enc($nsNode->nodeValue, true))
                ->wr('"');
        }
    }
307
308
    /**
     * Write the opening tag, including attributes and namespace attributes.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * In HTML mode the tag always ends with ">". In any other mode a childless
     * element is written as self-closing (" />").
     *
     * @param \DOMNode $ele The element being written.
     */
    protected function openTag($ele)
    {
        $this->wr('<');
        $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
        $this->wr($tagName);

        $this->attrs($ele);
        $this->namespaceAttrs($ele);

        // Outside HTML mode we are in SVG, MathML, or XML embedded content,
        // where an element without children is self-closing.
        $selfClosing = $this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes();

        $this->wr($selfClosing ? ' />' : '>');
    }
335
336 39
    /**
     * Write the attributes of an element.
     *
     * Each attribute is written as name="value". The ="value" part is left out
     * only when the encoded value is empty and no rule in
     * $nonBooleanAttributes matches the attribute, which produces a value-less
     * (boolean) attribute.
     *
     * @param \DOMNode $ele The element whose attributes are written.
     *
     * @return $this
     */
    protected function attrs($ele)
    {
        // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
        if (!$ele->hasAttributes()) {
            return $this;
        }

        foreach ($ele->attributes as $node) {
            $val = $this->enc($node->value, true);

            // XXX: The spec says that we need to ensure that anything in
            // the XML, XMLNS, or XLink NS's should use the canonical
            // prefix. It seems that DOM does this for us already, but there
            // may be exceptions.
            $name = $node->nodeName;

            // Special handling for attributes in SVG and MathML.
            if ($this->outputMode == static::IM_IN_SVG) {
                $name = Elements::normalizeSvgAttribute($name);
            } elseif ($this->outputMode == static::IM_IN_MATHML) {
                $name = Elements::normalizeMathMlAttribute($name);
            }

            $this->wr(' ')->wr($name);

            if ((isset($val) && '' !== $val) || $this->nonBooleanAttribute($node)) {
                $this->wr('="')->wr($val)->wr('"');
            }
        }

        // Return $this on every path, matching the early return above.
        return $this;
    }
372
373 10
    /**
     * Tell whether an attribute must always be written with a value.
     *
     * The attribute is checked against every rule in $nonBooleanAttributes.
     * A rule matches when all of the constraints it defines hold:
     *  - 'nodeNamespace': namespace URI of the owning element.
     *  - 'attrNamespace': namespace URI of the attribute. The misspelled key
     *    'attNamespace', formerly the only one that was read, is still
     *    honoured.
     *  - 'nodeName': local name of the owning element (string or array).
     *  - 'attrName': local name of the attribute (string or array).
     *  - 'xpath': expression evaluated with the attribute as context node;
     *    'prefixes' (prefix => namespace URI) are registered beforehand.
     *
     * @param \DOMAttr $attr The attribute to check.
     *
     * @return bool True if at least one rule matches, false otherwise.
     */
    protected function nonBooleanAttribute(\DOMAttr $attr)
    {
        $ele = $attr->ownerElement;

        foreach ($this->nonBooleanAttributes as $rule) {
            if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
                continue;
            }

            // Accept the documented key as well as the legacy misspelling.
            foreach (array('attrNamespace', 'attNamespace') as $nsKey) {
                if (isset($rule[$nsKey]) && $rule[$nsKey] !== $attr->namespaceURI) {
                    continue 2;
                }
            }

            // A plain string is treated as a one-element list of names.
            if (isset($rule['nodeName']) && !in_array($ele->localName, (array) $rule['nodeName'], true)) {
                continue;
            }

            if (isset($rule['attrName']) && !in_array($attr->localName, (array) $rule['attrName'], true)) {
                continue;
            }

            if (isset($rule['xpath'])) {
                $xp = $this->getXPath($attr);

                if (isset($rule['prefixes'])) {
                    foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                        $xp->registerNamespace($nsPrefix, $ns);
                    }
                }

                if (!$xp->evaluate($rule['xpath'], $attr)) {
                    continue;
                }
            }

            return true;
        }

        return false;
    }
412
413 9
    /**
     * Return a DOMXPath object bound to the document that owns the node.
     *
     * The object is cached, but it is rebuilt whenever the node belongs to a
     * different document than the cached one, because a DOMXPath instance can
     * only evaluate nodes of the document it was created for.
     *
     * @param \DOMNode $node A node of the document to query.
     *
     * @return \DOMXPath
     */
    private function getXPath(\DOMNode $node)
    {
        if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
            $this->xpath = new \DOMXPath($node->ownerDocument);
        }

        return $this->xpath;
    }
421
422
    /**
     * Write the closing tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * Nothing is written for a childless element outside HTML mode.
     *
     * @param \DOMNode $ele The element being written.
     */
    protected function closeTag($ele)
    {
        if ($this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes()) {
            return;
        }

        $this->wr('</');
        $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
        $this->wr($tagName)->wr('>');
    }
436
437
    /**
     * Send a string to the output stream.
     *
     * @param string $text The string to put into the output.
     *
     * @return $this
     */
    protected function wr($text)
    {
        fwrite($this->out, $text);

        return $this;
    }
450
451
    /**
     * Send a platform line break (PHP_EOL) to the output stream.
     *
     * @return $this
     */
    protected function nl()
    {
        fwrite($this->out, PHP_EOL);

        return $this;
    }
462
463
    /**
     * Encode text.
     *
     * With the encode flag off (the default) the text is only escaped; see
     * escape() for the exact replacements.
     *
     * With the encode flag on the text is converted to named character
     * references: through htmlentities() with ENT_HTML5 when that is
     * available, otherwise through the HTML5Entities::$map translation table.
     * The $attribute argument is not used in this mode.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
     *
     * @todo Use the Entities class in php 5.3 to have html5 entities.
     *
     * @param string $text      Text to encode.
     * @param bool   $attribute True if we are encoding an attribute, false otherwise.
     *
     * @return string The encoded text.
     */
    protected function enc($text, $attribute = false)
    {
        if ($this->encode) {
            // Native html5 entity support (PHP 5.4+).
            if ($this->hasHTML5) {
                return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
            }

            // Older versions do not handle html5 entities entirely, so they
            // are translated manually.
            return strtr($text, HTML5Entities::$map);
        }

        // Escape the text rather than convert to named character references.
        return $this->escape($text, $attribute);
    }
508
509
    /**
     * Escape text.
     *
     * According to the html5 spec section 8.3 Serializing HTML fragments, text
     * within tags that are not style, script, xmp, iframe, noembed, and noframes
     * needs to be properly escaped.
     *
     * In both modes & becomes &amp; and the UTF-8 non-breaking space becomes
     * &nbsp;. In attribute mode " additionally becomes &quot;; outside
     * attribute mode < and > additionally become &lt; and &gt;.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
     *
     * @param string $text      Text to escape.
     * @param bool   $attribute True if we are escaping an attribute, false otherwise.
     *
     * @return string The escaped text.
     */
    protected function escape($text, $attribute = false)
    {
        // Not using htmlspecialchars because, while it does escaping, it doesn't
        // match the requirements of section 8.5. For example, it doesn't handle
        // non-breaking spaces.
        $replace = array(
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );

        if ($attribute) {
            $replace['"'] = '&quot;';
        } else {
            $replace['<'] = '&lt;';
            $replace['>'] = '&gt;';
        }

        return strtr($text, $replace);
    }
548
}
549