Completed
Push — master ( c71ba2...adfa1d )
by Robbert
10:16
created

Parser   A

Complexity

Total Complexity 19

Size/Duplication

Total Lines 94
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 2

Test Coverage

Coverage 71.7%

Importance

Changes 6
Bugs 3 Features 1
Metric Value
wmc 19
c 6
b 3
f 1
lcom 1
cbo 2
dl 0
loc 94
ccs 38
cts 53
cp 0.717
rs 10

7 Methods

Rating   Name   Duplication   Size   Complexity  
A throwError() 0 11 2
A __construct() 0 9 3
A parse() 0 15 4
A parsePartial() 0 11 2
A insertEncoding() 0 12 3
A removeEncoding() 0 5 1
A parseFull() 0 17 4
1
<?php
2
3
namespace arc\html;
4
5
class Parser
6
{
7
    /**
     * Parser settings.
     *
     * 'libxml_options' is passed as the second argument to
     * DOMDocument::loadHTML() whenever a document is parsed.
     *
     * @var array
     */
    public $options = [
        'libxml_options' => 0
    ];

    /**
     * Creates a parser, optionally overriding the default settings.
     *
     * Only whitelisted option names are accepted; every other key in
     * $options is silently ignored.
     *
     * @param array $options map of option name => value
     */
    public function __construct( $options = array() )
    {
        $optionList = [ 'libxml_options' ];
        foreach ( $options as $option => $optionValue ) {
            // Strict comparison: a loose in_array() lets the integer key 0
            // match any string on PHP < 8.
            if ( in_array( $option, $optionList, true ) ) {
                // Write into the options array, which is what parseFull()
                // reads. Assigning to $this->{$option} only created an
                // unrelated dynamic property, so the setting never took effect.
                $this->options[$option] = $optionValue;
            }
        }
    }
20
21 4
    /**
     * Parses HTML into a Proxy.
     *
     * - A falsy value yields a Proxy wrapping null.
     * - A value that already is a Proxy is returned untouched.
     * - Anything else is cast to string. If it contains the literal tag
     *   "<body>" (case-insensitive, no attributes) it is parsed as a full
     *   document, otherwise as a fragment.
     *
     * NOTE(review): a document whose body tag carries attributes
     * (e.g. <body class="x">) does not contain the literal "<body>" and is
     * therefore handled as a fragment - confirm whether that is intended.
     *
     * @param mixed       $html     HTML source, or an already parsed Proxy
     * @param string|null $encoding optional character set of $html
     * @return Proxy
     */
    public function parse( $html, $encoding = null )
    {
        if ( !$html ) {
            // The 'new' keyword was missing here, which made PHP look for a
            // function called \arc\html\Proxy and fail with a fatal error.
            return new \arc\html\Proxy( null, $this );
        }
        if ( $html instanceof Proxy ) { // already parsed
            return $html;
        }
        $html = (string) $html;
        if ( stripos( $html, '<body>' ) !== false ) {
            return $this->parseFull( $html, $encoding );
        }
        return $this->parsePartial( $html, $encoding );
    }
36
37 1
    /**
     * Parses an HTML fragment.
     *
     * The fragment is wrapped in a body element with a known id and parsed
     * as a full document; the children of that wrapper are then returned in
     * a new Proxy.
     *
     * @param string      $html     HTML fragment
     * @param string|null $encoding optional character set of $html
     * @return Proxy
     * @throws \arc\Exception when parsing yields no result or the wrapper
     *                        element cannot be found again
     */
    private function parsePartial( $html, $encoding )
    {
        $document = $this->parseFull( '<body id="ArcPartialHTML">'.$html.'</body>', $encoding );
        if ( !$document ) {
            throw new \arc\Exception('parse error');
        }
        $wrappers = $document->find('#ArcPartialHTML');
        if ( !isset( $wrappers[0] ) ) {
            // Fail with the same exception instead of a fatal error on a
            // null dereference when the wrapper is missing from the result.
            throw new \arc\Exception('parse error');
        }
        return new \arc\html\Proxy( $wrappers[0]->children(), $this );
    }
48
49
    /**
     * Converts the collected libxml errors into an \arc\Exception.
     *
     * The libxml error buffer is emptied and the previous
     * libxml_use_internal_errors() setting is restored before throwing.
     *
     * @param bool $prevErrorSetting value to hand back to libxml_use_internal_errors()
     * @throws \arc\Exception always
     */
    private function throwError($prevErrorSetting)
    {
        $libxmlErrors = libxml_get_errors();
        libxml_clear_errors();
        libxml_use_internal_errors( $prevErrorSetting );

        // One "\nline: ...; column: ...; ..." entry per reported error.
        $details = array_map(
            function ( $libxmlError ) {
                return "\nline: ".$libxmlError->line."; column: ".$libxmlError->column."; ".$libxmlError->message;
            },
            $libxmlErrors
        );

        throw new \arc\Exception(
            'Incorrect html passed.'.implode( '', $details ),
            \arc\exceptions::ILLEGAL_ARGUMENT
        );
    }
60
61 1
    /**
     * Adds a temporary content-type meta tag declaring $encoding to $html.
     *
     * The tag is inserted directly after the first opening head tag. If
     * there is none, a head element holding the tag is inserted before the
     * first opening body tag. If there is no body tag either, the tag is
     * prepended to the markup. The tag carries the id "ArcTempEncoding" so
     * removeEncoding() can find it again.
     *
     * @param string $html     HTML source
     * @param string $encoding character set name
     * @return string $html including the meta tag
     */
    private function insertEncoding($html, $encoding)
    {
        // No quote after "charset=": the stray quote that used to be here
        // closed the content attribute early, so no charset was declared.
        $meta = '<meta id="ArcTempEncoding" http-equiv="content-type" content="text/html; charset='
            . htmlspecialchars($encoding) . '">';

        // The patterns require whitespace or ">" after the tag name so that
        // <header> is not mistaken for <head>. Only the first match is
        // changed, so exactly one meta tag with this id is inserted.
        // Callbacks are used so "$1" or "\1" inside $encoding is never
        // interpreted as a backreference.
        $count = 0;
        $withMeta = preg_replace_callback(
            '/<head(\s[^>]*)?>/i',
            function ( $match ) use ( $meta ) {
                return $match[0] . $meta;
            },
            $html,
            1,
            $count
        );
        if ( $count > 0 ) {
            return $withMeta;
        }

        $withMeta = preg_replace_callback(
            '/<body(\s[^>]*)?>/i',
            function ( $match ) use ( $meta ) {
                return '<head>' . $meta . '</head>' . $match[0];
            },
            $html,
            1,
            $count
        );
        if ( $count > 0 ) {
            return $withMeta;
        }

        return $meta . $html;
    }
73
74 1
    /**
     * Removes the temporary meta tag with id "ArcTempEncoding" from $dom.
     *
     * Does nothing when the document holds no such element.
     *
     * @param \DOMDocument $dom parsed document, modified in place
     * @return void
     */
    private function removeEncoding( $dom )
    {
        $meta = $dom->getElementById('ArcTempEncoding');
        // getElementById() returns null when nothing matches; dereferencing
        // that would be a fatal error.
        if ( $meta && $meta->parentNode ) {
            $meta->parentNode->removeChild($meta);
        }
    }
79
80 4
    /**
     * Parses a complete HTML document and wraps the result in a Proxy.
     *
     * libxml errors are collected internally while parsing; the caller's
     * libxml_use_internal_errors() setting is restored afterwards. When an
     * encoding is given, a temporary meta tag declaring it is added before
     * parsing and removed from the resulting DOM again.
     *
     * @param string      $html     HTML source
     * @param string|null $encoding optional character set of $html
     * @return Proxy
     * @throws \arc\Exception (via throwError()) when loadHTML() fails
     */
    private function parseFull( $html, $encoding )
    {
        $dom = new \DomDocument();
        if ( \PHP_VERSION_ID < 80000 ) {
            // Prevents XXE attacks. The function is deprecated as of PHP 8.0,
            // which requires libxml >= 2.9.0 where external entity loading
            // is already disabled by default.
            libxml_disable_entity_loader();
        }
        $prevErrorSetting = libxml_use_internal_errors(true);
        if ( $encoding ) {
            $html = $this->insertEncoding($html, $encoding);
        }
        if ( !$dom->loadHTML( $html, $this->options['libxml_options'] ) ) {
            $this->throwError($prevErrorSetting);
        }
        if ( $encoding ) {
            $this->removeEncoding( $dom );
        }
        // Recoverable warnings pile up in libxml's buffer even when loading
        // succeeds; drop them so the buffer does not grow with every parse.
        libxml_clear_errors();
        libxml_use_internal_errors( $prevErrorSetting );
        return new \arc\html\Proxy( simplexml_import_dom( $dom ), $this );
    }
97
98
}
99