Parser   A
last analyzed

Complexity

Total Complexity 19

Size/Duplication

Total Lines 93
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 2

Test Coverage

Coverage 70.49%

Importance

Changes 0
Metric Value
wmc 19
lcom 1
cbo 2
dl 0
loc 93
ccs 43
cts 61
cp 0.7049
rs 10
c 0
b 0
f 0

7 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 9 3
A parse() 0 15 4
A parsePartial() 0 10 2
A throwError() 0 11 2
A insertEncoding() 0 12 3
A removeEncoding() 0 5 1
A parseFull() 0 17 4
1
<?php
2
3
namespace arc\html;
4
5
/**
 * Parses HTML strings (full documents or fragments) with libxml's HTML parser
 * and wraps the result in an \arc\html\Proxy.
 */
class Parser
{
    /**
     * Parser options.
     *  - libxml_options: bitmask of LIBXML_* constants passed to DOMDocument::loadHTML().
     *
     * @var array
     */
    public $options = [
        'libxml_options' => 0
    ];

    /**
     * @param array $options Map of option name => value. Only names listed in
     *                       the internal whitelist are accepted; anything else
     *                       is silently ignored.
     */
    public function __construct( $options = array() )
    {
        $optionList = [ 'libxml_options' ];
        foreach ( $options as $option => $optionValue ) {
            // Strict comparison: with loose comparison an integer key 0 would
            // match any non-numeric string on PHP < 8.
            if ( in_array( $option, $optionList, true ) ) {
                // Store in $this->options, which is where parseFull() reads it.
                // (Previously the value was written to a dynamic property and
                // therefore never took effect.)
                $this->options[$option] = $optionValue;
            }
        }
    }

    /**
     * Parses HTML into a Proxy.
     *
     * @param mixed       $html     HTML string, anything castable to string, or
     *                              an already parsed Proxy (returned unchanged).
     * @param string|null $encoding Optional character set of $html.
     * @return Proxy
     * @throws \arc\UnknownError When libxml cannot parse the input.
     */
    public function parse( $html, $encoding = null )
    {
        if ( !$html ) {
            // Was a function call (missing `new`), which is a fatal error.
            // The parser is passed as second argument like at the other call
            // sites in this class.
            return new \arc\html\Proxy( null, $this );
        }
        if ( $html instanceof Proxy ) { // already parsed
            return $html;
        }
        $html = (string) $html;
        // A body start tag, with or without attributes, marks a full document.
        if ( preg_match( '/<body[\s>]/i', $html ) ) {
            return $this->parseFull( $html, $encoding );
        }
        return $this->parsePartial( $html, $encoding );
    }

    /**
     * Parses an HTML fragment by wrapping it in a temporary, identifiable body
     * element and returning that element's children.
     *
     * @param string      $html
     * @param string|null $encoding
     * @return Proxy
     * @throws \arc\UnknownError When the wrapped fragment could not be parsed.
     */
    private function parsePartial( $html, $encoding )
    {
        $wrapped = '<body id="ArcPartialHTML">'.$html.'</body>';
        $document = $this->parseFull( $wrapped, $encoding );
        if ( !$document ) {
            throw new \arc\UnknownError('Could not parse html.', \arc\exceptions::ILLEGAL_ARGUMENT );
        }
        $wrapper = $document->find('#ArcPartialHTML')[0];
        return new \arc\html\Proxy( $wrapper->children(), $this );
    }

    /**
     * Collects the pending libxml errors into one message, restores the
     * previous libxml error mode and throws.
     *
     * @param bool $prevErrorSetting Value to hand back to libxml_use_internal_errors().
     * @throws \arc\UnknownError Always.
     */
    private function throwError($prevErrorSetting)
    {
        $errors = libxml_get_errors();
        libxml_clear_errors();
        libxml_use_internal_errors( $prevErrorSetting );
        $message = 'Incorrect html passed.';
        foreach ( $errors as $error ) {
            $message .= "\nline: ".$error->line."; column: ".$error->column."; ".$error->message;
        }
        throw new \arc\UnknownError( $message, \arc\exceptions::ILLEGAL_ARGUMENT );
    }

    /**
     * Injects a temporary content-type meta element declaring $encoding, so
     * that libxml decodes the document with that character set.
     *
     * The element goes directly after the first head start tag; if there is no
     * head, a head containing it is placed before the first body start tag;
     * otherwise it is prepended to the input.
     *
     * @param string $html
     * @param string $encoding
     * @return string
     */
    private function insertEncoding($html, $encoding)
    {
        // The charset value belongs inside the content attribute; the stray
        // quote that used to follow "charset=" closed the attribute early.
        $meta = '<meta id="ArcTempEncoding" http-equiv="content-type" content="text/html; charset='
            . htmlspecialchars($encoding) . '">';

        // Callbacks are used so that "$1" or "\1" sequences in the encoding are
        // never interpreted as backreferences. The patterns require whitespace
        // or ">" after the tag name, so "<header>" is not mistaken for "<head>".
        $count = 0;
        $result = preg_replace_callback(
            '/<head(\s[^>]*)?>/i',
            function ( $match ) use ( $meta ) {
                return $match[0].$meta;
            },
            $html,
            1,
            $count
        );
        if ( $count ) {
            return $result;
        }

        $result = preg_replace_callback(
            '/<body(\s[^>]*)?>/i',
            function ( $match ) use ( $meta ) {
                return '<head>'.$meta.'</head>'.$match[0];
            },
            $html,
            1,
            $count
        );
        if ( $count ) {
            return $result;
        }

        return $meta.$html;
    }

    /**
     * Removes the temporary meta element added by insertEncoding(), if present.
     *
     * @param \DOMDocument $dom
     */
    private function removeEncoding( $dom )
    {
        $meta = $dom->getElementById('ArcTempEncoding');
        // The element may be absent; do not dereference null in that case.
        if ( $meta && $meta->parentNode ) {
            $meta->parentNode->removeChild($meta);
        }
    }

    /**
     * Parses a complete HTML document.
     *
     * @param string      $html
     * @param string|null $encoding
     * @return Proxy
     * @throws \arc\UnknownError When DOMDocument::loadHTML() reports failure.
     */
    private function parseFull( $html, $encoding )
    {
        $dom = new \DomDocument();
        // prevents XXE attacks; the function is deprecated as of PHP 8.0, so it
        // is only called on older versions to avoid a deprecation notice.
        if ( \PHP_VERSION_ID < 80000 ) {
            libxml_disable_entity_loader( true );
        }
        $prevErrorSetting = libxml_use_internal_errors(true);
        if ( $encoding ) {
            $html = $this->insertEncoding($html, $encoding);
        }
        if ( !$dom->loadHTML( $html, $this->options['libxml_options'] ) ) {
            $this->throwError($prevErrorSetting);
        }
        if ( $encoding ) {
            $this->removeEncoding( $dom );
        }
        if ( !$prevErrorSetting ) {
            // Internal error collection was switched on only for this call;
            // drop the collected warnings so the buffer does not grow with
            // every parse. If the caller had it enabled, the buffer is theirs.
            libxml_clear_errors();
        }
        libxml_use_internal_errors( $prevErrorSetting );
        return new \arc\html\Proxy( simplexml_import_dom( $dom ), $this );
    }

}
98