Completed
Push — master ( 89d6c5...14fca1 )
by Robbert
03:32
created

Parser::parsePartial()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 10
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 2.0116

Importance

Changes 4
Bugs 1 Features 1
Metric Value
c 4
b 1
f 1
dl 0
loc 10
ccs 6
cts 7
cp 0.8571
rs 9.4285
cc 2
eloc 7
nc 2
nop 2
crap 2.0116
1
<?php
2
3
namespace arc\html;
4
5
/**
 * Parses HTML strings (full documents or fragments) into \arc\html\Proxy objects,
 * using DOMDocument::loadHTML() and simplexml_import_dom().
 */
class Parser
{
    /**
     * Parser settings. 'libxml_options' is passed as the options argument
     * of DOMDocument::loadHTML().
     */
    public $options = [
        'libxml_options' => 0
    ];

    /**
     * @param array $options Settings overriding the defaults in $this->options.
     *                       Only whitelisted keys are accepted; others are ignored.
     */
    public function __construct( $options = array() )
    {
        $optionList = [ 'libxml_options' ];
        foreach ( $options as $option => $optionValue ) {
            // Strict comparison: a loose in_array() lets integer key 0 match any string on PHP < 8.
            if ( in_array( $option, $optionList, true ) ) {
                // Store in $this->options, which is where parseFull() reads the value from.
                $this->options[$option] = $optionValue;
            }
        }
    }

    /**
     * Parses HTML into a Proxy.
     *
     * @param mixed       $html     HTML string, an already parsed Proxy, or a falsy value.
     * @param string|null $encoding Optional character set to announce to the HTML parser.
     * @return \arc\html\Proxy A Proxy wrapping null for falsy input, the given Proxy
     *                         unchanged, or the result of parsing the string.
     * @throws \arc\Exception when the HTML cannot be parsed.
     */
    public function parse( $html, $encoding = null )
    {
        if ( !$html ) {
            return new \arc\html\Proxy( null, $this );
        }
        if ( $html instanceof Proxy ) { // already parsed
            return $html;
        }
        $html = (string) $html;
        // A body tag with or without attributes marks a full document.
        if ( preg_match( '/<body[\s>]/i', $html ) ) {
            return $this->parseFull( $html, $encoding );
        }
        return $this->parsePartial( $html, $encoding );
    }

    /**
     * Parses an HTML fragment by wrapping it in a temporary body element and
     * returning a Proxy for the children of that element.
     *
     * @throws \arc\Exception when parsing yields no result.
     */
    private function parsePartial( $html, $encoding )
    {
        $result = $this->parseFull( '<body id="ArcPartialHTML">'.$html.'</body>', $encoding );
        if ( !$result ) {
            throw new \arc\Exception('parse error');
        }
        return new \arc\html\Proxy( $result->find('#ArcPartialHTML')[0]->children(), $this );
    }

    /**
     * Collects the pending libxml errors, clears them, restores the previous
     * libxml error mode and throws an exception listing every error.
     *
     * @param bool $prevErrorSetting Value to hand back to libxml_use_internal_errors().
     * @throws \arc\Exception always.
     */
    private function throwError($prevErrorSetting)
    {
        $errors = libxml_get_errors();
        libxml_clear_errors();
        libxml_use_internal_errors( $prevErrorSetting );
        $message = 'Incorrect html passed.';
        foreach ( $errors as $error ) {
            $message .= "\nline: ".$error->line."; column: ".$error->column."; ".$error->message;
        }
        throw new \arc\Exception( $message, \arc\exceptions::ILLEGAL_ARGUMENT );
    }

    /**
     * Inserts a temporary content-type meta tag (id "ArcTempEncoding") announcing
     * $encoding: inside the first head element, else in a new head placed before
     * the first body element, else in front of the whole string.
     *
     * @return string The HTML with the meta tag added.
     */
    private function insertEncoding($html, $encoding)
    {
        $meta = '<meta id="ArcTempEncoding" http-equiv="content-type" content="text/html; charset='
            . htmlspecialchars($encoding) . '">';

        // Callbacks keep "$1"/"\1" sequences in the encoding from being read as
        // backreferences; the limit of 1 keeps the temporary id unique.
        // "(\s[^>]*)?" stops the head pattern from matching <header>.
        $count = 0;
        $withMeta = preg_replace_callback(
            '/<head(\s[^>]*)?>/i',
            function ( $match ) use ( $meta ) {
                return $match[0] . $meta;
            },
            $html,
            1,
            $count
        );
        if ( $count ) {
            return $withMeta;
        }

        $withMeta = preg_replace_callback(
            '/<body(\s[^>]*)?>/i',
            function ( $match ) use ( $meta ) {
                return '<head>' . $meta . '</head>' . $match[0];
            },
            $html,
            1,
            $count
        );
        if ( $count ) {
            return $withMeta;
        }

        return $meta . $html;
    }

    /**
     * Removes the temporary meta tag added by insertEncoding() from the document,
     * if it is present.
     */
    private function removeEncoding( $dom )
    {
        $meta = $dom->getElementById('ArcTempEncoding');
        // getElementById() returns null when the element is not found.
        if ( $meta && $meta->parentNode ) {
            $meta->parentNode->removeChild($meta);
        }
    }

    /**
     * Loads a complete HTML document and wraps it in a Proxy.
     *
     * @throws \arc\Exception when DOMDocument::loadHTML() reports failure.
     */
    private function parseFull( $html, $encoding )
    {
        $dom = new \DomDocument();
        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so only call it on older versions.
        if ( \PHP_VERSION_ID < 80000 ) {
            libxml_disable_entity_loader(); // prevents XXE attacks
        }
        $prevErrorSetting = libxml_use_internal_errors(true);
        if ( $encoding ) {
            $html = $this->insertEncoding($html, $encoding);
        }
        if ( !$dom->loadHTML( $html, $this->options['libxml_options'] ) ) {
            $this->throwError($prevErrorSetting);
        }
        if ( $encoding ) {
            $this->removeEncoding( $dom );
        }
        if ( !$prevErrorSetting ) {
            // Internal error collection was only enabled for this parse, so drop
            // what it gathered instead of letting it accumulate in libxml's buffer.
            libxml_clear_errors();
        }
        libxml_use_internal_errors( $prevErrorSetting );
        return new \arc\html\Proxy( simplexml_import_dom( $dom ), $this );
    }

}
98