Completed
Pull Request — master (#2)
by Robbert
02:39 queued 28s
created

Parser::parseFull()   B

Complexity

Conditions 6
Paths 10

Size

Total Lines 32
Code Lines 24

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 18
CRAP Score 7.0487

Importance

Changes 4
Bugs 1 Features 1
Metric Value
c 4
b 1
f 1
dl 0
loc 32
ccs 18
cts 26
cp 0.6923
rs 8.439
cc 6
eloc 24
nc 10
nop 2
crap 7.0487
1
<?php
2
3
namespace arc\html;
4
5
/**
 * Parses HTML strings (full documents or fragments) with DOMDocument and
 * hands the result back wrapped in a \arc\html\Proxy.
 */
class Parser
{
    /**
     * Parser settings.
     *
     * 'libxml_options' is passed as the options argument of
     * DOMDocument::loadHTML(); 0 means no extra libxml flags.
     */
    public $options = [
        'libxml_options' => 0
    ];

    /**
     * @param array $options Settings overriding the defaults in $this->options.
     *                       Only keys listed in $optionList are accepted; any
     *                       other key is ignored.
     */
    public function __construct( $options = array() )
    {
        $optionList = [ 'libxml_options' ];
        foreach( $options as $option => $optionValue ) {
            // Strict comparison: on PHP < 8 an integer key such as 0 would
            // otherwise loosely match any non-numeric string in the list.
            if ( in_array( $option, $optionList, true ) ) {
                // Store in the options array, which is what parseFull() reads;
                // assigning to $this->{$option} only created an unused
                // dynamic property and the setting was silently dropped.
                $this->options[$option] = $optionValue;
            }
        }
    }

    /**
     * Parses $html and returns it wrapped in a Proxy.
     *
     * @param mixed  $html     HTML source (anything castable to string), or a
     *                         Proxy, which is returned unchanged.
     * @param string $encoding Character encoding of $html.
     * @return \arc\html\Proxy
     * @throws \arc\Exception When libxml cannot load the markup.
     */
    public function parse( $html, $encoding = 'UTF-8' )
    {
        if ( !$html ) {
            // Empty input: return a Proxy around null, constructed the same
            // way as everywhere else in this class (with `new` and the parser).
            return new \arc\html\Proxy( null, $this );
        }
        if ( $html instanceof Proxy ) { // already parsed
            return $html;
        }
        $html = (string) $html;
        // Treat the input as a full document when it contains an <html> root
        // tag, with or without attributes (e.g. <html lang="en">).
        if ( preg_match( '/<html[\s>\/]/i', $html ) === 1 ) {
            return $this->parseFull( $html, $encoding );
        }
        return $this->parsePartial( $html, $encoding );
    }

    /**
     * Parses an HTML fragment by wrapping it in a marker <div>, parsing the
     * result as a full document and returning a Proxy over the marker's
     * children.
     *
     * @param string $html     HTML fragment.
     * @param string $encoding Character encoding of $html.
     * @return \arc\html\Proxy
     * @throws \arc\Exception When parsing fails.
     */
    private function parsePartial( $html, $encoding = 'UTF-8')
    {
        $result = $this->parseFull( '<div id="ArcPartialHTML">'.$html.'</div>', $encoding );
        if ( !$result ) {
            // Defensive only: parseFull() throws on failure rather than
            // returning a falsy value.
            throw new \arc\Exception('parse error');
        }
        return new \arc\html\Proxy( $result->find('#ArcPartialHTML')[0]->children(), $this );
    }

    /**
     * Loads $html into a DOMDocument and returns it as a Proxy around the
     * SimpleXML view of that document.
     *
     * When an encoding is given, a temporary <head> with a charset <meta> tag
     * is prepended so libxml decodes the input with that encoding; the
     * temporary element is removed again after loading.
     *
     * @param string      $html     HTML source.
     * @param string|null $encoding Character encoding, or null to let libxml decide.
     * @return \arc\html\Proxy
     * @throws \arc\Exception With code \arc\exceptions::ILLEGAL_ARGUMENT and the
     *                        collected libxml errors when loading fails.
     */
    private function parseFull( $html, $encoding = 'UTF-8')
    {
        // Passing null to the constructor's string parameter is deprecated
        // since PHP 8.1, so only pass the encoding when there is one.
        $dom = isset($encoding)
            ? new \DOMDocument('1.0', $encoding)
            : new \DOMDocument('1.0');
        $prefix = '';
        if ( isset($encoding) ) {
            $prefix  = <<<EOS
<head id='ar_html_parser_encoding_header'>
<meta http-equiv="content-type" content="text/html; charset=$encoding">
</head>
EOS;
        }
        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, where
        // external entity loading is already off by default; only call it on
        // older runtimes, where it is what prevents XXE attacks.
        if ( \PHP_VERSION_ID < 80000 ) {
            libxml_disable_entity_loader(); // prevents XXE attacks
        }
        // Collect libxml errors internally instead of emitting PHP warnings;
        // the previous setting is restored on every exit path below.
        $prevErrorSetting = libxml_use_internal_errors(true);
        if ( $dom->loadHTML( $prefix . $html, $this->options['libxml_options'] ) ) {
            if ( isset($encoding) ) {
                // Remove the temporary encoding header again.
                $elm  = $dom->getElementById('ar_html_parser_encoding_header');
                if ( isset($elm) ) {
                    $elm->parentNode->removeChild($elm);
                }
            }
            libxml_use_internal_errors( $prevErrorSetting );
            return new \arc\html\Proxy( simplexml_import_dom( $dom ), $this );
        }
        $errors = libxml_get_errors();
        libxml_clear_errors();
        libxml_use_internal_errors( $prevErrorSetting );
        $message = 'Incorrect html passed.';
        foreach ( $errors as $error ) {
            $message .= "\nline: ".$error->line."; column: ".$error->column."; ".$error->message;
        }
        throw new \arc\Exception( $message, \arc\exceptions::ILLEGAL_ARGUMENT );
    }

}
82