Completed
Push — master ( bcd462...884415 )
by Auke
02:11
created

Parser::throwError()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 11
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 1
Bugs 1 Features 0
Metric Value
c 1
b 1
f 0
dl 0
loc 11
ccs 0
cts 9
cp 0
rs 9.4285
cc 2
eloc 8
nc 2
nop 1
crap 6
1
<?php
2
3
namespace arc\html;
4
5
/**
 * Parses HTML strings (full documents or fragments) into \arc\html\Proxy
 * objects, using DOMDocument::loadHTML() and simplexml_import_dom().
 */
class Parser
{
    /**
     * Parser options.
     *  - 'libxml_options': bitmask passed as the second argument to
     *    DOMDocument::loadHTML().
     *
     * @var array
     */
    public $options = [
        'libxml_options' => 0
    ];

    /**
     * @param array $options Option overrides; only keys that are known to the
     *                       parser ('libxml_options') are accepted, all other
     *                       keys are ignored.
     */
    public function __construct( $options = array() )
    {
        $optionList = [ 'libxml_options' ];
        foreach ( $options as $option => $optionValue ) {
            // Strict comparison: a numeric key must never match an option name.
            if ( in_array( $option, $optionList, true ) ) {
                // Store in $this->options, which is what parseFull() reads.
                $this->options[$option] = $optionValue;
            }
        }
    }

    /**
     * Parses HTML into a Proxy.
     *
     * @param mixed       $html     HTML source (anything castable to string), or
     *                              an already parsed Proxy, which is returned as is.
     *                              Any falsy value (including the string '0')
     *                              yields a Proxy wrapping null.
     * @param string|null $encoding Optional character set of $html; when given, a
     *                              temporary <meta charset> hint is added for the
     *                              parser and removed again afterwards.
     * @return \arc\html\Proxy
     * @throws \arc\Exception when the HTML cannot be parsed.
     */
    public function parse( $html, $encoding = null )
    {
        if ( !$html ) {
            return new \arc\html\Proxy( null, $this );
        }
        if ( $html instanceof Proxy ) { // already parsed
            return $html;
        }
        $html = (string) $html;
        // An <html> start tag, with or without attributes, marks a full document.
        if ( preg_match( '/<html[\s>]/i', $html ) ) {
            return $this->parseFull( $html, $encoding );
        }
        return $this->parsePartial( $html, $encoding );
    }

    /**
     * Parses an HTML fragment by wrapping it in a temporary container element
     * and returning a Proxy over the children of that container.
     *
     * @param string      $html
     * @param string|null $encoding
     * @return \arc\html\Proxy
     * @throws \arc\Exception when the HTML cannot be parsed.
     */
    private function parsePartial( $html, $encoding )
    {
        $result = $this->parseFull( '<div id="ArcPartialHTML">'.$html.'</div>', $encoding );
        if ( !$result ) {
            throw new \arc\Exception('parse error');
        }
        return new \arc\html\Proxy( $result->find('#ArcPartialHTML')[0]->children(), $this );
    }

    /**
     * Collects the buffered libxml errors, restores the previous libxml error
     * handling mode and throws an exception listing every error.
     *
     * @param bool $prevErrorSetting Value previously returned by
     *                               libxml_use_internal_errors().
     * @throws \arc\Exception always.
     */
    private function throwError($prevErrorSetting)
    {
        $errors = libxml_get_errors();
        libxml_clear_errors();
        libxml_use_internal_errors( $prevErrorSetting );
        $message = 'Incorrect html passed.';
        foreach ( $errors as $error ) {
            $message .= "\nline: ".$error->line."; column: ".$error->column."; ".$error->message;
        }
        throw new \arc\Exception( $message, \arc\exceptions::ILLEGAL_ARGUMENT );
    }

    /**
     * Inserts a temporary <meta id="ArcTempEncoding" charset="..."> element:
     * directly after the first <head> start tag if there is one, otherwise in a
     * new <head> placed before the first <body> start tag, otherwise at the very
     * start of the markup.
     *
     * @param string $html
     * @param string $encoding
     * @return string The markup including the temporary meta element.
     */
    private function insertEncoding($html, $encoding)
    {
        $meta = '<meta id="ArcTempEncoding" charset="'.htmlspecialchars($encoding).'">';

        // The "(?:\s[^>]*)?" part keeps <head> from matching <header ...>.
        // Callbacks are used so $meta is inserted literally ("$1" or "\1" in
        // the encoding is not interpreted as a backreference), and the limit
        // of 1 makes sure only the first matching tag is touched.
        $withMeta = preg_replace_callback(
            '/<head(?:\s[^>]*)?>/i',
            function ( $match ) use ( $meta ) {
                return $match[0].$meta;
            },
            $html,
            1,
            $count
        );
        if ( $withMeta !== null && $count > 0 ) {
            return $withMeta;
        }

        $withMeta = preg_replace_callback(
            '/<body(?:\s[^>]*)?>/i',
            function ( $match ) use ( $meta ) {
                return '<head>'.$meta.'</head>'.$match[0];
            },
            $html,
            1,
            $count
        );
        if ( $withMeta !== null && $count > 0 ) {
            return $withMeta;
        }

        return $meta.$html;
    }

    /**
     * Removes the temporary meta element added by insertEncoding() from the
     * parsed document, if it is present.
     *
     * @param \DOMDocument $dom
     * @param string       $encoding Not used in the method body; kept so the
     *                               signature and call site stay unchanged.
     */
    private function removeEncoding( $dom, $encoding)
    {
        $meta = $dom->getElementById('ArcTempEncoding');
        // getElementById() returns null when the element is not found.
        if ( $meta && $meta->parentNode ) {
            $meta->parentNode->removeChild($meta);
        }
    }

    /**
     * Parses a complete HTML document.
     *
     * @param string      $html
     * @param string|null $encoding
     * @return \arc\html\Proxy
     * @throws \arc\Exception when DOMDocument::loadHTML() fails.
     */
    private function parseFull( $html, $encoding )
    {
        $dom = new \DomDocument();
        // Prevents XXE attacks. libxml_disable_entity_loader() is deprecated
        // as of PHP 8.0, where external entity loading is disabled by default,
        // so it is only called on older versions.
        if ( \PHP_VERSION_ID < 80000 ) {
            libxml_disable_entity_loader( true );
        }
        $prevErrorSetting = libxml_use_internal_errors(true);
        if ( $encoding ) {
            $html = $this->insertEncoding($html, $encoding);
        }
        if ( !$dom->loadHTML( $html, $this->options['libxml_options'] ) ) {
            $this->throwError($prevErrorSetting);
        }
        if ( $encoding ) {
            $this->removeEncoding($dom, $encoding);
        }
        if ( !$prevErrorSetting ) {
            // Internal error buffering was only enabled for this call, so drop
            // the recoverable errors collected while parsing instead of letting
            // them pile up. When the caller had buffering enabled already, the
            // buffer is left untouched for the caller to inspect.
            libxml_clear_errors();
        }
        libxml_use_internal_errors( $prevErrorSetting );
        return new \arc\html\Proxy( simplexml_import_dom( $dom ), $this );
    }
}