Completed
Push — master ( c71ba2...adfa1d )
by Robbert
10:16
created

Parser::throwError()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 11
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 1
Bugs 1 Features 0
Metric Value
c 1
b 1
f 0
dl 0
loc 11
ccs 0
cts 8
cp 0
rs 9.4285
cc 2
eloc 8
nc 2
nop 1
crap 6
1
<?php
2
3
namespace arc\html;
4
5
/**
 * Parses HTML strings into \arc\html\Proxy objects.
 *
 * Input that contains a literal "<body>" tag is loaded as a complete
 * document; anything else is treated as a fragment and wrapped in a
 * temporary <body id="ArcPartialHTML"> element before being loaded with
 * DOMDocument::loadHTML().
 */
class Parser
{
    /**
     * Parser settings.
     *
     * 'libxml_options' is passed as the options bitmask to
     * DOMDocument::loadHTML().
     */
    public $options = [
        'libxml_options' => 0
    ];

    /**
     * @param array $options Settings overriding the defaults in $this->options.
     *                       Only the keys named in $optionList are accepted;
     *                       any other key is ignored.
     */
    public function __construct( $options = [] )
    {
        $optionList = [ 'libxml_options' ];
        foreach ( $options as $option => $optionValue ) {
            // Strict comparison: with a loose in_array() the integer key 0
            // compares equal to any non-numeric string on PHP < 8.
            if ( in_array( $option, $optionList, true ) ) {
                // Store the value where parseFull() reads it. Assigning to
                // $this->{$option} only created an unused dynamic property,
                // so caller-supplied options never took effect.
                $this->options[$option] = $optionValue;
            }
        }
    }

    /**
     * Parses HTML into a Proxy.
     *
     * NOTE(review): the full-document check only matches the literal string
     * "<body>", so a document whose body tag carries attributes is handled
     * as a fragment - confirm whether that is intended.
     *
     * @param mixed       $html     HTML source (cast to string), or an existing
     *                              Proxy, which is returned unchanged.
     * @param string|null $encoding Optional character set; when given, a
     *                              temporary charset <meta> tag is injected
     *                              before loading and removed afterwards.
     * @return \arc\html\Proxy
     * @throws \arc\Exception when the HTML cannot be loaded.
     */
    public function parse( $html, $encoding = null )
    {
        if ( !$html ) {
            // Empty input yields a Proxy around null. The original line
            // lacked "new" and therefore called a function named Proxy.
            return new \arc\html\Proxy( null, $this );
        }
        if ( $html instanceof Proxy ) { // already parsed
            return $html;
        }
        $html = (string) $html;
        if ( stripos( $html, '<body>' ) !== false ) {
            return $this->parseFull( $html, $encoding );
        }
        return $this->parsePartial( $html, $encoding );
    }

    /**
     * Parses an HTML fragment by wrapping it in a marker <body> element and
     * returning a Proxy around that element's children.
     *
     * @param string      $html
     * @param string|null $encoding
     * @return \arc\html\Proxy
     * @throws \arc\Exception when parsing yields no result.
     */
    private function parsePartial( $html, $encoding )
    {
        $result = $this->parseFull( '<body id="ArcPartialHTML">'.$html.'</body>', $encoding );
        if ( !$result ) {
            throw new \arc\Exception('parse error');
        }
        return new \arc\html\Proxy( $result->find('#ArcPartialHTML')[0]->children(), $this );
    }

    /**
     * Collects the buffered libxml errors, restores the caller's libxml
     * error handling mode and throws an exception listing every error.
     *
     * @param bool $prevErrorSetting Value previously returned by
     *                               libxml_use_internal_errors().
     * @throws \arc\Exception always.
     */
    private function throwError($prevErrorSetting)
    {
        $errors = libxml_get_errors();
        libxml_clear_errors();
        libxml_use_internal_errors( $prevErrorSetting );
        $message = 'Incorrect html passed.';
        foreach ( $errors as $error ) {
            $message .= "\nline: ".$error->line."; column: ".$error->column."; ".$error->message;
        }
        throw new \arc\Exception( $message, \arc\exceptions::ILLEGAL_ARGUMENT );
    }

    /**
     * Injects a temporary <meta http-equiv="content-type"> tag declaring
     * $encoding. The tag carries id "ArcTempEncoding" so removeEncoding()
     * can find it again.
     *
     * The tag is placed directly after the first <head ...> tag; if there is
     * none, a <head> holding it is inserted before the first <body ...> tag;
     * otherwise it is prepended to the input.
     *
     * @param string $html
     * @param string $encoding
     * @return string
     */
    private function insertEncoding($html, $encoding)
    {
        // The charset belongs inside the content attribute. A stray quote
        // after "charset=" used to close the attribute too early.
        $meta = '<meta id="ArcTempEncoding" http-equiv="content-type" content="text/html; charset='
            . htmlspecialchars($encoding) . '">';

        // Callbacks keep $meta out of the replacement-pattern syntax, so
        // "$1" or "\1" inside the encoding string is not treated as a
        // backreference. The lookahead stops <head from matching <header>,
        // and the limit of 1 ensures only a single tag is ever inserted.
        $count = 0;
        $withMeta = preg_replace_callback(
            '/<head(?=[\s\/>])[^>]*>/i',
            function ( $match ) use ( $meta ) {
                return $match[0] . $meta;
            },
            $html,
            1,
            $count
        );
        if ( $count ) {
            return $withMeta;
        }

        $withMeta = preg_replace_callback(
            '/<body(?=[\s\/>])[^>]*>/i',
            function ( $match ) use ( $meta ) {
                return '<head>' . $meta . '</head>' . $match[0];
            },
            $html,
            1,
            $count
        );
        if ( $count ) {
            return $withMeta;
        }

        return $meta . $html;
    }

    /**
     * Removes the temporary encoding <meta> tag added by insertEncoding().
     *
     * @param \DOMDocument $dom
     */
    private function removeEncoding( $dom )
    {
        $meta = $dom->getElementById('ArcTempEncoding');
        // getElementById() returns null when the element is not found.
        if ( $meta && $meta->parentNode ) {
            $meta->parentNode->removeChild($meta);
        }
    }

    /**
     * Loads a complete HTML document and wraps it in a Proxy.
     *
     * @param string      $html
     * @param string|null $encoding
     * @return \arc\html\Proxy
     * @throws \arc\Exception when DOMDocument::loadHTML() fails.
     */
    private function parseFull( $html, $encoding )
    {
        $dom = new \DOMDocument();

        // Prevents XXE attacks. libxml_disable_entity_loader() is deprecated
        // as of PHP 8.0, so it is only called on older versions. The previous
        // state is restored after loading, because leaving the loader disabled
        // process-wide breaks file-based libxml loading elsewhere.
        $entityLoaderChanged = false;
        $prevEntityLoader    = false;
        if ( \PHP_VERSION_ID < 80000 ) {
            $prevEntityLoader    = libxml_disable_entity_loader( true );
            $entityLoaderChanged = true;
        }

        $prevErrorSetting = libxml_use_internal_errors( true );
        if ( $encoding ) {
            $html = $this->insertEncoding( $html, $encoding );
        }

        $loaded = $dom->loadHTML( $html, $this->options['libxml_options'] );

        if ( $entityLoaderChanged ) {
            libxml_disable_entity_loader( $prevEntityLoader );
        }
        if ( !$loaded ) {
            $this->throwError( $prevErrorSetting );
        }
        if ( $encoding ) {
            $this->removeEncoding( $dom );
        }
        if ( !$prevErrorSetting ) {
            // Error buffering was switched on here, so drop the warnings
            // collected during this parse instead of letting them pile up.
            libxml_clear_errors();
        }
        libxml_use_internal_errors( $prevErrorSetting );
        return new \arc\html\Proxy( simplexml_import_dom( $dom ), $this );
    }

}
99