1 | <?php |
||
5 | class Parser |
||
6 | { |
||
7 | public $options = [ |
||
8 | 'libxml_options' => 0 |
||
9 | ]; |
||
10 | |||
11 | 4 | public function __construct( $options = array() ) |
|
20 | |||
/**
 * Parses HTML into a proxy object.
 *
 * Empty input yields a proxy wrapping null, and a value that is already a
 * Proxy instance is returned untouched. Anything else is cast to string and
 * handed to parseFull() when it contains a <body> start tag, or to
 * parsePartial() when it does not.
 *
 * @param mixed       $html     HTML source, an existing Proxy, or an empty value.
 * @param string|null $encoding Passed through unchanged to the parse step.
 * @return mixed A \arc\html\Proxy for empty or partial input, the given Proxy
 *               when already parsed, otherwise whatever parseFull() returns.
 */
public function parse( $html, $encoding = null )
{
    if ( !$html ) {
        // The proxy must be instantiated; without `new` this was a call to
        // a non-existent function and ended in a fatal error.
        return new \arc\html\Proxy( null, $this );
    }
    if ( $html instanceof Proxy ) { // already parsed
        return $html;
    }
    $html = (string) $html;
    // A <body> start tag may carry attributes (<body class="x">); such input
    // is a full document too and must not be wrapped in a second body.
    if ( preg_match('/<body(\s[^>]*)?>/i', $html) ) {
        return $this->parseFull( $html, $encoding );
    }
    return $this->parsePartial( $html, $encoding );
}
||
36 | |||
/**
 * Parses an HTML fragment by wrapping it in a temporary <body> element.
 *
 * The fragment is enclosed in a body tagged with the id "ArcPartialHTML",
 * parsed through parseFull(), and the children of that wrapper are returned
 * inside a new proxy.
 *
 * @param string      $html     The HTML fragment.
 * @param string|null $encoding Passed through unchanged to parseFull().
 * @return \arc\html\Proxy Proxy around the children of the wrapper element.
 * @throws \arc\Exception When parseFull() returns a falsy result.
 */
private function parsePartial( $html, $encoding )
{
    $wrapped  = '<body id="ArcPartialHTML">'.$html.'</body>';
    $document = $this->parseFull( $wrapped, $encoding );
    if ( !$document ) {
        throw new \arc\Exception('parse error');
    }
    $wrapper = $document->find('#ArcPartialHTML')[0];

    return new \arc\html\Proxy( $wrapper->children(), $this );
}
||
47 | |||
48 | private function throwError($prevErrorSetting) |
||
59 | |||
/**
 * Injects a temporary <meta> charset declaration into an HTML string.
 *
 * The meta element carries the id "ArcTempEncoding". It is placed directly
 * after the first <head> start tag; when there is no head, a new
 * <head>...</head> holding it is put in front of the first <body> start
 * tag; when there is neither, it is prepended to the input.
 *
 * @param string $html     The HTML source.
 * @param string $encoding Character set name to declare.
 * @return string|null The HTML with the declaration inserted (null if the
 *                     regular expression engine reports an error).
 */
private function insertEncoding($html, $encoding)
{
    // The charset value belongs inside the content attribute; an extra quote
    // after "charset=" used to close the attribute before the encoding name.
    $meta = '<meta id="ArcTempEncoding" http-equiv="content-type" content="text/html; charset='
        . htmlspecialchars($encoding) . '">';

    // Require whitespace or '>' after the tag name so <header> is not
    // mistaken for <head>.
    $headTag = '/<head(\s[^>]*)?>/i';
    $bodyTag = '/<body(\s[^>]*)?>/i';

    // Callbacks keep '$1' / '\1' sequences in the encoding name from being
    // read as backreferences, and the limit of 1 guarantees that exactly one
    // element with this id is inserted.
    if ( preg_match($headTag, $html) ) {
        return preg_replace_callback($headTag, function ($match) use ($meta) {
            return $match[0].$meta;
        }, $html, 1);
    }
    if ( preg_match($bodyTag, $html) ) {
        return preg_replace_callback($bodyTag, function ($match) use ($meta) {
            return '<head>'.$meta.'</head>'.$match[0];
        }, $html, 1);
    }

    return $meta.$html;
}
||
72 | |||
/**
 * Removes the temporary charset <meta> element from a parsed document.
 *
 * Looks up the element with id "ArcTempEncoding" and detaches it from its
 * parent. Does nothing when the document holds no such element.
 *
 * @param \DOMDocument $dom The document to clean up; modified in place.
 * @return void
 */
private function removeEncoding( $dom )
{
    $meta = $dom->getElementById('ArcTempEncoding');
    // getElementById() returns null when nothing matches; dereferencing that
    // would be a fatal error.
    if ( $meta !== null && $meta->parentNode !== null ) {
        $meta->parentNode->removeChild($meta);
    }
}
|
78 | |||
79 | 4 | private function parseFull( $html, $encoding ) |
|
96 | |||
97 | } |
||
98 |