@@ -35,72 +35,72 @@ discard block |
||
35 | 35 | * @throws InvalidXmlException When parsing of XML with schema or callable produces any errors unrelated to the XML parsing itself |
36 | 36 | * @throws XmlParsingException When parsing of XML file returns error |
37 | 37 | */ |
38 | - public static function load( $content, $allowDocumentType = false, $setRootElement = null, $schemaOrCallable = null ) { |
|
39 | - if ( !extension_loaded( 'dom' ) ) { |
|
40 | - throw new RuntimeException( 'Extension DOM is required.' ); |
|
38 | + public static function load($content, $allowDocumentType = false, $setRootElement = null, $schemaOrCallable = null) { |
|
39 | + if (!extension_loaded('dom')) { |
|
40 | + throw new RuntimeException('Extension DOM is required.'); |
|
41 | 41 | } |
42 | 42 | |
43 | - $internalErrors = libxml_use_internal_errors( true ); |
|
43 | + $internalErrors = libxml_use_internal_errors(true); |
|
44 | 44 | $disableEntities = libxml_disable_entity_loader(); |
45 | 45 | libxml_clear_errors(); |
46 | 46 | |
47 | - $dom = new DOMDocument( '1.0', 'UTF-8' ); |
|
47 | + $dom = new DOMDocument('1.0', 'UTF-8'); |
|
48 | 48 | $dom->validateOnParse = true; |
49 | 49 | |
50 | - if ( is_string( $setRootElement ) && !empty( $setRootElement ) ) { |
|
50 | + if (is_string($setRootElement) && !empty($setRootElement)) { |
|
51 | 51 | $content = "<$setRootElement>$content</$setRootElement>"; |
52 | 52 | } |
53 | 53 | |
54 | - $res = $dom->loadXML( $content, LIBXML_NONET | LIBXML_NOBLANKS | ( defined( 'LIBXML_COMPACT' ) ? LIBXML_COMPACT : 0 ) ); |
|
54 | + $res = $dom->loadXML($content, LIBXML_NONET | LIBXML_NOBLANKS | (defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0)); |
|
55 | 55 | |
56 | - if ( !$res ) { |
|
57 | - libxml_disable_entity_loader( $disableEntities ); |
|
56 | + if (!$res) { |
|
57 | + libxml_disable_entity_loader($disableEntities); |
|
58 | 58 | |
59 | - throw new XmlParsingException( implode( "\n", static::getXmlErrors( $internalErrors ) ) ); |
|
59 | + throw new XmlParsingException(implode("\n", static::getXmlErrors($internalErrors))); |
|
60 | 60 | } |
61 | 61 | |
62 | 62 | $dom->normalizeDocument(); |
63 | 63 | |
64 | - libxml_use_internal_errors( $internalErrors ); |
|
65 | - libxml_disable_entity_loader( $disableEntities ); |
|
64 | + libxml_use_internal_errors($internalErrors); |
|
65 | + libxml_disable_entity_loader($disableEntities); |
|
66 | 66 | |
67 | - foreach ( $dom->childNodes as $child ) { |
|
68 | - if ( XML_DOCUMENT_TYPE_NODE === $child->nodeType && !$allowDocumentType ) { |
|
69 | - throw new XmlParsingException( 'Document types are not allowed.' ); |
|
67 | + foreach ($dom->childNodes as $child) { |
|
68 | + if (XML_DOCUMENT_TYPE_NODE === $child->nodeType && !$allowDocumentType) { |
|
69 | + throw new XmlParsingException('Document types are not allowed.'); |
|
70 | 70 | } |
71 | 71 | } |
72 | 72 | |
73 | - if ( null !== $schemaOrCallable ) { |
|
74 | - $internalErrors = libxml_use_internal_errors( true ); |
|
73 | + if (null !== $schemaOrCallable) { |
|
74 | + $internalErrors = libxml_use_internal_errors(true); |
|
75 | 75 | libxml_clear_errors(); |
76 | 76 | |
77 | 77 | $e = null; |
78 | - if ( is_callable( $schemaOrCallable ) ) { |
|
78 | + if (is_callable($schemaOrCallable)) { |
|
79 | 79 | try { |
80 | - $valid = call_user_func( $schemaOrCallable, $dom, $internalErrors ); |
|
81 | - } catch ( Exception $e ) { |
|
80 | + $valid = call_user_func($schemaOrCallable, $dom, $internalErrors); |
|
81 | + } catch (Exception $e) { |
|
82 | 82 | $valid = false; |
83 | 83 | } |
84 | - } elseif ( !is_array( $schemaOrCallable ) && is_file( (string)$schemaOrCallable ) ) { |
|
85 | - $schemaSource = file_get_contents( (string)$schemaOrCallable ); |
|
86 | - $valid = @$dom->schemaValidateSource( $schemaSource ); |
|
84 | + } elseif (!is_array($schemaOrCallable) && is_file((string)$schemaOrCallable)) { |
|
85 | + $schemaSource = file_get_contents((string)$schemaOrCallable); |
|
86 | + $valid = @$dom->schemaValidateSource($schemaSource); |
|
87 | 87 | } else { |
88 | - libxml_use_internal_errors( $internalErrors ); |
|
88 | + libxml_use_internal_errors($internalErrors); |
|
89 | 89 | |
90 | - throw new XmlParsingException( 'The schemaOrCallable argument has to be a valid path to XSD file or callable.' ); |
|
90 | + throw new XmlParsingException('The schemaOrCallable argument has to be a valid path to XSD file or callable.'); |
|
91 | 91 | } |
92 | 92 | |
93 | - if ( !$valid ) { |
|
94 | - $messages = static::getXmlErrors( $internalErrors ); |
|
95 | - if ( empty( $messages ) ) { |
|
96 | - throw new InvalidXmlException( 'The XML is not valid.', 0, $e ); |
|
93 | + if (!$valid) { |
|
94 | + $messages = static::getXmlErrors($internalErrors); |
|
95 | + if (empty($messages)) { |
|
96 | + throw new InvalidXmlException('The XML is not valid.', 0, $e); |
|
97 | 97 | } |
98 | - throw new XmlParsingException( implode( "\n", $messages ), 0, $e ); |
|
98 | + throw new XmlParsingException(implode("\n", $messages), 0, $e); |
|
99 | 99 | } |
100 | 100 | } |
101 | 101 | |
102 | 102 | libxml_clear_errors(); |
103 | - libxml_use_internal_errors( $internalErrors ); |
|
103 | + libxml_use_internal_errors($internalErrors); |
|
104 | 104 | |
105 | 105 | return $dom; |
106 | 106 | } |
@@ -110,14 +110,14 @@ discard block |
||
110 | 110 | * |
111 | 111 | * @return array |
112 | 112 | */ |
113 | - private static function getXmlErrors( $internalErrors ) { |
|
114 | - $errors = []; |
|
115 | - foreach ( libxml_get_errors() as $error ) { |
|
116 | - $errors[] = sprintf( |
|
113 | + private static function getXmlErrors($internalErrors) { |
|
114 | + $errors = [ ]; |
|
115 | + foreach (libxml_get_errors() as $error) { |
|
116 | + $errors[ ] = sprintf( |
|
117 | 117 | '[%s %s] %s (in %s - line %d, column %d)', |
118 | 118 | LIBXML_ERR_WARNING == $error->level ? 'WARNING' : 'ERROR', |
119 | 119 | $error->code, |
120 | - trim( $error->message ), |
|
120 | + trim($error->message), |
|
121 | 121 | $error->file ?: 'n/a', |
122 | 122 | $error->line, |
123 | 123 | $error->column |
@@ -125,7 +125,7 @@ discard block |
||
125 | 125 | } |
126 | 126 | |
127 | 127 | libxml_clear_errors(); |
128 | - libxml_use_internal_errors( $internalErrors ); |
|
128 | + libxml_use_internal_errors($internalErrors); |
|
129 | 129 | |
130 | 130 | return $errors; |
131 | 131 | } |
@@ -19,12 +19,12 @@ |
||
19 | 19 | */ |
20 | 20 | protected function getNodeListFromQueryPath() { |
21 | 21 | |
22 | - $xpath = new DOMXPath( $this->dom ); |
|
22 | + $xpath = new DOMXPath($this->dom); |
|
23 | 23 | |
24 | - if ( $this->isXmlFragment ) { |
|
25 | - $htmlNodeList = $xpath->query( "/" . self::fragmentDocumentRoot ); |
|
24 | + if ($this->isXmlFragment) { |
|
25 | + $htmlNodeList = $xpath->query("/" . self::fragmentDocumentRoot); |
|
26 | 26 | } else { |
27 | - $htmlNodeList = $xpath->query( "/html" ); |
|
27 | + $htmlNodeList = $xpath->query("/html"); |
|
28 | 28 | } |
29 | 29 | |
30 | 30 | return $htmlNodeList; |
@@ -17,14 +17,14 @@ |
||
17 | 17 | /** |
18 | 18 | * @return DOMNodeList |
19 | 19 | */ |
20 | - protected function getNodeListFromQueryPath(){ |
|
20 | + protected function getNodeListFromQueryPath() { |
|
21 | 21 | |
22 | - $xpath = new DOMXPath( $this->dom ); |
|
22 | + $xpath = new DOMXPath($this->dom); |
|
23 | 23 | |
24 | - if ( $this->isXmlFragment ) { |
|
25 | - $xmlNodeList = $xpath->query( "/" . self::fragmentDocumentRoot ); |
|
24 | + if ($this->isXmlFragment) { |
|
25 | + $xmlNodeList = $xpath->query("/" . self::fragmentDocumentRoot); |
|
26 | 26 | } else { |
27 | - $xmlNodeList = $xpath->query( "*" ); |
|
27 | + $xmlNodeList = $xpath->query("*"); |
|
28 | 28 | } |
29 | 29 | |
30 | 30 | return $xmlNodeList; |
@@ -76,10 +76,10 @@ discard block |
||
76 | 76 | * @throws InvalidXmlException |
77 | 77 | * @throws XmlParsingException |
78 | 78 | */ |
79 | - protected function __construct( $html, $isXmlFragment ) { |
|
80 | - $html = $this->removeNotPrintableChars( $html ); |
|
79 | + protected function __construct($html, $isXmlFragment) { |
|
80 | + $html = $this->removeNotPrintableChars($html); |
|
81 | 81 | $this->isXmlFragment = $isXmlFragment; |
82 | - $this->dom = XmlDomLoader::load( $html, true, ( $isXmlFragment ? self::fragmentDocumentRoot : null ) ); |
|
82 | + $this->dom = XmlDomLoader::load($html, true, ($isXmlFragment ? self::fragmentDocumentRoot : null)); |
|
83 | 83 | $this->elements = new ArrayObject(); |
84 | 84 | } |
85 | 85 | |
@@ -95,8 +95,8 @@ discard block |
||
95 | 95 | * @throws InvalidXmlException |
96 | 96 | * @throws XmlParsingException |
97 | 97 | */ |
98 | - public static function parse( $html, $isXmlFragment = false ) { |
|
99 | - $parser = new static( $html, $isXmlFragment ); |
|
98 | + public static function parse($html, $isXmlFragment = false) { |
|
99 | + $parser = new static($html, $isXmlFragment); |
|
100 | 100 | |
101 | 101 | return $parser->extractNodes(); |
102 | 102 | } |
@@ -109,36 +109,36 @@ discard block |
||
109 | 109 | * |
110 | 110 | * @return string |
111 | 111 | */ |
112 | - protected function removeNotPrintableChars( $seg ) { |
|
112 | + protected function removeNotPrintableChars($seg) { |
|
113 | 113 | |
114 | - preg_match_all( self::regexpAscii, $seg, $matches ); |
|
114 | + preg_match_all(self::regexpAscii, $seg, $matches); |
|
115 | 115 | |
116 | - if ( !empty( $matches[ 1 ] ) ) { |
|
116 | + if (!empty($matches[ 1 ])) { |
|
117 | 117 | $test_src = $seg; |
118 | - foreach ( $matches[ 1 ] as $v ) { |
|
119 | - $key = sprintf( "%02X", ord( $v ) ); |
|
120 | - $hexNum = sprintf( "/(\\x{%s})/u", $key ); |
|
121 | - $test_src = preg_replace( $hexNum, self::$asciiPlaceHoldMap[ $key ][ 'placeHold' ], $test_src, 1 ); |
|
118 | + foreach ($matches[ 1 ] as $v) { |
|
119 | + $key = sprintf("%02X", ord($v)); |
|
120 | + $hexNum = sprintf("/(\\x{%s})/u", $key); |
|
121 | + $test_src = preg_replace($hexNum, self::$asciiPlaceHoldMap[ $key ][ 'placeHold' ], $test_src, 1); |
|
122 | 122 | } |
123 | 123 | |
124 | 124 | $seg = $test_src; |
125 | 125 | } |
126 | 126 | |
127 | - preg_match_all( self::regexpEntity, $seg, $matches ); |
|
127 | + preg_match_all(self::regexpEntity, $seg, $matches); |
|
128 | 128 | |
129 | - if ( !empty( $matches[ 1 ] ) ) { |
|
129 | + if (!empty($matches[ 1 ])) { |
|
130 | 130 | $test_src = $seg; |
131 | - foreach ( $matches[ 1 ] as $v ) { |
|
132 | - $byte = sprintf( "%02X", hexdec( $v ) ); |
|
133 | - if ( $byte[ 0 ] == '0' ) { |
|
131 | + foreach ($matches[ 1 ] as $v) { |
|
132 | + $byte = sprintf("%02X", hexdec($v)); |
|
133 | + if ($byte[ 0 ] == '0') { |
|
134 | 134 | $regexp = '/&#x([' . $byte[ 0 ] . ']?' . $byte[ 1 ] . ');/u'; |
135 | 135 | } else { |
136 | 136 | $regexp = '/&#x(' . $byte . ');/u'; |
137 | 137 | } |
138 | 138 | |
139 | - $key = sprintf( "%02X", hexdec( $v ) ); |
|
140 | - if ( array_key_exists( $key, self::$asciiPlaceHoldMap ) ) { |
|
141 | - $test_src = preg_replace( $regexp, self::$asciiPlaceHoldMap[ $key ][ 'placeHold' ], $test_src ); |
|
139 | + $key = sprintf("%02X", hexdec($v)); |
|
140 | + if (array_key_exists($key, self::$asciiPlaceHoldMap)) { |
|
141 | + $test_src = preg_replace($regexp, self::$asciiPlaceHoldMap[ $key ][ 'placeHold' ], $test_src); |
|
142 | 142 | } |
143 | 143 | |
144 | 144 | } |
@@ -155,20 +155,20 @@ discard block |
||
155 | 155 | * |
156 | 156 | * @return ArrayObject |
157 | 157 | */ |
158 | - protected function mapElements( DOMNodeList $elementList, ArrayObject $elements ) { |
|
158 | + protected function mapElements(DOMNodeList $elementList, ArrayObject $elements) { |
|
159 | 159 | |
160 | - for ( $i = 0; $i < $elementList->length; $i++ ) { |
|
160 | + for ($i = 0; $i < $elementList->length; $i++) { |
|
161 | 161 | |
162 | - $element = $elementList->item( $i ); |
|
162 | + $element = $elementList->item($i); |
|
163 | 163 | |
164 | - $elements[] = (object)[ |
|
165 | - 'node' => $this->dom->saveXML( $element ), |
|
164 | + $elements[ ] = (object)[ |
|
165 | + 'node' => $this->dom->saveXML($element), |
|
166 | 166 | 'tagName' => $element->nodeName, |
167 | - 'attributes' => $this->getAttributes( $element ), |
|
168 | - 'text' => ( $element instanceof DOMText ? $element->textContent : null ), |
|
169 | - 'self_closed' => ( $element instanceof DOMText ) ? null : !$element->hasChildNodes(), |
|
170 | - 'has_children' => ( $element instanceof DOMText ) ? null : $element->hasChildNodes(), |
|
171 | - 'inner_html' => $element->hasChildNodes() ? $this->mapElements( $element->childNodes, new ArrayObject() ) : new ArrayObject() |
|
167 | + 'attributes' => $this->getAttributes($element), |
|
168 | + 'text' => ($element instanceof DOMText ? $element->textContent : null), |
|
169 | + 'self_closed' => ($element instanceof DOMText) ? null : !$element->hasChildNodes(), |
|
170 | + 'has_children' => ($element instanceof DOMText) ? null : $element->hasChildNodes(), |
|
171 | + 'inner_html' => $element->hasChildNodes() ? $this->mapElements($element->childNodes, new ArrayObject()) : new ArrayObject() |
|
172 | 172 | ]; |
173 | 173 | |
174 | 174 | } |
@@ -182,18 +182,18 @@ discard block |
||
182 | 182 | * |
183 | 183 | * @return array |
184 | 184 | */ |
185 | - protected function getAttributes( DOMNode $element ) { |
|
185 | + protected function getAttributes(DOMNode $element) { |
|
186 | 186 | |
187 | - if ( !$element->hasAttributes() ) { |
|
188 | - return []; |
|
187 | + if (!$element->hasAttributes()) { |
|
188 | + return [ ]; |
|
189 | 189 | } |
190 | 190 | |
191 | - $attributesMap = []; |
|
191 | + $attributesMap = [ ]; |
|
192 | 192 | |
193 | 193 | /** |
194 | 194 | * @var DOMAttr $attr |
195 | 195 | */ |
196 | - foreach ( $element->attributes as $attr ) { |
|
196 | + foreach ($element->attributes as $attr) { |
|
197 | 197 | $attributesMap[ $attr->nodeName ] = $attr->nodeValue; |
198 | 198 | } |
199 | 199 | |
@@ -209,15 +209,15 @@ discard block |
||
209 | 209 | |
210 | 210 | $htmlNodeList = $this->getNodeListFromQueryPath(); |
211 | 211 | |
212 | - if ( !$htmlNodeList instanceof DOMNodeList ) { |
|
213 | - throw new DOMException( 'Bad DOMNodeList' ); |
|
212 | + if (!$htmlNodeList instanceof DOMNodeList) { |
|
213 | + throw new DOMException('Bad DOMNodeList'); |
|
214 | 214 | } |
215 | 215 | |
216 | - if ( $this->isXmlFragment && $htmlNodeList->item( 0 )->nodeName == self::fragmentDocumentRoot ) { |
|
216 | + if ($this->isXmlFragment && $htmlNodeList->item(0)->nodeName == self::fragmentDocumentRoot) { |
|
217 | 217 | // there is a fake root node, skip the first element and start with child nodes |
218 | - $this->mapElements( $htmlNodeList->item( 0 )->childNodes, $this->elements ); |
|
218 | + $this->mapElements($htmlNodeList->item(0)->childNodes, $this->elements); |
|
219 | 219 | } else { |
220 | - $this->mapElements( $htmlNodeList, $this->elements ); |
|
220 | + $this->mapElements($htmlNodeList, $this->elements); |
|
221 | 221 | } |
222 | 222 | return $this->elements; |
223 | 223 |