@@ -35,72 +35,72 @@ discard block |
||
| 35 | 35 | * @throws InvalidXmlException When parsing of XML with schema or callable produces any errors unrelated to the XML parsing itself |
| 36 | 36 | * @throws XmlParsingException When parsing of XML file returns error |
| 37 | 37 | */ |
| 38 | - public static function load( $content, $allowDocumentType = false, $setRootElement = null, $schemaOrCallable = null ) { |
|
| 39 | - if ( !extension_loaded( 'dom' ) ) { |
|
| 40 | - throw new RuntimeException( 'Extension DOM is required.' ); |
|
| 38 | + public static function load($content, $allowDocumentType = false, $setRootElement = null, $schemaOrCallable = null) { |
|
| 39 | + if (!extension_loaded('dom')) { |
|
| 40 | + throw new RuntimeException('Extension DOM is required.'); |
|
| 41 | 41 | } |
| 42 | 42 | |
| 43 | - $internalErrors = libxml_use_internal_errors( true ); |
|
| 43 | + $internalErrors = libxml_use_internal_errors(true); |
|
| 44 | 44 | $disableEntities = libxml_disable_entity_loader(); |
| 45 | 45 | libxml_clear_errors(); |
| 46 | 46 | |
| 47 | - $dom = new DOMDocument( '1.0', 'UTF-8' ); |
|
| 47 | + $dom = new DOMDocument('1.0', 'UTF-8'); |
|
| 48 | 48 | $dom->validateOnParse = true; |
| 49 | 49 | |
| 50 | - if ( is_string( $setRootElement ) && !empty( $setRootElement ) ) { |
|
| 50 | + if (is_string($setRootElement) && !empty($setRootElement)) { |
|
| 51 | 51 | $content = "<$setRootElement>$content</$setRootElement>"; |
| 52 | 52 | } |
| 53 | 53 | |
| 54 | - $res = $dom->loadXML( $content, LIBXML_NONET | LIBXML_NOBLANKS | ( defined( 'LIBXML_COMPACT' ) ? LIBXML_COMPACT : 0 ) ); |
|
| 54 | + $res = $dom->loadXML($content, LIBXML_NONET | LIBXML_NOBLANKS | (defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0)); |
|
| 55 | 55 | |
| 56 | - if ( !$res ) { |
|
| 57 | - libxml_disable_entity_loader( $disableEntities ); |
|
| 56 | + if (!$res) { |
|
| 57 | + libxml_disable_entity_loader($disableEntities); |
|
| 58 | 58 | |
| 59 | - throw new XmlParsingException( implode( "\n", static::getXmlErrors( $internalErrors ) ) ); |
|
| 59 | + throw new XmlParsingException(implode("\n", static::getXmlErrors($internalErrors))); |
|
| 60 | 60 | } |
| 61 | 61 | |
| 62 | 62 | $dom->normalizeDocument(); |
| 63 | 63 | |
| 64 | - libxml_use_internal_errors( $internalErrors ); |
|
| 65 | - libxml_disable_entity_loader( $disableEntities ); |
|
| 64 | + libxml_use_internal_errors($internalErrors); |
|
| 65 | + libxml_disable_entity_loader($disableEntities); |
|
| 66 | 66 | |
| 67 | - foreach ( $dom->childNodes as $child ) { |
|
| 68 | - if ( XML_DOCUMENT_TYPE_NODE === $child->nodeType && !$allowDocumentType ) { |
|
| 69 | - throw new XmlParsingException( 'Document types are not allowed.' ); |
|
| 67 | + foreach ($dom->childNodes as $child) { |
|
| 68 | + if (XML_DOCUMENT_TYPE_NODE === $child->nodeType && !$allowDocumentType) { |
|
| 69 | + throw new XmlParsingException('Document types are not allowed.'); |
|
| 70 | 70 | } |
| 71 | 71 | } |
| 72 | 72 | |
| 73 | - if ( null !== $schemaOrCallable ) { |
|
| 74 | - $internalErrors = libxml_use_internal_errors( true ); |
|
| 73 | + if (null !== $schemaOrCallable) { |
|
| 74 | + $internalErrors = libxml_use_internal_errors(true); |
|
| 75 | 75 | libxml_clear_errors(); |
| 76 | 76 | |
| 77 | 77 | $e = null; |
| 78 | - if ( is_callable( $schemaOrCallable ) ) { |
|
| 78 | + if (is_callable($schemaOrCallable)) { |
|
| 79 | 79 | try { |
| 80 | - $valid = call_user_func( $schemaOrCallable, $dom, $internalErrors ); |
|
| 81 | - } catch ( Exception $e ) { |
|
| 80 | + $valid = call_user_func($schemaOrCallable, $dom, $internalErrors); |
|
| 81 | + } catch (Exception $e) { |
|
| 82 | 82 | $valid = false; |
| 83 | 83 | } |
| 84 | - } elseif ( !is_array( $schemaOrCallable ) && is_file( (string)$schemaOrCallable ) ) { |
|
| 85 | - $schemaSource = file_get_contents( (string)$schemaOrCallable ); |
|
| 86 | - $valid = @$dom->schemaValidateSource( $schemaSource ); |
|
| 84 | + } elseif (!is_array($schemaOrCallable) && is_file((string)$schemaOrCallable)) { |
|
| 85 | + $schemaSource = file_get_contents((string)$schemaOrCallable); |
|
| 86 | + $valid = @$dom->schemaValidateSource($schemaSource); |
|
| 87 | 87 | } else { |
| 88 | - libxml_use_internal_errors( $internalErrors ); |
|
| 88 | + libxml_use_internal_errors($internalErrors); |
|
| 89 | 89 | |
| 90 | - throw new XmlParsingException( 'The schemaOrCallable argument has to be a valid path to XSD file or callable.' ); |
|
| 90 | + throw new XmlParsingException('The schemaOrCallable argument has to be a valid path to XSD file or callable.'); |
|
| 91 | 91 | } |
| 92 | 92 | |
| 93 | - if ( !$valid ) { |
|
| 94 | - $messages = static::getXmlErrors( $internalErrors ); |
|
| 95 | - if ( empty( $messages ) ) { |
|
| 96 | - throw new InvalidXmlException( 'The XML is not valid.', 0, $e ); |
|
| 93 | + if (!$valid) { |
|
| 94 | + $messages = static::getXmlErrors($internalErrors); |
|
| 95 | + if (empty($messages)) { |
|
| 96 | + throw new InvalidXmlException('The XML is not valid.', 0, $e); |
|
| 97 | 97 | } |
| 98 | - throw new XmlParsingException( implode( "\n", $messages ), 0, $e ); |
|
| 98 | + throw new XmlParsingException(implode("\n", $messages), 0, $e); |
|
| 99 | 99 | } |
| 100 | 100 | } |
| 101 | 101 | |
| 102 | 102 | libxml_clear_errors(); |
| 103 | - libxml_use_internal_errors( $internalErrors ); |
|
| 103 | + libxml_use_internal_errors($internalErrors); |
|
| 104 | 104 | |
| 105 | 105 | return $dom; |
| 106 | 106 | } |
@@ -110,14 +110,14 @@ discard block |
||
| 110 | 110 | * |
| 111 | 111 | * @return array |
| 112 | 112 | */ |
| 113 | - private static function getXmlErrors( $internalErrors ) { |
|
| 114 | - $errors = []; |
|
| 115 | - foreach ( libxml_get_errors() as $error ) { |
|
| 116 | - $errors[] = sprintf( |
|
| 113 | + private static function getXmlErrors($internalErrors) { |
|
| 114 | + $errors = [ ]; |
|
| 115 | + foreach (libxml_get_errors() as $error) { |
|
| 116 | + $errors[ ] = sprintf( |
|
| 117 | 117 | '[%s %s] %s (in %s - line %d, column %d)', |
| 118 | 118 | LIBXML_ERR_WARNING == $error->level ? 'WARNING' : 'ERROR', |
| 119 | 119 | $error->code, |
| 120 | - trim( $error->message ), |
|
| 120 | + trim($error->message), |
|
| 121 | 121 | $error->file ?: 'n/a', |
| 122 | 122 | $error->line, |
| 123 | 123 | $error->column |
@@ -125,7 +125,7 @@ discard block |
||
| 125 | 125 | } |
| 126 | 126 | |
| 127 | 127 | libxml_clear_errors(); |
| 128 | - libxml_use_internal_errors( $internalErrors ); |
|
| 128 | + libxml_use_internal_errors($internalErrors); |
|
| 129 | 129 | |
| 130 | 130 | return $errors; |
| 131 | 131 | } |
@@ -19,12 +19,12 @@ |
||
| 19 | 19 | */ |
| 20 | 20 | protected function getNodeListFromQueryPath() { |
| 21 | 21 | |
| 22 | - $xpath = new DOMXPath( $this->dom ); |
|
| 22 | + $xpath = new DOMXPath($this->dom); |
|
| 23 | 23 | |
| 24 | - if ( $this->isXmlFragment ) { |
|
| 25 | - $htmlNodeList = $xpath->query( "/" . self::fragmentDocumentRoot ); |
|
| 24 | + if ($this->isXmlFragment) { |
|
| 25 | + $htmlNodeList = $xpath->query("/" . self::fragmentDocumentRoot); |
|
| 26 | 26 | } else { |
| 27 | - $htmlNodeList = $xpath->query( "/html" ); |
|
| 27 | + $htmlNodeList = $xpath->query("/html"); |
|
| 28 | 28 | } |
| 29 | 29 | |
| 30 | 30 | return $htmlNodeList; |
@@ -17,14 +17,14 @@ |
||
| 17 | 17 | /** |
| 18 | 18 | * @return DOMNodeList |
| 19 | 19 | */ |
| 20 | - protected function getNodeListFromQueryPath(){ |
|
| 20 | + protected function getNodeListFromQueryPath() { |
|
| 21 | 21 | |
| 22 | - $xpath = new DOMXPath( $this->dom ); |
|
| 22 | + $xpath = new DOMXPath($this->dom); |
|
| 23 | 23 | |
| 24 | - if ( $this->isXmlFragment ) { |
|
| 25 | - $xmlNodeList = $xpath->query( "/" . self::fragmentDocumentRoot ); |
|
| 24 | + if ($this->isXmlFragment) { |
|
| 25 | + $xmlNodeList = $xpath->query("/" . self::fragmentDocumentRoot); |
|
| 26 | 26 | } else { |
| 27 | - $xmlNodeList = $xpath->query( "*" ); |
|
| 27 | + $xmlNodeList = $xpath->query("*"); |
|
| 28 | 28 | } |
| 29 | 29 | |
| 30 | 30 | return $xmlNodeList; |
@@ -76,10 +76,10 @@ discard block |
||
| 76 | 76 | * @throws InvalidXmlException |
| 77 | 77 | * @throws XmlParsingException |
| 78 | 78 | */ |
| 79 | - protected function __construct( $html, $isXmlFragment ) { |
|
| 80 | - $html = $this->removeNotPrintableChars( $html ); |
|
| 79 | + protected function __construct($html, $isXmlFragment) { |
|
| 80 | + $html = $this->removeNotPrintableChars($html); |
|
| 81 | 81 | $this->isXmlFragment = $isXmlFragment; |
| 82 | - $this->dom = XmlDomLoader::load( $html, true, ( $isXmlFragment ? self::fragmentDocumentRoot : null ) ); |
|
| 82 | + $this->dom = XmlDomLoader::load($html, true, ($isXmlFragment ? self::fragmentDocumentRoot : null)); |
|
| 83 | 83 | $this->elements = new ArrayObject(); |
| 84 | 84 | } |
| 85 | 85 | |
@@ -95,8 +95,8 @@ discard block |
||
| 95 | 95 | * @throws InvalidXmlException |
| 96 | 96 | * @throws XmlParsingException |
| 97 | 97 | */ |
| 98 | - public static function parse( $html, $isXmlFragment = false ) { |
|
| 99 | - $parser = new static( $html, $isXmlFragment ); |
|
| 98 | + public static function parse($html, $isXmlFragment = false) { |
|
| 99 | + $parser = new static($html, $isXmlFragment); |
|
| 100 | 100 | |
| 101 | 101 | return $parser->extractNodes(); |
| 102 | 102 | } |
@@ -109,36 +109,36 @@ discard block |
||
| 109 | 109 | * |
| 110 | 110 | * @return string |
| 111 | 111 | */ |
| 112 | - protected function removeNotPrintableChars( $seg ) { |
|
| 112 | + protected function removeNotPrintableChars($seg) { |
|
| 113 | 113 | |
| 114 | - preg_match_all( self::regexpAscii, $seg, $matches ); |
|
| 114 | + preg_match_all(self::regexpAscii, $seg, $matches); |
|
| 115 | 115 | |
| 116 | - if ( !empty( $matches[ 1 ] ) ) { |
|
| 116 | + if (!empty($matches[ 1 ])) { |
|
| 117 | 117 | $test_src = $seg; |
| 118 | - foreach ( $matches[ 1 ] as $v ) { |
|
| 119 | - $key = sprintf( "%02X", ord( $v ) ); |
|
| 120 | - $hexNum = sprintf( "/(\\x{%s})/u", $key ); |
|
| 121 | - $test_src = preg_replace( $hexNum, self::$asciiPlaceHoldMap[ $key ][ 'placeHold' ], $test_src, 1 ); |
|
| 118 | + foreach ($matches[ 1 ] as $v) { |
|
| 119 | + $key = sprintf("%02X", ord($v)); |
|
| 120 | + $hexNum = sprintf("/(\\x{%s})/u", $key); |
|
| 121 | + $test_src = preg_replace($hexNum, self::$asciiPlaceHoldMap[ $key ][ 'placeHold' ], $test_src, 1); |
|
| 122 | 122 | } |
| 123 | 123 | |
| 124 | 124 | $seg = $test_src; |
| 125 | 125 | } |
| 126 | 126 | |
| 127 | - preg_match_all( self::regexpEntity, $seg, $matches ); |
|
| 127 | + preg_match_all(self::regexpEntity, $seg, $matches); |
|
| 128 | 128 | |
| 129 | - if ( !empty( $matches[ 1 ] ) ) { |
|
| 129 | + if (!empty($matches[ 1 ])) { |
|
| 130 | 130 | $test_src = $seg; |
| 131 | - foreach ( $matches[ 1 ] as $v ) { |
|
| 132 | - $byte = sprintf( "%02X", hexdec( $v ) ); |
|
| 133 | - if ( $byte[ 0 ] == '0' ) { |
|
| 131 | + foreach ($matches[ 1 ] as $v) { |
|
| 132 | + $byte = sprintf("%02X", hexdec($v)); |
|
| 133 | + if ($byte[ 0 ] == '0') { |
|
| 134 | 134 | $regexp = '/&#x([' . $byte[ 0 ] . ']?' . $byte[ 1 ] . ');/u'; |
| 135 | 135 | } else { |
| 136 | 136 | $regexp = '/&#x(' . $byte . ');/u'; |
| 137 | 137 | } |
| 138 | 138 | |
| 139 | - $key = sprintf( "%02X", hexdec( $v ) ); |
|
| 140 | - if ( array_key_exists( $key, self::$asciiPlaceHoldMap ) ) { |
|
| 141 | - $test_src = preg_replace( $regexp, self::$asciiPlaceHoldMap[ $key ][ 'placeHold' ], $test_src ); |
|
| 139 | + $key = sprintf("%02X", hexdec($v)); |
|
| 140 | + if (array_key_exists($key, self::$asciiPlaceHoldMap)) { |
|
| 141 | + $test_src = preg_replace($regexp, self::$asciiPlaceHoldMap[ $key ][ 'placeHold' ], $test_src); |
|
| 142 | 142 | } |
| 143 | 143 | |
| 144 | 144 | } |
@@ -155,20 +155,20 @@ discard block |
||
| 155 | 155 | * |
| 156 | 156 | * @return ArrayObject |
| 157 | 157 | */ |
| 158 | - protected function mapElements( DOMNodeList $elementList, ArrayObject $elements ) { |
|
| 158 | + protected function mapElements(DOMNodeList $elementList, ArrayObject $elements) { |
|
| 159 | 159 | |
| 160 | - for ( $i = 0; $i < $elementList->length; $i++ ) { |
|
| 160 | + for ($i = 0; $i < $elementList->length; $i++) { |
|
| 161 | 161 | |
| 162 | - $element = $elementList->item( $i ); |
|
| 162 | + $element = $elementList->item($i); |
|
| 163 | 163 | |
| 164 | - $elements[] = (object)[ |
|
| 165 | - 'node' => $this->dom->saveXML( $element ), |
|
| 164 | + $elements[ ] = (object)[ |
|
| 165 | + 'node' => $this->dom->saveXML($element), |
|
| 166 | 166 | 'tagName' => $element->nodeName, |
| 167 | - 'attributes' => $this->getAttributes( $element ), |
|
| 168 | - 'text' => ( $element instanceof DOMText ? $element->textContent : null ), |
|
| 169 | - 'self_closed' => ( $element instanceof DOMText ) ? null : !$element->hasChildNodes(), |
|
| 170 | - 'has_children' => ( $element instanceof DOMText ) ? null : $element->hasChildNodes(), |
|
| 171 | - 'inner_html' => $element->hasChildNodes() ? $this->mapElements( $element->childNodes, new ArrayObject() ) : new ArrayObject() |
|
| 167 | + 'attributes' => $this->getAttributes($element), |
|
| 168 | + 'text' => ($element instanceof DOMText ? $element->textContent : null), |
|
| 169 | + 'self_closed' => ($element instanceof DOMText) ? null : !$element->hasChildNodes(), |
|
| 170 | + 'has_children' => ($element instanceof DOMText) ? null : $element->hasChildNodes(), |
|
| 171 | + 'inner_html' => $element->hasChildNodes() ? $this->mapElements($element->childNodes, new ArrayObject()) : new ArrayObject() |
|
| 172 | 172 | ]; |
| 173 | 173 | |
| 174 | 174 | } |
@@ -182,18 +182,18 @@ discard block |
||
| 182 | 182 | * |
| 183 | 183 | * @return array |
| 184 | 184 | */ |
| 185 | - protected function getAttributes( DOMNode $element ) { |
|
| 185 | + protected function getAttributes(DOMNode $element) { |
|
| 186 | 186 | |
| 187 | - if ( !$element->hasAttributes() ) { |
|
| 188 | - return []; |
|
| 187 | + if (!$element->hasAttributes()) { |
|
| 188 | + return [ ]; |
|
| 189 | 189 | } |
| 190 | 190 | |
| 191 | - $attributesMap = []; |
|
| 191 | + $attributesMap = [ ]; |
|
| 192 | 192 | |
| 193 | 193 | /** |
| 194 | 194 | * @var DOMAttr $attr |
| 195 | 195 | */ |
| 196 | - foreach ( $element->attributes as $attr ) { |
|
| 196 | + foreach ($element->attributes as $attr) { |
|
| 197 | 197 | $attributesMap[ $attr->nodeName ] = $attr->nodeValue; |
| 198 | 198 | } |
| 199 | 199 | |
@@ -209,15 +209,15 @@ discard block |
||
| 209 | 209 | |
| 210 | 210 | $htmlNodeList = $this->getNodeListFromQueryPath(); |
| 211 | 211 | |
| 212 | - if ( !$htmlNodeList instanceof DOMNodeList ) { |
|
| 213 | - throw new DOMException( 'Bad DOMNodeList' ); |
|
| 212 | + if (!$htmlNodeList instanceof DOMNodeList) { |
|
| 213 | + throw new DOMException('Bad DOMNodeList'); |
|
| 214 | 214 | } |
| 215 | 215 | |
| 216 | - if ( $this->isXmlFragment && $htmlNodeList->item( 0 )->nodeName == self::fragmentDocumentRoot ) { |
|
| 216 | + if ($this->isXmlFragment && $htmlNodeList->item(0)->nodeName == self::fragmentDocumentRoot) { |
|
| 217 | 217 | // there is a fake root node, skip the first element end start with child nodes |
| 218 | - $this->mapElements( $htmlNodeList->item( 0 )->childNodes, $this->elements ); |
|
| 218 | + $this->mapElements($htmlNodeList->item(0)->childNodes, $this->elements); |
|
| 219 | 219 | } else { |
| 220 | - $this->mapElements( $htmlNodeList, $this->elements ); |
|
| 220 | + $this->mapElements($htmlNodeList, $this->elements); |
|
| 221 | 221 | } |
| 222 | 222 | return $this->elements; |
| 223 | 223 | |