| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  * File containing the {@see AppUtils\XMLHelper_HTMLLoader} class. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  * @package Application Utils | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  * @subpackage XMLHelper | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  * @see XMLHelper_HTMLLoader | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | declare(strict_types=1); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | namespace AppUtils; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  * Wrapper around the `DOMDocument->loadHTML()` method to | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  * make it easier to work with, and add a number of utility | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |  * methods around it.  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  *  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |  * Usage: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |  *  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |  * <code> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  * <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |  * // Create a loader from a full HTML document string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |  * $loader = XMLHelper_HTMLLoader::loadHTML($htmlDocument); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |  *  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |  * // Create a loader from an HTML fragment | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |  * $loader = XMLHelper_HTMLLoader::loadFragment('<p>Fragment</p>'); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |  * ?> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |  * </code> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |  * @package Application Utils | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |  * @subpackage XMLHelper | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |  * @author Sebastian Mordziol <[email protected]> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  | class XMLHelper_HTMLLoader | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  | { | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Error code of the exception thrown when a string handed in as
                 * a fragment already contains a body tag or a doctype.
                 */
                public const ERROR_STRING_ALREADY_HAS_BODY_TAG = 57001;

                /**
                 * NOTE(review): presumably the body element of the loaded
                 * document; it is not assigned in the visible part of the
                 * class, so confirm against load().
                 *
                 * @var \DOMElement
                 */
                private $bodyNode;

                /**
                 * NOTE(review): presumably the errors collected while loading
                 * the markup; confirm against load().
                 *
                 * @var XMLHelper_DOMErrors
                 */
                private $errors;

                /**
                 * Skeleton HTML document used to wrap fragments: a sprintf()
                 * format string whose single placeholder (%1$s) is located
                 * inside the body element.
                 *
                 * @var string
                 */
                private static $htmlTemplate =
                '<!DOCTYPE html>'.
                '<html>'.
                    '<head>'.
                        '<meta charset="utf-8">'.
                    '</head>'.
                    '<body>'.
                        '%1$s'.
                    '</body>'.
                '</html>';

                /**
                 * NOTE(review): presumably the document instance the markup is
                 * loaded into; confirm against load().
                 *
                 * @var \DOMDocument
                 */
                private $dom;
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                private function __construct(string $html)
                {
                    // The constructor is private: instances are created through
                    // the static factory methods, and the markup they pass in
                    // is handed straight to load().
                    $this->load($html);
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |      | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |    /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |     * Creates an HTML loader from an HTML fragment (without | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |     * doctype, head and body elements). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |     *  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |     * @param string $fragment | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |     * @return XMLHelper_HTMLLoader | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |     */ | 
            
                                                                                                            
                            
            
                                    
            
            
                public static function loadFragment(string $fragment) : XMLHelper_HTMLLoader
                {
                    // Throws when the fragment already carries a body tag or
                    // a doctype, since it is about to be wrapped in both.
                    self::checkFragment($fragment);

                    // Drop the fragment into the body of the skeleton document
                    // and build the loader from the resulting full document.
                    return new self(sprintf(self::$htmlTemplate, $fragment));
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |      | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |    /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |     * Creates an HTML loader from a full HTML document (including | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |     * doctype, head and body elements). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |     *  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |     * @param string $html | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |     * @return XMLHelper_HTMLLoader | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |     */ | 
            
                                                                                                            
                            
            
                                    
            
            
                public static function loadHTML(string $html) : XMLHelper_HTMLLoader
                {
                    // Unlike fragments, a full document is neither checked nor
                    // wrapped here: the string goes to the constructor as-is.
                    $loader = new self($html);

                    return $loader;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |    /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |     * Verifies that the fragment does not already contain a body element or doctype. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |     *  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |     * @param string $fragment | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |     * @throws XMLHelper_Exception | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |     */ | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Verifies that the HTML fragment does not already contain a
                 * `<body>` tag or a doctype declaration: according to the
                 * exception thrown here, both conflict with the conversion
                 * process.
                 *
                 * Only an actual `<!doctype` declaration is rejected. The plain
                 * word "doctype" appearing somewhere in the fragment's text is
                 * accepted. Both checks are case insensitive.
                 *
                 * @param string $fragment The HTML fragment to check.
                 * @throws XMLHelper_Exception With the code
                 *         {@see self::ERROR_STRING_ALREADY_HAS_BODY_TAG}.
                 */
                private static function checkFragment(string $fragment) : void
                {
                    // stripos() only reports whether the marker is present,
                    // with an explicit comparison instead of relying on the
                    // truthiness of the substring returned by stristr().
                    $hasBody = stripos($fragment, '<body') !== false;
                    $hasDoctype = stripos($fragment, '<!doctype') !== false;

                    if(!$hasBody && !$hasDoctype)
                    {
                        return;
                    }

                    throw new XMLHelper_Exception(
                        'Cannot convert string with existing body or doctype',
                        sprintf(
                            'The string already contains a body tag or doctype, which conflicts with the conversion process. Source string: [%s]',
                            htmlspecialchars($fragment)
                        ),
                        self::ERROR_STRING_ALREADY_HAS_BODY_TAG
                    );
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |          | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Parses the HTML string into a new DOM document, collects the
                 * LibXML errors raised while parsing, and stores the first
                 * `<body>` element found in the resulting document.
                 *
                 * LibXML's internal error handling is switched on for the
                 * duration of the parsing, and restored to its previous state
                 * afterwards, even if the parsing throws.
                 *
                 * @param string $html The HTML markup to load.
                 */
                private function load(string $html) : void
                {
                    $prev = libxml_use_internal_errors(true);

                    // If internal error handling was already enabled, the
                    // buffer can still hold errors from unrelated parsing:
                    // drop them so that only this document's errors are
                    // collected.
                    libxml_clear_errors();

                    try
                    {
                        $this->dom = new \DOMDocument();
                        $this->dom->preserveWhiteSpace = false;
                        $this->dom->loadHTML($html);

                        $this->errors = new XMLHelper_DOMErrors(libxml_get_errors());
                    }
                    finally
                    {
                        // The errors have been copied by libxml_get_errors(),
                        // so the buffer can be emptied to avoid leaking them
                        // into later parsing operations.
                        libxml_clear_errors();
                        libxml_use_internal_errors($prev);
                    }

                    // NOTE(review): item(0) returns null if the document has
                    // no body element - confirm that callers always provide
                    // markup for which a body is present.
                    $this->bodyNode = $this->dom->getElementsByTagName('body')->item(0);
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |      | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Retrieves the `<body>` element that was stored when the
                 * HTML was loaded.
                 *
                 * @return \DOMElement
                 */
                public function getBodyNode() : \DOMElement
                {
                    return $this->bodyNode;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  |      | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Retrieves the DOM document instance that the HTML
                 * was loaded into.
                 *
                 * @return \DOMDocument
                 */
                public function getDOM() : \DOMDocument
                {
                    return $this->dom;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |      | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Retrieves the nodes that make up the HTML fragment,
                 * i.e. the child nodes of the `<body>` element.
                 *
                 * @return \DOMNodeList
                 */
                public function getFragmentNodes() : \DOMNodeList
                {
                    $body = $this->bodyNode;

                    return $body->childNodes;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  |      | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Retrieves the collection of LibXML errors gathered while
                 * the HTML document was parsed. Use it to review any problems
                 * that occurred during loading.
                 *
                 * @return XMLHelper_DOMErrors
                 */
                public function getErrors() : XMLHelper_DOMErrors
                {
                    return $this->errors;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 177 |  |  |      | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Serializes the whole DOM document to an HTML string.
                 *
                 * @return string
                 */
                public function toHTML() : string
                {
                    $document = $this->dom;

                    return $document->saveHTML();
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 187 |  |  |      | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Serializes the whole DOM document to an XML string.
                 *
                 * @return string
                 */
                public function toXML() : string
                {
                    $document = $this->dom;

                    return $document->saveXML();
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 197 |  |  |      | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Serializes the HTML fragment to XML: every child node of
                 * the `<body>` element is converted, while the body tag
                 * itself is left out.
                 *
                 * @return string
                 */
                public function fragmentToXML() : string
                {
                    $parts = [];

                    // Each child is serialized on its own, which keeps the
                    // enclosing body tag out of the result.
                    foreach($this->getFragmentNodes() as $node)
                    {
                        $parts[] = $this->dom->saveXML($node);
                    }

                    return implode('', $parts);
                }
            
                                                                                                            
                                                                
            
                                    
            
            
                | 217 |  |  | } | 
            
                                                        
            
                                    
            
            
                | 218 |  |  |  |