Passed
Push — master ( 16baae...6b22f3 )
by Sebastian
02:40
created

XMLHelper_HTMLLoader   A

Complexity

Total Complexity 15

Size/Duplication

Total Lines 181
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 15
eloc 46
dl 0
loc 181
c 1
b 0
f 0
rs 10

12 Methods

Rating   Name   Duplication   Size   Complexity  
A toXML() 0 3 1
A getErrors() 0 3 1
A getBodyNode() 0 3 1
A checkFragment() 0 14 3
A __construct() 0 3 1
A fragmentToXML() 0 12 2
A getDOM() 0 3 1
A getFragmentNodes() 0 3 1
A loadFragment() 0 8 1
A loadHTML() 0 3 1
A toHTML() 0 3 1
A load() 0 13 1
1
<?php
2
/**
3
 * File containing the {@see AppUtils\XMLHelper_HTMLLoader} class.
4
 *
5
 * @package Application Utils
6
 * @subpackage XMLHelper
7
 * @see XMLHelper_HTMLLoader
8
 */
9
10
declare(strict_types=1);
11
12
namespace AppUtils;
13
14
/**
15
 * Wrapper around the `DOMDocument->loadHTML()` method to
16
 * make it easier to work with, and add a number of utility
17
 * methods around it. 
18
 * 
19
 * Usage:
20
 * 
21
 * <code>
22
 * <?php
23
 * // Create a loader from a full HTML document string
24
 * $loader = XMLHelper_HTMLLoader::loadHTML($htmlDocument);
25
 * 
26
 * // Create a loader from an HTML fragment
27
 * $loader = XMLHelper_HTMLLoader::loadFragment('<p>Fragment</p>');
28
 * ?>
29
 * </code>
30
 *
31
 * @package Application Utils
32
 * @subpackage XMLHelper
33
 * @author Sebastian Mordziol <[email protected]>
34
 */
35
class XMLHelper_HTMLLoader
{
    /**
     * Error code of the exception thrown when a fragment passed to
     * {@see XMLHelper_HTMLLoader::loadFragment()} already contains
     * a `<body>` tag or a doctype declaration.
     */
    public const ERROR_STRING_ALREADY_HAS_BODY_TAG = 57001;

    /**
     * The `<body>` element of the loaded document.
     *
     * @var \DOMElement
     */
    private $bodyNode;

    /**
     * The libxml errors collected while parsing the HTML.
     *
     * @var XMLHelper_DOMErrors
     */
    private $errors;

    /**
     * Document skeleton that fragments are injected into; the
     * `%1$s` placeholder receives the fragment markup.
     *
     * @var string
     */
    private static $htmlTemplate =
    '<!DOCTYPE html>'.
    '<html>'.
        '<head>'.
            '<meta charset="utf-8">'.
        '</head>'.
        '<body>'.
            '%1$s'.
        '</body>'.
    '</html>';

    /**
     * The DOM document the HTML was parsed into.
     *
     * @var \DOMDocument
     */
    private $dom;

    /**
     * Instances are created via the static factory methods
     * {@see XMLHelper_HTMLLoader::loadFragment()} and
     * {@see XMLHelper_HTMLLoader::loadHTML()}.
     *
     * @param string $html Full HTML document markup.
     */
    private function __construct(string $html)
    {
        $this->load($html);
    }

    /**
     * Creates an HTML loader from an HTML fragment (without
     * doctype, head and body elements).
     *
     * @param string $fragment
     * @return XMLHelper_HTMLLoader
     * @throws XMLHelper_Exception If the fragment already contains a body tag or doctype.
     *
     * @see XMLHelper_HTMLLoader::ERROR_STRING_ALREADY_HAS_BODY_TAG
     */
    public static function loadFragment(string $fragment) : XMLHelper_HTMLLoader
    {
        self::checkFragment($fragment);

        // Inject the HTML fragment into a valid HTML structure. The fragment
        // is passed as a sprintf argument, so any `%` it contains is safe.
        $pseudoHTML = sprintf(self::$htmlTemplate, $fragment);

        return new XMLHelper_HTMLLoader($pseudoHTML);
    }

    /**
     * Creates an HTML loader from a full HTML document (including
     * doctype, head and body elements).
     *
     * @param string $html
     * @return XMLHelper_HTMLLoader
     */
    public static function loadHTML(string $html) : XMLHelper_HTMLLoader
    {
        return new XMLHelper_HTMLLoader($html);
    }

    /**
     * Verifies that the fragment does not already contain a body
     * tag or a doctype declaration, since both conflict with the
     * document skeleton the fragment gets injected into.
     *
     * Only an actual declaration (`<!doctype`, case insensitive)
     * is rejected: text that merely mentions the word "doctype"
     * is a valid fragment.
     *
     * @param string $fragment
     * @throws XMLHelper_Exception
     *
     * @see XMLHelper_HTMLLoader::ERROR_STRING_ALREADY_HAS_BODY_TAG
     */
    private static function checkFragment(string $fragment) : void
    {
        $hasBody = stripos($fragment, '<body') !== false;
        $hasDoctype = stripos($fragment, '<!doctype') !== false;

        if(!$hasBody && !$hasDoctype)
        {
            return;
        }

        throw new XMLHelper_Exception(
            'Cannot convert string with existing body or doctype',
            sprintf(
                'The string already contains a body tag or doctype, which conflicts with the conversion process. Source string: [%s]',
                htmlspecialchars($fragment)
            ),
            self::ERROR_STRING_ALREADY_HAS_BODY_TAG
        );
    }

    /**
     * Parses the HTML string into a DOM document, collecting the
     * libxml parsing errors instead of letting them surface as
     * PHP warnings.
     *
     * @param string $html
     */
    private function load(string $html) : void
    {
        $prev = libxml_use_internal_errors(true);

        // The libxml error buffer is global and accumulates until it is
        // cleared: start from an empty buffer so that only the errors of
        // this parsing operation end up in the collection.
        libxml_clear_errors();

        try
        {
            $this->dom = new \DOMDocument();
            $this->dom->preserveWhiteSpace = false;
            $this->dom->loadHTML($html);

            $this->errors = new XMLHelper_DOMErrors(libxml_get_errors());
        }
        finally
        {
            // Do not leak this document's errors to later libxml
            // consumers, and restore the previous error handling
            // mode even if parsing threw.
            libxml_clear_errors();
            libxml_use_internal_errors($prev);
        }

        // NOTE(review): item(0) yields null when the parsed document has no
        // body element; getBodyNode() and getFragmentNodes() would then
        // fail - confirm whether such documents need to be supported.
        $this->bodyNode = $this->dom->getElementsByTagName('body')->item(0);
    }

    /**
     * Retrieves the document's `<body>` tag node.
     *
     * @return \DOMElement
     */
    public function getBodyNode() : \DOMElement
    {
        return $this->bodyNode;
    }

    /**
     * Retrieves the DOM document instance the HTML was loaded into.
     *
     * @return \DOMDocument
     */
    public function getDOM() : \DOMDocument
    {
        return $this->dom;
    }

    /**
     * Retrieves all nodes from the HTML fragment (= child nodes
     * of the `<body>` element).
     *
     * @return \DOMNodeList
     */
    public function getFragmentNodes() : \DOMNodeList
    {
        return $this->bodyNode->childNodes;
    }

    /**
     * Retrieves the LibXML HTML parsing errors collection, which
     * can be used to review any errors that occurred while loading
     * the HTML document.
     *
     * @return XMLHelper_DOMErrors
     */
    public function getErrors() : XMLHelper_DOMErrors
    {
        return $this->errors;
    }

    /**
     * Returns the whole document serialized as an HTML string.
     *
     * @return string
     */
    public function toHTML() : string
    {
        return $this->dom->saveHTML();
    }

    /**
     * Returns the whole document serialized as an XML string.
     *
     * @return string
     */
    public function toXML() : string
    {
        return $this->dom->saveXML();
    }

    /**
     * Converts the HTML fragment to valid XML (= all
     * child nodes of the `<body>` element).
     *
     * @return string
     */
    public function fragmentToXML() : string
    {
        $nodes = $this->getFragmentNodes();

        // Serialize every child of the body tag, but not the
        // body tag itself.
        $xml = '';
        foreach($nodes as $child)
        {
            $xml .= $this->dom->saveXML($child);
        }

        return $xml;
    }
}
218