Completed
Pull Request — master (#182)
by
unknown
01:57
created

HTML5::getErrors()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Masterminds;
4
5
use Masterminds\HTML5\Parser\DOMTreeBuilder;
6
use Masterminds\HTML5\Parser\Normalizer;
7
use Masterminds\HTML5\Parser\Scanner;
8
use Masterminds\HTML5\Parser\Tokenizer;
9
use Masterminds\HTML5\Serializer\OutputRules;
10
use Masterminds\HTML5\Serializer\Traverser;
11
12
/**
 * Convenience facade for parsing and serializing HTML5.
 *
 * The API is roughly designed to mirror the native \DOMDocument class.
 */
class HTML5
{
    /**
     * Global options for the parser and serializer.
     *
     * Options passed to an individual call are merged over these defaults.
     *
     * @var array
     */
    private $defaultOptions = array(
        // Whether the serializer should aggressively encode all characters as entities.
        'encode_entities' => false,

        // Prevents the parser from automatically assigning the HTML5 namespace to the DOM document.
        'disable_html_ns' => false,

        // Whether to add missing root elements.
        'normalize'       => false,
    );

    /**
     * Errors reported by the tree builder during the most recent parse() or parseFragment() call.
     *
     * @var array
     */
    protected $errors = array();

    /**
     * @param array $defaultOptions Overrides (and additions) for the built-in default options.
     */
    public function __construct(array $defaultOptions = array())
    {
        $this->defaultOptions = array_merge($this->defaultOptions, $defaultOptions);
    }

    /**
     * Get the current default options.
     *
     * @return array
     */
    public function getOptions()
    {
        return $this->defaultOptions;
    }

    /**
     * Load and parse an HTML file.
     *
     * This will apply the HTML5 parser, which is tolerant of many
     * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML
     * 3. Note that in these cases, not all of the old data will be
     * preserved. For example, XHTML's XML declaration will be removed.
     *
     * The rules governing parsing are set out in the HTML 5 spec.
     *
     * The content read from the file or stream is handed to parse() unchanged;
     * no check for a failed read is made here.
     *
     * @param string|resource $file    The path to the file to parse. If this is a resource, it is
     *                                 assumed to be an open stream whose pointer is set to the first
     *                                 byte of input.
     * @param array           $options Configuration options when parsing the HTML.
     *
     * @return \DOMDocument A DOM document. This object type is defined by the libxml
     *                      library, and should have been included with your version of PHP.
     */
    public function load($file, array $options = array())
    {
        // An already-open stream is read from its current position; anything else is treated as a path.
        $contents = is_resource($file)
            ? stream_get_contents($file)
            : file_get_contents($file);

        return $this->parse($contents, $options);
    }

    /**
     * Parse an HTML document from a string.
     *
     * Take a string of HTML 5 (or earlier) and parse it into a
     * DOMDocument.
     *
     * @param string $string  An HTML5 document as a string.
     * @param array  $options Configuration options when parsing the HTML.
     *
     * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
     *                      almost all distributions of PHP.
     */
    public function loadHTML($string, array $options = array())
    {
        return $this->parse($string, $options);
    }

    /**
     * Convenience function to load an HTML file.
     *
     * This is here to provide backwards compatibility with the
     * PHP DOM implementation. It simply calls load().
     *
     * @param string|resource $file    The path to the file to parse. If this is a resource, it is
     *                                 assumed to be an open stream whose pointer is set to the first
     *                                 byte of input.
     * @param array           $options Configuration options when parsing the HTML.
     *
     * @return \DOMDocument A DOM document. This object type is defined by the libxml
     *                      library, and should have been included with your version of PHP.
     */
    public function loadHTMLFile($file, array $options = array())
    {
        return $this->load($file, $options);
    }

    /**
     * Parse an HTML fragment from a string.
     *
     * @param string $string  The HTML5 fragment as a string.
     * @param array  $options Configuration options when parsing the HTML.
     *
     * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with
     *                              almost all distributions of PHP.
     */
    public function loadHTMLFragment($string, array $options = array())
    {
        return $this->parseFragment($string, $options);
    }

    /**
     * Return all errors encountered during the last parsing phase.
     *
     * @return array
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Return true if some errors were encountered during the last parsing phase.
     *
     * @return bool
     */
    public function hasErrors()
    {
        return count($this->errors) > 0;
    }

    /**
     * Parse an input string into a full document.
     *
     * @param string $input   The HTML to parse.
     * @param array  $options Per-call options, merged over the instance defaults.
     *
     * @return \DOMDocument
     */
    public function parse($input, array $options = array())
    {
        // Merge first so that a 'normalize' flag supplied to the constructor is
        // honoured, not only one passed directly to this call.
        $options = array_merge($this->defaultOptions, $options);

        if (!empty($options['normalize'])) {
            $input = $this->normalize($input);
        }

        // Clear errors left over from a previous run before building the new tree.
        $this->errors = array();
        $events = new DOMTreeBuilder(false, $options);
        $this->runTokenizer($input, $events, $options);

        return $events->document();
    }

    /**
     * Parse an input string as a document fragment.
     *
     * @param string $input   The input data to parse in the form of a string.
     * @param array  $options Per-call options, merged over the instance defaults.
     *
     * @return \DOMDocumentFragment
     */
    public function parseFragment($input, array $options = array())
    {
        $options = array_merge($this->defaultOptions, $options);
        $events = new DOMTreeBuilder(true, $options);
        $this->runTokenizer($input, $events, $options);

        return $events->fragment();
    }

    /**
     * Save a DOM into a given file as HTML5.
     *
     * A stream passed in by the caller is left open; a stream opened here from
     * a filename is closed once serialization has finished.
     *
     * @param mixed           $dom     The DOM to be serialized.
     * @param string|resource $file    The filename to be written or resource to write to.
     * @param array           $options Configuration options when serializing the DOM. These include:
     *                                 - encode_entities: Text written to the output is escaped by default and not all
     *                                 entities are encoded. If this is set to true all entities will be encoded.
     *                                 Defaults to false.
     *
     * @throws \RuntimeException If $file is a filename that cannot be opened for writing.
     */
    public function save($dom, $file, $options = array())
    {
        $ownsStream = !is_resource($file);

        if ($ownsStream) {
            $stream = fopen($file, 'wb');
            if ($stream === false) {
                // Fail here with the offending path instead of handing `false` to the serializer.
                throw new \RuntimeException(sprintf('Unable to open "%s" for writing.', $file));
            }
        } else {
            $stream = $file;
        }

        $options = array_merge($this->defaultOptions, $options);
        $rules = new OutputRules($stream, $options);
        $trav = new Traverser($dom, $stream, $rules, $options);

        $trav->walk();

        if ($ownsStream) {
            fclose($stream);
        }
    }

    /**
     * Convert a DOM into an HTML5 string.
     *
     * @param mixed $dom     The DOM to be serialized.
     * @param array $options Configuration options when serializing the DOM. These include:
     *                       - encode_entities: Text written to the output is escaped by default and not all
     *                       entities are encoded. If this is set to true all entities will be encoded.
     *                       Defaults to false.
     *
     * @return string An HTML5 document generated from the DOM.
     */
    public function saveHTML($dom, $options = array())
    {
        $stream = fopen('php://temp', 'wb');
        $this->save($dom, $stream, array_merge($this->defaultOptions, $options));

        // Read everything back from offset 0, then release the temporary
        // stream explicitly instead of leaving it open after returning.
        $html = stream_get_contents($stream, -1, 0);
        fclose($stream);

        return $html;
    }

    /**
     * Add missing root elements to the input HTML.
     *
     * @param string $input
     *
     * @return string
     */
    protected function normalize($input)
    {
        $normalizer = new Normalizer();
        $normalizer->loadHtml($input);

        return $normalizer->saveHtml();
    }

    /**
     * Tokenize the input and feed the resulting events to the given tree builder.
     *
     * Shared by parse() and parseFragment(). Stores the builder's errors on this
     * instance once tokenizing has finished.
     *
     * @param string         $input   The markup to tokenize.
     * @param DOMTreeBuilder $events  The tree builder receiving tokenizer events.
     * @param array          $options Already-merged options; 'encoding' and 'xmlNamespaces' are read here.
     */
    private function runTokenizer($input, DOMTreeBuilder $events, array $options)
    {
        // Fall back to UTF-8 when no (or an empty) encoding option is given.
        $encoding = !empty($options['encoding']) ? $options['encoding'] : 'UTF-8';
        $scanner = new Scanner($input, $encoding);

        $mode = !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML;
        $parser = new Tokenizer($scanner, $events, $mode);

        $parser->parse();
        $this->errors = $events->getErrors();
    }
}
262