Completed
Push — master ( 30aab1...cadcfa )
by Asmir
07:00
created

HTML5   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 235
Duplicated Lines 0 %

Coupling/Cohesion

Dependencies 5

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 17
cbo 5
dl 0
loc 235
ccs 55
cts 55
cp 1
rs 10
c 0
b 0
f 0

12 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A getOptions() 0 4 1
A load() 0 9 2
A loadHTML() 0 4 1
A loadHTMLFile() 0 4 1
A loadHTMLFragment() 0 4 1
A getErrors() 0 4 1
A hasErrors() 0 4 1
A parse() 0 13 2
A parseFragment() 0 12 2
A save() 0 19 3
A saveHTML() 0 7 1
1
<?php
2
namespace Masterminds;
3
4
use Masterminds\HTML5\Parser\FileInputStream;
5
use Masterminds\HTML5\Parser\InputStream;
6
use Masterminds\HTML5\Parser\StringInputStream;
7
use Masterminds\HTML5\Parser\DOMTreeBuilder;
8
use Masterminds\HTML5\Parser\Scanner;
9
use Masterminds\HTML5\Parser\Tokenizer;
10
use Masterminds\HTML5\Serializer\OutputRules;
11
use Masterminds\HTML5\Serializer\Traverser;
12
13
/**
14
 * This class offers convenience methods for parsing and serializing HTML5.
15
 * It is roughly designed to mirror the \DOMDocument class that is
16
 * provided with most versions of PHP.
17
 */
18
class HTML5
{

    /**
     * Global options for the parser and serializer.
     *
     * Per-call options passed to the load/parse/save methods are merged on
     * top of these with array_merge().
     *
     * @var array
     */
    protected $options = array(
        // If the serializer should encode all entities.
        'encode_entities' => false
    );

    /**
     * Errors reported by the tree builder during the most recent parse.
     *
     * @var array
     */
    protected $errors = array();

    /**
     * Create a new HTML5 helper.
     *
     * @param array $options
     *            Options merged over the built-in defaults; they apply to every
     *            subsequent parse and serialize call made through this instance.
     */
    public function __construct(array $options = array())
    {
        $this->options = array_merge($this->options, $options);
    }

    /**
     * Get the options in effect for this instance.
     *
     * @return array The built-in defaults merged with the options given to the constructor.
     */
    public function getOptions()
    {
        return $this->options;
    }

    /**
     * Load and parse an HTML file.
     *
     * This will apply the HTML5 parser, which is tolerant of many
     * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML
     * 3. Note that in these cases, not all of the old data will be
     * preserved. For example, XHTML's XML declaration will be removed.
     *
     * The rules governing parsing are set out in the HTML 5 spec.
     *
     * @param string|resource $file
     *            The path to the file to parse. If this is a resource, it is
     *            assumed to be an open stream whose pointer is set to the first
     *            byte of input.
     * @param array $options
     *            Configuration options when parsing the HTML
     * @return \DOMDocument A DOM document. This object type is defined by the libxml
     *         library, and should have been included with your version of PHP.
     */
    public function load($file, array $options = array())
    {
        // Handle the case where file is a resource: read from the current
        // pointer position to the end of the stream.
        if (is_resource($file)) {
            return $this->parse(stream_get_contents($file), $options);
        }

        return $this->parse(file_get_contents($file), $options);
    }

    /**
     * Parse a HTML Document from a string.
     *
     * Take a string of HTML 5 (or earlier) and parse it into a
     * DOMDocument.
     *
     * @param string $string
     *            A html5 document as a string.
     * @param array $options
     *            Configuration options when parsing the HTML
     * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
     *         almost all distributions of PHP.
     */
    public function loadHTML($string, array $options = array())
    {
        return $this->parse($string, $options);
    }

    /**
     * Convenience function to load an HTML file.
     *
     * This is here to provide backwards compatibility with the
     * PHP DOM implementation. It simply calls load().
     *
     * @param string|resource $file
     *            The path to the file to parse. If this is a resource, it is
     *            assumed to be an open stream whose pointer is set to the first
     *            byte of input.
     * @param array $options
     *            Configuration options when parsing the HTML
     *
     * @return \DOMDocument A DOM document. This object type is defined by the libxml
     *         library, and should have been included with your version of PHP.
     */
    public function loadHTMLFile($file, array $options = array())
    {
        return $this->load($file, $options);
    }

    /**
     * Parse a HTML fragment from a string.
     *
     * @param string $string The HTML5 fragment as a string.
     * @param array $options Configuration options when parsing the HTML
     *
     * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with
     *         almost all distributions of PHP.
     */
    public function loadHTMLFragment($string, array $options = array())
    {
        return $this->parseFragment($string, $options);
    }

    /**
     * Return all errors encountered during the most recent parse.
     *
     * @return array
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Return true if any errors were encountered during the most recent parse.
     *
     * @return bool
     */
    public function hasErrors()
    {
        return count($this->errors) > 0;
    }

    /**
     * Parse a complete HTML document.
     *
     * Lower-level loading function used by load() and loadHTML(); both pass
     * the markup as a string. The errors collected by the tree builder
     * replace any errors from a previous parse.
     *
     * @param string $input The markup to parse.
     * @param array $options
     *            Options merged over getOptions(). A non-empty 'xmlNamespaces'
     *            entry switches the tokenizer to Tokenizer::CONFORMANT_XML.
     *
     * @return \DOMDocument
     */
    public function parse($input, array $options = array())
    {
        // Start from a clean slate so a failure below cannot expose stale errors.
        $this->errors = array();
        $options = array_merge($this->getOptions(), $options);
        $events = new DOMTreeBuilder(false, $options);
        $scanner = new Scanner($input);
        $mode = !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML;
        $parser = new Tokenizer($scanner, $events, $mode);

        $parser->parse();
        $this->errors = $events->getErrors();

        return $events->document();
    }

    /**
     * Parse a HTML fragment.
     *
     * Lower-level loading function used by loadHTMLFragment(), which passes
     * the markup as a string. The errors collected by the tree builder
     * replace any errors from a previous parse.
     *
     * @param string $input The input data to parse in the form of a string.
     * @param array $options
     *            Options merged over getOptions(). A non-empty 'xmlNamespaces'
     *            entry switches the tokenizer to Tokenizer::CONFORMANT_XML.
     *
     * @return \DOMDocumentFragment
     */
    public function parseFragment($input, array $options = array())
    {
        // Reset first, exactly as parse() does, so both entry points behave alike.
        $this->errors = array();
        $options = array_merge($this->getOptions(), $options);
        $events = new DOMTreeBuilder(true, $options);
        $scanner = new Scanner($input);
        $mode = !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML;
        $parser = new Tokenizer($scanner, $events, $mode);

        $parser->parse();
        $this->errors = $events->getErrors();

        return $events->fragment();
    }

    /**
     * Save a DOM into a given file as HTML5.
     *
     * @param mixed $dom
     *            The DOM to be serialized.
     * @param string|resource $file
     *            The filename to be written, or an already open writable stream.
     *            A filename is opened with mode 'w' and closed when serialization
     *            is done; a stream supplied by the caller is left open.
     * @param array $options
     *            Configuration options when serializing the DOM. These include:
     *            - encode_entities: Text written to the output is escaped by default and not all
     *            entities are encoded. If this is set to true all entities will be encoded.
     *            Defaults to false.
     */
    public function save($dom, $file, $options = array())
    {
        // Only close streams this method opened itself.
        $close = true;
        if (is_resource($file)) {
            $stream = $file;
            $close = false;
        } else {
            $stream = fopen($file, 'w');
        }
        $options = array_merge($this->getOptions(), $options);
        $rules = new OutputRules($stream, $options);
        $trav = new Traverser($dom, $stream, $rules, $options);

        $trav->walk();

        if ($close) {
            fclose($stream);
        }
    }

    /**
     * Convert a DOM into an HTML5 string.
     *
     * @param mixed $dom
     *            The DOM to be serialized.
     * @param array $options
     *            Configuration options when serializing the DOM. These include:
     *            - encode_entities: Text written to the output is escaped by default and not all
     *            entities are encoded. If this is set to true all entities will be encoded.
     *            Defaults to false.
     *
     * @return string A HTML5 document generated from the DOM.
     */
    public function saveHTML($dom, $options = array())
    {
        $stream = fopen('php://temp', 'w');
        $this->save($dom, $stream, array_merge($this->getOptions(), $options));

        // Read everything back from offset 0, then release the temporary
        // stream: save() never closes a stream it did not open itself.
        $html = stream_get_contents($stream, -1, 0);
        fclose($stream);

        return $html;
    }
}
253