Completed
Pull Request — master (#103)
by Asmir
08:22 queued 06:05
created

AbstractHtml5   A

Complexity

Total Complexity 15

Size/Duplication

Total Lines 232
Duplicated Lines 0 %

Coupling/Cohesion

Dependencies 7

Test Coverage

Coverage 100%

Importance

Changes 1
Bugs 0 Features 1
Metric Value
wmc 15
c 1
b 0
f 1
cbo 7
dl 0
loc 232
ccs 57
cts 57
cp 1
rs 10

12 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A getOptions() 0 4 1
A load() 0 12 2
A loadHTML() 0 6 1
A loadHTMLFile() 0 4 1
A loadHTMLFragment() 0 6 1
A getErrors() 0 4 1
A hasErrors() 0 4 1
A parse() 0 12 1
A parseFragment() 0 11 1
A save() 0 19 3
A saveHTML() 0 7 1
1
<?php
2
namespace Masterminds;
3
4
use Masterminds\Html5\Parser\FileInputStream;
5
use Masterminds\Html5\Parser\StringInputStream;
6
use Masterminds\Html5\Parser\DOMTreeBuilder;
7
use Masterminds\Html5\Parser\Scanner;
8
use Masterminds\Html5\Parser\Tokenizer;
9
use Masterminds\Html5\Serializer\OutputRules;
10
use Masterminds\Html5\Serializer\Traverser;
11
12
/**
13
 * This class offers convenience methods for parsing and serializing HTML5.
14
 * It is roughly designed to mirror the \DOMDocument class that is
15
 * provided with most versions of PHP.
16
 *
17
 */
18
abstract class AbstractHtml5
{

    /**
     * Global options for the parser and serializer.
     *
     * Options given to the constructor are merged over these defaults, and
     * per-call options are merged over the result.
     *
     * @var array
     */
    protected $options = array(
        // If the serializer should encode all entities.
        'encode_entities' => false
    );

    /**
     * Errors collected during the most recent parse() or parseFragment() call.
     *
     * @var array
     */
    protected $errors = array();

    /**
     * Create a new instance.
     *
     * @param array $options
     *            Options merged over the built-in defaults; entries given
     *            here override defaults that use the same key.
     */
    public function __construct(array $options = array())
    {
        $this->options = array_merge($this->options, $options);
    }

    /**
     * Get the default options.
     *
     * @return array The built-in defaults merged with the options passed to the constructor.
     */
    public function getOptions()
    {
        return $this->options;
    }

    /**
     * Load and parse an HTML file.
     *
     * This will apply the HTML5 parser, which is tolerant of many
     * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML
     * 3. Note that in these cases, not all of the old data will be
     * preserved. For example, XHTML's XML declaration will be removed.
     *
     * The rules governing parsing are set out in the HTML 5 spec.
     *
     * @param string|resource $file
     *            The path to the file to parse. If this is a resource, it is
     *            assumed to be an open stream whose pointer is set to the first
     *            byte of input.
     * @param array $options
     *            Configuration options when parsing the HTML
     * @return \DOMDocument A DOM document. This object type is defined by the libxml
     *         library, and should have been included with your version of PHP.
     */
    public function load($file, array $options = array())
    {
        // Handle the case where file is a resource: the remaining stream
        // contents are read into a string and parsed from there.
        if (is_resource($file)) {
            // FIXME: We need a StreamInputStream class.
            return $this->loadHTML(stream_get_contents($file), $options);
        }

        $input = new FileInputStream($file);

        return $this->parse($input, $options);
    }

    /**
     * Parse an HTML document from a string.
     *
     * Take a string of HTML 5 (or earlier) and parse it into a
     * DOMDocument.
     *
     * @param string $string
     *            An HTML5 document as a string.
     * @param array $options
     *            Configuration options when parsing the HTML
     * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
     *         almost all distributions of PHP.
     */
    public function loadHTML($string, array $options = array())
    {
        $input = new StringInputStream($string);

        return $this->parse($input, $options);
    }

    /**
     * Convenience function to load an HTML file.
     *
     * This is here to provide backwards compatibility with the
     * PHP DOM implementation. It simply calls load().
     *
     * @param string|resource $file
     *            The path to the file to parse. If this is a resource, it is
     *            assumed to be an open stream whose pointer is set to the first
     *            byte of input.
     * @param array $options
     *            Configuration options when parsing the HTML
     *
     * @return \DOMDocument A DOM document. This object type is defined by the libxml
     *         library, and should have been included with your version of PHP.
     */
    public function loadHTMLFile($file, array $options = array())
    {
        return $this->load($file, $options);
    }

    /**
     * Parse an HTML fragment from a string.
     *
     * @param string $string
     *            The HTML5 fragment as a string.
     * @param array $options
     *            Configuration options when parsing the HTML
     *
     * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with
     *         almost all distributions of PHP.
     */
    public function loadHTMLFragment($string, array $options = array())
    {
        $input = new StringInputStream($string);

        return $this->parseFragment($input, $options);
    }

    /**
     * Return all errors encountered during the most recent parsing phase.
     *
     * @return array
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Return true if some errors were encountered during the most recent parsing phase.
     *
     * @return bool
     */
    public function hasErrors()
    {
        return count($this->errors) > 0;
    }

    /**
     * Parse an input stream.
     *
     * Lower-level loading function. This requires an input stream instead
     * of a string, file, or resource.
     *
     * @param \Masterminds\Html5\Parser\InputStream $input
     *            The stream to read the document from.
     * @param array $options
     *            Configuration options when parsing the HTML; merged over the
     *            instance options.
     *
     * @return \DOMDocument The document produced by the tree builder.
     */
    public function parse(\Masterminds\Html5\Parser\InputStream $input, array $options = array())
    {
        // Drop errors from any previous run so a failure part-way through
        // parsing cannot leave stale errors behind.
        $this->errors = array();
        $events = new DOMTreeBuilder(false, array_merge($this->getOptions(), $options));
        $scanner = new Scanner($input);
        $parser = new Tokenizer($scanner, $events);

        $parser->parse();
        $this->errors = $events->getErrors();

        return $events->document();
    }

    /**
     * Parse an input stream where the stream is a fragment.
     *
     * Lower-level loading function. This requires an input stream instead
     * of a string, file, or resource.
     *
     * @param \Masterminds\Html5\Parser\InputStream $input
     *            The stream to read the fragment from.
     * @param array $options
     *            Configuration options when parsing the HTML; merged over the
     *            instance options.
     *
     * @return \DOMDocumentFragment The fragment produced by the tree builder.
     */
    public function parseFragment(\Masterminds\Html5\Parser\InputStream $input, array $options = array())
    {
        // Reset first, exactly as parse() does, so getErrors()/hasErrors()
        // never report a previous run if the tokenizer throws.
        $this->errors = array();
        $events = new DOMTreeBuilder(true, array_merge($this->getOptions(), $options));
        $scanner = new Scanner($input);
        $parser = new Tokenizer($scanner, $events);

        $parser->parse();
        $this->errors = $events->getErrors();

        return $events->fragment();
    }

    /**
     * Save a DOM into a given file as HTML5.
     *
     * @param mixed $dom
     *            The DOM to be serialized.
     * @param string|resource $file
     *            The filename to be written or resource to write to. A resource
     *            passed in by the caller is left open; a file opened here from a
     *            filename is closed before returning.
     * @param array $options
     *            Configuration options when serializing the DOM. These include:
     *            - encode_entities: Text written to the output is escaped by default and not all
     *            entities are encoded. If this is set to true all entities will be encoded.
     *            Defaults to false.
     *
     * @throws \RuntimeException If $file is a filename that cannot be opened for writing.
     */
    public function save($dom, $file, $options = array())
    {
        $close = true;
        if (is_resource($file)) {
            // The caller owns this stream, so it must not be closed here.
            $stream = $file;
            $close = false;
        } else {
            $stream = fopen($file, 'w');
            if ($stream === false) {
                // Fail here rather than handing `false` to the serializer and fclose().
                throw new \RuntimeException(sprintf('Unable to open "%s" for writing.', $file));
            }
        }
        $options = array_merge($this->getOptions(), $options);
        $rules = new OutputRules($stream, $options);
        $trav = new Traverser($dom, $stream, $rules, $options);

        $trav->walk();

        if ($close) {
            fclose($stream);
        }
    }

    /**
     * Convert a DOM into an HTML5 string.
     *
     * @param mixed $dom
     *            The DOM to be serialized.
     * @param array $options
     *            Configuration options when serializing the DOM. These include:
     *            - encode_entities: Text written to the output is escaped by default and not all
     *            entities are encoded. If this is set to true all entities will be encoded.
     *            Defaults to false.
     *
     * @return string An HTML5 document generated from the DOM.
     */
    public function saveHTML($dom, $options = array())
    {
        $stream = fopen('php://temp', 'w');
        $this->save($dom, $stream, array_merge($this->getOptions(), $options));

        // Read the whole buffer from offset 0, then release the temporary
        // stream explicitly: save() leaves caller-supplied resources open.
        $html = stream_get_contents($stream, - 1, 0);
        fclose($stream);

        return $html;
    }
}
250