Completed
Push — master ( 624e7e...cc8002 )
by Joschi
02:36
created

HtmlDocumentFactory::isNotAllowedError()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 2
eloc 2
nc 2
nop 1
crap 2
1
<?php
2
3
/**
4
 * rdfa-lite-microdata
5
 *
6
 * @category Jkphl
7
 * @package Jkphl\RdfaLiteMicrodata
8
 * @subpackage Jkphl\RdfaLiteMicrodata\Infrastructure
9
 * @author Joschi Kuphal <[email protected]> / @jkphl
10
 * @copyright Copyright © 2017 Joschi Kuphal <[email protected]> / @jkphl
11
 * @license http://opensource.org/licenses/MIT The MIT License (MIT)
12
 */
13
14
/***********************************************************************************
15
 *  The MIT License (MIT)
16
 *
17
 *  Copyright © 2017 Joschi Kuphal <[email protected]> / @jkphl
18
 *
19
 *  Permission is hereby granted, free of charge, to any person obtaining a copy of
20
 *  this software and associated documentation files (the "Software"), to deal in
21
 *  the Software without restriction, including without limitation the rights to
22
 *  use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
23
 *  the Software, and to permit persons to whom the Software is furnished to do so,
24
 *  subject to the following conditions:
25
 *
26
 *  The above copyright notice and this permission notice shall be included in all
27
 *  copies or substantial portions of the Software.
28
 *
29
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30
 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
31
 *  FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
32
 *  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
33
 *  IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34
 *  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
35
 ***********************************************************************************/
36
37
namespace Jkphl\RdfaLiteMicrodata\Infrastructure\Factories;
38
39
use Jkphl\RdfaLiteMicrodata\Application\Contract\DocumentFactoryInterface;
40
use Jkphl\RdfaLiteMicrodata\Infrastructure\Exceptions\HtmlParsingException;
41
use Jkphl\RdfaLiteMicrodata\Infrastructure\Exceptions\InvalidArgumentException;
42
43
/**
 * HTML document factory
 *
 * Parses an HTML source into a \DOMDocument and inspects the libxml parsing errors raised
 * along the way. Errors that are merely libxml complaining about a known HTML5 element are
 * ignored, all other errors are passed to the optional custom error handler and raise an
 * exception unless that handler explicitly allows them.
 *
 * @package Jkphl\RdfaLiteMicrodata
 * @subpackage Jkphl\RdfaLiteMicrodata\Infrastructure
 */
class HtmlDocumentFactory implements DocumentFactoryInterface
{
    /**
     * libxml error code for an unknown / invalid HTML tag (XML_HTML_UNKNOWN_TAG)
     *
     * @var int
     */
    const HTML_UNKNOWN_TAG_ERROR = 801;
    /**
     * HTML5 elements
     *
     * Element names that must not be reported as "invalid tag" errors.
     *
     * @var array
     */
    protected static $html5 = [
        'a',
        'abbr',
        'acronym',
        'address',
        'applet',
        'area',
        'article',
        'aside',
        'audio',
        'b',
        'base',
        'basefont',
        'bdi',
        'bdo',
        'bgsound',
        'big',
        'blink',
        'blockquote',
        'body',
        'br',
        'button',
        'canvas',
        'caption',
        'center',
        'cite',
        'code',
        'col',
        'colgroup',
        'content',
        'data',
        'datalist',
        'dd',
        'decorator',
        'del',
        'details',
        'dfn',
        'dir',
        'div',
        'dl',
        'dt',
        'element',
        'em',
        'embed',
        'fieldset',
        'figcaption',
        'figure',
        'font',
        'footer',
        'form',
        'frame',
        'frameset',
        'h1',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'head',
        'header',
        'hgroup',
        'hr',
        'html',
        'i',
        'iframe',
        'img',
        'input',
        'ins',
        'isindex',
        'kbd',
        'keygen',
        'label',
        'legend',
        'li',
        'link',
        'listing',
        'main',
        'map',
        'mark',
        'marquee',
        'menu',
        'menuitem',
        'meta',
        'meter',
        'nav',
        'nobr',
        'noframes',
        'noscript',
        'object',
        'ol',
        'optgroup',
        'option',
        'output',
        'p',
        'param',
        'plaintext',
        'pre',
        'progress',
        'q',
        'rp',
        'rt',
        'ruby',
        's',
        'samp',
        'script',
        'section',
        'select',
        'shadow',
        'small',
        'source',
        'spacer',
        'span',
        'strike',
        'strong',
        'style',
        'sub',
        'summary',
        'sup',
        'table',
        'tbody',
        'td',
        'template',
        'textarea',
        'tfoot',
        'th',
        'thead',
        'time',
        'title',
        'tr',
        'track',
        'tt',
        'u',
        'ul',
        'var',
        'video',
        'wbr',
        'xmp'
    ];
    /**
     * Custom HTML parsing error handler
     *
     * Called with a \LibXMLError; a truthy return value marks the error as allowed.
     *
     * @var callable|null
     */
    protected $errorHandler;

    /**
     * Constructor
     *
     * @param callable|null $errorHandler Custom HTML parsing error handler
     */
    public function __construct(callable $errorHandler = null)
    {
        $this->errorHandler = $errorHandler;
    }

    /**
     * Create a DOM document from a source
     *
     * @param mixed $source Source
     * @return \DOMDocument DOM document
     * @throws HtmlParsingException If a parsing error is neither HTML5 related nor allowed by the error handler
     */
    public function createDocumentFromSource($source)
    {
        $dom = new \DOMDocument();
        $errors = [];

        // Collect parsing errors internally and remember the previous setting, so the
        // caller's libxml configuration can be restored afterwards instead of being reset
        $previousState = libxml_use_internal_errors(true);

        // Drop errors buffered before this call so they aren't attributed to this document
        libxml_clear_errors();

        try {
            $dom->loadHTML($source);
            $errors = libxml_get_errors();
        } finally {
            // Always leave an empty error buffer and the original libxml state behind,
            // even if parsing threw
            libxml_clear_errors();
            libxml_use_internal_errors($previousState);
        }

        $this->processParsingErrors($errors);
        return $dom;
    }

    /**
     * Process parsing errors
     *
     * @param \LibXMLError[] $errors Parsing errors
     * @throws HtmlParsingException If an error is neither an "invalid" HTML5 tag error nor allowed
     */
    protected function processParsingErrors(array $errors)
    {
        /** @var \LibXMLError $error */
        foreach ($errors as $error) {
            if ($this->isNotInvalidHtml5TagError($error) && $this->isNotAllowedError($error)) {
                throw new HtmlParsingException($error);
            }
        }
    }

    /**
     * Test whether a parsing error is not because of an "invalid" HTML5 tag
     *
     * @param \LibXMLError $error Parsing error
     * @return bool Error is not because of an "invalid" HTML5 tag
     */
    protected function isNotInvalidHtml5TagError(\LibXMLError $error)
    {
        // Anything but an "unknown tag" error is never HTML5 related
        if ($error->code !== self::HTML_UNKNOWN_TAG_ERROR) {
            return true;
        }

        // An "unknown tag" error only counts if the reported tag is not a known HTML5 element
        return preg_match('/^tag\s+(\S+)\s+invalid$/', strtolower($error->message), $tag) &&
            !in_array($tag[1], self::$html5, true);
    }

    /**
     * Test whether a parsing error is allowed per custom HTML parser error handler
     *
     * Without a (callable) error handler no error is allowed.
     *
     * @param \LibXMLError $error Parsing error
     * @return bool Error is not allowed
     */
    protected function isNotAllowedError(\LibXMLError $error)
    {
        return !(is_callable($this->errorHandler) && call_user_func($this->errorHandler, $error));
    }
}
270