Completed
Push — master ( fd987e...4f1c16 )
by Kevin
02:24
created

Element::getAttributes()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Kevintweber\HtmlTokenizer\Tokens;
4
5
use Kevintweber\HtmlTokenizer\Exceptions\ParseException;
6
7
class Element extends AbstractToken
8
{
9
    /** @var array Attribute values keyed by attribute name; a valueless attribute is stored as boolean true. */
10
    private $attributes;
11
12
    /** @var Token[] Child tokens, appended in the order they are parsed. */
13
    private $children;
14
15
    /** @var string|null Lower-cased element name; null until parse() has been called. */
16
    private $name;
17
18 55
    /**
     * Creates an empty element token: no name, no attributes, no children.
     *
     * @param Token|null $parent       Parent token, if any.
     * @param boolean    $throwOnError Passed through to the parent constructor.
     */
    public function __construct(Token $parent = null, $throwOnError = false)
    {
        parent::__construct(Token::ELEMENT, $parent, $throwOnError);

        // The name stays null until parse() reads it from the HTML.
        $this->name = null;
        $this->attributes = $this->children = array();
    }
26
27
    /**
     * Does the start tag at the beginning of $html implicitly close the
     * parent element?
     *
     * Only parents that are themselves elements can be implicitly closed.
     * The decision depends on the parent's name and on the name of the
     * element that is about to be opened.
     *
     * @param string $html HTML beginning with the start tag of this element.
     *
     * @return boolean True if the parent's closing tag is implied.
     */
    public function isClosingElementImplied($html)
    {
        $parent = $this->getParent();
        if (!($parent instanceof self)) {
            // Covers both "no parent" and "parent is not an element".
            return false;
        }

        $childName = $this->parseElementName($html);

        switch ($parent->getName()) {
            case 'head':
                // HEAD: no closing tag needed once BODY starts.
                return $childName === 'body';

            case 'p':
                // P: closed by any element that may not be a child of P.
                $elementsNotChildrenOfP = array(
                    'address',
                    'article',
                    'aside',
                    'blockquote',
                    'details',
                    'div',
                    'dl',
                    'fieldset',
                    'figcaption',
                    'figure',
                    'footer',
                    'form',
                    'h1',
                    'h2',
                    'h3',
                    'h4',
                    'h5',
                    'h6',
                    'header',
                    'hgroup',
                    'hr',
                    'main',
                    'menu',
                    'nav',
                    'ol',
                    'p',
                    'pre',
                    'section',
                    'table',
                    'ul'
                );

                return in_array($childName, $elementsNotChildrenOfP, true);

            case 'li':
                // LI: closed by the next LI.
                return $childName === 'li';

            case 'dt':
            case 'dd':
                // DT and DD: closed by the next DT or DD.
                return $childName === 'dt' || $childName === 'dd';

            case 'rp':
            case 'rt':
                // RP and RT: closed by the next RP or RT.
                return $childName === 'rp' || $childName === 'rt';

            default:
                return false;
        }
    }
103
104
    /**
     * Will parse this element.
     *
     * Reads the element name, then the attributes, and then either stops
     * (self-closing tag or void element) or continues with the contents of
     * the element.
     *
     * @param string $html HTML beginning with the start tag of this element.
     *
     * @return string Remaining HTML.
     *
     * @throws ParseException If the tag is malformed and throw-on-error is enabled.
     */
    public function parse($html)
    {
        $this->name = $this->parseElementName($html);

        // Parse attributes.  The "+ 1" skips the opening "<".
        $remainingHtml = mb_substr($html, mb_strlen($this->name) + 1);
        while (mb_strpos($remainingHtml, '>') !== false && preg_match("/^\s*[\/]?>/", $remainingHtml) === 0) {
            $unparsedHtml = trim($remainingHtml);
            $remainingHtml = $this->parseAttribute($unparsedHtml);

            // Every pass must consume something; otherwise this loop would
            // never terminate on input the attribute parser cannot handle.
            if ($remainingHtml !== '' && strlen($remainingHtml) >= strlen($unparsedHtml)) {
                if ($this->getThrowOnError()) {
                    throw new ParseException('Invalid attribute.');
                }

                return '';
            }
        }

        // Find position of end of tag.
        $posOfClosingBracket = mb_strpos($remainingHtml, '>');
        if ($posOfClosingBracket === false) {
            if ($this->getThrowOnError()) {
                throw new ParseException('Invalid element: missing closing bracket.');
            }

            return '';
        }

        // Is self-closing?
        $posOfSelfClosingBracket = mb_strpos($remainingHtml, '/>');
        $remainingHtml = trim(mb_substr($remainingHtml, $posOfClosingBracket + 1));
        if ($posOfSelfClosingBracket !== false && $posOfSelfClosingBracket == $posOfClosingBracket - 1) {
            // Self-closing element.
            return $remainingHtml;
        }

        // Lets close those closed-only (void) elements that are left open.
        // They never have contents, so what follows belongs to the parent.
        $closedOnlyElements = array(
            'area',
            'base',
            'br',
            'col',
            'embed',
            'hr',
            'img',
            'input',
            'link',
            'meta',
            'param',
            'source',
            'track',
            'wbr'
        );
        if (in_array($this->name, $closedOnlyElements, true)) {
            return $remainingHtml;
        }

        // Open element.
        return $this->parseContents($remainingHtml);
    }
154
155
    /**
     * Will parse a single attribute from the front of the given HTML and
     * store it on this element.
     *
     * A valueless attribute is stored as boolean true.  Quoted and unquoted
     * values are stored as strings, without the enclosing quotes.
     *
     * The remaining HTML is derived from the text the value pattern actually
     * matched.  Searching the input for the value text instead finds the
     * wrong position when the value also occurs in the attribute name
     * (class="a"), and consumes nothing at all for an empty value (id="").
     *
     * @param string $html HTML beginning with an attribute.
     *
     * @return string Remaining HTML, or '' if the attribute is malformed.
     *
     * @throws ParseException If the attribute is malformed and throw-on-error is enabled.
     */
    private function parseAttribute($html)
    {
        $remainingHtml = trim($html);

        // Will match the first entire name/value attribute pair.
        $nameMatchSuccessful = preg_match(
            "/((([a-z0-9\-_]+:)?[a-z0-9\-_]+)(\s*=\s*)?)/i",
            $remainingHtml,
            $attributeMatches
        );
        if ($nameMatchSuccessful !== 1) {
            // Nothing here looks like an attribute name.  Report it instead
            // of handing the same HTML back to the caller unchanged.
            if ($this->getThrowOnError()) {
                throw new ParseException('Invalid attribute.');
            }

            return '';
        }

        $name = $attributeMatches[2];
        $remainingHtml = mb_substr(mb_strstr($remainingHtml, $name), mb_strlen($name));
        if (preg_match("/^\s*=\s*/", $remainingHtml) === 0) {
            // Valueless attribute.
            $this->attributes[trim($name)] = true;

            return $remainingHtml;
        }

        $remainingHtml = ltrim($remainingHtml, ' =');

        // Guard the offset read: nothing may be left after the "=".
        $firstCharacter = $remainingHtml === '' ? '' : $remainingHtml[0];
        if ($firstCharacter === "'" || $firstCharacter === '"') {
            // Quote enclosed attribute value.
            $valueMatchSuccessful = preg_match(
                "/" . $firstCharacter . "(.*?(?<!\\\))" . $firstCharacter . "/s",
                $remainingHtml,
                $valueMatches
            );
            if ($valueMatchSuccessful !== 1) {
                if ($this->getThrowOnError()) {
                    throw new ParseException('Invalid value encapsulation.');
                }

                return '';
            }

            $value = $valueMatches[1];
        } else {
            // No quotes enclosing the attribute value.
            preg_match("/(\s*([^>\s]*(?<!\/)))/", $remainingHtml, $valueMatches);
            $value = $valueMatches[2];
        }

        $this->attributes[trim($name)] = $value;

        // Determine remaining html.  Both value patterns match at offset 0
        // (the quoted one because the string starts with the quote, the
        // unquoted one because it can match the empty string), so the byte
        // length of the full match is exactly what has been consumed.
        $remainingHtml = trim(substr($remainingHtml, strlen($valueMatches[0])));

        // Skip stray quotes, slashes and spaces before whatever comes next.
        return ltrim($remainingHtml, '\'"/ ');
    }
214
215
    /**
     * Will parse the contents of this element.
     *
     * Child tokens are built one at a time and appended to this element
     * until this element's closing tag is at the front of the HTML, no
     * further token can be built, or the next token implies that this
     * element is closed.
     *
     * @param string $html HTML following the start tag of this element.
     *
     * @return string Remaining HTML.
     */
    private function parseContents($html)
    {
        $remainingHtml = trim($html);
        if ($remainingHtml === '') {
            return '';
        }

        // Nothing to parse inside a script or style tag: the body is kept
        // as a single text child.
        if (in_array($this->name, array('script', 'style'), true)) {
            return $this->parseForeignContents($this->name, $remainingHtml);
        }

        // Parse contents one token at a time.
        $closingTagPattern = "/^<\/\s*" . $this->name . "\s*>/is";
        while (preg_match($closingTagPattern, $remainingHtml) === 0) {
            $child = TokenFactory::buildFromHtml(
                $remainingHtml,
                $this,
                $this->getThrowOnError()
            );

            if ($child === false) {
                return $remainingHtml;
            }

            if ($child->isClosingElementImplied($remainingHtml)) {
                // The upcoming tag belongs to an ancestor, not to us.
                return $remainingHtml;
            }

            $remainingHtml = trim($child->parse($remainingHtml));
            $this->children[] = $child;
        }

        // Remove remaining closing tag.
        $offsetAfterClosingTag = mb_strpos($remainingHtml, '>') + 1;

        return mb_substr($remainingHtml, $offsetAfterClosingTag);
    }
260
261
    /**
     * Will get the element name from the html string.
     *
     * The HTML must begin with "<" followed by the name; an optional
     * "prefix:" part is included in the name.
     *
     * @param string $html
     *
     * @return string The lower-cased element name, or '' if none was found.
     *
     * @throws ParseException If no name was found and throw-on-error is enabled.
     */
    private function parseElementName($html)
    {
        $nameMatches = array();
        $matchCount = preg_match(
            "/^(<(([a-z0-9\-]+:)?[a-z0-9\-]+))/i",
            $html,
            $nameMatches
        );

        if ($matchCount === 1) {
            // Group 2 is the name without the leading "<".
            return mb_strtolower($nameMatches[2]);
        }

        if ($this->getThrowOnError()) {
            throw new ParseException('Invalid element name.');
        }

        return '';
    }
285
286
    /**
     * Will parse the contents of a script or style element.
     *
     * Everything up to the first closing tag for $tag is stored, trimmed,
     * as a single Text child.  If there is no closing tag, all of the HTML
     * is taken as the contents.  Empty contents add no child.
     *
     * @param string $tag  Name of the element whose closing tag ends the contents.
     * @param string $html HTML following the start tag.
     *
     * @return string The remaining HTML.
     */
    private function parseForeignContents($tag, $html)
    {
        // Split once, on the first closing tag: [contents, rest].
        $pieces = preg_split(
            "/(<\/\s*" . $tag . "\s*>)/i",
            trim($html),
            2
        );

        $value = trim($pieces[0]);
        $remainingHtml = isset($pieces[1]) ? $pieces[1] : '';

        // Handle no contents.
        if ($value !== '') {
            $this->children[] = new Text($this, $this->getThrowOnError(), $value);
        }

        return $remainingHtml;
    }
326
327
    /**
     * Returns the attributes collected for this element.
     *
     * @return array Attribute values keyed by attribute name.
     */
    public function getAttributes()
    {
        return $this->attributes;
    }
336
337
    /**
     * Tells whether this element has at least one attribute.
     *
     * @return boolean
     */
    public function hasAttributes()
    {
        return count($this->attributes) > 0;
    }
344
345
    /**
     * Returns the child tokens of this element.
     *
     * @return array Child tokens in the order they were added.
     */
    public function getChildren()
    {
        return $this->children;
    }
354
355
    /**
     * Tells whether this element has at least one child token.
     *
     * @return boolean
     */
    public function hasChildren()
    {
        return count($this->children) > 0;
    }
362
363
    /**
     * Returns the name of this element.
     *
     * @return string|null The element name; null if parse() has not been called yet.
     */
    public function getName()
    {
        return $this->name;
    }
372
373 23
    /**
     * Converts this element and all of its children to a nested array.
     *
     * The 'attributes' and 'children' keys are present only when the
     * element has attributes or children respectively.
     *
     * @return array
     */
    public function toArray()
    {
        $asArray = array('type' => 'element', 'name' => $this->name);

        if (count($this->attributes) > 0) {
            $asArray['attributes'] = $this->attributes;
        }

        if (count($this->children) > 0) {
            $asArray['children'] = array_map(
                function ($child) {
                    return $child->toArray();
                },
                array_values($this->children)
            );
        }

        return $asArray;
    }
396
}
397