Completed
Push — master ( aa9157...b6caf4 )
by Kevin
02:09
created

Element::toArray()   B

Complexity

Conditions 5
Paths 4

Size

Total Lines 23
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 5

Importance

Changes 2
Bugs 0 Features 1
Metric Value
c 2
b 0
f 1
dl 0
loc 23
ccs 17
cts 17
cp 1
rs 8.5906
cc 5
eloc 13
nc 4
nop 0
crap 5
1
<?php
2
3
namespace Kevintweber\HtmlTokenizer\Tokens;
4
5
use Kevintweber\HtmlTokenizer\Exceptions\ParseException;
6
7
class Element extends AbstractToken
8
{
9
    /** @var array Attribute values keyed by attribute name; a valueless attribute is stored as true. */
10
    private $attributes;
11
12
    /** @var Token[] Child tokens in the order they were parsed. */
13
    private $children;
14
15
    /** @var string */
16
    private $name;
17
18 22
    /**
     * Creates an element token that has no name, attributes or children yet.
     *
     * @param Token|null $parent       Passed through to the parent constructor.
     * @param boolean    $throwOnError Passed through to the parent constructor.
     */
    public function __construct(Token $parent = null, $throwOnError = false)
    {
        parent::__construct(Token::ELEMENT, $parent, $throwOnError);

        // Nothing is known about the element until parse() runs.
        $this->name = null;
        $this->children = array();
        $this->attributes = array();
    }
26
27
    /**
     * Tells whether the element that starts at $html implies the closing tag
     * of this token's parent element.
     *
     * @param string $html Markup beginning with the opening tag to inspect.
     *
     * @return boolean True when the parent element is implicitly closed.
     */
    public function isClosingElementImplied($html)
    {
        $name = $this->parseElementName($html);
        $parent = $this->getParent();

        // Only an element parent has a name to compare against.
        $parentName = ($parent instanceof self) ? $parent->getName() : null;

        // HEAD: no closing tag.
        if ($parentName === 'head' && $name === 'body') {
            return true;
        }

        // Closed-only elements.
        // Closing tags not required.  We will close them now.
        $closedOnlyParents = array('base', 'link', 'meta', 'hr', 'br');
        if (in_array($parentName, $closedOnlyParents, true)) {
            return true;
        }

        // P: closed by any of the following elements.
        $paragraphClosers = array(
            'address', 'article', 'aside', 'blockquote', 'details', 'div',
            'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'form',
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr',
            'main', 'menu', 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul'
        );
        if ($parentName === 'p' && in_array($name, $paragraphClosers, true)) {
            return true;
        }

        // LI
        if ($parentName === 'li' && $name === 'li') {
            return true;
        }

        // DT and DD close each other, as do RP and RT.
        $siblingGroups = array(
            array('dt', 'dd'),
            array('rp', 'rt')
        );
        foreach ($siblingGroups as $group) {
            if (in_array($parentName, $group, true) && in_array($name, $group, true)) {
                return true;
            }
        }

        return false;
    }
106
107 12
    /**
     * Tells whether the markup begins with '<' immediately followed by an
     * ASCII letter, i.e. looks like the start of an opening tag.
     *
     * @param string $html Markup to test.
     *
     * @return boolean
     */
    public static function isMatch($html)
    {
        $matchCount = preg_match("/^<[a-zA-Z]/", $html);

        return $matchCount === 1;
    }
111
112 21
    /**
     * Parses this element from the start of $html: its name, its attributes
     * and, unless the tag is self-closing, its contents.
     *
     * @param string $html Markup beginning with this element's opening tag.
     *
     * @return string The markup remaining after this element.  An empty
     *                string is returned when the opening tag has no closing
     *                bracket and errors are not thrown.
     *
     * @throws ParseException If the opening tag has no closing bracket and
     *                        throwing on error is enabled.
     */
    public function parse($html)
    {
        $this->name = $this->parseElementName($html);

        // Parse attributes until only the end of the tag ('>' or '/>',
        // optionally preceded by whitespace) is left at the front.
        $remainingHtml = substr($html, strlen($this->name) + 1);
        while (strpos($remainingHtml, '>') !== false && preg_match("/^\s*[\/]?>/", $remainingHtml) === 0) {
            $remainingHtml = $this->parseAttribute($remainingHtml);
        }

        // Find position of end of tag.
        $posOfClosingBracket = strpos($remainingHtml, '>');
        if ($posOfClosingBracket === false) {
            if ($this->getThrowOnError()) {
                throw new ParseException('Invalid element: missing closing bracket.');
            }

            return '';
        }

        // Is self-closing?  This has to be decided on the tag itself, before
        // it is cut away: the tag is self-closing when the character right
        // before its closing bracket is a slash.
        $isSelfClosing = $posOfClosingBracket > 0
            && $remainingHtml[$posOfClosingBracket - 1] === '/';

        $remainingHtml = trim(substr($remainingHtml, $posOfClosingBracket + 1));

        if ($isSelfClosing) {
            // Self-closing element: it has no contents to parse.
            return $remainingHtml;
        }

        // Open element.
        return $this->parseContents($remainingHtml);
    }
144
145 11
    /**
     * Parses the first attribute found at the start of $html and stores it
     * in $this->attributes.
     *
     * A valueless attribute is stored as true.  A value may be unquoted, or
     * enclosed in single or double quotes; a quoted value may contain
     * whitespace, '=' and '>' characters.
     *
     * @param string $html Markup positioned just before an attribute.
     *
     * @return string The markup after the parsed attribute, or an empty
     *                string when the attribute is invalid and errors are not
     *                thrown.
     *
     * @throws ParseException If the attribute is invalid and throwing on
     *                        error is enabled.
     */
    private function parseAttribute($html)
    {
        $remainingHtml = ltrim($html);

        // The name runs up to the first whitespace, '=' or '>'.  An optional
        // '=' (whitespace allowed around it) announces a value.
        $nameMatchSuccessful = preg_match(
            "/^([^\s=>]+)(\s*=\s*)?/",
            $remainingHtml,
            $nameMatches
        );
        if ($nameMatchSuccessful !== 1) {
            if ($this->getThrowOnError()) {
                throw new ParseException('Invalid attribute.');
            }

            return '';
        }

        $name = $nameMatches[1];
        $remainingHtml = (string) substr($remainingHtml, strlen($nameMatches[0]));

        // The second group is absent from the matches when no '=' followed.
        if (!isset($nameMatches[2])) {
            // Valueless attribute.
            $this->attributes[$name] = true;

            return $remainingHtml;
        }

        // Guard the offset read: the markup may end right after the '='.
        $quote = isset($remainingHtml[0]) ? $remainingHtml[0] : '';
        if ($quote === "'" || $quote === '"') {
            // Quoted value: everything up to the next quote that is not
            // preceded by a backslash.
            $valueMatchSuccessful = preg_match(
                "/^" . $quote . "(.*?(?<!\\\))" . $quote . "/s",
                $remainingHtml,
                $valueMatches
            );
            if ($valueMatchSuccessful !== 1) {
                if ($this->getThrowOnError()) {
                    throw new ParseException('Invalid value encapsulation.');
                }

                return '';
            }

            $value = $valueMatches[1];
        } else {
            // Unquoted value: runs up to the next whitespace or '>'.  This
            // pattern always matches (possibly an empty value).
            preg_match("/^[^\s>]*/", $remainingHtml, $valueMatches);
            $value = $valueMatches[0];
        }

        $this->attributes[$name] = trim($value);

        // Return the html minus the current attribute.
        return (string) substr($remainingHtml, strlen($valueMatches[0]));
    }
192
193 19
    /**
     * Parses the contents of this element one token at a time, collecting
     * the tokens as children, until this element's closing tag is found or
     * a token implies that this element is closed.
     *
     * @param string $html Markup following this element's opening tag.
     *
     * @return string The markup remaining after this element.
     *
     * @throws ParseException If the remaining contents hold no closing
     *                        bracket at all and throwing on error is enabled.
     */
    private function parseContents($html)
    {
        $remainingHtml = trim($html);
        if ($remainingHtml === '') {
            return '';
        }

        // Quote the name so that it is always matched literally.
        $closingTagPattern = "/^<\/\s*" . preg_quote((string) $this->name, '/') . "\s*>/is";

        // Parse contents one token at a time.
        while (preg_match($closingTagPattern, $remainingHtml) === 0) {
            // Validate closing bracket actually exists.
            $posOfClosingBracket = strpos($remainingHtml, '>');
            if ($posOfClosingBracket === false) {
                if ($this->getThrowOnError()) {
                    throw new ParseException('Invalid attribute.');
                }

                return '';
            }

            // The child must know this element as its parent, otherwise
            // isClosingElementImplied() has no parent name to test against.
            // NOTE(review): assumes the second argument of
            // TokenFactory::buildFromHtml() is the parent token - confirm.
            $token = TokenFactory::buildFromHtml(
                $remainingHtml,
                $this,
                $this->getThrowOnError()
            );

            if ($token === false || $token->isClosingElementImplied($remainingHtml)) {
                // Leave the markup untouched for the caller to continue with.
                return $remainingHtml;
            }

            $remainingHtml = trim($token->parse($remainingHtml));
            $this->children[] = $token;
        }

        // Remove remaining closing tag.
        $posOfClosingBracket = strpos($remainingHtml, '>');

        return substr($remainingHtml, $posOfClosingBracket + 1);
    }
231
232
    /**
     * Extracts the element name found right after the leading '<' of the
     * html string.
     *
     * @param string $html Markup expected to begin with an opening tag.
     *
     * @return string The lower-cased element name (with its prefix, if any),
     *                or an empty string when no name is found and errors are
     *                not thrown.
     *
     * @throws ParseException If no name is found and throwing on error is
     *                        enabled.
     */
    private function parseElementName($html)
    {
        $matched = preg_match(
            "/^(<(([a-z0-9\-]+:)?[a-z0-9\-]+))/i",
            $html,
            $parts
        );

        if ($matched === 1) {
            // Group 2 holds the name without the leading '<'.
            return strtolower($parts[2]);
        }

        if ($this->getThrowOnError()) {
            throw new ParseException('Invalid element name.');
        }

        return '';
    }
256
257
    /**
     * Getter for 'attributes'.
     *
     * @return array Attribute values keyed by attribute name.
     */
    public function getAttributes()
    {
        return $this->attributes;
    }
261
262
    /**
     * Tells whether at least one attribute has been stored.
     *
     * @return boolean
     */
    public function hasAttributes()
    {
        return count($this->attributes) > 0;
    }
266
267
    /**
     * Getter for 'children'.
     *
     * @return array The child tokens collected so far.
     */
    public function getChildren()
    {
        return $this->children;
    }
271
272
    /**
     * Tells whether at least one child token has been collected.
     *
     * @return boolean
     */
    public function hasChildren()
    {
        return count($this->children) > 0;
    }
276
277
    /**
     * Getter for 'name'.
     *
     * @return string|null The element name; null until one has been assigned
     *                     by parse().
     */
    public function getName()
    {
        return $this->name;
    }
286
287 11
    /**
     * Converts this element, and recursively its children, to a plain array.
     *
     * The 'attributes' and 'children' keys are only present when the element
     * has attributes or children respectively.
     *
     * @return array
     */
    public function toArray()
    {
        $result = array(
            'type' => 'element',
            'name' => $this->name
        );

        if (count($this->attributes) > 0) {
            // Arrays are copied on assignment, so this is a plain copy.
            $result['attributes'] = $this->attributes;
        }

        if (count($this->children) > 0) {
            $result['children'] = array_map(
                function ($child) {
                    return $child->toArray();
                },
                $this->children
            );
        }

        return $result;
    }
310
}
311