Completed
Push — master ( 1dc2c9...b928ab )
by Kevin
02:12
created

Element::parse()   C

Complexity

Conditions 7
Paths 8

Size

Total Lines 31
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 16
CRAP Score 7

Importance

Changes 4
Bugs 2 Features 2
Metric Value
c 4
b 2
f 2
dl 0
loc 31
ccs 16
cts 16
cp 1
rs 6.7272
cc 7
eloc 15
nc 8
nop 1
crap 7
1
<?php
2
3
namespace Kevintweber\HtmlTokenizer\Tokens;
4
5
use Kevintweber\HtmlTokenizer\Exceptions\ParseException;
6
7
class Element extends AbstractToken
8
{
9
    /** @var array<string, string|bool> Attribute values keyed by attribute name; true for valueless attributes. */
10
    private $attributes;
11
12
    /** @var array[Token] */
13
    private $children;
14
15
    /** @var string|null Lower-cased element name; null until parse() has run. */
16
    private $name;
17
18 44
    /**
     * Creates an element token that has no name, attributes or children yet.
     *
     * @param Token|null $parent       The parent token, or null when there is none.
     * @param bool       $throwOnError Forwarded to the parent constructor; this class
     *                                 reads it back through getThrowOnError() to decide
     *                                 between throwing a ParseException and returning ''.
     */
    public function __construct(Token $parent = null, $throwOnError = false)
    {
        parent::__construct(Token::ELEMENT, $parent, $throwOnError);

        // Start from an empty state; parse() fills these in.
        $this->name = null;
        $this->children = array();
        $this->attributes = array();
    }
26
27 22
    /**
     * Decides whether the appearance of this element implies that its parent
     * element has ended, even though no closing tag for the parent was seen.
     *
     * Covers the HTML optional-end-tag cases handled by this tokenizer:
     * BODY inside HEAD, any element inside a void element, block-level
     * elements inside P, and sibling LI, DT/DD and RP/RT elements.
     *
     * @param string $html The html beginning with this element's opening tag;
     *                     the element name is read from it via parseElementName().
     *
     * @return bool True if the parent element should be closed before this
     *              element is parsed; false otherwise (including when the
     *              parent is missing or is not an element).
     */
    public function isClosingElementImplied($html)
    {
        // instanceof is already false for null, so no separate null check is needed.
        $parent = $this->getParent();
        if (!($parent instanceof self)) {
            return false;
        }

        $name = $this->parseElementName($html);
        $parentName = $parent->getName();

        // HEAD: no closing tag.
        if ($name === 'body' && $parentName === 'head') {
            return true;
        }

        // Void elements can never have contents, so any element that follows
        // one cannot be its child.  Close the void parent now.
        $voidElements = array(
            'area',
            'base',
            'br',
            'col',
            'embed',
            'hr',
            'img',
            'input',
            'keygen',
            'link',
            'meta',
            'param',
            'source',
            'track',
            'wbr',
        );
        if (in_array($parentName, $voidElements, true)) {
            return true;
        }

        // P: closed implicitly by any of these block-level elements.
        if ($parentName === 'p') {
            $closesParagraph = array(
                'address',
                'article',
                'aside',
                'blockquote',
                'details',
                'div',
                'dl',
                'fieldset',
                'figcaption',
                'figure',
                'footer',
                'form',
                'h1',
                'h2',
                'h3',
                'h4',
                'h5',
                'h6',
                'header',
                'hgroup',
                'hr',
                'main',
                'menu',
                'nav',
                'ol',
                'p',
                'pre',
                'section',
                'table',
                'ul',
            );
            if (in_array($name, $closesParagraph, true)) {
                return true;
            }
        }

        // LI
        if ($parentName === 'li' && $name === 'li') {
            return true;
        }

        // DT and DD
        if (($parentName === 'dt' || $parentName === 'dd') && ($name === 'dt' || $name === 'dd')) {
            return true;
        }

        // RP and RT
        if (($parentName === 'rp' || $parentName === 'rt') && ($name === 'rp' || $name === 'rt')) {
            return true;
        }

        return false;
    }
107
108 14
    /**
     * Tells whether the given html starts with an opening element tag,
     * i.e. a "<" immediately followed by an ASCII letter.
     *
     * @param string $html The html to inspect.
     *
     * @return bool True when the html begins with an opening tag.
     */
    public static function isMatch($html)
    {
        $startsWithTag = preg_match("/^<[a-zA-Z]/", $html);

        return $startsWithTag === 1;
    }
112
113 23
    /**
     * Parses this element from the start of the given html: its name, its
     * attributes and, unless the tag is self-closing, its contents.
     *
     * @param string $html The html beginning with this element's opening tag.
     *
     * @return string The html left over after this element.  An empty string
     *                is returned when the opening tag has no closing bracket
     *                and getThrowOnError() is falsy.
     *
     * @throws ParseException When the opening tag has no closing bracket and
     *                        getThrowOnError() is truthy.
     */
    public function parse($html)
    {
        $this->name = $this->parseElementName($html);

        // Skip past "<name", then consume attributes one at a time until the
        // tag terminator (">" or "/>") comes next or no ">" is left at all.
        $rest = substr($html, strlen($this->name) + 1);
        while (true) {
            if (strpos($rest, '>') === false) {
                break;
            }
            if (preg_match("/^\s*[\/]?>/", $rest) !== 0) {
                break;
            }

            $rest = $this->parseAttribute($rest);
        }

        // Locate the end of the opening tag.
        $closeAt = strpos($rest, '>');
        if ($closeAt === false) {
            if (!$this->getThrowOnError()) {
                return '';
            }

            throw new ParseException('Invalid element: missing closing bracket.');
        }

        // The tag is self-closing when "/>" ends exactly at the closing bracket.
        $slashAt = strpos($rest, '/>');
        $afterTag = trim(substr($rest, $closeAt + 1));
        $isSelfClosing = $slashAt !== false && $slashAt == $closeAt - 1;

        return $isSelfClosing ? $afterTag : $this->parseContents($afterTag);
    }
144
145 11
    /**
     * Parses the first attribute at the start of the given html and records
     * it on this element.
     *
     * Three forms are recognised: valueless attributes (stored as true),
     * unquoted values (running up to the next whitespace or ">"), and values
     * wrapped in single or double quotes.  Quoted values may contain
     * whitespace, "=" and ">" characters.  Whitespace around the "=" sign is
     * allowed.  A "/" ends an attribute name, so a trailing "/>" is left in
     * place for the caller to detect a self-closing tag.
     *
     * @param string $html The tag html, beginning at the attribute to parse.
     *
     * @return string The html remaining after the attribute, or an empty
     *                string when the attribute is invalid and
     *                getThrowOnError() is falsy.
     *
     * @throws ParseException When the attribute is invalid and
     *                        getThrowOnError() is truthy.
     */
    private function parseAttribute($html)
    {
        // Match the attribute name and an optional "=" separator.  A stray
        // "/" or a leading "=" is accepted as (part of) a name so that at
        // least one character is always consumed and the caller's loop
        // cannot stall.
        $attrMatchSuccessful = preg_match(
            '/^\s*(\/|=[^\s=>\/]*|[^\s=>\/]+)(\s*=\s*)?/',
            $html,
            $attributeMatches
        );
        if ($attrMatchSuccessful !== 1) {
            if ($this->getThrowOnError()) {
                throw new ParseException('Invalid attribute.');
            }

            return '';
        }

        $name = $attributeMatches[1];
        $remainingHtml = substr($html, strlen($attributeMatches[0]));

        // preg_match() omits a trailing group that did not participate, so a
        // missing index 2 means no "=" followed the name.
        if (!isset($attributeMatches[2])) {
            // Valueless attribute.
            $this->attributes[$name] = true;

            return $remainingHtml;
        }

        // isset() guards against an empty remainder (e.g. html ending in "=").
        $quote = isset($remainingHtml[0]) ? $remainingHtml[0] : '';
        if ($quote === "'" || $quote === '"') {
            // Quoted value: everything up to the first matching quote that is
            // not preceded by a backslash.
            $valueMatchSuccessful = preg_match(
                '/^' . $quote . '(.*?(?<!\\\\))' . $quote . '/s',
                $remainingHtml,
                $valueMatches
            );
            if ($valueMatchSuccessful !== 1) {
                if ($this->getThrowOnError()) {
                    throw new ParseException('Invalid value encapsulation.');
                }

                return '';
            }

            $value = $valueMatches[1];
        } else {
            // Unquoted value: runs up to the next whitespace or ">".  The
            // pattern can match an empty string, so it never fails.
            preg_match('/^[^\s>]*/', $remainingHtml, $valueMatches);
            $value = $valueMatches[0];
        }

        $this->attributes[$name] = trim($value);

        // Return the html minus the current attribute.
        return substr($remainingHtml, strlen($valueMatches[0]));
    }
192
193 10
    /**
     * Parses the contents of this element, one child token at a time, until
     * this element's closing tag is reached.
     *
     * @param string $html The html following this element's opening tag.
     *
     * @return string The html remaining after this element.  When no further
     *                token can be built, or the next token implies that this
     *                element is closed, the unparsed html is returned as is.
     */
    private function parseContents($html)
    {
        $pending = trim($html);
        if ($pending == '') {
            return '';
        }

        // Matches this element's own closing tag at the start of the html.
        $closingTagPattern = "/^<\/\s*" . $this->name . "\s*>/is";

        while (preg_match($closingTagPattern, $pending) === 0) {
            $child = TokenFactory::buildFromHtml(
                $pending,
                $this,
                $this->getThrowOnError()
            );

            // Stop when nothing could be built, or when the new token means
            // this element ended without an explicit closing tag.
            $stopHere = $child === false || $child->isClosingElementImplied($pending);
            if ($stopHere) {
                return $pending;
            }

            $pending = trim($child->parse($pending));
            $this->children[] = $child;
        }

        // Drop the closing tag itself from the remaining html.
        return substr($pending, strpos($pending, '>') + 1);
    }
221
222
    /**
     * Extracts the element name (with an optional "prefix:" part) from html
     * that begins with an opening tag.
     *
     * @param string $html The html beginning with "<name".
     *
     * @return string The lower-cased element name, or an empty string when no
     *                name is found and getThrowOnError() is falsy.
     *
     * @throws ParseException When no name is found and getThrowOnError() is truthy.
     */
    private function parseElementName($html)
    {
        $found = preg_match(
            "/^(<(([a-z0-9\-]+:)?[a-z0-9\-]+))/i",
            $html,
            $parts
        );

        if ($found === 1) {
            // Group 2 is the name without the leading "<".
            return strtolower($parts[2]);
        }

        if ($this->getThrowOnError()) {
            throw new ParseException('Invalid element name.');
        }

        return '';
    }
246
247
    /**
     * Getter for 'attributes'.
     *
     * @return array The parsed attributes keyed by attribute name; valueless
     *               attributes have the value true.
     */
    public function getAttributes()
    {
        return $this->attributes;
    }
251
252
    /**
     * Tells whether at least one attribute has been recorded on this element.
     *
     * @return bool
     */
    public function hasAttributes()
    {
        return (bool) $this->attributes;
    }
256
257
    /**
     * Getter for 'children'.
     *
     * @return array The child tokens, in the order they were parsed.
     */
    public function getChildren()
    {
        return $this->children;
    }
261
262
    /**
     * Tells whether at least one child token has been recorded on this element.
     *
     * @return bool
     */
    public function hasChildren()
    {
        return (bool) $this->children;
    }
266
267
    /**
     * Getter for 'name'.
     *
     * @return string|null The element name as set by parse(); null if parse()
     *                     has not been called yet.
     */
    public function getName()
    {
        return $this->name;
    }
276
277 13
    /**
     * Converts this element, and recursively its children, to a plain array.
     *
     * @return array An array with the keys 'type' and 'name', plus
     *               'attributes' and 'children' when they are not empty.
     */
    public function toArray()
    {
        $result = array('type' => 'element', 'name' => $this->name);

        if (!empty($this->attributes)) {
            // Arrays are copied by value, so this matches a key-by-key copy.
            $result['attributes'] = $this->attributes;
        }

        if (!empty($this->children)) {
            $result['children'] = array_map(
                function ($child) {
                    return $child->toArray();
                },
                $this->children
            );
        }

        return $result;
    }
300
}
301