Completed
Push — master ( b928ab...26824d )
by Kevin
02:08
created

Element::parse()   C

Complexity

Conditions 7
Paths 8

Size

Total Lines 31
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 16
CRAP Score 7

Importance

Changes 4
Bugs 2 Features 2
Metric Value
c 4
b 2
f 2
dl 0
loc 31
ccs 16
cts 16
cp 1
rs 6.7272
cc 7
eloc 15
nc 8
nop 1
crap 7
1
<?php
2
3
namespace Kevintweber\HtmlTokenizer\Tokens;
4
5
use Kevintweber\HtmlTokenizer\Exceptions\ParseException;
6
7
/**
 * Token representing an HTML element: its tag name, its attributes and the
 * child tokens found between its opening and closing tags.
 */
class Element extends AbstractToken
{
    /**
     * Attribute values keyed by attribute name; a valueless attribute is
     * stored as boolean true.
     *
     * @var array
     */
    private $attributes;

    /**
     * Child tokens in the order they were parsed.
     *
     * @var Token[]
     */
    private $children;

    /**
     * Lower-cased element name; null until parse() has run.
     *
     * @var string|null
     */
    private $name;

    /**
     * @param Token|null $parent       Parent token, if any.
     * @param boolean    $throwOnError When true, parse errors raise a
     *                                 ParseException instead of being
     *                                 reported through an empty return value.
     */
    public function __construct(Token $parent = null, $throwOnError = false)
    {
        parent::__construct(Token::ELEMENT, $parent, $throwOnError);

        $this->attributes = array();
        $this->children = array();
        $this->name = null;
    }

    /**
     * Decides whether the opening tag at the start of $html implicitly closes
     * this token's parent element.
     *
     * The element name is read from $html (this token has usually not been
     * parsed yet), so $html must start with this element's opening tag.
     *
     * @param string $html Markup beginning with this element's opening tag.
     *
     * @return boolean True when the parent element must be closed first.
     *
     * @throws ParseException When the element name cannot be read and
     *                        throw-on-error is enabled.
     */
    public function isClosingElementImplied($html)
    {
        $parent = $this->getParent();
        // instanceof is false for null, so this also covers "no parent".
        if (!($parent instanceof self)) {
            return false;
        }

        $name = $this->parseElementName($html);
        $parentName = $parent->getName();

        // HEAD: no closing tag.
        if ($name === 'body' && $parentName === 'head') {
            return true;
        }

        // Closed-only elements.
        // Closing tags not required.  We will close them now.
        if ($this->isClosedOnlyElement($parentName)) {
            return true;
        }

        // P: an open paragraph is closed by any of these elements.
        $paragraphClosers = array(
            'address', 'article', 'aside', 'blockquote', 'details', 'div',
            'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'form',
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr',
            'main', 'menu', 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul'
        );
        if ($parentName === 'p' && in_array($name, $paragraphClosers, true)) {
            return true;
        }

        // LI
        if ($parentName === 'li' && $name === 'li') {
            return true;
        }

        // DT and DD
        if (($parentName === 'dt' || $parentName === 'dd')
            && ($name === 'dt' || $name === 'dd')
        ) {
            return true;
        }

        // RP and RT
        if (($parentName === 'rp' || $parentName === 'rt')
            && ($name === 'rp' || $name === 'rt')
        ) {
            return true;
        }

        return false;
    }

    /**
     * Tells whether $name is one of the elements that never take a closing
     * tag (base, link, meta, hr, br).
     *
     * @param string|null $name Element name to test.
     *
     * @return boolean
     */
    private function isClosedOnlyElement($name)
    {
        return in_array($name, array('base', 'link', 'meta', 'hr', 'br'), true);
    }

    /**
     * Tells whether $html starts with something that looks like an opening
     * tag: '<' immediately followed by an ASCII letter.
     *
     * @param string $html
     *
     * @return boolean
     */
    public static function isMatch($html)
    {
        return preg_match('/^<[a-zA-Z]/', $html) === 1;
    }

    /**
     * Parses this element from the start of $html: name, attributes and,
     * for a non-self-closing tag, its contents.
     *
     * @param string $html Markup beginning with this element's opening tag.
     *
     * @return string The markup left over after this element. An empty string
     *                is returned when the opening tag has no closing bracket
     *                and throw-on-error is disabled.
     *
     * @throws ParseException On malformed markup when throw-on-error is
     *                        enabled.
     */
    public function parse($html)
    {
        $this->name = $this->parseElementName($html);

        // Skip the leading '<' and the element name.
        $remainingHtml = (string) substr($html, strlen($this->name) + 1);

        // Parse attributes until the end of the opening tag ('>' or '/>')
        // is next, or until no closing bracket is left at all.
        while (strpos($remainingHtml, '>') !== false
            && preg_match('/^\s*\/?>/', $remainingHtml) === 0
        ) {
            $remainingHtml = $this->parseAttribute($remainingHtml);
        }

        // Find position of end of tag.
        $posOfClosingBracket = strpos($remainingHtml, '>');
        if ($posOfClosingBracket === false) {
            if ($this->getThrowOnError()) {
                throw new ParseException('Invalid element: missing closing bracket.');
            }

            return '';
        }

        // Self-closing when the first '/>' ends at that same closing bracket.
        $posOfSelfClosingBracket = strpos($remainingHtml, '/>');
        $isSelfClosing = $posOfSelfClosingBracket !== false
            && $posOfSelfClosingBracket === $posOfClosingBracket - 1;

        $remainingHtml = trim(substr($remainingHtml, $posOfClosingBracket + 1));
        if ($isSelfClosing) {
            return $remainingHtml;
        }

        // Open element.
        return $this->parseContents($remainingHtml);
    }

    /**
     * Parses the first attribute found at the start of $html and stores it.
     *
     * Accepted forms are `name`, `name=value`, `name="value"` and
     * `name='value'`, with optional whitespace around the equals sign. Quoted
     * values may contain whitespace, '=' and '>' and are stored verbatim; a
     * quote preceded by a backslash does not end the value. The '/' of a
     * self-closing tag that directly follows a quoted value or a valueless
     * name is left in the returned markup so that parse() can detect it.
     *
     * @param string $html Markup positioned just before an attribute.
     *
     * @return string The markup after the attribute, or an empty string on
     *                failure when throw-on-error is disabled.
     *
     * @throws ParseException On a malformed attribute when throw-on-error is
     *                        enabled.
     */
    private function parseAttribute($html)
    {
        // Attribute name: everything up to whitespace, '=' or '>'.
        $nameMatchSuccessful = preg_match(
            '/^\s*([^>\s=]*)/',
            $html,
            $nameMatches
        );
        $name = '';
        $remainingHtml = '';
        $hasValue = false;
        if ($nameMatchSuccessful === 1) {
            $name = $nameMatches[1];
            $remainingHtml = (string) substr($html, strlen($nameMatches[0]));
            $hasValue = preg_match('/^\s*=\s*/', $remainingHtml, $equalsMatches) === 1;
        }

        // Nothing to consume: neither a name nor an equals sign is present.
        if ($nameMatchSuccessful !== 1 || ($name === '' && !$hasValue)) {
            if ($this->getThrowOnError()) {
                throw new ParseException('Invalid attribute.');
            }

            return '';
        }

        if (!$hasValue) {
            // Valueless attribute.  In "disabled/>" the slash belongs to the
            // tag, not to the name, so hand it back to the caller.
            if (strlen($name) > 1
                && substr($name, -1) === '/'
                && isset($remainingHtml[0])
                && $remainingHtml[0] === '>'
            ) {
                $name = substr($name, 0, -1);
                $remainingHtml = '/' . $remainingHtml;
            }

            $this->attributes[trim($name)] = true;

            return $remainingHtml;
        }

        // Step over the equals sign and any whitespace around it.
        $remainingHtml = (string) substr($remainingHtml, strlen($equalsMatches[0]));

        $quote = isset($remainingHtml[0]) ? $remainingHtml[0] : '';
        if ($quote === "'" || $quote === '"') {
            // Quoted value: runs to the next matching quote that is not
            // preceded by a backslash.
            $valueMatchSuccessful = preg_match(
                '/^' . $quote . '(.*?(?<!\\\\))' . $quote . '/s',
                $remainingHtml,
                $valueMatches
            );
            if ($valueMatchSuccessful !== 1) {
                if ($this->getThrowOnError()) {
                    throw new ParseException('Invalid value encapsulation.');
                }

                return '';
            }

            $value = $valueMatches[1];
            $consumedLength = strlen($valueMatches[0]);
        } else {
            // Unquoted value: runs to the next whitespace or '>'.
            preg_match('/^[^>\s]*/', $remainingHtml, $valueMatches);
            $value = trim($valueMatches[0]);
            $consumedLength = strlen($valueMatches[0]);
        }

        $this->attributes[trim($name)] = $value;

        // Return the html minus the current attribute.
        return (string) substr($remainingHtml, $consumedLength);
    }

    /**
     * Parses the child tokens of this element up to its closing tag.
     *
     * @param string $html Markup following this element's opening tag.
     *
     * @return string The markup after this element's closing tag. When no
     *                further token can be built, or the next token implies
     *                that this element is closed, the unparsed markup is
     *                returned as is.
     */
    private function parseContents($html)
    {
        $remainingHtml = trim($html);
        if ($remainingHtml == '') {
            return '';
        }

        // The name is quoted so it is always matched literally.
        $closingTagPattern = '/^<\/\s*' . preg_quote($this->name, '/') . '\s*>/is';

        // Parse contents one token at a time.
        while (preg_match($closingTagPattern, $remainingHtml) === 0) {
            $token = TokenFactory::buildFromHtml(
                $remainingHtml,
                $this,
                $this->getThrowOnError()
            );

            if ($token === false || $token->isClosingElementImplied($remainingHtml)) {
                return $remainingHtml;
            }

            $remainingHtml = trim($token->parse($remainingHtml));
            $this->children[] = $token;
        }

        // Remove remaining closing tag.
        $posOfClosingBracket = strpos($remainingHtml, '>');

        return substr($remainingHtml, $posOfClosingBracket + 1);
    }

    /**
     * Will get the element name from the html string.
     *
     * The name may carry a namespace-style prefix ("prefix:name") and is
     * returned in lower case.
     *
     * @param string $html Markup beginning with '<' and the element name.
     *
     * @return string The element name, or an empty string when none is found
     *                and throw-on-error is disabled.
     *
     * @throws ParseException When no name is found and throw-on-error is
     *                        enabled.
     */
    private function parseElementName($html)
    {
        $elementMatchSuccessful = preg_match(
            "/^(<(([a-z0-9\-]+:)?[a-z0-9\-]+))/i",
            $html,
            $elementMatches
        );
        if ($elementMatchSuccessful !== 1) {
            if ($this->getThrowOnError()) {
                throw new ParseException('Invalid element name.');
            }

            return '';
        }

        return strtolower($elementMatches[2]);
    }

    /**
     * Getter for 'attributes'.
     *
     * @return array Attribute values keyed by attribute name.
     */
    public function getAttributes()
    {
        return $this->attributes;
    }

    /**
     * @return boolean True when at least one attribute was parsed.
     */
    public function hasAttributes()
    {
        return !empty($this->attributes);
    }

    /**
     * Getter for 'children'.
     *
     * @return Token[]
     */
    public function getChildren()
    {
        return $this->children;
    }

    /**
     * @return boolean True when at least one child token was parsed.
     */
    public function hasChildren()
    {
        return !empty($this->children);
    }

    /**
     * Getter for 'name'.
     *
     * @return string|null
     */
    public function getName()
    {
        return $this->name;
    }

    /**
     * Converts this element and, recursively, its children to a plain array.
     *
     * The 'attributes' and 'children' keys are present only when non-empty.
     *
     * @return array
     */
    public function toArray()
    {
        $result = array(
            'type' => 'element',
            'name' => $this->name
        );

        if (!empty($this->attributes)) {
            $result['attributes'] = $this->attributes;
        }

        if (!empty($this->children)) {
            $result['children'] = array();
            foreach ($this->children as $child) {
                $result['children'][] = $child->toArray();
            }
        }

        return $result;
    }
}
305