Completed
Push — master ( 26824d...9bbb30 )
by Kevin
02:15
created

Element::parseContents()   B

Complexity

Conditions 5
Paths 4

Size

Total Lines 28
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 5

Importance

Changes 2
Bugs 1 Features 1
Metric Value
c 2
b 1
f 1
dl 0
loc 28
ccs 17
cts 17
cp 1
rs 8.439
cc 5
eloc 15
nc 4
nop 1
crap 5
1
<?php
2
3
namespace Kevintweber\HtmlTokenizer\Tokens;
4
5
use Kevintweber\HtmlTokenizer\Exceptions\ParseException;
6
7
class Element extends AbstractToken
8
{
9
    /** @var array Map of attribute name => value (a string, or true for a valueless attribute). */
10
    private $attributes;
11
12
    /** @var Token[] Child tokens, in the order they were parsed. */
13
    private $children;
14
15
    /** @var string|null Lower-cased element name; null until parse() has run. */
16
    private $name;
17
18 49
    /**
     * Creates an empty element token: no name, no attributes, no children.
     *
     * @param Token|null $parent       Token containing this element, if any.
     * @param boolean    $throwOnError Forwarded to the parent constructor; this
     *                                 class reads it back via getThrowOnError()
     *                                 to decide between throwing a
     *                                 ParseException and failing silently.
     */
    public function __construct(Token $parent = null, $throwOnError = false)
    {
        parent::__construct(Token::ELEMENT, $parent, $throwOnError);

        // Nothing is known about the element until parse() is called.
        $this->name = null;
        $this->children = array();
        $this->attributes = array();
    }
26
27 22
    /**
     * Tells whether the element starting at $html implicitly closes its
     * parent element, i.e. the parent's closing tag was omitted and must be
     * assumed before this element begins.
     *
     * Only a parent that is itself an Element can be implicitly closed.
     *
     * @param string $html Html beginning with this element's opening tag.
     *
     * @return boolean True if the parent element ends before this element.
     *
     * @throws ParseException If the element name cannot be read from $html
     *                        and throw-on-error is enabled.
     */
    public function isClosingElementImplied($html)
    {
        $parent = $this->getParent();
        // instanceof is false for null, so no separate null check is needed.
        if (!($parent instanceof self)) {
            return false;
        }

        $name = $this->parseElementName($html);
        $parentName = $parent->getName();

        // HEAD: no closing tag.
        if ($name === 'body' && $parentName === 'head') {
            return true;
        }

        // Void elements.
        // They can never contain content, so any element that starts while
        // one of them is the parent closes it.
        $voidElements = array(
            'area',
            'base',
            'br',
            'col',
            'embed',
            'hr',
            'img',
            'input',
            'link',
            'meta',
            'param',
            'source',
            'track',
            'wbr'
        );
        if (in_array($parentName, $voidElements, true)) {
            return true;
        }

        // P: closed by the start of any of these elements.
        $elementsNotChildrenOfP = array(
            'address',
            'article',
            'aside',
            'blockquote',
            'details',
            'div',
            'dl',
            'fieldset',
            'figcaption',
            'figure',
            'footer',
            'form',
            'h1',
            'h2',
            'h3',
            'h4',
            'h5',
            'h6',
            'header',
            'hgroup',
            'hr',
            'main',
            'menu',
            'nav',
            'ol',
            'p',
            'pre',
            'section',
            'table',
            'ul'
        );
        if ($parentName === 'p' && in_array($name, $elementsNotChildrenOfP, true)) {
            return true;
        }

        // LI
        if ($parentName === 'li' && $name === 'li') {
            return true;
        }

        // DT and DD
        $definitionItems = array('dt', 'dd');
        if (in_array($parentName, $definitionItems, true) && in_array($name, $definitionItems, true)) {
            return true;
        }

        // RP and RT
        $rubyItems = array('rp', 'rt');
        if (in_array($parentName, $rubyItems, true) && in_array($name, $rubyItems, true)) {
            return true;
        }

        return false;
    }
109
110 28
    /**
     * Parses this element from the start of $html: its name, its attributes
     * and, unless the tag is self-closing, its contents.
     *
     * @param string $html Html beginning with this element's opening tag.
     *
     * @return string The html left over after this element. An empty string
     *                is returned when the opening tag has no closing bracket
     *                and throw-on-error is disabled.
     *
     * @throws ParseException If the opening tag has no closing bracket and
     *                        throw-on-error is enabled.
     */
    public function parse($html)
    {
        $this->name = $this->parseElementName($html);

        // Skip the "<" and the element name, then consume attributes one at
        // a time until the end of the opening tag is the next thing left.
        $rest = substr($html, strlen($this->name) + 1);
        while (true) {
            if (strpos($rest, '>') === false) {
                break;
            }

            if (preg_match("/^\s*[\/]?>/", $rest) !== 0) {
                break;
            }

            $rest = $this->parseAttribute($rest);
        }

        // The opening tag has to be terminated somewhere.
        $bracketPos = strpos($rest, '>');
        if ($bracketPos === false) {
            if (!$this->getThrowOnError()) {
                return '';
            }

            throw new ParseException('Invalid element: missing closing bracket.');
        }

        // Self-closing means the first "/>" ends exactly at that bracket.
        $slashBracketPos = strpos($rest, '/>');
        $isSelfClosing = $slashBracketPos !== false
            && $slashBracketPos === $bracketPos - 1;

        $afterOpeningTag = trim(substr($rest, $bracketPos + 1));

        // An open element still has its contents (and closing tag) to parse.
        return $isSelfClosing
            ? $afterOpeningTag
            : $this->parseContents($afterOpeningTag);
    }
141
142 13
    /**
     * Parses the first attribute in $html and records it on this element.
     *
     * A valueless attribute is stored as true. A value may be unquoted, in
     * which case it ends at the next whitespace or '>', or wrapped in single
     * or double quotes, in which case it may contain whitespace, '=' and '>'.
     *
     * @param string $html Html positioned at (or at whitespace before) an
     *                     attribute inside an opening tag.
     *
     * @return string The html following the parsed attribute, or an empty
     *                string when a quoted value is never terminated and
     *                throw-on-error is disabled.
     *
     * @throws ParseException If a quoted value is never terminated and
     *                        throw-on-error is enabled.
     */
    private function parseAttribute($html)
    {
        // Split off the leading whitespace and the first run of characters
        // that are neither whitespace nor '>'. That run holds the attribute
        // name and, when present, at least the start of its value.
        preg_match("/^(\s*)([^>\s]*)/", $html, $attributeMatches);
        $attribute = $attributeMatches[2];
        $posOfAttribute = strlen($attributeMatches[1]);
        $posAfterAttribute = $posOfAttribute + strlen($attribute);

        $posOfEqualsSign = strpos($attribute, '=');
        if ($posOfEqualsSign === false) {
            // Valueless attribute.
            $this->attributes[trim($attribute)] = true;

            return substr($html, $posAfterAttribute);
        }

        // Split on the first '=' only, so a value may itself contain '='.
        $name = substr($attribute, 0, $posOfEqualsSign);
        $value = (string) substr($attribute, $posOfEqualsSign + 1);

        // Guard the offset access: the value is empty for input like "foo=".
        $quote = ($value !== '') ? $value[0] : '';
        if ($quote === "'" || $quote === '"') {
            // Match against everything after the '=' rather than the
            // whitespace-delimited run, so that quoted values containing
            // whitespace or '>' are captured whole.
            $posOfValue = $posOfAttribute + $posOfEqualsSign + 1;
            $valueMatchSuccessful = preg_match(
                "/^" . $quote . "(.*?(?<!\\\))" . $quote . "/s",
                substr($html, $posOfValue),
                $valueMatches
            );
            if ($valueMatchSuccessful !== 1) {
                if ($this->getThrowOnError()) {
                    throw new ParseException('Invalid value encapsulation.');
                }

                return '';
            }

            $value = $valueMatches[1];

            // Resume right after the closing quote, so that a trailing "/"
            // of a self-closing tag is left for parse() to see.
            $posAfterAttribute = $posOfValue + strlen($valueMatches[0]);
        }

        $this->attributes[trim($name)] = trim($value);

        // Return the html minus the current attribute.
        return substr($html, $posAfterAttribute);
    }
182
183 12
    /**
     * Parses the contents of this (open) element, collecting child tokens
     * until this element's closing tag is reached.
     *
     * @param string $html Html following this element's opening tag.
     *
     * @return string The html left over once this element has ended: either
     *                what follows its closing tag, or the unparsed remainder
     *                when no token could be built or the next token implies
     *                that this element is closed.
     */
    private function parseContents($html)
    {
        $rest = trim($html);
        if ($rest == '') {
            return '';
        }

        // Matches this element's own closing tag at the start of the html.
        $closingTagPattern = "/^<\/\s*" . $this->name . "\s*>/is";

        // Parse contents one token at a time.
        while (preg_match($closingTagPattern, $rest) === 0) {
            $child = TokenFactory::buildFromHtml(
                $rest,
                $this,
                $this->getThrowOnError()
            );

            // Nothing parseable is left.
            if ($child === false) {
                return $rest;
            }

            // The next token ends this element without a closing tag; leave
            // it unparsed for the caller.
            if ($child->isClosingElementImplied($rest)) {
                return $rest;
            }

            $rest = trim($child->parse($rest));
            $this->children[] = $child;
        }

        // Drop the closing tag itself.
        $endOfClosingTag = strpos($rest, '>') + 1;

        return substr($rest, $endOfClosingTag);
    }
211
212
    /**
     * Reads the element name from the opening tag at the start of the html.
     *
     * The name may carry a "prefix:" part and is returned in lower case.
     *
     * @param string $html Html expected to begin with "<name".
     *
     * @return string The element name, or an empty string when none is
     *                found and throw-on-error is disabled.
     *
     * @throws ParseException If no name is found and throw-on-error is enabled.
     */
    private function parseElementName($html)
    {
        $found = preg_match(
            "/^(<(([a-z0-9\-]+:)?[a-z0-9\-]+))/i",
            $html,
            $parts
        );

        if ($found === 1) {
            // Group 2 is the name without the leading "<".
            return strtolower($parts[2]);
        }

        if ($this->getThrowOnError()) {
            throw new ParseException('Invalid element name.');
        }

        return '';
    }
236
237 1
    /**
     * Getter for 'attributes'.
     *
     * @return array Attribute name => value (true for valueless attributes).
     */
    public function getAttributes()
    {
        return $this->attributes;
    }
241
242 1
    /**
     * Tells whether at least one attribute has been recorded.
     *
     * @return boolean
     */
    public function hasAttributes()
    {
        // An empty array casts to false, a non-empty one to true.
        return (bool) $this->attributes;
    }
246
247 1
    /**
     * Getter for 'children'.
     *
     * @return array The child tokens, in the order they were parsed.
     */
    public function getChildren()
    {
        return $this->children;
    }
251
252 1
    /**
     * Tells whether at least one child token has been recorded.
     *
     * @return boolean
     */
    public function hasChildren()
    {
        // An empty array casts to false, a non-empty one to true.
        return (bool) $this->children;
    }
256
257
    /**
     * Returns the element name.
     *
     * @return string|null The name set by parse(); null if parse() has not
     *                     been called on this element.
     */
    public function getName()
    {
        return $this->name;
    }
266
267 13
    /**
     * Converts this element, and recursively its children, to a plain array.
     *
     * @return array Always has 'type' and 'name'. 'attributes' and 'children'
     *               are present only when the element has any.
     */
    public function toArray()
    {
        $data = array();
        $data['type'] = 'element';
        $data['name'] = $this->name;

        if (!empty($this->attributes)) {
            // Arrays are copied on assignment, keys and order included.
            $data['attributes'] = $this->attributes;
        }

        if (!empty($this->children)) {
            $data['children'] = array_map(
                function ($child) {
                    return $child->toArray();
                },
                $this->children
            );
        }

        return $data;
    }
290
}
291