1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Kevintweber\HtmlTokenizer\Tokens; |
4
|
|
|
|
5
|
|
|
use Kevintweber\HtmlTokenizer\Exceptions\ParseException; |
6
|
|
|
|
7
|
|
|
class Element extends AbstractToken |
8
|
|
|
{ |
9
|
|
|
/** @var array */ |
10
|
|
|
private $attributes; |
11
|
|
|
|
12
|
|
|
/** @var Token[] */ |
13
|
|
|
private $children; |
14
|
|
|
|
15
|
|
|
/** @var string */ |
16
|
|
|
private $name; |
17
|
|
|
|
18
|
71 |
|
/**
 * Creates an element token that has not been parsed yet.
 *
 * The token type is fixed to Token::ELEMENT; the name stays null and the
 * attribute and child lists stay empty until parse() fills them in.
 *
 * @param Token|null $parent       Parent token, forwarded to the parent constructor.
 * @param bool       $throwOnError Error-handling flag, forwarded to the parent constructor.
 */
public function __construct(Token $parent = null, bool $throwOnError = true)
{
    parent::__construct(Token::ELEMENT, $parent, $throwOnError);

    // Nothing is known about the element before parse() runs.
    $this->name = null;
    $this->children = [];
    $this->attributes = [];
}
26
|
|
|
|
27
|
|
|
/**
 * Does the parent have an implied closing tag?
 *
 * Reads the element name at the start of $html and compares it with the
 * name of the parent element. Returns true for the parent/child pairs
 * listed below (head/body, p/block-level element, li/li, dt|dd/dt|dd,
 * rp|rt/rp|rt), where starting the child means the parent was left open.
 *
 * @param string $html HTML that starts with this element's opening tag.
 *
 * @return bool True when opening this element implicitly closes its parent.
 *
 * @throws ParseException If no element name can be read from $html and
 *                        throwing on error is enabled.
 */
public function isClosingElementImplied(string $html) : bool
{
    $parent = $this->getParent();
    // instanceof is already false for null, so it also covers "no parent".
    if (!($parent instanceof self)) {
        return false;
    }

    $name = $this->parseElementName($html);
    $parentName = $parent->getName();

    // HEAD: no closing tag.
    if ($name === 'body' && $parentName === 'head') {
        return true;
    }

    // P
    $elementsNotChildrenOfP = array(
        'address',
        'article',
        'aside',
        'blockquote',
        'details',
        'div',
        'dl',
        'fieldset',
        'figcaption',
        'figure',
        'footer',
        'form',
        'h1',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'header',
        'hgroup',
        'hr',
        'main',
        'menu',
        'nav',
        'ol',
        'p',
        'pre',
        'section',
        'table',
        'ul'
    );
    // Strict comparison: both sides are strings, no type juggling wanted.
    if ($parentName === 'p' && in_array($name, $elementsNotChildrenOfP, true)) {
        return true;
    }

    // LI
    if ($parentName === 'li' && $name === 'li') {
        return true;
    }

    // DT and DD
    $definitionListItems = array('dt', 'dd');
    if (in_array($parentName, $definitionListItems, true) && in_array($name, $definitionListItems, true)) {
        return true;
    }

    // RP and RT
    $rubyItems = array('rp', 'rt');
    if (in_array($parentName, $rubyItems, true) && in_array($name, $rubyItems, true)) {
        return true;
    }

    return false;
}
103
|
|
|
|
104
|
|
|
/**
 * Will parse this element.
 *
 * Reads the element name and its attributes, then decides how to continue:
 * a self-closing tag ("/>") or one of the closed-only elements listed below
 * ends the element immediately; anything else has its contents parsed by
 * parseContents().
 *
 * @param string $html HTML that starts (after optional whitespace) with the opening tag.
 *
 * @return string Remaining HTML. An empty string is returned when a
 *                ParseException occurred and throwing on error is disabled.
 *
 * @throws ParseException If parsing fails and throwing on error is enabled.
 */
public function parse(string $html) : string
{
    $html = ltrim($html);
    $this->setTokenPosition($html);

    try {
        $this->name = $this->parseElementName($html);
        $remainingHtml = $this->parseAttributes($html);
        $posOfClosingBracket = $this->getPositionOfElementEndTag($remainingHtml);

        // Is self-closing? Must be checked before the opening tag is cut off.
        $posOfSelfClosingBracket = mb_strpos($remainingHtml, '/>');
        $remainingHtml = mb_substr($remainingHtml, $posOfClosingBracket + 1);
        if ($posOfSelfClosingBracket !== false && $posOfSelfClosingBracket === $posOfClosingBracket - 1) {
            // Self-closing element. (Note: $this->value is unchanged.)
            return $remainingHtml;
        }

        // Lets close those closed-only elements that are left open.
        $closedOnlyElements = array(
            'area',
            'base',
            'br',
            'col',
            'embed',
            'hr',
            'img',
            'input',
            'link',
            'meta',
            'param',
            'source',
            'track',
            'wbr'
        );
        // Strict comparison: the name is always a string here.
        if (in_array($this->name, $closedOnlyElements, true)) {
            return $remainingHtml;
        }

        // Open element.
        return $this->parseContents($remainingHtml);
    } catch (ParseException $e) {
        if ($this->getThrowOnError()) {
            throw $e;
        }
    }

    // A parse error was swallowed: nothing is left to hand back to the caller.
    return '';
}
160
|
|
|
|
161
|
|
|
/**
 * Consumes the attributes of the opening tag, one at a time.
 *
 * Parsing stops as soon as the remaining text contains no ">" at all, or
 * starts (after optional whitespace) with ">" or "/>".
 *
 * @param string $html HTML starting with the opening tag; $this->name must already be set.
 *
 * @return string The HTML left after the last parsed attribute.
 */
private function parseAttributes(string $html) : string
{
    // Drop the element name plus one extra character in front of it
    // (presumably the leading "<" of the opening tag).
    $rest = mb_substr($html, mb_strlen($this->name) + 1);

    while (true) {
        if (mb_strpos($rest, '>') === false) {
            break;
        }
        if (preg_match("/^\s*[\/]?>/", $rest) !== 0) {
            break;
        }

        $rest = $this->parseAttribute($rest);
    }

    return $rest;
}
175
|
|
|
|
176
|
|
|
/**
 * Will parse a single attribute.
 *
 * Finds the first attribute name in $html, then either stores it as a
 * valueless attribute (value true) or delegates to parseAttributeValue().
 *
 * @param string $html HTML starting at (or just before) an attribute.
 *
 * @return string Remaining HTML. An empty string is returned when a
 *                ParseException occurred and throwing on error is disabled.
 *
 * @throws ParseException If no attribute name is found or the value is
 *                        malformed, and throwing on error is enabled.
 */
private function parseAttribute(string $html) : string
{
    $remainingHtml = ltrim($html);

    try {
        // Will match the first entire name/value attribute pair.
        $attributeMatchSuccessful = preg_match(
            "/((([a-z0-9\-_]+:)?[a-z0-9\-_]+)(\s*=\s*)?)/i",
            $remainingHtml,
            $attributeMatches
        );

        // Without a match $attributeMatches is empty; reading index 2 would
        // yield an undefined offset and no input would be consumed.
        if ($attributeMatchSuccessful !== 1) {
            throw new ParseException(
                'Invalid attribute name. Truncated html = ' . mb_substr($remainingHtml, 0, 20)
            );
        }

        $attributeName = $attributeMatches[2];
        // Skip everything up to and including the attribute name.
        $remainingHtml = mb_substr(mb_strstr($remainingHtml, $attributeName), mb_strlen($attributeName));
        if ($this->isAttributeValueless($remainingHtml)) {
            $this->attributes[trim($attributeName)] = true;

            return $remainingHtml;
        }

        return $this->parseAttributeValue($html, $remainingHtml, $attributeName);
    } catch (ParseException $e) {
        if ($this->getThrowOnError()) {
            throw $e;
        }
    }

    return '';
}
212
|
|
|
|
213
|
24 |
|
/**
 * Extracts the value of an attribute and stores it under the attribute name.
 *
 * @param string $html          The HTML as it was before the attribute name was consumed.
 * @param string $remainingHtml The HTML directly after the attribute name (starting at the "=").
 * @param string $attributeName The attribute name that was matched.
 *
 * @return string The HTML left after the attribute value.
 *
 * @throws ParseException If the value cannot be extracted.
 */
private function parseAttributeValue(string $html, string $remainingHtml, string $attributeName) : string
{
    // Strip the "=" together with any whitespace around it. The character
    // list mirrors the "\s*=\s*" check in isAttributeValueless(), so a tab or
    // newline after "=" no longer makes a quoted value look quoteless.
    $remainingHtml = ltrim($remainingHtml, " \t\n\r\f\x0B=");
    if ($this->isAttributeValueQuoteEnclosed($remainingHtml)) {
        $attributeValue = $this->extractQuoteEnclosedAttributeValue($remainingHtml);
    } else {
        // No quotes enclosing the attribute value.
        $attributeValue = $this->extractQuotelessAttributeValue($remainingHtml);
    }

    $this->attributes[trim($attributeName)] = $attributeValue;

    return $this->parseAttributeDetermineRemainingHtml($html, $attributeName, $attributeValue);
}
228
|
|
|
|
229
|
|
|
/**
 * Will parse the contents of this element.
 *
 * Sets $this->value, then handles the element body: "iframe" contents are
 * not parsed, "script" and "style" contents are kept as text, and every
 * other element is split into child tokens until its closing tag is reached.
 *
 * @param string $html The HTML following the opening tag.
 *
 * @return string Remaining HTML.
 */
private function parseContents(string $html) : string
{
    if (trim($html) === '') {
        return '';
    }

    // Determine value: the text captured in front of the closing tag when
    // the pattern matches, otherwise all of the given HTML.
    $closingTagPattern = "/(.*)<\/\s*" . $this->name . "\s*>/iU";
    if (preg_match($closingTagPattern, $html, $valueMatches) === 1) {
        $this->value = $valueMatches[1];
    } else {
        $this->value = $html;
    }

    // Don't parse contents of "iframe" element.
    if ($this->name === 'iframe') {
        return $this->parseNoContents('iframe', $html);
    }

    // Only TEXT inside "script" and "style" elements.
    if ($this->name === 'script' || $this->name === 'style') {
        return $this->parseForeignContents($this->name, $html);
    }

    // Parse contents one token at a time.
    $rest = $html;
    while ($this->isAnotherTokenPresent($rest)) {
        $child = TokenFactory::buildFromHtml($rest, $this, $this->getThrowOnError());

        // No token could be built: hand the rest back untouched.
        if (!$child instanceof Token) {
            return $rest;
        }

        // The next token implicitly closes this element.
        if ($child->isClosingElementImplied($rest)) {
            return $rest;
        }

        $rest = $child->parse($rest);
        $this->children[] = $child;
    }

    $this->removeLastTokenIfContainsOnlyWhitespace();

    // Remove remaining closing tag.
    return mb_substr($rest, mb_strpos($rest, '>') + 1);
}
287
|
|
|
|
288
|
|
|
/**
 * Will get the element name from the html string.
 *
 * The name (optionally prefixed with "namespace:") is read directly after
 * the leading "<" and returned in lower case.
 *
 * @param string $html HTML starting with an opening tag; surrounding whitespace is ignored.
 *
 * @return string The element name, or an empty string when none is found
 *                and throwing on error is disabled.
 *
 * @throws ParseException If no name is found and throwing on error is enabled.
 */
private function parseElementName(string $html) : string
{
    $trimmedHtml = trim($html);
    $matched = preg_match("/^(<(([a-z0-9\-]+:)?[a-z0-9\-]+))/i", $trimmedHtml, $elementMatches);

    if ($matched === 1) {
        return mb_strtolower($elementMatches[2]);
    }

    if ($this->getThrowOnError()) {
        throw new ParseException('Invalid element name. Truncated html = ' . mb_substr($trimmedHtml, 0, 20));
    }

    return '';
}
313
|
|
|
|
314
|
|
|
/**
 * Will parse the script and style contents correctly.
 *
 * The contents are not tokenized; when they are non-empty they are added
 * as a single Text child.
 *
 * @param string $tag  Name of the element whose closing tag ends the contents.
 * @param string $html The HTML following the opening tag.
 *
 * @return string The remaining HTML.
 */
private function parseForeignContents(string $tag, string $html) : string
{
    // Find all contents; this also sets $this->value.
    $remainingHtml = $this->determineRemainingHtmlOfForeignContents($tag, $html, ltrim($html));

    // Only non-empty contents become a child.
    if ($this->value !== '') {
        $this->children[] = new Text($this, $this->getThrowOnError(), $this->value);
    }

    return $remainingHtml;
}
343
|
|
|
|
344
|
|
|
/**
 * Will not parse the contents of an element.
 *
 * "iframe" elements.
 *
 * Everything up to the first closing tag of $tag is stored verbatim in
 * $this->value and no child tokens are created.
 *
 * @param string $tag  Name of the element whose closing tag ends the contents.
 * @param string $html The HTML following the opening tag.
 *
 * @return string The remaining HTML after the closing tag, or an empty
 *                string when no closing tag is found.
 */
private function parseNoContents(string $tag, string $html) : string
{
    $remainingHtml = ltrim($html);
    $matchingResult = preg_match(
        "/(<\/\s*" . $tag . "\s*>)/i",
        $remainingHtml,
        $endOfScriptMatches
    );
    // "!== 1" also covers a preg_match() failure (false), not just "no match".
    if ($matchingResult !== 1) {
        return '';
    }

    $closingTag = $endOfScriptMatches[1];

    // The offset must be measured in the same (left-trimmed) string that is
    // cut below; measuring it in the untrimmed $html shifted the cut by the
    // amount of leading whitespace.
    $posOfClosingTag = mb_strpos($remainingHtml, $closingTag);
    $this->value = mb_substr($remainingHtml, 0, $posOfClosingTag);

    return mb_substr($remainingHtml, $posOfClosingTag + mb_strlen($closingTag));
}
374
|
|
|
|
375
|
|
|
/**
 * Returns the attributes collected for this element.
 *
 * @return array Attribute values keyed by attribute name; an attribute
 *               without a value is stored as true.
 */
public function getAttributes() : array
{
    return $this->attributes;
}
384
|
|
|
|
385
|
|
|
/**
 * Tells whether at least one attribute has been collected.
 *
 * @return bool
 */
public function hasAttributes() : bool
{
    return count($this->attributes) > 0;
}
392
|
|
|
|
393
|
|
|
/**
 * Returns the child tokens collected for this element.
 *
 * @return array List of child tokens in the order they were added.
 */
public function getChildren() : array
{
    return $this->children;
}
402
|
|
|
|
403
|
|
|
/**
 * Tells whether at least one child token has been collected.
 *
 * @return bool
 */
public function hasChildren() : bool
{
    return count($this->children) > 0;
}
410
|
|
|
|
411
|
|
|
/**
 * Getter for 'name'.
 *
 * The constructor initialises the name to null and parse() sets it, so an
 * element that has not been parsed yet reports an empty string here rather
 * than violating the declared string return type.
 *
 * @return string The element name, or an empty string when none has been set.
 */
public function getName() : string
{
    return $this->name ?? '';
}
420
|
|
|
|
421
|
30 |
|
/**
 * Converts this element into a nested array.
 *
 * The 'attributes' and 'children' keys are only present when the element
 * has attributes or children; each child is converted with its own toArray().
 *
 * @return array
 */
public function toArray() : array
{
    $result = [
        'type' => 'element',
        'name' => $this->name,
        'line' => $this->getLine(),
        'position' => $this->getPosition(),
    ];

    if (count($this->attributes) > 0) {
        // A plain copy keeps every name => value pair in order.
        $result['attributes'] = $this->attributes;
    }

    if (count($this->children) > 0) {
        $convertedChildren = [];
        foreach ($this->children as $childToken) {
            $convertedChildren[] = $childToken->toArray();
        }
        $result['children'] = $convertedChildren;
    }

    return $result;
}
446
|
|
|
|
447
|
21 |
|
/**
 * Cuts an attribute name and its (non-empty) value off the front of the HTML.
 *
 * @param string $html  HTML starting (after optional whitespace) with the attribute name.
 * @param string $name  The attribute name.
 * @param string $value The attribute value to skip past.
 *
 * @return string The left-trimmed HTML after the first occurrence of $value
 *                that follows the name.
 */
private function determineRemainingHtmlByRemovingAttributeName(string $html, string $name, string $value) : string
{
    // Drop the name first so the value is searched for behind it.
    $afterName = mb_substr(ltrim($html), mb_strlen($name));
    $endOfValue = mb_strpos($afterName, $value) + mb_strlen($value);

    return ltrim(mb_substr($afterName, $endOfValue));
}
462
|
|
|
|
463
|
23 |
|
/**
 * Determines the HTML that is left after an attribute and its value.
 *
 * @param string $html          HTML starting (after optional whitespace) with the attribute name.
 * @param string $attributeName The attribute name.
 * @param string $value         The extracted attribute value (may be empty).
 *
 * @return string The remaining HTML with leading quotes, slashes and spaces removed.
 */
private function parseAttributeDetermineRemainingHtml(string $html, string $attributeName, string $value) : string
{
    // An empty value cannot be searched for, so skip the name plus three
    // more characters (presumably the "=" and the two quotes).
    $remainingHtml = $value === ''
        ? ltrim(mb_substr(ltrim($html), mb_strlen($attributeName) + 3))
        : $this->determineRemainingHtmlByRemovingAttributeName($html, $attributeName, $value);

    return ltrim($remainingHtml, '\'"/ ');
}
473
|
|
|
|
474
|
26 |
|
/**
 * Tells whether the attribute just read has no value.
 *
 * @param string $remainingHtml The HTML directly after the attribute name.
 *
 * @return bool True when the text does not start with optional whitespace followed by "=".
 */
private function isAttributeValueless(string $remainingHtml) : bool
{
    $assignmentFound = preg_match("/^\s*=\s*/", $remainingHtml);

    return $assignmentFound === 0;
}
478
|
|
|
|
479
|
55 |
|
/**
 * Finds the ">" that ends the opening tag.
 *
 * @param string $remainingHtml The HTML left after the attributes were parsed.
 *
 * @return int Position of the first ">" in $remainingHtml.
 *
 * @throws ParseException If $remainingHtml contains no ">".
 */
private function getPositionOfElementEndTag(string $remainingHtml) : int
{
    $position = mb_strpos($remainingHtml, '>');
    if ($position !== false) {
        return $position;
    }

    throw new ParseException('Invalid element: missing closing bracket.');
}
488
|
|
|
|
489
|
24 |
|
/**
 * Drops the last child when it is a text token made up of whitespace only.
 */
private function removeLastTokenIfContainsOnlyWhitespace()
{
    if (empty($this->children)) {
        return;
    }

    // Look at the last child on a copy so the stored list is only touched
    // when the child really has to go.
    $childrenCopy = $this->children;
    $lastChild = end($childrenCopy);

    if ($lastChild->isText() && trim($lastChild->getValue()) === '') {
        array_pop($this->children);
    }
}
499
|
|
|
|
500
|
25 |
|
/**
 * Tells whether more content precedes this element's closing tag.
 *
 * @param string $remainingHtml The HTML not yet consumed.
 *
 * @return bool True when $remainingHtml does not start with the closing tag of this element.
 */
private function isAnotherTokenPresent($remainingHtml) : bool
{
    $closingTagPattern = "/^<\/\s*" . $this->name . "\s*>/is";

    return preg_match($closingTagPattern, $remainingHtml) === 0;
}
504
|
|
|
|
505
|
21 |
|
/**
 * Extracts an attribute value that is wrapped in quotes.
 *
 * The first character of $remainingHtml is taken as the quote character;
 * the value runs up to the next occurrence of that character that is not
 * preceded by a backslash.
 *
 * @param string $remainingHtml HTML starting with the opening quote.
 *
 * @return string The value without the surrounding quotes.
 *
 * @throws ParseException If no matching closing quote is found.
 */
private function extractQuoteEnclosedAttributeValue(string $remainingHtml) : string
{
    $quote = $remainingHtml[0];
    $valuePattern = '/' . $quote . "(.*?(?<!\\\))" . $quote . "/s";

    if (preg_match($valuePattern, $remainingHtml, $valueMatches) === 1) {
        return $valueMatches[1];
    }

    throw new ParseException('Invalid quote enclosed attribute value encapsulation.');
}
519
|
|
|
|
520
|
7 |
|
/**
 * Extracts an attribute value that is not wrapped in quotes.
 *
 * The value is the run of characters up to the next whitespace or ">",
 * not ending in "/".
 *
 * @param string $remainingHtml HTML starting at the value (leading whitespace allowed).
 *
 * @return string The extracted value.
 *
 * @throws ParseException If the pattern does not match.
 */
private function extractQuotelessAttributeValue(string $remainingHtml) : string
{
    $matched = preg_match("/(\s*([^>\s]*(?<!\/)))/", $remainingHtml, $valueMatches);
    if ($matched === 1) {
        return $valueMatches[2];
    }

    throw new ParseException('Invalid quoteless attribute value encapsulation.');
}
529
|
|
|
|
530
|
24 |
|
/**
 * Tells whether the attribute value starts with a single or double quote.
 *
 * @param string $remainingHtml HTML starting at the attribute value.
 *
 * @return bool False for an empty string or when the first character is not a quote.
 */
private function isAttributeValueQuoteEnclosed(string $remainingHtml) : bool
{
    // Guard first: reading offset 0 of an empty string is an
    // uninitialized string offset.
    if ($remainingHtml === '') {
        return false;
    }

    return $remainingHtml[0] === "'" || $remainingHtml[0] === '"';
}
534
|
|
|
|
535
|
7 |
View Code Duplication |
/**
 * Stores the raw contents of a "foreign" element and returns what follows it.
 *
 * The closing tag of $tag is looked up in $html. When it is missing, all of
 * $remainingHtml (trimmed) becomes the value and nothing remains; otherwise
 * the value is the trimmed text in front of the closing tag.
 *
 * @param string $tag           Name of the element whose closing tag ends the contents.
 * @param string $html          The HTML following the opening tag.
 * @param string $remainingHtml The HTML the value is cut from (the caller passes ltrim($html)).
 *
 * @return string The HTML after the closing tag, or an empty string when there is none.
 */
private function determineRemainingHtmlOfForeignContents(string $tag, string $html, string $remainingHtml) : string
{
    $closingTagFound = preg_match("/(<\/\s*" . $tag . "\s*>)/i", $html, $endOfScriptMatches);
    if ($closingTagFound === 0) {
        $this->value = trim($remainingHtml);

        return '';
    }

    $closingTag = $endOfScriptMatches[1];
    $contents = mb_substr($remainingHtml, 0, mb_strpos($remainingHtml, $closingTag));
    $this->value = trim($contents);

    // Everything from the closing tag onwards, minus the closing tag itself.
    $fromClosingTag = mb_strstr($remainingHtml, $closingTag);

    return mb_substr($fromClosingTag, mb_strlen($closingTag));
}
558
|
|
|
} |
559
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.