1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Vikpe; |
4
|
|
|
|
5
|
|
|
/**
 * Shifts the level of every HTML heading (h1-h6) found in a piece of markup.
 *
 * "Demoting" increases the heading number (h1 -> h2), "promoting" decreases
 * it (h2 -> h1). Resulting levels are clamped to the valid h1-h6 range.
 * Attributes and child nodes of each heading are carried over to the
 * replacement element.
 */
class HtmlHeadingNormalizer
{
    /**
     * Demotes all headings in $html by $levels steps (e.g. h1 -> h3 for 2).
     *
     * @param string $html   HTML document or fragment
     * @param int    $levels number of levels to shift by
     *
     * @return string the resulting HTML; $html itself when nothing changes
     */
    public static function demote($html, $levels)
    {
        return self::normalize($html, $levels);
    }

    /**
     * Promotes all headings in $html by $levels steps (e.g. h3 -> h1 for 2).
     *
     * @param string $html   HTML document or fragment
     * @param int    $levels number of levels to shift by
     *
     * @return string the resulting HTML; $html itself when nothing changes
     */
    public static function promote($html, $levels)
    {
        return self::normalize($html, -$levels);
    }

    /**
     * Shifts every heading by $levels (positive demotes, negative promotes).
     *
     * The input is returned untouched when the shift is zero, when it holds
     * no heading tags, or when it cannot be parsed.
     *
     * @param string $html
     * @param int    $levels
     *
     * @return string
     */
    private static function normalize($html, $levels)
    {
        $levels = (int) $levels;

        if (0 === $levels || !self::containsHeadings($html)) {
            return $html;
        }

        $domDocument = new \DOMDocument();

        // Collect parser complaints internally instead of emitting PHP
        // warnings, then restore whatever setting the caller had.
        // NOTE(review): loadHTML() is given no charset hint here; confirm
        // how non-ASCII input should be handled.
        $previousSetting = libxml_use_internal_errors(true);
        $loaded = $domDocument->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        if (!$loaded) {
            return $html;
        }

        $originalHeadings = self::getHeadings($domDocument);

        if (0 === count($originalHeadings)) {
            // The quick string check matched but the parser found no
            // heading elements: avoid a pointless DOM round-trip.
            return $html;
        }

        $normalizedHeadings = self::normalizeHeadings($originalHeadings, $levels);

        self::replaceHeadings($originalHeadings, $normalizedHeadings);

        // Previously an early "return $domDocument->saveHTML();" made this
        // call unreachable, so fragments came back wrapped in the
        // doctype/html/body that the parser adds.
        return self::formatResult($domDocument, $html);
    }

    /**
     * Collects all h1-h6 elements of the document into a plain array.
     *
     * A snapshot array is used because the node lists returned by
     * getElementsByTagName() are live and the headings get replaced later.
     *
     * @param \DOMDocument $domDocument
     *
     * @return \DOMElement[]
     */
    private static function getHeadings(\DOMDocument $domDocument)
    {
        $headings = [];

        foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] as $tagName) {
            foreach ($domDocument->getElementsByTagName($tagName) as $heading) {
                $headings[] = $heading;
            }
        }

        return $headings;
    }

    /**
     * Builds a replacement element for each heading, shifted by $levelDelta.
     *
     * The returned array uses the same order/indexes as $originalHeadings.
     *
     * @param \DOMElement[] $originalHeadings
     * @param int           $levelDelta
     *
     * @return \DOMElement[]
     */
    private static function normalizeHeadings(array $originalHeadings, $levelDelta)
    {
        $normalizedHeadings = [];

        foreach ($originalHeadings as $heading) {
            $currentLevel = self::tagNameToLevel($heading->tagName);

            // Clamp to 1..6 so no <h0> or <h7> elements are ever produced.
            $newLevel = max(1, min(6, $currentLevel + $levelDelta));

            $normalizedHeadings[] = self::cloneHeading($heading, $newLevel);
        }

        return $normalizedHeadings;
    }

    /**
     * Swaps each needle element in its document for the replacement that
     * has the same array index.
     *
     * @param \DOMElement[] $needles
     * @param \DOMElement[] $replacements
     */
    private static function replaceHeadings(array $needles, array $replacements)
    {
        foreach ($needles as $i => $needle) {
            $needle->parentNode->replaceChild($replacements[$i], $needle);
        }
    }

    /**
     * Cheap pre-check: does the markup contain an opening h1-h6 tag?
     *
     * Matches "<h" followed by a digit 1-6 and a word boundary, case
     * insensitively, so <html>, <head>, <hr> and <header> do not count.
     *
     * @param string $html
     *
     * @return bool
     */
    private static function containsHeadings($html)
    {
        return 1 === preg_match('/<h[1-6]\b/i', $html);
    }

    /**
     * Extracts the numeric level from a heading tag name ("h3" -> 3).
     *
     * @param string $tagName
     *
     * @return int
     */
    private static function tagNameToLevel($tagName)
    {
        return (int) substr($tagName, 1);
    }

    /**
     * Builds a heading tag name from a level (3 -> "h3").
     *
     * @param int $level
     *
     * @return string
     */
    private static function levelToTagName($level)
    {
        return 'h'.$level;
    }

    /**
     * Creates a heading element of the given level in the same document and
     * transfers the source heading's attributes and child nodes to it.
     *
     * Note that the child nodes are moved, leaving $sourceHeading empty.
     *
     * @param \DOMElement $sourceHeading
     * @param int         $newLevel
     *
     * @return \DOMElement
     */
    private static function cloneHeading(\DOMElement $sourceHeading, $newLevel)
    {
        $targetHeading = $sourceHeading->ownerDocument->createElement(
            self::levelToTagName($newLevel)
        );

        self::copyAttributes($sourceHeading, $targetHeading);
        self::moveChildNodes($sourceHeading, $targetHeading);

        return $targetHeading;
    }

    /**
     * Copies every attribute (name and value) from $source onto $target.
     *
     * @param \DOMElement $source
     * @param \DOMElement $target
     */
    private static function copyAttributes(\DOMElement $source, \DOMElement $target)
    {
        foreach ($source->attributes as $attribute) {
            $target->setAttribute($attribute->name, $attribute->value);
        }
    }

    /**
     * Moves all child nodes of $source into $target, preserving their order.
     *
     * @param \DOMElement $source
     * @param \DOMElement $target
     */
    private static function moveChildNodes(\DOMElement $source, \DOMElement $target)
    {
        while ($source->hasChildNodes()) {
            // appendChild() actually moves the childNode
            $target->appendChild($source->firstChild);
        }
    }

    /**
     * Serializes the document so that its outer structure mirrors the input:
     * the doctype is dropped when the input had none, and the html/body
     * wrapper is dropped when the input had no html tag.
     *
     * @param \DOMDocument $domDocument
     * @param string       $originalHtml the markup the document was built from
     *
     * @return string
     */
    private static function formatResult(\DOMDocument $domDocument, $originalHtml)
    {
        $doctype = $domDocument->doctype;

        if (null !== $doctype && !self::containsDocType($originalHtml)) {
            $domDocument->removeChild($doctype);
        }

        if (self::containsHtmlTag($originalHtml)) {
            return $domDocument->saveHTML();
        }

        $bodyDomElement = $domDocument->getElementsByTagName('body')->item(0);

        if (null === $bodyDomElement) {
            // No body element to unwrap; fall back to the whole document.
            return $domDocument->saveHTML();
        }

        $html = $domDocument->saveHTML($bodyDomElement);

        if ($bodyDomElement->hasAttributes()) {
            // The opening tag will not be a bare "<body>", so stripping
            // only "</body>" would leave unbalanced markup; keep both tags.
            return $html;
        }

        return str_replace(['<body>', '</body>'], '', $html);
    }

    /**
     * Tells whether the markup declares a doctype (any letter case).
     *
     * @param string $html
     *
     * @return bool
     */
    private static function containsDocType($html)
    {
        return self::stringContains($html, '<!DOCTYPE');
    }

    /**
     * Case-insensitive substring test; HTML tag names and the doctype
     * keyword may be written in any letter case.
     *
     * @param string $string
     * @param string $needle
     *
     * @return bool
     */
    private static function stringContains($string, $needle)
    {
        return false !== stripos($string, $needle);
    }

    /**
     * Tells whether the markup contains an opening html tag (any letter case).
     *
     * @param string $html
     *
     * @return bool
     */
    private static function containsHtmlTag($html)
    {
        return self::stringContains($html, '<html');
    }
}
152
|
|
|
|
This check looks for unreachable code. It uses sophisticated control flow analysis techniques to find statements which will never be executed.
Unreachable code is most often the result of `return`, `die` or `exit` statements that have been added for debug purposes. In the above example, the last `return false` will never be executed, because a return statement has already been met in every possible execution path.