1 | <?php |
||
2 | /** |
||
3 | * This file is part of the O2System Framework package. |
||
4 | * |
||
5 | * For the full copyright and license information, please view the LICENSE |
||
6 | * file that was distributed with this source code. |
||
7 | * |
||
8 | * @author Steeve Andrian Salim |
||
9 | * @copyright Copyright (c) Steeve Andrian Salim |
||
10 | */ |
||
11 | |||
12 | // ------------------------------------------------------------------------ |
||
13 | |||
14 | namespace O2System\Html\Dom; |
||
15 | |||
16 | // ------------------------------------------------------------------------ |
||
17 | |||
/**
 * Class Beautifier
 *
 * Re-indents an HTML string so that every block-level tag, closing tag and
 * text node ends up on its own line, indented according to its nesting depth.
 * Elements registered as "inline" and all <script> elements are kept verbatim:
 * they are swapped for placeholders before indenting and restored afterwards.
 *
 * @package O2System\Html\Dom
 */
class Beautifier
{
    /**
     * Beautifier::ELEMENT_TYPE_BLOCK
     *
     * HTML Element Type Block Constant
     *
     * @var int
     */
    const ELEMENT_TYPE_BLOCK = 0;

    /**
     * Beautifier::ELEMENT_TYPE_INLINE
     *
     * HTML Element Type Inline Constant
     *
     * @var int
     */
    const ELEMENT_TYPE_INLINE = 1;

    /**
     * Beautifier::MATCH_INDENT_NO
     *
     * The matched token is printed at the current level and leaves the level unchanged.
     *
     * @var int
     */
    const MATCH_INDENT_NO = 0;

    /**
     * Beautifier::MATCH_INDENT_DECREASE
     *
     * The matched token is printed one level shallower and lowers the level for following lines.
     *
     * @var int
     */
    const MATCH_INDENT_DECREASE = 1;

    /**
     * Beautifier::MATCH_INDENT_INCREASE
     *
     * The matched token is printed at the current level and raises the level for following lines.
     *
     * @var int
     */
    const MATCH_INDENT_INCREASE = 2;

    /**
     * Beautifier::MATCH_DISCARD
     *
     * The matched token is consumed without producing any output.
     *
     * @var int
     */
    const MATCH_DISCARD = 3;

    /**
     * Beautifier::$indentCharacter
     *
     * String repeated once per nesting level at the start of each output line.
     *
     * @var string
     */
    private $indentCharacter = ' ';

    /**
     * Beautifier::$inlineElements
     *
     * Names of the elements that are kept inline (never placed on a line of their own).
     *
     * @var array
     */
    private $inlineElements = [
        'b',
        'big',
        'i',
        'small',
        'tt',
        'abbr',
        'acronym',
        'cite',
        'code',
        'dfn',
        'em',
        'kbd',
        'strong',
        'samp',
        'var',
        'a',
        'bdo',
        'br',
        'img',
        'span',
        'sub',
        'sup',
    ];

    // ------------------------------------------------------------------------

    /**
     * Beautifier::setElementType
     *
     * Registers an element as block (removes it from the inline list) or as
     * inline (adds it to the inline list). Any other $type value leaves the
     * list untouched.
     *
     * @param string $elementName Tag name, without angle brackets.
     * @param int    $type        Beautifier::ELEMENT_TYPE_BLOCK | Beautifier::ELEMENT_TYPE_INLINE
     */
    public function setElementType($elementName, $type)
    {
        if ($type === static::ELEMENT_TYPE_BLOCK) {
            $this->inlineElements = array_diff($this->inlineElements, [$elementName]);
        } elseif ($type === static::ELEMENT_TYPE_INLINE) {
            $this->inlineElements[] = $elementName;
        }

        // array_diff() and array_unique() both preserve keys; re-index so the
        // list stays a plain sequential array. Both are safe on an empty list,
        // so no emptiness guard is needed.
        $this->inlineElements = array_values(array_unique($this->inlineElements));
    }

    // ------------------------------------------------------------------------

    /**
     * Beautifier::format
     *
     * Returns $source re-indented, one token per line.
     *
     * Tabs are removed and every run of two or more whitespace characters is
     * collapsed to a single space before indenting, so whitespace-sensitive
     * content outside <script> (e.g. <pre>) is not preserved.
     *
     * @param string $source HTML markup to format.
     *
     * @return string The formatted markup, trimmed of leading/trailing whitespace.
     */
    public function format($source)
    {
        // <script> bodies are never re-indented: each element is replaced by a
        // numbered placeholder now and restored after the indentation pass.
        $tempScriptElements = [];

        if (preg_match_all('/<script\b[^>]*>([\s\S]*?)<\/script>/mi', $source, $matches)) {
            $tempScriptElements = $matches[ 0 ];

            foreach ($tempScriptElements as $i => $match) {
                $source = str_replace($match, '<script>' . ($i + 1) . '</script>', $source);
            }
        }

        // Removing double whitespaces to make the source code easier to read.
        $source = str_replace("\t", '', $source);
        $source = preg_replace('/\s{2,}/', ' ', $source);

        // Replace inline elements that contain only text with numbered
        // placeholders so the tokenizer below treats them as plain text.
        $tempInlineElements = [];

        if ( ! empty($this->inlineElements)) {
            // Names come from setElementType() and may contain regex
            // metacharacters, so they are quoted before being interpolated.
            $quotedNames = array_map(
                function ($name) {
                    return preg_quote($name, '/');
                },
                $this->inlineElements
            );

            // \b keeps a short name such as "b" from matching the start of a
            // longer tag name such as "body".
            $inlinePattern = '/<(' . implode('|', $quotedNames) . ')\b[^>]*>(?:[^<]*)<\/\1>/';

            if (preg_match_all($inlinePattern, $source, $matches)) {
                $tempInlineElements = $matches[ 0 ];

                foreach ($tempInlineElements as $i => $match) {
                    $source = str_replace($match, 'ᐃ' . ($i + 1) . 'ᐃ', $source);
                }
            }
        }

        // Token patterns, tried in order against the start of the remaining
        // source. Every pattern is anchored with ^ because the matched length
        // is always cut from the beginning of $source.
        $patterns = [
            // block tag containing only text: <p>text</p>
            '/^(<([a-z]+)(?:[^>]*)>(?:[^<]*)<\/(?:\2)>)/' => static::MATCH_INDENT_NO,
            // DOCTYPE / comment / other <! ... > declarations
            '/^<!([^>]*)>/' => static::MATCH_INDENT_NO,
            // void elements (never have a closing tag)
            '/^<(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\b([^>]*)>/' => static::MATCH_INDENT_NO,
            // self-closing tag: must be tested before "opening tag", which
            // would otherwise match it and wrongly increase the indentation
            '/^<[^\/][^>]*\/>/' => static::MATCH_INDENT_NO,
            // opening tag
            '/^<[^\/]([^>]*)>/' => static::MATCH_INDENT_INCREASE,
            // closing tag
            '/^<\/([^>]*)>/' => static::MATCH_INDENT_DECREASE,
            // whitespace between tokens
            '/^(\s+)/' => static::MATCH_DISCARD,
            // text node
            '/^([^<]+)/' => static::MATCH_INDENT_NO,
            // stray "<" that starts no recognisable tag: emit it (and the text
            // up to the next "<") as-is so the rest of the input is not lost
            '/^<[^<]*/' => static::MATCH_INDENT_NO,
        ];

        $output = '';
        $nextLineIndentationLevel = 0;

        do {
            $matched = false;

            foreach ($patterns as $pattern => $rule) {
                if (preg_match($pattern, $source, $matches) !== 1) {
                    continue;
                }

                $matched = true;

                // The patterns run in byte mode (no /u flag) and are anchored
                // at offset 0, so the byte length of the match is exactly the
                // prefix to drop.
                $source = (string)substr($source, strlen($matches[ 0 ]));

                if ($rule === static::MATCH_DISCARD) {
                    break;
                }

                $indentationLevel = $nextLineIndentationLevel;

                if ($rule === static::MATCH_INDENT_DECREASE) {
                    // Clamp at zero so unbalanced closing tags cannot push the
                    // level negative and flatten the nesting that follows.
                    $nextLineIndentationLevel = max(0, $nextLineIndentationLevel - 1);
                    $indentationLevel = $nextLineIndentationLevel;
                } elseif ($rule === static::MATCH_INDENT_INCREASE) {
                    $nextLineIndentationLevel++;
                }

                $output .= str_repeat($this->indentCharacter, $indentationLevel) . $matches[ 0 ] . "\n";

                break;
            }
        } while ($matched);

        // Join an opening tag with its immediately following closing tag
        // (an empty element) back onto a single line.
        $output = preg_replace('/(<(\w+)[^>]*>)\s*(<\/\2>)/', '\\1\\3', $output);

        foreach ($tempScriptElements as $i => $original) {
            $output = str_replace('<script>' . ($i + 1) . '</script>', $original, $output);
        }

        foreach ($tempInlineElements as $i => $original) {
            $output = str_replace('ᐃ' . ($i + 1) . 'ᐃ', $original, $output);
        }

        return trim($output);
    }
}
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using `empty(...)` or `!empty(...)` instead.