This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | /* |
||
4 | * This file is part of the Patron package. |
||
5 | * |
||
6 | * (c) Olivier Laviale <[email protected]> |
||
7 | * |
||
8 | * For the full copyright and license information, please view the LICENSE |
||
9 | * file that was distributed with this source code. |
||
10 | */ |
||
11 | |||
12 | namespace Patron; |
||
13 | |||
/**
 * Splits an HTML-like string into a tree of text nodes and markup nodes.
 *
 * The tree is a list whose items are either strings (text) or arrays
 * describing a markup:
 *
 *     [ 'name' => string, 'args' => array<string, string>, 'children' => array ]
 *
 * The `children` key is only present for markups that were opened and may
 * contain children; auto-closing markups (`<br />`) have no `children` key.
 *
 * Comments (`<!-- -->`) and processing instructions (`<? ?>`) are never
 * parsed; they end up verbatim inside text nodes.
 */
class HTMLParser
{
    /**
     * Key of the constructor option holding the error handler callable.
     */
    const T_ERROR_HANDLER = 'error-handler';

    /**
     * Encoding used to decode the HTML entities of attribute values.
     */
    private $encoding;

    /**
     * Flat token list produced by `parse()`, stored in REVERSE order so that
     * `buildTree()` can consume it with `array_pop()` in constant time.
     */
    private $matches = [];

    /**
     * Whether the current document contained escaped comments or processing
     * instructions that must be restored once the tree is built.
     */
    private $escaped = false;

    /**
     * Stack of the names of the markups currently opened.
     */
    private $opened = [];

    /**
     * Callable invoked with `($format, array $args)` when the markup is
     * malformed.
     */
    protected $error_handler;

    /**
     * Prefix a markup must have to be parsed, as a literal string.
     */
    protected $namespace;

    /**
     * Set to `true` as soon as an unexpected closing markup is reported.
     *
     * The original code created this as a dynamic (hence public) property;
     * it is declared explicitly because dynamic properties are deprecated as
     * of PHP 8.2. Like before, the flag is never reset by the parser itself.
     */
    public $malformed = false;

    /**
     * @param array $tags Options. `self::T_ERROR_HANDLER` may hold a callable
     * receiving a message format and its arguments; the default one raises
     * a PHP notice through `trigger_error()`.
     */
    public function __construct(array $tags = [])
    {
        $tags += [
            self::T_ERROR_HANDLER => function ($str, $args) {
                trigger_error(\ICanBoogie\format($str, $args));
            }
        ];

        $this->error_handler = $tags[self::T_ERROR_HANDLER];
    }

    /**
     * Parses a string into a tree of text and markup nodes.
     *
     * @param string $html The source to parse.
     * @param string|null $namespace Literal prefix markups must start with
     * (e.g. `p:`). When empty, every markup is parsed.
     * @param string $encoding Encoding used to decode attribute values.
     *
     * @return array The tree, see the class description for its shape.
     */
    public function parse($html, $namespace = null, $encoding = 'utf-8')
    {
        $this->encoding = $encoding;
        $this->namespace = $namespace;

        // Per-document state must not leak from a previous call: a markup
        // left open by an earlier malformed document would otherwise be
        // popped by, and hide, an unexpected closing markup in this one.
        $this->opened = [];
        $this->escaped = false;

        // Comments and processing instructions are escaped so that the
        // markups they contain are not parsed; they end up as text nodes.
        $html = $this->escapeSpecials((string) $html);

        // Split the source on markups to obtain a flat list that reads, for
        // every index i that is a multiple of 3:
        //
        //   i+0 => some text
        //   i+1 => '/' for closing markups, '' otherwise
        //   i+2 => the markup itself, without '<', '>' and the namespace;
        //          it might end with '/' for an auto-closing markup
        //
        // The namespace is quoted because it is a literal prefix, not a
        // pattern: a '.' in it must not match any character.
        $pattern = '#<(/?)' . preg_quote((string) $namespace, '#') . '([^>]*)>#';
        $tokens = preg_split($pattern, $html, -1, PREG_SPLIT_DELIM_CAPTURE);

        // Reversed once so tokens are consumed with array_pop() (constant
        // time) rather than array_shift() (re-indexes the array every time).
        $this->matches = array_reverse($tokens);

        $tree = $this->buildTree();

        // Restore the comments and processing instructions escaped earlier.
        if ($this->escaped) {
            $tree = $this->unescapeSpecials($tree);
        }

        return $tree;
    }

    /**
     * Escapes comments and processing instructions so they are not split
     * into markups.
     *
     * `.*` (rather than `.+`) lets empty comments such as `<!---->` be
     * escaped too.
     *
     * @param string $html
     *
     * @return string
     */
    protected function escapeSpecials($html)
    {
        $html = preg_replace_callback('#<\!--.*-->#sU', [ $this, 'escapeSpecials_callback' ], $html);
        $html = preg_replace_callback('#<\?.*\?>#sU', [ $this, 'escapeSpecials_callback' ], $html);

        return $html;
    }

    /**
     * Replaces the angle brackets of a matched comment or processing
     * instruction with the control characters \x01 and \x02, and records
     * that unescaping will be required.
     *
     * @param array $m Match provided by `preg_replace_callback()`.
     *
     * @return string
     */
    protected function escapeSpecials_callback($m)
    {
        $this->escaped = true;

        return str_replace([ '<', '>' ], [ "\x01", "\x02" ], $m[0]);
    }

    /**
     * Recursively restores the angle brackets replaced by
     * `escapeSpecials_callback()`.
     *
     * Note: any \x01 or \x02 byte already present in the source is turned
     * into an angle bracket as well.
     *
     * @param array|string $tree A tree, a node, or a string.
     *
     * @return array|string
     */
    protected function unescapeSpecials($tree)
    {
        if (is_array($tree)) {
            return array_map([ $this, 'unescapeSpecials' ], $tree);
        }

        return str_replace([ "\x01", "\x02" ], [ '<', '>' ], $tree);
    }

    /**
     * Builds one level of the tree by consuming the pending tokens.
     *
     * The method recurses for every opened markup and returns to its caller
     * when the matching closing markup is met. As in the original
     * implementation, a closing markup met at the top level ends the parsing.
     *
     * @return array The nodes of the current level.
     */
    protected function buildTree()
    {
        $nodes = [];
        $closing = false;
        $i = 0;

        while (($value = array_pop($this->matches)) !== null) {
            switch ($i++ % 3) {
                case 0:
                    // Text: whitespace-only text is discarded. The comparison
                    // is explicit because a bare trim() would also discard
                    // the text "0", which is falsy in PHP.
                    if (trim($value) !== '') {
                        $nodes[] = $value;
                    }
                    break;

                case 1:
                    // Closing flag of the markup that follows.
                    $closing = ($value === '/');
                    break;

                case 2:
                    if (substr($value, -1) === '/') {
                        // Auto-closing markup: a node without children.
                        $nodes[] = $this->parseMarkup(substr($value, 0, -1));
                    } elseif ($closing) {
                        // Closing markup: it must match the last opened one.
                        // The name is trimmed so that `</p >` matches `<p>`.
                        $name = trim($value);
                        $open = array_pop($this->opened);

                        if ($name !== $open) {
                            $this->error($name, $open);
                        }

                        return $nodes;
                    } else {
                        // Opening markup with possible children.
                        $node = $this->parseMarkup($value);

                        $this->opened[] = $node['name'];

                        $node['children'] = $this->buildTree();

                        $nodes[] = $node;
                    }
                    break;
            }
        }

        return $nodes;
    }

    /**
     * Extracts the name and the arguments of a markup.
     *
     * Only double-quoted attributes (`name="value"`) are recognized. HTML
     * entities in values are decoded using the encoding given to `parse()`.
     *
     * @param string $markup The markup without '<', '>' and the namespace.
     *
     * @return array `[ 'name' => string, 'args' => array<string, string> ]`.
     * The name is an empty string when the markup has none.
     */
    protected function parseMarkup($markup)
    {
        // The name is the leading run of non-whitespace characters. The
        // match can fail (empty markup, leading whitespace), in which case
        // reading $matches[0] would raise an undefined-offset error.
        $name = preg_match('#^\S+#', $markup, $matches) ? $matches[0] : '';

        // Attribute names exclude whitespace so that `a = "1"` yields the
        // key "a" (not "a ") and a boolean attribute is not merged into the
        // name of the attribute that follows it. Values may be empty.
        preg_match_all('#\s+([^\s=]+)\s*=\s*"([^"]*)"#', $markup, $attributes, PREG_SET_ORDER);

        $args = [];

        foreach ($attributes as $attribute) {
            $args[$attribute[1]] = html_entity_decode($attribute[2], ENT_QUOTES, $this->encoding);
        }

        return [ 'name' => $name, 'args' => $args ];
    }

    /**
     * Flags the document as malformed and reports an unexpected closing
     * markup to the error handler.
     *
     * @param string $markup Name of the closing markup that was met.
     * @param string|null $expected Name of the markup that was expected to
     * be closed, or `null` if none was opened.
     */
    protected function error($markup, $expected)
    {
        $this->malformed = true;

        $format = $expected !== null
            ? 'unexpected closing markup %markup, should be %expected'
            : 'unexpected closing markup %markup, when none was opened';

        call_user_func($this->error_handler, $format, [
            '%markup' => $this->namespace . $markup,
            '%expected' => $expected
        ]);
    }

    /**
     * Collects, depth first, every markup node with the specified name.
     *
     * @param array $nodes A tree as returned by `parse()`.
     * @param string $markup The markup name to look for.
     *
     * @return array The matching nodes, parents before their descendants.
     */
    public static function collectMarkup($nodes, $markup)
    {
        $collected = [];

        foreach ($nodes as $node) {
            // Text nodes are strings, only markup nodes are arrays.
            if (!is_array($node)) {
                continue;
            }

            // Strict comparison: with `==` numeric-looking names such as
            // "1e3" and "1000" would be considered equal.
            if ($node['name'] === $markup) {
                $collected[] = $node;
            }

            if (isset($node['children'])) {
                $collected = array_merge($collected, self::collectMarkup($node['children'], $markup));
            }
        }

        return $collected;
    }
}
||
293 |
As per the PSR-2 coding standard, case statements should not be wrapped in curly braces. There is no need for braces, since each case is terminated by the next break. To learn more about the PSR-2 coding standard, please refer to the PHP-FIG.