| Conditions | 41 |
| Paths | 1296 |
| Total Lines | 288 |
| Code Lines | 169 |
| Lines | 20 |
| Ratio | 6.94 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method

If many parameters/temporary variables are present:
- Replace Temp with Query
- Introduce Parameter Object
- Preserve Whole Object
| 1 | <?php |
||
| 31 | public function tokenizeHTML($html, $config, $context) { |
||
| 32 | |||
| 33 | // special normalization for script tags without any armor |
||
| 34 | // our "armor" heuristic is a < sign any number of whitespaces after |
||
| 35 | // the first script tag |
||
| 36 | if ($config->get('HTML.Trusted')) { |
||
| 37 | $html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si', |
||
| 38 | array($this, 'scriptCallback'), $html); |
||
| 39 | } |
||
| 40 | |||
| 41 | $html = $this->normalize($html, $config, $context); |
||
| 42 | |||
| 43 | $cursor = 0; // our location in the text |
||
| 44 | $inside_tag = false; // whether or not we're parsing the inside of a tag |
||
| 45 | $array = array(); // result array |
||
| 46 | |||
| 47 | // This is also treated to mean maintain *column* numbers too |
||
| 48 | $maintain_line_numbers = $config->get('Core.MaintainLineNumbers'); |
||
| 49 | |||
| 50 | if ($maintain_line_numbers === null) { |
||
| 51 | // automatically determine line numbering by checking |
||
| 52 | // if error collection is on |
||
| 53 | $maintain_line_numbers = $config->get('Core.CollectErrors'); |
||
| 54 | } |
||
| 55 | |||
| 56 | if ($maintain_line_numbers) { |
||
| 57 | $current_line = 1; |
||
| 58 | $current_col = 0; |
||
| 59 | $length = strlen($html); |
||
| 60 | } else { |
||
| 61 | $current_line = false; |
||
| 62 | $current_col = false; |
||
| 63 | $length = false; |
||
| 64 | } |
||
| 65 | $context->register('CurrentLine', $current_line); |
||
| 66 | $context->register('CurrentCol', $current_col); |
||
| 67 | $nl = "\n"; |
||
| 68 | // how often to manually recalculate. This will ALWAYS be right, |
||
| 69 | // but it's pretty wasteful. Set to 0 to turn off |
||
| 70 | $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval'); |
||
| 71 | |||
| 72 | $e = false; |
||
| 73 | if ($config->get('Core.CollectErrors')) { |
||
| 74 | $e =& $context->get('ErrorCollector'); |
||
| 75 | } |
||
| 76 | |||
| 77 | // for testing synchronization |
||
| 78 | $loops = 0; |
||
| 79 | |||
| 80 | while(++$loops) { |
||
| 81 | |||
| 82 | // $cursor is either at the start of a token, or inside of |
||
| 83 | // a tag (i.e. there was a < immediately before it), as indicated |
||
| 84 | // by $inside_tag |
||
| 85 | |||
| 86 | if ($maintain_line_numbers) { |
||
| 87 | |||
| 88 | // $rcursor, however, is always at the start of a token. |
||
| 89 | $rcursor = $cursor - (int) $inside_tag; |
||
| 90 | |||
| 91 | // Column number is cheap, so we calculate it every round. |
||
| 92 | // We're interested at the *end* of the newline string, so |
||
| 93 | // we need to add strlen($nl) == 1 to $nl_pos before subtracting it |
||
| 94 | // from our "rcursor" position. |
||
| 95 | $nl_pos = strrpos($html, $nl, $rcursor - $length); |
||
| 96 | $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1); |
||
| 97 | |||
| 98 | // recalculate lines |
||
| 99 | if ( |
||
| 100 | $synchronize_interval && // synchronization is on |
||
| 101 | $cursor > 0 && // cursor is further than zero |
||
| 102 | $loops % $synchronize_interval === 0 // time to synchronize! |
||
| 103 | ) { |
||
| 104 | $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor); |
||
| 105 | } |
||
| 106 | |||
| 107 | } |
||
| 108 | |||
| 109 | $position_next_lt = strpos($html, '<', $cursor); |
||
| 110 | $position_next_gt = strpos($html, '>', $cursor); |
||
| 111 | |||
| 112 | // triggers on "<b>asdf</b>" but not "asdf <b></b>" |
||
| 113 | // special case to set up context |
||
| 114 | if ($position_next_lt === $cursor) { |
||
| 115 | $inside_tag = true; |
||
| 116 | $cursor++; |
||
| 117 | } |
||
| 118 | |||
| 119 | if (!$inside_tag && $position_next_lt !== false) { |
||
| 120 | // We are not inside tag and there still is another tag to parse |
||
| 121 | $token = new |
||
| 122 | HTMLPurifier_Token_Text( |
||
| 123 | $this->parseData( |
||
| 124 | substr( |
||
| 125 | $html, $cursor, $position_next_lt - $cursor |
||
| 126 | ) |
||
| 127 | ) |
||
| 128 | ); |
||
| 129 | View Code Duplication | if ($maintain_line_numbers) { |
|
| 130 | $token->rawPosition($current_line, $current_col); |
||
| 131 | $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor); |
||
| 132 | } |
||
| 133 | $array[] = $token; |
||
| 134 | $cursor = $position_next_lt + 1; |
||
| 135 | $inside_tag = true; |
||
| 136 | continue; |
||
| 137 | } elseif (!$inside_tag) { |
||
| 138 | // We are not inside tag but there are no more tags |
||
| 139 | // If we're already at the end, break |
||
| 140 | if ($cursor === strlen($html)) break; |
||
| 141 | // Create Text of rest of string |
||
| 142 | $token = new |
||
| 143 | HTMLPurifier_Token_Text( |
||
| 144 | $this->parseData( |
||
| 145 | substr( |
||
| 146 | $html, $cursor |
||
| 147 | ) |
||
| 148 | ) |
||
| 149 | ); |
||
| 150 | if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); |
||
| 151 | $array[] = $token; |
||
| 152 | break; |
||
| 153 | } elseif ($inside_tag && $position_next_gt !== false) { |
||
| 154 | // We are in tag and it is well formed |
||
| 155 | // Grab the internals of the tag |
||
| 156 | $strlen_segment = $position_next_gt - $cursor; |
||
| 157 | |||
| 158 | if ($strlen_segment < 1) { |
||
| 159 | // there's nothing to process! |
||
| 160 | $token = new HTMLPurifier_Token_Text('<'); |
||
|
|
|||
| 161 | $cursor++; |
||
| 162 | continue; |
||
| 163 | } |
||
| 164 | |||
| 165 | $segment = substr($html, $cursor, $strlen_segment); |
||
| 166 | |||
| 167 | if ($segment === false) { |
||
| 168 | // somehow, we attempted to access beyond the end of |
||
| 169 | // the string, defense-in-depth, reported by Nate Abele |
||
| 170 | break; |
||
| 171 | } |
||
| 172 | |||
| 173 | // Check if it's a comment |
||
| 174 | if ( |
||
| 175 | substr($segment, 0, 3) === '!--' |
||
| 176 | ) { |
||
| 177 | // re-determine segment length, looking for --> |
||
| 178 | $position_comment_end = strpos($html, '-->', $cursor); |
||
| 179 | if ($position_comment_end === false) { |
||
| 180 | // uh oh, we have a comment that extends to |
||
| 181 | // infinity. Can't be helped: set comment |
||
| 182 | // end position to end of string |
||
| 183 | if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment'); |
||
| 184 | $position_comment_end = strlen($html); |
||
| 185 | $end = true; |
||
| 186 | } else { |
||
| 187 | $end = false; |
||
| 188 | } |
||
| 189 | $strlen_segment = $position_comment_end - $cursor; |
||
| 190 | $segment = substr($html, $cursor, $strlen_segment); |
||
| 191 | $token = new |
||
| 192 | HTMLPurifier_Token_Comment( |
||
| 193 | substr( |
||
| 194 | $segment, 3, $strlen_segment - 3 |
||
| 195 | ) |
||
| 196 | ); |
||
| 197 | if ($maintain_line_numbers) { |
||
| 198 | $token->rawPosition($current_line, $current_col); |
||
| 199 | $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment); |
||
| 200 | } |
||
| 201 | $array[] = $token; |
||
| 202 | $cursor = $end ? $position_comment_end : $position_comment_end + 3; |
||
| 203 | $inside_tag = false; |
||
| 204 | continue; |
||
| 205 | } |
||
| 206 | |||
| 207 | // Check if it's an end tag |
||
| 208 | $is_end_tag = (strpos($segment,'/') === 0); |
||
| 209 | if ($is_end_tag) { |
||
| 210 | $type = substr($segment, 1); |
||
| 211 | $token = new HTMLPurifier_Token_End($type); |
||
| 212 | View Code Duplication | if ($maintain_line_numbers) { |
|
| 213 | $token->rawPosition($current_line, $current_col); |
||
| 214 | $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); |
||
| 215 | } |
||
| 216 | $array[] = $token; |
||
| 217 | $inside_tag = false; |
||
| 218 | $cursor = $position_next_gt + 1; |
||
| 219 | continue; |
||
| 220 | } |
||
| 221 | |||
| 222 | // Check leading character is alphabetic, if not, we may |
||
| 223 | // have accidentally grabbed an emoticon. Translate into |
||
| 224 | // text and go our merry way |
||
| 225 | if (!ctype_alpha($segment[0])) { |
||
| 226 | // XML: $segment[0] !== '_' && $segment[0] !== ':' |
||
| 227 | if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt'); |
||
| 228 | $token = new HTMLPurifier_Token_Text('<'); |
||
| 229 | View Code Duplication | if ($maintain_line_numbers) { |
|
| 230 | $token->rawPosition($current_line, $current_col); |
||
| 231 | $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); |
||
| 232 | } |
||
| 233 | $array[] = $token; |
||
| 234 | $inside_tag = false; |
||
| 235 | continue; |
||
| 236 | } |
||
| 237 | |||
| 238 | // Check if it is explicitly self closing, if so, remove |
||
| 239 | // trailing slash. Remember, we could have a tag like <br>, so |
||
| 240 | // any later token processing scripts must convert improperly |
||
| 241 | // classified EmptyTags from StartTags. |
||
| 242 | $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1); |
||
| 243 | if ($is_self_closing) { |
||
| 244 | $strlen_segment--; |
||
| 245 | $segment = substr($segment, 0, $strlen_segment); |
||
| 246 | } |
||
| 247 | |||
| 248 | // Check if there are any attributes |
||
| 249 | $position_first_space = strcspn($segment, $this->_whitespace); |
||
| 250 | |||
| 251 | if ($position_first_space >= $strlen_segment) { |
||
| 252 | if ($is_self_closing) { |
||
| 253 | $token = new HTMLPurifier_Token_Empty($segment); |
||
| 254 | } else { |
||
| 255 | $token = new HTMLPurifier_Token_Start($segment); |
||
| 256 | } |
||
| 257 | View Code Duplication | if ($maintain_line_numbers) { |
|
| 258 | $token->rawPosition($current_line, $current_col); |
||
| 259 | $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); |
||
| 260 | } |
||
| 261 | $array[] = $token; |
||
| 262 | $inside_tag = false; |
||
| 263 | $cursor = $position_next_gt + 1; |
||
| 264 | continue; |
||
| 265 | } |
||
| 266 | |||
| 267 | // Grab out all the data |
||
| 268 | $type = substr($segment, 0, $position_first_space); |
||
| 269 | $attribute_string = |
||
| 270 | trim( |
||
| 271 | substr( |
||
| 272 | $segment, $position_first_space |
||
| 273 | ) |
||
| 274 | ); |
||
| 275 | if ($attribute_string) { |
||
| 276 | $attr = $this->parseAttributeString( |
||
| 277 | $attribute_string |
||
| 278 | , $config, $context |
||
| 279 | ); |
||
| 280 | } else { |
||
| 281 | $attr = array(); |
||
| 282 | } |
||
| 283 | |||
| 284 | if ($is_self_closing) { |
||
| 285 | $token = new HTMLPurifier_Token_Empty($type, $attr); |
||
| 286 | } else { |
||
| 287 | $token = new HTMLPurifier_Token_Start($type, $attr); |
||
| 288 | } |
||
| 289 | View Code Duplication | if ($maintain_line_numbers) { |
|
| 290 | $token->rawPosition($current_line, $current_col); |
||
| 291 | $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); |
||
| 292 | } |
||
| 293 | $array[] = $token; |
||
| 294 | $cursor = $position_next_gt + 1; |
||
| 295 | $inside_tag = false; |
||
| 296 | continue; |
||
| 297 | } else { |
||
| 298 | // inside tag, but there's no ending > sign |
||
| 299 | if ($e) $e->send(E_WARNING, 'Lexer: Missing gt'); |
||
| 300 | $token = new |
||
| 301 | HTMLPurifier_Token_Text( |
||
| 302 | '<' . |
||
| 303 | $this->parseData( |
||
| 304 | substr($html, $cursor) |
||
| 305 | ) |
||
| 306 | ); |
||
| 307 | if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); |
||
| 308 | // no cursor scroll? Hmm... |
||
| 309 | $array[] = $token; |
||
| 310 | break; |
||
| 311 | } |
||
| 312 | break; |
||
| 313 | } |
||
| 314 | |||
| 315 | $context->destroy('CurrentLine'); |
||
| 316 | $context->destroy('CurrentCol'); |
||
| 317 | return $array; |
||
| 318 | } |
||
| 319 | |||
| 491 |
This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.
Both the `$myVar` assignment in line 1 and the `$higher` assignment in line 2 are dead. The first because `$myVar` is never used, and the second because `$higher` is always overwritten for every possible time line.