wikimedia /
mediawiki
This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
| 1 | <?php |
||
| 2 | /** |
||
| 3 | * Methods to play with strings. |
||
| 4 | * |
||
| 5 | * This program is free software; you can redistribute it and/or modify |
||
| 6 | * it under the terms of the GNU General Public License as published by |
||
| 7 | * the Free Software Foundation; either version 2 of the License, or |
||
| 8 | * (at your option) any later version. |
||
| 9 | * |
||
| 10 | * This program is distributed in the hope that it will be useful, |
||
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
| 13 | * GNU General Public License for more details. |
||
| 14 | * |
||
| 15 | * You should have received a copy of the GNU General Public License along |
||
| 16 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
| 17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
| 18 | * http://www.gnu.org/copyleft/gpl.html |
||
| 19 | * |
||
| 20 | * @file |
||
| 21 | */ |
||
| 22 | |||
| 23 | /** |
||
| 24 | * A collection of static methods to play with strings. |
||
| 25 | */ |
||
| 26 | class StringUtils { |
||
| 27 | /** |
||
| 28 | * Test whether a string is valid UTF-8. |
||
| 29 | * |
||
| 30 | * The function checks for invalid byte sequences and overlong encodings, but |
||
| 31 | * not for different normalisations. |
||
| 32 | * |
||
| 33 | * @note In MediaWiki 1.21, this function did not provide proper UTF-8 validation. |
||
| 34 | * In particular, the pure PHP code path did not in fact check for overlong forms. |
||
| 35 | * Beware of this when backporting code to that version of MediaWiki. |
||
| 36 | * |
||
| 37 | * @since 1.21 |
||
| 38 | * @param string $value String to check |
||
| 39 | * @return bool Whether the given $value is a valid UTF-8 encoded string |
||
| 40 | */ |
||
| 41 | static function isUtf8( $value ) { |
||
| 42 | $value = (string)$value; |
||
| 43 | |||
| 44 | // HHVM 3.4 and older come with an outdated version of libmbfl that |
||
| 45 | // incorrectly allows values above U+10FFFF, so we have to check |
||
| 46 | // for them separately. (This issue also exists in PHP 5.3 and |
||
| 47 | // older, which are no longer supported.) |
||
| 48 | static $newPHP; |
||
| 49 | if ( $newPHP === null ) { |
||
| 50 | $newPHP = !mb_check_encoding( "\xf4\x90\x80\x80", 'UTF-8' ); |
||
| 51 | } |
||
| 52 | |||
| 53 | return mb_check_encoding( $value, 'UTF-8' ) && |
||
| 54 | ( $newPHP || preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $value ) === 0 ); |
||
| 55 | } |
||
| 56 | |||
| 57 | /** |
||
| 58 | * Perform an operation equivalent to `preg_replace()` |
||
| 59 | * |
||
| 60 | * Matches this code: |
||
| 61 | * |
||
| 62 | * preg_replace( "!$startDelim(.*?)$endDelim!", $replace, $subject ); |
||
| 63 | * |
||
| 64 | * ..except that it's worst-case O(N) instead of O(N^2). Compared to delimiterReplace(), this |
||
| 65 | * implementation is fast but memory-hungry and inflexible. The memory requirements are such |
||
| 66 | * that I don't recommend using it on anything but guaranteed small chunks of text. |
||
| 67 | * |
||
| 68 | * @param string $startDelim |
||
| 69 | * @param string $endDelim |
||
| 70 | * @param string $replace |
||
| 71 | * @param string $subject |
||
| 72 | * @return string |
||
| 73 | */ |
||
| 74 | static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) { |
||
| 75 | $segments = explode( $startDelim, $subject ); |
||
| 76 | $output = array_shift( $segments ); |
||
| 77 | foreach ( $segments as $s ) { |
||
| 78 | $endDelimPos = strpos( $s, $endDelim ); |
||
| 79 | if ( $endDelimPos === false ) { |
||
| 80 | $output .= $startDelim . $s; |
||
| 81 | } else { |
||
| 82 | $output .= $replace . substr( $s, $endDelimPos + strlen( $endDelim ) ); |
||
| 83 | } |
||
| 84 | } |
||
| 85 | |||
| 86 | return $output; |
||
| 87 | } |
||
| 88 | |||
| 89 | /** |
||
| 90 | * Perform an operation equivalent to `preg_replace_callback()` |
||
| 91 | * |
||
| 92 | * Matches this code: |
||
| 93 | * |
||
| 94 | * preg_replace_callback( "!$startDelim(.*)$endDelim!s$flags", $callback, $subject ); |
||
| 95 | * |
||
| 96 | * If the start delimiter ends with an initial substring of the end delimiter, |
||
| 97 | * e.g. in the case of C-style comments, the behavior differs from the model |
||
| 98 | * regex. In this implementation, the end must share no characters with the |
||
| 99 | * start, so e.g. `/*\/` is not considered to be both the start and end of a |
||
| 100 | * comment. `/*\/xy/*\/` is considered to be a single comment with contents `/xy/`. |
||
| 101 | * |
||
| 102 | * The implementation of delimiterReplaceCallback() is slower than hungryDelimiterReplace() |
||
| 103 | * but uses far less memory. The delimiters are literal strings, not regular expressions. |
||
| 104 | * |
||
| 105 | * @param string $startDelim Start delimiter |
||
| 106 | * @param string $endDelim End delimiter |
||
| 107 | * @param callable $callback Function to call on each match |
||
| 108 | * @param string $subject |
||
| 109 | * @param string $flags Regular expression flags |
||
| 110 | * @throws InvalidArgumentException |
||
| 111 | * @return string |
||
| 112 | */ |
||
| 113 | static function delimiterReplaceCallback( $startDelim, $endDelim, $callback, |
||
| 114 | $subject, $flags = '' |
||
| 115 | ) { |
||
| 116 | $inputPos = 0; |
||
| 117 | $outputPos = 0; |
||
| 118 | $output = ''; |
||
| 119 | $foundStart = false; |
||
| 120 | $encStart = preg_quote( $startDelim, '!' ); |
||
| 121 | $encEnd = preg_quote( $endDelim, '!' ); |
||
| 122 | $strcmp = strpos( $flags, 'i' ) === false ? 'strcmp' : 'strcasecmp'; |
||
| 123 | $endLength = strlen( $endDelim ); |
||
| 124 | $m = []; |
||
| 125 | |||
| 126 | while ( $inputPos < strlen( $subject ) && |
||
| 127 | preg_match( "!($encStart)|($encEnd)!S$flags", $subject, $m, PREG_OFFSET_CAPTURE, $inputPos ) |
||
| 128 | ) { |
||
| 129 | $tokenOffset = $m[0][1]; |
||
| 130 | if ( $m[1][0] != '' ) { |
||
| 131 | if ( $foundStart && |
||
| 132 | $strcmp( $endDelim, substr( $subject, $tokenOffset, $endLength ) ) == 0 |
||
| 133 | ) { |
||
| 134 | # An end match is present at the same location |
||
| 135 | $tokenType = 'end'; |
||
| 136 | $tokenLength = $endLength; |
||
| 137 | } else { |
||
| 138 | $tokenType = 'start'; |
||
| 139 | $tokenLength = strlen( $m[0][0] ); |
||
| 140 | } |
||
| 141 | } elseif ( $m[2][0] != '' ) { |
||
| 142 | $tokenType = 'end'; |
||
| 143 | $tokenLength = strlen( $m[0][0] ); |
||
| 144 | } else { |
||
| 145 | throw new InvalidArgumentException( 'Invalid delimiter given to ' . __METHOD__ ); |
||
| 146 | } |
||
| 147 | |||
| 148 | if ( $tokenType == 'start' ) { |
||
| 149 | # Only move the start position if we haven't already found a start |
||
| 150 | # This means that START START END matches outer pair |
||
| 151 | if ( !$foundStart ) { |
||
| 152 | # Found start |
||
| 153 | $inputPos = $tokenOffset + $tokenLength; |
||
| 154 | # Write out the non-matching section |
||
| 155 | $output .= substr( $subject, $outputPos, $tokenOffset - $outputPos ); |
||
| 156 | $outputPos = $tokenOffset; |
||
| 157 | $contentPos = $inputPos; |
||
| 158 | $foundStart = true; |
||
| 159 | } else { |
||
| 160 | # Move the input position past the *first character* of START, |
||
| 161 | # to protect against missing END when it overlaps with START |
||
| 162 | $inputPos = $tokenOffset + 1; |
||
| 163 | } |
||
| 164 | } elseif ( $tokenType == 'end' ) { |
||
| 165 | if ( $foundStart ) { |
||
| 166 | # Found match |
||
| 167 | $output .= call_user_func( $callback, [ |
||
| 168 | substr( $subject, $outputPos, $tokenOffset + $tokenLength - $outputPos ), |
||
| 169 | substr( $subject, $contentPos, $tokenOffset - $contentPos ) |
||
|
0 ignored issues
–
show
|
|||
| 170 | ] ); |
||
| 171 | $foundStart = false; |
||
| 172 | } else { |
||
| 173 | # Non-matching end, write it out |
||
| 174 | $output .= substr( $subject, $inputPos, $tokenOffset + $tokenLength - $outputPos ); |
||
| 175 | } |
||
| 176 | $inputPos = $outputPos = $tokenOffset + $tokenLength; |
||
| 177 | } else { |
||
| 178 | throw new InvalidArgumentException( 'Invalid delimiter given to ' . __METHOD__ ); |
||
| 179 | } |
||
| 180 | } |
||
| 181 | if ( $outputPos < strlen( $subject ) ) { |
||
| 182 | $output .= substr( $subject, $outputPos ); |
||
| 183 | } |
||
| 184 | |||
| 185 | return $output; |
||
| 186 | } |
||
| 187 | |||
| 188 | /** |
||
| 189 | * Perform an operation equivalent to `preg_replace()` with flags. |
||
| 190 | * |
||
| 191 | * Matches this code: |
||
| 192 | * |
||
| 193 | * preg_replace( "!$startDelim(.*)$endDelim!$flags", $replace, $subject ); |
||
| 194 | * |
||
| 195 | * @param string $startDelim Start delimiter (literal string, not a regular expression) |
||
| 196 | * @param string $endDelim End delimiter (literal string, not a regular expression) |
||
| 197 | * @param string $replace Replacement string. May contain $1, which will be |
||
| 198 | * replaced by the text between the delimiters |
||
| 199 | * @param string $subject String to search |
||
| 200 | * @param string $flags Regular expression flags |
||
| 201 | * @return string The string with the matches replaced |
||
| 202 | */ |
||
| 203 | static function delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags = '' ) { |
||
| 204 | $replacer = new RegexlikeReplacer( $replace ); |
||
| 205 | |||
| 206 | return self::delimiterReplaceCallback( $startDelim, $endDelim, |
||
| 207 | $replacer->cb(), $subject, $flags ); |
||
| 208 | } |
||
| 209 | |||
| 210 | /** |
||
| 211 | * More or less "markup-safe" explode() |
||
| 212 | * Ignores any instances of the separator inside `<...>` |
||
| 213 | * @param string $separator |
||
| 214 | * @param string $text |
||
| 215 | * @return array |
||
| 216 | */ |
||
| 217 | static function explodeMarkup( $separator, $text ) { |
||
| 218 | $placeholder = "\x00"; |
||
| 219 | |||
| 220 | // Remove placeholder instances |
||
| 221 | $text = str_replace( $placeholder, '', $text ); |
||
| 222 | |||
| 223 | // Replace instances of the separator inside HTML-like tags with the placeholder |
||
| 224 | $replacer = new DoubleReplacer( $separator, $placeholder ); |
||
| 225 | $cleaned = StringUtils::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text ); |
||
| 226 | |||
| 227 | // Explode, then put the replaced separators back in |
||
| 228 | $items = explode( $separator, $cleaned ); |
||
| 229 | foreach ( $items as $i => $str ) { |
||
| 230 | $items[$i] = str_replace( $placeholder, $separator, $str ); |
||
| 231 | } |
||
| 232 | |||
| 233 | return $items; |
||
| 234 | } |
||
| 235 | |||
| 236 | /** |
||
| 237 | * More or less "markup-safe" str_replace() |
||
| 238 | * Ignores any instances of the search string inside `<...>` |
||
| 239 | * @param string $search |
||
| 240 | * @param string $replace |
||
| 241 | * @param string $text |
||
| 242 | * @return string |
||
| 243 | */ |
||
| 244 | static function replaceMarkup( $search, $replace, $text ) { |
||
| 245 | $placeholder = "\x00"; |
||
| 246 | |||
| 247 | // Remove placeholder instances |
||
| 248 | $text = str_replace( $placeholder, '', $text ); |
||
| 249 | |||
| 250 | // Replace instances of the search string inside HTML-like tags with the placeholder |
||
| 251 | $replacer = new DoubleReplacer( $search, $placeholder ); |
||
| 252 | $cleaned = StringUtils::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text ); |
||
| 253 | |||
| 254 | // Do the replacement, then put the protected search strings back in |
||
| 255 | $cleaned = str_replace( $search, $replace, $cleaned ); |
||
| 256 | $text = str_replace( $placeholder, $search, $cleaned ); |
||
| 257 | |||
| 258 | return $text; |
||
| 259 | } |
||
| 260 | |||
| 261 | /** |
||
| 262 | * Escape a string to make it suitable for inclusion in a preg_replace() |
||
| 263 | * replacement parameter. |
||
| 264 | * |
||
| 265 | * @param string $string |
||
| 266 | * @return string |
||
| 267 | */ |
||
| 268 | static function escapeRegexReplacement( $string ) { |
||
| 269 | $string = str_replace( '\\', '\\\\', $string ); |
||
| 270 | $string = str_replace( '$', '\\$', $string ); |
||
| 271 | return $string; |
||
| 272 | } |
||
| 273 | |||
| 274 | /** |
||
| 275 | * Workalike for explode() with limited memory usage. |
||
| 276 | * |
||
| 277 | * @param string $separator |
||
| 278 | * @param string $subject |
||
| 279 | * @return ArrayIterator|ExplodeIterator |
||
| 280 | */ |
||
| 281 | static function explode( $separator, $subject ) { |
||
| 282 | if ( substr_count( $subject, $separator ) > 1000 ) { |
||
| 283 | return new ExplodeIterator( $separator, $subject ); |
||
| 284 | } else { |
||
| 285 | return new ArrayIterator( explode( $separator, $subject ) ); |
||
| 286 | } |
||
| 287 | } |
||
| 288 | } |
||
| 289 |
If you define a variable conditionally, it can happen that it is not defined for all execution paths.
Let’s take a look at an example:
In the above example, the variable $x is defined if you pass “foo” or “bar” as the argument for $a. However, since the switch statement has no default case, the variable $x would be undefined if you passed any other value.
Available Fixes
Check for existence of the variable explicitly:
Define a default value for the variable:
Add a value for the missing path: