| Conditions | 3 |
| Paths | 4 |
| Total Lines | 319 |
| Code Lines | 232 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method

If many parameters/temporary variables are present:
- Replace Temp with Query
- Introduce Parameter Object
- Preserve Whole Object
| 1 | <?php |
||
/**
 * Data provider: single-field fixtures for strip_invalid_text().
 *
 * Each entry of $fields describes one field:
 *  - 'charset'            The field's character set, or false for a column without one.
 *  - 'value'              The input value.
 *  - 'expected'           The value expected back for that input.
 *  - 'length'             array( 'type' => 'char'|'byte', 'length' => int ), or false.
 *  - 'connection_charset' Optional. Kept on the input field, dropped from the expected field.
 *
 * The big5 fixtures are only added when mb_convert_encoding() is available, and
 * only after a UTF-8 -> BIG-5 -> UTF-8 round trip has been asserted.
 *
 * @return array[] One array( $data, $expected, $test_case ) per fixture, where $data
 *                 and $expected are each a one-element list of fields and $test_case
 *                 is the fixture's key in $fields.
 */
function data_strip_invalid_text()
{
    // U+FF13 FULLWIDTH DIGIT THREE: a 3-byte UTF-8 character. It is spelled with
    // byte escapes so it cannot be confused with (or normalised to) ASCII "3",
    // which would invalidate every byte-length expectation below.
    $three_byte = "\xef\xbc\x93";

    $fields = array(
        'latin1' => array(
            // latin1. latin1 never changes.
            'charset'  => 'latin1',
            'value'    => "\xf0\x9f\x8e\xb7",
            'expected' => "\xf0\x9f\x8e\xb7",
            'length'   => array( 'type' => 'char', 'length' => 100 ),
        ),
        'latin1_char_length' => array(
            // latin1. latin1 never changes.
            'charset'  => 'latin1',
            'value'    => str_repeat('A', 11),
            'expected' => str_repeat('A', 10),
            'length'   => array( 'type' => 'char', 'length' => 10 ),
        ),
        'latin1_byte_length' => array(
            // latin1. latin1 never changes.
            'charset'  => 'latin1',
            'value'    => str_repeat('A', 11),
            'expected' => str_repeat('A', 10),
            'length'   => array( 'type' => 'byte', 'length' => 10 ),
        ),
        'ascii' => array(
            // ascii gets special treatment, make sure it's covered
            'charset'  => 'ascii',
            'value'    => 'Hello World',
            'expected' => 'Hello World',
            'length'   => array( 'type' => 'char', 'length' => 100 ),
        ),
        'ascii_char_length' => array(
            // ascii gets special treatment, make sure it's covered
            'charset'  => 'ascii',
            'value'    => str_repeat('A', 11),
            'expected' => str_repeat('A', 10),
            'length'   => array( 'type' => 'char', 'length' => 10 ),
        ),
        'ascii_byte_length' => array(
            // ascii gets special treatment, make sure it's covered
            'charset'  => 'ascii',
            'value'    => str_repeat('A', 11),
            'expected' => str_repeat('A', 10),
            'length'   => array( 'type' => 'byte', 'length' => 10 ),
        ),
        'utf8' => array(
            // utf8 only allows <= 3-byte chars
            'charset'  => 'utf8',
            'value'    => "H€llo\xf0\x9f\x98\x88World¢",
            'expected' => 'H€lloWorld¢',
            'length'   => array( 'type' => 'char', 'length' => 100 ),
        ),
        'utf8_23char_length' => array(
            // utf8 only allows <= 3-byte chars
            'charset'  => 'utf8',
            'value'    => str_repeat("²{$three_byte}", 10),
            'expected' => str_repeat("²{$three_byte}", 5),
            'length'   => array( 'type' => 'char', 'length' => 10 ),
        ),
        'utf8_23byte_length' => array(
            // utf8 only allows <= 3-byte chars
            'charset'  => 'utf8',
            'value'    => str_repeat("²{$three_byte}", 10),
            // (2 + 3 bytes) x 2 = exactly 10 bytes.
            'expected' => str_repeat("²{$three_byte}", 2),
            'length'   => array( 'type' => 'byte', 'length' => 10 ),
        ),
        'utf8_3char_length' => array(
            // utf8 only allows <= 3-byte chars
            'charset'  => 'utf8',
            'value'    => str_repeat($three_byte, 11),
            'expected' => str_repeat($three_byte, 10),
            'length'   => array( 'type' => 'char', 'length' => 10 ),
        ),
        'utf8_3byte_length' => array(
            // utf8 only allows <= 3-byte chars
            'charset'  => 'utf8',
            'value'    => str_repeat($three_byte, 11),
            // 3 x 3 bytes = 9 bytes; a fourth character would exceed 10.
            'expected' => str_repeat($three_byte, 3),
            'length'   => array( 'type' => 'byte', 'length' => 10 ),
        ),
        'utf8mb3' => array(
            // utf8mb3 should behave the same as utf8
            'charset'  => 'utf8mb3',
            'value'    => "H€llo\xf0\x9f\x98\x88World¢",
            'expected' => 'H€lloWorld¢',
            'length'   => array( 'type' => 'char', 'length' => 100 ),
        ),
        'utf8mb3_23char_length' => array(
            // utf8mb3 should behave the same as utf8
            'charset'  => 'utf8mb3',
            'value'    => str_repeat("²{$three_byte}", 10),
            'expected' => str_repeat("²{$three_byte}", 5),
            'length'   => array( 'type' => 'char', 'length' => 10 ),
        ),
        'utf8mb3_23byte_length' => array(
            // utf8mb3 should behave the same as utf8
            'charset'  => 'utf8mb3',
            'value'    => str_repeat("²{$three_byte}", 10),
            'expected' => str_repeat("²{$three_byte}", 2),
            'length'   => array( 'type' => 'byte', 'length' => 10 ),
        ),
        'utf8mb3_3char_length' => array(
            // utf8mb3 should behave the same as utf8
            'charset'  => 'utf8mb3',
            'value'    => str_repeat($three_byte, 11),
            'expected' => str_repeat($three_byte, 10),
            'length'   => array( 'type' => 'char', 'length' => 10 ),
        ),
        'utf8mb3_3byte_length' => array(
            // utf8mb3 should behave the same as utf8
            'charset'  => 'utf8mb3',
            'value'    => str_repeat($three_byte, 10),
            'expected' => str_repeat($three_byte, 3),
            'length'   => array( 'type' => 'byte', 'length' => 10 ),
        ),
        'utf8mb4' => array(
            // utf8mb4 allows 4-byte characters, too
            'charset'  => 'utf8mb4',
            'value'    => "H€llo\xf0\x9f\x98\x88World¢",
            'expected' => "H€llo\xf0\x9f\x98\x88World¢",
            'length'   => array( 'type' => 'char', 'length' => 100 ),
        ),
        'utf8mb4_234char_length' => array(
            // utf8mb4 allows 4-byte characters, too
            'charset'  => 'utf8mb4',
            'value'    => str_repeat("²{$three_byte}𝟜", 10),
            // Three full triples plus one more character = 10 characters.
            'expected' => str_repeat("²{$three_byte}𝟜", 3) . "²",
            'length'   => array( 'type' => 'char', 'length' => 10 ),
        ),
        'utf8mb4_234byte_length' => array(
            // utf8mb4 allows 4-byte characters, too
            'charset'  => 'utf8mb4',
            'value'    => str_repeat("²{$three_byte}𝟜", 10),
            // 2 + 3 + 4 = 9 bytes; the next character would exceed 10.
            'expected' => "²{$three_byte}𝟜",
            'length'   => array( 'type' => 'byte', 'length' => 10 ),
        ),
        'utf8mb4_4char_length' => array(
            // utf8mb4 allows 4-byte characters, too
            'charset'  => 'utf8mb4',
            'value'    => str_repeat("𝟜", 11),
            'expected' => str_repeat("𝟜", 10),
            'length'   => array( 'type' => 'char', 'length' => 10 ),
        ),
        'utf8mb4_4byte_length' => array(
            // utf8mb4 allows 4-byte characters, too
            'charset'  => 'utf8mb4',
            'value'    => str_repeat("𝟜", 10),
            'expected' => "𝟜𝟜",
            'length'   => array( 'type' => 'byte', 'length' => 10 ),
        ),
        'koi8r' => array(
            'charset'  => 'koi8r',
            'value'    => "\xfdord\xf2ress",
            'expected' => "\xfdord\xf2ress",
            'length'   => array( 'type' => 'char', 'length' => 100 ),
        ),
        'koi8r_char_length' => array(
            'charset'  => 'koi8r',
            'value'    => str_repeat("\xfd\xf2", 10),
            'expected' => str_repeat("\xfd\xf2", 5),
            'length'   => array( 'type' => 'char', 'length' => 10 ),
        ),
        'koi8r_byte_length' => array(
            'charset'  => 'koi8r',
            'value'    => str_repeat("\xfd\xf2", 10),
            'expected' => str_repeat("\xfd\xf2", 5),
            'length'   => array( 'type' => 'byte', 'length' => 10 ),
        ),
        'hebrew' => array(
            'charset'  => 'hebrew',
            'value'    => "\xf9ord\xf7ress",
            'expected' => "\xf9ord\xf7ress",
            'length'   => array( 'type' => 'char', 'length' => 100 ),
        ),
        'hebrew_char_length' => array(
            'charset'  => 'hebrew',
            'value'    => str_repeat("\xf9\xf7", 10),
            'expected' => str_repeat("\xf9\xf7", 5),
            'length'   => array( 'type' => 'char', 'length' => 10 ),
        ),
        'hebrew_byte_length' => array(
            'charset'  => 'hebrew',
            'value'    => str_repeat("\xf9\xf7", 10),
            'expected' => str_repeat("\xf9\xf7", 5),
            'length'   => array( 'type' => 'byte', 'length' => 10 ),
        ),
        'cp1251' => array(
            'charset'  => 'cp1251',
            'value'    => "\xd8ord\xd0ress",
            'expected' => "\xd8ord\xd0ress",
            'length'   => array( 'type' => 'char', 'length' => 100 ),
        ),
        'cp1251_no_length' => array(
            'charset'  => 'cp1251',
            'value'    => "\xd8ord\xd0ress",
            'expected' => "\xd8ord\xd0ress",
            'length'   => false,
        ),
        'cp1251_no_length_ascii' => array(
            'charset'  => 'cp1251',
            'value'    => "WordPress",
            'expected' => "WordPress",
            'length'   => false,
            // Don't set 'ascii' => true/false.
            // That's a different codepath than it being unset even if
            // there's only ASCII in the value.
        ),
        'cp1251_char_length' => array(
            'charset'  => 'cp1251',
            'value'    => str_repeat("\xd8\xd0", 10),
            'expected' => str_repeat("\xd8\xd0", 5),
            'length'   => array( 'type' => 'char', 'length' => 10 ),
        ),
        'cp1251_byte_length' => array(
            'charset'  => 'cp1251',
            'value'    => str_repeat("\xd8\xd0", 10),
            'expected' => str_repeat("\xd8\xd0", 5),
            'length'   => array( 'type' => 'byte', 'length' => 10 ),
        ),
        'tis620' => array(
            'charset'  => 'tis620',
            'value'    => "\xccord\xe3ress",
            'expected' => "\xccord\xe3ress",
            'length'   => array( 'type' => 'char', 'length' => 100 ),
        ),
        'tis620_char_length' => array(
            'charset'  => 'tis620',
            'value'    => str_repeat("\xcc\xe3", 10),
            'expected' => str_repeat("\xcc\xe3", 5),
            'length'   => array( 'type' => 'char', 'length' => 10 ),
        ),
        'tis620_byte_length' => array(
            'charset'  => 'tis620',
            'value'    => str_repeat("\xcc\xe3", 10),
            'expected' => str_repeat("\xcc\xe3", 5),
            'length'   => array( 'type' => 'byte', 'length' => 10 ),
        ),
        'ujis_with_utf8_connection' => array(
            'charset'            => 'ujis',
            'connection_charset' => 'utf8',
            'value'              => '自動下書き',
            'expected'           => '自動下書き',
            'length'             => array( 'type' => 'byte', 'length' => 100 ),
        ),
        'ujis_with_utf8_connection_char_length' => array(
            'charset'            => 'ujis',
            'connection_charset' => 'utf8',
            'value'              => '自動下書き',
            'expected'           => '自動下書',
            'length'             => array( 'type' => 'char', 'length' => 4 ),
        ),
        'ujis_with_utf8_connection_byte_length' => array(
            'charset'            => 'ujis',
            'connection_charset' => 'utf8',
            'value'              => '自動下書き',
            'expected'           => '自動',
            'length'             => array( 'type' => 'byte', 'length' => 6 ),
        ),
        'false' => array(
            // false is a column with no character set (ie, a number column)
            'charset'  => false,
            'value'    => 100,
            'expected' => 100,
            'length'   => false,
        ),
    );

    if (function_exists('mb_convert_encoding') ) {
        // big5 is a non-Unicode multibyte charset
        $utf8      = "a\xe5\x85\xb1b"; // UTF-8 Character 20849
        $big5      = mb_convert_encoding($utf8, 'BIG-5', 'UTF-8');
        $conv_utf8 = mb_convert_encoding($big5, 'UTF-8', 'BIG-5');
        // Make sure PHP's multibyte conversions are working correctly
        $this->assertNotEquals($utf8, $big5);
        $this->assertEquals($utf8, $conv_utf8);

        $fields['big5'] = array(
            'charset'  => 'big5',
            'value'    => $big5,
            'expected' => $big5,
            'length'   => array( 'type' => 'char', 'length' => 100 ),
        );

        $fields['big5_char_length'] = array(
            'charset'  => 'big5',
            'value'    => str_repeat($big5, 10),
            'expected' => str_repeat($big5, 3) . 'a',
            'length'   => array( 'type' => 'char', 'length' => 10 ),
        );

        $fields['big5_byte_length'] = array(
            'charset'  => 'big5',
            'value'    => str_repeat($big5, 10),
            'expected' => str_repeat($big5, 2) . 'a',
            'length'   => array( 'type' => 'byte', 'length' => 10 ),
        );
    }

    // The data above is easy to edit. Now, prepare it for the data provider.
    $data_provider = array();
    foreach ( $fields as $test_case => $field ) {
        // The expected field is the input field with 'value' replaced by 'expected'.
        $expected          = $field;
        $expected['value'] = $expected['expected'];
        // 'expected' is fixture-only metadata, so drop it from both sides.
        // 'connection_charset' stays on the input field only.
        unset($expected['expected'], $field['expected'], $expected['connection_charset']);

        // strip_invalid_text() expects an array of fields. We're testing one field at a time.
        // First argument is field data. Second is expected. Third is the message.
        $data_provider[] = array( array( $field ), array( $expected ), $test_case );
    }

    return $data_provider;
}
||
| 361 | |||
| 957 |