| @@ 2344-2398 (lines=55) @@ | ||
| 2341 | * <strong>2</strong> for UTF-16BE. |
|
| 2342 | * </p> |
|
| 2343 | */ |
|
public static function is_utf16(string $str)
{
    // Only input that is_binary() accepts is inspected any further.
    if (self::is_binary($str) === false) {
        return false;
    }

    $str = self::remove_bom($str);

    // Character table of the BOM-less input; filled lazily, shared by both probes.
    $sourceChars = [];

    // One score per candidate byte order.
    $hits = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

    foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
        $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
        if (!$decoded) {
            // Falsy conversion result: this byte order scores nothing.
            continue;
        }

        // Round trip UTF-8 -> candidate -> UTF-8; the candidate is only
        // scored when the text comes back unchanged.
        $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
        $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
        if ($roundTrip !== $decoded) {
            continue;
        }

        if (\count($sourceChars) === 0) {
            $sourceChars = self::count_chars($str, true);
        }

        // NOTE(review): the needle is a *key* of the count_chars() result,
        // while in_array() searches the *values* of $sourceChars - confirm
        // against count_chars() that this is the intended comparison.
        foreach (self::count_chars($roundTrip, true) as $char => $unused) {
            if (\in_array($char, $sourceChars, true)) {
                $hits[$encoding]++;
            }
        }
    }

    // A tie (including 0 : 0) gives no verdict.
    if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
        return false;
    }

    // 1 when the little-endian score wins, 2 when the big-endian score wins.
    return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
}
|
| 2399 | ||
| 2400 | /** |
|
| 2401 | * Check if the string is UTF-32. |
|
| @@ 2411-2465 (lines=55) @@ | ||
| 2408 | * <strong>2</strong> for UTF-32BE. |
|
| 2409 | * </p> |
|
| 2410 | */ |
|
public static function is_utf32(string $str)
{
    // Only input that is_binary() accepts is inspected any further.
    if (self::is_binary($str) === false) {
        return false;
    }

    $str = self::remove_bom($str);

    // Character table of the BOM-less input; filled lazily, shared by both probes.
    $sourceChars = [];

    // One score per candidate byte order.
    $hits = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

    foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
        $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
        if (!$decoded) {
            // Falsy conversion result: this byte order scores nothing.
            continue;
        }

        // Round trip UTF-8 -> candidate -> UTF-8; the candidate is only
        // scored when the text comes back unchanged.
        $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
        $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
        if ($roundTrip !== $decoded) {
            continue;
        }

        if (\count($sourceChars) === 0) {
            $sourceChars = self::count_chars($str, true);
        }

        // NOTE(review): the needle is a *key* of the count_chars() result,
        // while in_array() searches the *values* of $sourceChars - confirm
        // against count_chars() that this is the intended comparison.
        foreach (self::count_chars($roundTrip, true) as $char => $unused) {
            if (\in_array($char, $sourceChars, true)) {
                $hits[$encoding]++;
            }
        }
    }

    // A tie (including 0 : 0) gives no verdict.
    if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
        return false;
    }

    // 1 when the little-endian score wins, 2 when the big-endian score wins.
    return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
}
|
| 2466 | ||
| 2467 | /** |
|
| 2468 | * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters. |
|