| @@ 2821-2875 (lines=55) @@ | ||
| 2818 | * <strong>2</strong> for UTF-16BE. |
|
| 2819 | * </p> |
|
| 2820 | */ |
|
public static function is_utf16(string $str)
{
    // Nothing to detect when is_binary() rejects the input.
    if (self::is_binary($str) === false) {
        return false;
    }

    $str = self::remove_bom($str);

    // Filled on first use with count_chars() of the BOM-less input and then
    // shared by both byte-order probes.
    $inputChars = [];

    // One score per byte order; little-endian is probed first.
    $scores = [];
    foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
        $scores[$encoding] = 0;

        $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
        if (!$decoded) {
            continue;
        }

        // Round trip: the decoded text must survive re-encoding and decoding unchanged.
        $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
        $reDecoded = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
        if ($reDecoded !== $decoded) {
            continue;
        }

        if (\count($inputChars) === 0) {
            $inputChars = self::count_chars($str, true);
        }

        // Every key of the decoded text's count_chars() result that is also
        // found (strictly) among the input's count_chars() values adds one point.
        foreach (self::count_chars($reDecoded, true) as $char => $ignored) {
            if (\in_array($char, $inputChars, true) === true) {
                $scores[$encoding]++;
            }
        }
    }

    $littleEndian = $scores['UTF-16LE'];
    $bigEndian = $scores['UTF-16BE'];

    // A tie (including "neither matched") means no decision.
    if ($littleEndian === $bigEndian) {
        return false;
    }

    // 1 => UTF-16LE scored higher, 2 => UTF-16BE scored higher.
    return $littleEndian > $bigEndian ? 1 : 2;
}
|
| 2876 | ||
| 2877 | /** |
|
| 2878 | * Check if the string is UTF-32. |
|
| @@ 2888-2942 (lines=55) @@ | ||
| 2885 | * <strong>2</strong> for UTF-32BE. |
|
| 2886 | * </p> |
|
| 2887 | */ |
|
public static function is_utf32(string $str)
{
    // Nothing to detect when is_binary() rejects the input.
    if (self::is_binary($str) === false) {
        return false;
    }

    $str = self::remove_bom($str);

    // Filled on first use with count_chars() of the BOM-less input and then
    // shared by both byte-order probes.
    $inputChars = [];

    // One score per byte order; little-endian is probed first.
    $scores = [];
    foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
        $scores[$encoding] = 0;

        $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
        if (!$decoded) {
            continue;
        }

        // Round trip: the decoded text must survive re-encoding and decoding unchanged.
        $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
        $reDecoded = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
        if ($reDecoded !== $decoded) {
            continue;
        }

        if (\count($inputChars) === 0) {
            $inputChars = self::count_chars($str, true);
        }

        // Every key of the decoded text's count_chars() result that is also
        // found (strictly) among the input's count_chars() values adds one point.
        foreach (self::count_chars($reDecoded, true) as $char => $ignored) {
            if (\in_array($char, $inputChars, true) === true) {
                $scores[$encoding]++;
            }
        }
    }

    $littleEndian = $scores['UTF-32LE'];
    $bigEndian = $scores['UTF-32BE'];

    // A tie (including "neither matched") means no decision.
    if ($littleEndian === $bigEndian) {
        return false;
    }

    // 1 => UTF-32LE scored higher, 2 => UTF-32BE scored higher.
    return $littleEndian > $bigEndian ? 1 : 2;
}
|
| 2943 | ||
| 2944 | /** |
|
| 2945 | * Checks whether the passed string contains only byte sequences that appear to be valid UTF-8 characters. |
|