1 | <?php |
||||
2 | |||||
3 | declare(strict_types=1); |
||||
4 | |||||
5 | namespace AlecRabbit\WCWidth\Kernel; |
||||
6 | |||||
7 | use FFI; |
||||
0 ignored issues
–
show
Coding Style
introduced
by
![]() |
|||||
8 | |||||
9 | use RuntimeException; |
||||
10 | |||||
11 | use function is_array; |
||||
12 | use function mb_ord; |
||||
13 | use function mb_strlen; |
||||
14 | |||||
15 | use const AlecRabbit\WCWidth\WIDE_EASTASIAN; |
||||
0 ignored issues
–
show
|
|||||
16 | use const AlecRabbit\WCWidth\ZERO_WIDTH; |
||||
0 ignored issues
–
show
|
|||||
17 | |||||
/**
 * Computes the number of terminal columns occupied by Unicode characters
 * (wcwidth) and by strings (wcswidth), using the version-keyed ZERO_WIDTH
 * and WIDE_EASTASIAN range tables.
 */
class UCode
{
    // NOTE(jquast/wcwidth): created by hand, there isn't anything identifiable other than
    // general Cf category code to identify these, and some characters in Cf
    // category code are of non-zero width.
    // Also includes some Cc, Mn, Zl, and Zp characters
    private const ZERO_WIDTH_CF = [
        0 => true, // Null (Cc)
        0x034F => true, // Combining grapheme joiner (Mn)
        0x200B => true, // Zero width space
        0x200C => true, // Zero width non-joiner
        0x200D => true, // Zero width joiner
        0x200E => true, // Left-to-right mark
        0x200F => true, // Right-to-left mark
        0x2028 => true, // Line separator (Zl)
        0x2029 => true, // Paragraph separator (Zp)
        0x202A => true, // Left-to-right embedding
        0x202B => true, // Right-to-left embedding
        0x202C => true, // Pop directional formatting
        0x202D => true, // Left-to-right override
        0x202E => true, // Right-to-left override
        0x2060 => true, // Word joiner
        0x2061 => true, // Function application
        0x2062 => true, // Invisible times
        0x2063 => true, // Invisible separator
    ];

    /** FFI handle created on the first call to ffi_wcwidth() and reused afterwards. */
    private static ?FFI $ffi = null;

    /**
     * Sums the widths of the first $n characters of $subject.
     *
     * @param string      $subject String to measure.
     * @param int|null    $n       Length passed to array_slice(); null means the whole string.
     * @param string|null $version Unicode version, forwarded to wcwidth().
     *
     * @return int Total width, or -1 as soon as any character reports a negative width.
     */
    public static function wcswidth(string $subject, ?int $n = null, ?string $version = null): int
    {
        $end = $n ?? mb_strlen($subject);
        $chrArray = array_slice(
            static::split($subject),
            0,
            $end
        );
        $width = 0;
        foreach ($chrArray as $char) {
            $wcw = static::wcwidth($char, $version);
            if ($wcw < 0) {
                // A single non-printable character makes the whole string unmeasurable.
                return -1;
            }
            $width += $wcw;
        }
        return $width;
    }

    /**
     * Splits $subject into its non-empty pieces using the `//u` pattern.
     *
     * @return array<string>
     *
     * @throws RuntimeException When preg_split() does not return an array.
     */
    protected static function split(string $subject): array
    {
        $_split = preg_split('//u', $subject, -1, PREG_SPLIT_NO_EMPTY);
        // @codeCoverageIgnoreStart
        if (!is_array($_split)) {
            // Should never happen
            throw new RuntimeException('Failed to split string.');
        }
        // @codeCoverageIgnoreEnd
        return $_split;
    }

    /**
     * Returns the column width of a single character.
     *
     * @param string      $wc      Character whose code point is read with mb_ord().
     * @param string|null $version Unicode version, normalised by UnicodeVersion::refine().
     *
     * @return int 0 for zero-width characters, -1 for C0/C1 control characters,
     *             2 for code points found in the WIDE_EASTASIAN table, 1 otherwise.
     */
    public static function wcwidth(string $wc, ?string $version = null): int
    {
        $version = UnicodeVersion::refine($version);

        $ucs = mb_ord($wc);

        // Hand-maintained zero-width list is consulted first, so NUL yields 0, not -1.
        if (self::ZERO_WIDTH_CF[$ucs] ?? false) {
            return 0;
        }

        // C0/C1 control characters
        if ($ucs < 32 || (0x07F <= $ucs && $ucs < 0x0A0)) {
            return -1;
        }

        // combining characters with zero width
        if (static::bisearch($ucs, ZERO_WIDTH[$version])) {
            return 0;
        }

        // bisearch() yields 1 for a wide code point, giving a width of 2.
        return 1 + static::bisearch($ucs, WIDE_EASTASIAN[$version]);
    }

    /**
     * Binary search for $ucs in a table of inclusive [first, last] ranges.
     *
     * The search only gives meaningful results when $table is a list sorted in
     * ascending order with non-overlapping ranges.
     *
     * @param int                    $ucs   Code point to look up.
     * @param array<array{int, int}> $table Range table to search.
     *
     * @return int 1 when $ucs lies inside one of the ranges, 0 otherwise.
     */
    protected static function bisearch(int $ucs, array $table): int
    {
        // An empty table cannot contain anything; bail out before indexing into it.
        if ($table === []) {
            return 0;
        }

        $lbound = 0;
        $ubound = count($table) - 1;

        // Fast rejection of code points outside the span covered by the table.
        if ($ucs < $table[0][0] || $ucs > $table[$ubound][1]) {
            return 0;
        }

        while ($ubound >= $lbound) {
            // Integer midpoint: halving is what makes this a logarithmic search.
            $mid = intdiv($lbound + $ubound, 2);
            if ($ucs > $table[$mid][1]) {
                $lbound = $mid + 1;
            } elseif ($ucs < $table[$mid][0]) {
                $ubound = $mid - 1;
            } else {
                return 1;
            }
        }

        return 0;
    }

    // @codeCoverageIgnoreStart
    /**
     * Returns the width reported by the C `wcwidth` function, called through FFI.
     *
     * @param string      $wc      Character whose code point is read with mb_ord().
     * @param string|null $version Accepted for signature parity with wcwidth(); ignored.
     *
     * @return int Value returned by the C function.
     */
    public static function ffi_wcwidth(string $wc, ?string $version = null): int
    {
        // Note: $version is ignored
        if (null === self::$ffi) {
            // Declare the C prototype once and cache the resulting handle.
            self::$ffi =
                FFI::cdef(
                    "
                    typedef uint32_t wchar_t;
                    int wcwidth(wchar_t wc);
                    ",
                );
        }
        /** @psalm-suppress PossiblyUndefinedMethod */
        return self::$ffi->wcwidth(mb_ord($wc));
    }
    // @codeCoverageIgnoreEnd
}
||||
145 |