1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace AlecRabbit\WCWidth\Kernel; |
6
|
|
|
|
7
|
|
|
use FFI; |
|
|
|
|
8
|
|
|
|
9
|
|
|
use RuntimeException; |
10
|
|
|
|
11
|
|
|
use function is_array; |
12
|
|
|
use function mb_ord; |
13
|
|
|
use function mb_strlen; |
14
|
|
|
|
15
|
|
|
use const AlecRabbit\WCWidth\WIDE_EASTASIAN; |
|
|
|
|
16
|
|
|
use const AlecRabbit\WCWidth\ZERO_WIDTH; |
|
|
|
|
17
|
|
|
|
18
|
|
|
/**
 * Width lookup for Unicode code points and strings, in the style of the
 * POSIX wcwidth()/wcswidth() functions.
 *
 * Widths are derived from the version-keyed ZERO_WIDTH and WIDE_EASTASIAN
 * range tables plus the hand-maintained ZERO_WIDTH_CF list below.
 */
class UCode
{
    // NOTE(jquast/wcwidth): created by hand, there isn't anything identifiable other than
    // general Cf category code to identify these, and some characters in Cf
    // category code are of non-zero width.
    // Also includes some Cc, Mn, Zl, and Zp characters
    private const ZERO_WIDTH_CF = [
        0 => true, // Null (Cc)
        0x034F => true, // Combining grapheme joiner (Mn)
        0x200B => true, // Zero width space
        0x200C => true, // Zero width non-joiner
        0x200D => true, // Zero width joiner
        0x200E => true, // Left-to-right mark
        0x200F => true, // Right-to-left mark
        0x2028 => true, // Line separator (Zl)
        0x2029 => true, // Paragraph separator (Zp)
        0x202A => true, // Left-to-right embedding
        0x202B => true, // Right-to-left embedding
        0x202C => true, // Pop directional formatting
        0x202D => true, // Left-to-right override
        0x202E => true, // Right-to-left override
        0x2060 => true, // Word joiner
        0x2061 => true, // Function application
        0x2062 => true, // Invisible times
        0x2063 => true, // Invisible separator
    ];

    /** Lazily created FFI handle used by ffi_wcwidth(); null until first use. */
    private static ?FFI $ffi = null;

    /**
     * Sums the widths of the characters of a string.
     *
     * @param string      $subject String to measure; it is split into UTF-8 characters.
     * @param int|null    $n       Length passed to array_slice(): how many leading
     *                             characters to measure. Null measures the whole string.
     * @param string|null $version Unicode version, forwarded unchanged to wcwidth().
     *
     * @return int Total width, or -1 as soon as any measured character reports a
     *             negative width.
     *
     * @throws RuntimeException When the string cannot be split into characters.
     */
    public static function wcswidth(string $subject, ?int $n = null, ?string $version = null): int
    {
        $end = $n ?? mb_strlen($subject);
        $chrArray = array_slice(
            static::split($subject),
            0,
            $end
        );
        $width = 0;
        foreach ($chrArray as $char) {
            $wcw = static::wcwidth($char, $version);
            if ($wcw < 0) {
                // A single unmeasurable character makes the whole result -1.
                return -1;
            }
            $width += $wcw;
        }
        return $width;
    }

    /**
     * Splits a string into its individual UTF-8 characters.
     *
     * @return array<string>
     *
     * @throws RuntimeException When preg_split() does not return an array.
     */
    protected static function split(string $subject): array
    {
        $_split = preg_split('//u', $subject, -1, PREG_SPLIT_NO_EMPTY);
        // @codeCoverageIgnoreStart
        if (!is_array($_split)) {
            // Should never happen
            throw new RuntimeException('Failed to split string.');
        }
        // @codeCoverageIgnoreEnd
        return $_split;
    }

    /**
     * Returns the width of a single character.
     *
     * The code point is obtained with mb_ord(), so only the first character of
     * $wc is examined.
     *
     * @param string      $wc      Character to measure.
     * @param string|null $version Unicode version; normalised through
     *                             UnicodeVersion::refine() and then used as the key
     *                             into the ZERO_WIDTH and WIDE_EASTASIAN tables.
     *
     * @return int 0 for code points listed in ZERO_WIDTH_CF or found in the
     *             ZERO_WIDTH table, -1 for C0/C1 control characters, otherwise 1,
     *             or 2 when the code point is found in the WIDE_EASTASIAN table.
     */
    public static function wcwidth(string $wc, ?string $version = null): int
    {
        $version = UnicodeVersion::refine($version);

        $ucs = mb_ord($wc);

        if (self::ZERO_WIDTH_CF[$ucs] ?? false) { // 0 width
            return 0;
        }

        // C0/C1 control characters
        if ($ucs < 32 || (0x07F <= $ucs && $ucs < 0x0A0)) {
            return -1;
        }

        // combining characters with zero width
        if (static::bisearch($ucs, ZERO_WIDTH[$version])) {
            return 0;
        }

        // bisearch() yields 0 or 1, so the result is 1 (narrow) or 2 (wide).
        return 1 + static::bisearch($ucs, WIDE_EASTASIAN[$version]);
    }

    /**
     * Binary search for a code point in a table of ranges.
     *
     * The table is expected to be a list of [first, last] pairs with inclusive
     * bounds, sorted in ascending order and not overlapping; the search relies
     * on that ordering.
     *
     * @param int   $ucs   Code point to look up.
     * @param array $table List of [first, last] range pairs.
     *
     * @return int 1 when $ucs lies inside one of the ranges, 0 otherwise
     *             (including when the table is empty).
     */
    protected static function bisearch(int $ucs, array $table): int
    {
        // Guard: an empty table has no $table[0] to compare against.
        if ($table === []) {
            return 0;
        }

        $lbound = 0;
        $ubound = count($table) - 1;

        // Quick rejection when the code point is outside the table's overall span.
        if ($ucs < $table[0][0] || $ucs > $table[$ubound][1]) {
            return 0;
        }
        while ($ubound >= $lbound) {
            // Integer midpoint. A trailing "// 2" would only be a PHP comment and
            // would leave the sum un-halved, turning this into a linear scan.
            $mid = intdiv($lbound + $ubound, 2);
            if ($ucs > $table[$mid][1]) {
                $lbound = $mid + 1;
            } elseif ($ucs < $table[$mid][0]) {
                $ubound = $mid - 1;
            } else {
                return 1;
            }
        }
        return 0;
    }

    // @codeCoverageIgnoreStart
    /**
     * Returns the width of a character as reported by a native wcwidth()
     * function called through FFI.
     *
     * The FFI handle is created on first use and cached in self::$ffi. No
     * library name is passed to FFI::cdef(), so the symbol is presumably
     * resolved from the libraries already loaded into the process.
     *
     * @param string      $wc      Character to measure; only its first code point
     *                             (via mb_ord()) is passed to the native function.
     * @param string|null $version Ignored.
     *
     * @return int Value returned by the native wcwidth().
     */
    public static function ffi_wcwidth(string $wc, ?string $version = null): int
    {
        // Note: $version is ignored
        if (null === self::$ffi) {
            self::$ffi =
                FFI::cdef(
                    "
                    typedef uint32_t wchar_t;
                    int wcwidth(wchar_t wc);
                    ",
                );
        }
        /** @psalm-suppress PossiblyUndefinedMethod */
        return self::$ffi->wcwidth(mb_ord($wc));
    }
    // @codeCoverageIgnoreEnd
}
145
|
|
|
|