|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/* |
|
4
|
|
|
* This file is part of the Symfony package. |
|
5
|
|
|
* |
|
6
|
|
|
* (c) Fabien Potencier <[email protected]> |
|
7
|
|
|
* |
|
8
|
|
|
* For the full copyright and license information, please view the LICENSE |
|
9
|
|
|
* file that was distributed with this source code. |
|
10
|
|
|
*/ |
|
11
|
|
|
|
|
12
|
|
|
namespace Symfony\Component\String; |
|
13
|
|
|
|
|
14
|
|
|
use Symfony\Component\String\Exception\ExceptionInterface; |
|
15
|
|
|
use Symfony\Component\String\Exception\InvalidArgumentException; |
|
16
|
|
|
use Symfony\Component\String\Exception\RuntimeException; |
|
17
|
|
|
|
|
18
|
|
|
/** |
|
19
|
|
|
* Represents a string of abstract Unicode characters. |
|
20
|
|
|
* |
|
21
|
|
|
* Unicode defines 3 types of "characters" (bytes, code points and grapheme clusters). |
|
22
|
|
|
* This class is the abstract type to use as a type-hint when the logic you want to |
|
23
|
|
|
* implement is Unicode-aware but doesn't care about code points vs grapheme clusters. |
|
24
|
|
|
* |
|
25
|
|
|
* @author Nicolas Grekas <[email protected]> |
|
26
|
|
|
* |
|
27
|
|
|
* @throws ExceptionInterface |
|
28
|
|
|
*/ |
|
29
|
|
|
abstract class AbstractUnicodeString extends AbstractString |
|
30
|
|
|
{ |
|
31
|
|
|
public const NFC = \Normalizer::NFC; |
|
32
|
|
|
public const NFD = \Normalizer::NFD; |
|
33
|
|
|
public const NFKC = \Normalizer::NFKC; |
|
34
|
|
|
public const NFKD = \Normalizer::NFKD; |
|
35
|
|
|
|
|
36
|
|
|
// all ASCII letters sorted by typical frequency of occurrence |
|
37
|
|
|
private const ASCII = "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"; |
|
38
|
|
|
|
|
39
|
|
|
// the subset of folded case mappings that is not in lower case mappings |
|
40
|
|
|
private const FOLD_FROM = ['İ', 'µ', 'ſ', "\xCD\x85", 'ς', 'ϐ', 'ϑ', 'ϕ', 'ϖ', 'ϰ', 'ϱ', 'ϵ', 'ẛ', "\xE1\xBE\xBE", 'ß', 'İ', 'ʼn', 'ǰ', 'ΐ', 'ΰ', 'և', 'ẖ', 'ẗ', 'ẘ', 'ẙ', 'ẚ', 'ẞ', 'ὐ', 'ὒ', 'ὔ', 'ὖ', 'ᾀ', 'ᾁ', 'ᾂ', 'ᾃ', 'ᾄ', 'ᾅ', 'ᾆ', 'ᾇ', 'ᾈ', 'ᾉ', 'ᾊ', 'ᾋ', 'ᾌ', 'ᾍ', 'ᾎ', 'ᾏ', 'ᾐ', 'ᾑ', 'ᾒ', 'ᾓ', 'ᾔ', 'ᾕ', 'ᾖ', 'ᾗ', 'ᾘ', 'ᾙ', 'ᾚ', 'ᾛ', 'ᾜ', 'ᾝ', 'ᾞ', 'ᾟ', 'ᾠ', 'ᾡ', 'ᾢ', 'ᾣ', 'ᾤ', 'ᾥ', 'ᾦ', 'ᾧ', 'ᾨ', 'ᾩ', 'ᾪ', 'ᾫ', 'ᾬ', 'ᾭ', 'ᾮ', 'ᾯ', 'ᾲ', 'ᾳ', 'ᾴ', 'ᾶ', 'ᾷ', 'ᾼ', 'ῂ', 'ῃ', 'ῄ', 'ῆ', 'ῇ', 'ῌ', 'ῒ', 'ΐ', 'ῖ', 'ῗ', 'ῢ', 'ΰ', 'ῤ', 'ῦ', 'ῧ', 'ῲ', 'ῳ', 'ῴ', 'ῶ', 'ῷ', 'ῼ', 'ff', 'fi', 'fl', 'ffi', 'ffl', 'ſt', 'st', 'ﬓ', 'ﬔ', 'ﬕ', 'ﬖ', 'ﬗ']; |
|
41
|
|
|
private const FOLD_TO = ['i̇', 'μ', 's', 'ι', 'σ', 'β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', 'ṡ', 'ι', 'ss', 'i̇', 'ʼn', 'ǰ', 'ΐ', 'ΰ', 'եւ', 'ẖ', 'ẗ', 'ẘ', 'ẙ', 'aʾ', 'ss', 'ὐ', 'ὒ', 'ὔ', 'ὖ', 'ἀι', 'ἁι', 'ἂι', 'ἃι', 'ἄι', 'ἅι', 'ἆι', 'ἇι', 'ἀι', 'ἁι', 'ἂι', 'ἃι', 'ἄι', 'ἅι', 'ἆι', 'ἇι', 'ἠι', 'ἡι', 'ἢι', 'ἣι', 'ἤι', 'ἥι', 'ἦι', 'ἧι', 'ἠι', 'ἡι', 'ἢι', 'ἣι', 'ἤι', 'ἥι', 'ἦι', 'ἧι', 'ὠι', 'ὡι', 'ὢι', 'ὣι', 'ὤι', 'ὥι', 'ὦι', 'ὧι', 'ὠι', 'ὡι', 'ὢι', 'ὣι', 'ὤι', 'ὥι', 'ὦι', 'ὧι', 'ὰι', 'αι', 'άι', 'ᾶ', 'ᾶι', 'αι', 'ὴι', 'ηι', 'ήι', 'ῆ', 'ῆι', 'ηι', 'ῒ', 'ΐ', 'ῖ', 'ῗ', 'ῢ', 'ΰ', 'ῤ', 'ῦ', 'ῧ', 'ὼι', 'ωι', 'ώι', 'ῶ', 'ῶι', 'ωι', 'ff', 'fi', 'fl', 'ffi', 'ffl', 'st', 'st', 'մն', 'մե', 'մի', 'վն', 'մխ']; |
|
42
|
|
|
|
|
43
|
|
|
// the subset of upper case mappings that map one code point to many code points |
|
44
|
|
|
private const UPPER_FROM = ['ß', 'ff', 'fi', 'fl', 'ffi', 'ffl', 'ſt', 'st', 'և', 'ﬓ', 'ﬔ', 'ﬕ', 'ﬖ', 'ﬗ', 'ʼn', 'ΐ', 'ΰ', 'ǰ', 'ẖ', 'ẗ', 'ẘ', 'ẙ', 'ẚ', 'ὐ', 'ὒ', 'ὔ', 'ὖ', 'ᾶ', 'ῆ', 'ῒ', 'ΐ', 'ῖ', 'ῗ', 'ῢ', 'ΰ', 'ῤ', 'ῦ', 'ῧ', 'ῶ']; |
|
45
|
|
|
private const UPPER_TO = ['SS', 'FF', 'FI', 'FL', 'FFI', 'FFL', 'ST', 'ST', 'ԵՒ', 'ՄՆ', 'ՄԵ', 'ՄԻ', 'ՎՆ', 'ՄԽ', 'ʼN', 'Ϊ́', 'Ϋ́', 'J̌', 'H̱', 'T̈', 'W̊', 'Y̊', 'Aʾ', 'Υ̓', 'Υ̓̀', 'Υ̓́', 'Υ̓͂', 'Α͂', 'Η͂', 'Ϊ̀', 'Ϊ́', 'Ι͂', 'Ϊ͂', 'Ϋ̀', 'Ϋ́', 'Ρ̓', 'Υ͂', 'Ϋ͂', 'Ω͂']; |
|
46
|
|
|
|
|
47
|
|
|
// the subset of https://github.com/unicode-org/cldr/blob/master/common/transforms/Latin-ASCII.xml that is not in NFKD |
|
48
|
|
|
private const TRANSLIT_FROM = ['Æ', 'Ð', 'Ø', 'Þ', 'ß', 'æ', 'ð', 'ø', 'þ', 'Đ', 'đ', 'Ħ', 'ħ', 'ı', 'ĸ', 'Ŀ', 'ŀ', 'Ł', 'ł', 'ʼn', 'Ŋ', 'ŋ', 'Œ', 'œ', 'Ŧ', 'ŧ', 'ƀ', 'Ɓ', 'Ƃ', 'ƃ', 'Ƈ', 'ƈ', 'Ɖ', 'Ɗ', 'Ƌ', 'ƌ', 'Ɛ', 'Ƒ', 'ƒ', 'Ɠ', 'ƕ', 'Ɩ', 'Ɨ', 'Ƙ', 'ƙ', 'ƚ', 'Ɲ', 'ƞ', 'Ƣ', 'ƣ', 'Ƥ', 'ƥ', 'ƫ', 'Ƭ', 'ƭ', 'Ʈ', 'Ʋ', 'Ƴ', 'ƴ', 'Ƶ', 'ƶ', 'DŽ', 'Dž', 'dž', 'Ǥ', 'ǥ', 'ȡ', 'Ȥ', 'ȥ', 'ȴ', 'ȵ', 'ȶ', 'ȷ', 'ȸ', 'ȹ', 'Ⱥ', 'Ȼ', 'ȼ', 'Ƚ', 'Ⱦ', 'ȿ', 'ɀ', 'Ƀ', 'Ʉ', 'Ɇ', 'ɇ', 'Ɉ', 'ɉ', 'Ɍ', 'ɍ', 'Ɏ', 'ɏ', 'ɓ', 'ɕ', 'ɖ', 'ɗ', 'ɛ', 'ɟ', 'ɠ', 'ɡ', 'ɢ', 'ɦ', 'ɧ', 'ɨ', 'ɪ', 'ɫ', 'ɬ', 'ɭ', 'ɱ', 'ɲ', 'ɳ', 'ɴ', 'ɶ', 'ɼ', 'ɽ', 'ɾ', 'ʀ', 'ʂ', 'ʈ', 'ʉ', 'ʋ', 'ʏ', 'ʐ', 'ʑ', 'ʙ', 'ʛ', 'ʜ', 'ʝ', 'ʟ', 'ʠ', 'ʣ', 'ʥ', 'ʦ', 'ʪ', 'ʫ', 'ᴀ', 'ᴁ', 'ᴃ', 'ᴄ', 'ᴅ', 'ᴆ', 'ᴇ', 'ᴊ', 'ᴋ', 'ᴌ', 'ᴍ', 'ᴏ', 'ᴘ', 'ᴛ', 'ᴜ', 'ᴠ', 'ᴡ', 'ᴢ', 'ᵫ', 'ᵬ', 'ᵭ', 'ᵮ', 'ᵯ', 'ᵰ', 'ᵱ', 'ᵲ', 'ᵳ', 'ᵴ', 'ᵵ', 'ᵶ', 'ᵺ', 'ᵻ', 'ᵽ', 'ᵾ', 'ᶀ', 'ᶁ', 'ᶂ', 'ᶃ', 'ᶄ', 'ᶅ', 'ᶆ', 'ᶇ', 'ᶈ', 'ᶉ', 'ᶊ', 'ᶌ', 'ᶍ', 'ᶎ', 'ᶏ', 'ᶑ', 'ᶒ', 'ᶓ', 'ᶖ', 'ᶙ', 'ẚ', 'ẜ', 'ẝ', 'ẞ', 'Ỻ', 'ỻ', 'Ỽ', 'ỽ', 'Ỿ', 'ỿ', '©', '®', '₠', '₢', '₣', '₤', '₧', '₺', '₹', 'ℌ', '℞', '㎧', '㎮', '㏆', '㏗', '㏞', '㏟', '¼', '½', '¾', '⅓', '⅔', '⅕', '⅖', '⅗', '⅘', '⅙', '⅚', '⅛', '⅜', '⅝', '⅞', '⅟', '〇', '‘', '’', '‚', '‛', '“', '”', '„', '‟', '′', '″', '〝', '〞', '«', '»', '‹', '›', '‐', '‑', '‒', '–', '—', '―', '︱', '︲', '﹘', '‖', '⁄', '⁅', '⁆', '⁎', '、', '。', '〈', '〉', '《', '》', '〔', '〕', '〘', '〙', '〚', '〛', '︑', '︒', '︹', '︺', '︽', '︾', '︿', '﹀', '﹑', '﹝', '﹞', '⦅', '⦆', '。', '、', '×', '÷', '−', '∕', '∖', '∣', '∥', '≪', '≫', '⦅', '⦆']; |
|
49
|
|
|
private const TRANSLIT_TO = ['AE', 'D', 'O', 'TH', 'ss', 'ae', 'd', 'o', 'th', 'D', 'd', 'H', 'h', 'i', 'q', 'L', 'l', 'L', 'l', '\'n', 'N', 'n', 'OE', 'oe', 'T', 't', 'b', 'B', 'B', 'b', 'C', 'c', 'D', 'D', 'D', 'd', 'E', 'F', 'f', 'G', 'hv', 'I', 'I', 'K', 'k', 'l', 'N', 'n', 'OI', 'oi', 'P', 'p', 't', 'T', 't', 'T', 'V', 'Y', 'y', 'Z', 'z', 'DZ', 'Dz', 'dz', 'G', 'g', 'd', 'Z', 'z', 'l', 'n', 't', 'j', 'db', 'qp', 'A', 'C', 'c', 'L', 'T', 's', 'z', 'B', 'U', 'E', 'e', 'J', 'j', 'R', 'r', 'Y', 'y', 'b', 'c', 'd', 'd', 'e', 'j', 'g', 'g', 'G', 'h', 'h', 'i', 'I', 'l', 'l', 'l', 'm', 'n', 'n', 'N', 'OE', 'r', 'r', 'r', 'R', 's', 't', 'u', 'v', 'Y', 'z', 'z', 'B', 'G', 'H', 'j', 'L', 'q', 'dz', 'dz', 'ts', 'ls', 'lz', 'A', 'AE', 'B', 'C', 'D', 'D', 'E', 'J', 'K', 'L', 'M', 'O', 'P', 'T', 'U', 'V', 'W', 'Z', 'ue', 'b', 'd', 'f', 'm', 'n', 'p', 'r', 'r', 's', 't', 'z', 'th', 'I', 'p', 'U', 'b', 'd', 'f', 'g', 'k', 'l', 'm', 'n', 'p', 'r', 's', 'v', 'x', 'z', 'a', 'd', 'e', 'e', 'i', 'u', 'a', 's', 's', 'SS', 'LL', 'll', 'V', 'v', 'Y', 'y', '(C)', '(R)', 'CE', 'Cr', 'Fr.', 'L.', 'Pts', 'TL', 'Rs', 'x', 'Rx', 'm/s', 'rad/s', 'C/kg', 'pH', 'V/m', 'A/m', ' 1/4', ' 1/2', ' 3/4', ' 1/3', ' 2/3', ' 1/5', ' 2/5', ' 3/5', ' 4/5', ' 1/6', ' 5/6', ' 1/8', ' 3/8', ' 5/8', ' 7/8', ' 1/', '0', '\'', '\'', ',', '\'', '"', '"', ',,', '"', '\'', '"', '"', '"', '<<', '>>', '<', '>', '-', '-', '-', '-', '-', '-', '-', '-', '-', '||', '/', '[', ']', '*', ',', '.', '<', '>', '<<', '>>', '[', ']', '[', ']', '[', ']', ',', '.', '[', ']', '<<', '>>', '<', '>', ',', '[', ']', '((', '))', '.', ',', '*', '/', '-', '/', '\\', '|', '||', '<<', '>>', '((', '))']; |
|
50
|
|
|
|
|
51
|
|
|
private static $transliterators = []; |
|
52
|
|
|
|
|
53
|
|
|
/**
 * Creates a string by encoding each given code point as a UTF-8 byte sequence.
 *
 * Each code point is first reduced modulo 0x200000, then written on one to
 * four bytes depending on its magnitude.
 *
 * @return static
 */
public static function fromCodePoints(int ...$codes): self
{
    $utf8 = '';

    foreach ($codes as $codePoint) {
        $codePoint %= 0x200000;

        if ($codePoint < 0x80) {
            // One byte: 0xxxxxxx
            $utf8 .= \chr($codePoint);

            continue;
        }

        if ($codePoint < 0x800) {
            // Two bytes: 110xxxxx 10xxxxxx
            $utf8 .= \chr(0xC0 | ($codePoint >> 6))
                .\chr(0x80 | ($codePoint & 0x3F));

            continue;
        }

        if ($codePoint < 0x10000) {
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            $utf8 .= \chr(0xE0 | ($codePoint >> 12))
                .\chr(0x80 | (($codePoint >> 6) & 0x3F))
                .\chr(0x80 | ($codePoint & 0x3F));

            continue;
        }

        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $utf8 .= \chr(0xF0 | ($codePoint >> 18))
            .\chr(0x80 | (($codePoint >> 12) & 0x3F))
            .\chr(0x80 | (($codePoint >> 6) & 0x3F))
            .\chr(0x80 | ($codePoint & 0x3F));
    }

    return new static($utf8);
}
|
74
|
|
|
|
|
75
|
|
|
/**
 * Generic UTF-8 to ASCII transliteration.
 *
 * Install the intl extension for best results.
 *
 * The caller's rules are applied one per iteration, in order, after a leading
 * "nfd" step and before the built-in trailing steps ("latin-ascii",
 * "any-latin/bgn" when the intl transliterator functions exist, "nfkd" and
 * "[:nonspacing mark:] remove"). The loop ends as soon as no byte outside of
 * self::ASCII is found before the last byte of the remaining string. An empty
 * rule drops every rule that follows it; once no rule is left, a fallback
 * based on iconv (or on a plain "?" substitution) is used.
 *
 * @param string[]|\Transliterator[] $rules See "*-Latin" rules from Transliterator::listIDs()
 *
 * @throws InvalidArgumentException When a rule name cannot be turned into a transliterator
 * @throws \LogicException          When the iconv fallback cannot transliterate a character
 */
public function ascii(array $rules = []): self
{
    $str = clone $this;
    // $s holds the part that still needs processing; $str->string accumulates the ASCII-only prefix.
    $s = $str->string;
    $str->string = '';

    array_unshift($rules, 'nfd');
    $rules[] = 'latin-ascii';

    if (\function_exists('transliterator_transliterate')) {
        $rules[] = 'any-latin/bgn';
    }

    $rules[] = 'nfkd';
    $rules[] = '[:nonspacing mark:] remove';

    // $i is the length of the leading run of bytes that all belong to self::ASCII.
    while (\strlen($s) - 1 > $i = strspn($s, self::ASCII)) {
        // Flush the ASCII prefix but keep its last byte in $s
        // (presumably so that a rule can still see the base character of a following mark - TODO confirm).
        if (0 < --$i) {
            $str->string .= substr($s, 0, $i);
            $s = substr($s, $i);
        }

        if (!$rule = array_shift($rules)) {
            $rules = []; // An empty rule interrupts the next ones
        }

        if ($rule instanceof \Transliterator) {
            $s = $rule->transliterate($s);
        } elseif ($rule) {
            // Rule names are matched case-insensitively; the ones below are handled without intl's Transliterator.
            if ('nfd' === $rule = strtolower($rule)) {
                normalizer_is_normalized($s, self::NFD) ?: $s = normalizer_normalize($s, self::NFD);
            } elseif ('nfkd' === $rule) {
                normalizer_is_normalized($s, self::NFKD) ?: $s = normalizer_normalize($s, self::NFKD);
            } elseif ('[:nonspacing mark:] remove' === $rule) {
                $s = preg_replace('/\p{Mn}++/u', '', $s);
            } elseif ('latin-ascii' === $rule) {
                $s = str_replace(self::TRANSLIT_FROM, self::TRANSLIT_TO, $s);
            } elseif ('de-ascii' === $rule) {
                // An upper case A/U/O with a combining diaeresis followed by a lower case letter gets a lower case "e"...
                $s = preg_replace("/([AUO])\u{0308}(?=\p{Ll})/u", '$1e', $s);
                // ...the remaining ones are mapped to ae/oe/ue or AE/OE/UE.
                $s = str_replace(["a\u{0308}", "o\u{0308}", "u\u{0308}", "A\u{0308}", "O\u{0308}", "U\u{0308}"], ['ae', 'oe', 'ue', 'AE', 'OE', 'UE'], $s);
            } elseif (\function_exists('transliterator_transliterate')) {
                // Any other rule name is handed to intl; created transliterators are cached per rule name.
                if (null === $transliterator = self::$transliterators[$rule] ?? self::$transliterators[$rule] = \Transliterator::create($rule)) {
                    // "any-latin/bgn" could not be created: retry with plain "any-latin".
                    if ('any-latin/bgn' === $rule) {
                        $rule = 'any-latin';
                        $transliterator = self::$transliterators[$rule] ?? self::$transliterators[$rule] = \Transliterator::create($rule);
                    }

                    if (null === $transliterator) {
                        throw new InvalidArgumentException(sprintf('Unknown transliteration rule "%s".', $rule));
                    }

                    // Only reached through the fallback above: remember it under the original name.
                    self::$transliterators['any-latin/bgn'] = $transliterator;
                }

                $s = $transliterator->transliterate($s);
            }
        } elseif (!\function_exists('iconv')) {
            // No rule left and no iconv: replace every non-ASCII character by "?".
            $s = preg_replace('/[^\x00-\x7F]/u', '?', $s);
        } elseif (ICONV_IMPL === 'glibc') {
            $s = iconv('UTF-8', 'ASCII//TRANSLIT', $s);
        } else {
            // Other iconv implementations: transliterate the non-ASCII characters one by one.
            $s = @preg_replace_callback('/[^\x00-\x7F]/u', static function ($c) {
                if ('' === $c = (string) iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c[0])) {
                    throw new \LogicException(sprintf('"%s" requires a translit-able iconv implementation, try installing "gnu-libiconv" if you\'re using Alpine Linux.', static::class));
                }

                // Multi-byte results get their leading quote/accent-like characters stripped.
                return 1 < \strlen($c) ? ltrim($c, '\'`"^~') : (\strlen($c) ? $c : '?');
            }, $s);
        }
    }

    $str->string .= $s;

    return $str;
}
|
157
|
|
|
|
|
158
|
|
|
/**
 * Returns a camelCased copy of the string.
 *
 * Every run of characters that are neither letters nor the digits 0-9 is
 * turned into a single space; the first character matched at a word boundary
 * is lower-cased, the following ones are title-cased, then spaces are removed.
 */
public function camel(): parent
{
    $boundaryCount = 0;

    $spaced = preg_replace('/[^\pL0-9]++/u', ' ', $this->string);

    $cased = preg_replace_callback('/\b./u', static function ($m) use (&$boundaryCount) {
        if (1 < ++$boundaryCount) {
            return mb_convert_case($m[0], MB_CASE_TITLE, 'UTF-8');
        }

        // First match only: the dotted capital I is mapped explicitly instead of going through mb_strtolower().
        if ('İ' === $m[0]) {
            return 'i̇';
        }

        return mb_strtolower($m[0], 'UTF-8');
    }, $spaced);

    $camel = clone $this;
    $camel->string = str_replace(' ', '', $cased);

    return $camel;
}
|
167
|
|
|
|
|
168
|
|
|
/**
 * Returns the code points of the one-character slice taken at the given offset.
 *
 * @return int[] An empty array when the slice is an empty string
 */
public function codePointsAt(int $offset): array
{
    $slice = $this->slice($offset, 1);
    $result = [];

    if ('' !== $slice->string) {
        // The slice is split into its individual code points.
        foreach (preg_split('//u', $slice->string, -1, PREG_SPLIT_NO_EMPTY) as $char) {
            $result[] = mb_ord($char, 'UTF-8');
        }
    }

    return $result;
}
|
187
|
|
|
|
|
188
|
|
|
/**
 * Returns a case-folded copy of the string.
 *
 * When $compat is true and Normalizer::NFKC_CF is available (PHP >= 7.3),
 * folding is delegated to the normalizer. Otherwise the string is normalized
 * (NFKC when $compat is true, NFC when it is false), the extra mappings of
 * self::FOLD_FROM/self::FOLD_TO are applied and the result is lower-cased.
 *
 * @param bool $compat Whether to use the compatibility normalization form
 */
public function folded(bool $compat = true): parent
{
    $str = clone $this;

    if (!$compat || \PHP_VERSION_ID < 70300 || !\defined('Normalizer::NFKC_CF')) {
        $str->string = normalizer_normalize($str->string, $compat ? \Normalizer::NFKC : \Normalizer::NFC);
        // Fold the *normalized* value: reading $this->string here would throw the normalization away.
        $str->string = mb_strtolower(str_replace(self::FOLD_FROM, self::FOLD_TO, $str->string), 'UTF-8');
    } else {
        $str->string = normalizer_normalize($str->string, \Normalizer::NFKC_CF);
    }

    return $str;
}
|
201
|
|
|
|
|
202
|
|
|
/**
 * Joins the given strings, using the current string as glue.
 *
 * When $lastGlue is given and there is more than one string, the last string
 * is attached with $lastGlue instead of the regular glue.
 *
 * @throws InvalidArgumentException When the result is not valid UTF-8
 */
public function join(array $strings, string $lastGlue = null): parent
{
    $suffix = '';

    if (null !== $lastGlue && 1 < \count($strings)) {
        $suffix = $lastGlue.array_pop($strings);
    }

    $joined = clone $this;
    $joined->string = implode($this->string, $strings).$suffix;

    if (!preg_match('//u', $joined->string)) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    return $joined;
}
|
215
|
|
|
|
|
216
|
|
|
/**
 * Returns a lower-cased copy of the string.
 */
public function lower(): parent
{
    $lowered = clone $this;

    // The dotted capital I is replaced explicitly before calling mb_strtolower().
    $prepared = str_replace('İ', 'i̇', $lowered->string);
    $lowered->string = mb_strtolower($prepared, 'UTF-8');

    return $lowered;
}
|
223
|
|
|
|
|
224
|
|
|
/**
 * Matches the string against a regular expression, always in UTF-8 ("u") mode.
 *
 * preg_match_all() is used when PREG_PATTERN_ORDER or PREG_SET_ORDER is part of
 * $flags, preg_match() otherwise. The "i" modifier is appended when the
 * instance is in ignore-case mode.
 *
 * @return array The matches, unmatched groups being reported as null
 *
 * @throws InvalidArgumentException When PHP reports a warning or notice while matching
 * @throws RuntimeException         When matching fails with a PCRE error
 */
public function match(string $regexp, int $flags = 0, int $offset = 0): array
{
    $matcher = 'preg_match';

    if ((PREG_PATTERN_ORDER | PREG_SET_ORDER) & $flags) {
        $matcher = 'preg_match_all';
    }

    $pattern = $regexp.($this->ignoreCase ? 'i' : '').'u';

    set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });

    try {
        if (false !== $matcher($pattern, $this->string, $matches, $flags | PREG_UNMATCHED_AS_NULL, $offset)) {
            return $matches;
        }

        $errorCode = preg_last_error();

        // Look up the name of the PREG_*_ERROR constant that carries this code.
        foreach (get_defined_constants(true)['pcre'] as $name => $value) {
            if ($errorCode === $value && '_ERROR' === substr($name, -6)) {
                throw new RuntimeException('Matching failed with '.$name.'.');
            }
        }

        throw new RuntimeException('Matching failed with unknown error code.');
    } finally {
        restore_error_handler();
    }
}
|
252
|
|
|
|
|
253
|
|
|
/**
 * Returns a copy of the string normalized to the given Unicode form.
 *
 * @param int $form One of self::NFC, self::NFD, self::NFKC or self::NFKD
 *
 * @return static
 *
 * @throws InvalidArgumentException When the form is not one of the four supported ones
 */
public function normalize(int $form = self::NFC): self
{
    $supportedForms = [self::NFC, self::NFD, self::NFKC, self::NFKD];

    if (!\in_array($form, $supportedForms)) {
        throw new InvalidArgumentException('Unsupported normalization form.');
    }

    $normalized = clone $this;

    // Skip the normalization when the string is already in the requested form.
    if (!normalizer_is_normalized($normalized->string, $form)) {
        $normalized->string = normalizer_normalize($normalized->string, $form);
    }

    return $normalized;
}
|
267
|
|
|
|
|
268
|
|
|
/**
 * Pads the string on both sides up to the given length.
 *
 * @throws InvalidArgumentException When $padStr is empty or not valid UTF-8
 */
public function padBoth(int $length, string $padStr = ' '): parent
{
    $isUsablePad = '' !== $padStr && preg_match('//u', $padStr);

    if (!$isUsablePad) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    // Wrap the padding in an instance of the current class so that pad() can measure and slice it.
    $padding = clone $this;
    $padding->string = $padStr;

    return $this->pad($length, $padding, STR_PAD_BOTH);
}
|
279
|
|
|
|
|
280
|
|
|
/**
 * Pads the end of the string up to the given length.
 *
 * @throws InvalidArgumentException When $padStr is empty or not valid UTF-8
 */
public function padEnd(int $length, string $padStr = ' '): parent
{
    $isUsablePad = '' !== $padStr && preg_match('//u', $padStr);

    if (!$isUsablePad) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    // Wrap the padding in an instance of the current class so that pad() can measure and slice it.
    $padding = clone $this;
    $padding->string = $padStr;

    return $this->pad($length, $padding, STR_PAD_RIGHT);
}
|
291
|
|
|
|
|
292
|
|
|
/**
 * Pads the start of the string up to the given length.
 *
 * @throws InvalidArgumentException When $padStr is empty or not valid UTF-8
 */
public function padStart(int $length, string $padStr = ' '): parent
{
    $isUsablePad = '' !== $padStr && preg_match('//u', $padStr);

    if (!$isUsablePad) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    // Wrap the padding in an instance of the current class so that pad() can measure and slice it.
    $padding = clone $this;
    $padding->string = $padStr;

    return $this->pad($length, $padding, STR_PAD_LEFT);
}
|
303
|
|
|
|
|
304
|
|
|
/**
 * Replaces the matches of a regular expression, always in UTF-8 ("u") mode.
 *
 * The "i" modifier is appended when the instance is in ignore-case mode.
 * An array or a Closure is used as a replacement callback, whose return value
 * must be a valid UTF-8 string; anything else is used as a replacement string.
 *
 * @param string|array|\Closure $to The replacement string or callback
 *
 * @throws \TypeError               When an array that is not callable is given
 * @throws InvalidArgumentException When a replacement is not valid UTF-8, or when PHP reports a warning or notice
 * @throws RuntimeException         When replacing fails with a PCRE error
 */
public function replaceMatches(string $fromRegexp, $to): parent
{
    $pattern = $fromRegexp.($this->ignoreCase ? 'i' : '').'u';

    if (\is_array($to) || $to instanceof \Closure) {
        if (!\is_callable($to)) {
            throw new \TypeError(sprintf('Argument 2 passed to "%s::replaceMatches()" must be callable, array given.', static::class));
        }

        $callback = $to;
        $replacer = 'preg_replace_callback';

        // Wrap the user callback so that each value it returns gets validated.
        $to = static function (array $m) use ($callback): string {
            $replacement = $callback($m);

            $isValid = '' === $replacement || (\is_string($replacement) && preg_match('//u', $replacement));

            if (!$isValid) {
                throw new InvalidArgumentException('Replace callback must return a valid UTF-8 string.');
            }

            return $replacement;
        };
    } else {
        if ('' !== $to && !preg_match('//u', $to)) {
            throw new InvalidArgumentException('Invalid UTF-8 string.');
        }

        $replacer = 'preg_replace';
    }

    set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });

    try {
        $result = $replacer($pattern, $to, $this->string);

        if (null === $result) {
            $errorCode = preg_last_error();

            // Look up the name of the PREG_*_ERROR constant that carries this code.
            foreach (get_defined_constants(true)['pcre'] as $name => $value) {
                if ($errorCode === $value && '_ERROR' === substr($name, -6)) {
                    throw new RuntimeException('Matching failed with '.$name.'.');
                }
            }

            throw new RuntimeException('Matching failed with unknown error code.');
        }
    } finally {
        restore_error_handler();
    }

    $replaced = clone $this;
    $replaced->string = $result;

    return $replaced;
}
|
354
|
|
|
|
|
355
|
|
|
/**
 * Returns a copy of the string with its pieces in reverse order.
 *
 * The string is split on \X (extended grapheme clusters), so the code points
 * that make up one cluster keep their relative order.
 */
public function reverse(): parent
{
    $reversed = clone $this;

    $clusters = preg_split('/(\X)/u', $reversed->string, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
    $reversed->string = implode('', array_reverse($clusters));

    return $reversed;
}
|
362
|
|
|
|
|
363
|
|
|
/**
 * Returns a snake_cased copy of the string.
 *
 * The string is first converted with camel() then title(); an underscore is
 * inserted between a run of upper case letters and a following
 * upper-then-lower pair, and between a lower case letter or digit and an
 * upper case letter. The result is lower-cased.
 */
public function snake(): parent
{
    $snake = $this->camel()->title();

    $boundaries = [
        '/(\p{Lu}+)(\p{Lu}\p{Ll})/u',
        '/([\p{Ll}0-9])(\p{Lu})/u',
    ];

    $separated = preg_replace($boundaries, '\1_\2', $snake->string);
    $snake->string = mb_strtolower($separated, 'UTF-8');

    return $snake;
}
|
370
|
|
|
|
|
371
|
|
|
/**
 * Returns a copy of the string with title-casing applied at word boundaries.
 *
 * @param bool $allWords Whether to process every match of a character following a
 *                       word boundary (true) or only the first one (false)
 */
public function title(bool $allWords = false): parent
{
    $titled = clone $this;

    $toTitleCase = static function (array $m): string {
        return mb_convert_case($m[0], MB_CASE_TITLE, 'UTF-8');
    };

    // A limit of -1 means "no limit" for preg_replace_callback().
    $titled->string = preg_replace_callback('/\b./u', $toTitleCase, $titled->string, $allWords ? -1 : 1);

    return $titled;
}
|
383
|
|
|
|
|
384
|
|
|
/**
 * Returns a copy of the string with the given characters stripped from both ends.
 *
 * @param string $chars The characters to strip; the default list is used without being validated
 *
 * @throws InvalidArgumentException When $chars is not valid UTF-8
 */
public function trim(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): parent
{
    $isDefaultList = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" === $chars;

    if (!$isDefaultList && !preg_match('//u', $chars)) {
        throw new InvalidArgumentException('Invalid UTF-8 chars.');
    }

    // Possessive character class made of the quoted characters.
    $class = '['.preg_quote($chars).']++';

    $trimmed = clone $this;
    $trimmed->string = preg_replace('{^'.$class.'|'.$class.'$}uD', '', $trimmed->string);

    return $trimmed;
}
|
396
|
|
|
|
|
397
|
|
|
/**
 * Returns a copy of the string with the given characters stripped from its end.
 *
 * @param string $chars The characters to strip; the default list is used without being validated
 *
 * @throws InvalidArgumentException When $chars is not valid UTF-8
 */
public function trimEnd(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): parent
{
    $isDefaultList = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" === $chars;

    if (!$isDefaultList && !preg_match('//u', $chars)) {
        throw new InvalidArgumentException('Invalid UTF-8 chars.');
    }

    // Possessive character class made of the quoted characters.
    $class = '['.preg_quote($chars).']++';

    $trimmed = clone $this;
    $trimmed->string = preg_replace('{'.$class.'$}uD', '', $trimmed->string);

    return $trimmed;
}
|
409
|
|
|
|
|
410
|
|
|
/**
 * Returns a copy of the string with the given characters stripped from its start.
 *
 * @param string $chars The characters to strip; the default list is used without being validated
 *
 * @throws InvalidArgumentException When $chars is not valid UTF-8
 */
public function trimStart(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): parent
{
    $isDefaultList = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" === $chars;

    if (!$isDefaultList && !preg_match('//u', $chars)) {
        throw new InvalidArgumentException('Invalid UTF-8 chars.');
    }

    // Possessive character class made of the quoted characters.
    $class = '['.preg_quote($chars).']++';

    $trimmed = clone $this;
    $trimmed->string = preg_replace('{^'.$class.'}uD', '', $trimmed->string);

    return $trimmed;
}
|
422
|
|
|
|
|
423
|
|
|
/**
 * Returns an upper-cased copy of the string.
 *
 * Before PHP 7.3, the mappings of self::UPPER_FROM/self::UPPER_TO (upper case
 * mappings from one code point to many) are applied on top of mb_strtoupper().
 */
public function upper(): parent
{
    $upper = clone $this;
    $converted = mb_strtoupper($upper->string, 'UTF-8');

    if (\PHP_VERSION_ID < 70300) {
        $converted = str_replace(self::UPPER_FROM, self::UPPER_TO, $converted);
    }

    $upper->string = $converted;

    return $upper;
}
|
434
|
|
|
|
|
435
|
|
|
/**
 * Returns the display width of the string, summed over its lines.
 *
 * NUL, ENQ (0x05) and BEL (0x07) bytes are dropped and "\r\n"/"\r" are turned
 * into "\n" before measuring.
 *
 * @param bool $ignoreAnsiDecoration When true, escape sequences introduced by ESC (0x1B) and
 *                                   remaining control characters are removed from each line;
 *                                   when false, only control characters are removed, up front
 *                                   (line feeds included, so the string is measured as one line)
 */
public function width(bool $ignoreAnsiDecoration = true): int
{
    $width = 0;
    $s = str_replace(["\x00", "\x05", "\x07"], '', $this->string);

    if (false !== strpos($s, "\r")) {
        $s = str_replace(["\r\n", "\r"], "\n", $s);
    }

    if (!$ignoreAnsiDecoration) {
        $s = preg_replace('/[\p{Cc}\x7F]++/u', '', $s);
    }

    foreach (explode("\n", $s) as $line) {
        if ($ignoreAnsiDecoration) {
            // Alternatives, in order:
            //  - ESC [ + parameter bytes (0x30-0x3F) + intermediate bytes (0x20-0x2F) + one final byte (0x40-0x7E)
            //  - ESC + one of P ] X ^ _ + anything up to the next ESC \
            //  - ESC + a single byte in 0x41-0x7E
            //  - any run of control characters
            // The final byte class used to read "[0x40-\x7E]", which PCRE parses as the
            // literals "0", "x", "4" plus the range "0"-"~" instead of the range 0x40-0x7E.
            $line = preg_replace('/(?:\x1B(?:
                \[ [\x30-\x3F]*+ [\x20-\x2F]*+ [\x40-\x7E]
                | [P\]X^_] .*? \x1B\\\\
                | [\x41-\x7E]
            )|[\p{Cc}\x7F]++)/xu', '', $line);
        }

        // Non printable characters have been dropped, so wcswidth cannot logically return -1.
        $width += $this->wcswidth($line);
    }

    return $width;
}
|
463
|
|
|
|
|
464
|
|
|
/**
 * Pads the string up to $len characters with repetitions of $pad.
 *
 * The padding is made of as many whole copies of $pad as fit, followed by a
 * leading slice of $pad for the remainder. With STR_PAD_BOTH, the free space
 * is split in two; the right side receives the extra character when the
 * amount is odd.
 *
 * Only integers are handed to str_repeat() and slice(): passing the float
 * result of a division with a fractional part to an int parameter is
 * deprecated as of PHP 8.1.
 *
 * @param int  $len  The target length, measured with length()
 * @param self $pad  The padding, expected to be non-empty (callers in this class reject '')
 * @param int  $type One of STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
 *
 * @return static
 *
 * @throws InvalidArgumentException When $type is not a supported padding type
 */
private function pad(int $len, self $pad, int $type): parent
{
    $sLen = $this->length();

    if ($len <= $sLen) {
        return clone $this;
    }

    $padLen = $pad->length();
    $freeLen = $len - $sLen;

    // Builds $count characters of padding: whole repetitions first, then a partial one.
    $fill = static function (int $count) use ($pad, $padLen): string {
        $rest = $count % $padLen;

        return str_repeat($pad->string, intdiv($count, $padLen)).($rest ? $pad->slice(0, $rest) : '');
    };

    switch ($type) {
        case STR_PAD_RIGHT:
            return $this->append($fill($freeLen));

        case STR_PAD_LEFT:
            return $this->prepend($fill($freeLen));

        case STR_PAD_BOTH:
            // Equivalent to floor($freeLen / 2) and ceil($freeLen / 2), without going through floats.
            $leftLen = intdiv($freeLen, 2);
            $rightLen = $freeLen - $leftLen;

            return $this->append($fill($rightLen))->prepend($fill($leftLen));

        default:
            throw new InvalidArgumentException('Invalid padding type.');
    }
}
|
502
|
|
|
|
|
503
|
|
|
/**
 * Computes the display width of a string, code point by code point.
 *
 * Code points listed explicitly below or found in the "zero" table count for
 * 0, those found in the "wide" table count for 2, any other one counts for 1.
 *
 * Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
 *
 * @return int The width, or -1 as soon as a C0/C1 control character or DEL is found
 */
private function wcswidth(string $string): int
{
    $total = 0;

    foreach (preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY) as $char) {
        $cp = mb_ord($char, 'UTF-8');

        $isInvisible = 0 === $cp // NULL
            || 0x034F === $cp // COMBINING GRAPHEME JOINER
            || ($cp >= 0x200B && $cp <= 0x200F) // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
            || 0x2028 === $cp // LINE SEPARATOR
            || 0x2029 === $cp // PARAGRAPH SEPARATOR
            || ($cp >= 0x202A && $cp <= 0x202E) // LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
            || ($cp >= 0x2060 && $cp <= 0x2063) // WORD JOINER to INVISIBLE SEPARATOR
        ;

        if ($isInvisible) {
            continue;
        }

        // Non printable characters: C0 control characters, then DEL and C1 control characters
        if ($cp < 32 || ($cp >= 0x07F && $cp < 0x0A0)) {
            return -1;
        }

        // The tables are loaded on first use and kept in static variables.
        static $tableZero;
        if (!isset($tableZero)) {
            $tableZero = require __DIR__.'/Resources/data/wcswidth_table_zero.php';
        }

        // Binary search over [first, last] ranges (assumes the table is sorted - defined in the required file).
        $high = \count($tableZero) - 1;
        if ($cp >= $tableZero[0][0] && $cp <= $tableZero[$high][1]) {
            $low = 0;
            while ($low <= $high) {
                $mid = intdiv($low + $high, 2);

                if ($tableZero[$mid][1] < $cp) {
                    $low = $mid + 1;
                } elseif ($tableZero[$mid][0] > $cp) {
                    $high = $mid - 1;
                } else {
                    // Zero-width code point: nothing to add.
                    continue 2;
                }
            }
        }

        static $tableWide;
        if (!isset($tableWide)) {
            $tableWide = require __DIR__.'/Resources/data/wcswidth_table_wide.php';
        }

        $high = \count($tableWide) - 1;
        if ($cp >= $tableWide[0][0] && $cp <= $tableWide[$high][1]) {
            $low = 0;
            while ($low <= $high) {
                $mid = intdiv($low + $high, 2);

                if ($tableWide[$mid][1] < $cp) {
                    $low = $mid + 1;
                } elseif ($tableWide[$mid][0] > $cp) {
                    $high = $mid - 1;
                } else {
                    $total += 2;

                    continue 2;
                }
            }
        }

        ++$total;
    }

    return $total;
}
|
578
|
|
|
} |
|
579
|
|
|
|
This error could be the result of:
1. Missing dependencies
PHP Analyzer uses your `composer.json` file (if available) to determine the dependencies of your project and to determine all the available classes and functions. It expects the `composer.json` to be in the root folder of your repository.

Are you sure this class is defined by one of your dependencies, or did you maybe not list a dependency in either the `require` or `require-dev` section?

2. Missing use statement
PHP does not complain about undefined classes in `instanceof` checks. For example, the following PHP code will work perfectly fine:

    if ($x instanceof DoesNotExist) {
        // Do something.
    }

If you have not tested against this specific condition, such errors might go unnoticed.