Total Complexity | 134 |
Total Lines | 550 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Complex classes like AbstractUnicodeString often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use AbstractUnicodeString, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
29 | abstract class AbstractUnicodeString extends AbstractString |
||
30 | { |
||
31 | public const NFC = \Normalizer::NFC; |
||
32 | public const NFD = \Normalizer::NFD; |
||
33 | public const NFKC = \Normalizer::NFKC; |
||
34 | public const NFKD = \Normalizer::NFKD; |
||
35 | |||
36 | // all ASCII characters (letters, digits, punctuation and control codes) sorted by typical frequency of occurrence |
||
37 | private const ASCII = "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"; |
||
38 | |||
39 | // the subset of folded case mappings that is not in lower case mappings |
||
40 | private const FOLD_FROM = ['İ', 'µ', 'ſ', "\xCD\x85", 'ς', 'ϐ', 'ϑ', 'ϕ', 'ϖ', 'ϰ', 'ϱ', 'ϵ', 'ẛ', "\xE1\xBE\xBE", 'ß', 'İ', 'ʼn', 'ǰ', 'ΐ', 'ΰ', 'և', 'ẖ', 'ẗ', 'ẘ', 'ẙ', 'ẚ', 'ẞ', 'ὐ', 'ὒ', 'ὔ', 'ὖ', 'ᾀ', 'ᾁ', 'ᾂ', 'ᾃ', 'ᾄ', 'ᾅ', 'ᾆ', 'ᾇ', 'ᾈ', 'ᾉ', 'ᾊ', 'ᾋ', 'ᾌ', 'ᾍ', 'ᾎ', 'ᾏ', 'ᾐ', 'ᾑ', 'ᾒ', 'ᾓ', 'ᾔ', 'ᾕ', 'ᾖ', 'ᾗ', 'ᾘ', 'ᾙ', 'ᾚ', 'ᾛ', 'ᾜ', 'ᾝ', 'ᾞ', 'ᾟ', 'ᾠ', 'ᾡ', 'ᾢ', 'ᾣ', 'ᾤ', 'ᾥ', 'ᾦ', 'ᾧ', 'ᾨ', 'ᾩ', 'ᾪ', 'ᾫ', 'ᾬ', 'ᾭ', 'ᾮ', 'ᾯ', 'ᾲ', 'ᾳ', 'ᾴ', 'ᾶ', 'ᾷ', 'ᾼ', 'ῂ', 'ῃ', 'ῄ', 'ῆ', 'ῇ', 'ῌ', 'ῒ', 'ΐ', 'ῖ', 'ῗ', 'ῢ', 'ΰ', 'ῤ', 'ῦ', 'ῧ', 'ῲ', 'ῳ', 'ῴ', 'ῶ', 'ῷ', 'ῼ', 'ff', 'fi', 'fl', 'ffi', 'ffl', 'ſt', 'st', 'ﬓ', 'ﬔ', 'ﬕ', 'ﬖ', 'ﬗ']; |
||
41 | private const FOLD_TO = ['i̇', 'μ', 's', 'ι', 'σ', 'β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', 'ṡ', 'ι', 'ss', 'i̇', 'ʼn', 'ǰ', 'ΐ', 'ΰ', 'եւ', 'ẖ', 'ẗ', 'ẘ', 'ẙ', 'aʾ', 'ss', 'ὐ', 'ὒ', 'ὔ', 'ὖ', 'ἀι', 'ἁι', 'ἂι', 'ἃι', 'ἄι', 'ἅι', 'ἆι', 'ἇι', 'ἀι', 'ἁι', 'ἂι', 'ἃι', 'ἄι', 'ἅι', 'ἆι', 'ἇι', 'ἠι', 'ἡι', 'ἢι', 'ἣι', 'ἤι', 'ἥι', 'ἦι', 'ἧι', 'ἠι', 'ἡι', 'ἢι', 'ἣι', 'ἤι', 'ἥι', 'ἦι', 'ἧι', 'ὠι', 'ὡι', 'ὢι', 'ὣι', 'ὤι', 'ὥι', 'ὦι', 'ὧι', 'ὠι', 'ὡι', 'ὢι', 'ὣι', 'ὤι', 'ὥι', 'ὦι', 'ὧι', 'ὰι', 'αι', 'άι', 'ᾶ', 'ᾶι', 'αι', 'ὴι', 'ηι', 'ήι', 'ῆ', 'ῆι', 'ηι', 'ῒ', 'ΐ', 'ῖ', 'ῗ', 'ῢ', 'ΰ', 'ῤ', 'ῦ', 'ῧ', 'ὼι', 'ωι', 'ώι', 'ῶ', 'ῶι', 'ωι', 'ff', 'fi', 'fl', 'ffi', 'ffl', 'st', 'st', 'մն', 'մե', 'մի', 'վն', 'մխ']; |
||
42 | |||
43 | // the subset of upper case mappings that map one code point to many code points |
||
44 | private const UPPER_FROM = ['ß', 'ff', 'fi', 'fl', 'ffi', 'ffl', 'ſt', 'st', 'և', 'ﬓ', 'ﬔ', 'ﬕ', 'ﬖ', 'ﬗ', 'ʼn', 'ΐ', 'ΰ', 'ǰ', 'ẖ', 'ẗ', 'ẘ', 'ẙ', 'ẚ', 'ὐ', 'ὒ', 'ὔ', 'ὖ', 'ᾶ', 'ῆ', 'ῒ', 'ΐ', 'ῖ', 'ῗ', 'ῢ', 'ΰ', 'ῤ', 'ῦ', 'ῧ', 'ῶ']; |
||
45 | private const UPPER_TO = ['SS', 'FF', 'FI', 'FL', 'FFI', 'FFL', 'ST', 'ST', 'ԵՒ', 'ՄՆ', 'ՄԵ', 'ՄԻ', 'ՎՆ', 'ՄԽ', 'ʼN', 'Ϊ́', 'Ϋ́', 'J̌', 'H̱', 'T̈', 'W̊', 'Y̊', 'Aʾ', 'Υ̓', 'Υ̓̀', 'Υ̓́', 'Υ̓͂', 'Α͂', 'Η͂', 'Ϊ̀', 'Ϊ́', 'Ι͂', 'Ϊ͂', 'Ϋ̀', 'Ϋ́', 'Ρ̓', 'Υ͂', 'Ϋ͂', 'Ω͂']; |
||
46 | |||
47 | // the subset of https://github.com/unicode-org/cldr/blob/master/common/transforms/Latin-ASCII.xml that is not in NFKD |
||
48 | private const TRANSLIT_FROM = ['Æ', 'Ð', 'Ø', 'Þ', 'ß', 'æ', 'ð', 'ø', 'þ', 'Đ', 'đ', 'Ħ', 'ħ', 'ı', 'ĸ', 'Ŀ', 'ŀ', 'Ł', 'ł', 'ʼn', 'Ŋ', 'ŋ', 'Œ', 'œ', 'Ŧ', 'ŧ', 'ƀ', 'Ɓ', 'Ƃ', 'ƃ', 'Ƈ', 'ƈ', 'Ɖ', 'Ɗ', 'Ƌ', 'ƌ', 'Ɛ', 'Ƒ', 'ƒ', 'Ɠ', 'ƕ', 'Ɩ', 'Ɨ', 'Ƙ', 'ƙ', 'ƚ', 'Ɲ', 'ƞ', 'Ƣ', 'ƣ', 'Ƥ', 'ƥ', 'ƫ', 'Ƭ', 'ƭ', 'Ʈ', 'Ʋ', 'Ƴ', 'ƴ', 'Ƶ', 'ƶ', 'DŽ', 'Dž', 'dž', 'Ǥ', 'ǥ', 'ȡ', 'Ȥ', 'ȥ', 'ȴ', 'ȵ', 'ȶ', 'ȷ', 'ȸ', 'ȹ', 'Ⱥ', 'Ȼ', 'ȼ', 'Ƚ', 'Ⱦ', 'ȿ', 'ɀ', 'Ƀ', 'Ʉ', 'Ɇ', 'ɇ', 'Ɉ', 'ɉ', 'Ɍ', 'ɍ', 'Ɏ', 'ɏ', 'ɓ', 'ɕ', 'ɖ', 'ɗ', 'ɛ', 'ɟ', 'ɠ', 'ɡ', 'ɢ', 'ɦ', 'ɧ', 'ɨ', 'ɪ', 'ɫ', 'ɬ', 'ɭ', 'ɱ', 'ɲ', 'ɳ', 'ɴ', 'ɶ', 'ɼ', 'ɽ', 'ɾ', 'ʀ', 'ʂ', 'ʈ', 'ʉ', 'ʋ', 'ʏ', 'ʐ', 'ʑ', 'ʙ', 'ʛ', 'ʜ', 'ʝ', 'ʟ', 'ʠ', 'ʣ', 'ʥ', 'ʦ', 'ʪ', 'ʫ', 'ᴀ', 'ᴁ', 'ᴃ', 'ᴄ', 'ᴅ', 'ᴆ', 'ᴇ', 'ᴊ', 'ᴋ', 'ᴌ', 'ᴍ', 'ᴏ', 'ᴘ', 'ᴛ', 'ᴜ', 'ᴠ', 'ᴡ', 'ᴢ', 'ᵫ', 'ᵬ', 'ᵭ', 'ᵮ', 'ᵯ', 'ᵰ', 'ᵱ', 'ᵲ', 'ᵳ', 'ᵴ', 'ᵵ', 'ᵶ', 'ᵺ', 'ᵻ', 'ᵽ', 'ᵾ', 'ᶀ', 'ᶁ', 'ᶂ', 'ᶃ', 'ᶄ', 'ᶅ', 'ᶆ', 'ᶇ', 'ᶈ', 'ᶉ', 'ᶊ', 'ᶌ', 'ᶍ', 'ᶎ', 'ᶏ', 'ᶑ', 'ᶒ', 'ᶓ', 'ᶖ', 'ᶙ', 'ẚ', 'ẜ', 'ẝ', 'ẞ', 'Ỻ', 'ỻ', 'Ỽ', 'ỽ', 'Ỿ', 'ỿ', '©', '®', '₠', '₢', '₣', '₤', '₧', '₺', '₹', 'ℌ', '℞', '㎧', '㎮', '㏆', '㏗', '㏞', '㏟', '¼', '½', '¾', '⅓', '⅔', '⅕', '⅖', '⅗', '⅘', '⅙', '⅚', '⅛', '⅜', '⅝', '⅞', '⅟', '〇', '‘', '’', '‚', '‛', '“', '”', '„', '‟', '′', '″', '〝', '〞', '«', '»', '‹', '›', '‐', '‑', '‒', '–', '—', '―', '︱', '︲', '﹘', '‖', '⁄', '⁅', '⁆', '⁎', '、', '。', '〈', '〉', '《', '》', '〔', '〕', '〘', '〙', '〚', '〛', '︑', '︒', '︹', '︺', '︽', '︾', '︿', '﹀', '﹑', '﹝', '﹞', '⦅', '⦆', '。', '、', '×', '÷', '−', '∕', '∖', '∣', '∥', '≪', '≫', '⦅', '⦆']; |
||
49 | private const TRANSLIT_TO = ['AE', 'D', 'O', 'TH', 'ss', 'ae', 'd', 'o', 'th', 'D', 'd', 'H', 'h', 'i', 'q', 'L', 'l', 'L', 'l', '\'n', 'N', 'n', 'OE', 'oe', 'T', 't', 'b', 'B', 'B', 'b', 'C', 'c', 'D', 'D', 'D', 'd', 'E', 'F', 'f', 'G', 'hv', 'I', 'I', 'K', 'k', 'l', 'N', 'n', 'OI', 'oi', 'P', 'p', 't', 'T', 't', 'T', 'V', 'Y', 'y', 'Z', 'z', 'DZ', 'Dz', 'dz', 'G', 'g', 'd', 'Z', 'z', 'l', 'n', 't', 'j', 'db', 'qp', 'A', 'C', 'c', 'L', 'T', 's', 'z', 'B', 'U', 'E', 'e', 'J', 'j', 'R', 'r', 'Y', 'y', 'b', 'c', 'd', 'd', 'e', 'j', 'g', 'g', 'G', 'h', 'h', 'i', 'I', 'l', 'l', 'l', 'm', 'n', 'n', 'N', 'OE', 'r', 'r', 'r', 'R', 's', 't', 'u', 'v', 'Y', 'z', 'z', 'B', 'G', 'H', 'j', 'L', 'q', 'dz', 'dz', 'ts', 'ls', 'lz', 'A', 'AE', 'B', 'C', 'D', 'D', 'E', 'J', 'K', 'L', 'M', 'O', 'P', 'T', 'U', 'V', 'W', 'Z', 'ue', 'b', 'd', 'f', 'm', 'n', 'p', 'r', 'r', 's', 't', 'z', 'th', 'I', 'p', 'U', 'b', 'd', 'f', 'g', 'k', 'l', 'm', 'n', 'p', 'r', 's', 'v', 'x', 'z', 'a', 'd', 'e', 'e', 'i', 'u', 'a', 's', 's', 'SS', 'LL', 'll', 'V', 'v', 'Y', 'y', '(C)', '(R)', 'CE', 'Cr', 'Fr.', 'L.', 'Pts', 'TL', 'Rs', 'x', 'Rx', 'm/s', 'rad/s', 'C/kg', 'pH', 'V/m', 'A/m', ' 1/4', ' 1/2', ' 3/4', ' 1/3', ' 2/3', ' 1/5', ' 2/5', ' 3/5', ' 4/5', ' 1/6', ' 5/6', ' 1/8', ' 3/8', ' 5/8', ' 7/8', ' 1/', '0', '\'', '\'', ',', '\'', '"', '"', ',,', '"', '\'', '"', '"', '"', '<<', '>>', '<', '>', '-', '-', '-', '-', '-', '-', '-', '-', '-', '||', '/', '[', ']', '*', ',', '.', '<', '>', '<<', '>>', '[', ']', '[', ']', '[', ']', ',', '.', '[', ']', '<<', '>>', '<', '>', ',', '[', ']', '((', '))', '.', ',', '*', '/', '-', '/', '\\', '|', '||', '<<', '>>', '((', '))']; |
||
50 | |||
51 | private static $transliterators = []; |
||
52 | |||
/**
 * Creates an instance from a list of code points, encoding each one as UTF-8 by hand.
 *
 * Every value is first reduced modulo 0x200000 and then written as a one- to
 * four-byte sequence depending on its magnitude.
 *
 * @return static
 */
public static function fromCodePoints(int ...$codes): self
{
    $utf8 = '';

    foreach ($codes as $codePoint) {
        $codePoint %= 0x200000;

        if ($codePoint < 0x80) {
            // One byte: the value itself.
            $bytes = [$codePoint];
        } elseif ($codePoint < 0x800) {
            // Two bytes: 110xxxxx 10xxxxxx.
            $bytes = [
                0xC0 | ($codePoint >> 6),
                0x80 | ($codePoint & 0x3F),
            ];
        } elseif ($codePoint < 0x10000) {
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
            $bytes = [
                0xE0 | ($codePoint >> 12),
                0x80 | (($codePoint >> 6) & 0x3F),
                0x80 | ($codePoint & 0x3F),
            ];
        } else {
            // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
            $bytes = [
                0xF0 | ($codePoint >> 18),
                0x80 | (($codePoint >> 12) & 0x3F),
                0x80 | (($codePoint >> 6) & 0x3F),
                0x80 | ($codePoint & 0x3F),
            ];
        }

        foreach ($bytes as $byte) {
            $utf8 .= \chr($byte);
        }
    }

    return new static($utf8);
}
||
74 | |||
/**
 * Generic UTF-8 to ASCII transliteration.
 *
 * Install the intl extension for best results.
 *
 * The rules are consumed one per iteration, and only for as long as the
 * remaining input still contains bytes outside of self::ASCII. Before each
 * rule runs, the leading ASCII part (minus its last byte) is moved to the result.
 *
 * @param string[]|\Transliterator[]|\Closure[] $rules See "*-Latin" rules from Transliterator::listIDs()
 */
public function ascii(array $rules = []): self
{
    $result = clone $this;
    $pending = $result->string;
    $result->string = '';

    $hasIntl = \function_exists('transliterator_transliterate');

    // The built-in rules are wrapped around the ones given by the caller.
    array_unshift($rules, 'nfd');
    $rules[] = 'latin-ascii';

    if ($hasIntl) {
        $rules[] = 'any-latin/bgn';
    }

    $rules[] = 'nfkd';
    $rules[] = '[:nonspacing mark:] remove';

    while (true) {
        $asciiLen = strspn($pending, self::ASCII);

        if (\strlen($pending) - 1 <= $asciiLen) {
            break;
        }

        // The last ASCII byte stays in the pending part together with what follows it.
        $keep = $asciiLen - 1;

        if ($keep > 0) {
            $result->string .= substr($pending, 0, $keep);
            $pending = substr($pending, $keep);
        }

        $rule = array_shift($rules);

        if (!$rule) {
            $rules = []; // An empty rule interrupts the next ones
        }

        if ($rule instanceof \Transliterator) {
            $pending = $rule->transliterate($pending);
        } elseif ($rule instanceof \Closure) {
            $pending = $rule($pending);
        } elseif ($rule) {
            $rule = strtolower($rule);

            switch ($rule) {
                case 'nfd':
                    if (!normalizer_is_normalized($pending, self::NFD)) {
                        $pending = normalizer_normalize($pending, self::NFD);
                    }
                    break;

                case 'nfkd':
                    if (!normalizer_is_normalized($pending, self::NFKD)) {
                        $pending = normalizer_normalize($pending, self::NFKD);
                    }
                    break;

                case '[:nonspacing mark:] remove':
                    $pending = preg_replace('/\p{Mn}++/u', '', $pending);
                    break;

                case 'latin-ascii':
                    $pending = str_replace(self::TRANSLIT_FROM, self::TRANSLIT_TO, $pending);
                    break;

                case 'de-ascii':
                    $pending = preg_replace("/([AUO])\u{0308}(?=\p{Ll})/u", '$1e', $pending);
                    $pending = str_replace(["a\u{0308}", "o\u{0308}", "u\u{0308}", "A\u{0308}", "O\u{0308}", "U\u{0308}"], ['ae', 'oe', 'ue', 'AE', 'OE', 'UE'], $pending);
                    break;

                default:
                    // Any other rule name is handed to intl, when available.
                    if (!$hasIntl) {
                        break;
                    }

                    $transliterator = self::$transliterators[$rule] ?? null;

                    if (null === $transliterator) {
                        $transliterator = self::$transliterators[$rule] = \Transliterator::create($rule);
                    }

                    if (null === $transliterator) {
                        if ('any-latin/bgn' === $rule) {
                            // Retry with the plain "any-latin" rule.
                            $rule = 'any-latin';
                            $transliterator = self::$transliterators[$rule] ?? null;

                            if (null === $transliterator) {
                                $transliterator = self::$transliterators[$rule] = \Transliterator::create($rule);
                            }
                        }

                        if (null === $transliterator) {
                            throw new InvalidArgumentException(sprintf('Unknown transliteration rule "%s".', $rule));
                        }

                        self::$transliterators['any-latin/bgn'] = $transliterator;
                    }

                    $pending = $transliterator->transliterate($pending);
                    break;
            }
        } elseif (!\function_exists('iconv')) {
            // No rule left and no iconv: replace what remains with question marks.
            $pending = preg_replace('/[^\x00-\x7F]/u', '?', $pending);
        } else {
            // No rule left: let iconv transliterate each remaining non-ASCII character.
            $pending = @preg_replace_callback('/[^\x00-\x7F]/u', static function ($match) {
                $ascii = (string) iconv('UTF-8', 'ASCII//TRANSLIT', $match[0]);

                if ('' === $ascii && '' === iconv('UTF-8', 'ASCII//TRANSLIT', '²')) {
                    throw new \LogicException(sprintf('"%s" requires a translit-able iconv implementation, try installing "gnu-libiconv" if you\'re using Alpine Linux.', static::class));
                }

                if (1 < \strlen($ascii)) {
                    return ltrim($ascii, '\'`"^~');
                }

                return '' !== $ascii ? $ascii : '?';
            }, $pending);
        }
    }

    $result->string .= $pending;

    return $result;
}
||
159 | |||
160 | public function camel(): parent |
||
168 | } |
||
169 | |||
/**
 * Returns the code points found in the one-element slice taken at the given offset.
 *
 * An empty array is returned when that slice is empty.
 *
 * @return int[]
 */
public function codePointsAt(int $offset): array
{
    $chunk = $this->slice($offset, 1);

    if ('' === $chunk->string) {
        return [];
    }

    $result = [];
    $characters = preg_split('//u', $chunk->string, -1, \PREG_SPLIT_NO_EMPTY);

    foreach ($characters as $character) {
        $result[] = mb_ord($character, 'UTF-8');
    }

    return $result;
}
||
189 | |||
/**
 * Returns a case-folded copy of the string.
 *
 * When NFKC_Casefold is usable (compat mode, PHP >= 7.3 and Normalizer::NFKC_CF defined),
 * the normalizer does the whole job. Otherwise the string is normalized to NFKC
 * (or NFC when $compat is false), then the extra folding mappings are applied
 * and the result is lower-cased.
 *
 * @param bool $compat Whether to use the compatibility normalization form
 */
public function folded(bool $compat = true): parent
{
    $str = clone $this;

    if (!$compat || \PHP_VERSION_ID < 70300 || !\defined('Normalizer::NFKC_CF')) {
        $str->string = normalizer_normalize($str->string, $compat ? \Normalizer::NFKC : \Normalizer::NFC);
        // Fold the normalized string: reading $this->string here would throw the normalization away.
        $str->string = mb_strtolower(str_replace(self::FOLD_FROM, self::FOLD_TO, $str->string), 'UTF-8');
    } else {
        $str->string = normalizer_normalize($str->string, \Normalizer::NFKC_CF);
    }

    return $str;
}
||
203 | |||
/**
 * Joins the given strings, using this string as the glue.
 *
 * When $lastGlue is given and there is more than one string, the last element
 * is attached with $lastGlue instead of the regular glue.
 *
 * @param string|null $lastGlue Explicitly nullable: implicit nullability through a
 *                              null default is deprecated as of PHP 8.4
 *
 * @throws InvalidArgumentException When the result is not valid UTF-8
 */
public function join(array $strings, ?string $lastGlue = null): parent
{
    $str = clone $this;

    $tail = null !== $lastGlue && 1 < \count($strings) ? $lastGlue.array_pop($strings) : '';
    $str->string = implode($this->string, $strings).$tail;

    // An empty pattern with the "u" modifier only matches valid UTF-8 subjects.
    if (!preg_match('//u', $str->string)) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    return $str;
}
||
217 | |||
218 | public function lower(): parent |
||
224 | } |
||
225 | |||
/**
 * Matches the string against a regular expression, always in UTF-8 mode.
 *
 * preg_match_all() is used when one of the PREG_PATTERN_ORDER / PREG_SET_ORDER
 * flags is set, preg_match() otherwise. The "i" modifier is appended when the
 * instance ignores case, and unmatched groups are reported as null.
 *
 * @throws InvalidArgumentException When PHP raises an error while matching
 * @throws RuntimeException         When the PCRE function reports a failure
 */
public function match(string $regexp, int $flags = 0, int $offset = 0): array
{
    if ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags) {
        $matcher = 'preg_match_all';
    } else {
        $matcher = 'preg_match';
    }

    $pattern = $this->ignoreCase ? $regexp.'i' : $regexp;
    $pattern .= 'u';

    // Turn PHP errors raised during matching into exceptions.
    set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });

    try {
        $outcome = $matcher($pattern, $this->string, $matches, $flags | \PREG_UNMATCHED_AS_NULL, $offset);

        if (false === $outcome) {
            $errorCode = preg_last_error();

            // Look up the name of the PCRE error constant holding that code.
            foreach (get_defined_constants(true)['pcre'] as $name => $value) {
                if ($errorCode === $value && '_ERROR' === substr($name, -6)) {
                    throw new RuntimeException('Matching failed with '.$name.'.');
                }
            }

            throw new RuntimeException('Matching failed with unknown error code.');
        }
    } finally {
        restore_error_handler();
    }

    return $matches;
}
||
254 | |||
/**
 * Returns a copy of the string in the requested Unicode normalization form.
 *
 * @param int $form One of the NFC, NFD, NFKC or NFKD class constants
 *
 * @return static
 *
 * @throws InvalidArgumentException When $form is not one of the four supported forms
 */
public function normalize(int $form = self::NFC): self
{
    $supportedForms = [self::NFC, self::NFD, self::NFKC, self::NFKD];

    if (!\in_array($form, $supportedForms)) {
        throw new InvalidArgumentException('Unsupported normalization form.');
    }

    $copy = clone $this;

    // Only run the normalizer when the string is not in the requested form yet.
    if (!normalizer_is_normalized($copy->string, $form)) {
        $copy->string = normalizer_normalize($copy->string, $form);
    }

    return $copy;
}
||
269 | |||
/**
 * Pads the string on both sides with $padStr, up to the given length.
 *
 * @throws InvalidArgumentException When $padStr is empty or not valid UTF-8
 */
public function padBoth(int $length, string $padStr = ' '): parent
{
    $isUsablePad = '' !== $padStr && preg_match('//u', $padStr);

    if (!$isUsablePad) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    // Wrap the pad string in an instance of the same class before delegating.
    $padding = clone $this;
    $padding->string = $padStr;

    return $this->pad($length, $padding, \STR_PAD_BOTH);
}
||
281 | |||
/**
 * Pads the end of the string with $padStr, up to the given length.
 *
 * @throws InvalidArgumentException When $padStr is empty or not valid UTF-8
 */
public function padEnd(int $length, string $padStr = ' '): parent
{
    $isUsablePad = '' !== $padStr && preg_match('//u', $padStr);

    if (!$isUsablePad) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    // Wrap the pad string in an instance of the same class before delegating.
    $padding = clone $this;
    $padding->string = $padStr;

    return $this->pad($length, $padding, \STR_PAD_RIGHT);
}
||
293 | |||
/**
 * Pads the start of the string with $padStr, up to the given length.
 *
 * @throws InvalidArgumentException When $padStr is empty or not valid UTF-8
 */
public function padStart(int $length, string $padStr = ' '): parent
{
    $isUsablePad = '' !== $padStr && preg_match('//u', $padStr);

    if (!$isUsablePad) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    // Wrap the pad string in an instance of the same class before delegating.
    $padding = clone $this;
    $padding->string = $padStr;

    return $this->pad($length, $padding, \STR_PAD_LEFT);
}
||
305 | |||
/**
 * Replaces what matches a regular expression, always in UTF-8 mode.
 *
 * $to is either a replacement string or a callable (array or closure) that
 * receives the match array and returns the replacement. The "i" modifier is
 * appended when the instance ignores case.
 *
 * @param string|callable $to
 *
 * @throws \TypeError               When $to is an array that is not callable
 * @throws InvalidArgumentException When a replacement is not valid UTF-8, or when PHP raises an error
 * @throws RuntimeException         When the PCRE function reports a failure
 */
public function replaceMatches(string $fromRegexp, $to): parent
{
    $pattern = $this->ignoreCase ? $fromRegexp.'i' : $fromRegexp;
    $pattern .= 'u';

    if (\is_array($to) || $to instanceof \Closure) {
        if (!\is_callable($to)) {
            throw new \TypeError(sprintf('Argument 2 passed to "%s::replaceMatches()" must be callable, array given.', static::class));
        }

        $replacer = 'preg_replace_callback';
        $userCallback = $to;

        // Wrap the user callback so that each returned replacement gets validated.
        $to = static function (array $m) use ($userCallback): string {
            $replacement = $userCallback($m);

            if ('' !== $replacement && (!\is_string($replacement) || !preg_match('//u', $replacement))) {
                throw new InvalidArgumentException('Replace callback must return a valid UTF-8 string.');
            }

            return $replacement;
        };
    } else {
        if ('' !== $to && !preg_match('//u', $to)) {
            throw new InvalidArgumentException('Invalid UTF-8 string.');
        }

        $replacer = 'preg_replace';
    }

    // Turn PHP errors raised during the replacement into exceptions.
    set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });

    try {
        $replaced = $replacer($pattern, $to, $this->string);

        if (null === $replaced) {
            $errorCode = preg_last_error();

            // Look up the name of the PCRE error constant holding that code.
            foreach (get_defined_constants(true)['pcre'] as $name => $value) {
                if ($errorCode === $value && '_ERROR' === substr($name, -6)) {
                    throw new RuntimeException('Matching failed with '.$name.'.');
                }
            }

            throw new RuntimeException('Matching failed with unknown error code.');
        }
    } finally {
        restore_error_handler();
    }

    $copy = clone $this;
    $copy->string = $replaced;

    return $copy;
}
||
356 | |||
357 | public function reverse(): parent |
||
363 | } |
||
364 | |||
/**
 * Returns a snake_case version of the string.
 *
 * Starts from camel()->title(), inserts an underscore at each lower/digit-to-upper
 * boundary and before the last capital of an upper-case run, then lower-cases the result.
 */
public function snake(): parent
{
    $str = $this->camel()->title();

    $boundaries = ['/(\p{Lu}+)(\p{Lu}\p{Ll})/u', '/([\p{Ll}0-9])(\p{Lu})/u'];
    $separated = preg_replace($boundaries, '\1_\2', $str->string);

    $str->string = mb_strtolower($separated, 'UTF-8');

    return $str;
}
||
372 | |||
/**
 * Returns a copy with the character after a word boundary converted to title case.
 *
 * @param bool $allWords Whether to convert at every word boundary (true) or only at the first one (false)
 */
public function title(bool $allWords = false): parent
{
    // preg_replace_callback() limit: -1 means "no limit".
    if ($allWords) {
        $limit = -1;
    } else {
        $limit = 1;
    }

    $toTitleCase = static function (array $m): string {
        return mb_convert_case($m[0], \MB_CASE_TITLE, 'UTF-8');
    };

    $str = clone $this;
    $str->string = preg_replace_callback('/\b./u', $toTitleCase, $str->string, $limit);

    return $str;
}
||
385 | |||
/**
 * Removes the given characters from both ends of the string.
 *
 * @throws InvalidArgumentException When a custom $chars value is not valid UTF-8
 */
public function trim(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): parent
{
    // The default set is not re-validated; only custom sets are checked.
    $isDefaultSet = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" === $chars;

    if (!$isDefaultSet && !preg_match('//u', $chars)) {
        throw new InvalidArgumentException('Invalid UTF-8 chars.');
    }

    // Escape the characters so they can be used inside a character class.
    $chars = preg_quote($chars);

    $str = clone $this;
    $trimmed = preg_replace("{^[$chars]++|[$chars]++$}uD", '', $str->string);
    $str->string = $trimmed;

    return $str;
}
||
398 | |||
/**
 * Removes the given characters from the end of the string.
 *
 * @throws InvalidArgumentException When a custom $chars value is not valid UTF-8
 */
public function trimEnd(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): parent
{
    // The default set is not re-validated; only custom sets are checked.
    $isDefaultSet = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" === $chars;

    if (!$isDefaultSet && !preg_match('//u', $chars)) {
        throw new InvalidArgumentException('Invalid UTF-8 chars.');
    }

    // Escape the characters so they can be used inside a character class.
    $chars = preg_quote($chars);

    $str = clone $this;
    $trimmed = preg_replace("{[$chars]++$}uD", '', $str->string);
    $str->string = $trimmed;

    return $str;
}
||
411 | |||
/**
 * Removes the given characters from the start of the string.
 *
 * @throws InvalidArgumentException When a custom $chars value is not valid UTF-8
 */
public function trimStart(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): parent
{
    // The default set is not re-validated; only custom sets are checked.
    $isDefaultSet = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" === $chars;

    if (!$isDefaultSet && !preg_match('//u', $chars)) {
        throw new InvalidArgumentException('Invalid UTF-8 chars.');
    }

    // Escape the characters so they can be used inside a character class.
    $chars = preg_quote($chars);

    $str = clone $this;
    $trimmed = preg_replace("{^[$chars]++}uD", '', $str->string);
    $str->string = $trimmed;

    return $str;
}
||
424 | |||
/**
 * Returns an upper-cased copy of the string.
 *
 * On PHP versions before 7.3, the one-to-many upper case mappings listed in
 * UPPER_FROM / UPPER_TO are applied on top of mb_strtoupper().
 */
public function upper(): parent
{
    $str = clone $this;
    $uppercased = mb_strtoupper($str->string, 'UTF-8');

    if (\PHP_VERSION_ID < 70300) {
        $uppercased = str_replace(self::UPPER_FROM, self::UPPER_TO, $uppercased);
    }

    $str->string = $uppercased;

    return $str;
}
||
436 | |||
437 | public function width(bool $ignoreAnsiDecoration = true): int |
||
464 | } |
||
465 | |||
/**
 * Pads this string with $pad until length() reaches $len.
 *
 * A clone is returned when the string is already long enough. For STR_PAD_BOTH,
 * the free space is split in two and the right side gets the extra unit when
 * it cannot be split evenly.
 *
 * All counts are computed with integer arithmetic: passing the float result of
 * a division to str_repeat() is deprecated since PHP 8.1 when it has a
 * fractional part, and is a TypeError under strict types.
 *
 * @param int  $len  The target length
 * @param self $pad  The pad string, wrapped in an instance
 * @param int  $type One of \STR_PAD_RIGHT, \STR_PAD_LEFT or \STR_PAD_BOTH
 *
 * @return static
 *
 * @throws InvalidArgumentException When $type is not a known padding type
 */
private function pad(int $len, self $pad, int $type): parent
{
    $sLen = $this->length();

    if ($len <= $sLen) {
        return clone $this;
    }

    $padLen = $pad->length();
    $freeLen = $len - $sLen;

    // Builds $count units of padding: whole repetitions, then a partial slice of the pad.
    $fill = static function (int $count) use ($pad, $padLen): string {
        $rest = $count % $padLen;

        return str_repeat($pad->string, intdiv($count, $padLen)).($rest ? $pad->slice(0, $rest) : '');
    };

    switch ($type) {
        case \STR_PAD_RIGHT:
            return $this->append($fill($freeLen));

        case \STR_PAD_LEFT:
            return $this->prepend($fill($freeLen));

        case \STR_PAD_BOTH:
            // Same as ceil($freeLen / 2) and floor($freeLen / 2), without going through floats.
            $rightLen = intdiv($freeLen + 1, 2);
            $leftLen = intdiv($freeLen, 2);

            return $this->append($fill($rightLen))->prepend($fill($leftLen));

        default:
            throw new InvalidArgumentException('Invalid padding type.');
    }
}
||
504 | |||
505 | /** |
||
506 | * Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c. |
||
507 | */ |
||
508 | private function wcswidth(string $string): int |
||
579 | } |
||
580 | } |
||
581 |