|
1
|
|
|
<?php |
|
2
|
|
|
namespace nochso\Omni; |
|
3
|
|
|
|
|
4
|
|
|
/**
 * Strings class provides methods for string handling missing from default PHP.
 *
 * `mb_*` methods are used where sensible, so make sure to pass UTF-8 strings.
 */
final class Strings {
    /**
     * Control characters that have a short, conventional escape sequence.
     *
     * Keys are the actual control characters, values their printable escaped form.
     *
     * @var string[]
     */
    private static $controlCharMap = [
        "\n" => '\n',
        "\r" => '\r',
        "\t" => '\t',
        "\v" => '\v',
        "\e" => '\e',
        "\f" => '\f',
    ];

    /**
     * Matches a single ASCII control character (0x00-0x1F and 0x7F).
     */
    const CONTROL_CHAR_PATTERN = '/[\x00-\x1F\x7F]/';

    /**
     * startsWith returns true if the input begins with a prefix.
     *
     * The comparison is byte-wise and case-sensitive.
     *
     * @param string $input
     * @param string $prefix
     *
     * @return bool
     */
    public static function startsWith($input, $prefix) {
        return substr($input, 0, strlen($prefix)) === $prefix;
    }

    /**
     * endsWith returns true if the input ends with a suffix.
     *
     * The comparison is byte-wise and case-sensitive. An empty suffix always matches.
     *
     * @param string $input
     * @param string $suffix
     *
     * @return bool
     */
    public static function endsWith($input, $suffix) {
        $suffixLength = strlen($suffix);
        // substr($input, -0) would return the whole input, so the empty suffix is handled explicitly.
        if ($suffixLength === 0) {
            return true;
        }
        return substr($input, -$suffixLength) === $suffix;
    }

    /**
     * getMostFrequentNeedle by counting occurrences of each needle in haystack.
     *
     * @param string $haystack Haystack to be searched in.
     * @param array  $needles  Needles to be counted.
     *
     * @return string|null The most frequently occurring needle. If counts are tied, the first tied needle is
     *                     returned. If no needles were found, `null` is returned.
     */
    public static function getMostFrequentNeedle($haystack, array $needles) {
        $maxCount = 0;
        $maxNeedle = null;
        foreach ($needles as $needle) {
            $newCount = mb_substr_count($haystack, $needle);
            // Strictly greater: on a tie the earlier needle wins.
            if ($newCount > $maxCount) {
                $maxCount = $newCount;
                $maxNeedle = $needle;
            }
        }
        return $maxNeedle;
    }

    /**
     * escapeControlChars by replacing line feeds, tabs, etc. with their escaped representation.
     *
     * e.g. an actual line feed will return '\n'
     *
     * Control characters without a short escape are written as `\xHH`. Existing backslashes are doubled first so
     * the output stays unambiguous.
     *
     * @param string $input
     *
     * @return string
     */
    public static function escapeControlChars($input) {
        $escaper = function ($chars) {
            // The pattern matches exactly one character, which is the whole match.
            $char = $chars[0];
            if (isset(self::$controlCharMap[$char])) {
                return self::$controlCharMap[$char];
            }
            return sprintf('\x%02X', ord($char));
        };
        // Escape backslashes before introducing new ones via the control character escapes.
        $output = str_replace('\\', '\\\\', $input);
        return preg_replace_callback(self::CONTROL_CHAR_PATTERN, $escaper, $output);
    }

    /**
     * padMultibyte strings to a certain length with another string.
     *
     * Lengths are counted in characters (via `mb_strlen`), not bytes.
     *
     * @param string $input       The input string to be padded.
     * @param int    $padLength   If the pad length is smaller than the input length, no padding takes place.
     * @param string $padding     Optional, defaults to a space character. Can be more than one character. The padding
     *                            may be truncated if the required number of padding characters can't be evenly
     *                            divided.
     * @param int    $paddingType Optional, defaults to STR_PAD_RIGHT. Must be one of STR_PAD_LEFT, STR_PAD_RIGHT or
     *                            STR_PAD_BOTH.
     *
     * @throws \InvalidArgumentException If the padding type is unknown or the padding string is empty.
     *
     * @return string The padded string.
     */
    public static function padMultibyte($input, $padLength, $padding = ' ', $paddingType = STR_PAD_RIGHT) {
        if ($paddingType !== STR_PAD_LEFT && $paddingType !== STR_PAD_RIGHT && $paddingType !== STR_PAD_BOTH) {
            throw new \InvalidArgumentException('Padding type must be one of STR_PAD_LEFT, STR_PAD_RIGHT or STR_PAD_BOTH.');
        }
        $paddingLength = mb_strlen($padding);
        if ($paddingLength === 0) {
            throw new \InvalidArgumentException('Padding string must not be empty.');
        }
        $inputLength = mb_strlen($input);
        if ($inputLength > $padLength) {
            return $input;
        }
        // Work with integers only: floats must not reach str_repeat() or the modulo operator.
        $freeLength = (int) ($padLength - $inputLength);
        if ($paddingType === STR_PAD_LEFT) {
            return self::buildPadding($padding, $paddingLength, $freeLength) . $input;
        }
        if ($paddingType === STR_PAD_RIGHT) {
            return $input . self::buildPadding($padding, $paddingLength, $freeLength);
        }
        // STR_PAD_BOTH: like the original str_pad, prefer trailing padding when the free length is odd.
        $leftLength = (int) floor($freeLength / 2);
        $rightLength = $freeLength - $leftLength;
        return self::buildPadding($padding, $paddingLength, $leftLength)
            . $input
            . self::buildPadding($padding, $paddingLength, $rightLength);
    }

    /**
     * buildPadding repeats the padding string until exactly $length characters are produced.
     *
     * @param string $padding       Non-empty padding string.
     * @param int    $paddingLength Character length of $padding.
     * @param int    $length        Amount of characters to produce.
     *
     * @return string Padding of $length characters; the last repetition is truncated if needed.
     */
    private static function buildPadding($padding, $paddingLength, $length) {
        if ($length <= 0) {
            return '';
        }
        $result = str_repeat($padding, (int) floor($length / $paddingLength));
        $partialLength = $length % $paddingLength;
        if ($partialLength > 0) {
            $result .= mb_substr($padding, 0, $partialLength);
        }
        return $result;
    }

    /**
     * getCommonPrefix of two strings.
     *
     * Strings are compared character by character (not byte by byte), so multibyte characters are never split
     * and are only part of the prefix if they match completely.
     *
     * @param string $first
     * @param string $second
     *
     * @return string All common characters from the beginning of both strings.
     */
    public static function getCommonPrefix($first, $second) {
        if ($first === $second) {
            return $first;
        }
        $length = min(mb_strlen($first), mb_strlen($second));
        for ($i = 0; $i < $length; $i++) {
            // $i is a character offset, so both sides must be read with mb_substr rather than byte indexing.
            if (mb_substr($first, $i, 1) !== mb_substr($second, $i, 1)) {
                return mb_substr($first, 0, $i);
            }
        }
        // The shorter string is a complete prefix of the longer one.
        return mb_substr($first, 0, $length);
    }

    /**
     * getCommonSuffix of two strings.
     *
     * @param string $first
     * @param string $second
     *
     * @return string All common characters from the end of both strings.
     */
    public static function getCommonSuffix($first, $second) {
        // A common suffix is the common prefix of the reversed strings, reversed back.
        $reversedCommon = self::getCommonPrefix(self::reverse($first), self::reverse($second));
        return self::reverse($reversedCommon);
    }

    /**
     * Reverse a string character by character.
     *
     * @param string $input
     *
     * @return string The reversed string.
     */
    public static function reverse($input) {
        $length = mb_strlen($input);
        $reversed = '';
        for ($i = $length - 1; $i !== -1; --$i) {
            $reversed .= mb_substr($input, $i, 1);
        }
        return $reversed;
    }

    /**
     * groupByCommonPrefix returns an array with a common key and a list of differing suffixes.
     *
     * e.g. passing an array `['sameHERE', 'sameTHERE']` would return
     * ```
     * 'same' => [
     *     'HERE',
     *     'THERE',
     * ]
     * ```
     *
     * This can be used to group several file paths by a common base.
     *
     * The strings are sorted before grouping, so the suffixes are returned in sorted order. An empty input
     * returns an empty array.
     *
     * @param string[] $strings
     *
     * @return string[][]
     */
    public static function groupByCommonPrefix($strings) {
        if (count($strings) === 0) {
            // Nothing to group; also avoids measuring the length of a null prefix below.
            return [];
        }
        sort($strings);
        $common = null;
        foreach ($strings as $folder) {
            if ($common === null) {
                $common = $folder;
            } else {
                $common = self::getCommonPrefix($common, $folder);
            }
        }
        $trimmedFolders = [];
        $commonLength = mb_strlen($common);
        foreach ($strings as $folder) {
            $trimmedFolders[$common][] = mb_substr($folder, $commonLength);
        }
        return $trimmedFolders;
    }

    /**
     * groupByCommonSuffix returns an array with a common key and a list of differing prefixes.
     *
     * e.g. passing an array `['sameHERE', 'sameTHERE']` would return
     * ```
     * 'HERE' => [
     *     'sameT',
     *     'same',
     * ]
     * ```
     *
     * The prefixes are ordered by sorting the reversed input strings.
     *
     * @param string[] $strings
     *
     * @return string[][]
     */
    public static function groupByCommonSuffix($strings) {
        // Reverse everything, group by prefix, then reverse keys and values back.
        foreach ($strings as $key => $string) {
            $strings[$key] = self::reverse($string);
        }
        $reversedGroups = self::groupByCommonPrefix($strings);
        $groups = [];
        foreach ($reversedGroups as $revKey => $revStrings) {
            $groups[self::reverse($revKey)] = array_map([self::class, 'reverse'], $revStrings);
        }
        return $groups;
    }
}
|
248
|
|
|
|