1
|
|
|
<?php |
2
|
|
|
namespace nochso\Omni; |
3
|
|
|
|
/**
 * Strings class provides methods for string handling missing from default PHP.
 *
 * `mb_*` methods are used where sensible, so make sure to pass UTF-8 strings.
 */
final class Strings {
    /**
     * Maps control characters that have a short escape sequence to the
     * literal (two character) representation of that sequence.
     *
     * @var string[]
     */
    private static $controlCharMap = [
        "\n" => '\n',
        "\r" => '\r',
        "\t" => '\t',
        "\v" => '\v',
        "\e" => '\e',
        "\f" => '\f',
    ];

    /**
     * Matches a single ASCII control character (0x00-0x1F and 0x7F).
     */
    const CONTROL_CHAR_PATTERN = '/[\x00-\x1F\x7F]/';

    /**
     * startsWith returns true if the input begins with a prefix.
     *
     * The comparison is byte-wise and case-sensitive. An empty prefix always matches.
     *
     * @param string $input
     * @param string $prefix
     *
     * @return bool
     */
    public static function startsWith($input, $prefix) {
        // strncmp with a length of zero yields 0, so an empty prefix matches any input,
        // which is consistent with endsWith and independent of substr's return value
        // for empty strings.
        return strncmp($input, $prefix, strlen($prefix)) === 0;
    }

    /**
     * endsWith returns true if the input ends with a suffix.
     *
     * The comparison is byte-wise and case-sensitive. An empty suffix always matches.
     *
     * @param string $input
     * @param string $suffix
     *
     * @return bool
     */
    public static function endsWith($input, $suffix) {
        $suffixLength = strlen($suffix);
        if ($suffixLength === 0) {
            // substr($input, -0) would return the whole input, so handle this explicitly.
            return true;
        }
        return substr($input, -$suffixLength) === $suffix;
    }

    /**
     * getMostFrequentNeedle by counting occurrences of each needle in haystack.
     *
     * @param string $haystack Haystack to be searched in.
     * @param array  $needles  Needles to be counted. Empty needles are ignored.
     *
     * @return string|null The most occurring needle. If counts are tied, the first tied needle is returned. If no
     *                     needles were found, `null` is returned.
     */
    public static function getMostFrequentNeedle($haystack, array $needles) {
        $maxCount = 0;
        $maxNeedle = null;
        foreach ($needles as $needle) {
            // An empty needle can not be counted: mb_substr_count rejects it.
            if ($needle === '' || $needle === null) {
                continue;
            }
            $newCount = mb_substr_count($haystack, $needle);
            // Strictly greater, so the first of several tied needles wins.
            if ($newCount > $maxCount) {
                $maxCount = $newCount;
                $maxNeedle = $needle;
            }
        }
        return $maxNeedle;
    }

    /**
     * escapeControlChars by replacing line feeds, tabs, etc. to their escaped representation.
     *
     * e.g. an actual line feed will return '\n'
     *
     * Existing backslashes are doubled first. Control characters without a short
     * escape sequence are returned in hexadecimal notation, e.g. '\x00'.
     *
     * @param string $input
     *
     * @return string
     */
    public static function escapeControlChars($input) {
        $escaper = function ($chars) {
            // $chars holds the regex match; index 0 is the single matched control character.
            $char = $chars[0];
            if (isset(self::$controlCharMap[$char])) {
                return self::$controlCharMap[$char];
            }
            return sprintf('\x%02X', ord($char));
        };
        // Escape literal backslashes before adding new ones for the control characters.
        $output = str_replace('\\', '\\\\', $input);
        return preg_replace_callback(self::CONTROL_CHAR_PATTERN, $escaper, $output);
    }

    /**
     * padMultibyte strings to a certain length with another string.
     *
     * @param string $input       The input string to be padded.
     * @param int    $padLength   If the pad length is smaller than the input length, no padding takes place.
     * @param string $padding     Optional, defaults to a space character. Can be more than one character. The padding
     *                            may be truncated if the required number of padding characters can't be evenly
     *                            divided.
     * @param int    $paddingType Optional, defaults to STR_PAD_RIGHT. Must be one of STR_PAD_LEFT, STR_PAD_RIGHT or
     *                            STR_PAD_BOTH.
     *
     * @throws \InvalidArgumentException If the padding type is unknown or the padding string is empty.
     *
     * @return string The padded string.
     */
    public static function padMultibyte($input, $padLength, $padding = ' ', $paddingType = STR_PAD_RIGHT) {
        if ($paddingType !== STR_PAD_LEFT && $paddingType !== STR_PAD_RIGHT && $paddingType !== STR_PAD_BOTH) {
            throw new \InvalidArgumentException('Padding type must be one of STR_PAD_LEFT, STR_PAD_RIGHT or STR_PAD_BOTH.');
        }
        $paddingLength = mb_strlen($padding);
        if ($paddingLength === 0) {
            throw new \InvalidArgumentException('Padding string must not be empty.');
        }
        $inputLength = mb_strlen($input);
        if ($inputLength >= $padLength) {
            // Nothing to fill.
            return $input;
        }
        $freeLength = $padLength - $inputLength;
        if ($paddingType === STR_PAD_BOTH) {
            // Original str_pad prefers trailing padding: the larger half goes to the right.
            // ceil() returns a float, so cast to keep all further length math integer based.
            $leftPadLength = $padLength - (int) ceil($freeLength / 2);
            // Reuse the below left/right implementation
            return self::padMultibyte(
                self::padMultibyte($input, $leftPadLength, $padding, STR_PAD_LEFT),
                $padLength,
                $padding,
                STR_PAD_RIGHT
            );
        }
        // Whole repetitions of the padding string. The division may be fractional,
        // so it is floored explicitly instead of relying on implicit float to int conversion.
        $pad = str_repeat($padding, (int) floor($freeLength / $paddingLength));
        // Remaining characters that need a truncated copy of the padding string.
        $partialPadLength = $freeLength % $paddingLength;
        if ($partialPadLength > 0) {
            $pad .= mb_substr($padding, 0, $partialPadLength);
        }
        if ($paddingType === STR_PAD_LEFT) {
            return $pad . $input;
        }
        return $input . $pad;
    }

    /**
     * getCommonPrefix of two strings.
     *
     * Strings are compared character by character (not byte by byte), so a
     * multibyte character is either part of the prefix as a whole or not at all.
     *
     * @param string $first
     * @param string $second
     *
     * @return string All common characters from the beginning of both strings.
     */
    public static function getCommonPrefix($first, $second) {
        if ($first === $second) {
            return $first;
        }
        $length = min(mb_strlen($first), mb_strlen($second));
        for ($i = 0; $i < $length; $i++) {
            // $i is a character offset, so it must not be used as a byte offset ($first[$i]).
            if (mb_substr($first, $i, 1) !== mb_substr($second, $i, 1)) {
                return mb_substr($first, 0, $i);
            }
        }
        // The shorter string is a complete prefix of the longer one.
        return mb_substr($first, 0, $length);
    }

    /**
     * getCommonSuffix of two strings.
     *
     * @param string $first
     * @param string $second
     *
     * @return string All common characters from the end of both strings.
     */
    public static function getCommonSuffix($first, $second) {
        // The common suffix is the reversed common prefix of both reversed strings.
        $reversedCommon = self::getCommonPrefix(self::reverse($first), self::reverse($second));
        return self::reverse($reversedCommon);
    }

    /**
     * Reverse a string.
     *
     * The string is reversed character by character using `mb_substr`.
     *
     * @param string $input
     *
     * @return string The reversed string.
     */
    public static function reverse($input) {
        $length = mb_strlen($input);
        $reversed = '';
        for ($i = $length - 1; $i >= 0; --$i) {
            $reversed .= mb_substr($input, $i, 1);
        }
        return $reversed;
    }

    /**
     * groupByCommonPrefix returns an array with a common key and a list of differing suffixes.
     *
     * e.g. passing an array `['sameHERE', 'sameTHERE']` would return
     * ```
     * 'same' => [
     *     'HERE',
     *     'THERE',
     * ]
     * ```
     *
     * This can be used to group several file paths by a common base.
     *
     * @param string[] $strings
     *
     * @return string[][] A single group keyed by the common prefix. The suffixes follow the sorted order of the
     *                    input. An empty input returns an empty array.
     */
    public static function groupByCommonPrefix($strings) {
        if (count($strings) === 0) {
            return [];
        }
        sort($strings);
        // Narrow down the prefix shared by all strings.
        $common = null;
        foreach ($strings as $folder) {
            if ($common === null) {
                $common = $folder;
            } else {
                $common = self::getCommonPrefix($common, $folder);
            }
        }
        // Strip the shared prefix from every string.
        $trimmedFolders = [];
        $commonLength = mb_strlen($common);
        foreach ($strings as $folder) {
            $trimmedFolders[$common][] = mb_substr($folder, $commonLength);
        }
        return $trimmedFolders;
    }

    /**
     * groupByCommonSuffix returns an array with a common key and a list of differing prefixes.
     *
     * e.g. passing an array `['sameHERE', 'sameTHERE']` would return
     * ```
     * 'HERE' => [
     *     'sameT',
     *     'same',
     * ]
     * ```
     *
     * The prefixes are ordered by their reversed input strings.
     *
     * @param string[] $strings
     *
     * @return string[][]
     */
    public static function groupByCommonSuffix($strings) {
        // Grouping reversed strings by prefix is the same as grouping by suffix.
        foreach ($strings as $key => $string) {
            $strings[$key] = self::reverse($string);
        }
        $reversedGroups = self::groupByCommonPrefix($strings);
        // Undo the reversal for both the group key and its members.
        $groups = [];
        foreach ($reversedGroups as $revKey => $revStrings) {
            $groups[self::reverse($revKey)] = array_map([self::class, 'reverse'], $revStrings);
        }
        return $groups;
    }
}
248
|
|
|
|