1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
/* |
6
|
|
|
* This file is part of the colinodell/indentation package. |
7
|
|
|
* |
8
|
|
|
* (c) Colin O'Dell <[email protected]> |
9
|
|
|
* |
10
|
|
|
* detect() method forked from detect-indent, |
11
|
|
|
* (c) Sindre Sorhus <[email protected]> (https://sindresorhus.com) |
12
|
|
|
* |
13
|
|
|
* For the full copyright and license information, please view the LICENSE |
14
|
|
|
* file that was distributed with this source code. |
15
|
|
|
*/ |
16
|
|
|
|
17
|
|
|
namespace ColinODell\Indentation; |
18
|
|
|
|
19
|
|
|
/**
 * Value object describing an indentation style (a number of spaces or tabs),
 * plus static helpers to detect, change, add and remove indentation in strings.
 *
 * All helpers share one definition of a "line break": CRLF, LF, CR, VT or FF.
 */
final class Indentation
{
    public const TYPE_SPACE   = 'space';
    public const TYPE_TAB     = 'tab';
    public const TYPE_UNKNOWN = 'unknown';

    /**
     * PCRE fragment matching a single line break (CRLF, LF, CR, VT or FF).
     *
     * This is deliberately not `\R`: without the `u` modifier `\R` also matches the
     * single byte 0x85, which is a continuation byte in UTF-8 (e.g. "Å" is C3 85),
     * so lines would be split in the middle of a multi-byte character.
     */
    private const LINE_BREAK = '\r\n|[\n\r\x0b\f]';

    /** PCRE fragment matching any single byte that is not part of a line break. */
    private const NOT_LINE_BREAK = '[^\n\r\x0b\f]';

    /**
     * Zero-width PCRE fragment matching the start of a line: the start of the
     * subject, or a position immediately preceded by a line-break character.
     */
    private const LINE_START = '(?<![^\n\r\x0b\f])';

    /** @var int<0, max> */
    public int $amount;

    /** @var self::TYPE_* */
    public string $type;

    /**
     * @param int<0, max>  $amount Number of indent characters per indentation level
     * @param self::TYPE_* $type   Kind of character used to indent
     */
    public function __construct(int $amount, string $type)
    {
        $this->amount = $amount;
        $this->type   = $type;
    }

    /**
     * @return int<0, max>
     */
    public function getAmount(): int
    {
        return $this->amount;
    }

    /**
     * @return self::TYPE_*
     */
    public function getType(): string
    {
        return $this->type;
    }

    /**
     * Render one level of this indentation as a literal string.
     *
     * Returns an empty string when the amount is zero or the type is unknown;
     * any type other than TYPE_SPACE is rendered with tab characters.
     */
    public function __toString(): string
    {
        if ($this->amount === 0 || $this->type === self::TYPE_UNKNOWN) {
            return '';
        }

        $indentCharacter = $this->type === self::TYPE_SPACE ? ' ' : "\t";

        return \str_repeat($indentCharacter, $this->amount);
    }

    /**
     * Detect the indentation of the given string.
     *
     * Returns an Indentation of amount 0 and TYPE_UNKNOWN when no indented line is found.
     *
     * @throws \InvalidArgumentException if the string cannot be split into lines
     */
    public static function detect(string $string): Indentation
    {
        // Identify indents while skipping single space indents to avoid common edge cases (e.g. code comments)
        $indents = self::makeIndentsMap($string, true);

        // If no indents are identified, run again and include all indents for comprehensive detection
        if ($indents === []) {
            $indents = self::makeIndentsMap($string, false);
        }

        $keyOfMostUsedIndent = self::getMostUsedKey($indents);
        if ($keyOfMostUsedIndent === null) {
            return new self(0, self::TYPE_UNKNOWN);
        }

        [$amount, $type] = self::decodeIndentsKey($keyOfMostUsedIndent);

        return new self(\max(0, $amount), $type);
    }

    /**
     * Change the indentation from one style to another.
     *
     * The current style is detected from $string. Each full level of the old
     * indentation at the start of a line is replaced by one level of $newStyle;
     * any remainder (e.g. alignment spaces) and all line breaks are kept as-is.
     * The string is returned unchanged when no indentation can be detected.
     *
     * @throws \InvalidArgumentException if the string cannot be split into lines
     */
    public static function change(string $string, Indentation $newStyle): string
    {
        $oldStyle = self::detect($string);

        if ($oldStyle->type === self::TYPE_UNKNOWN || $oldStyle->amount === 0) {
            return $string;
        }

        $parts = \preg_split('/(' . self::LINE_BREAK . ')/', $string, flags: \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            throw new \InvalidArgumentException('Bad input string');
        }

        // Loop-invariant values: build them once instead of once per line
        $newIndent        = (string) $newStyle;
        $oldIndentPattern = '/^(?:' . \preg_quote((string) $oldStyle, '/') . ')+/';

        $newContent = '';
        foreach ($parts as $i => $part) {
            // Captured line breaks are in the odd-numbered positions; copy them verbatim
            if ($i % 2 === 1) {
                $newContent .= $part;
                continue;
            }

            // Lines that do not start with at least one full level of the old indent are left alone
            if (\preg_match($oldIndentPattern, $part, $matches) !== 1) {
                $newContent .= $part;
                continue;
            }

            // The match is always a whole number of old indent levels
            $matchedLength = \strlen($matches[0]);
            $indentLevel   = \intdiv($matchedLength, $oldStyle->amount);

            $newContent .= \str_repeat($newIndent, $indentLevel) . \substr($part, $matchedLength);
        }

        return $newContent;
    }

    /**
     * Adds the given $indentation to the beginning of each non-empty line in the given $string.
     *
     * Empty lines are left untouched regardless of the line-ending style. If $indentation
     * renders as an empty string (zero amount or unknown type), $string is returned unchanged.
     */
    public static function indent(string $string, Indentation $indentation): string
    {
        $toAdd = (string) $indentation;
        if ($toAdd === '') {
            return $string;
        }

        // Insert at every line start that is followed by actual content (i.e. not by a line break or the end)
        $result = \preg_replace('/' . self::LINE_START . '(?=' . self::NOT_LINE_BREAK . ')/', $toAdd, $string);

        // Best effort: fall back to the input if PCRE reports an error
        return $result ?? $string;
    }

    /**
     * De-indent the given string, removing any leading indentation that is common to all lines.
     *
     * Empty lines are ignored. The string is returned unchanged if any non-empty line has no
     * indentation, or if the lines do not all start with the same kind of indent character.
     *
     * @throws \InvalidArgumentException if the string cannot be split into lines
     */
    public static function unindent(string $string): string
    {
        $smallestIndent = \PHP_INT_MAX;
        $commonType     = self::TYPE_UNKNOWN;

        foreach (self::iterateLines($string) as $indentation) {
            // Any lines with no leading indentation means we can't trim the entire string
            if ($indentation === null) {
                return $string;
            }

            [$amount, $type] = $indentation;

            $smallestIndent = \min($smallestIndent, $amount);
            if ($commonType === self::TYPE_UNKNOWN) {
                $commonType = $type;
            } elseif ($commonType !== $type) {
                // Don't trim if the leading indent types are different
                return $string;
            }
        }

        // Don't trim if no indented line was seen at all
        if ($smallestIndent === 0 || $smallestIndent === \PHP_INT_MAX || $commonType === self::TYPE_UNKNOWN) {
            return $string;
        }

        $commonIndent = (string) new Indentation($smallestIndent, $commonType);

        // Use the same notion of "line start" as iterateLines() so every measured line is trimmed
        $trimmed = \preg_replace('/' . self::LINE_START . \preg_quote($commonIndent, '/') . '/', '', $string);

        // Best effort: fall back to the input if PCRE reports an error
        return \is_string($trimmed) ? $trimmed : $string;
    }

    /**
     * Build usage statistics for every indentation change between consecutive lines.
     *
     * Keys are encoded indent type + size (see encodeIndentsKey()); each value is
     * [number of times used, weight], where the weight counts follow-up lines that
     * kept the same indentation.
     *
     * @return array<string, array{0: int, 1: int}>
     *
     * @throws \InvalidArgumentException if the string cannot be split into lines
     */
    private static function makeIndentsMap(string $string, bool $ignoreSingleSpaces): array
    {
        $indents = [];

        // Remember the size and type of the previous line's indentation
        $previousSize       = 0;
        $previousIndentType = null;

        // Indents key (indent type + size of the indents/unindents)
        $key = null;

        foreach (self::iterateLines($string) as $indentation) {
            // A non-indented line resets the tracking of the previous indentation
            if ($indentation === null) {
                $previousSize       = 0;
                $previousIndentType = null;
                continue;
            }

            [$indent, $indentType] = $indentation;

            // Ignore single space unless it's the only indent detected to prevent common false positives
            if ($ignoreSingleSpaces && $indentType === self::TYPE_SPACE && $indent === 1) {
                continue;
            }

            // Switching between tabs and spaces starts over from column zero
            if ($indentType !== $previousIndentType) {
                $previousSize = 0;
            }

            $previousIndentType = $indentType;
            $weight             = 0;
            $indentDifference   = $indent - $previousSize;
            $previousSize       = $indent;

            if ($indentDifference === 0) {
                // Same indent as the previous line: add weight to the key from the previous loop
                $weight++;
                \assert(\is_string($key));
            } else {
                $key = self::encodeIndentsKey($indentType, $indentDifference > 0 ? $indentDifference : -$indentDifference);
            }

            // Update the stats
            if (! isset($indents[$key])) {
                $indents[$key] = [1, 0];
            } else {
                $indents[$key][0]++;
                $indents[$key][1] += $weight;
            }
        }

        return $indents;
    }

    /**
     * Yield the leading indentation of every non-empty line, keyed by line index.
     *
     * Each value is [size in characters, type], or null when the line does not start
     * with a space or tab. Empty lines are skipped entirely.
     *
     * @return iterable<int, array{0: int<0, max>, 1: self::TYPE_*}|null>
     *
     * @throws \InvalidArgumentException if the string cannot be split into lines (raised on iteration)
     */
    private static function iterateLines(string $string): iterable
    {
        $lines = \preg_split('/' . self::LINE_BREAK . '/', $string);
        if ($lines === false) {
            throw new \InvalidArgumentException('Invalid string');
        }

        foreach ($lines as $i => $line) {
            // Ignore empty lines
            if ($line === '') {
                continue;
            }

            // Detect either spaces or tabs but not both to properly handle tabs for indentation and spaces for alignment
            if (\preg_match('/^(?:( )+|\t+)/', $line, $matches) !== 1) {
                yield $i => null;

                continue;
            }

            // The capture group only participates when the run consists of spaces
            $indentType = isset($matches[1]) ? self::TYPE_SPACE : self::TYPE_TAB;

            yield $i => [\strlen($matches[0]), $indentType];
        }
    }

    /**
     * Encode the indent type and amount as a string (e.g. 's4') for use as a compound key in the indents map.
     *
     * @param self::TYPE_* $indentType
     */
    private static function encodeIndentsKey(string $indentType, int $indentAmount): string
    {
        $typeCharacter = $indentType === self::TYPE_SPACE ? 's' : 't';

        return $typeCharacter . $indentAmount;
    }

    /**
     * Extract the indent type and amount from a key of the indents map.
     *
     * @return array{0: int, 1: self::TYPE_*}
     */
    private static function decodeIndentsKey(string $indentsKey): array
    {
        $type   = $indentsKey[0] === 's' ? self::TYPE_SPACE : self::TYPE_TAB;
        $amount = (int) \substr($indentsKey, 1);

        return [$amount, $type];
    }

    /**
     * Return the key (e.g. 's4') from the indents map that represents the most common indent,
     * or return null if there are no indents.
     *
     * Ties on the usage count are broken by the higher weight; on a full tie the earliest key wins.
     *
     * @param array<string, array{int, int}> $indents
     */
    private static function getMostUsedKey(array $indents): string|null
    {
        $result    = null;
        $maxUsed   = 0;
        $maxWeight = 0;

        foreach ($indents as $key => [$usedCount, $weight]) {
            $isBetter = $usedCount > $maxUsed
                || ($usedCount === $maxUsed && $weight > $maxWeight);

            if (! $isBetter) {
                continue;
            }

            $maxUsed   = $usedCount;
            $maxWeight = $weight;
            $result    = $key;
        }

        return $result;
    }
}
320
|
|
|
|