1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace PhpMyAdmin; |
6
|
|
|
|
7
|
|
|
use function array_intersect; |
8
|
|
|
use function array_map; |
9
|
|
|
use function explode; |
10
|
|
|
use function fclose; |
11
|
|
|
use function feof; |
12
|
|
|
use function fgets; |
13
|
|
|
use function fopen; |
14
|
|
|
use function function_exists; |
15
|
|
|
use function fwrite; |
16
|
|
|
use function iconv; |
17
|
|
|
use function mb_convert_encoding; |
18
|
|
|
use function mb_convert_kana; |
19
|
|
|
use function mb_detect_encoding; |
20
|
|
|
use function mb_list_encodings; |
21
|
|
|
use function recode_string; |
22
|
|
|
use function tempnam; |
23
|
|
|
use function unlink; |
24
|
|
|
|
25
|
|
|
/**
 * Encoding conversion helper class.
 *
 * Wraps the available charset conversion backends (iconv, recode, mbstring)
 * behind one static API and provides helpers for Japanese (Kanji) encodings.
 * The selected backend is kept in static state and is detected lazily on
 * first use.
 */
class Encoding
{
    /**
     * None encoding conversion engine
     */
    public const ENGINE_NONE = 0;

    /**
     * iconv encoding conversion engine
     */
    public const ENGINE_ICONV = 1;

    /**
     * recode encoding conversion engine
     */
    public const ENGINE_RECODE = 2;

    /**
     * mbstring encoding conversion engine
     */
    public const ENGINE_MB = 3;

    /**
     * Chosen encoding engine, null until initEngine() or setEngine() ran
     *
     * @var int|null
     */
    private static $engine = null;

    /**
     * Map of conversion engine configurations
     *
     * Each entry contains:
     *
     * - function to detect
     * - engine constant
     * - extension name to warn when missing
     *
     * The 'none' entry names no real function ('isset' is a language
     * construct); initEngine() never probes it.
     *
     * @var array
     */
    private static $enginemap = [
        'iconv' => [
            'iconv',
            self::ENGINE_ICONV,
            'iconv',
        ],
        'recode' => [
            'recode_string',
            self::ENGINE_RECODE,
            'recode',
        ],
        'mb' => [
            'mb_convert_encoding',
            self::ENGINE_MB,
            'mbstring',
        ],
        'none' => [
            'isset',
            self::ENGINE_NONE,
            '',
        ],
    ];

    /**
     * Order of automatic detection of engines
     *
     * @var array
     */
    private static $engineorder = [
        'iconv',
        'mb',
        'recode',
    ];

    /**
     * Kanji encodings list (comma separated, in detection order)
     *
     * @var string
     */
    private static $kanjiEncodings = 'ASCII,SJIS,EUC-JP,JIS';

    /**
     * Initializes encoding engine detecting available backends.
     *
     * Honours $GLOBALS['cfg']['RecodingEngine'] when it names a known engine
     * whose backing function exists; otherwise the engines are probed in the
     * order given by self::$engineorder, and ENGINE_NONE is used when none of
     * them is available.
     */
    public static function initEngine(): void
    {
        $engine = $GLOBALS['cfg']['RecodingEngine'] ?? 'auto';

        /* Use user configuration */
        if (isset(self::$enginemap[$engine])) {
            [$function, $constant, $extension] = self::$enginemap[$engine];

            // "none" has no backing function to probe: function_exists()
            // always reports false for language constructs such as isset,
            // so an explicit request to disable recoding must be honoured
            // here instead of being treated as a missing extension.
            if ($constant === self::ENGINE_NONE || function_exists($function)) {
                self::$engine = $constant;

                return;
            }

            Core::warnMissingExtension($extension);
        }

        /* Autodetection */
        foreach (self::$engineorder as $candidate) {
            if (function_exists(self::$enginemap[$candidate][0])) {
                self::$engine = self::$enginemap[$candidate][1];

                return;
            }
        }

        /* Fallback to none conversion */
        self::$engine = self::ENGINE_NONE;
    }

    /**
     * Setter for engine. Use with caution, mostly useful for testing.
     *
     * @param int $engine Engine encoding (one of the ENGINE_* constants)
     */
    public static function setEngine(int $engine): void
    {
        self::$engine = $engine;
    }

    /**
     * Checks whether there is any charset conversion supported
     *
     * Triggers engine detection when no engine has been selected yet.
     */
    public static function isSupported(): bool
    {
        if (self::$engine === null) {
            self::initEngine();
        }

        return self::$engine !== self::ENGINE_NONE;
    }

    /**
     * Converts encoding of text according to parameters with detected
     * conversion function.
     *
     * The text is returned unchanged when both charsets are identical, when
     * no conversion engine is available, or when the engine reports a failure
     * by returning false.
     *
     * @param string $src_charset  source charset
     * @param string $dest_charset target charset
     * @param string $what         what to convert
     *
     * @return string converted text
     *
     * @access public
     */
    public static function convertString(
        string $src_charset,
        string $dest_charset,
        string $what
    ): string {
        if ($src_charset === $dest_charset) {
            return $what;
        }

        if (self::$engine === null) {
            self::initEngine();
        }

        switch (self::$engine) {
            case self::ENGINE_RECODE:
                $converted = recode_string(
                    $src_charset . '..' . $dest_charset,
                    $what
                );
                break;

            case self::ENGINE_ICONV:
                $converted = iconv(
                    $src_charset,
                    $dest_charset .
                    ($GLOBALS['cfg']['IconvExtraParams'] ?? ''),
                    $what
                );
                break;

            case self::ENGINE_MB:
                $converted = mb_convert_encoding(
                    $what,
                    $dest_charset,
                    $src_charset
                );
                break;

            default:
                return $what;
        }

        // The backends signal failure with false, which would violate the
        // declared string return type under strict_types.
        return $converted === false ? $what : $converted;
    }

    /**
     * Detects whether Kanji encoding is available
     *
     * This is the case when the global language is Japanese ('ja').
     */
    public static function canConvertKanji(): bool
    {
        return ($GLOBALS['lang'] ?? null) === 'ja';
    }

    /**
     * Getter for Kanji encodings. Use with caution, mostly useful for testing.
     *
     * @return string comma separated Kanji encodings list
     */
    public static function getKanjiEncodings(): string
    {
        return self::$kanjiEncodings;
    }

    /**
     * Setter for Kanji encodings. Use with caution, mostly useful for testing.
     *
     * @param string $value Kanji encodings list
     */
    public static function setKanjiEncodings(string $value): void
    {
        self::$kanjiEncodings = $value;
    }

    /**
     * Reverses SJIS & EUC-JP position in the encoding codes list
     *
     * When EUC-JP is currently second in the list, the list becomes
     * 'ASCII,SJIS,EUC-JP,JIS'; in every other case it becomes
     * 'ASCII,EUC-JP,SJIS,JIS'.
     */
    public static function kanjiChangeOrder(): void
    {
        $parts = explode(',', self::$kanjiEncodings);

        // A list with a single entry has no second element to inspect.
        $second = $parts[1] ?? '';

        self::$kanjiEncodings = $second === 'EUC-JP'
            ? 'ASCII,SJIS,EUC-JP,JIS'
            : 'ASCII,EUC-JP,SJIS,JIS';
    }

    /**
     * Kanji string encoding convert
     *
     * The source encoding is detected among the configured Kanji encodings,
     * with 'utf-8' assumed when detection fails.
     *
     * @param string $str  the string to convert
     * @param string $enc  the destination encoding code
     * @param string $kana set 'kana' convert to JIS-X208-kana
     *
     * @return string the converted string
     */
    public static function kanjiStrConv(string $str, string $enc, string $kana): string
    {
        if ($enc === '' && $kana === '') {
            return $str;
        }

        $string_encoding = mb_detect_encoding($str, self::$kanjiEncodings);
        if ($string_encoding === false) {
            $string_encoding = 'utf-8';
        }

        if ($kana === 'kana') {
            $str = mb_convert_kana($str, 'KV', $string_encoding);
        }

        if ($enc === '' || $string_encoding === $enc) {
            return $str;
        }

        $converted = mb_convert_encoding($str, $enc, $string_encoding);

        // Keep the declared string return type when the conversion fails.
        return $converted === false ? $str : $converted;
    }

    /**
     * Kanji file encoding convert
     *
     * Writes a converted copy of the file into the upload temporary directory
     * and removes the original. The original file name is returned, and the
     * original file is left in place, when nothing has to be converted or
     * when the temporary file or the source file cannot be opened.
     *
     * @param string $file the name of the file to convert
     * @param string $enc  the destination encoding code
     * @param string $kana set 'kana' convert to JIS-X208-kana
     *
     * @return string the name of the converted file
     */
    public static function kanjiFileConv(string $file, string $enc, string $kana): string
    {
        if ($enc === '' && $kana === '') {
            return $file;
        }

        $tmpfname = (string) tempnam($GLOBALS['config']->getUploadTempDir(), $enc);
        if ($tmpfname === '') {
            // tempnam() failed, there is nothing to write to.
            return $file;
        }

        $fpd = fopen($tmpfname, 'wb');
        if ($fpd === false) {
            // Do not leave the empty file created by tempnam() behind.
            unlink($tmpfname);

            return $file;
        }

        $fps = fopen($file, 'r');
        if ($fps === false) {
            // Release the destination that was already opened.
            fclose($fpd);
            unlink($tmpfname);

            return $file;
        }

        // The detection order is swapped for the duration of the conversion.
        self::kanjiChangeOrder();
        try {
            // fgets() returns false both at end of file and on read errors,
            // so the loop terminates in either case.
            while (($line = fgets($fps, 4096)) !== false) {
                fwrite($fpd, self::kanjiStrConv($line, $enc, $kana));
            }
        } finally {
            // Restore the order and release the handles even when the
            // conversion throws.
            self::kanjiChangeOrder();
            fclose($fps);
            fclose($fpd);
        }

        unlink($file);

        return $tmpfname;
    }

    /**
     * Defines radio form fields to switch between encoding modes
     *
     * @return string HTML code for the radio controls
     */
    public static function kanjiEncodingForm(): string
    {
        $template = new Template();

        return $template->render('encoding/kanji_encoding_form');
    }

    /**
     * Lists available encodings.
     *
     * Returns the configured $GLOBALS['cfg']['AvailableCharsets'] (an empty
     * list when not configured), restricted to the encodings mbstring knows
     * when mbstring is the active engine.
     *
     * @return array
     */
    public static function listEncodings(): array
    {
        if (self::$engine === null) {
            self::initEngine();
        }

        $available = $GLOBALS['cfg']['AvailableCharsets'] ?? [];

        /* Most engines do not support listing */
        if (self::$engine !== self::ENGINE_MB) {
            return $available;
        }

        return array_intersect(
            array_map('strtolower', mb_list_encodings()),
            $available
        );
    }
}
369
|
|
|
|