|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/**
 * TUtf8Converter class file
 *
 * @author Fabio Bas <[email protected]>
 * @link https://github.com/pradosoft/prado
 * @license https://github.com/pradosoft/prado/blob/master/LICENSE
 */
|
10
|
|
|
|
|
11
|
|
|
namespace Prado\Util; |
|
12
|
|
|
|
|
13
|
|
|
/**
 * TUtf8Converter class.
 *
 * TUtf8Converter is a simple wrapper around iconv functions to convert
 * strings from and to UTF-8.
 *
 * @author Fabio Bas <[email protected]>
 * @since 4.0.2
 */
|
22
|
|
|
class TUtf8Converter
{
	/**
	 * Convert strings to UTF-8 via iconv. NB, the result may not be UTF-8
	 * if the conversion failed.
	 *
	 * When $lang is null, a language appended to $from after a period
	 * (eg "ASCII.de") is split off and used as the LC_CTYPE locale for the
	 * duration of the conversion.
	 *
	 * @param string $string string to convert to UTF-8
	 * @param string $from source encoding
	 * @param ?string $lang Language of the encoding as accepted by PHP setLocale
	 * @return string UTF-8 encoded string, original string if iconv failed.
	 * @see https://www.php.net/manual/en/function.setlocale.php
	 * The $lang locale information is maintained per process, not per thread.
	 */
	public static function toUTF8($string, $from, $lang = null)
	{
		if ($from === 'UTF-8') {
			return $string;
		}
		if ($lang === null) {
			self::parseEncodingLanguage($from, $lang);
		}
		return self::convert($string, $from, 'UTF-8', $lang);
	}

	/**
	 * Convert UTF-8 strings to a different encoding. NB. The result
	 * may not have been encoded if iconv fails.
	 *
	 * When $lang is null, a language appended to $to after a period
	 * (eg "ASCII.de") is split off and used as the LC_CTYPE locale for the
	 * duration of the conversion.
	 *
	 * @param string $string the UTF-8 string for conversion
	 * @param string $to destination encoding
	 * @param ?string $lang Language of the encoding as accepted by PHP setLocale
	 * @return string encoded string, original string if iconv failed.
	 * @see https://www.php.net/manual/en/function.setlocale.php
	 * The $lang locale information is maintained per process, not per thread.
	 */
	public static function fromUTF8($string, $to, $lang = null)
	{
		if ($to === 'UTF-8') {
			return $string;
		}
		if ($lang === null) {
			self::parseEncodingLanguage($to, $lang);
		}
		return self::convert($string, 'UTF-8', $to, $lang);
	}

	/**
	 * This parses a Character Set Encoding for an appended/embedded language.
	 * eg "ASCII" can also be "ASCII.de" to designate German ASCII layout.
	 * In this example, at input $encoding is "ASCII.de" and on output $encoding
	 * is 'ASCII' with $lang is "de". When $encoding contains no period, both
	 * arguments are left untouched.
	 * @param string &$encoding The character set encoding with optional period and
	 *   language appended. On output, the encoding without the language.
	 * @param ?string &$lang The output language of the encoding.
	 */
	public static function parseEncodingLanguage(string &$encoding, &$lang)
	{
		if (strpos($encoding, '.') === false) {
			return;
		}
		// Split on the first period only: separator first, subject second,
		// and a limit of 2 so that exactly two parts are produced.
		$parts = explode('.', $encoding, 2);
		$encoding = $parts[0];
		$lang = $parts[1];
	}

	/**
	 * Runs iconv between two encodings, temporarily switching the LC_CTYPE
	 * locale to $lang when one is given.
	 * @param string $string the string to convert
	 * @param string $from source encoding
	 * @param string $to destination encoding
	 * @param ?string $lang locale to activate during the conversion, null for none
	 * @return string the converted string, or $string unchanged if iconv failed.
	 */
	private static function convert($string, $from, $to, $lang)
	{
		$previous = false;
		if ($lang !== null) {
			// '0' queries the current locale without changing it.
			$previous = setlocale(LC_CTYPE, '0');
			setlocale(LC_CTYPE, $lang);
		}
		try {
			$converted = iconv($from, $to, $string);
		} finally {
			// Restore even if an error handler turned an iconv notice into an
			// exception; skip when the previous locale could not be read, as
			// passing false would be coerced to '' and pick up the environment.
			if (is_string($previous)) {
				setlocale(LC_CTYPE, $previous);
			}
		}
		return $converted !== false ? $converted : $string;
	}
}
|
102
|
|
|
|