|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace CSanquer\ColibriCsv\Utility; |
|
4
|
|
|
|
|
5
|
|
|
use Ddeboer\Transcoder\Exception\UnsupportedEncodingException; |
|
6
|
|
|
use Ddeboer\Transcoder\IconvTranscoder; |
|
7
|
|
|
use Ddeboer\Transcoder\MbTranscoder; |
|
8
|
|
|
use Ddeboer\Transcoder\TranscoderInterface; |
|
9
|
|
|
use Ddeboer\Transcoder\Transcoder as BaseTranscoder; |
|
10
|
|
|
|
|
11
|
|
|
/**
 * Transcoder: charset and encoding manager class.
 *
 * Adapter class based on Ddeboer\Transcoder\TranscoderInterface. It delegates
 * the actual conversion to an iconv-based or an mbstring-based transcoder,
 * selected in the constructor according to the available PHP extensions.
 *
 * @author Charles SANQUER - <[email protected]>
 */
class Transcoder implements TranscoderInterface
{
    /**
     * Underlying transcoder. Stays null when the constructor could not select
     * an implementation; transcode() then returns its input untouched.
     *
     * @var TranscoderInterface|null
     */
    protected $transcoder;

    /**
     * Is mbstring extension available?
     *
     * @var bool
     */
    protected $mbstringEnabled;

    /**
     * Is iconv extension available?
     *
     * @var bool
     */
    protected $iconvEnabled;

    /**
     * Byte order marks indexed by encoding name.
     *
     * The UTF-7 entry is a list because that encoding has four possible marks
     * ("+/v8", "+/v9", "+/v+" and "+/v/"); every other entry is a single string.
     *
     * @var array
     */
    protected $bomList = [
        'UTF-7' => [
            // Third byte must be written "\x76" ("v"): a bare "\76" is an
            // octal escape in PHP and would yield ">" (0x3E) instead.
            "\x2B\x2F\x76\x38",
            "\x2B\x2F\x76\x39",
            "\x2B\x2F\x76\x2B",
            "\x2B\x2F\x76\x2F",
        ],
        'UTF-8' => "\xEF\xBB\xBF",
        'UTF-16BE' => "\xFE\xFF",
        'UTF-16LE' => "\xFF\xFE",
        'UTF-32BE' => "\x00\x00\xFE\xFF",
        'UTF-32LE' => "\xFF\xFE\x00\x00",
    ];

    /**
     * Select the underlying transcoder.
     *
     * iconv is preferred when it is available and mbstring is not forced;
     * otherwise mbstring is used when available. If neither branch applies,
     * no transcoder is set.
     *
     * @param string $defaultEncoding default encoding handed to the underlying transcoder;
     *                                an empty value is replaced by 'UTF-8'
     * @param bool   $forceMbString   skip iconv even when it is available
     */
    public function __construct($defaultEncoding = 'UTF-8', $forceMbString = false)
    {
        $this->iconvEnabled = function_exists('iconv');
        $this->mbstringEnabled = function_exists('mb_convert_encoding');

        $defaultEncoding = empty($defaultEncoding) ? 'UTF-8' : $defaultEncoding;

        if ($this->iconvEnabled && !$forceMbString) {
            $this->transcoder = new IconvTranscoder($defaultEncoding);
        } elseif ($this->mbstringEnabled) {
            // mbstring gets the "Windows-125x" spelling of the CP125x code pages.
            $this->transcoder = new MbTranscoder($this->getWindowsCPEncoding($defaultEncoding));
        }
    }

    /**
     * Detect the encoding of a string.
     *
     * Detection relies on mb_detect_encoding() in strict mode against a fixed
     * candidate list, so it only happens when mbstring is available.
     *
     * @param string $str      string to inspect
     * @param string $fallback value returned when mbstring is unavailable or detection fails
     *
     * @return string detected encoding name, or $fallback
     */
    public function detectEncoding($str, $fallback = 'UTF-8')
    {
        $encoding = null;

        if ($this->mbstringEnabled) {
            $encodingList = [
                'ASCII',
                'UTF-8',
                'UTF-16BE',
                'UTF-16LE',
                'UTF-32BE',
                'UTF-32LE',
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'Windows-1251',
                'Windows-1252',
                'Windows-1254',
                'UTF-7',
            ];

            $encoding = mb_detect_encoding($str, $encodingList, true);
        }

        // mb_detect_encoding() yields false on failure; fall back in that case too.
        return $encoding ? $encoding : $fallback;
    }

    /**
     * Transcode a string from one into another encoding.
     *
     * The string is returned unchanged when no underlying transcoder is set
     * or when $from and $to compare equal.
     *
     * @param string      $string        String
     * @param string      $from          From encoding (optional) default = auto (detected via detectEncoding())
     * @param string      $to            To encoding (optional) default = UTF-8
     * @param string|null $iconvTranslit (optional) default = null Iconv translit option,
     *                                   possible values (case-insensitive): 'translit', 'ignore', null;
     *                                   only used with the iconv transcoder
     *
     * @return string
     *
     * @throws UnsupportedEncodingException
     */
    public function transcode($string, $from = 'auto', $to = 'UTF-8', $iconvTranslit = null)
    {
        // Loose comparison kept on purpose: it is the historical contract of this method.
        if (!$this->transcoder || $from == $to) {
            return $string;
        }

        if ($from === 'auto' || empty($from)) {
            $from = $this->detectEncoding($string);
        }

        if ($this->transcoder instanceof IconvTranscoder) {
            // Cast first: passing null to strtoupper() is deprecated since PHP 8.1.
            $iconvTranslit = strtoupper((string) $iconvTranslit);
            if (in_array($iconvTranslit, ['TRANSLIT', 'IGNORE'], true)) {
                $to .= '//'.$iconvTranslit;
            }
        } elseif ($this->transcoder instanceof MbTranscoder) {
            $from = $this->getWindowsCPEncoding($from);
            $to = $this->getWindowsCPEncoding($to);
        }

        return $this->transcoder->transcode($string, $from, $to);
    }

    /**
     * Get the BOM registered for the given encoding.
     *
     * @param string $encoding exact key of the BOM list (e.g. 'UTF-8', 'UTF-16LE')
     *
     * @return string|string[]|null BOM bytes; a list of the possible BOMs for 'UTF-7';
     *                              null when the encoding has no registered BOM
     */
    public function getBOM($encoding = 'UTF-8')
    {
        return isset($this->bomList[$encoding]) ? $this->bomList[$encoding] : null;
    }

    /**
     * Get a valid Windows code page encoding name for mbstring.
     *
     * 'CP1251', 'CP1252' and 'CP1254' (exact, case-sensitive match) become
     * 'Windows-1251', 'Windows-1252' and 'Windows-1254'; any other value is
     * returned as is.
     *
     * @param string $encoding
     *
     * @return string
     */
    protected function getWindowsCPEncoding($encoding)
    {
        if (in_array($encoding, ['CP1251', 'CP1252', 'CP1254'], true)) {
            // Drop the "CP" prefix and keep the four-digit code page number.
            return 'Windows-'.substr($encoding, 2, 4);
        }

        return $encoding;
    }
}
|
171
|
|
|
|