1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace CSanquer\ColibriCsv\Utility; |
4
|
|
|
|
5
|
|
|
use Ddeboer\Transcoder\Exception\UnsupportedEncodingException; |
6
|
|
|
use Ddeboer\Transcoder\IconvTranscoder; |
7
|
|
|
use Ddeboer\Transcoder\MbTranscoder; |
8
|
|
|
use Ddeboer\Transcoder\TranscoderInterface; |
9
|
|
|
use Ddeboer\Transcoder\Transcoder as BaseTranscoder; |
10
|
|
|
|
11
|
|
|
/**
 * Transcoder : Charset and encoding manager class
 * Adapter class based on Ddeboer\Transcoder\TranscoderInterface
 *
 * Wraps either an IconvTranscoder or an MbTranscoder, depending on which
 * PHP extensions are available, and adds encoding detection and BOM lookup.
 *
 * @author Charles SANQUER - <[email protected]>
 */
class Transcoder implements TranscoderInterface
{
    /**
     * Underlying transcoder. Stays null when no usable extension was found
     * by the constructor; transcode() then returns its input unchanged.
     *
     * @var TranscoderInterface|null
     */
    protected $transcoder;

    /**
     * Is mbstring extension available?
     *
     * @var boolean
     */
    protected $mbstringEnabled;

    /**
     * Is iconv extension available?
     *
     * @var boolean
     */
    protected $iconvEnabled;

    /**
     * Byte order marks indexed by encoding name.
     *
     * UTF-7 has four possible signatures ("+/v8", "+/v9", "+/v+", "+/v/"),
     * hence the nested list; every other entry is a single byte string.
     *
     * @var array
     */
    protected $bomList = [
        'UTF-7' => [
            // The third byte is 0x76 ("v"). It must be written as \x76:
            // "\76" would be parsed as an octal escape (0x3E, ">").
            "\x2B\x2F\x76\x38",
            "\x2B\x2F\x76\x39",
            "\x2B\x2F\x76\x2B",
            "\x2B\x2F\x76\x2F",
        ],
        'UTF-8' => "\xEF\xBB\xBF",
        'UTF-16BE' => "\xFE\xFF",
        'UTF-16LE' => "\xFF\xFE",
        'UTF-32BE' => "\x00\x00\xFE\xFF",
        'UTF-32LE' => "\xFF\xFE\x00\x00",
    ];

    /**
     * Pick the underlying transcoder: iconv when available (unless mbstring
     * is forced), otherwise mbstring when available, otherwise none.
     *
     * @param string $defaultEncoding default encoding handed to the underlying transcoder,
     *                                an empty value is replaced by 'UTF-8'
     * @param bool   $forceMbString   when true, iconv is never used even if it is available
     */
    public function __construct($defaultEncoding = 'UTF-8', $forceMbString = false)
    {
        $this->iconvEnabled = function_exists('iconv');
        $this->mbstringEnabled = function_exists('mb_convert_encoding');

        $defaultEncoding = empty($defaultEncoding) ? 'UTF-8' : $defaultEncoding;

        if ($this->iconvEnabled && !$forceMbString) {
            $this->transcoder = new IconvTranscoder($defaultEncoding);
        } elseif ($this->mbstringEnabled) {
            // mbstring gets the "Windows-125x" spelling instead of "CP125x"
            $this->transcoder = new MbTranscoder($this->getWindowsCPEncoding($defaultEncoding));
        }
    }

    /**
     * Detect the encoding of a string using mbstring (strict mode) against a
     * fixed list of candidate encodings.
     *
     * @param  string $str      string to inspect
     * @param  string $fallback value returned when mbstring is not available
     *                          or when no candidate encoding matches
     * @return string detected encoding name, or $fallback
     */
    public function detectEncoding($str, $fallback = 'UTF-8')
    {
        $encoding = null;
        if ($this->mbstringEnabled) {
            $encodingList = [
                'ASCII',
                'UTF-8',
                'UTF-16BE',
                'UTF-16LE',
                'UTF-32BE',
                'UTF-32LE',
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'Windows-1251',
                'Windows-1252',
                'Windows-1254',
                'UTF-7',
            ];

            $encoding = mb_detect_encoding($str, $encodingList, true);
        }

        // mb_detect_encoding() returns false when nothing matches
        return $encoding ? $encoding : $fallback;
    }

    /**
     * Transcode a string from one into another encoding
     *
     * The string is returned unchanged when no underlying transcoder is
     * available or when $from and $to are equal.
     *
     * @param string $string        String
     * @param string $from          From encoding (optional) default = auto
     *                              ('auto' or an empty value triggers detectEncoding())
     * @param string $to            To encoding (optional) default = UTF-8
     * @param string $iconvTranslit (optional) default = null Iconv translit option possible values : 'translit', 'ignore', null
     *                              (case-insensitive, only used with the iconv transcoder)
     *
     * @return string
     *
     * @throws UnsupportedEncodingException
     */
    public function transcode($string, $from = 'auto', $to = 'UTF-8', $iconvTranslit = null)
    {
        if ($this->transcoder && $from != $to) {
            if ($from == 'auto' || empty($from)) {
                $from = $this->detectEncoding($string);
            }

            if ($this->transcoder instanceof IconvTranscoder) {
                // Cast first: passing null to strtoupper() is deprecated since PHP 8.1
                $iconvTranslit = strtoupper((string) $iconvTranslit);
                // iconv options are appended to the target charset, e.g. "UTF-8//TRANSLIT"
                $to .= in_array($iconvTranslit, ['TRANSLIT', 'IGNORE'], true) ? '//'.$iconvTranslit : '';
            } elseif ($this->transcoder instanceof MbTranscoder) {
                $from = $this->getWindowsCPEncoding($from);
                $to = $this->getWindowsCPEncoding($to);
            }

            $string = $this->transcoder->transcode($string, $from, $to);
        }

        return $string;
    }

    /**
     * get BOM for given encoding
     *
     * @param  string $encoding encoding name, must match a key of the BOM list exactly
     * @return string|string[]|null BOM bytes, the list of the possible BOMs for UTF-7,
     *                              or null when the encoding has no known BOM
     */
    public function getBOM($encoding = 'UTF-8')
    {
        return isset($this->bomList[$encoding]) ? $this->bomList[$encoding] : null;
    }

    /**
     * get Valid Windows CP encoding name for mb_string
     *
     * Maps 'CP1251', 'CP1252' and 'CP1254' to 'Windows-1251', 'Windows-1252'
     * and 'Windows-1254'; any other value is returned untouched.
     *
     * @param  string $encoding
     * @return string
     */
    protected function getWindowsCPEncoding($encoding)
    {
        // Strict comparison so that non-string values (e.g. true) never match a code page name
        if (in_array($encoding, ['CP1251', 'CP1252', 'CP1254'], true)) {
            // Drop the "CP" prefix and keep the four digits
            return 'Windows-'.substr($encoding, 2, 4);
        }

        return $encoding;
    }
}
171
|
|
|
|