Passed
Pull Request — master (#204)
by
unknown
23:41
created

midcom_services_i18n::convert_from_utf8()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 2.0625

Importance

Changes 0
Metric Value
cc 2
eloc 3
nc 2
nop 1
dl 0
loc 6
ccs 3
cts 4
cp 0.75
crap 2.0625
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * @package midcom.services
4
 * @author The Midgard Project, http://www.midgard-project.org
5
 * @copyright The Midgard Project, http://www.midgard-project.org
6
 * @license http://www.gnu.org/licenses/lgpl.html GNU Lesser General Public License
7
 */
8
9
use Symfony\Component\Intl\Intl;
10
use Symfony\Component\Intl\Languages;
11
use Symfony\Component\Intl\Locales;
12
13
/**
 * This is a basic MidCOM Service which provides an interface to the
 * various I18n facilities of MidCOM.
 *
 * The I18n service serves as a central access point for all aspects
 * around internationalization and localization. It provides auto-detection
 * of language data using HTTP Content-Negotiation along with a cookie-based
 * fallback.
 *
 * This class is able to run independently from midcom_application
 * due to the fact that it is used in the cache_hit code.
 *
 * Use this class to set the language preferences (charset and locale) and to gain
 * access to the l10n string databases. A few helpers which can be used to ease
 * translation work (like charset conversion) are in here as well.
 *
 * All language codes used here are ISO 639-1 two-letter codes.
 *
 * @package midcom.services
 */
class midcom_services_i18n
{
    /**
     * Fallback language, in case the selected language is not available.
     *
     * @var string
     */
    private $_fallback_language;

    /**
     * Cache of all instantiated localization classes, indexed by "component/database".
     *
     * @var midcom_services_i18n_l10n[]
     */
    private $_obj_l10n = [];

    /**
     * Current language.
     *
     * @var string
     */
    private $_current_language;

    /**
     * Current character set (always stored in lower case)
     *
     * @var string
     */
    private $_current_charset = 'utf-8';

    /**
     * Initialize the i18n framework by determining the desired language
     * from these different sources: HTTP Content Negotiation, Client side language cookie.
     *
     * The language active right after startup is the fallback language, which is
     * read from the MidCOM configuration directive <i>i18n_fallback_language</i>.
     * It is then overridden by the cookie or the HTTP negotiation data, if either of
     * them names a known language. Use the setter methods below if you want
     * something else after initialization.
     */
    public function __construct()
    {
        $this->_fallback_language = midcom::get()->config->get('i18n_fallback_language');
        $this->set_language($this->_fallback_language);

        $this->_set_startup_langs();
    }

    /**
     * Scans the HTTP negotiation and the cookie data and tries to set a
     * suitable default language. Cookies have priority here.
     *
     * The cookie is client-controlled, so its language goes through set_language()
     * (which validates it and updates the locale) instead of being assigned directly.
     * If the cookie language is rejected, the HTTP negotiation data is tried next.
     */
    private function _set_startup_langs()
    {
        $cookie_data = $this->_read_cookie();
        if (   $cookie_data !== null
            && $this->set_language($cookie_data['language'])) {
            $this->set_charset($cookie_data['charset']);
            return;
        }

        // The list is sorted by descending q-value, so the first known language wins
        foreach (array_keys($this->_read_http_negotiation()) as $name) {
            if ($this->set_language($name)) {
                break;
            }
        }
    }

    /**
     * Try to pull the user's preferred language and
     * character set out of a cookie named "midcom_services_i18n".
     *
     * The cookie is expected to hold a base64-encoded, serialized array with the
     * string entries 'language' and 'charset'.
     *
     * @return array|null The decoded cookie data, or null if the cookie is missing or invalid
     */
    private function _read_cookie() : ?array
    {
        $cookie = $_COOKIE['midcom_services_i18n'] ?? null;
        if (empty($cookie)) {
            return null;
        }

        $array = null;
        if (is_string($cookie)) {
            $rawdata = base64_decode($cookie, true);
            if ($rawdata !== false) {
                // The cookie is untrusted input: never let unserialize() instantiate objects
                $array = unserialize($rawdata, ['allowed_classes' => false]);
            }
        }

        if (   !is_array($array)
            || !isset($array['language'], $array['charset'])
            || !is_string($array['language'])
            || !is_string($array['charset'])) {
            debug_add("Rejecting cookie, it seems invalid.");
            return null;
        }

        return $array;
    }

    /**
     * Pull available language out of the HTTP Headers
     *
     * q-parameters for prioritization are supported. Only the primary subtag of
     * each entry is used ("en-US" becomes "en"). Wildcards, entries that are not
     * two-letter codes and entries with q=0 ("not acceptable") are skipped.
     *
     * @return array Keys are the languages, the value is their q-index, sorted by descending q-index.
     */
    private function _read_http_negotiation() : array
    {
        $http_langs = [];
        $header = $_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? '';
        if (!is_string($header)) {
            return $http_langs;
        }

        foreach (explode(',', $header) as $data) {
            $params = explode(';', $data);

            // we can't use strings like en-US, so we only use the primary subtag.
            // Whitespace around the list entries is legal and has to be stripped first.
            $lang = strtolower(trim(explode('-', array_shift($params))[0]));
            if (!preg_match('/^[a-z]{2}$/D', $lang)) {
                // "*", empty entries or codes which are not ISO 639-1
                continue;
            }

            $q = $this->_get_q($params);
            if ($q <= 0.0) {
                continue;
            }

            if (   !isset($http_langs[$lang])
                || $http_langs[$lang] < $q) {
                $http_langs[$lang] = $q;
            }
        }
        arsort($http_langs, SORT_NUMERIC);
        return $http_langs;
    }

    /**
     * Extract the q-value from the parameters of one Accept-Language entry
     *
     * @param array $params The ";"-separated parameters following the language tag
     * @return float The q-value clamped to 0.0 <= q <= 1.0, 1.0 if none is given
     */
    private function _get_q(array $params) : float
    {
        $q = 1.0;
        foreach ($params as $option) {
            $option_params = explode('=', $option);
            if (count($option_params) !== 2) {
                continue;
            }
            $name = strtolower(trim($option_params[0]));
            $value = trim($option_params[1]);
            if (   $name === 'q'
                && is_numeric($value)) {
                // make sure that 0.0 <= $q <= 1.0
                $q = max(0.0, min(1.0, (float) $value));
            }
        }
        return $q;
    }

    /**
     * Set output character set.
     *
     * @param string $charset    Charset name.
     */
    public function set_charset($charset)
    {
        $this->_current_charset = strtolower($charset);
    }

    /**
     * Set output language.
     *
     * This sets the system locale (and, if the intl extension is loaded, the
     * default Locale) to the given language and propagates it to all l10n
     * databases loaded so far.
     *
     * The character set is left untouched, use midcom_services_i18n::set_charset()
     * if you need to change it.
     *
     * @param string $lang    Language ISO 639-1 code
     * @return bool False if the language is unknown (nothing is changed in that case)
     */
    public function set_language($lang) : bool
    {
        // Locales::getName() throws on unknown locales instead of returning null,
        // so the existence has to be checked explicitly
        if (   !is_string($lang)
            || !Locales::exists($lang)) {
            debug_add("Language {$lang} not found.", MIDCOM_LOG_ERROR);
            return false;
        }

        $this->_current_language = $lang;

        setlocale(LC_ALL, $lang);
        if (Intl::isExtensionLoaded()) {
            Locale::setDefault($lang);
        }

        foreach ($this->_obj_l10n as $object) {
            $object->set_language($lang);
        }
        return true;
    }

    /**
     * Set the fallback language.
     *
     * The new value is propagated to all l10n databases loaded so far.
     *
     * @param string $lang    Language name.
     */
    public function set_fallback_language($lang)
    {
        $this->_fallback_language = $lang;
        foreach ($this->_obj_l10n as $object) {
            $object->set_fallback_language($lang);
        }
    }

    /**
     * Returns the current language code
     *
     * @return string
     */
    public function get_current_language()
    {
        return $this->_current_language;
    }

    /**
     * Returns language code corresponding to current content language
     *
     * This is currently the same as the current language.
     *
     * @return string
     */
    public function get_content_language()
    {
        return $this->get_current_language();
    }

    /**
     * Returns the current fallback language code
     *
     * @return string
     */
    public function get_fallback_language()
    {
        return $this->_fallback_language;
    }

    /**
     * Returns the current character set
     *
     * @return string
     */
    public function get_current_charset()
    {
        return $this->_current_charset;
    }

    /**
     * Returns a l10n class instance which can be used to
     * access the localization data of the current component.
     *
     * Using the special name "midcom" you will get the midcom core l10n library.
     *
     * Instances are created on first access and cached afterwards.
     *
     * @see midcom_services_i18n_l10n
     * @param string $component    The component for which to retrieve a string database.
     * @param string $database    The string table to retrieve from the component's locale directory.
     */
    public function get_l10n($component = 'midcom', $database = 'default') : midcom_services_i18n_l10n
    {
        $cacheid = "{$component}/{$database}";

        if (!isset($this->_obj_l10n[$cacheid])) {
            $this->_load_l10n_db($component, $database);
        }

        return $this->_obj_l10n[$cacheid];
    }

    /**
     * Returns a translated string using the l10n database specified in the function
     * arguments.
     *
     * @param string $stringid The string to translate.
     * @param string $component    The component for which to retrieve a string database. If omitted, this defaults to the
     *     current component (out of the component context).
     * @param string $database    The string table to retrieve from the component's locale directory. If omitted, the 'default'
     *     database is used.
     * @see midcom_services_i18n_l10n::get()
     */
    public function get_string($stringid, $component = null, $database = 'default') : string
    {
        if ($component === null) {
            $component = midcom_core_context::get()->get_key(MIDCOM_CONTEXT_COMPONENT);
        }

        return $this->get_l10n($component, $database)->get($stringid);
    }

    /**
     * This is a shortcut for echo $this->get_string(...);.
     *
     * To keep the naming stable with the actual l10n class, this is not called
     * echo_string (Zend won't allow $l10n->echo().)
     *
     * @param string $stringid The string to translate.
     * @param string $component    The component for which to retrieve a string database. If omitted, this defaults to the
     *     current component (out of the component context).
     * @param string $database    The string table to retrieve from the component's locale directory. If omitted, the 'default'
     *     database is used.
     * @see midcom_services_i18n_l10n::get()
     * @see get_string()
     */
    public function show_string($stringid, $component = null, $database = 'default')
    {
        echo $this->get_string($stringid, $component, $database);
    }

    /**
     * Load the specified l10n library.
     *
     * The new instance is initialized with the current and the fallback language
     * and stored in the l10n db cache.
     *
     * @param string $component    The component for which to retrieve a string database.
     * @param string $database    The string table to retrieve from the component's locale directory.
     */
    private function _load_l10n_db($component, string $database)
    {
        $obj = new midcom_services_i18n_l10n($component, $database);
        $obj->set_language($this->_current_language);
        $obj->set_fallback_language($this->_fallback_language);

        $this->_obj_l10n["{$component}/{$database}"] = $obj;
    }

    /**
     * Lists languages as identifier -> name pairs
     *
     * The name is the English one, followed by the language's own name in
     * parentheses if the two differ.
     */
    public function list_languages() : array
    {
        $languages = [];
        foreach (Languages::getNames('en') as $identifier => $name) {
            $localname = Languages::getName($identifier, $identifier);
            if ($localname !== $name) {
                $name .= ' (' . $localname . ')';
            }
            $languages[$identifier] = $name;
        }
        return $languages;
    }

    /**
     * This is a calling wrapper to the iconv library.
     *
     * See the PHP iconv() function for the exact parameter definitions.
     *
     * @param string $source_charset The charset to convert from.
     * @param string $destination_charset The charset to convert to.
     * @param string $string The string to convert.
     * @return mixed The converted string or false on any error.
     */
    private function iconv(string $source_charset, string $destination_charset, string $string)
    {
        // Warnings are suppressed here, the failure is reported through the MidCOM log instead
        $result = @iconv($source_charset, $destination_charset, $string);
        if ($result === false && $string !== '') {
            debug_add("Iconv failed to convert a string, returning false.", MIDCOM_LOG_WARN);
            debug_print_r("Tried to convert this string from {$source_charset} to {$destination_charset}:", $string);
            midcom::get()->debug->log_php_error(MIDCOM_LOG_WARN);
            return false;
        }
        return $result;
    }

    /**
     * Convert a string assumed to be in the currently active charset to UTF8.
     *
     * @param string $string The string to convert
     * @return string|false The string converted to UTF-8, false if the conversion failed
     */
    public function convert_to_utf8($string)
    {
        if ($this->_current_charset === 'utf-8') {
            return $string;
        }
        return $this->iconv($this->_current_charset, 'utf-8', $string);
    }

    /**
     * Convert a string assumed to be in UTF-8 to the currently active charset.
     *
     * @param string $string The string to convert
     * @return string|false The string converted to the current charset, false if the conversion failed
     */
    public function convert_from_utf8($string)
    {
        if ($this->_current_charset === 'utf-8') {
            return $string;
        }
        return $this->iconv('utf-8', $this->_current_charset, $string);
    }

    /**
     * Converts the given string to the current site charset.
     *
     * The source charset is guessed with mb_detect_encoding(). If no charset can
     * be detected, the string is returned unchanged.
     *
     * @param string $string The string to convert.
     * @return string|false The converted string, false if the conversion failed
     */
    public function convert_to_current_charset($string)
    {
        $charset = mb_detect_encoding($string, "UTF-8, UTF-7, ASCII, ISO-8859-15");
        if ($charset === false) {
            debug_add("mb_detect_encoding could not determine the charset, returning the string unchanged.", MIDCOM_LOG_WARN);
            return $string;
        }
        debug_add("mb_detect_encoding got {$charset}");
        return $this->iconv($charset, $this->_current_charset, $string);
    }

    /**
     * Wrapped html_entity_decode call, which decodes into the current charset
     *
     * @param string $text The text with HTML entities, which should be replaced by their native equivalents.
     */
    public function html_entity_decode($text) : string
    {
        return html_entity_decode($text, ENT_COMPAT, $this->_current_charset);
    }
}
446