| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | namespace TYPO3\CMS\Core\Charset; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | /* | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  * This file is part of the TYPO3 CMS project. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  * It is free software; you can redistribute it and/or modify it under | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  * the terms of the GNU General Public License, either version 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  * of the License, or any later version. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  * For the full copyright and license information, please read the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  * LICENSE.txt file that was distributed with this source code. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  * The TYPO3 project - inspiring people to share! | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | use TYPO3\CMS\Core\SingletonInterface; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  | use TYPO3\CMS\Core\Utility\ExtensionManagementUtility; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  | use TYPO3\CMS\Core\Utility\GeneralUtility; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  * Notes on UTF-8 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |  * Functions working on UTF-8 strings: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |  * - strchr/strstr | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |  * - strrchr | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |  * - substr_count | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |  * - implode/explode/join | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |  * Functions nearly working on UTF-8 strings: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |  * - trim/ltrim/rtrim: the second parameter 'charlist' won't work for characters not contained in 7-bit ASCII | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |  * - htmlentities: charset support for UTF-8 only since PHP 4.3.0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |  * - preg_*: UTF-8 support is compiled into PHP by default nowadays, but could be unavailable; patterns need the "u" modifier | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |  * Functions NOT working on UTF-8 strings: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |  * - str*cmp | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |  * - stristr | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |  * - stripos | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |  * - substr | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |  * - strrev | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |  * - split/spliti | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |  * - ... | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |  * Class for conversion between charsets | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  | class CharsetConverter implements SingletonInterface | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  | { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |      * ASCII value used for chars with no equivalent (63 is "?"). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |      * @var int | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |     public $noCharByteVal = 63; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |      * This is the array where parsed conversion tables are stored (cached) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |      * @var array | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |     public $parsedCharsets = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |      * An array where case folding data will be stored (cached) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |      * @var array | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |     public $caseFolding = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |      * An array where charset-to-ASCII mappings are stored (cached) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |      * @var array | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |     public $toASCII = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |      * This tells the converter which charsets have two bytes per char: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |      * @var array | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |     public $twoByteSets = [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         'ucs-2' => 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |     ]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |      * This tells the converter which charsets use a scheme like the Extended Unix Code: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |      * @var array | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |     public $eucBasedSets = [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |         'gb2312' => 1, // Chinese, simplified. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |         'big5' => 1, // Chinese, traditional. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |         'euc-kr' => 1, // Korean | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |         'shift_jis' => 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |     ]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |      * @link http://developer.apple.com/documentation/macos8/TextIntlSvcs/TextEncodingConversionManager/TEC1.5/TEC.b0.html | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |      * @link http://czyborra.com/charsets/iso8859.html | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |      * @var array | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                public $synonyms = [
                    // ASCII
                    'us' => 'ascii',
                    'us-ascii' => 'ascii',
                    // ISO-8859-x: code page names and iso-ir-* names
                    'cp819' => 'iso-8859-1',
                    'ibm819' => 'iso-8859-1',
                    'iso-ir-100' => 'iso-8859-1',
                    'iso-ir-101' => 'iso-8859-2',
                    'iso-ir-109' => 'iso-8859-3',
                    'iso-ir-110' => 'iso-8859-4',
                    'iso-ir-144' => 'iso-8859-5',
                    'iso-ir-127' => 'iso-8859-6',
                    'iso-ir-126' => 'iso-8859-7',
                    'iso-ir-138' => 'iso-8859-8',
                    'iso-ir-148' => 'iso-8859-9',
                    'iso-ir-157' => 'iso-8859-10',
                    'iso-ir-179' => 'iso-8859-13',
                    'iso-ir-199' => 'iso-8859-14',
                    'iso-ir-203' => 'iso-8859-15',
                    // ISO-8859-x: csisolatin* names (csisolatin4 and csisolatin6 added
                    // so every "latinN" entry below has its cs-prefixed counterpart)
                    'csisolatin1' => 'iso-8859-1',
                    'csisolatin2' => 'iso-8859-2',
                    'csisolatin3' => 'iso-8859-3',
                    'csisolatin4' => 'iso-8859-4',
                    'csisolatin5' => 'iso-8859-9',
                    'csisolatin6' => 'iso-8859-10',
                    'csisolatin8' => 'iso-8859-14',
                    'csisolatin9' => 'iso-8859-15',
                    'csisolatingreek' => 'iso-8859-7',
                    'iso-celtic' => 'iso-8859-14',
                    // ISO-8859-x: latinN names (latin4 added; iso-8859-4 was only
                    // reachable through iso-ir-110 before)
                    'latin1' => 'iso-8859-1',
                    'latin2' => 'iso-8859-2',
                    'latin3' => 'iso-8859-3',
                    'latin4' => 'iso-8859-4',
                    'latin5' => 'iso-8859-9',
                    'latin6' => 'iso-8859-10',
                    'latin8' => 'iso-8859-14',
                    'latin9' => 'iso-8859-15',
                    // ISO-8859-x: lN short names (l4 added, see latin4)
                    'l1' => 'iso-8859-1',
                    'l2' => 'iso-8859-2',
                    'l3' => 'iso-8859-3',
                    'l4' => 'iso-8859-4',
                    'l5' => 'iso-8859-9',
                    'l6' => 'iso-8859-10',
                    'l8' => 'iso-8859-14',
                    'l9' => 'iso-8859-15',
                    // ISO-8859-x: script names (greek and hebrew added next to
                    // the existing cyrillic and arabic entries)
                    'cyrillic' => 'iso-8859-5',
                    'arabic' => 'iso-8859-6',
                    'greek' => 'iso-8859-7',
                    'hebrew' => 'iso-8859-8',
                    'tis-620' => 'iso-8859-11',
                    // Windows code pages: winNNNN names
                    'win874' => 'windows-874',
                    'win1250' => 'windows-1250',
                    'win1251' => 'windows-1251',
                    'win1252' => 'windows-1252',
                    'win1253' => 'windows-1253',
                    'win1254' => 'windows-1254',
                    'win1255' => 'windows-1255',
                    'win1256' => 'windows-1256',
                    'win1257' => 'windows-1257',
                    'win1258' => 'windows-1258',
                    // Windows code pages: cpNNNN names (cp1253-cp1258 added to match
                    // the winNNNN range above)
                    'cp1250' => 'windows-1250',
                    'cp1251' => 'windows-1251',
                    'cp1252' => 'windows-1252',
                    'cp1253' => 'windows-1253',
                    'cp1254' => 'windows-1254',
                    'cp1255' => 'windows-1255',
                    'cp1256' => 'windows-1256',
                    'cp1257' => 'windows-1257',
                    'cp1258' => 'windows-1258',
                    // Windows code pages: ms-* and other names
                    'ms-ee' => 'windows-1250',
                    'ms-ansi' => 'windows-1252',
                    'ms-greek' => 'windows-1253',
                    'ms-turk' => 'windows-1254',
                    'winbaltrim' => 'windows-1257',
                    // KOI8
                    'koi-8ru' => 'koi-8r',
                    'koi8r' => 'koi-8r',
                    'cp878' => 'koi-8r',
                    // Macintosh
                    'mac' => 'macroman',
                    'macintosh' => 'macroman',
                    // Chinese, Japanese and Korean charsets
                    'euc-cn' => 'gb2312',
                    'x-euc-cn' => 'gb2312',
                    'euccn' => 'gb2312',
                    'cp936' => 'gb2312',
                    'big-5' => 'big5',
                    'cp950' => 'big5',
                    'eucjp' => 'euc-jp',
                    'sjis' => 'shift_jis',
                    'shift-jis' => 'shift_jis',
                    'cp932' => 'shift_jis',
                    'cp949' => 'euc-kr',
                    // Unicode encodings written without the hyphen
                    'utf7' => 'utf-7',
                    'utf8' => 'utf-8',
                    'utf16' => 'utf-16',
                    'utf32' => 'utf-32',
                    'ucs2' => 'ucs-2',
                    'ucs4' => 'ucs-4',
                ];
            
                                                                                                            
                            
            
                                    
            
            
                | 192 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Normalizes a charset name.
                 *
                 * The name is lowercased and stripped of surrounding whitespace; if the
                 * result is a key of $this->synonyms, the mapped name is returned instead.
                 *
                 * @param string $charset Input charset
                 * @return string Normalized charset
                 */
                public function parse_charset($charset)
                {
                    $lowercased = strtolower($charset);
                    $lookupKey = trim($lowercased);

                    // Fall back to the cleaned-up input when no synonym is registered for it.
                    return isset($this->synonyms[$lookupKey])
                        ? $this->synonyms[$lookupKey]
                        : $lookupKey;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 207 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 208 |  |  |     /******************************************** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 209 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 210 |  |  |      * Charset Conversion functions | 
            
                                                                                                            
                            
            
                                    
            
            
                | 211 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 212 |  |  |      ********************************************/ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 213 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 214 |  |  |      * Convert from one charset to another charset. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 215 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 216 |  |  |      * @param string $inputString Input string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 217 |  |  |      * @param string $fromCharset From charset (the current charset of the string) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 218 |  |  |      * @param string $toCharset To charset (the output charset wanted) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 219 |  |  |      * @param bool $useEntityForNoChar If set, then characters that are not available in the destination character set will be encoded as numeric entities | 
            
                                                                                                            
                            
            
                                    
            
            
                | 220 |  |  |      * @return string Converted string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 221 |  |  |      * @see convArray() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 222 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                public function conv($inputString, $fromCharset, $toCharset, $useEntityForNoChar = false)
                {
                    // Nothing to convert when source and target charset are identical
                    if ($fromCharset === $toCharset) {
                        return $inputString;
                    }
                    // PHP-libs don't support fallback to SGML entities, but UTF-8 handles everything
                    if ($toCharset === 'utf-8' || !$useEntityForNoChar) {
                        try {
                            // Returns FALSE for unsupported charsets (PHP < 8)
                            $convertedString = mb_convert_encoding($inputString, $toCharset, $fromCharset);
                        } catch (\ValueError $e) {
                            // PHP >= 8 throws for unsupported charsets instead of returning FALSE;
                            // handle it the same way so the table-based conversion below still runs
                            $convertedString = false;
                        }
                        if ($convertedString !== false) {
                            return $convertedString;
                        }
                    }
                    // Table-based conversion with UTF-8 as the intermediate charset:
                    // first bring the input to UTF-8 ...
                    if ($fromCharset !== 'utf-8') {
                        $inputString = $this->utf8_encode($inputString, $fromCharset);
                    }
                    // ... then from UTF-8 to the wanted charset (optionally using entities for unknown chars)
                    if ($toCharset !== 'utf-8') {
                        $inputString = $this->utf8_decode($inputString, $toCharset, $useEntityForNoChar);
                    }
                    return $inputString;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 244 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Walks through an array and converts every string element from one charset to another.
                 * Nested arrays are processed recursively; elements of any other type are left untouched.
                 * NOTICE: The array is passed by reference and modified in place, nothing is returned!
                 *
                 * @param array $array Input array, possibly multidimensional
                 * @param string $fromCharset From charset (the current charset of the strings)
                 * @param string $toCharset To charset (the output charset wanted)
                 * @param bool $useEntityForNoChar If set, then characters that are not available in the destination character set will be encoded as numeric entities
                 * @see conv()
                 */
                public function convArray(&$array, $fromCharset, $toCharset, $useEntityForNoChar = false)
                {
                    foreach ($array as $key => $item) {
                        if (is_string($item)) {
                            // Plain string: convert it and write the result back into the original array
                            $array[$key] = $this->conv($array[$key], $fromCharset, $toCharset, $useEntityForNoChar);
                            continue;
                        }
                        if (is_array($item)) {
                            // Sub-array: descend, handing over the original element by reference
                            $this->convArray($array[$key], $fromCharset, $toCharset, $useEntityForNoChar);
                        }
                    }
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 265 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Converts $str from $charset to UTF-8
                 *
                 * The conversion is a lookup in the parsed conversion table of the charset
                 * ($this->parsedCharsets[$charset]['local']). For charsets listed in $this->twoByteSets
                 * every character is read as two bytes (big endian). For all other charsets bytes up to
                 * 127 are copied unchanged and bytes above 127 are looked up; charsets listed in
                 * $this->eucBasedSets combine such a byte with the following byte first (except the
                 * single byte range 160-223 of shift_jis). Characters without a table entry are
                 * replaced by the byte configured in $this->noCharByteVal.
                 *
                 * @param string $str String in local charset to convert to UTF-8
                 * @param string $charset Charset, lowercase. Must be found in csconvtbl/ folder.
                 * @return string Output string, converted to UTF-8. Empty string if the charset could not be initialized.
                 */
                public function utf8_encode($str, $charset)
                {
                    if ($charset === 'utf-8') {
                        return $str;
                    }
                    // Parse conv. table if not already done; without a table nothing can be converted
                    if (!$this->initCharset($charset)) {
                        return '';
                    }
                    // Byte offsets are used below, so make sure we really operate on a string
                    $str = (string)$str;
                    $strLen = strlen($str);
                    // These lookups do not change while traversing the string
                    $isTwoByteSet = isset($this->twoByteSets[$charset]);
                    $isEucBasedSet = isset($this->eucBasedSets[$charset]);
                    $outStr = '';
                    // Traverse each char in string
                    for ($a = 0; $a < $strLen; $a++) {
                        $chr = $str[$a];
                        $ord = ord($chr);
                        if ($isTwoByteSet) {
                            // The charset has two bytes per char. A truncated trailing char (odd string
                            // length) is completed with a zero byte instead of reading past the string end.
                            $ord2 = ($a + 1 < $strLen) ? ord($str[$a + 1]) : 0;
                            // Assume big endian
                            $ord = ($ord << 8) | $ord2;
                            // Skip the second byte of this char
                            $a++;
                        } elseif ($ord > 127) {
                            // If char has value over 127 it's a multibyte char in UTF-8
                            // EUC uses two-bytes above 127; we get both and advance pointer and make $ord a 16bit int.
                            // Shift-JIS: chars between 160 and 223 are single byte
                            if ($isEucBasedSet && ($charset !== 'shift_jis' || $ord < 160 || $ord > 223)) {
                                $a++;
                                // A missing second byte at the end of the string counts as zero
                                $ord2 = ($a < $strLen) ? ord($str[$a]) : 0;
                                $ord = $ord * 256 + $ord2;
                            }
                        } else {
                            // Bytes up to 127 are taken over as they are
                            $outStr .= $chr;
                            continue;
                        }
                        // If the local char-number was found in parsed conv. table then we use that,
                        // otherwise no char exists and the "no char" byte is inserted
                        if (isset($this->parsedCharsets[$charset]['local'][$ord])) {
                            $outStr .= $this->parsedCharsets[$charset]['local'][$ord];
                        } else {
                            $outStr .= chr($this->noCharByteVal);
                        }
                    }
                    return $outStr;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 325 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Converts $str from UTF-8 to $charset
                 *
                 * Bytes up to 127 are copied unchanged. Every multibyte UTF-8 sequence is looked up in
                 * $this->parsedCharsets[$charset]['utf8']; a local value above 255 is written as two bytes
                 * (high byte first), any other value as a single byte. Sequences without a table entry become
                 * either a numeric entity or the byte in $this->noCharByteVal, depending on $useEntityForNoChar.
                 *
                 * @param string $str String in UTF-8 to convert to local charset
                 * @param string $charset Charset, lowercase. Must be found in csconvtbl/ folder.
                 * @param bool $useEntityForNoChar If set, then characters that are not available in the destination character set will be encoded as numeric entities
                 * @return string Output string, converted to local charset. Empty string if initCharset() does not succeed for $charset.
                 */
                public function utf8_decode($str, $charset, $useEntityForNoChar = false)
                {
                    if ($charset === 'utf-8') {
                        // Source and target charset are the same: nothing to convert.
                        return $str;
                    }
                    // The charset name is used exactly as given (no case folding happens here), so it must be passed in lowercase.
                    // Parse conv. table if not already
                    if (!$this->initCharset($charset)) {
                        return '';
                    }
                    $strLen = strlen($str);
                    $outStr = '';
                    // Traverse each byte in UTF-8 string
                    for ($a = 0; $a < $strLen; $a++) {
                        $chr = substr($str, $a, 1);
                        $ord = ord($chr);
                        if ($ord <= 127) {
                            // Single-byte character: copied to the output unchanged.
                            $outStr .= $chr;
                            continue;
                        }
                        // The first byte of a multibyte sequence must have the 7th bit set.
                        // If it is not set we are in the middle of a byte sequence.
                        if (!($ord & 64)) {
                            $outStr .= chr($this->noCharByteVal);
                            continue;
                        }
                        // Add first byte
                        $buf = $chr;
                        // Each further leading 1-bit of the first byte announces one more byte in the sequence.
                        // NOTE(review): the following bytes are appended without checking that they are
                        // continuation bytes, and reading may run past the end of $str for a truncated sequence.
                        for ($b = 0; $b < 8; $b++) {
                            // Shift it left and ...
                            $ord = $ord << 1;
                            // ... and with 8th bit - if that is not set, the sequence is complete.
                            if (!($ord & 128)) {
                                break;
                            }
                            $a++;
                            // ... and add the next byte.
                            $buf .= substr($str, $a, 1);
                        }
                        if (isset($this->parsedCharsets[$charset]['utf8'][$buf])) {
                            // The local number
                            $mByte = $this->parsedCharsets[$charset]['utf8'][$buf];
                            if ($mByte > 255) {
                                // Local number does not fit into one byte: split it (16bit word assumed) in two chars.
                                $outStr .= chr(($mByte >> 8) & 255) . chr($mByte & 255);
                            } else {
                                $outStr .= chr($mByte);
                            }
                        } elseif ($useEntityForNoChar) {
                            // No table entry: create a numeric entity from the value returned by utf8CharToUnumber().
                            $outStr .= '&#' . $this->utf8CharToUnumber($buf, true) . ';';
                        } else {
                            // No table entry and no entities wanted: emit the replacement byte.
                            $outStr .= chr($this->noCharByteVal);
                        }
                    }
                    return $outStr;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 394 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Replaces every multibyte UTF-8 sequence in $str by a numeric entity.
                 *
                 * Bytes up to 127 are kept as they are. A byte above 127 that cannot start a
                 * sequence (7th bit not set) is replaced by the byte in $this->noCharByteVal.
                 *
                 * @param string $str Input string
                 * @return string Output string
                 */
                public function utf8_to_entities($str)
                {
                    $length = strlen($str);
                    $result = '';
                    // Walk through the string byte by byte.
                    for ($pos = 0; $pos < $length; $pos++) {
                        $byte = substr($str, $pos, 1);
                        $lead = ord($byte);
                        // Single-byte character: goes to the output untouched.
                        if ($lead <= 127) {
                            $result .= $byte;
                            continue;
                        }
                        // A byte above 127 without the 7th bit cannot be a first byte,
                        // so we are in the middle of a byte sequence.
                        if (($lead & 64) === 0) {
                            $result .= chr($this->noCharByteVal);
                            continue;
                        }
                        // Collect the sequence: one additional byte for every consecutive
                        // 1-bit of the first byte, starting at the 7th bit and going down.
                        $sequence = $byte;
                        $mask = 64;
                        while ($lead & $mask) {
                            $pos++;
                            $sequence .= substr($str, $pos, 1);
                            $mask >>= 1;
                        }
                        $number = $this->utf8CharToUnumber($sequence, true);
                        $result .= '&#' . $number . ';';
                    }
                    return $result;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 438 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Converts numeric entities (UNICODE, eg. decimal (&#1234;) or hexadecimal (&#x1b;)) to UTF-8 multibyte chars.
                 * All string-HTML entities (like &amp; or &pound;) will be converted as well.
                 *
                 * Anything that looks like an entity but cannot be resolved is left untouched in the
                 * output: unknown named entities as well as malformed numeric ones (for example
                 * "&#;", "&#x;" or "&#12ab;"). The hexadecimal marker is accepted in both cases
                 * ("&#x41;" and "&#X41;").
                 *
                 * @param string $str Input string, UTF-8
                 * @return string Output string
                 */
                public function entities_to_utf8($str)
                {
                    // Lookup table "&name;" => character for the named HTML entities
                    $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT));

                    $result = preg_replace_callback(
                        '/&([#[:alnum:]]*);/',
                        function (array $match) use ($trans_tbl) {
                            $entity = $match[0];
                            $name = $match[1];

                            // Dec or hex entities
                            if (substr($name, 0, 1) === '#') {
                                $digits = (string)substr($name, 1);
                                if (preg_match('/^[0-9]+$/D', $digits) === 1) {
                                    $codePoint = (int)$digits;
                                } elseif (preg_match('/^[xX][0-9a-fA-F]+$/D', $digits) === 1) {
                                    $codePoint = hexdec(substr($digits, 1));
                                    // hexdec() switches to float once the value exceeds the integer range;
                                    // clamp instead of relying on an out-of-range float-to-int conversion.
                                    if (!is_int($codePoint)) {
                                        $codePoint = PHP_INT_MAX;
                                    }
                                } else {
                                    // Not a well-formed numeric entity: no conversion
                                    return $entity;
                                }
                                return $this->UnumberToChar($codePoint);
                            }

                            // Other entities:
                            if (isset($trans_tbl[$entity])) {
                                return $trans_tbl[$entity];
                            }

                            // No conversion:
                            return $entity;
                        },
                        $str
                    );

                    // preg_replace_callback() returns null on a PCRE error; hand back the input unchanged then.
                    return $result === null ? $str : $result;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 476 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Splits the input UTF-8 string into an array with one entry per character.
                 * HTML entities (like &amp; or &pound; or &#123; or &#x3f5d;) are resolved through
                 * entities_to_utf8() first, so each of them is detected as a single character.
                 * Despite the method name, the entries are the real UTF-8 chars, not integer numbers.
                 *
                 * A byte above 127 that does not have its 7th bit set (i.e. one that cannot be the
                 * first byte of a sequence) is represented by chr($this->noCharByteVal).
                 *
                 * @param string $str Input string, UTF-8
                 * @return array Output array with one (possibly multibyte) character string per entry
                 */
                public function utf8_to_numberarray($str)
                {
                    // Entities must be registered as well
                    $str = $this->entities_to_utf8($str);

                    $characters = [];
                    $byteCount = strlen($str);
                    $offset = 0;
                    // Walk through the string byte by byte; multibyte sequences advance the offset themselves.
                    while ($offset < $byteCount) {
                        $byte = substr($str, $offset, 1);
                        $bits = ord($byte);
                        if ($bits <= 127) {
                            // Plain single-byte character
                            $characters[] = $byte;
                        } elseif (!($bits & 64)) {
                            // High bit set but 7th bit missing: we are in the middle of a byte sequence.
                            $characters[] = chr($this->noCharByteVal);
                        } else {
                            // First byte of a multibyte sequence: collect the bytes that follow.
                            $sequence = $byte;
                            for ($step = 0; $step < 8; $step++) {
                                // Shift left; while the 8th bit is still set, another byte belongs to the sequence.
                                $bits = $bits << 1;
                                if (!($bits & 128)) {
                                    break;
                                }
                                $offset++;
                                $sequence .= substr($str, $offset, 1);
                            }
                            $characters[] = $sequence;
                        }
                        $offset++;
                    }
                    return $characters;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 526 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 527 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 528 |  |  |      * Converts a UNICODE number to a UTF-8 multibyte character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 529 |  |  |      * Algorithm based on script found at From: http://czyborra.com/utf/ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 530 |  |  |      * Unit-tested by Kasper | 
            
                                                                                                            
                            
            
                                    
            
            
                | 531 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 532 |  |  |      * The binary representation of the character's integer value is thus simply spread across the bytes | 
            
                                                                                                            
                            
            
                                    
            
            
                | 533 |  |  |      * and the number of high bits set in the lead byte announces the number of bytes in the multibyte sequence: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 534 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 535 |  |  |      * bytes | bits | representation | 
            
                                                                                                            
                            
            
                                    
            
            
                | 536 |  |  |      *     1 |    7 | 0vvvvvvv | 
            
                                                                                                            
                            
            
                                    
            
            
                | 537 |  |  |      *     2 |   11 | 110vvvvv 10vvvvvv | 
            
                                                                                                            
                            
            
                                    
            
            
                | 538 |  |  |      *     3 |   16 | 1110vvvv 10vvvvvv 10vvvvvv | 
            
                                                                                                            
                            
            
                                    
            
            
                | 539 |  |  |      *     4 |   21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv | 
            
                                                                                                            
                            
            
                                    
            
            
                | 540 |  |  |      *     5 |   26 | 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv | 
            
                                                                                                            
                            
            
                                    
            
            
                | 541 |  |  |      *     6 |   31 | 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv | 
            
                                                                                                            
                            
            
                                    
            
            
                | 542 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 543 |  |  |      * @param int $unicodeInteger UNICODE integer | 
            
                                                                                                            
                            
            
                                    
            
            
                | 544 |  |  |      * @return string UTF-8 multibyte character string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 545 |  |  |      * @see utf8CharToUnumber() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 546 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                public function UnumberToChar($unicodeInteger)
                {
                    // One byte (7 bits, 0vvvvvvv): the value is emitted as a single byte.
                    if ($unicodeInteger < 128) {
                        return chr($unicodeInteger);
                    }

                    // Pick the lead byte marker (n high bits set, followed by a zero bit)
                    // and the number of continuation bytes that follow the lead byte.
                    if ($unicodeInteger < 2048) {
                        // 2 bytes, 11 bits: 110vvvvv 10vvvvvv
                        $leadMarker = 192;
                        $continuationCount = 1;
                    } elseif ($unicodeInteger < 65536) {
                        // 3 bytes, 16 bits: 1110vvvv + 2 continuation bytes
                        $leadMarker = 224;
                        $continuationCount = 2;
                    } elseif ($unicodeInteger < 2097152) {
                        // 4 bytes, 21 bits: 11110vvv + 3 continuation bytes
                        $leadMarker = 240;
                        $continuationCount = 3;
                    } elseif ($unicodeInteger < 67108864) {
                        // 5 bytes, 26 bits: 111110vv + 4 continuation bytes
                        $leadMarker = 248;
                        $continuationCount = 4;
                    } elseif ($unicodeInteger < 2147483648) {
                        // 6 bytes, 31 bits: 1111110v + 5 continuation bytes
                        $leadMarker = 252;
                        $continuationCount = 5;
                    } else {
                        // Cannot express a 32-bit character in UTF-8
                        return chr($this->noCharByteVal);
                    }

                    // Lead byte: marker bits combined with the most significant value bits.
                    $bytes = chr($leadMarker | ($unicodeInteger >> (6 * $continuationCount)));

                    // Continuation bytes: 10vvvvvv, six value bits each, most significant group first.
                    for ($shift = 6 * ($continuationCount - 1); $shift >= 0; $shift -= 6) {
                        $bytes .= chr(128 | (($unicodeInteger >> $shift) & 63));
                    }

                    return $bytes;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 583 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Converts a single UTF-8 multibyte character to its UNICODE number.
                 *
                 * Only the first character of $str is decoded. The number of continuation
                 * bytes is taken from the count of high bits set in the lead byte; the
                 * continuation bytes themselves are not validated (only their low six bits
                 * are used) and bytes missing at the end of the string are treated as zero.
                 *
                 * A first byte that does not have its two highest bits set (ASCII, or a
                 * stray continuation byte) is returned as its plain byte value. An empty
                 * string yields 0. The lead bytes 0xFE and 0xFF, which never occur in
                 * UTF-8, contribute no value bits of their own.
                 *
                 * @param string $str UTF-8 multibyte character string
                 * @param bool $hex If set, the number is returned as a string: "x" followed by lowercase hex digits.
                 * @return int|string UNICODE integer, or its "x"-prefixed hex representation if $hex is set
                 * @see UnumberToChar()
                 */
                public function utf8CharToUnumber($str, $hex = false)
                {
                    $str = (string)$str;
                    // Guard against the empty string: reading offset 0 would raise a notice/warning.
                    $leadByte = $str === '' ? 0 : ord($str[0]);

                    if (($leadByte & 192) === 192) {
                        // Both high bits set: this is the lead byte of a multibyte sequence.
                        // Every further high bit set (starting at bit 6) announces one continuation byte.
                        $continuationCount = 0;
                        for ($bit = 64; $bit > 0 && ($leadByte & $bit); $bit >>= 1) {
                            $continuationCount++;
                        }
                        // $bit now marks the terminating zero bit of the length prefix (or is 0 if the
                        // lead byte is 0xFF). The value bits of the lead byte are those below it.
                        $int = $bit > 0 ? $leadByte & ($bit - 1) : 0;
                        // Append the six value bits of each continuation byte (10vvvvvv).
                        for ($offset = 1; $offset <= $continuationCount; $offset++) {
                            $int = ($int << 6) | (ord(substr($str, $offset, 1)) & 63);
                        }
                    } else {
                        // Single byte: the byte value is the number.
                        $int = $leadByte;
                    }

                    return $hex ? 'x' . dechex($int) : $int;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 619 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 620 |  |  |     /******************************************** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 621 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 622 |  |  |      * Init functions | 
            
                                                                                                            
                            
            
                                    
            
            
                | 623 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 624 |  |  |      ********************************************/ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 625 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 626 |  |  |      * This will initialize a charset for use if it's defined in the 'typo3/sysext/core/Resources/Private/Charsets/csconvtbl/' folder | 
            
                                                                                                            
                            
            
                                    
            
            
                | 627 |  |  |      * This function is automatically called by the conversion functions | 
            
                                                                                                            
                            
            
                                    
            
            
                | 628 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 629 |  |  |      * PLEASE SEE: http://www.unicode.org/Public/MAPPINGS/ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 630 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 631 |  |  |      * @param string $charset The charset to be initialized. Always use the lowercase charset name; it must exactly match the name of a file in the csconvtbl/ folder ([charset].tbl). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 632 |  |  |      * @return int Returns '1' if already loaded, '2' if the charset conversion table was found and parsed. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 633 |  |  |      * @throws UnknownCharsetException if no charset table was found | 
            
                                                                                                            
                            
            
                                    
            
            
                | 634 |  |  |      * @access private | 
            
                                                                                                            
                            
            
                                    
            
            
                | 635 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 636 |  |  |     public function initCharset($charset) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 637 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 638 |  |  |         // Only process if the charset is not yet loaded: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 639 |  |  |         if (!is_array($this->parsedCharsets[$charset])) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 640 |  |  |             // Conversion table filename: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 641 |  |  |             $charsetConvTableFile = ExtensionManagementUtility::extPath('core') . 'Resources/Private/Charsets/csconvtbl/' . $charset . '.tbl'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 642 |  |  |             // If the conversion table is found: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 643 |  |  |             if ($charset && GeneralUtility::validPathStr($charsetConvTableFile) && @is_file($charsetConvTableFile)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 644 |  |  |                 // Cache file for charsets: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 645 |  |  |                 // Caching brought parsing time for gb2312 down from 2400 ms to 150 ms. For other charsets we are talking 11 ms down to zero. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 646 |  |  |                 $cacheFile = GeneralUtility::getFileAbsFileName('typo3temp/var/charset/charset_' . $charset . '.tbl'); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 647 |  |  |                 if ($cacheFile && @is_file($cacheFile)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 648 |  |  |                     $this->parsedCharsets[$charset] = unserialize(file_get_contents($cacheFile)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 649 |  |  |                 } else { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 650 |  |  |                     // Parse conversion table into lines: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 651 |  |  |                     $lines = GeneralUtility::trimExplode(LF, file_get_contents($charsetConvTableFile), true); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 652 |  |  |                     // Initialize the internal variable holding the conv. table: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 653 |  |  |                     $this->parsedCharsets[$charset] = ['local' => [], 'utf8' => []]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 654 |  |  |                     // traverse the lines: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 655 |  |  |                     $detectedType = ''; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 656 |  |  |                     foreach ($lines as $value) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 657 |  |  |                         // Comment line or blanks are ignored. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 658 |  |  |                         if (trim($value) && $value[0] !== '#') { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 659 |  |  |                             // Detect type if not done yet: (Done on first real line) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 660 |  |  |                             // The "whitespaced" type is on the syntax 	"0x0A	0x000A	#LINE FEED" 	while 	"ms-token" is like 		"B9 = U+00B9 : SUPERSCRIPT ONE" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 661 |  |  |                             if (!$detectedType) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 662 |  |  |                                 $detectedType = preg_match('/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/', $value) ? 'whitespaced' : 'ms-token'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 663 |  |  |                             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 664 |  |  |                             $hexbyte = ''; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 665 |  |  |                             $utf8 = ''; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 666 |  |  |                             if ($detectedType === 'ms-token') { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 667 |  |  |                                 list($hexbyte, $utf8) = preg_split('/[=:]/', $value, 3); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 668 |  |  |                             } elseif ($detectedType === 'whitespaced') { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 669 |  |  |                                 $regA = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 670 |  |  |                                 preg_match('/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/', $value, $regA); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 671 |  |  |                                 $hexbyte = $regA[1]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 672 |  |  |                                 $utf8 = 'U+' . $regA[2]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 673 |  |  |                             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 674 |  |  |                             $decval = hexdec(trim($hexbyte)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 675 |  |  |                             if ($decval > 127) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 676 |  |  |                                 $utf8decval = hexdec(substr(trim($utf8), 2)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 677 |  |  |                                 $this->parsedCharsets[$charset]['local'][$decval] = $this->UnumberToChar($utf8decval); | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 678 |  |  |                                 $this->parsedCharsets[$charset]['utf8'][$this->parsedCharsets[$charset]['local'][$decval]] = $decval; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 679 |  |  |                             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 680 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 681 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 682 |  |  |                     if ($cacheFile) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 683 |  |  |                         GeneralUtility::writeFileToTypo3tempDir($cacheFile, serialize($this->parsedCharsets[$charset])); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 684 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 685 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 686 |  |  |                 return 2; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 687 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 688 |  |  |             throw new UnknownCharsetException(sprintf('Unknown charset "%s"', $charset), 1508916031); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 689 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 690 |  |  |         return 1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 691 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 692 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 693 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 694 |  |  |      * This function initializes all UTF-8 character data tables. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 695 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 696 |  |  |      * PLEASE SEE: http://www.unicode.org/Public/UNIDATA/ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 697 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 698 |  |  |      * @param string $mode Mode ("case", "ascii", ...) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 699 |  |  |      * @return int|bool Returns FALSE on error, a TRUE value on success: 1 table already loaded, 2 cached version, 3 table parsed (and cached). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 700 |  |  |      * @access private | 
            
                                                                                                            
                            
            
                                    
            
            
                | 701 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 702 |  |  |     public function initUnicodeData($mode = null) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 703 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 704 |  |  |         // Cache files | 
            
                                                                                                            
                            
            
                                    
            
            
                | 705 |  |  |         $cacheFileCase = GeneralUtility::getFileAbsFileName('typo3temp/var/charset/cscase_utf-8.tbl'); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 706 |  |  |         $cacheFileASCII = GeneralUtility::getFileAbsFileName('typo3temp/var/charset/csascii_utf-8.tbl'); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 707 |  |  |         // Only process if the tables are not yet loaded | 
            
                                                                                                            
                            
            
                                    
            
            
                | 708 |  |  |         switch ($mode) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 709 |  |  |             case 'case': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 710 |  |  |                 if (is_array($this->caseFolding['utf-8'])) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 711 |  |  |                     return 1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 712 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 713 |  |  |                 // Use cached version if possible | 
            
                                                                                                            
                            
            
                                    
            
            
                | 714 |  |  |                 if ($cacheFileCase && @is_file($cacheFileCase)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 715 |  |  |                     $this->caseFolding['utf-8'] = unserialize(file_get_contents($cacheFileCase)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 716 |  |  |                     return 2; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 717 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 718 |  |  |                 break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 719 |  |  |             case 'ascii': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 720 |  |  |                 if (is_array($this->toASCII['utf-8'])) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 721 |  |  |                     return 1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 722 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 723 |  |  |                 // Use cached version if possible | 
            
                                                                                                            
                            
            
                                    
            
            
                | 724 |  |  |                 if ($cacheFileASCII && @is_file($cacheFileASCII)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 725 |  |  |                     $this->toASCII['utf-8'] = unserialize(file_get_contents($cacheFileASCII)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 726 |  |  |                     return 2; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 727 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 728 |  |  |                 break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 729 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 730 |  |  |         // Process main Unicode data file | 
            
                                                                                                            
                            
            
                                    
            
            
                | 731 |  |  |         $unicodeDataFile = ExtensionManagementUtility::extPath('core') . 'Resources/Private/Charsets/unidata/UnicodeData.txt'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 732 |  |  |         if (!(GeneralUtility::validPathStr($unicodeDataFile) && @is_file($unicodeDataFile))) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 733 |  |  |             return false; | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 734 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 735 |  |  |         $fh = fopen($unicodeDataFile, 'rb'); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 736 |  |  |         if (!$fh) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 737 |  |  |             return false; | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 738 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 739 |  |  |         // key = utf8 char (single codepoint), value = utf8 string (codepoint sequence) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 740 |  |  |         // Note: we use the UTF-8 characters here and not the Unicode numbers to avoid conversion roundtrip in utf8_strtolower/-upper) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 741 |  |  |         $this->caseFolding['utf-8'] = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 742 |  |  |         $utf8CaseFolding = &$this->caseFolding['utf-8']; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 743 |  |  |         // a shorthand | 
            
                                                                                                            
                            
            
                                    
            
            
                | 744 |  |  |         $utf8CaseFolding['toUpper'] = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 745 |  |  |         $utf8CaseFolding['toLower'] = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 746 |  |  |         $utf8CaseFolding['toTitle'] = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 747 |  |  |         // Array of temp. decompositions | 
            
                                                                                                            
                            
            
                                    
            
            
                | 748 |  |  |         $decomposition = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 749 |  |  |         // Array of chars that are marks (eg. composing accents) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 750 |  |  |         $mark = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 751 |  |  |         // Array of chars that are numbers (eg. digits) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 752 |  |  |         $number = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 753 |  |  |         // Array of chars to be omitted (eg. Russian hard sign) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 754 |  |  |         $omit = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 755 |  |  |         while (!feof($fh)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 756 |  |  |             $line = fgets($fh, 4096); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 757 |  |  |             // Has a lot of info | 
            
                                                                                                            
                            
            
                                    
            
            
                | 758 |  |  |             list($char, $name, $cat, , , $decomp, , , $num, , , , $upper, $lower, $title, ) = explode(';', rtrim($line)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 759 |  |  |             $ord = hexdec($char); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 760 |  |  |             if ($ord > 65535) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 761 |  |  |                 // Only process the BMP | 
            
                                                                                                            
                            
            
                                    
            
            
                | 762 |  |  |                 break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 763 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 764 |  |  |             $utf8_char = $this->UnumberToChar($ord); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 765 |  |  |             if ($upper) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 766 |  |  |                 $utf8CaseFolding['toUpper'][$utf8_char] = $this->UnumberToChar(hexdec($upper)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 767 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 768 |  |  |             if ($lower) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 769 |  |  |                 $utf8CaseFolding['toLower'][$utf8_char] = $this->UnumberToChar(hexdec($lower)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 770 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 771 |  |  |             // Store "title" only when different from "upper" (only a few) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 772 |  |  |             if ($title && $title !== $upper) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 773 |  |  |                 $utf8CaseFolding['toTitle'][$utf8_char] = $this->UnumberToChar(hexdec($title)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 774 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 775 |  |  |             switch ($cat[0]) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 776 |  |  |                 case 'M': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 777 |  |  |                     // mark (accent, umlaut, ...) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 778 |  |  |                     $mark['U+' . $char] = 1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 779 |  |  |                     break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 780 |  |  |                 case 'N': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 781 |  |  |                     // numeric value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 782 |  |  |                     if ($ord > 128 && $num !== '') { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 783 |  |  |                         $number['U+' . $char] = $num; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 784 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 785 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 786 |  |  |             // Accented Latin letters without "official" decomposition | 
            
                                                                                                            
                            
            
                                    
            
            
                | 787 |  |  |             $match = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 788 |  |  |             if (preg_match('/^LATIN (SMALL|CAPITAL) LETTER ([A-Z]) WITH/', $name, $match) && !$decomp) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 789 |  |  |                 $c = ord($match[2]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 790 |  |  |                 if ($match[1] === 'SMALL') { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 791 |  |  |                     $c += 32; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 792 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 793 |  |  |                 $decomposition['U+' . $char] = [dechex($c)]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 794 |  |  |                 continue; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 795 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 796 |  |  |             $match = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 797 |  |  |             if (preg_match('/(<.*>)? *(.+)/', $decomp, $match)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 798 |  |  |                 switch ($match[1]) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 799 |  |  |                     case '<circle>': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 800 |  |  |                         // add parentheses as circle replacement, eg (1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 801 |  |  |                         $match[2] = '0028 ' . $match[2] . ' 0029'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 802 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 803 |  |  |                     case '<square>': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 804 |  |  |                         // add square brackets as square replacement, eg [1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 805 |  |  |                         $match[2] = '005B ' . $match[2] . ' 005D'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 806 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 807 |  |  |                     case '<compat>': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 808 |  |  |                         // ignore multi char decompositions that start with a space | 
            
                                                                                                            
                            
            
                                    
            
            
                | 809 |  |  |                         if (preg_match('/^0020 /', $match[2])) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 810 |  |  |                             continue 2; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 811 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 812 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 813 |  |  |                     case '<initial>': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 814 |  |  |                     case '<medial>': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 815 |  |  |                     case '<final>': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 816 |  |  |                     case '<isolated>': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 817 |  |  |                     case '<vertical>': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 818 |  |  |                         continue 2; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 819 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 820 |  |  |                 $decomposition['U+' . $char] = explode(' ', $match[2]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 821 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 822 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 823 |  |  |         fclose($fh); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 824 |  |  |         // Process additional Unicode data for casing (allow folded characters to expand into a sequence) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 825 |  |  |         $specialCasingFile = ExtensionManagementUtility::extPath('core') . 'Resources/Private/Charsets/unidata/SpecialCasing.txt'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 826 |  |  |         if (GeneralUtility::validPathStr($specialCasingFile) && @is_file($specialCasingFile)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 827 |  |  |             $fh = fopen($specialCasingFile, 'rb'); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 828 |  |  |             if ($fh) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 829 |  |  |                 while (!feof($fh)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 830 |  |  |                     $line = fgets($fh, 4096); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 831 |  |  |                     if ($line[0] !== '#' && trim($line) !== '') { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 832 |  |  |                         list($char, $lower, $title, $upper, $cond) = GeneralUtility::trimExplode(';', $line); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 833 |  |  |                         if ($cond === '' || $cond[0] === '#') { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 834 |  |  |                             $utf8_char = $this->UnumberToChar(hexdec($char)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 835 |  |  |                             if ($char !== $lower) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 836 |  |  |                                 $arr = explode(' ', $lower); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 837 |  |  |                                 for ($i = 0; isset($arr[$i]); $i++) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 838 |  |  |                                     $arr[$i] = $this->UnumberToChar(hexdec($arr[$i])); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 839 |  |  |                                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 840 |  |  |                                 $utf8CaseFolding['toLower'][$utf8_char] = implode('', $arr); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 841 |  |  |                             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 842 |  |  |                             if ($char !== $title && $title !== $upper) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 843 |  |  |                                 $arr = explode(' ', $title); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 844 |  |  |                                 for ($i = 0; isset($arr[$i]); $i++) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 845 |  |  |                                     $arr[$i] = $this->UnumberToChar(hexdec($arr[$i])); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 846 |  |  |                                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 847 |  |  |                                 $utf8CaseFolding['toTitle'][$utf8_char] = implode('', $arr); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 848 |  |  |                             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 849 |  |  |                             if ($char !== $upper) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 850 |  |  |                                 $arr = explode(' ', $upper); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 851 |  |  |                                 for ($i = 0; isset($arr[$i]); $i++) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 852 |  |  |                                     $arr[$i] = $this->UnumberToChar(hexdec($arr[$i])); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 853 |  |  |                                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 854 |  |  |                                 $utf8CaseFolding['toUpper'][$utf8_char] = implode('', $arr); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 855 |  |  |                             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 856 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 857 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 858 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 859 |  |  |                 fclose($fh); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 860 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 861 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 862 |  |  |         // Process custom decompositions | 
            
                                                                                                            
                            
            
                                    
            
            
                | 863 |  |  |         $customTranslitFile = ExtensionManagementUtility::extPath('core') . 'Resources/Private/Charsets/unidata/Translit.txt'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 864 |  |  |         if (GeneralUtility::validPathStr($customTranslitFile) && @is_file($customTranslitFile)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 865 |  |  |             $fh = fopen($customTranslitFile, 'rb'); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 866 |  |  |             if ($fh) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 867 |  |  |                 while (!feof($fh)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 868 |  |  |                     $line = fgets($fh, 4096); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 869 |  |  |                     if ($line[0] !== '#' && trim($line) !== '') { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 870 |  |  |                         list($char, $translit) = GeneralUtility::trimExplode(';', $line); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 871 |  |  |                         if (!$translit) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 872 |  |  |                             $omit['U+' . $char] = 1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 873 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 874 |  |  |                         $decomposition['U+' . $char] = explode(' ', $translit); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 875 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 876 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 877 |  |  |                 fclose($fh); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 878 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 879 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 880 |  |  |         // Decompose and remove marks; inspired by unac (Loic Dachary) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 881 |  |  |         foreach ($decomposition as $from => $to) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 882 |  |  |             $code_decomp = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 883 |  |  |             while ($code_value = array_shift($to)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 884 |  |  |                 // Do recursive decomposition | 
            
                                                                                                            
                            
            
                                    
            
            
                | 885 |  |  |                 if (isset($decomposition['U+' . $code_value])) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 886 |  |  |                     foreach (array_reverse($decomposition['U+' . $code_value]) as $cv) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 887 |  |  |                         array_unshift($to, $cv); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 888 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 889 |  |  |                 } elseif (!isset($mark['U+' . $code_value])) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 890 |  |  |                     // remove mark | 
            
                                                                                                            
                            
            
                                    
            
            
                | 891 |  |  |                     $code_decomp[] = $code_value; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 892 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 893 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 894 |  |  |             if (!empty($code_decomp) || isset($omit[$from])) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 895 |  |  |                 $decomposition[$from] = $code_decomp; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 896 |  |  |             } else { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 897 |  |  |                 unset($decomposition[$from]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 898 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 899 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 900 |  |  |         // Create ascii only mapping | 
            
                                                                                                            
                            
            
                                    
            
            
                | 901 |  |  |         $this->toASCII['utf-8'] = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 902 |  |  |         $ascii = &$this->toASCII['utf-8']; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 903 |  |  |         foreach ($decomposition as $from => $to) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 904 |  |  |             $code_decomp = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 905 |  |  |             while ($code_value = array_shift($to)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 906 |  |  |                 $ord = hexdec($code_value); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 907 |  |  |                 if ($ord > 127) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 908 |  |  |                     continue 2; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 909 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 910 |  |  |                 // Skip decompositions containing non-ASCII chars | 
            
                                                                                                            
                            
            
                                    
            
            
                | 911 |  |  |                 $code_decomp[] = chr($ord); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 912 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 913 |  |  |             $ascii[$this->UnumberToChar(hexdec($from))] = implode('', $code_decomp); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 914 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 915 |  |  |         // Add numeric decompositions | 
            
                                                                                                            
                            
            
                                    
            
            
                | 916 |  |  |         foreach ($number as $from => $to) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 917 |  |  |             $utf8_char = $this->UnumberToChar(hexdec($from)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 918 |  |  |             if (!isset($ascii[$utf8_char])) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 919 |  |  |                 $ascii[$utf8_char] = $to; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 920 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 921 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 922 |  |  |         if ($cacheFileCase) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 923 |  |  |             GeneralUtility::writeFileToTypo3tempDir($cacheFileCase, serialize($utf8CaseFolding)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 924 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 925 |  |  |         if ($cacheFileASCII) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 926 |  |  |             GeneralUtility::writeFileToTypo3tempDir($cacheFileASCII, serialize($ascii)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 927 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 928 |  |  |         return 3; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 929 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 930 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 931 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 932 |  |  |      * This function initializes the folding table for a charset other than UTF-8. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 933 |  |  |      * This function is automatically called by the case folding functions. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 934 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 935 |  |  |      * @param string $charset Charset for which to initialize case folding. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 936 |  |  |      * @return int|bool Returns FALSE on error, a TRUE value on success: 1 = table already loaded, 2 = cached version used, 3 = table parsed (and cached). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 937 |  |  |      * @access private | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 938 |  |  |      */ | 
            
                                                                        
                            
            
                                    
            
            
                | 939 |  |  |     public function initCaseFolding($charset) | 
            
                                                                        
                            
            
                                    
            
            
                | 940 |  |  |     { | 
            
                                                                        
                            
            
                                    
            
            
                | 941 |  |  |         // Only process if the case table is not yet loaded: | 
            
                                                                        
                            
            
                                    
            
            
                | 942 |  |  |         if (is_array($this->caseFolding[$charset])) { | 
            
                                                                        
                            
            
                                    
            
            
                | 943 |  |  |             return 1; | 
            
                                                                        
                            
            
                                    
            
            
                | 944 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 945 |  |  |         // Use cached version if possible | 
            
                                                                        
                            
            
                                    
            
            
                | 946 |  |  |         $cacheFile = GeneralUtility::getFileAbsFileName('typo3temp/var/charset/cscase_' . $charset . '.tbl'); | 
            
                                                                        
                            
            
                                    
            
            
                | 947 |  |  |         if ($cacheFile && @is_file($cacheFile)) { | 
            
                                                                        
                            
            
                                    
            
            
                | 948 |  |  |             $this->caseFolding[$charset] = unserialize(file_get_contents($cacheFile)); | 
            
                                                                        
                            
            
                                    
            
            
                | 949 |  |  |             return 2; | 
            
                                                                        
                            
            
                                    
            
            
                | 950 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 951 |  |  |         // init UTF-8 conversion for this charset | 
            
                                                                        
                            
            
                                    
            
            
                | 952 |  |  |         if (!$this->initCharset($charset)) { | 
            
                                                                        
                            
            
                                    
            
            
                | 953 |  |  |             return false; | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 954 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 955 |  |  |         // UTF-8 case folding is used as the base conversion table | 
            
                                                                        
                            
            
                                    
            
            
                | 956 |  |  |         if (!$this->initUnicodeData('case')) { | 
            
                                                                        
                            
            
                                    
            
            
                | 957 |  |  |             return false; | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 958 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 959 |  |  |         $nochar = chr($this->noCharByteVal); | 
            
                                                                        
                            
            
                                    
            
            
                | 960 |  |  |         foreach ($this->parsedCharsets[$charset]['local'] as $ci => $utf8) { | 
            
                                                                        
                            
            
                                    
            
            
                | 961 |  |  |             // Reconvert to charset (don't use chr() of numeric value, might be multi-byte) | 
            
                                                                        
                            
            
                                    
            
            
                | 962 |  |  |             $c = $this->utf8_decode($utf8, $charset); | 
            
                                                                        
                            
            
                                    
            
            
                | 963 |  |  |             $cc = $this->utf8_decode($this->caseFolding['utf-8']['toUpper'][$utf8], $charset); | 
            
                                                                        
                            
            
                                    
            
            
                | 964 |  |  |             if ($cc !== '' && $cc !== $nochar) { | 
            
                                                                        
                            
            
                                    
            
            
                | 965 |  |  |                 $this->caseFolding[$charset]['toUpper'][$c] = $cc; | 
            
                                                                        
                            
            
                                    
            
            
                | 966 |  |  |             } | 
            
                                                                        
                            
            
                                    
            
            
                | 967 |  |  |             $cc = $this->utf8_decode($this->caseFolding['utf-8']['toLower'][$utf8], $charset); | 
            
                                                                        
                            
            
                                    
            
            
                | 968 |  |  |             if ($cc !== '' && $cc !== $nochar) { | 
            
                                                                        
                            
            
                                    
            
            
                | 969 |  |  |                 $this->caseFolding[$charset]['toLower'][$c] = $cc; | 
            
                                                                        
                            
            
                                    
            
            
                | 970 |  |  |             } | 
            
                                                                        
                            
            
                                    
            
            
                | 971 |  |  |             $cc = $this->utf8_decode($this->caseFolding['utf-8']['toTitle'][$utf8], $charset); | 
            
                                                                        
                            
            
                                    
            
            
                | 972 |  |  |             if ($cc !== '' && $cc !== $nochar) { | 
            
                                                                        
                            
            
                                    
            
            
                | 973 |  |  |                 $this->caseFolding[$charset]['toTitle'][$c] = $cc; | 
            
                                                                        
                            
            
                                    
            
            
                | 974 |  |  |             } | 
            
                                                                        
                            
            
                                    
            
            
                | 975 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 976 |  |  |         // Add the ASCII case table | 
            
                                                                        
                            
            
                                    
            
            
                | 977 |  |  |         $start = ord('a'); | 
            
                                                                        
                            
            
                                    
            
            
                | 978 |  |  |         $end = ord('z'); | 
            
                                                                        
                            
            
                                    
            
            
                | 979 |  |  |         for ($i = $start; $i <= $end; $i++) { | 
            
                                                                        
                            
            
                                    
            
            
                | 980 |  |  |             $this->caseFolding[$charset]['toUpper'][chr($i)] = chr($i - 32); | 
            
                                                                        
                            
            
                                    
            
            
                | 981 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 982 |  |  |         $start = ord('A'); | 
            
                                                                        
                            
            
                                    
            
            
                | 983 |  |  |         $end = ord('Z'); | 
            
                                                                        
                            
            
                                    
            
            
                | 984 |  |  |         for ($i = $start; $i <= $end; $i++) { | 
            
                                                                        
                            
            
                                    
            
            
                | 985 |  |  |             $this->caseFolding[$charset]['toLower'][chr($i)] = chr($i + 32); | 
            
                                                                        
                            
            
                                    
            
            
                | 986 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 987 |  |  |         if ($cacheFile) { | 
            
                                                                        
                            
            
                                    
            
            
                | 988 |  |  |             GeneralUtility::writeFileToTypo3tempDir($cacheFile, serialize($this->caseFolding[$charset])); | 
            
                                                                        
                            
            
                                    
            
            
                | 989 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 990 |  |  |         return 3; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 991 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 992 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Initializes the to-ASCII conversion table for a charset other than UTF-8.
                 * This function is automatically called by the ASCII transliteration functions.
                 *
                 * The table is taken from memory if it was already built, otherwise from the
                 * cache file in typo3temp/var/charset/, otherwise it is derived from the UTF-8
                 * transliteration table and then written to the cache file.
                 *
                 * @param string $charset Charset for which to initialize conversion.
                 * @return int|bool FALSE on error, otherwise a TRUE value: 1 table already loaded, 2 cached version loaded, 3 table parsed (and cached).
                 * @access private
                 */
                public function initToASCII($charset)
                {
                    // Only process if the table is not yet loaded. The isset() check avoids an
                    // "undefined index" notice on the first call for a charset.
                    if (isset($this->toASCII[$charset]) && is_array($this->toASCII[$charset])) {
                        return 1;
                    }
                    // Use cached version if possible
                    $cacheFile = GeneralUtility::getFileAbsFileName('typo3temp/var/charset/csascii_' . $charset . '.tbl');
                    if ($cacheFile && @is_file($cacheFile)) {
                        $cachedTable = unserialize(file_get_contents($cacheFile));
                        if (is_array($cachedTable)) {
                            $this->toASCII[$charset] = $cachedTable;
                            return 2;
                        }
                        // Unreadable or corrupt cache file: do not store the bogus value,
                        // fall through and rebuild the table instead.
                    }
                    // Init UTF-8 conversion for this charset
                    if (!$this->initCharset($charset)) {
                        return false;
                    }
                    // UTF-8/ASCII transliteration is used as the base conversion table
                    if (!$this->initUnicodeData('ascii')) {
                        return false;
                    }
                    // Make sure the result is an array even if no character of this charset has a
                    // transliteration; otherwise nothing valid is cached and every call rebuilds.
                    if (!isset($this->toASCII[$charset]) || !is_array($this->toASCII[$charset])) {
                        $this->toASCII[$charset] = [];
                    }
                    foreach ($this->parsedCharsets[$charset]['local'] as $utf8) {
                        if (isset($this->toASCII['utf-8'][$utf8])) {
                            // Reconvert to charset (don't use chr() of numeric value, might be multi-byte)
                            $c = $this->utf8_decode($utf8, $charset);
                            $this->toASCII[$charset][$c] = $this->toASCII['utf-8'][$utf8];
                        }
                    }
                    if ($cacheFile) {
                        GeneralUtility::writeFileToTypo3tempDir($cacheFile, serialize($this->toASCII[$charset]));
                    }
                    return 3;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 1033 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1034 |  |  |     /******************************************** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1035 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1036 |  |  |      * String operation functions | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1037 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1038 |  |  |      ********************************************/ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1039 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Shortens a string to a number of characters and adds a crop signifier.
                 *
                 * A positive $len keeps the first $len characters and appends $crop; a negative
                 * $len keeps the last abs($len) characters and prepends $crop. The string is
                 * returned unchanged if $len casts to 0 or the string is not longer than abs($len).
                 * Unit tested by Kasper
                 *
                 * @param string $charset The character set
                 * @param string $string Character string
                 * @param int $len Length (in characters)
                 * @param string $crop Crop signifier
                 * @return string The shortened string
                 * @see substr(), mb_strimwidth()
                 */
                public function crop($charset, $string, $len, $crop = '')
                {
                    // A zero length means "do not crop"; checked first so that the
                    // string length is not evaluated at all in that case.
                    if ((int)$len === 0) {
                        return $string;
                    }
                    $charCount = mb_strlen($string, $charset);
                    // Nothing to cut off when the string already fits
                    if ($charCount <= abs($len)) {
                        return $string;
                    }
                    return $len > 0
                        ? mb_substr($string, 0, $len, $charset) . $crop
                        : $crop . mb_substr($string, $len, $charCount, $charset);
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 1063 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Changes the case of the first character of a string only, using the given
                 * character set (multi-byte aware counterpart of lcfirst()/ucfirst()).
                 *
                 * @param string $charset Character set passed on to the mb_* functions
                 * @param string $string Input string
                 * @param string $case 'toLower' lowercases the first character; any other value uppercases it
                 * @return string The input with its first character converted
                 */
                public function convCaseFirst($charset, $string, $case)
                {
                    // Split into the leading character and everything after it
                    $head = mb_substr($string, 0, 1, $charset);
                    $tail = mb_substr($string, 1, null, $charset);
                    if ($case === 'toLower') {
                        return mb_strtolower($head, $charset) . $tail;
                    }
                    return mb_strtoupper($head, $charset) . $tail;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 1081 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Replaces special characters (such as æøåÆØÅ or umlauts) with ASCII equivalents,
                 * which usually consist of more than one character (e.g. æ => ae).
                 *
                 * Dispatches to the UTF-8, EUC-based or single-byte mapping routine depending
                 * on the given charset.
                 *
                 * @param string $charset Character set of string
                 * @param string $string Input string to convert
                 * @return string The converted string
                 */
                public function specCharsToASCII($charset, $string)
                {
                    if ($charset === 'utf-8') {
                        return $this->utf8_char_mapping($string);
                    }
                    if (isset($this->eucBasedSets[$charset])) {
                        return $this->euc_char_mapping($string, $charset);
                    }
                    // Every remaining charset is treated as a single-byte encoding
                    return $this->sb_char_mapping($string, $charset);
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 1101 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1102 |  |  |     /******************************************** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1103 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1104 |  |  |      * Internal string operation functions | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1105 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1106 |  |  |      ********************************************/ | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Maps every byte of a string in a single-byte charset through the
                 * toASCII table of that charset.
                 *
                 * @param string $str The string
                 * @param string $charset The charset
                 * @return string The converted string, or the unchanged input if initToASCII() returns a falsy value
                 */
                public function sb_char_mapping($str, $charset)
                {
                    if (!$this->initToASCII($charset)) {
                        // Nothing to do: hand the input back untouched
                        return $str;
                    }
                    $table = &$this->toASCII[$charset];
                    $converted = '';
                    $offset = 0;
                    while (isset($str[$offset])) {
                        $byte = $str[$offset];
                        // Bytes without a table entry are copied verbatim
                        $converted .= isset($table[$byte]) ? $table[$byte] : $byte;
                        $offset++;
                    }
                    return $converted;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 1132 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1133 |  |  |     /******************************************** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1134 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1135 |  |  |      * Internal UTF-8 string operation functions | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1136 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1137 |  |  |      ********************************************/ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1138 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1139 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1140 |  |  |      * Translates a character position into an 'absolute' byte position. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1141 |  |  |      * Unit tested by Kasper. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1142 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1143 |  |  |      * @param string $str UTF-8 string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1144 |  |  |      * @param int $pos Character position (negative values start from the end) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1145 |  |  |      * @return int Byte position | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1146 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1147 |  |  |     public function utf8_char2byte_pos($str, $pos) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1148 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1149 |  |  |         // Number of characters found | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1150 |  |  |         $n = 0; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1151 |  |  |         // Number of characters wanted | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1152 |  |  |         $p = abs($pos); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1153 |  |  |         if ($pos >= 0) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1154 |  |  |             $i = 0; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1155 |  |  |             $d = 1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1156 |  |  |         } else { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1157 |  |  |             $i = strlen($str) - 1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1158 |  |  |             $d = -1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1159 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1160 |  |  |         for (; isset($str[$i]) && $n < $p; $i += $d) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1161 |  |  |             $c = (int)ord($str[$i]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1162 |  |  |             // single-byte (0xxxxxx) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1163 |  |  |             if (!($c & 128)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1164 |  |  |                 $n++; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1165 |  |  |             } elseif (($c & 192) === 192) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1166 |  |  |                 // Multi-byte starting byte (11xxxxxx) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1167 |  |  |                 $n++; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1168 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1169 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1170 |  |  |         if (!isset($str[$i])) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1171 |  |  |             // Offset beyond string length | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1172 |  |  |             return false; | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1173 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1174 |  |  |         if ($pos >= 0) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1175 |  |  |             // Skip trailing multi-byte data bytes | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1176 |  |  |             while (ord($str[$i]) & 128 && !(ord($str[$i]) & 64)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1177 |  |  |                 $i++; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1178 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1179 |  |  |         } else { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1180 |  |  |             // Correct offset | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1181 |  |  |             $i++; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1182 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1183 |  |  |         return $i; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1184 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1185 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Replaces every character of a UTF-8 string by its entry in the
                 * 'utf-8' slot of $this->toASCII, where such an entry exists.
                 *
                 * The string is scanned byte-wise. A byte with the high bit cleared is
                 * taken as a one-byte character. A byte with the two highest bits set is
                 * taken as the lead byte of a sequence whose length equals the number of
                 * leading 1-bits of that byte. Any other byte (10xxxxxx found outside of
                 * a sequence) contributes nothing to the result unless the table has an
                 * entry for the empty string.
                 *
                 * @param string $str UTF-8 string
                 * @return string The converted string; $str itself if initUnicodeData('ascii') reports failure
                 */
                public function utf8_char_mapping($str)
                {
                    if (!$this->initUnicodeData('ascii')) {
                        // Mapping data not available, hand the input back untouched
                        return $str;
                    }
                    // Bound by reference, exactly like the lookup always was
                    $table = &$this->toASCII['utf-8'];
                    $result = '';
                    $offset = 0;
                    while (isset($str[$offset])) {
                        $lead = ord($str[$offset]);
                        if (($lead & 128) === 0) {
                            // Single byte (0xxxxxxx)
                            $char = $str[$offset];
                            $offset++;
                        } elseif (($lead & 192) === 192) {
                            // Lead byte (11xxxxxx): count its leading 1-bits to get the sequence length
                            $length = 0;
                            while ($lead & 128) {
                                $length++;
                                $lead = $lead << 1;
                            }
                            $char = substr($str, $offset, $length);
                            $offset += $length;
                        } else {
                            // Continuation byte (10xxxxxx) without a preceding lead byte
                            $char = '';
                            $offset++;
                        }
                        $result .= isset($table[$char]) ? $table[$char] : $char;
                    }
                    return $result;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 1224 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1225 |  |  |     /******************************************** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1226 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1227 |  |  |      * Internal EUC string operation functions | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1228 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1229 |  |  |      * Extended Unix Code: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1230 |  |  |      *  ASCII compatible 7bit single bytes chars | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1231 |  |  |      *  8bit two byte chars | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1232 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1233 |  |  |      * Shift-JIS is treated as a special case. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1234 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1235 |  |  |      ********************************************/ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 1236 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Replaces every character of a string in an EUC-family charset (or
                 * Shift-JIS) by its entry in $this->toASCII[$charset], where such an
                 * entry exists.
                 *
                 * Characters are either one or two bytes long. For 'shift_jis' a byte in
                 * the ranges 128-159 or 224-255 starts a two-byte character; for every
                 * other charset any byte >= 128 does. Characters without a table entry
                 * are copied to the result unchanged.
                 *
                 * @param string $str EUC multibyte character string
                 * @param string $charset The charset
                 * @return string The converted string; $str itself if initToASCII($charset) reports failure
                 */
                public function euc_char_mapping($str, $charset)
                {
                    if (!$this->initToASCII($charset)) {
                        // No mapping table for this charset, hand the input back untouched
                        return $str;
                    }
                    // Bound by reference, exactly like the lookup always was
                    $table = &$this->toASCII[$charset];
                    $isShiftJis = $charset === 'shift_jis';
                    $result = '';
                    for ($offset = 0; isset($str[$offset]); $offset++) {
                        $char = $str[$offset];
                        $byte = ord($char);
                        if ($isShiftJis) {
                            $startsDoubleByte = ($byte >= 128 && $byte < 160) || $byte >= 224;
                        } else {
                            $startsDoubleByte = $byte >= 128;
                        }
                        if ($startsDoubleByte) {
                            // Take the following byte along and step over it
                            $char = substr($str, $offset, 2);
                            $offset++;
                        }
                        $result .= isset($table[$char]) ? $table[$char] : $char;
                    }
                    return $result;
                }
            
                                                                                                            
                                                                
            
                                    
            
            
                | 1276 |  |  | } | 
            
                                                        
            
                                    
            
            
                | 1277 |  |  |  |