Elgg /
Elgg
| 1 | <?php |
||
| 2 | namespace Elgg; |
||
/**
 * Elgg Transliterate
 *
 * For creating "friendly titles" for URLs
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * This software consists of voluntary contributions made by many individuals
 * and is licensed under the LGPL. For more information, see
 * <http://www.doctrine-project.org>.
 *
 * @package Elgg.Core
 * @author  Konsta Vesterinen <[email protected]>
 * @author  Jonathan H. Wage <[email protected]>
 * @author  Steve Clay <[email protected]>
 *
 * @access private Plugin authors should not use this directly
 */
class Translit {

	/**
	 * Create a version of a string for embedding in a URL
	 *
	 * Steps, in order: NFC-normalize (when a working normalizer is available),
	 * transliterate Western multibyte characters to ASCII, unwrap HTML tags,
	 * replace currency symbols, drop ASCII punctuation/control characters,
	 * lowercase, split into words, URL-encode each word and join the words
	 * with $separator.
	 *
	 * @param string $string    A UTF-8 string
	 * @param string $separator The character to separate words with
	 * @return string The URL-safe string; empty if no words remain or if
	 *                splitting fails with a PCRE error
	 */
	static public function urlize($string, $separator = '-') {
		// Iñtërnâtiônàlizætiøn, AND 日本語!

		// try to force combined chars because the translit map and others expect it
		if (self::hasNormalizerSupport()) {
			$nfc = normalizer_normalize($string);
			if (is_string($nfc)) {
				$string = $nfc;
			}
		}

		// Internationalizaetion, AND 日本語! (the map turns "æ" into "ae")
		$string = self::transliterateAscii($string);

		// allow HTML tags in titles: "<em>" becomes " em "
		// preg_replace() returns null on a PCRE error; keep the input in that case
		$unwrapped = preg_replace('~<([a-zA-Z][^>]*)>~', ' $1 ', $string);
		if (is_string($unwrapped)) {
			$string = $unwrapped;
		}

		// more substitutions
		// @todo put these somewhere else
		$string = strtr($string, [
			// currency
			"\xE2\x82\xAC" /* € */ => ' E ',
			"\xC2\xA3" /* £ */ => ' GBP ',
		]);

		// remove all ASCII except 0-9a-zA-Z, hyphen, underscore, and whitespace
		// note: "x" modifier did not work with this pattern.
		$stripped = preg_replace('~['
			. '\x00-\x08' // control chars
			. '\x0b\x0c' // vert tab, form feed
			. '\x0e-\x1f' // control chars
			. '\x21-\x2c' // ! ... ,
			. '\x2e\x2f' // . slash
			. '\x3a-\x40' // : ... @
			. '\x5b-\x5e' // [ ... ^
			. '\x60' // `
			. '\x7b-\x7f' // { ... DEL
			. ']~', '', $string);
		if (is_string($stripped)) {
			$string = $stripped;
		}

		// internationalizaetion and 日本語
		// note: not using elgg_strtolower to keep this class portable
		$string = is_callable('mb_strtolower') ? mb_strtolower($string, 'UTF-8') : strtolower($string);

		// split by ASCII chars not in 0-9a-zA-Z
		// note: we cannot use [^0-9a-zA-Z] because that matches multibyte chars.
		// note: the "x" modifier is deliberately not used; the pattern is a
		// single character class built only from escapes.
		$pattern = '~['
			. '\x00-\x2f' // controls ... slash
			. '\x3a-\x40' // : ... @
			. '\x5b-\x60' // [ ... `
			. '\x7b-\x7f' // { ... DEL
			. ']+~';

		// ['internationalizaetion', 'and', '日本語']
		$words = preg_split($pattern, $string, -1, PREG_SPLIT_NO_EMPTY);
		if (!is_array($words)) {
			// preg_split() returns false on a PCRE error; array_map() must not receive it
			return '';
		}

		// ['internationalizaetion', 'and', '%E6%97%A5%E6%9C%AC%E8%AA%9E']
		$words = array_map('urlencode', $words);

		// internationalizaetion-and-%E6%97%A5%E6%9C%AC%E8%AA%9E
		return implode($separator, $words);
	}

	/**
	 * Transliterate Western multibyte chars to ASCII
	 *
	 * Strings without any byte in the 0x80-0xFF range are returned untouched.
	 * Multibyte characters that are not in the map are left as they are.
	 *
	 * @param string $utf8 a UTF-8 string
	 * @return string
	 */
	static public function transliterateAscii($utf8) {
		// built once per request and reused on later calls
		static $map = null;

		// pure ASCII: nothing to transliterate
		if (!preg_match('/[\x80-\xff]/', $utf8)) {
			return $utf8;
		}

		if (null === $map) {
			$map = self::getAsciiTranslitMap();
		}

		return strtr($utf8, $map);
	}

	/**
	 * Get array of UTF-8 (NFC) character replacements.
	 *
	 * Keys are the UTF-8 byte sequences of precomposed characters, values are
	 * their ASCII replacements (one or two characters).
	 *
	 * @return array
	 */
	static public function getAsciiTranslitMap() {
		return [
			// Decompositions for Latin-1 Supplement
			"\xC2\xAA" /* ª */ => 'a', "\xC2\xBA" /* º */ => 'o', "\xC3\x80" /* À */ => 'A',
			"\xC3\x81" /* Á */ => 'A', "\xC3\x82" /* Â */ => 'A', "\xC3\x83" /* Ã */ => 'A',
			"\xC3\x84" /* Ä */ => 'A', "\xC3\x85" /* Å */ => 'A', "\xC3\x86" /* Æ */ => 'AE',
			"\xC3\x87" /* Ç */ => 'C', "\xC3\x88" /* È */ => 'E', "\xC3\x89" /* É */ => 'E',
			"\xC3\x8A" /* Ê */ => 'E', "\xC3\x8B" /* Ë */ => 'E', "\xC3\x8C" /* Ì */ => 'I',
			"\xC3\x8D" /* Í */ => 'I', "\xC3\x8E" /* Î */ => 'I', "\xC3\x8F" /* Ï */ => 'I',
			"\xC3\x90" /* Ð */ => 'D', "\xC3\x91" /* Ñ */ => 'N', "\xC3\x92" /* Ò */ => 'O',
			"\xC3\x93" /* Ó */ => 'O', "\xC3\x94" /* Ô */ => 'O', "\xC3\x95" /* Õ */ => 'O',
			"\xC3\x96" /* Ö */ => 'O', "\xC3\x99" /* Ù */ => 'U', "\xC3\x9A" /* Ú */ => 'U',
			"\xC3\x9B" /* Û */ => 'U', "\xC3\x9C" /* Ü */ => 'U', "\xC3\x9D" /* Ý */ => 'Y',
			"\xC3\x9E" /* Þ */ => 'TH', "\xC3\x9F" /* ß */ => 'ss', "\xC3\xA0" /* à */ => 'a',
			"\xC3\xA1" /* á */ => 'a', "\xC3\xA2" /* â */ => 'a', "\xC3\xA3" /* ã */ => 'a',
			"\xC3\xA4" /* ä */ => 'a', "\xC3\xA5" /* å */ => 'a', "\xC3\xA6" /* æ */ => 'ae',
			"\xC3\xA7" /* ç */ => 'c', "\xC3\xA8" /* è */ => 'e', "\xC3\xA9" /* é */ => 'e',
			"\xC3\xAA" /* ê */ => 'e', "\xC3\xAB" /* ë */ => 'e', "\xC3\xAC" /* ì */ => 'i',
			"\xC3\xAD" /* í */ => 'i', "\xC3\xAE" /* î */ => 'i', "\xC3\xAF" /* ï */ => 'i',
			"\xC3\xB0" /* ð */ => 'd', "\xC3\xB1" /* ñ */ => 'n', "\xC3\xB2" /* ò */ => 'o',
			"\xC3\xB3" /* ó */ => 'o', "\xC3\xB4" /* ô */ => 'o', "\xC3\xB5" /* õ */ => 'o',
			"\xC3\xB6" /* ö */ => 'o', "\xC3\xB8" /* ø */ => 'o', "\xC3\xB9" /* ù */ => 'u',
			"\xC3\xBA" /* ú */ => 'u', "\xC3\xBB" /* û */ => 'u', "\xC3\xBC" /* ü */ => 'u',
			"\xC3\xBD" /* ý */ => 'y', "\xC3\xBE" /* þ */ => 'th', "\xC3\xBF" /* ÿ */ => 'y',
			"\xC3\x98" /* Ø */ => 'O',
			// Decompositions for Latin Extended-A
			"\xC4\x80" /* Ā */ => 'A', "\xC4\x81" /* ā */ => 'a', "\xC4\x82" /* Ă */ => 'A',
			"\xC4\x83" /* ă */ => 'a', "\xC4\x84" /* Ą */ => 'A', "\xC4\x85" /* ą */ => 'a',
			"\xC4\x86" /* Ć */ => 'C', "\xC4\x87" /* ć */ => 'c', "\xC4\x88" /* Ĉ */ => 'C',
			"\xC4\x89" /* ĉ */ => 'c', "\xC4\x8A" /* Ċ */ => 'C', "\xC4\x8B" /* ċ */ => 'c',
			"\xC4\x8C" /* Č */ => 'C', "\xC4\x8D" /* č */ => 'c', "\xC4\x8E" /* Ď */ => 'D',
			"\xC4\x8F" /* ď */ => 'd', "\xC4\x90" /* Đ */ => 'D', "\xC4\x91" /* đ */ => 'd',
			"\xC4\x92" /* Ē */ => 'E', "\xC4\x93" /* ē */ => 'e', "\xC4\x94" /* Ĕ */ => 'E',
			"\xC4\x95" /* ĕ */ => 'e', "\xC4\x96" /* Ė */ => 'E', "\xC4\x97" /* ė */ => 'e',
			"\xC4\x98" /* Ę */ => 'E', "\xC4\x99" /* ę */ => 'e', "\xC4\x9A" /* Ě */ => 'E',
			"\xC4\x9B" /* ě */ => 'e', "\xC4\x9C" /* Ĝ */ => 'G', "\xC4\x9D" /* ĝ */ => 'g',
			"\xC4\x9E" /* Ğ */ => 'G', "\xC4\x9F" /* ğ */ => 'g', "\xC4\xA0" /* Ġ */ => 'G',
			"\xC4\xA1" /* ġ */ => 'g', "\xC4\xA2" /* Ģ */ => 'G', "\xC4\xA3" /* ģ */ => 'g',
			"\xC4\xA4" /* Ĥ */ => 'H', "\xC4\xA5" /* ĥ */ => 'h', "\xC4\xA6" /* Ħ */ => 'H',
			"\xC4\xA7" /* ħ */ => 'h', "\xC4\xA8" /* Ĩ */ => 'I', "\xC4\xA9" /* ĩ */ => 'i',
			"\xC4\xAA" /* Ī */ => 'I', "\xC4\xAB" /* ī */ => 'i', "\xC4\xAC" /* Ĭ */ => 'I',
			"\xC4\xAD" /* ĭ */ => 'i', "\xC4\xAE" /* Į */ => 'I', "\xC4\xAF" /* į */ => 'i',
			"\xC4\xB0" /* İ */ => 'I', "\xC4\xB1" /* ı */ => 'i', "\xC4\xB2" /* Ĳ */ => 'IJ',
			"\xC4\xB3" /* ĳ */ => 'ij', "\xC4\xB4" /* Ĵ */ => 'J', "\xC4\xB5" /* ĵ */ => 'j',
			"\xC4\xB6" /* Ķ */ => 'K', "\xC4\xB7" /* ķ */ => 'k', "\xC4\xB8" /* ĸ */ => 'k',
			"\xC4\xB9" /* Ĺ */ => 'L', "\xC4\xBA" /* ĺ */ => 'l', "\xC4\xBB" /* Ļ */ => 'L',
			"\xC4\xBC" /* ļ */ => 'l', "\xC4\xBD" /* Ľ */ => 'L', "\xC4\xBE" /* ľ */ => 'l',
			"\xC4\xBF" /* Ŀ */ => 'L', "\xC5\x80" /* ŀ */ => 'l', "\xC5\x81" /* Ł */ => 'L',
			"\xC5\x82" /* ł */ => 'l', "\xC5\x83" /* Ń */ => 'N', "\xC5\x84" /* ń */ => 'n',
			"\xC5\x85" /* Ņ */ => 'N', "\xC5\x86" /* ņ */ => 'n', "\xC5\x87" /* Ň */ => 'N',
			// U+0149 and U+014B are lowercase letters, U+014A is the capital Eng
			"\xC5\x88" /* ň */ => 'n', "\xC5\x89" /* ŉ */ => 'n', "\xC5\x8A" /* Ŋ */ => 'N',
			"\xC5\x8B" /* ŋ */ => 'n', "\xC5\x8C" /* Ō */ => 'O', "\xC5\x8D" /* ō */ => 'o',
			"\xC5\x8E" /* Ŏ */ => 'O', "\xC5\x8F" /* ŏ */ => 'o', "\xC5\x90" /* Ő */ => 'O',
			"\xC5\x91" /* ő */ => 'o', "\xC5\x92" /* Œ */ => 'OE', "\xC5\x93" /* œ */ => 'oe',
			"\xC5\x94" /* Ŕ */ => 'R', "\xC5\x95" /* ŕ */ => 'r', "\xC5\x96" /* Ŗ */ => 'R',
			"\xC5\x97" /* ŗ */ => 'r', "\xC5\x98" /* Ř */ => 'R', "\xC5\x99" /* ř */ => 'r',
			"\xC5\x9A" /* Ś */ => 'S', "\xC5\x9B" /* ś */ => 's', "\xC5\x9C" /* Ŝ */ => 'S',
			"\xC5\x9D" /* ŝ */ => 's', "\xC5\x9E" /* Ş */ => 'S', "\xC5\x9F" /* ş */ => 's',
			"\xC5\xA0" /* Š */ => 'S', "\xC5\xA1" /* š */ => 's', "\xC5\xA2" /* Ţ */ => 'T',
			"\xC5\xA3" /* ţ */ => 't', "\xC5\xA4" /* Ť */ => 'T', "\xC5\xA5" /* ť */ => 't',
			"\xC5\xA6" /* Ŧ */ => 'T', "\xC5\xA7" /* ŧ */ => 't', "\xC5\xA8" /* Ũ */ => 'U',
			"\xC5\xA9" /* ũ */ => 'u', "\xC5\xAA" /* Ū */ => 'U', "\xC5\xAB" /* ū */ => 'u',
			"\xC5\xAC" /* Ŭ */ => 'U', "\xC5\xAD" /* ŭ */ => 'u', "\xC5\xAE" /* Ů */ => 'U',
			"\xC5\xAF" /* ů */ => 'u', "\xC5\xB0" /* Ű */ => 'U', "\xC5\xB1" /* ű */ => 'u',
			"\xC5\xB2" /* Ų */ => 'U', "\xC5\xB3" /* ų */ => 'u', "\xC5\xB4" /* Ŵ */ => 'W',
			"\xC5\xB5" /* ŵ */ => 'w', "\xC5\xB6" /* Ŷ */ => 'Y', "\xC5\xB7" /* ŷ */ => 'y',
			"\xC5\xB8" /* Ÿ */ => 'Y', "\xC5\xB9" /* Ź */ => 'Z', "\xC5\xBA" /* ź */ => 'z',
			"\xC5\xBB" /* Ż */ => 'Z', "\xC5\xBC" /* ż */ => 'z', "\xC5\xBD" /* Ž */ => 'Z',
			"\xC5\xBE" /* ž */ => 'z', "\xC5\xBF" /* ſ */ => 's',
			// Decompositions for Latin Extended-B
			"\xC8\x98" /* Ș */ => 'S', "\xC8\x99" /* ș */ => 's',
			"\xC8\x9A" /* Ț */ => 'T', "\xC8\x9B" /* ț */ => 't',
			// unmarked
			"\xC6\xA0" /* Ơ */ => 'O', "\xC6\xA1" /* ơ */ => 'o',
			"\xC6\xAF" /* Ư */ => 'U', "\xC6\xB0" /* ư */ => 'u',
			// grave accent
			"\xE1\xBA\xA6" /* Ầ */ => 'A', "\xE1\xBA\xA7" /* ầ */ => 'a',
			"\xE1\xBA\xB0" /* Ằ */ => 'A', "\xE1\xBA\xB1" /* ằ */ => 'a',
			"\xE1\xBB\x80" /* Ề */ => 'E', "\xE1\xBB\x81" /* ề */ => 'e',
			"\xE1\xBB\x92" /* Ồ */ => 'O', "\xE1\xBB\x93" /* ồ */ => 'o',
			"\xE1\xBB\x9C" /* Ờ */ => 'O', "\xE1\xBB\x9D" /* ờ */ => 'o',
			"\xE1\xBB\xAA" /* Ừ */ => 'U', "\xE1\xBB\xAB" /* ừ */ => 'u',
			"\xE1\xBB\xB2" /* Ỳ */ => 'Y', "\xE1\xBB\xB3" /* ỳ */ => 'y',
			// hook
			"\xE1\xBA\xA2" /* Ả */ => 'A', "\xE1\xBA\xA3" /* ả */ => 'a',
			"\xE1\xBA\xA8" /* Ẩ */ => 'A', "\xE1\xBA\xA9" /* ẩ */ => 'a',
			"\xE1\xBA\xB2" /* Ẳ */ => 'A', "\xE1\xBA\xB3" /* ẳ */ => 'a',
			"\xE1\xBA\xBA" /* Ẻ */ => 'E', "\xE1\xBA\xBB" /* ẻ */ => 'e',
			"\xE1\xBB\x82" /* Ể */ => 'E', "\xE1\xBB\x83" /* ể */ => 'e',
			"\xE1\xBB\x88" /* Ỉ */ => 'I', "\xE1\xBB\x89" /* ỉ */ => 'i',
			"\xE1\xBB\x8E" /* Ỏ */ => 'O', "\xE1\xBB\x8F" /* ỏ */ => 'o',
			"\xE1\xBB\x94" /* Ổ */ => 'O', "\xE1\xBB\x95" /* ổ */ => 'o',
			"\xE1\xBB\x9E" /* Ở */ => 'O', "\xE1\xBB\x9F" /* ở */ => 'o',
			"\xE1\xBB\xA6" /* Ủ */ => 'U', "\xE1\xBB\xA7" /* ủ */ => 'u',
			"\xE1\xBB\xAC" /* Ử */ => 'U', "\xE1\xBB\xAD" /* ử */ => 'u',
			"\xE1\xBB\xB6" /* Ỷ */ => 'Y', "\xE1\xBB\xB7" /* ỷ */ => 'y',
			// tilde
			"\xE1\xBA\xAA" /* Ẫ */ => 'A', "\xE1\xBA\xAB" /* ẫ */ => 'a',
			"\xE1\xBA\xB4" /* Ẵ */ => 'A', "\xE1\xBA\xB5" /* ẵ */ => 'a',
			"\xE1\xBA\xBC" /* Ẽ */ => 'E', "\xE1\xBA\xBD" /* ẽ */ => 'e',
			"\xE1\xBB\x84" /* Ễ */ => 'E', "\xE1\xBB\x85" /* ễ */ => 'e',
			"\xE1\xBB\x96" /* Ỗ */ => 'O', "\xE1\xBB\x97" /* ỗ */ => 'o',
			"\xE1\xBB\xA0" /* Ỡ */ => 'O', "\xE1\xBB\xA1" /* ỡ */ => 'o',
			"\xE1\xBB\xAE" /* Ữ */ => 'U', "\xE1\xBB\xAF" /* ữ */ => 'u',
			"\xE1\xBB\xB8" /* Ỹ */ => 'Y', "\xE1\xBB\xB9" /* ỹ */ => 'y',
			// acute accent
			"\xE1\xBA\xA4" /* Ấ */ => 'A', "\xE1\xBA\xA5" /* ấ */ => 'a',
			"\xE1\xBA\xAE" /* Ắ */ => 'A', "\xE1\xBA\xAF" /* ắ */ => 'a',
			"\xE1\xBA\xBE" /* Ế */ => 'E', "\xE1\xBA\xBF" /* ế */ => 'e',
			"\xE1\xBB\x90" /* Ố */ => 'O', "\xE1\xBB\x91" /* ố */ => 'o',
			"\xE1\xBB\x9A" /* Ớ */ => 'O', "\xE1\xBB\x9B" /* ớ */ => 'o',
			"\xE1\xBB\xA8" /* Ứ */ => 'U', "\xE1\xBB\xA9" /* ứ */ => 'u',
			// dot below
			"\xE1\xBA\xA0" /* Ạ */ => 'A', "\xE1\xBA\xA1" /* ạ */ => 'a',
			"\xE1\xBA\xAC" /* Ậ */ => 'A', "\xE1\xBA\xAD" /* ậ */ => 'a',
			"\xE1\xBA\xB6" /* Ặ */ => 'A', "\xE1\xBA\xB7" /* ặ */ => 'a',
			"\xE1\xBA\xB8" /* Ẹ */ => 'E', "\xE1\xBA\xB9" /* ẹ */ => 'e',
			"\xE1\xBB\x86" /* Ệ */ => 'E', "\xE1\xBB\x87" /* ệ */ => 'e',
			"\xE1\xBB\x8A" /* Ị */ => 'I', "\xE1\xBB\x8B" /* ị */ => 'i',
			"\xE1\xBB\x8C" /* Ọ */ => 'O', "\xE1\xBB\x8D" /* ọ */ => 'o',
			"\xE1\xBB\x98" /* Ộ */ => 'O', "\xE1\xBB\x99" /* ộ */ => 'o',
			"\xE1\xBB\xA2" /* Ợ */ => 'O', "\xE1\xBB\xA3" /* ợ */ => 'o',
			"\xE1\xBB\xA4" /* Ụ */ => 'U', "\xE1\xBB\xA5" /* ụ */ => 'u',
			"\xE1\xBB\xB0" /* Ự */ => 'U', "\xE1\xBB\xB1" /* ự */ => 'u',
			"\xE1\xBB\xB4" /* Ỵ */ => 'Y', "\xE1\xBB\xB5" /* ỵ */ => 'y',
		];
	}

	/**
	 * Tests that "normalizer_normalize" exists and works
	 *
	 * "Works" means that the decomposed form of U+00C5 is recomposed to the
	 * single precomposed character. The result is computed once and reused.
	 *
	 * @return bool
	 */
	static public function hasNormalizerSupport() {
		static $ret = null;
		if (null === $ret) {
			$form_c = "\xC3\x85"; // 'LATIN CAPITAL LETTER A WITH RING ABOVE' (U+00C5)
			$form_d = "A\xCC\x8A"; // A followed by 'COMBINING RING ABOVE' (U+030A)
			$ret = (function_exists('normalizer_normalize')
				&& $form_c === normalizer_normalize($form_d));
		}
		return $ret;
	}
}
||
| 269 | |||
| 270 |