@@ -34,14 +34,16 @@ discard block |
||
| 34 | 34 | $utf8_arrays['utf8_casefold_maps'] = array('file' => 'CaseFold.php', 'data' => array()); |
| 35 | 35 | $utf8_arrays['utf8_default_ignorables'] = array('file' => 'DefaultIgnorables.php', 'data' => array()); |
| 36 | 36 | |
| 37 | -foreach ($utf8_arrays as $func_name => $func_info) { |
|
| 37 | +foreach ($utf8_arrays as $func_name => $func_info) |
|
| 38 | +{ |
|
| 38 | 39 | if (!is_file($unicodedir . '/' . $func_info['file']) || !is_writable($unicodedir . '/' . $func_info['file'])) |
| 39 | 40 | die($unicodedir . '/' . $func_info['file'] . ' not found or not writable.'); |
| 40 | 41 | } |
| 41 | 42 | |
| 42 | 43 | // We need some of these for further analysis below. |
| 43 | 44 | $derived_normalization_props = array(); |
| 44 | -foreach (file($unicode_data_url . '/DerivedNormalizationProps.txt') as $line) { |
|
| 45 | +foreach (file($unicode_data_url . '/DerivedNormalizationProps.txt') as $line) |
|
| 46 | +{ |
|
| 45 | 47 | $line = substr($line, 0, strcspn($line, '#')); |
| 46 | 48 | |
| 47 | 49 | if (strpos($line, ';') === false) |
@@ -49,17 +51,22 @@ discard block |
||
| 49 | 51 | |
| 50 | 52 | $fields = explode(';', $line); |
| 51 | 53 | |
| 52 | - foreach ($fields as $key => $value) { |
|
| 54 | + foreach ($fields as $key => $value) |
|
| 55 | + { |
|
| 53 | 56 | $fields[$key] = trim($value); |
| 54 | 57 | } |
| 55 | 58 | |
| 56 | - if (!isset($derived_normalization_props[$fields[1]])) { |
|
| 59 | + if (!isset($derived_normalization_props[$fields[1]])) |
|
| 60 | + { |
|
| 57 | 61 | $derived_normalization_props[$fields[1]] = array(); |
| 58 | 62 | } |
| 59 | 63 | |
| 60 | - if (strpos($fields[0], '..') === false) { |
|
| 64 | + if (strpos($fields[0], '..') === false) |
|
| 65 | + { |
|
| 61 | 66 | $entities = array('&#x' . $fields[0] . ';'); |
| 62 | - } else { |
|
| 67 | + } |
|
| 68 | + else |
|
| 69 | + { |
|
| 63 | 70 | $entities = array(); |
| 64 | 71 | |
| 65 | 72 | list($start, $end) = explode('..', $fields[0]); |
@@ -68,27 +75,35 @@ discard block |
||
| 68 | 75 | $ord_e = hexdec($end); |
| 69 | 76 | |
| 70 | 77 | $ord = $ord_s; |
| 71 | - while ($ord <= $ord_e) { |
|
| 78 | + while ($ord <= $ord_e) |
|
| 79 | + { |
|
| 72 | 80 | $entities[] = '&#x' . strtoupper(sprintf('%04s', dechex($ord++))) . ';'; |
| 73 | 81 | } |
| 74 | 82 | } |
| 75 | 83 | |
| 76 | 84 | $value = ''; |
| 77 | - if (!isset($fields[2])) { |
|
| 85 | + if (!isset($fields[2])) |
|
| 86 | + { |
|
| 78 | 87 | $value = 'SAME'; |
| 79 | - } elseif (in_array($fields[1], array('FC_NFKC', 'NFKC_CF'))) { |
|
| 88 | + } |
|
| 89 | + elseif (in_array($fields[1], array('FC_NFKC', 'NFKC_CF'))) |
|
| 90 | + { |
|
| 80 | 91 | $value = trim($fields[2]) !== '' ? '&#x' . str_replace(' ', '; &#x', trim($fields[2])) . ';' : ''; |
| 81 | - } else { |
|
| 92 | + } |
|
| 93 | + else |
|
| 94 | + { |
|
| 82 | 95 | $value = $fields[2]; |
| 83 | 96 | } |
| 84 | 97 | |
| 85 | - foreach ($entities as $entity) { |
|
| 98 | + foreach ($entities as $entity) |
|
| 99 | + { |
|
| 86 | 100 | $derived_normalization_props[$fields[1]][$entity] = $value === 'SAME' ? $entity : $value; |
| 87 | 101 | } |
| 88 | 102 | } |
| 89 | 103 | |
| 90 | 104 | // Go through all the characters in the Unicode database. |
| 91 | -foreach (file($unicode_data_url . '/UnicodeData.txt') as $line) { |
|
| 105 | +foreach (file($unicode_data_url . '/UnicodeData.txt') as $line) |
|
| 106 | +{ |
|
| 92 | 107 | $fields = explode(';', $line); |
| 93 | 108 | |
| 94 | 109 | if (!empty($fields[3])) |
@@ -109,12 +124,14 @@ discard block |
||
| 109 | 124 | $full_decomposition_maps['&#x' . $fields[0] . ';'] = '&#x' . str_replace(' ', '; &#x', trim(strip_tags($fields[5]))) . ';'; |
| 110 | 125 | |
| 111 | 126 | // Just the canonical decompositions. |
| 112 | - if (strpos($fields[5], '<') === false) { |
|
| 127 | + if (strpos($fields[5], '<') === false) |
|
| 128 | + { |
|
| 113 | 129 | $utf8_arrays['utf8_normalize_d_maps']['data']['&#x' . $fields[0] . ';'] = '&#x' . str_replace(' ', '; &#x', trim($fields[5])) . ';'; |
| 114 | 130 | } |
| 115 | 131 | } |
| 116 | 132 | |
| 117 | -foreach (file($unicode_data_url . '/CaseFolding.txt') as $line) { |
|
| 133 | +foreach (file($unicode_data_url . '/CaseFolding.txt') as $line) |
|
| 134 | +{ |
|
| 118 | 135 | $line = substr($line, 0, strcspn($line, '#')); |
| 119 | 136 | |
| 120 | 137 | if (strpos($line, ';') === false) |
@@ -122,12 +139,14 @@ discard block |
||
| 122 | 139 | |
| 123 | 140 | $fields = explode(';', $line); |
| 124 | 141 | |
| 125 | - foreach ($fields as $key => $value) { |
|
| 142 | + foreach ($fields as $key => $value) |
|
| 143 | + { |
|
| 126 | 144 | $fields[$key] = trim($value); |
| 127 | 145 | } |
| 128 | 146 | |
| 129 | 147 | // Full casefolding. |
| 130 | - if (in_array($fields[1], array('C', 'F'))) { |
|
| 148 | + if (in_array($fields[1], array('C', 'F'))) |
|
| 149 | + { |
|
| 131 | 150 | $utf8_arrays['utf8_casefold_maps']['data']['&#x' . $fields[0] . ';'] = '&#x' . str_replace(' ', '; &#x', trim($fields[2])) . ';'; |
| 132 | 151 | } |
| 133 | 152 | |
@@ -141,13 +160,17 @@ discard block |
||
| 141 | 160 | // This is necessary because some characters decompose to other characters that |
| 142 | 161 | // themselves decompose further. |
| 143 | 162 | $changed = true; |
| 144 | -while ($changed) { |
|
| 163 | +while ($changed) |
|
| 164 | +{ |
|
| 145 | 165 | $temp = array(); |
| 146 | - foreach ($full_decomposition_maps as $composed => $decomposed) { |
|
| 166 | + foreach ($full_decomposition_maps as $composed => $decomposed) |
|
| 167 | + { |
|
| 147 | 168 | $parts = strpos($decomposed, ' ') !== false ? explode(' ', $decomposed) : (array) $decomposed; |
| 148 | 169 | |
| 149 | - foreach ($parts as $partnum => $hex) { |
|
| 150 | - if (isset($full_decomposition_maps[$hex])) { |
|
| 170 | + foreach ($parts as $partnum => $hex) |
|
| 171 | + { |
|
| 172 | + if (isset($full_decomposition_maps[$hex])) |
|
| 173 | + { |
|
| 151 | 174 | $parts[$partnum] = $full_decomposition_maps[$hex]; |
| 152 | 175 | } |
| 153 | 176 | } |
@@ -166,17 +189,22 @@ discard block |
||
| 166 | 189 | // Same as above, but using only canonical decompositions. |
| 167 | 190 | $changed = true; |
| 168 | 191 | $iteration = 0; |
| 169 | -while ($changed) { |
|
| 192 | +while ($changed) |
|
| 193 | +{ |
|
| 170 | 194 | $temp = array(); |
| 171 | - foreach ($utf8_arrays['utf8_normalize_d_maps']['data'] as $composed => $decomposed) { |
|
| 172 | - if ($iteration === 0 && !in_array($composed, $derived_normalization_props['Full_Composition_Exclusion'])) { |
|
| 195 | + foreach ($utf8_arrays['utf8_normalize_d_maps']['data'] as $composed => $decomposed) |
|
| 196 | + { |
|
| 197 | + if ($iteration === 0 && !in_array($composed, $derived_normalization_props['Full_Composition_Exclusion'])) |
|
| 198 | + { |
|
| 173 | 199 | $utf8_arrays['utf8_compose_maps']['data'][$decomposed] = $composed; |
| 174 | 200 | } |
| 175 | 201 | |
| 176 | 202 | $parts = strpos($decomposed, ' ') !== false ? explode(' ', $decomposed) : (array) $decomposed; |
| 177 | 203 | |
| 178 | - foreach ($parts as $partnum => $hex) { |
|
| 179 | - if (isset($utf8_arrays['utf8_normalize_d_maps']['data'][$hex])) { |
|
| 204 | + foreach ($parts as $partnum => $hex) |
|
| 205 | + { |
|
| 206 | + if (isset($utf8_arrays['utf8_normalize_d_maps']['data'][$hex])) |
|
| 207 | + { |
|
| 180 | 208 | $parts[$partnum] = $utf8_arrays['utf8_normalize_d_maps']['data'][$hex]; |
| 181 | 209 | } |
| 182 | 210 | } |
@@ -196,7 +224,8 @@ discard block |
||
| 196 | 224 | $utf8_arrays['utf8_normalize_kd_maps']['data'] = array_diff_assoc($full_decomposition_maps, $utf8_arrays['utf8_normalize_d_maps']['data']); |
| 197 | 225 | |
| 198 | 226 | // Some characters have the 'Default_Ignorable_Code_Point' property. |
| 199 | -foreach (file($unicode_data_url . '/DerivedCoreProperties.txt') as $line) { |
|
| 227 | +foreach (file($unicode_data_url . '/DerivedCoreProperties.txt') as $line) |
|
| 228 | +{ |
|
| 200 | 229 | if (strpos($line, 'Default_Ignorable_Code_Point') === false) |
| 201 | 230 | continue; |
| 202 | 231 | |
@@ -207,13 +236,17 @@ discard block |
||
| 207 | 236 | |
| 208 | 237 | $fields = explode(';', $line); |
| 209 | 238 | |
| 210 | - foreach ($fields as $key => $value) { |
|
| 239 | + foreach ($fields as $key => $value) |
|
| 240 | + { |
|
| 211 | 241 | $fields[$key] = trim($value); |
| 212 | 242 | } |
| 213 | 243 | |
| 214 | - if (strpos($fields[0], '..') === false) { |
|
| 244 | + if (strpos($fields[0], '..') === false) |
|
| 245 | + { |
|
| 215 | 246 | $utf8_arrays['utf8_default_ignorables']['data'][] = '&#x' . $fields[0] . ';'; |
| 216 | - } else { |
|
| 247 | + } |
|
| 248 | + else |
|
| 249 | + { |
|
| 217 | 250 | $entities = array(); |
| 218 | 251 | |
| 219 | 252 | list($start, $end) = explode('..', $fields[0]); |
@@ -222,13 +255,15 @@ discard block |
||
| 222 | 255 | $ord_e = hexdec($end); |
| 223 | 256 | |
| 224 | 257 | $ord = $ord_s; |
| 225 | - while ($ord <= $ord_e) { |
|
| 258 | + while ($ord <= $ord_e) |
|
| 259 | + { |
|
| 226 | 260 | $utf8_arrays['utf8_default_ignorables']['data'][] = '&#x' . strtoupper(sprintf('%04s', dechex($ord++))) . ';'; |
| 227 | 261 | } |
| 228 | 262 | } |
| 229 | 263 | } |
| 230 | 264 | |
| 231 | -foreach ($utf8_arrays as $func_name => $func_info) { |
|
| 265 | +foreach ($utf8_arrays as $func_name => $func_info) |
|
| 266 | +{ |
|
| 232 | 267 | // Now update the file. |
| 233 | 268 | $file_contents = file_get_contents($unicodedir . '/' . $func_info['file']); |
| 234 | 269 | |
@@ -238,28 +273,35 @@ discard block |
||
| 238 | 273 | |
| 239 | 274 | $func_text .= "\n\t" . 'return array(' . "\n"; |
| 240 | 275 | |
| 241 | - foreach ($func_info['data'] as $key => $value) { |
|
| 276 | + foreach ($func_info['data'] as $key => $value) |
|
| 277 | + { |
|
| 242 | 278 | $func_text .= "\t\t"; |
| 243 | 279 | |
| 244 | - if ($func_name !== 'utf8_default_ignorables') { |
|
| 280 | + if ($func_name !== 'utf8_default_ignorables') |
|
| 281 | + { |
|
| 245 | 282 | $func_text .= '"'; |
| 246 | 283 | |
| 247 | 284 | $key = mb_decode_numericentity(str_replace(' ', '', $key), array(0,0x10FFFF,0,0xFFFFFF), 'UTF-8'); |
| 248 | 285 | |
| 249 | - foreach (unpack('C*', $key) as $byte_value) { |
|
| 286 | + foreach (unpack('C*', $key) as $byte_value) |
|
| 287 | + { |
|
| 250 | 288 | $func_text .= '\\x' . strtoupper(dechex($byte_value)); |
| 251 | 289 | } |
| 252 | 290 | |
| 253 | 291 | $func_text .= '" => '; |
| 254 | 292 | } |
| 255 | 293 | |
| 256 | - if ($func_name == 'utf8_combining_classes') { |
|
| 294 | + if ($func_name == 'utf8_combining_classes') |
|
| 295 | + { |
|
| 257 | 296 | $func_text .= $value; |
| 258 | - } else { |
|
| 297 | + } |
|
| 298 | + else |
|
| 299 | + { |
|
| 259 | 300 | $func_text .= '"'; |
| 260 | 301 | |
| 261 | 302 | $value = mb_decode_numericentity(str_replace(' ', '', $value), array(0,0x10FFFF,0,0xFFFFFF), 'UTF-8'); |
| 262 | - foreach (unpack('C*', $value) as $byte_value) { |
|
| 303 | + foreach (unpack('C*', $value) as $byte_value) |
|
| 304 | + { |
|
| 263 | 305 | $func_text .= '\\x' . strtoupper(dechex($byte_value)); |
| 264 | 306 | } |
| 265 | 307 | |