@@ -38,7 +38,8 @@ discard block |
||
38 | 38 | |
39 | 39 | // We need some of these for further analysis below. |
40 | 40 | $derived_normalization_props = array(); |
41 | -foreach (file($unicode_data_url . '/DerivedNormalizationProps.txt') as $line) { |
|
41 | +foreach (file($unicode_data_url . '/DerivedNormalizationProps.txt') as $line) |
|
42 | +{ |
|
42 | 43 | $line = substr($line, 0, strcspn($line, '#')); |
43 | 44 | |
44 | 45 | if (strpos($line, ';') === false) |
@@ -46,17 +47,22 @@ discard block |
||
46 | 47 | |
47 | 48 | $fields = explode(';', $line); |
48 | 49 | |
49 | - foreach ($fields as $key => $value) { |
|
50 | + foreach ($fields as $key => $value) |
|
51 | + { |
|
50 | 52 | $fields[$key] = trim($value); |
51 | 53 | } |
52 | 54 | |
53 | - if (!isset($derived_normalization_props[$fields[1]])) { |
|
55 | + if (!isset($derived_normalization_props[$fields[1]])) |
|
56 | + { |
|
54 | 57 | $derived_normalization_props[$fields[1]] = array(); |
55 | 58 | } |
56 | 59 | |
57 | - if (strpos($fields[0], '..') === false) { |
|
60 | + if (strpos($fields[0], '..') === false) |
|
61 | + { |
|
58 | 62 | $entities = array('&#x' . $fields[0] . ';'); |
59 | - } else { |
|
63 | + } |
|
64 | + else |
|
65 | + { |
|
60 | 66 | $entities = array(); |
61 | 67 | |
62 | 68 | list($start, $end) = explode('..', $fields[0]); |
@@ -65,27 +71,35 @@ discard block |
||
65 | 71 | $ord_e = hexdec($end); |
66 | 72 | |
67 | 73 | $ord = $ord_s; |
68 | - while ($ord <= $ord_e) { |
|
74 | + while ($ord <= $ord_e) |
|
75 | + { |
|
69 | 76 | $entities[] = '&#x' . strtoupper(sprintf('%04s', dechex($ord++))) . ';'; |
70 | 77 | } |
71 | 78 | } |
72 | 79 | |
73 | 80 | $value = ''; |
74 | - if (!isset($fields[2])) { |
|
81 | + if (!isset($fields[2])) |
|
82 | + { |
|
75 | 83 | $value = 'SAME'; |
76 | - } elseif (in_array($fields[1], array('FC_NFKC', 'NFKC_CF'))) { |
|
84 | + } |
|
85 | + elseif (in_array($fields[1], array('FC_NFKC', 'NFKC_CF'))) |
|
86 | + { |
|
77 | 87 | $value = trim($fields[2]) !== '' ? '&#x' . str_replace(' ', '; &#x', trim($fields[2])) . ';' : ''; |
78 | - } else { |
|
88 | + } |
|
89 | + else |
|
90 | + { |
|
79 | 91 | $value = $fields[2]; |
80 | 92 | } |
81 | 93 | |
82 | - foreach ($entities as $entity) { |
|
94 | + foreach ($entities as $entity) |
|
95 | + { |
|
83 | 96 | $derived_normalization_props[$fields[1]][$entity] = $value === 'SAME' ? $entity : $value; |
84 | 97 | } |
85 | 98 | } |
86 | 99 | |
87 | 100 | // Go through all the characters in the Unicode database. |
88 | -foreach (file($unicode_data_url . '/UnicodeData.txt') as $line) { |
|
101 | +foreach (file($unicode_data_url . '/UnicodeData.txt') as $line) |
|
102 | +{ |
|
89 | 103 | $fields = explode(';', $line); |
90 | 104 | |
91 | 105 | if (!empty($fields[3])) |
@@ -106,12 +120,14 @@ discard block |
||
106 | 120 | $full_decomposition_maps['&#x' . $fields[0] . ';'] = '&#x' . str_replace(' ', '; &#x', trim(strip_tags($fields[5]))) . ';'; |
107 | 121 | |
108 | 122 | // Just the canonical decompositions. |
109 | - if (strpos($fields[5], '<') === false) { |
|
123 | + if (strpos($fields[5], '<') === false) |
|
124 | + { |
|
110 | 125 | $utf8_arrays['utf8_normalize_d_maps']['&#x' . $fields[0] . ';'] = '&#x' . str_replace(' ', '; &#x', trim($fields[5])) . ';'; |
111 | 126 | } |
112 | 127 | } |
113 | 128 | |
114 | -foreach (file($unicode_data_url . '/CaseFolding.txt') as $line) { |
|
129 | +foreach (file($unicode_data_url . '/CaseFolding.txt') as $line) |
|
130 | +{ |
|
115 | 131 | $line = substr($line, 0, strcspn($line, '#')); |
116 | 132 | |
117 | 133 | if (strpos($line, ';') === false) |
@@ -119,12 +135,14 @@ discard block |
||
119 | 135 | |
120 | 136 | $fields = explode(';', $line); |
121 | 137 | |
122 | - foreach ($fields as $key => $value) { |
|
138 | + foreach ($fields as $key => $value) |
|
139 | + { |
|
123 | 140 | $fields[$key] = trim($value); |
124 | 141 | } |
125 | 142 | |
126 | 143 | // Full casefolding. |
127 | - if (in_array($fields[1], array('C', 'F'))) { |
|
144 | + if (in_array($fields[1], array('C', 'F'))) |
|
145 | + { |
|
128 | 146 | $utf8_arrays['utf8_casefold_maps']['&#x' . $fields[0] . ';'] = '&#x' . str_replace(' ', '; &#x', trim($fields[2])) . ';'; |
129 | 147 | } |
130 | 148 | |
@@ -138,13 +156,17 @@ discard block |
||
138 | 156 | // This is necessary because some characters decompose to other characters that |
139 | 157 | // themselves decompose further. |
140 | 158 | $changed = true; |
141 | -while ($changed) { |
|
159 | +while ($changed) |
|
160 | +{ |
|
142 | 161 | $temp = array(); |
143 | - foreach ($full_decomposition_maps as $composed => $decomposed) { |
|
162 | + foreach ($full_decomposition_maps as $composed => $decomposed) |
|
163 | + { |
|
144 | 164 | $parts = strpos($decomposed, ' ') !== false ? explode(' ', $decomposed) : (array) $decomposed; |
145 | 165 | |
146 | - foreach ($parts as $partnum => $hex) { |
|
147 | - if (isset($full_decomposition_maps[$hex])) { |
|
166 | + foreach ($parts as $partnum => $hex) |
|
167 | + { |
|
168 | + if (isset($full_decomposition_maps[$hex])) |
|
169 | + { |
|
148 | 170 | $parts[$partnum] = $full_decomposition_maps[$hex]; |
149 | 171 | } |
150 | 172 | } |
@@ -163,17 +185,22 @@ discard block |
||
163 | 185 | // Same as above, but using only canonical decompositions. |
164 | 186 | $changed = true; |
165 | 187 | $iteration = 0; |
166 | -while ($changed) { |
|
188 | +while ($changed) |
|
189 | +{ |
|
167 | 190 | $temp = array(); |
168 | - foreach ($utf8_arrays['utf8_normalize_d_maps'] as $composed => $decomposed) { |
|
169 | - if ($iteration === 0 && !in_array($composed, $derived_normalization_props['Full_Composition_Exclusion'])) { |
|
191 | + foreach ($utf8_arrays['utf8_normalize_d_maps'] as $composed => $decomposed) |
|
192 | + { |
|
193 | + if ($iteration === 0 && !in_array($composed, $derived_normalization_props['Full_Composition_Exclusion'])) |
|
194 | + { |
|
170 | 195 | $utf8_arrays['utf8_compose_maps'][$decomposed] = $composed; |
171 | 196 | } |
172 | 197 | |
173 | 198 | $parts = strpos($decomposed, ' ') !== false ? explode(' ', $decomposed) : (array) $decomposed; |
174 | 199 | |
175 | - foreach ($parts as $partnum => $hex) { |
|
176 | - if (isset($utf8_arrays['utf8_normalize_d_maps'][$hex])) { |
|
200 | + foreach ($parts as $partnum => $hex) |
|
201 | + { |
|
202 | + if (isset($utf8_arrays['utf8_normalize_d_maps'][$hex])) |
|
203 | + { |
|
177 | 204 | $parts[$partnum] = $utf8_arrays['utf8_normalize_d_maps'][$hex]; |
178 | 205 | } |
179 | 206 | } |
@@ -193,7 +220,8 @@ discard block |
||
193 | 220 | $utf8_arrays['utf8_normalize_kd_maps'] = array_diff_assoc($full_decomposition_maps, $utf8_arrays['utf8_normalize_d_maps']); |
194 | 221 | |
195 | 222 | // Some characters have the 'Default_Ignorable_Code_Point' property. |
196 | -foreach (file($unicode_data_url . '/DerivedCoreProperties.txt') as $line) { |
|
223 | +foreach (file($unicode_data_url . '/DerivedCoreProperties.txt') as $line) |
|
224 | +{ |
|
197 | 225 | if (strpos($line, 'Default_Ignorable_Code_Point') === false) |
198 | 226 | continue; |
199 | 227 | |
@@ -204,13 +232,17 @@ discard block |
||
204 | 232 | |
205 | 233 | $fields = explode(';', $line); |
206 | 234 | |
207 | - foreach ($fields as $key => $value) { |
|
235 | + foreach ($fields as $key => $value) |
|
236 | + { |
|
208 | 237 | $fields[$key] = trim($value); |
209 | 238 | } |
210 | 239 | |
211 | - if (strpos($fields[0], '..') === false) { |
|
240 | + if (strpos($fields[0], '..') === false) |
|
241 | + { |
|
212 | 242 | $utf8_arrays['utf8_default_ignorables'][] = '&#x' . $fields[0] . ';'; |
213 | - } else { |
|
243 | + } |
|
244 | + else |
|
245 | + { |
|
214 | 246 | $entities = array(); |
215 | 247 | |
216 | 248 | list($start, $end) = explode('..', $fields[0]); |
@@ -219,7 +251,8 @@ discard block |
||
219 | 251 | $ord_e = hexdec($end); |
220 | 252 | |
221 | 253 | $ord = $ord_s; |
222 | - while ($ord <= $ord_e) { |
|
254 | + while ($ord <= $ord_e) |
|
255 | + { |
|
223 | 256 | $utf8_arrays['utf8_default_ignorables'][] = '&#x' . strtoupper(sprintf('%04s', dechex($ord++))) . ';'; |
224 | 257 | } |
225 | 258 | } |
@@ -228,35 +261,43 @@ discard block |
||
228 | 261 | // Now update the file. |
229 | 262 | $subs_charset_contents = file_get_contents($sourcedir . '/Subs-Charset.php'); |
230 | 263 | |
231 | -foreach ($utf8_arrays as $func_name => $arr) { |
|
264 | +foreach ($utf8_arrays as $func_name => $arr) |
|
265 | +{ |
|
232 | 266 | $func_text = 'function ' . $func_name . '()' . "\n" . '{'; |
233 | 267 | |
234 | 268 | $func_regex = '/' . preg_quote($func_text, '/') . '[^}]*}/'; |
235 | 269 | |
236 | 270 | $func_text .= "\n\t" . 'return array(' . "\n"; |
237 | 271 | |
238 | - foreach ($arr as $key => $value) { |
|
272 | + foreach ($arr as $key => $value) |
|
273 | + { |
|
239 | 274 | $func_text .= "\t\t"; |
240 | 275 | |
241 | - if ($func_name !== 'utf8_default_ignorables') { |
|
276 | + if ($func_name !== 'utf8_default_ignorables') |
|
277 | + { |
|
242 | 278 | $func_text .= '"'; |
243 | 279 | |
244 | 280 | $key = mb_decode_numericentity(str_replace(' ', '', $key), array(0,0x10FFFF,0,0xFFFFFF), 'UTF-8'); |
245 | 281 | |
246 | - foreach (unpack('C*', $key) as $byte_value) { |
|
282 | + foreach (unpack('C*', $key) as $byte_value) |
|
283 | + { |
|
247 | 284 | $func_text .= '\\x' . strtoupper(dechex($byte_value)); |
248 | 285 | } |
249 | 286 | |
250 | 287 | $func_text .= '" => '; |
251 | 288 | } |
252 | 289 | |
253 | - if ($func_name == 'utf8_combining_classes') { |
|
290 | + if ($func_name == 'utf8_combining_classes') |
|
291 | + { |
|
254 | 292 | $func_text .= $value; |
255 | - } else { |
|
293 | + } |
|
294 | + else |
|
295 | + { |
|
256 | 296 | $func_text .= '"'; |
257 | 297 | |
258 | 298 | $value = mb_decode_numericentity(str_replace(' ', '', $value), array(0,0x10FFFF,0,0xFFFFFF), 'UTF-8'); |
259 | - foreach (unpack('C*', $value) as $byte_value) { |
|
299 | + foreach (unpack('C*', $value) as $byte_value) |
|
300 | + { |
|
260 | 301 | $func_text .= '\\x' . strtoupper(dechex($byte_value)); |
261 | 302 | } |
262 | 303 |