@@ -249,7 +249,7 @@ discard block |
||
249 | 249 | $combining_classes = utf8_combining_classes(); |
250 | 250 | |
251 | 251 | // Replace characters with decomposed forms. |
252 | - for ($i=0; $i < count($chars); $i++) |
|
252 | + for ($i = 0; $i < count($chars); $i++) |
|
253 | 253 | { |
254 | 254 | // Hangul characters. |
255 | 255 | if ($chars[$i] >= "\xEA\xB0\x80" && $chars[$i] <= "\xED\x9E\xA3") |
@@ -283,7 +283,7 @@ discard block |
||
283 | 283 | { |
284 | 284 | $temp = $chars[$i]; |
285 | 285 | $chars[$i] = $chars[$i - 1]; |
286 | - $chars[$i -1] = $temp; |
|
286 | + $chars[$i - 1] = $temp; |
|
287 | 287 | |
288 | 288 | // Backtrack and check again. |
289 | 289 | if ($i > 1) |
@@ -527,7 +527,7 @@ discard block |
||
527 | 527 | |
528 | 528 | // Use placeholders to preserve known emoji from further processing. |
529 | 529 | // Regex source is https://unicode.org/reports/tr51/#EBNF_and_Regex |
530 | - $string = preg_replace_callback( |
|
530 | + $string = preg_replace_callback( |
|
531 | 531 | '/' . |
532 | 532 | // Flag emojis |
533 | 533 | '[' . $prop_classes['Regional_Indicator'] . ']{2}' . |
@@ -556,7 +556,7 @@ discard block |
||
556 | 556 | ')?' . |
557 | 557 | ')*' . |
558 | 558 | '/u', |
559 | - function ($matches) use (&$placeholders) |
|
559 | + function($matches) use (&$placeholders) |
|
560 | 560 | { |
561 | 561 | // Skip lone ASCII characters that are not actually part of an emoji sequence. |
562 | 562 | // This can happen because the digits 0-9 and the '*' and '#' characters are |
@@ -587,7 +587,7 @@ discard block |
||
587 | 587 | // Use placeholders for sanctioned variation selectors. |
588 | 588 | $string = preg_replace_callback( |
589 | 589 | $patterns, |
590 | - function ($matches) use (&$placeholders) |
|
590 | + function($matches) use (&$placeholders) |
|
591 | 591 | { |
592 | 592 | $placeholders[$matches[0]] = "\xEE\xB3\x9B" . md5($matches[0]) . "\xEE\xB3\x9C"; |
593 | 593 | return $placeholders[$matches[0]]; |
@@ -667,7 +667,7 @@ discard block |
||
667 | 667 | // Do the thing. |
668 | 668 | $string = preg_replace_callback( |
669 | 669 | '/' . $pattern . '/u', |
670 | - function ($matches) use ($placeholders) |
|
670 | + function($matches) use ($placeholders) |
|
671 | 671 | { |
672 | 672 | return strtr($matches[0], $placeholders); |
673 | 673 | }, |
@@ -372,7 +372,7 @@ discard block |
||
372 | 372 | $class_string = ''; |
373 | 373 | |
374 | 374 | $current_range = array('start' => null, 'end' => null); |
375 | - foreach($ords as $ord) { |
|
375 | + foreach ($ords as $ord) { |
|
376 | 376 | if (!isset($current_range['start'])) { |
377 | 377 | $current_range['start'] = $ord; |
378 | 378 | } |
@@ -724,7 +724,7 @@ discard block |
||
724 | 724 | $class_string = ''; |
725 | 725 | |
726 | 726 | $current_range = array('start' => null, 'end' => null); |
727 | - foreach($value as $ord) { |
|
727 | + foreach ($value as $ord) { |
|
728 | 728 | if (!isset($current_range['start'])) { |
729 | 729 | $current_range['start'] = $ord; |
730 | 730 | } |
@@ -809,7 +809,7 @@ discard block |
||
809 | 809 | if ($key_type == 'hexchar') { |
810 | 810 | $func_text .= '"'; |
811 | 811 | |
812 | - $key = mb_decode_numericentity(str_replace(' ', '', $key), array(0,0x10FFFF,0,0xFFFFFF), 'UTF-8'); |
|
812 | + $key = mb_decode_numericentity(str_replace(' ', '', $key), array(0, 0x10FFFF, 0, 0xFFFFFF), 'UTF-8'); |
|
813 | 813 | |
814 | 814 | foreach (unpack('C*', $key) as $byte_value) { |
815 | 815 | $func_text .= '\\x' . strtoupper(dechex($byte_value)); |
@@ -831,7 +831,7 @@ discard block |
||
831 | 831 | } elseif ($val_type == 'hexchar') { |
832 | 832 | $func_text .= '"'; |
833 | 833 | |
834 | - $value = mb_decode_numericentity(str_replace(' ', '', $value), array(0,0x10FFFF,0,0xFFFFFF), 'UTF-8'); |
|
834 | + $value = mb_decode_numericentity(str_replace(' ', '', $value), array(0, 0x10FFFF, 0, 0xFFFFFF), 'UTF-8'); |
|
835 | 835 | foreach (unpack('C*', $value) as $byte_value) { |
836 | 836 | $func_text .= '\\x' . strtoupper(dechex($byte_value)); |
837 | 837 | } |
@@ -117,7 +117,8 @@ discard block |
||
117 | 117 | ), |
118 | 118 | ); |
119 | 119 | |
120 | -foreach ($funcs as $func_name => $func_info) { |
|
120 | +foreach ($funcs as $func_name => $func_info) |
|
121 | +{ |
|
121 | 122 | if (!is_file($unicodedir . '/' . $func_info['file']) || !is_writable($unicodedir . '/' . $func_info['file'])) |
122 | 123 | die($unicodedir . '/' . $func_info['file'] . ' not found or not writable.'); |
123 | 124 | } |
@@ -130,7 +131,8 @@ discard block |
||
130 | 131 | |
131 | 132 | // We need some of these for further analysis below. |
132 | 133 | $derived_normalization_props = array(); |
133 | -foreach (file($unicode_data_url . '/DerivedNormalizationProps.txt') as $line) { |
|
134 | +foreach (file($unicode_data_url . '/DerivedNormalizationProps.txt') as $line) |
|
135 | +{ |
|
134 | 136 | $line = substr($line, 0, strcspn($line, '#')); |
135 | 137 | |
136 | 138 | if (strpos($line, ';') === false) |
@@ -138,17 +140,22 @@ discard block |
||
138 | 140 | |
139 | 141 | $fields = explode(';', $line); |
140 | 142 | |
141 | - foreach ($fields as $key => $value) { |
|
143 | + foreach ($fields as $key => $value) |
|
144 | + { |
|
142 | 145 | $fields[$key] = trim($value); |
143 | 146 | } |
144 | 147 | |
145 | - if (!isset($derived_normalization_props[$fields[1]])) { |
|
148 | + if (!isset($derived_normalization_props[$fields[1]])) |
|
149 | + { |
|
146 | 150 | $derived_normalization_props[$fields[1]] = array(); |
147 | 151 | } |
148 | 152 | |
149 | - if (strpos($fields[0], '..') === false) { |
|
153 | + if (strpos($fields[0], '..') === false) |
|
154 | + { |
|
150 | 155 | $entities = array('&#x' . $fields[0] . ';'); |
151 | - } else { |
|
156 | + } |
|
157 | + else |
|
158 | + { |
|
152 | 159 | $entities = array(); |
153 | 160 | |
154 | 161 | list($start, $end) = explode('..', $fields[0]); |
@@ -157,28 +164,36 @@ discard block |
||
157 | 164 | $ord_e = hexdec($end); |
158 | 165 | |
159 | 166 | $ord = $ord_s; |
160 | - while ($ord <= $ord_e) { |
|
167 | + while ($ord <= $ord_e) |
|
168 | + { |
|
161 | 169 | $entities[] = '&#x' . strtoupper(sprintf('%04s', dechex($ord++))) . ';'; |
162 | 170 | } |
163 | 171 | } |
164 | 172 | |
165 | 173 | $value = ''; |
166 | - if (!isset($fields[2])) { |
|
174 | + if (!isset($fields[2])) |
|
175 | + { |
|
167 | 176 | $value = 'SAME'; |
168 | - } elseif (in_array($fields[1], array('FC_NFKC', 'NFKC_CF'))) { |
|
177 | + } |
|
178 | + elseif (in_array($fields[1], array('FC_NFKC', 'NFKC_CF'))) |
|
179 | + { |
|
169 | 180 | $value = trim($fields[2]) !== '' ? '&#x' . str_replace(' ', '; &#x', trim($fields[2])) . ';' : ''; |
170 | - } else { |
|
181 | + } |
|
182 | + else |
|
183 | + { |
|
171 | 184 | $value = $fields[2]; |
172 | 185 | } |
173 | 186 | |
174 | - foreach ($entities as $entity) { |
|
187 | + foreach ($entities as $entity) |
|
188 | + { |
|
175 | 189 | $derived_normalization_props[$fields[1]][$entity] = $value === 'SAME' ? $entity : $value; |
176 | 190 | } |
177 | 191 | } |
178 | 192 | |
179 | 193 | // Go through all the characters in the Unicode database. |
180 | 194 | $char_data = array(); |
181 | -foreach (file($unicode_data_url . '/UnicodeData.txt') as $line) { |
|
195 | +foreach (file($unicode_data_url . '/UnicodeData.txt') as $line) |
|
196 | +{ |
|
182 | 197 | $fields = explode(';', $line); |
183 | 198 | |
184 | 199 | if (!empty($fields[3])) |
@@ -202,12 +217,14 @@ discard block |
||
202 | 217 | $full_decomposition_maps['&#x' . $fields[0] . ';'] = '&#x' . str_replace(' ', '; &#x', trim(strip_tags($fields[5]))) . ';'; |
203 | 218 | |
204 | 219 | // Just the canonical decompositions. |
205 | - if (strpos($fields[5], '<') === false) { |
|
220 | + if (strpos($fields[5], '<') === false) |
|
221 | + { |
|
206 | 222 | $funcs['utf8_normalize_d_maps']['data']['&#x' . $fields[0] . ';'] = '&#x' . str_replace(' ', '; &#x', trim($fields[5])) . ';'; |
207 | 223 | } |
208 | 224 | } |
209 | 225 | |
210 | -foreach (file($unicode_data_url . '/CaseFolding.txt') as $line) { |
|
226 | +foreach (file($unicode_data_url . '/CaseFolding.txt') as $line) |
|
227 | +{ |
|
211 | 228 | $line = substr($line, 0, strcspn($line, '#')); |
212 | 229 | |
213 | 230 | if (strpos($line, ';') === false) |
@@ -215,12 +232,14 @@ discard block |
||
215 | 232 | |
216 | 233 | $fields = explode(';', $line); |
217 | 234 | |
218 | - foreach ($fields as $key => $value) { |
|
235 | + foreach ($fields as $key => $value) |
|
236 | + { |
|
219 | 237 | $fields[$key] = trim($value); |
220 | 238 | } |
221 | 239 | |
222 | 240 | // Full casefolding. |
223 | - if (in_array($fields[1], array('C', 'F'))) { |
|
241 | + if (in_array($fields[1], array('C', 'F'))) |
|
242 | + { |
|
224 | 243 | $funcs['utf8_casefold_maps']['data']['&#x' . $fields[0] . ';'] = '&#x' . str_replace(' ', '; &#x', trim($fields[2])) . ';'; |
225 | 244 | } |
226 | 245 | |
@@ -234,13 +253,17 @@ discard block |
||
234 | 253 | // This is necessary because some characters decompose to other characters that |
235 | 254 | // themselves decompose further. |
236 | 255 | $changed = true; |
237 | -while ($changed) { |
|
256 | +while ($changed) |
|
257 | +{ |
|
238 | 258 | $temp = array(); |
239 | - foreach ($full_decomposition_maps as $composed => $decomposed) { |
|
259 | + foreach ($full_decomposition_maps as $composed => $decomposed) |
|
260 | + { |
|
240 | 261 | $parts = strpos($decomposed, ' ') !== false ? explode(' ', $decomposed) : (array) $decomposed; |
241 | 262 | |
242 | - foreach ($parts as $partnum => $hex) { |
|
243 | - if (isset($full_decomposition_maps[$hex])) { |
|
263 | + foreach ($parts as $partnum => $hex) |
|
264 | + { |
|
265 | + if (isset($full_decomposition_maps[$hex])) |
|
266 | + { |
|
244 | 267 | $parts[$partnum] = $full_decomposition_maps[$hex]; |
245 | 268 | } |
246 | 269 | } |
@@ -259,17 +282,22 @@ discard block |
||
259 | 282 | // Same as above, but using only canonical decompositions. |
260 | 283 | $changed = true; |
261 | 284 | $iteration = 0; |
262 | -while ($changed) { |
|
285 | +while ($changed) |
|
286 | +{ |
|
263 | 287 | $temp = array(); |
264 | - foreach ($funcs['utf8_normalize_d_maps']['data'] as $composed => $decomposed) { |
|
265 | - if ($iteration === 0 && !in_array($composed, $derived_normalization_props['Full_Composition_Exclusion'])) { |
|
288 | + foreach ($funcs['utf8_normalize_d_maps']['data'] as $composed => $decomposed) |
|
289 | + { |
|
290 | + if ($iteration === 0 && !in_array($composed, $derived_normalization_props['Full_Composition_Exclusion'])) |
|
291 | + { |
|
266 | 292 | $funcs['utf8_compose_maps']['data'][$decomposed] = $composed; |
267 | 293 | } |
268 | 294 | |
269 | 295 | $parts = strpos($decomposed, ' ') !== false ? explode(' ', $decomposed) : (array) $decomposed; |
270 | 296 | |
271 | - foreach ($parts as $partnum => $hex) { |
|
272 | - if (isset($funcs['utf8_normalize_d_maps']['data'][$hex])) { |
|
297 | + foreach ($parts as $partnum => $hex) |
|
298 | + { |
|
299 | + if (isset($funcs['utf8_normalize_d_maps']['data'][$hex])) |
|
300 | + { |
|
273 | 301 | $parts[$partnum] = $funcs['utf8_normalize_d_maps']['data'][$hex]; |
274 | 302 | } |
275 | 303 | } |
@@ -290,7 +318,8 @@ discard block |
||
290 | 318 | unset($full_decomposition_maps, $derived_normalization_props); |
291 | 319 | |
292 | 320 | // Now update the files with the data we've got so far. |
293 | -foreach ($funcs as $func_name => $func_info) { |
|
321 | +foreach ($funcs as $func_name => $func_info) |
|
322 | +{ |
|
294 | 323 | if (empty($func_info['data'])) |
295 | 324 | continue; |
296 | 325 | |
@@ -306,8 +335,10 @@ discard block |
||
306 | 335 | ***********************************/ |
307 | 336 | |
308 | 337 | // Build regular expression classes for extended Unicode properties. |
309 | -foreach ($funcs['utf8_regex_properties']['propfiles'] as $filename) { |
|
310 | - foreach (file($unicode_data_url . '/' . $filename) as $line) { |
|
338 | +foreach ($funcs['utf8_regex_properties']['propfiles'] as $filename) |
|
339 | +{ |
|
340 | + foreach (file($unicode_data_url . '/' . $filename) as $line) |
|
341 | + { |
|
311 | 342 | $line = substr($line, 0, strcspn($line, '#')); |
312 | 343 | |
313 | 344 | if (strpos($line, ';') === false) |
@@ -315,11 +346,13 @@ discard block |
||
315 | 346 | |
316 | 347 | $fields = explode(';', $line); |
317 | 348 | |
318 | - foreach ($fields as $key => $value) { |
|
349 | + foreach ($fields as $key => $value) |
|
350 | + { |
|
319 | 351 | $fields[$key] = trim($value); |
320 | 352 | } |
321 | 353 | |
322 | - if (in_array($fields[1], $funcs['utf8_regex_properties']['props'])) { |
|
354 | + if (in_array($fields[1], $funcs['utf8_regex_properties']['props'])) |
|
355 | + { |
|
323 | 356 | if (!isset($funcs['utf8_regex_properties']['data'][$fields[1]])) |
324 | 357 | $funcs['utf8_regex_properties']['data'][$fields[1]] = ''; |
325 | 358 | |
@@ -330,16 +363,20 @@ discard block |
||
330 | 363 | if ($fields[1] !== 'Default_Ignorable_Code_Point') |
331 | 364 | continue; |
332 | 365 | |
333 | - if (strpos($fields[0], '..') === false) { |
|
366 | + if (strpos($fields[0], '..') === false) |
|
367 | + { |
|
334 | 368 | $funcs['utf8_default_ignorables']['data'][] = '&#x' . $fields[0] . ';'; |
335 | - } else { |
|
369 | + } |
|
370 | + else |
|
371 | + { |
|
336 | 372 | list($start, $end) = explode('..', $fields[0]); |
337 | 373 | |
338 | 374 | $ord_s = hexdec($start); |
339 | 375 | $ord_e = hexdec($end); |
340 | 376 | |
341 | 377 | $ord = $ord_s; |
342 | - while ($ord <= $ord_e) { |
|
378 | + while ($ord <= $ord_e) |
|
379 | + { |
|
343 | 380 | $funcs['utf8_default_ignorables']['data'][] = '&#x' . strtoupper(sprintf('%04s', dechex($ord++))) . ';'; |
344 | 381 | } |
345 | 382 | } |
@@ -349,8 +386,10 @@ discard block |
||
349 | 386 | |
350 | 387 | // Build regular expression classes for filtering variation selectors. |
351 | 388 | $files = array('StandardizedVariants.txt', 'emoji/emoji-variation-sequences.txt'); |
352 | -foreach ($files as $filename) { |
|
353 | - foreach (file($unicode_data_url . '/' . $filename) as $line) { |
|
389 | +foreach ($files as $filename) |
|
390 | +{ |
|
391 | + foreach (file($unicode_data_url . '/' . $filename) as $line) |
|
392 | + { |
|
354 | 393 | $line = substr($line, 0, strcspn($line, '#')); |
355 | 394 | |
356 | 395 | if (strpos($line, ';') === false) |
@@ -358,7 +397,8 @@ discard block |
||
358 | 397 | |
359 | 398 | $fields = explode(';', $line); |
360 | 399 | |
361 | - foreach ($fields as $key => $value) { |
|
400 | + foreach ($fields as $key => $value) |
|
401 | + { |
|
362 | 402 | $fields[$key] = trim($value); |
363 | 403 | } |
364 | 404 | |
@@ -368,22 +408,29 @@ discard block |
||
368 | 408 | } |
369 | 409 | |
370 | 410 | } |
371 | -foreach ($funcs['utf8_regex_variation_selectors']['data'] as $variation_selector => $ords) { |
|
411 | +foreach ($funcs['utf8_regex_variation_selectors']['data'] as $variation_selector => $ords) |
|
412 | +{ |
|
372 | 413 | $class_string = ''; |
373 | 414 | |
374 | 415 | $current_range = array('start' => null, 'end' => null); |
375 | - foreach($ords as $ord) { |
|
376 | - if (!isset($current_range['start'])) { |
|
416 | + foreach($ords as $ord) |
|
417 | + { |
|
418 | + if (!isset($current_range['start'])) |
|
419 | + { |
|
377 | 420 | $current_range['start'] = $ord; |
378 | 421 | } |
379 | 422 | |
380 | - if (!isset($current_range['end']) || $ord == $current_range['end'] + 1) { |
|
423 | + if (!isset($current_range['end']) || $ord == $current_range['end'] + 1) |
|
424 | + { |
|
381 | 425 | $current_range['end'] = $ord; |
382 | 426 | continue; |
383 | - } else { |
|
427 | + } |
|
428 | + else |
|
429 | + { |
|
384 | 430 | $class_string .= '\\x{' . strtoupper(sprintf('%04s', dechex($current_range['start']))) . '}'; |
385 | 431 | |
386 | - if ($current_range['start'] != $current_range['end']) { |
|
432 | + if ($current_range['start'] != $current_range['end']) |
|
433 | + { |
|
387 | 434 | $class_string .= '-\\x{' . strtoupper(sprintf('%04s', dechex($current_range['end']))) . '}'; |
388 | 435 | } |
389 | 436 | |
@@ -391,16 +438,19 @@ discard block |
||
391 | 438 | } |
392 | 439 | } |
393 | 440 | |
394 | - if (isset($current_range['start'])) { |
|
441 | + if (isset($current_range['start'])) |
|
442 | + { |
|
395 | 443 | $class_string .= '\\x{' . strtoupper(sprintf('%04s', dechex($current_range['start']))) . '}'; |
396 | 444 | |
397 | - if ($current_range['start'] != $current_range['end']) { |
|
445 | + if ($current_range['start'] != $current_range['end']) |
|
446 | + { |
|
398 | 447 | $class_string .= '-\\x{' . strtoupper(sprintf('%04s', dechex($current_range['end']))) . '}'; |
399 | 448 | } |
400 | 449 | } |
401 | 450 | |
402 | 451 | // As of Unicode 14.0, \x{FE0E} and \x{FE0F} work with identical ranges of base characters. |
403 | - if (($identical = array_search($class_string, $funcs['utf8_regex_variation_selectors']['data'])) !== false) { |
|
452 | + if (($identical = array_search($class_string, $funcs['utf8_regex_variation_selectors']['data'])) !== false) |
|
453 | + { |
|
404 | 454 | unset( |
405 | 455 | $funcs['utf8_regex_variation_selectors']['data'][$identical], |
406 | 456 | $funcs['utf8_regex_variation_selectors']['data'][$variation_selector] |
@@ -419,7 +469,8 @@ discard block |
||
419 | 469 | // The regex classes for join control tests require info about language scripts. |
420 | 470 | $script_stats = array(); |
421 | 471 | $script_aliases = array(); |
422 | -foreach (file($unicode_data_url . '/PropertyValueAliases.txt') as $line) { |
|
472 | +foreach (file($unicode_data_url . '/PropertyValueAliases.txt') as $line) |
|
473 | +{ |
|
423 | 474 | $line = substr($line, 0, strcspn($line, '#')); |
424 | 475 | |
425 | 476 | if (strpos($line, ';') === false) |
@@ -427,7 +478,8 @@ discard block |
||
427 | 478 | |
428 | 479 | $fields = explode(';', $line); |
429 | 480 | |
430 | - foreach ($fields as $key => $value) { |
|
481 | + foreach ($fields as $key => $value) |
|
482 | + { |
|
431 | 483 | $fields[$key] = trim($value); |
432 | 484 | } |
433 | 485 | |
@@ -436,7 +488,8 @@ discard block |
||
436 | 488 | |
437 | 489 | $script_aliases[$fields[1]] = $fields[2]; |
438 | 490 | } |
439 | -foreach (file($unicode_data_url . '/Scripts.txt') as $line) { |
|
491 | +foreach (file($unicode_data_url . '/Scripts.txt') as $line) |
|
492 | +{ |
|
440 | 493 | $line = substr($line, 0, strcspn($line, '#')); |
441 | 494 | |
442 | 495 | if (strpos($line, ';') === false) |
@@ -444,28 +497,34 @@ discard block |
||
444 | 497 | |
445 | 498 | $fields = explode(';', $line); |
446 | 499 | |
447 | - foreach ($fields as $key => $value) { |
|
500 | + foreach ($fields as $key => $value) |
|
501 | + { |
|
448 | 502 | $fields[$key] = trim($value); |
449 | 503 | } |
450 | 504 | |
451 | 505 | if (in_array($fields[1], array('Common', 'Inherited'))) |
452 | 506 | continue; |
453 | 507 | |
454 | - if (strpos($fields[0], '..') === false) { |
|
508 | + if (strpos($fields[0], '..') === false) |
|
509 | + { |
|
455 | 510 | $char_data['&#x' . $fields[0] . ';']['scripts'][] = $fields[1]; |
456 | - } else { |
|
511 | + } |
|
512 | + else |
|
513 | + { |
|
457 | 514 | list($start, $end) = explode('..', $fields[0]); |
458 | 515 | |
459 | 516 | $ord_s = hexdec($start); |
460 | 517 | $ord_e = hexdec($end); |
461 | 518 | |
462 | 519 | $ord = $ord_s; |
463 | - while ($ord <= $ord_e) { |
|
520 | + while ($ord <= $ord_e) |
|
521 | + { |
|
464 | 522 | $char_data['&#x' . strtoupper(sprintf('%04s', dechex($ord++))) . ';']['scripts'][] = $fields[1]; |
465 | 523 | } |
466 | 524 | } |
467 | 525 | } |
468 | -foreach (file($unicode_data_url . '/ScriptExtensions.txt') as $line) { |
|
526 | +foreach (file($unicode_data_url . '/ScriptExtensions.txt') as $line) |
|
527 | +{ |
|
469 | 528 | $line = substr($line, 0, strcspn($line, '#')); |
470 | 529 | |
471 | 530 | if (strpos($line, ';') === false) |
@@ -473,36 +532,46 @@ discard block |
||
473 | 532 | |
474 | 533 | $fields = explode(';', $line); |
475 | 534 | |
476 | - foreach ($fields as $key => $value) { |
|
535 | + foreach ($fields as $key => $value) |
|
536 | + { |
|
477 | 537 | $fields[$key] = trim($value); |
478 | 538 | } |
479 | 539 | |
480 | 540 | $char_scripts = array(); |
481 | - foreach (explode(' ', $fields[1]) as $alias) { |
|
482 | - if (!in_array($script_aliases[$alias], array('Common', 'Inherited'))) { |
|
541 | + foreach (explode(' ', $fields[1]) as $alias) |
|
542 | + { |
|
543 | + if (!in_array($script_aliases[$alias], array('Common', 'Inherited'))) |
|
544 | + { |
|
483 | 545 | $char_scripts[] = $script_aliases[$alias]; |
484 | 546 | } |
485 | 547 | } |
486 | 548 | |
487 | - if (strpos($fields[0], '..') === false) { |
|
488 | - foreach ($char_scripts as $char_script) { |
|
549 | + if (strpos($fields[0], '..') === false) |
|
550 | + { |
|
551 | + foreach ($char_scripts as $char_script) |
|
552 | + { |
|
489 | 553 | $char_data['&#x' . $fields[0] . ';']['scripts'][] = $char_script; |
490 | 554 | } |
491 | - } else { |
|
555 | + } |
|
556 | + else |
|
557 | + { |
|
492 | 558 | list($start, $end) = explode('..', $fields[0]); |
493 | 559 | |
494 | 560 | $ord_s = hexdec($start); |
495 | 561 | $ord_e = hexdec($end); |
496 | 562 | |
497 | 563 | $ord = $ord_s; |
498 | - while ($ord <= $ord_e) { |
|
499 | - foreach ($char_scripts as $char_script) { |
|
564 | + while ($ord <= $ord_e) |
|
565 | + { |
|
566 | + foreach ($char_scripts as $char_script) |
|
567 | + { |
|
500 | 568 | $char_data['&#x' . strtoupper(sprintf('%04s', dechex($ord++))) . ';']['scripts'][] = $char_script; |
501 | 569 | } |
502 | 570 | } |
503 | 571 | } |
504 | 572 | } |
505 | -foreach (file($unicode_data_url . '/DerivedAge.txt') as $line) { |
|
573 | +foreach (file($unicode_data_url . '/DerivedAge.txt') as $line) |
|
574 | +{ |
|
506 | 575 | $line = substr($line, 0, strcspn($line, '#')); |
507 | 576 | |
508 | 577 | if (strpos($line, ';') === false) |
@@ -510,45 +579,58 @@ discard block |
||
510 | 579 | |
511 | 580 | $fields = explode(';', $line); |
512 | 581 | |
513 | - foreach ($fields as $key => $value) { |
|
582 | + foreach ($fields as $key => $value) |
|
583 | + { |
|
514 | 584 | $fields[$key] = trim($value); |
515 | 585 | } |
516 | 586 | |
517 | 587 | $fields[1] = (float) $fields[1]; |
518 | 588 | |
519 | - if (strpos($fields[0], '..') === false) { |
|
589 | + if (strpos($fields[0], '..') === false) |
|
590 | + { |
|
520 | 591 | $char_scripts = $char_data['&#x' . $fields[0] . ';']['scripts']; |
521 | 592 | |
522 | 593 | if (empty($char_scripts)) |
523 | 594 | continue; |
524 | 595 | |
525 | - foreach ($char_scripts as $char_script) { |
|
526 | - if (!isset($script_stats[$char_script])) { |
|
596 | + foreach ($char_scripts as $char_script) |
|
597 | + { |
|
598 | + if (!isset($script_stats[$char_script])) |
|
599 | + { |
|
527 | 600 | $script_stats[$char_script]['age'] = (float) $fields[1]; |
528 | 601 | $script_stats[$char_script]['count'] = 1; |
529 | - } else { |
|
602 | + } |
|
603 | + else |
|
604 | + { |
|
530 | 605 | $script_stats[$char_script]['age'] = min((float) $fields[1], $script_stats[$char_script]['age']); |
531 | 606 | $script_stats[$char_script]['count']++; |
532 | 607 | } |
533 | 608 | } |
534 | - } else { |
|
609 | + } |
|
610 | + else |
|
611 | + { |
|
535 | 612 | list($start, $end) = explode('..', $fields[0]); |
536 | 613 | |
537 | 614 | $ord_s = hexdec($start); |
538 | 615 | $ord_e = hexdec($end); |
539 | 616 | |
540 | 617 | $ord = $ord_s; |
541 | - while ($ord <= $ord_e) { |
|
618 | + while ($ord <= $ord_e) |
|
619 | + { |
|
542 | 620 | $char_scripts = $char_data['&#x' . strtoupper(sprintf('%04s', dechex($ord++))) . ';']['scripts']; |
543 | 621 | |
544 | 622 | if (empty($char_scripts)) |
545 | 623 | continue; |
546 | 624 | |
547 | - foreach ($char_scripts as $char_script) { |
|
548 | - if (!isset($script_stats[$char_script])) { |
|
625 | + foreach ($char_scripts as $char_script) |
|
626 | + { |
|
627 | + if (!isset($script_stats[$char_script])) |
|
628 | + { |
|
549 | 629 | $script_stats[$char_script]['age'] = $fields[1]; |
550 | 630 | $script_stats[$char_script]['count'] = 1; |
551 | - } else { |
|
631 | + } |
|
632 | + else |
|
633 | + { |
|
552 | 634 | $script_stats[$char_script]['age'] = min($fields[1], $script_stats[$char_script]['age']); |
553 | 635 | $script_stats[$char_script]['count']++; |
554 | 636 | } |
@@ -559,7 +641,8 @@ discard block |
||
559 | 641 | |
560 | 642 | // Build regex classes for join control tests in utf8_sanitize_invisibles: |
561 | 643 | // 1. Cursive scripts like Arabic. |
562 | -foreach (file($unicode_data_url . '/extracted/DerivedJoiningType.txt') as $line) { |
|
644 | +foreach (file($unicode_data_url . '/extracted/DerivedJoiningType.txt') as $line) |
|
645 | +{ |
|
563 | 646 | $line = substr($line, 0, strcspn($line, '#')); |
564 | 647 | |
565 | 648 | if (strpos($line, ';') === false) |
@@ -567,11 +650,13 @@ discard block |
||
567 | 650 | |
568 | 651 | $fields = explode(';', $line); |
569 | 652 | |
570 | - foreach ($fields as $key => $value) { |
|
653 | + foreach ($fields as $key => $value) |
|
654 | + { |
|
571 | 655 | $fields[$key] = trim($value); |
572 | 656 | } |
573 | 657 | |
574 | - switch ($fields[1]) { |
|
658 | + switch ($fields[1]) |
|
659 | + { |
|
575 | 660 | case 'C': |
576 | 661 | $joining_type = 'Join_Causing'; |
577 | 662 | break; |
@@ -605,7 +690,8 @@ discard block |
||
605 | 690 | if (empty($char_scripts)) |
606 | 691 | continue; |
607 | 692 | |
608 | - foreach ($char_scripts as $char_script) { |
|
693 | + foreach ($char_scripts as $char_script) |
|
694 | + { |
|
609 | 695 | if (!isset($funcs['utf8_regex_joining_type']['data'][$char_script]['stats'])) |
610 | 696 | $funcs['utf8_regex_joining_type']['data'][$char_script]['stats'] = $script_stats[$char_script]; |
611 | 697 | |
@@ -616,30 +702,38 @@ discard block |
||
616 | 702 | } |
617 | 703 | } |
618 | 704 | // This sort works decently well to ensure widely used scripts are ranked before rare scripts. |
619 | -uasort($funcs['utf8_regex_joining_type']['data'], function($a, $b) { |
|
620 | - if ($a['stats']['age'] == $b['stats']['age']) { |
|
705 | +uasort($funcs['utf8_regex_joining_type']['data'], function($a, $b) |
|
706 | +{ |
|
707 | + if ($a['stats']['age'] == $b['stats']['age']) |
|
708 | + { |
|
621 | 709 | return $b['stats']['count'] - $a['stats']['count']; |
622 | - } else { |
|
710 | + } |
|
711 | + else |
|
712 | + { |
|
623 | 713 | return $a['stats']['age'] - $b['stats']['age']; |
624 | 714 | } |
625 | 715 | }); |
626 | -foreach ($funcs['utf8_regex_joining_type']['data'] as $char_script => $joining_types) { |
|
716 | +foreach ($funcs['utf8_regex_joining_type']['data'] as $char_script => $joining_types) |
|
717 | +{ |
|
627 | 718 | unset($funcs['utf8_regex_joining_type']['data'][$char_script]['stats'], $joining_types['stats']); |
628 | 719 | |
629 | 720 | // If the only joining type in this script is transparent, we don't care about it. |
630 | - if (array_keys($joining_types) === array('Transparent')) { |
|
721 | + if (array_keys($joining_types) === array('Transparent')) |
|
722 | + { |
|
631 | 723 | unset($funcs['utf8_regex_joining_type']['data'][$char_script]); |
632 | 724 | continue; |
633 | 725 | } |
634 | 726 | |
635 | - foreach ($joining_types as $joining_type => $value) { |
|
727 | + foreach ($joining_types as $joining_type => $value) |
|
728 | + { |
|
636 | 729 | sort($value); |
637 | 730 | $funcs['utf8_regex_joining_type']['data'][$char_script][$joining_type] = implode('', $value); |
638 | 731 | } |
639 | 732 | } |
640 | 733 | |
641 | 734 | // 2. Indic scripts like Devanagari. |
642 | -foreach (file($unicode_data_url . '/IndicSyllabicCategory.txt') as $line) { |
|
735 | +foreach (file($unicode_data_url . '/IndicSyllabicCategory.txt') as $line) |
|
736 | +{ |
|
643 | 737 | $line = substr($line, 0, strcspn($line, '#')); |
644 | 738 | |
645 | 739 | if (strpos($line, ';') === false) |
@@ -647,7 +741,8 @@ discard block |
||
647 | 741 | |
648 | 742 | $fields = explode(';', $line); |
649 | 743 | |
650 | - foreach ($fields as $key => $value) { |
|
744 | + foreach ($fields as $key => $value) |
|
745 | + { |
|
651 | 746 | $fields[$key] = trim($value); |
652 | 747 | } |
653 | 748 | |
@@ -661,7 +756,8 @@ discard block |
||
661 | 756 | if (empty($char_scripts)) |
662 | 757 | continue; |
663 | 758 | |
664 | - foreach ($char_scripts as $char_script) { |
|
759 | + foreach ($char_scripts as $char_script) |
|
760 | + { |
|
665 | 761 | if (!isset($funcs['utf8_regex_indic']['data'][$char_script]['stats'])) |
666 | 762 | $funcs['utf8_regex_indic']['data'][$char_script]['stats'] = $script_stats[$char_script]; |
667 | 763 | |
@@ -672,51 +768,66 @@ discard block |
||
672 | 768 | } |
673 | 769 | } |
674 | 770 | // Again, sort commonly used scripts before rare scripts. |
675 | -uasort($funcs['utf8_regex_indic']['data'], function($a, $b) { |
|
676 | - if ($a['stats']['age'] == $b['stats']['age']) { |
|
771 | +uasort($funcs['utf8_regex_indic']['data'], function($a, $b) |
|
772 | +{ |
|
773 | + if ($a['stats']['age'] == $b['stats']['age']) |
|
774 | + { |
|
677 | 775 | return $b['stats']['count'] - $a['stats']['count']; |
678 | - } else { |
|
776 | + } |
|
777 | + else |
|
778 | + { |
|
679 | 779 | return $a['stats']['age'] - $b['stats']['age']; |
680 | 780 | } |
681 | 781 | }); |
682 | 782 | // We only want scripts with viramas. |
683 | -foreach ($funcs['utf8_regex_indic']['data'] as $char_script => $inscs) { |
|
783 | +foreach ($funcs['utf8_regex_indic']['data'] as $char_script => $inscs) |
|
784 | +{ |
|
684 | 785 | unset($funcs['utf8_regex_indic']['data'][$char_script]['stats'], $inscs['stats']); |
685 | 786 | |
686 | - if (!isset($inscs['Virama'])) { |
|
787 | + if (!isset($inscs['Virama'])) |
|
788 | + { |
|
687 | 789 | unset($funcs['utf8_regex_indic']['data'][$char_script]); |
688 | 790 | continue; |
689 | 791 | } |
690 | 792 | } |
691 | 793 | // Now add some more classes that we need for each script. |
692 | -foreach ($char_data as $entity => $info) { |
|
794 | +foreach ($char_data as $entity => $info) |
|
795 | +{ |
|
693 | 796 | if (empty($info['scripts'])) |
694 | 797 | continue; |
695 | 798 | |
696 | 799 | $ord = hexdec(trim($entity, '&#x;')); |
697 | 800 | |
698 | - foreach ($info['scripts'] as $char_script) { |
|
801 | + foreach ($info['scripts'] as $char_script) |
|
802 | + { |
|
699 | 803 | if (!isset($funcs['utf8_regex_indic']['data'][$char_script])) |
700 | 804 | continue; |
701 | 805 | |
702 | 806 | $funcs['utf8_regex_indic']['data'][$char_script]['All'][] = $ord; |
703 | 807 | |
704 | - if ($info['General_Category'] == 'Mn') { |
|
808 | + if ($info['General_Category'] == 'Mn') |
|
809 | + { |
|
705 | 810 | $funcs['utf8_regex_indic']['data'][$char_script]['Nonspacing_Mark'][] = $ord; |
706 | 811 | |
707 | - if (!empty($funcs['utf8_combining_classes']['data'][$entity])) { |
|
812 | + if (!empty($funcs['utf8_combining_classes']['data'][$entity])) |
|
813 | + { |
|
708 | 814 | $funcs['utf8_regex_indic']['data'][$char_script]['Nonspacing_Combining_Mark'][] = $ord; |
709 | 815 | } |
710 | - } elseif (substr($info['General_Category'], 0, 1) == 'L') { |
|
816 | + } |
|
817 | + elseif (substr($info['General_Category'], 0, 1) == 'L') |
|
818 | + { |
|
711 | 819 | $funcs['utf8_regex_indic']['data'][$char_script]['Letter'][] = $ord; |
712 | 820 | } |
713 | 821 | } |
714 | 822 | } |
715 | -foreach ($funcs['utf8_regex_indic']['data'] as $char_script => $inscs) { |
|
716 | - foreach ($inscs as $insc => $value) { |
|
823 | +foreach ($funcs['utf8_regex_indic']['data'] as $char_script => $inscs) |
|
824 | +{ |
|
825 | + foreach ($inscs as $insc => $value) |
|
826 | + { |
|
717 | 827 | sort($value); |
718 | 828 | |
719 | - if (!in_array($insc, array('All', 'Letter', 'Nonspacing_Mark', 'Nonspacing_Combining_Mark'))) { |
|
829 | + if (!in_array($insc, array('All', 'Letter', 'Nonspacing_Mark', 'Nonspacing_Combining_Mark'))) |
|
830 | + { |
|
720 | 831 | $funcs['utf8_regex_indic']['data'][$char_script][$insc] = implode('', $value); |
721 | 832 | continue; |
722 | 833 | } |
@@ -724,18 +835,24 @@ discard block |
||
724 | 835 | $class_string = ''; |
725 | 836 | |
726 | 837 | $current_range = array('start' => null, 'end' => null); |
727 | - foreach($value as $ord) { |
|
728 | - if (!isset($current_range['start'])) { |
|
838 | + foreach($value as $ord) |
|
839 | + { |
|
840 | + if (!isset($current_range['start'])) |
|
841 | + { |
|
729 | 842 | $current_range['start'] = $ord; |
730 | 843 | } |
731 | 844 | |
732 | - if (!isset($current_range['end']) || $ord == $current_range['end'] + 1) { |
|
845 | + if (!isset($current_range['end']) || $ord == $current_range['end'] + 1) |
|
846 | + { |
|
733 | 847 | $current_range['end'] = $ord; |
734 | 848 | continue; |
735 | - } else { |
|
849 | + } |
|
850 | + else |
|
851 | + { |
|
736 | 852 | $class_string .= '\\x{' . strtoupper(sprintf('%04s', dechex($current_range['start']))) . '}'; |
737 | 853 | |
738 | - if ($current_range['start'] != $current_range['end']) { |
|
854 | + if ($current_range['start'] != $current_range['end']) |
|
855 | + { |
|
739 | 856 | $class_string .= '-\\x{' . strtoupper(sprintf('%04s', dechex($current_range['end']))) . '}'; |
740 | 857 | } |
741 | 858 | |
@@ -743,10 +860,12 @@ discard block |
||
743 | 860 | } |
744 | 861 | } |
745 | 862 | |
746 | - if (isset($current_range['start'])) { |
|
863 | + if (isset($current_range['start'])) |
|
864 | + { |
|
747 | 865 | $class_string .= '\\x{' . strtoupper(sprintf('%04s', dechex($current_range['start']))) . '}'; |
748 | 866 | |
749 | - if ($current_range['start'] != $current_range['end']) { |
|
867 | + if ($current_range['start'] != $current_range['end']) |
|
868 | + { |
|
750 | 869 | $class_string .= '-\\x{' . strtoupper(sprintf('%04s', dechex($current_range['end']))) . '}'; |
751 | 870 | } |
752 | 871 | } |
@@ -758,7 +877,8 @@ discard block |
||
758 | 877 | } |
759 | 878 | unset($funcs['utf8_combining_classes']); |
760 | 879 | |
761 | -foreach ($funcs as $func_name => $func_info) { |
|
880 | +foreach ($funcs as $func_name => $func_info) |
|
881 | +{ |
|
762 | 882 | export_func_to_file($func_name, $func_info); |
763 | 883 | } |
764 | 884 | |
@@ -803,24 +923,30 @@ discard block |
||
803 | 923 | { |
804 | 924 | static $indent = 2; |
805 | 925 | |
806 | - foreach ($data as $key => $value) { |
|
926 | + foreach ($data as $key => $value) |
|
927 | + { |
|
807 | 928 | $func_text .= str_repeat("\t", $indent); |
808 | 929 | |
809 | - if ($key_type == 'hexchar') { |
|
930 | + if ($key_type == 'hexchar') |
|
931 | + { |
|
810 | 932 | $func_text .= '"'; |
811 | 933 | |
812 | 934 | $key = mb_decode_numericentity(str_replace(' ', '', $key), array(0,0x10FFFF,0,0xFFFFFF), 'UTF-8'); |
813 | 935 | |
814 | - foreach (unpack('C*', $key) as $byte_value) { |
|
936 | + foreach (unpack('C*', $key) as $byte_value) |
|
937 | + { |
|
815 | 938 | $func_text .= '\\x' . strtoupper(dechex($byte_value)); |
816 | 939 | } |
817 | 940 | |
818 | 941 | $func_text .= '" => '; |
819 | - } elseif ($key_type == 'string') { |
|
942 | + } |
|
943 | + elseif ($key_type == 'string') |
|
944 | + { |
|
820 | 945 | $func_text .= var_export($key, true) . ' => '; |
821 | 946 | } |
822 | 947 | |
823 | - if (is_array($value)) { |
|
948 | + if (is_array($value)) |
|
949 | + { |
|
824 | 950 | $func_text .= 'array(' . "\n"; |
825 | 951 | |
826 | 952 | $indent++; |
@@ -828,18 +954,25 @@ discard block |
||
828 | 954 | $indent--; |
829 | 955 | |
830 | 956 | $func_text .= str_repeat("\t", $indent) . ')'; |
831 | - } elseif ($val_type == 'hexchar') { |
|
957 | + } |
|
958 | + elseif ($val_type == 'hexchar') |
|
959 | + { |
|
832 | 960 | $func_text .= '"'; |
833 | 961 | |
834 | 962 | $value = mb_decode_numericentity(str_replace(' ', '', $value), array(0,0x10FFFF,0,0xFFFFFF), 'UTF-8'); |
835 | - foreach (unpack('C*', $value) as $byte_value) { |
|
963 | + foreach (unpack('C*', $value) as $byte_value) |
|
964 | + { |
|
836 | 965 | $func_text .= '\\x' . strtoupper(dechex($byte_value)); |
837 | 966 | } |
838 | 967 | |
839 | 968 | $func_text .= '"'; |
840 | - } elseif ($val_type == 'string') { |
|
969 | + } |
|
970 | + elseif ($val_type == 'string') |
|
971 | + { |
|
841 | 972 | $func_text .= var_export($value, true); |
842 | - } else { |
|
973 | + } |
|
974 | + else |
|
975 | + { |
|
843 | 976 | $func_text .= $value; |
844 | 977 | } |
845 | 978 |