| Conditions | 3 |
| Paths | 3 |
| Total Lines | 65 |
| Code Lines | 47 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
| 1 | <?php |
||
/**
 * Transliterates a UTF-8 string into plain ASCII.
 *
 * Processing steps:
 *  1. Compose the input (NFC) so that precomposed and decomposed spellings of
 *     the same character are treated alike.
 *  2. Map German umlauts and a few other letters onto two-letter spellings
 *     (e.g. "ä" => "ae") before their diacritics would simply be dropped.
 *  3. Decompose (NFD) and strip all combining marks (e.g. "Ú" => "U").
 *  4. Map ligatures and letters that have no canonical decomposition
 *     (e.g. "ß" => "ss", "Ø" => "O") onto ASCII letters.
 *  5. Remove every remaining non-ASCII character.
 *
 * The original string is returned unchanged when the intl "Normalizer" class
 * is not available, when the input cannot be processed as UTF-8, or when
 * isNullOrEmpty() reports the result as empty (for example when the input
 * consisted only of characters that have no ASCII mapping).
 *
 * @param string $s UTF-8 encoded input
 * @return string ASCII transliteration of $s, or $s itself (see above)
 */
function normalizeUtf8String(string $s) : string
{
    // Normalizer class (intl extension) missing: nothing can be decomposed.
    if (!class_exists("Normalizer", false)) {
        return $s;
    }

    // Two-letter spellings that must be applied before diacritics are removed.
    $digraphs = [
        "\u{00c4}" => "AE", // Ä
        "\u{00d6}" => "OE", // Ö
        "\u{00dc}" => "UE", // Ü
        "\u{00e4}" => "ae", // ä
        "\u{00f6}" => "oe", // ö
        "\u{00fc}" => "ue", // ü
        "\u{00f1}" => "ny", // ñ
        "\u{00ff}" => "yu", // ÿ
    ];

    // Letters that survive canonical decomposition and need an explicit mapping.
    $letters = [
        "\u{00df}" => "ss", // ß
        "\u{00c6}" => "AE", // Æ
        "\u{00e6}" => "ae", // æ
        "\u{0132}" => "IJ", // Ĳ
        "\u{0133}" => "ij", // ĳ
        "\u{0152}" => "OE", // Œ
        "\u{0153}" => "oe", // œ
        "\u{00d0}" => "D",  // Ð
        "\u{0110}" => "D",  // Đ
        "\u{00f0}" => "d",  // ð
        "\u{0111}" => "d",  // đ
        "\u{0126}" => "H",  // Ħ
        "\u{0127}" => "h",  // ħ
        "\u{0131}" => "i",  // ı
        "\u{0138}" => "k",  // ĸ
        "\u{013f}" => "L",  // Ŀ
        "\u{0141}" => "L",  // Ł
        "\u{0140}" => "l",  // ŀ
        "\u{0142}" => "l",  // ł
        "\u{014a}" => "N",  // Ŋ
        "\u{0149}" => "n",  // ŉ
        "\u{014b}" => "n",  // ŋ
        "\u{00d8}" => "O",  // Ø
        "\u{00f8}" => "o",  // ø
        "\u{017f}" => "s",  // ſ
        "\u{00de}" => "T",  // Þ
        "\u{0166}" => "T",  // Ŧ
        "\u{00fe}" => "t",  // þ
        "\u{0167}" => "t",  // ŧ
    ];

    // Compose first: the digraph table only lists precomposed code points, so
    // "a" + combining diaeresis must become "ä" to be mapped onto "ae".
    // normalize() returns false for input that is not valid UTF-8.
    $composed = Normalizer::normalize($s, Normalizer::FORM_C);
    if (!is_string($composed)) {
        return $s;
    }

    // Split the remaining accented characters into base character + mark,
    // example: Ú => U + combining acute accent.
    $decomposed = Normalizer::normalize(strtr($composed, $digraphs), Normalizer::FORM_D);
    if (!is_string($decomposed)) {
        return $s;
    }

    // Remove the combining marks (diacritics).
    $stripped = preg_replace('/\pM/u', "", $decomposed);
    if ($stripped === null) {
        return $s;
    }

    // Remove all non-ASCII characters (ASCII ends at 0x7F).
    $ascii = preg_replace('/[^\x00-\x7F]/u', "", strtr($stripped, $letters));

    // A null result signals a regular-expression error; an empty result means
    // nothing could be mapped. Both fall back to the unmodified input.
    if ($ascii === null || isNullOrEmpty($ascii)) {
        return $s;
    }
    return $ascii;
}
||
| 114 | |||
| 284 |