Conditions | 3 |
Paths | 3 |
Total Lines | 65 |
Code Lines | 47 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
1 | <?php |
||
/**
 * Transliterates a UTF-8 string to plain ASCII.
 *
 * Steps, in order:
 *  1. German umlauts, ñ and ÿ are expanded to two ASCII letters. This must
 *     happen before step 2, otherwise they would be reduced to their bare
 *     base letter.
 *  2. The string is canonically decomposed (NFD) and every combining mark
 *     is dropped (e.g. "Ú" becomes "U" + U+0301, then "U").
 *  3. Letters that have no canonical decomposition (ß, Æ, Ø, Ł, ...) are
 *     mapped onto ASCII look-alikes.
 *  4. Every remaining non-ASCII code point is removed.
 *
 * The input is returned unchanged when the intl Normalizer class is not
 * loaded, when any step fails (e.g. the input is not valid UTF-8), or when
 * the result is considered empty by isNullOrEmpty().
 *
 * @param string $s UTF-8 encoded input.
 *
 * @return string ASCII transliteration of $s, or $s itself as a fallback.
 */
function normalizeUtf8String(string $s) : string
{
    $original_string = $s;

    // Normalizer class (intl extension) missing: nothing we can do.
    // The second argument deliberately suppresses autoloading.
    if (!class_exists("Normalizer", false)) {
        return $original_string;
    }

    // Characters that are transliterated to two letters instead of being
    // stripped down to their base letter.
    $two_letter_map = [
        '/\x{00c4}/u' => "AE", // Ä
        '/\x{00d6}/u' => "OE", // Ö
        '/\x{00dc}/u' => "UE", // Ü
        '/\x{00e4}/u' => "ae", // ä
        '/\x{00f6}/u' => "oe", // ö
        '/\x{00fc}/u' => "ue", // ü
        '/\x{00f1}/u' => "ny", // ñ
        '/\x{00ff}/u' => "yu", // ÿ
    ];

    // Letters without a canonical decomposition; NFD leaves them untouched,
    // so they need an explicit ASCII replacement.
    $letter_map = [
        '/\x{00df}/u' => "ss", // ß
        '/\x{00c6}/u' => "AE", // Æ
        '/\x{00e6}/u' => "ae", // æ
        '/\x{0132}/u' => "IJ", // Ĳ
        '/\x{0133}/u' => "ij", // ĳ
        '/\x{0152}/u' => "OE", // Œ
        '/\x{0153}/u' => "oe", // œ

        '/\x{00d0}/u' => "D",  // Ð
        '/\x{0110}/u' => "D",  // Đ
        '/\x{00f0}/u' => "d",  // ð
        '/\x{0111}/u' => "d",  // đ
        '/\x{0126}/u' => "H",  // Ħ
        '/\x{0127}/u' => "h",  // ħ
        '/\x{0131}/u' => "i",  // ı
        '/\x{0138}/u' => "k",  // ĸ
        '/\x{013f}/u' => "L",  // Ŀ
        '/\x{0141}/u' => "L",  // Ł
        '/\x{0140}/u' => "l",  // ŀ
        '/\x{0142}/u' => "l",  // ł
        '/\x{014a}/u' => "N",  // Ŋ
        '/\x{0149}/u' => "n",  // ŉ
        '/\x{014b}/u' => "n",  // ŋ
        '/\x{00d8}/u' => "O",  // Ø
        '/\x{00f8}/u' => "o",  // ø
        '/\x{017f}/u' => "s",  // ſ
        '/\x{00de}/u' => "T",  // Þ
        '/\x{0166}/u' => "T",  // Ŧ
        '/\x{00fe}/u' => "t",  // þ
        '/\x{0167}/u' => "t",  // ŧ
    ];

    // preg_replace() yields null on a PCRE error (typically malformed UTF-8
    // with the /u modifier); bail out instead of passing null further down.
    $s = preg_replace(array_keys($two_letter_map), array_values($two_letter_map), $s);
    if (!is_string($s)) {
        return $original_string;
    }

    // Canonical decomposition: base character followed by its combining marks.
    // Normalizer::normalize() returns false on failure.
    $s = Normalizer::normalize($s, Normalizer::FORM_D);
    if (!is_string($s)) {
        return $original_string;
    }

    // Drop the combining marks produced by the decomposition.
    $s = preg_replace('/\pM/u', "", $s);
    if (!is_string($s)) {
        return $original_string;
    }

    $s = preg_replace(array_keys($letter_map), array_values($letter_map), $s);
    if (!is_string($s)) {
        return $original_string;
    }

    // Remove everything outside ASCII (U+0000..U+007F). The upper bound is
    // 0x7f: U+0080 is a C1 control character, not ASCII.
    $s = preg_replace('/[^\0-\x7f]/u', "", $s);

    // Fall back to the input if the last step failed or nothing is left.
    // isNullOrEmpty() is defined elsewhere in the project.
    if (!is_string($s) || isNullOrEmpty($s)) {
        return $original_string;
    }

    return $s;
}
||
114 | |||
284 |