Completed
Push — master ( 4bf1e7...e2641c )
by Lorenzo
03:12
created

sanitize.php ➔ normalizeUtf8String()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 65
Code Lines 47

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 47
nc 3
nop 1
dl 0
loc 65
rs 9.3571
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
 * Remove every carriage return and line feed from the input.
 *
 * @param string|array $str text (or array of texts) to clean
 * @return string|array the input with all "\r" and "\n" occurrences removed
 */
function strip_nl($str)
{
    // Needles are processed in order: carriage returns first, then line feeds.
    $lineBreaks = ["\r", "\n"];

    return str_replace($lineBreaks, '', $str);
}
12
13
/**
 * Escape a string for embedding in JavaScript.
 *
 * Line breaks ("\r" and "\n") are dropped and the remainder is passed
 * through addslashes().
 *
 * @param string $str text to escape
 * @return string the escaped text, or '' when isNullOrEmpty() reports the input as empty
 * @source https://github.com/rtconner/laravel-plusplus/blob/laravel-5/src/plus-functions.php
 */
function jse(string $str) : string
{
    // isNullOrEmpty() is defined outside this file.
    if (!isNullOrEmpty($str)) {
        $singleLine = str_replace(["\r", "\n"], "", $str);

        return addslashes($singleLine);
    }

    return '';
}
27
28
// Only declare e() when no other library (e.g. a framework) has already done so.
if (!function_exists('e')) {
    /**
     * Convert a string to its HTML-entity encoded form.
     *
     * Both single and double quotes are encoded, the input is treated as
     * UTF-8, and entities already present in the input are not encoded twice.
     *
     * @param  string $value text to encode
     * @return string encoded text
     */
    function e($value)
    {
        $quoteStyle = ENT_QUOTES;
        $charset = 'UTF-8';
        $doubleEncode = false;

        return htmlentities($value, $quoteStyle, $charset, $doubleEncode);
    }
}
40
41
/**
 * Transliterate a UTF-8 string to plain ASCII.
 *
 * Steps:
 *  1. German umlauts, ñ and ÿ are mapped onto two-letter ASCII sequences.
 *  2. The string is canonically decomposed (NFD) and all combining marks are
 *     removed, so e.g. "Ú" becomes "U".
 *  3. Letters that have no canonical decomposition (ß, Æ, Ø, Ł, ...) are
 *     mapped onto ASCII look-alikes.
 *  4. Every remaining non-ASCII character is removed.
 *
 * The original string is returned unchanged when the intl Normalizer class is
 * not loaded, when normalization or a regular expression fails (e.g. on
 * invalid UTF-8), or when isNullOrEmpty() reports the result as empty.
 *
 * @param string $s UTF-8 encoded text
 * @return string ASCII text, or the untouched input in the cases listed above
 * @see http://php.net/manual/en/normalizer.normalize.php#92592
 */
function normalizeUtf8String(string $s) : string
{
    $original_string = $s;

    // Normalizer class (intl extension) missing: nothing can be decomposed.
    if (!class_exists("Normalizer", false)) {
        return $original_string;
    }

    // Must run BEFORE decomposition, otherwise these letters would simply
    // lose their diacritic (Ä => A) instead of becoming two letters.
    $s = strtr($s, [
        "\u{00c4}" => 'AE', // Ä
        "\u{00d6}" => 'OE', // Ö
        "\u{00dc}" => 'UE', // Ü
        "\u{00e4}" => 'ae', // ä
        "\u{00f6}" => 'oe', // ö
        "\u{00fc}" => 'ue', // ü
        "\u{00f1}" => 'ny', // ñ
        "\u{00ff}" => 'yu', // ÿ
    ]);

    // Split characters with diacritics into base character + combining mark
    // (example: Ú => U + U+0301). Returns false on invalid input.
    $s = Normalizer::normalize($s, Normalizer::FORM_D);
    if (!is_string($s)) {
        return $original_string;
    }

    // Remove the combining marks produced by the decomposition.
    $s = preg_replace('/\pM/u', "", $s);
    if ($s === null) {
        return $original_string;
    }

    // Letters without a canonical decomposition need an explicit mapping.
    $s = strtr($s, [
        "\u{00df}" => 'ss', // ß
        "\u{00c6}" => 'AE', // Æ
        "\u{00e6}" => 'ae', // æ
        "\u{0132}" => 'IJ', // Ĳ
        "\u{0133}" => 'ij', // ĳ
        "\u{0152}" => 'OE', // Œ
        "\u{0153}" => 'oe', // œ
        "\u{00d0}" => 'D',  // Ð
        "\u{0110}" => 'D',  // Đ
        "\u{00f0}" => 'd',  // ð
        "\u{0111}" => 'd',  // đ
        "\u{0126}" => 'H',  // Ħ
        "\u{0127}" => 'h',  // ħ
        "\u{0131}" => 'i',  // ı
        "\u{0138}" => 'k',  // ĸ
        "\u{013f}" => 'L',  // Ŀ
        "\u{0141}" => 'L',  // Ł
        "\u{0140}" => 'l',  // ŀ
        "\u{0142}" => 'l',  // ł
        "\u{014a}" => 'N',  // Ŋ
        "\u{0149}" => 'n',  // ŉ
        "\u{014b}" => 'n',  // ŋ
        "\u{00d8}" => 'O',  // Ø
        "\u{00f8}" => 'o',  // ø
        "\u{017f}" => 's',  // ſ
        "\u{00de}" => 'T',  // Þ
        "\u{0166}" => 'T',  // Ŧ
        "\u{00fe}" => 't',  // þ
        "\u{0167}" => 't',  // ŧ
    ]);

    // Remove all non-ASCII characters. The upper bound is U+007F: the
    // previous bound of \x80 let the non-ASCII control character U+0080 through.
    $s = preg_replace('/[^\x00-\x7F]/u', "", $s);
    if ($s === null) {
        return $original_string;
    }

    // Nothing usable left: fall back to the input.
    // isNullOrEmpty() is defined outside this file.
    if (isNullOrEmpty($s)) {
        return $original_string;
    }
    return $s;
}
114
115
/**
 * String Sanitizer for Filename
 *
 * Processing order:
 *  1. every ' ' is replaced with $charToReplaceWhiteSpace;
 *  2. path traversal sequences are removed until none is left;
 *  3. every character that is not a word character, whitespace, digit or one
 *     of -_~,;[](). is removed ('/' is additionally kept in path mode);
 *  4. control characters are removed;
 *  5. the result is reduced to ASCII via normalizeUtf8String() and trimmed;
 *  6. path traversal sequences are removed AGAIN, because steps 3-5 delete
 *     characters and can thereby re-create them
 *     (path mode: "a/.<./b" => "a/../b");
 *  7. leading ".." pairs and all trailing dots are removed.
 *
 * @param string $fileName
 * @param bool $sanitizeForPath if set to false (default) sanitize file name, otherwise file path name
 * @param string $charToReplaceWhiteSpace replacement for every ' '. With the default ' ' spaces are
 * preserved; note that an empty string removes them.
 * @return string
 * @see for base script idea http://stackoverflow.com/a/2021729
 */
function sanitize_filename(
    string $fileName,
    bool $sanitizeForPath = false,
    string $charToReplaceWhiteSpace = ' '
) : string
{
    //check whitespace
    $fileName = str_replace(' ', $charToReplaceWhiteSpace, $fileName);

    // Remove any runs of periods - avoid Path Traversal Vulnerabilities (OWASP)
    // https://www.owasp.org/index.php/Path_Traversal
    $notAllowedPath = [
        '//',
        '\\\\',
        '../',
        './',
        '..\\',
        '.\\',
        '%2e%2e%2f',
        '%2e%2e/',
        '..%2f',
        '%2e%2e%5c',
        '%2e%2e\\',
        '..%5c',
        '%252e%252e%255c',
        '..%255c',
        '..%c0%af',
        '..%c1%9c',
    ];

    // Repeat the removal until the string is stable: deleting one sequence can
    // expose another ("....//" => "../"). Uses only str_replace(), so it does
    // not depend on a framework str_contains() helper accepting an array
    // (the native PHP 8 function only accepts a string needle).
    $stripTraversal = static function (string $name) use ($notAllowedPath) : string {
        do {
            $before = $name;
            $name = str_replace($notAllowedPath, '', $name);
        } while ($name !== $before);

        return $name;
    };

    $fileName = $stripTraversal($fileName);

    // Remove anything which isn't a word, whitespace, number
    // or any of the following characters -_~,;[]().
    // mb_ereg_replace is used to handle multi-byte characters.
    // Thanks @Łukasz Rysiak!
    $fileName = mb_ereg_replace('([^\w\s\d\-_~,;\[\]\(\).' . ($sanitizeForPath ? '\\/' : '') . '])', '', $fileName);

    // remove control characters
    $fileName = mb_ereg_replace('([[:cntrl:]\b\0\n\r\t\f])', '', (string) $fileName);

    //normalize and trim
    $fileName = trim(normalizeUtf8String((string) $fileName));

    // Second pass: the filters above delete characters and may have glued
    // the remaining ones into a new traversal sequence.
    $fileName = $stripTraversal($fileName);

    //do not start with ..
    while (strncmp($fileName, '..', 2) === 0) {
        $fileName = (string) substr($fileName, 2);
    }

    //do not end with .. or . (stripping all trailing dots covers both)
    return rtrim($fileName, '.');
}
186
187
/**
 * String Sanitizer for Path name
 *
 * Thin wrapper around sanitize_filename() with path mode enabled.
 *
 * @param string $pathName
 * @param string $charToReplaceWhiteSpace replacement for every ' ' in $pathName. Defaults to ' '
 * (spaces preserved), matching sanitize_filename().
 * @return string
 */
function sanitize_pathname(string $pathName, string $charToReplaceWhiteSpace = ' ') : string
{
    return sanitize_filename($pathName, true, $charToReplaceWhiteSpace);
}
199
200
/**
 * Run every element of an array through FILTER_SANITIZE_STRING.
 *
 * Keys are preserved; only the values are filtered.
 * NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 — plan a replacement.
 *
 * @param array $data values to clean
 *
 * @return array the same keys with filtered values
 */
function sanitize_arr_string_xss(array $data) : array
{
    $cleanValue = static function ($item) {
        return filter_var($item, FILTER_SANITIZE_STRING);
    };

    // array_map() with a single array keeps the original keys.
    return array_map($cleanValue, $data);
}
214
215
/**
 * Run a single string through FILTER_SANITIZE_STRING.
 *
 * NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 — plan a replacement.
 *
 * @param string $data text to clean
 *
 * @return string filtered text
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_string_xss(string $data) : string
{
    $filtered = filter_var($data, FILTER_SANITIZE_STRING);

    return $filtered;
}
228
229
/**
 * URL-encode a value using the FILTER_SANITIZE_ENCODED filter.
 *
 * @param string $value text to encode
 *
 * @return string encoded text
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_urlencode($value)
{
    $encoded = filter_var($value, FILTER_SANITIZE_ENCODED);

    return $encoded;
}
242
243
/**
 * Strip characters that FILTER_SANITIZE_EMAIL does not allow in an e-mail address.
 *
 * @param string $value candidate e-mail address
 *
 * @return string the address with disallowed characters removed
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_email($value)
{
    $cleaned = filter_var($value, FILTER_SANITIZE_EMAIL);

    return $cleaned;
}
256
257
/**
 * Keep only the characters FILTER_SANITIZE_NUMBER_INT allows (digits, '+' and '-').
 *
 * @param string $value text containing an integer
 *
 * @return string the text with all other characters removed
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_numbers($value)
{
    $digitsOnly = filter_var($value, FILTER_SANITIZE_NUMBER_INT);

    return $digitsOnly;
}
270
271
/**
 * Keep only the characters allowed in a decimal number.
 *
 * Uses FILTER_SANITIZE_NUMBER_FLOAT with FILTER_FLAG_ALLOW_FRACTION, so the
 * '.' fraction separator survives in addition to digits, '+' and '-'.
 *
 * @param string $value text containing a float number
 *
 * @return string the text with all other characters removed
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_floats($value)
{
    $filter = FILTER_SANITIZE_NUMBER_FLOAT;
    $flags = FILTER_FLAG_ALLOW_FRACTION;

    return filter_var($value, $filter, $flags);
}
284