Completed
Push — master ( 988869...84d590 )
by Lorenzo
08:17
created

sanitize.php ➔ normalizeUtf8String()   B

Complexity

Conditions 4
Paths 8

Size

Total Lines 69
Code Lines 49

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 49
nc 8
nop 1
dl 0
loc 69
rs 8.7653
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
if (!function_exists('strip_nl')) {

    /**
     * Remove every carriage return and line feed from the input.
     *
     * @param string|array $str text (or array of texts) to clean
     * @return string|array the input with all "\r" and "\n" characters removed
     */
    function strip_nl($str)
    {
        $lineBreaks = ["\r", "\n"];

        return str_replace($lineBreaks, '', $str);
    }
}
15
16
if (!function_exists('jse')) {

    /**
     * Javascript escape.
     *
     * Removes all carriage returns and line feeds, then backslash-escapes the
     * result with addslashes(). Returns an empty string when isNullOrEmpty()
     * reports the input as empty.
     *
     * @param string $str
     * @return string
     * @source https://github.com/rtconner/laravel-plusplus/blob/laravel-5/src/plus-functions.php
     */
    function jse(string $str) : string
    {
        if (!isNullOrEmpty($str)) {
            // collapse to a single line before escaping
            $singleLine = str_replace(["\r", "\n"], "", $str);

            return addslashes($singleLine);
        }

        return '';
    }
}
33
34
if (!function_exists('e')) {
    /**
     * Convert a string to its HTML-entity form.
     *
     * Single and double quotes are both converted (ENT_QUOTES), the input is
     * read as UTF-8, and entities already present in the input are not
     * encoded a second time.
     *
     * @param  string $value
     * @return string
     */
    function e($value)
    {
        $flags = ENT_QUOTES;
        $encoding = 'UTF-8';
        $doubleEncode = false;

        return htmlentities($value, $flags, $encoding, $doubleEncode);
    }
}
46
47
if (!function_exists('csse')) {
    /**
     * Escape a string for use inside a CSS value.
     *
     * Every character other than an ASCII letter or digit is replaced by its
     * CSS hexadecimal escape: a backslash, the Unicode code point in
     * upper-case hex, and a terminating space. Empty strings and strings
     * made only of digits are returned untouched.
     *
     * @param  string $value UTF-8 encoded text
     * @return string
     * @see https://github.com/auraphp/Aura.Html/blob/2.x/src/Escaper/CssEscaper.php
     */
    function csse($value)
    {
        // pre-empt replacement: nothing in these inputs needs escaping
        if ($value === '' || ctype_digit($value)) {
            return $value;
        }
        return preg_replace_callback(
            '/[^a-z0-9]/iSu',
            function ($matches) {
                // the /u modifier hands us exactly one well-formed UTF-8 character
                $chr = $matches[0];
                $length = strlen($chr);
                $ord = ord($chr[0]);
                if ($length > 1) {
                    // multi-byte sequence: drop the length-marker bits of the
                    // lead byte, then append 6 payload bits per continuation byte
                    $ord &= 0xFF >> ($length + 1);
                    for ($i = 1; $i < $length; $i++) {
                        $ord = ($ord << 6) | (ord($chr[$i]) & 0x3F);
                    }
                }
                return sprintf('\\%X ', $ord);
            },
            $value
        );
    }
}
77
78
if (!function_exists('attre')) {
    /**
     * Escape a string for use inside an HTML attribute value.
     *
     * ASCII letters, digits and the characters , . - _ pass through. Every
     * other character is replaced by a named entity (for " & < >), by the
     * Unicode replacement character entity (for control characters other
     * than tab, line feed and carriage return), or by a hexadecimal numeric
     * entity built from its Unicode code point. Empty strings and strings
     * made only of digits are returned untouched.
     *
     * @param  string $value UTF-8 encoded text
     * @return string
     * @see https://github.com/auraphp/Aura.Html/blob/2.x/src/Escaper/AttrEscaper.php
     */
    function attre($value)
    {
        // pre-empt replacement: nothing in these inputs needs escaping
        if ($value === '' || ctype_digit($value)) {
            return $value;
        }
        return preg_replace_callback(
            '/[^a-z0-9,\.\-_]/iSu',
            function ($matches) {
                // the /u modifier hands us exactly one well-formed UTF-8 character
                $chr = $matches[0];
                $length = strlen($chr);
                $ord = ord($chr[0]);
                if ($length > 1) {
                    // multi-byte sequence: drop the length-marker bits of the
                    // lead byte, then append 6 payload bits per continuation byte
                    $ord &= 0xFF >> ($length + 1);
                    for ($i = 1; $i < $length; $i++) {
                        $ord = ($ord << 6) | (ord($chr[$i]) & 0x3F);
                    }
                }

                $isC0Control = $ord <= 0x1f && $chr !== "\t" && $chr !== "\n" && $chr !== "\r";
                $isDelOrC1Control = $ord >= 0x7f && $ord <= 0x9f;
                if ($isC0Control || $isDelOrC1Control) {
                    // use the Unicode replacement char
                    return '&#xFFFD;';
                }

                $entities = [
                    34 => '&quot;',
                    38 => '&amp;',
                    60 => '&lt;',
                    62 => '&gt;',
                ];
                // is this a mapped entity?
                if (isset($entities[$ord])) {
                    return $entities[$ord];
                }
                // code points above Latin-1 get at least four hex digits
                if ($ord > 255) {
                    return sprintf('&#x%04X;', $ord);
                }
                // everything else
                return sprintf('&#x%02X;', $ord);
            },
            $value
        );
    }
}
124
125
if (!function_exists('she')) {

    /**
     * Escape a value so it can be passed as one shell argument.
     *
     * When windows_os() returns true the value is wrapped in double quotes
     * with every backslash and double quote prefixed by a backslash;
     * otherwise PHP's escapeshellarg() does the quoting.
     *
     * @param string $input
     * @return string
     */
    function she(string $input) : string
    {
        if (!windows_os()) {
            return escapeshellarg($input);
        }

        $escaped = addcslashes($input, '\\"');

        return '"' . $escaped . '"';
    }
}
141
142
/**
 * Transliterate a UTF-8 string to printable ASCII.
 *
 * Steps, in order:
 *  1. every value listed by charsArray() is replaced with the key it is filed under;
 *  2. German umlauts, ñ and ÿ are mapped to two-letter equivalents (this must
 *     happen before step 3, which would otherwise reduce them to the bare base letter);
 *  3. if the Normalizer class is already loaded, accented letters are decomposed
 *     (form D) and all combining marks are then removed;
 *  4. ligatures and stroked/special Latin letters are mapped to plain letters;
 *  5. anything still outside the printable ASCII range (\x20-\x7E) is deleted.
 *
 * If the result is empty - for instance because the input is not valid UTF-8
 * and a /u regular expression failed - the original string is returned unchanged.
 *
 * @param string $s
 * @return string
 * @see https://github.com/illuminate/support/blob/master/Str.php#L38
 * @see http://php.net/manual/en/normalizer.normalize.php#92592
 */
function normalizeUtf8String(string $s) : string
{
    $original_string = $s;

    // Transliterate UTF-8 value to ASCII with chars array map.
    foreach (charsArray() as $key => $val) {
        $s = str_replace($val, $key, $s);
    }

    // Two-letter transliterations, applied while the characters are still precomposed.
    $s = strtr($s, [
        "\u{00C4}" => 'AE', // Ä
        "\u{00D6}" => 'OE', // Ö
        "\u{00DC}" => 'UE', // Ü
        "\u{00E4}" => 'ae', // ä
        "\u{00F6}" => 'oe', // ö
        "\u{00FC}" => 'ue', // ü
        "\u{00F1}" => 'ny', // ñ
        "\u{00FF}" => 'yu', // ÿ
    ]);

    // If the Normalizer class is loaded (no autoloading is triggered), split
    // accented characters into base character + combining mark.
    if (class_exists("Normalizer", false)) {
        $decomposed = Normalizer::normalize($s, Normalizer::FORM_D);
        // normalize() returns false on failure; keep the current text in that case
        if (is_string($decomposed)) {
            $s = $decomposed;
        }
    }

    // Remove the combining marks (diacritics).
    $s = preg_replace('/\pM/u', "", $s);
    if ($s === null) {
        // the /u regular expression failed, typically on malformed UTF-8
        return $original_string;
    }

    // Letters that normalization form D does not break apart.
    $s = strtr($s, [
        "\u{00DF}" => 'ss', // ß
        "\u{00C6}" => 'AE', // Æ
        "\u{00E6}" => 'ae', // æ
        "\u{0132}" => 'IJ', // Ĳ
        "\u{0133}" => 'ij', // ĳ
        "\u{0152}" => 'OE', // Œ
        "\u{0153}" => 'oe', // œ
        "\u{00D0}" => 'D',  // Ð
        "\u{0110}" => 'D',  // Đ
        "\u{00F0}" => 'd',  // ð
        "\u{0111}" => 'd',  // đ
        "\u{0126}" => 'H',  // Ħ
        "\u{0127}" => 'h',  // ħ
        "\u{0131}" => 'i',  // ı
        "\u{0138}" => 'k',  // ĸ
        "\u{013F}" => 'L',  // Ŀ
        "\u{0141}" => 'L',  // Ł
        "\u{0140}" => 'l',  // ŀ
        "\u{0142}" => 'l',  // ł
        "\u{014A}" => 'N',  // Ŋ
        "\u{0149}" => 'n',  // ŉ
        "\u{014B}" => 'n',  // ŋ
        "\u{00D8}" => 'O',  // Ø
        "\u{00F8}" => 'o',  // ø
        "\u{017F}" => 's',  // ſ
        "\u{00DE}" => 'T',  // Þ
        "\u{0166}" => 'T',  // Ŧ
        "\u{00FE}" => 't',  // þ
        "\u{0167}" => 't',  // ŧ
    ]);

    // Finally drop everything that is not printable ASCII. This runs last so
    // that the mappings above get a chance to transliterate first.
    $s = preg_replace('/[^\x20-\x7E]/u', '', $s);

    // possible errors in UTF8-regular-expressions, or nothing left to return
    if (isNullOrEmpty($s)) {
        return $original_string;
    }
    return $s;
}
220
221
/**
 * String Sanitizer for Filename.
 *
 * Removes path-traversal sequences, strips every character outside the
 * allowed set, drops control characters, transliterates the result with
 * normalizeUtf8String() and finally removes leading ".." pairs and all
 * trailing dots.
 *
 * @param string $fileName
 * @param bool $sanitizeForPath if false (default) sanitize a file name; if true sanitize a path
 * name, in which case forward slashes are also kept
 * @param string $charToReplaceWhiteSpace replacement for each space character: the default ' '
 * leaves spaces as they are, an empty string removes them, any other value is substituted
 * @return string
 * @see for base script idea http://stackoverflow.com/a/2021729
 */
function sanitize_filename(
    string $fileName,
    bool $sanitizeForPath = false,
    string $charToReplaceWhiteSpace = ' '
) : string
{
    // check whitespace
    $fileName = str_replace(' ', $charToReplaceWhiteSpace, $fileName);

    // Remove any runs of periods - avoid Path Traversal Vulnerabilities (OWASP)
    // https://www.owasp.org/index.php/Path_Traversal
    $notAllowedPath = [
        '//',
        '\\\\',
        '../',
        './',
        '..\\',
        '.\\',
        '%2e%2e%2f',
        '%2e%2e/',
        '..%2f',
        '%2e%2e%5c',
        '%2e%2e\\',
        '..%5c',
        '%252e%252e%255c',
        '..%255c',
        '..%c0%af',
        '..%c1%9c',
    ];
    // Removing one sequence can bring the surrounding characters together into
    // a new forbidden sequence, so repeat until a pass removes nothing.
    // The replacement count is used instead of str_contains() because the
    // native PHP 8 str_contains() only accepts a string needle, not a list.
    do {
        $fileName = str_replace($notAllowedPath, '', $fileName, $removed);
    } while ($removed > 0);

    // Remove anything which isn't a word, whitespace, number
    // or any of the following characters -_~,;[]().
    // If you don't need to handle multi-byte characters
    // you can use preg_replace rather than mb_ereg_replace
    // Thanks @Łukasz Rysiak!
    $fileName = mb_ereg_replace('([^\w\s\d\-_~,;\[\]\(\).' . ($sanitizeForPath ? '\\/' : '') . '])', '', $fileName);

    // remove control and other non-printing characters
    $fileName = mb_ereg_replace('([[:cntrl:]\b\0\n\r\t\f])', '', $fileName);

    // normalize and trim
    $fileName = trim(normalizeUtf8String($fileName));

    // do not start with ..
    while (strncmp($fileName, '..', 2) === 0) {
        $fileName = substr($fileName, 2);
    }

    // do not end with .. or .
    $fileName = rtrim($fileName, '.');

    return $fileName;
}
292
293
/**
 * String Sanitizer for Path name.
 *
 * Delegates to sanitize_filename() with path mode switched on, so forward
 * slashes survive the clean-up.
 *
 * @param string $pathName
 * @param string $charToReplaceWhiteSpace replacement for each space character: the default ' '
 * leaves spaces as they are, any other value is substituted for every space
 * @return string
 */
function sanitize_pathname(string $pathName, string $charToReplaceWhiteSpace = ' ') : string
{
    return sanitize_filename($pathName, true, $charToReplaceWhiteSpace);
}
305
306
/**
 * Perform XSS clean on every element of an array.
 *
 * Each value is passed through filter_var() with FILTER_SANITIZE_STRING;
 * keys are preserved.
 *
 * @param array $data
 *
 * @return array
 */
function sanitize_arr_string_xss(array $data) : array
{
    return array_map(
        function ($item) {
            return filter_var($item, FILTER_SANITIZE_STRING);
        },
        $data
    );
}
320
321
/**
 * Perform XSS clean on a single string.
 *
 * The value is passed through filter_var() with FILTER_SANITIZE_STRING.
 *
 * @param string $data
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_string_xss(string $data) : string
{
    $cleaned = filter_var($data, FILTER_SANITIZE_STRING);

    return $cleaned;
}
334
335
/**
 * URL-encode a string.
 *
 * The value is passed through filter_var() with FILTER_SANITIZE_ENCODED.
 *
 * @param string $value
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_urlencode($value)
{
    $encoded = filter_var($value, FILTER_SANITIZE_ENCODED);

    return $encoded;
}
348
349
/**
 * Strip characters that are not allowed in an e-mail address.
 *
 * The value is passed through filter_var() with FILTER_SANITIZE_EMAIL.
 *
 * @param string $value
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_email($value)
{
    $address = filter_var($value, FILTER_SANITIZE_EMAIL);

    return $address;
}
362
363
/**
 * Strip every character that cannot be part of an integer.
 *
 * The value is passed through filter_var() with FILTER_SANITIZE_NUMBER_INT,
 * which keeps digits and the plus and minus signs.
 *
 * @param string $value
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_numbers($value)
{
    $digits = filter_var($value, FILTER_SANITIZE_NUMBER_INT);

    return $digits;
}
376
377
/**
 * Strip every character that cannot be part of a decimal number.
 *
 * The value is passed through filter_var() with FILTER_SANITIZE_NUMBER_FLOAT
 * and FILTER_FLAG_ALLOW_FRACTION, so digits, plus and minus signs and the
 * decimal point are kept.
 *
 * @param string $value
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_floats($value)
{
    $filter = FILTER_SANITIZE_NUMBER_FLOAT;
    $options = FILTER_FLAG_ALLOW_FRACTION;

    return filter_var($value, $filter, $options);
}
390