Completed
Push — master ( 3b0909...f58ce7 )
by Lorenzo
02:00
created

sanitize.php ➔ normalizerUtf8Safe()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 8
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 5
nc 2
nop 2
dl 0
loc 8
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
if (!function_exists('strip_nl')) {
    /**
     * Remove every carriage return ("\r") and line feed ("\n") from the input.
     *
     * The argument is handed straight to str_replace(), so an array of
     * strings is cleaned element by element and returned as an array.
     *
     * @param string|array $str text (or list of texts) to clean
     * @return string|array the input with all "\r" and "\n" removed
     */
    function strip_nl($str)
    {
        $lineBreaks = ["\r", "\n"];

        return str_replace($lineBreaks, '', $str);
    }
}
15
16
if (!function_exists('jse')) {
    /**
     * Javascript escape.
     *
     * Returns an empty string when isNullOrEmpty() reports the input as
     * empty; otherwise removes "\r" and "\n" and passes the remainder
     * through addslashes().
     *
     * @param string $str
     * @return string
     * @source https://github.com/rtconner/laravel-plusplus/blob/laravel-5/src/plus-functions.php
     */
    function jse(string $str) : string
    {
        if (!isNullOrEmpty($str)) {
            $singleLine = str_replace(["\r", "\n"], '', $str);

            return addslashes($singleLine);
        }

        return '';
    }
}
33
34
if (!function_exists('e')) {
    /**
     * Convert a value to its HTML-entity-encoded form.
     *
     * Both single and double quotes are encoded (ENT_QUOTES), the input is
     * treated as UTF-8, and entities already present in the input are not
     * encoded a second time.
     *
     * @param  string $value
     * @return string
     */
    function e($value)
    {
        $encodeExistingEntities = false;

        return htmlentities($value, ENT_QUOTES, 'UTF-8', $encodeExistingEntities);
    }
}
46
47
if (!function_exists('csse')) {
    /**
     * Escape a string for use inside a CSS value.
     *
     * Every character other than ASCII letters and digits is replaced by a
     * CSS hexadecimal escape: a backslash, the code point in upper-case hex,
     * and a terminating space. Empty strings and strings made only of digits
     * are returned untouched.
     *
     * The pattern uses the "u" modifier, so the input must be valid UTF-8;
     * on a PCRE failure preg_replace_callback() yields null, which is
     * returned as is.
     *
     * @param  string $value
     * @return string
     * @see https://github.com/auraphp/Aura.Html/blob/2.x/src/Escaper/CssEscaper.php
     */
    function csse($value)
    {
        // pre-empt replacement
        if ($value === '' || ctype_digit($value)) {
            return $value;
        }

        return preg_replace_callback(
            '/[^a-z0-9]/iSu',
            function ($matches) {
                $chr = $matches[0];
                $length = strlen($chr);

                // Single byte: the byte value is the code point.
                if ($length === 1) {
                    return sprintf('\\%X ', ord($chr));
                }

                // Multi-byte UTF-8 sequence: take the payload bits of the
                // lead byte, then append 6 bits per continuation byte.
                // Without this branch the callback returned nothing and the
                // character was silently dropped from the output.
                $codePoint = ord($chr[0]) & (0xFF >> ($length + 1));
                for ($i = 1; $i < $length; $i++) {
                    $codePoint = ($codePoint << 6) | (ord($chr[$i]) & 0x3F);
                }

                return sprintf('\\%X ', $codePoint);
            },
            $value
        );
    }
}
77
78
if (!function_exists('attre')) {
    /**
     * Escape a string for use inside an HTML attribute value.
     *
     * ASCII letters, digits, and the characters , . - _ pass through. Every
     * other character becomes a named entity (&quot; &amp; &lt; &gt;) or a
     * hexadecimal numeric entity. Control characters other than tab, line
     * feed and carriage return, plus the range 0x7f-0x9f, are replaced by
     * the Unicode replacement character entity. Empty strings and strings
     * made only of digits are returned untouched.
     *
     * The pattern uses the "u" modifier, so the input must be valid UTF-8;
     * on a PCRE failure preg_replace_callback() yields null, which is
     * returned as is.
     *
     * @param  string $value
     * @return string
     * @see https://github.com/auraphp/Aura.Html/blob/2.x/src/Escaper/AttrEscaper.php
     */
    function attre($value)
    {
        // pre-empt replacement
        if ($value === '' || ctype_digit($value)) {
            return $value;
        }

        return preg_replace_callback(
            '/[^a-z0-9,\.\-_]/iSu',
            function ($matches) {
                $chr = $matches[0];
                $length = strlen($chr);

                // ord() only reads the first byte, so a multi-byte UTF-8
                // sequence must be decoded into its real code point; using
                // the lead byte alone produced wrong entities (e.g. U+00E9
                // came out as &#xC3;).
                $ord = ord($chr);
                if ($length > 1) {
                    $ord &= 0xFF >> ($length + 1);
                    for ($i = 1; $i < $length; $i++) {
                        $ord = ($ord << 6) | (ord($chr[$i]) & 0x3F);
                    }
                }

                if (($ord <= 0x1f && $chr != "\t" && $chr != "\n" && $chr != "\r")
                    || ($ord >= 0x7f && $ord <= 0x9f)
                ) {
                    // use the Unicode replacement char
                    return '&#xFFFD;';
                }

                $entities = [
                    34 => '&quot;',
                    38 => '&amp;',
                    60 => '&lt;',
                    62 => '&gt;',
                ];

                // is this a mapped entity?
                if (isset($entities[$ord])) {
                    return $entities[$ord];
                }

                // is this an upper-range hex entity?
                if ($ord > 255) {
                    return sprintf('&#x%04X;', $ord);
                }

                // everything else
                return sprintf('&#x%02X;', $ord);
            },
            $value
        );
    }
}
124
125
if (!function_exists('she')) {
    /**
     * Escape a single shell argument.
     *
     * When windows_os() reports a Windows host, the argument is wrapped in
     * double quotes with backslashes and double quotes backslash-escaped;
     * on every other platform escapeshellarg() is used.
     *
     * @param string $input
     * @return string
     */
    function she(string $input) : string
    {
        if (!windows_os()) {
            return escapeshellarg($input);
        }

        $escaped = addcslashes($input, '\\"');

        return '"' . $escaped . '"';
    }
}
141
142
/**
 * Normalize the string.
 *
 * Processing steps:
 *  1. every entry of charsArray() is applied with str_replace()
 *     (value => key);
 *  2. every entry of charsArrayRegEx() is applied with preg_replace()
 *     (pattern => key);
 *  3. the result is passed to normalizerUtf8Safe() requesting Unicode
 *     normalization form D when the Normalizer class is available.
 *
 * If a regular expression fails, or the final result is empty according to
 * isNullOrEmpty(), the untouched input string is returned instead.
 *
 * @param string $s
 * @return string
 * @see https://github.com/illuminate/support/blob/master/Str.php#L38
 * @see http://php.net/manual/en/normalizer.normalize.php#92592
 */
function normalizeUtf8String(string $s) : string
{
    $original_string = $s;

    // Transliterate UTF-8 value to ASCII with chars array map.
    foreach (charsArray() as $key => $val) {
        $s = str_replace($val, $key, $s);
    }

    // Replace non ASCII chars with array regex map.
    foreach (charsArrayRegEx() as $key => $val) {
        $replaced = preg_replace($val, $key, $s);

        // preg_replace() yields null on a PCRE error (e.g. malformed UTF-8);
        // bail out here instead of passing null to a string-typed parameter.
        if ($replaced === null) {
            return $original_string;
        }

        $s = $replaced;
    }

    // Only touch the Normalizer constant when the class is available:
    // referencing it unconditionally is a fatal error without ext-intl.
    $normalizationForm = class_exists('Normalizer') ? Normalizer::FORM_D : null;

    // Normalize utf8 in form D: maps remaining characters with diacritics
    // to their base character followed by the diacritical mark.
    $s = normalizerUtf8Safe($s, $normalizationForm);

    // Fall back to the input when processing produced nothing usable.
    if (isNullOrEmpty($s)) {
        return $original_string;
    }

    return $s;
}
178
179
/**
 * Normalize a UTF-8 string with PHP's Normalizer class when it is loaded,
 * otherwise return the string unchanged.
 *
 * With form D, characters with diacritics are mapped to their base
 * character followed by the diacritical mark.
 * example:  Ú => U´,  á => a`
 *
 * The input string is also returned unchanged when Normalizer::normalize()
 * reports a failure (it returns false, e.g. for malformed UTF-8).
 *
 * @param string $s
 * @param mixed $normalizationForm UTF8 Normalization Form; when empty
 *                                 (per isNullOrEmpty()) Normalizer::FORM_D is used
 * @return string
 */
function normalizerUtf8Safe(string $s, $normalizationForm = null):string
{
    // No autoloading: only an already-loaded Normalizer class is used.
    if (!class_exists("Normalizer", false)) {
        return $s;
    }

    $form = isNullOrEmpty($normalizationForm) ? Normalizer::FORM_D : $normalizationForm;
    $normalized = Normalizer::normalize($s, $form);

    // normalize() returns false on failure; keep the caller's string then.
    return is_string($normalized) ? $normalized : $s;
}
197
198
/**
 * String Sanitizer for Filename
 *
 * Steps, in order:
 *  1. replace every space with $charToReplaceWhiteSpace;
 *  2. repeatedly remove path-traversal sequences (plain and URL-encoded);
 *  3. remove every character that is not a word character, whitespace,
 *     digit or one of -_~,;[](). (plus "/" when $sanitizeForPath is true);
 *  4. remove control characters;
 *  5. run normalizeUtf8String(), remove traversal sequences again (step 3
 *     may have re-formed one, e.g. ".%.%/" becomes "../"), then trim;
 *  6. strip leading ".." pairs and all trailing dots.
 *
 * @param string $fileName
 * @param bool $sanitizeForPath if set to false (default) sanitize file name, otherwise file path name
 * @param string $charToReplaceWhiteSpace replacement for each space character: the default ' '
 * keeps spaces as they are, an empty string removes them.
 * @return string
 * @see for base script idea http://stackoverflow.com/a/2021729
 */
function sanitize_filename(
    string $fileName,
    bool $sanitizeForPath = false,
    string $charToReplaceWhiteSpace = ' '
) : string
{
    // check whitespace
    $fileName = str_replace(' ', $charToReplaceWhiteSpace, $fileName);

    // Remove any runs of periods - avoid Path Traversal Vulnerabilities OWASP
    // https://www.owasp.org/index.php/Path_Traversal
    $notAllowedPath = [
        '//',
        '\\\\',
        '../',
        './',
        '..\\',
        '.\\',
        '%2e%2e%2f',
        '%2e%2e/',
        '..%2f',
        '%2e%2e%5c',
        '%2e%2e\\',
        '..%5c',
        '%252e%252e%255c',
        '..%255c',
        '..%c0%af',
        '..%c1%9c',
    ];

    // Strip the sequences until a full pass removes nothing, because one
    // removal can join the surrounding text into a new forbidden sequence.
    // Uses str_replace()'s counter, so no array-needle str_contains()
    // helper is required.
    $stripTraversal = function (string $name) use ($notAllowedPath) : string {
        do {
            $name = str_replace($notAllowedPath, '', $name, $removed);
        } while ($removed > 0);

        return $name;
    };

    $fileName = $stripTraversal($fileName);

    // Remove anything which isn't a word, whitespace, number
    // or any of the following characters -_~,;[]().
    // If you don't need to handle multi-byte characters
    // you can use preg_replace rather than mb_ereg_replace
    // Thanks @Łukasz Rysiak!
    $fileName = mb_ereg_replace('([^\w\s\d\-_~,;\[\]\(\).' . ($sanitizeForPath ? '\\/' : '') . '])', '', $fileName);

    // remove hexadecimal, non white space chars
    $fileName = mb_ereg_replace('([[:cntrl:]\b\0\n\r\t\f])', '', $fileName);

    // normalize (mb_ereg_replace() may return false/null on error, hence the cast)
    $fileName = normalizeUtf8String((string) $fileName);

    // The filtering above can re-create a traversal sequence out of
    // characters that were previously separated, so strip once more.
    $fileName = trim($stripTraversal($fileName));

    // do not start with ..
    while (strncmp($fileName, '..', 2) === 0) {
        $fileName = (string) substr($fileName, 2);
    }

    // do not end with .. or .
    $fileName = rtrim($fileName, '.');

    return $fileName;
}
269
270
/**
 * String Sanitizer for Path name
 *
 * Delegates to sanitize_filename() with path mode enabled.
 *
 * @param string $pathName
 * @param string $charToReplaceWhiteSpace replacement for each space character; with the
 * default ' ' spaces are kept as they are.
 * @return string
 */
function sanitize_pathname(string $pathName, string $charToReplaceWhiteSpace = ' ') : string
{
    return sanitize_filename($pathName, true, $charToReplaceWhiteSpace);
}
282
283
/**
 * Apply the FILTER_SANITIZE_STRING filter to every top-level value of an
 * array, keeping the original keys.
 *
 * NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 —
 * confirm the target PHP version before relying on it.
 *
 * @param array $data
 *
 * @return array
 */
function sanitize_arr_string_xss(array $data) : array
{
    $sanitizeValue = function ($item) {
        return filter_var($item, FILTER_SANITIZE_STRING);
    };

    // array_map() preserves keys when given a single array.
    return array_map($sanitizeValue, $data);
}
297
298
/**
 * Run a single string through the FILTER_SANITIZE_STRING filter.
 *
 * NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 —
 * confirm the target PHP version before relying on it.
 *
 * @param string $data
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_string_xss(string $data) : string
{
    $sanitized = filter_var($data, FILTER_SANITIZE_STRING);

    return $sanitized;
}
311
312
/**
 * URL-encode a value by running it through the FILTER_SANITIZE_ENCODED filter.
 *
 * @param string $value
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_urlencode($value)
{
    $encoded = filter_var($value, FILTER_SANITIZE_ENCODED);

    return $encoded;
}
325
326
/**
 * Remove characters that are illegal in an e-mail address by running the
 * value through the FILTER_SANITIZE_EMAIL filter.
 *
 * @param string $value
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_email($value)
{
    $cleaned = filter_var($value, FILTER_SANITIZE_EMAIL);

    return $cleaned;
}
339
340
/**
 * Keep only the characters allowed in an integer by running the value
 * through the FILTER_SANITIZE_NUMBER_INT filter.
 *
 * @param string $value
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_numbers($value)
{
    $digitsOnly = filter_var($value, FILTER_SANITIZE_NUMBER_INT);

    return $digitsOnly;
}
353
354
/**
 * Keep only the characters allowed in a decimal number by running the
 * value through the FILTER_SANITIZE_NUMBER_FLOAT filter with the
 * FILTER_FLAG_ALLOW_FRACTION flag (the decimal point is preserved).
 *
 * @param string $value
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_floats($value)
{
    $filter = FILTER_SANITIZE_NUMBER_FLOAT;
    $flags = FILTER_FLAG_ALLOW_FRACTION;

    return filter_var($value, $filter, $flags);
}
367