Completed
Push — master ( 9802ab...25d319 )
by Lorenzo
03:18
created

sanitize.php ➔ sanitize_filename()   B

Complexity

Conditions 6
Paths 16

Size

Total Lines 63
Code Lines 35

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 35
nc 16
nop 3
dl 0
loc 63
rs 8.6498
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive part of the body into its own well-named method.

1
<?php
2
3
/**
 * Remove every carriage return and line feed from the given value.
 *
 * @param string|array $str String (or array of strings) to clean.
 * @return string|array The input with all "\r" and "\n" characters removed.
 */
function strip_nl($str)
{
    // Same order as before: carriage returns first, then line feeds.
    $lineBreaks = ["\r", "\n"];

    return str_replace($lineBreaks, '', $str);
}
12
13
/**
 * Escape a string for embedding inside a JavaScript string literal.
 *
 * Carriage returns and line feeds are dropped, then the result is passed
 * through addslashes(). An input that isNullOrEmpty() reports as empty
 * yields an empty string (isNullOrEmpty() is defined outside this block).
 *
 * @param string $str Raw text.
 * @return string Single-line, backslash-escaped text.
 * @source https://github.com/rtconner/laravel-plusplus/blob/laravel-5/src/plus-functions.php
 */
function jse(string $str) : string
{
    if (!isNullOrEmpty($str)) {
        $singleLine = str_replace(["\r", "\n"], '', $str);

        return addslashes($singleLine);
    }

    return '';
}
27
28
if (!function_exists('e')) {
    /**
     * Escape HTML entities in a string.
     *
     * Both single and double quotes are converted (ENT_QUOTES), the input is
     * treated as UTF-8, and entities that are already encoded are left alone
     * (double encoding is disabled).
     *
     * @param  string|null $value Text to escape; null is treated as an empty string.
     * @return string
     */
    function e($value)
    {
        // Coalescing null keeps the historical '' result while avoiding the
        // PHP 8.1+ deprecation for passing null to htmlentities().
        return htmlentities($value ?? '', ENT_QUOTES, 'UTF-8', false);
    }
}
40
41
/**
 * Transliterate a UTF-8 string to plain ASCII.
 *
 * Steps:
 *  1. German umlauts plus ñ/ÿ are expanded to two ASCII letters (Ä => AE, ...).
 *  2. The string is decomposed (Unicode NFD) and all combining marks are
 *     removed, so "é" becomes "e".
 *  3. Letters that have no canonical decomposition (ß, Æ, Ø, Ł, ...) are mapped
 *     to an ASCII replacement.
 *  4. Every remaining non-ASCII character is dropped.
 *
 * The original string is returned unchanged when the intl Normalizer class is
 * not loaded, when normalisation or a regular expression fails (e.g. invalid
 * UTF-8), or when the result would be empty.
 *
 * @param string $s UTF-8 encoded text.
 * @return string ASCII text, or the untouched input in the fallback cases above.
 * @see http://php.net/manual/en/normalizer.normalize.php#92592
 */
function normalizeUtf8String(string $s) : string
{
    $original_string = $s;

    // Normalizer class missing (ext-intl not loaded): nothing we can do.
    if (!class_exists('Normalizer', false)) {
        return $original_string;
    }

    // Must run BEFORE decomposition: NFD would otherwise reduce these letters
    // to their bare base character (Ä => A) instead of the two-letter form.
    $s = strtr($s, [
        "\u{00C4}" => 'AE', // Ä
        "\u{00D6}" => 'OE', // Ö
        "\u{00DC}" => 'UE', // Ü
        "\u{00E4}" => 'ae', // ä
        "\u{00F6}" => 'oe', // ö
        "\u{00FC}" => 'ue', // ü
        "\u{00F1}" => 'ny', // ñ
        "\u{00FF}" => 'yu', // ÿ
    ]);

    // Split accented characters into base character + combining mark
    // (example: Ú => U + acute accent).
    $s = Normalizer::normalize($s, Normalizer::FORM_D);
    if (!is_string($s)) {
        // normalize() returns false on invalid input.
        return $original_string;
    }

    // Remove the combining marks (diacritics).
    $s = preg_replace('/\pM/u', '', $s);
    if ($s === null) {
        return $original_string;
    }

    // Letters without a canonical decomposition need an explicit replacement.
    $s = strtr($s, [
        "\u{00DF}" => 'ss', // ß
        "\u{00C6}" => 'AE', // Æ
        "\u{00E6}" => 'ae', // æ
        "\u{0132}" => 'IJ', // Ĳ
        "\u{0133}" => 'ij', // ĳ
        "\u{0152}" => 'OE', // Œ
        "\u{0153}" => 'oe', // œ
        "\u{00D0}" => 'D',  // Ð
        "\u{0110}" => 'D',  // Đ
        "\u{00F0}" => 'd',  // ð
        "\u{0111}" => 'd',  // đ
        "\u{0126}" => 'H',  // Ħ
        "\u{0127}" => 'h',  // ħ
        "\u{0131}" => 'i',  // ı
        "\u{0138}" => 'k',  // ĸ
        "\u{013F}" => 'L',  // Ŀ
        "\u{0141}" => 'L',  // Ł
        "\u{0140}" => 'l',  // ŀ
        "\u{0142}" => 'l',  // ł
        "\u{014A}" => 'N',  // Ŋ
        "\u{0149}" => 'n',  // ŉ
        "\u{014B}" => 'n',  // ŋ
        "\u{00D8}" => 'O',  // Ø
        "\u{00F8}" => 'o',  // ø
        "\u{017F}" => 's',  // ſ
        "\u{00DE}" => 'T',  // Þ
        "\u{0166}" => 'T',  // Ŧ
        "\u{00FE}" => 't',  // þ
        "\u{0167}" => 't',  // ŧ
    ]);

    // Remove all non-ASCII characters. ASCII ends at 0x7F; the previous upper
    // bound of 0x80 let U+0080 slip through.
    $s = preg_replace('/[^\x00-\x7F]/u', '', $s);

    // Regex failure, or nothing left: fall back to the input.
    if ($s === null || $s === '') {
        return $original_string;
    }

    return $s;
}
114
115
/**
 * Sanitize a string so it can be used as a file name or, optionally, a path name.
 *
 * Processing order:
 *  1. Spaces are replaced with $charToReplaceWhiteSpace.
 *  2. Known path-traversal sequences (plain and URL-encoded) are removed
 *     repeatedly until none is left.
 *  3. Every character that is not a word character, whitespace, digit or one
 *     of -_~,;[](). is removed ("/" is additionally kept in path mode).
 *  4. Control characters are removed.
 *  5. The result is transliterated with normalizeUtf8String(), the traversal
 *     sequences are stripped again, and surrounding whitespace is trimmed.
 *  6. Leading ".." pairs and all trailing dots are removed.
 *
 * @param string $fileName Raw, untrusted name.
 * @param bool $sanitizeForPath false (default) sanitizes a file name; true sanitizes
 * a path name, i.e. "/" is allowed to stay.
 * @param string $charToReplaceWhiteSpace replacement for each space character. The
 * default ' ' leaves spaces as they are; any other value (including an empty
 * string, which deletes them) is substituted for every space.
 * @return string
 * @see for base script idea http://stackoverflow.com/a/2021729
 * @see https://www.owasp.org/index.php/Path_Traversal
 */
function sanitize_filename(
    string $fileName,
    bool $sanitizeForPath = false,
    string $charToReplaceWhiteSpace = ' '
) : string
{
    // Sequences that must never survive - avoid Path Traversal vulnerabilities (OWASP).
    $notAllowedPath = [
        '//',
        '\\\\',
        '../',
        './',
        '..\\',
        '.\\',
        '%2e%2e%2f',
        '%2e%2e/',
        '..%2f',
        '%2e%2e%5c',
        '%2e%2e\\',
        '..%5c',
        '%252e%252e%255c',
        '..%255c',
        '..%c0%af',
        '..%c1%9c',
    ];

    // Removing one sequence can splice a new one together ("....//" => "../"),
    // so repeat until the string stops changing. Written with str_replace()
    // only: PHP 8's native str_contains() rejects an array needle.
    $stripTraversal = static function (string $name) use ($notAllowedPath) : string {
        do {
            $previous = $name;
            $name = str_replace($notAllowedPath, '', $name);
        } while ($name !== $previous);

        return $name;
    };

    // Check whitespace.
    $fileName = str_replace(' ', $charToReplaceWhiteSpace, $fileName);

    $fileName = $stripTraversal($fileName);

    // Remove anything which isn't a word, whitespace, number
    // or any of the following characters -_~,;[]().
    // mb_ereg_replace is used (rather than preg_replace) to handle
    // multi-byte characters. Thanks @Łukasz Rysiak!
    $extraAllowed = $sanitizeForPath ? '\\/' : '';
    $fileName = (string) mb_ereg_replace('([^\w\s\d\-_~,;\[\]\(\).' . $extraAllowed . '])', '', $fileName);

    // Remove control characters.
    $fileName = (string) mb_ereg_replace('([[:cntrl:]\b\0\n\r\t\f])', '', $fileName);

    // Normalize, then strip traversal sequences a second time: dropping a
    // character above (or during transliteration) can create a sequence that
    // was not there before, e.g. "..%/" => "../" in path mode. In file-name
    // mode no slash, backslash or "%" is left at this point, so this second
    // pass cannot match anything there.
    $fileName = trim($stripTraversal(normalizeUtf8String($fileName)));

    // Do not start with ".." (removed pairwise, so a single leading dot may remain).
    while (strncmp($fileName, '..', 2) === 0) {
        $fileName = (string) substr($fileName, 2);
    }

    // Do not end with "." or "..".
    return rtrim($fileName, '.');
}
187
188
/**
 * String sanitizer for a path name.
 *
 * Thin wrapper around sanitize_filename() with path mode switched on.
 *
 * @param string $pathName Raw, untrusted path name.
 * @param string $charToReplaceWhiteSpace replacement for each space character. The
 * default ' ' leaves spaces as they are; any other value is substituted for every space.
 * @return string
 */
function sanitize_pathname(string $pathName, string $charToReplaceWhiteSpace = ' ') : string
{
    return sanitize_filename($pathName, true, $charToReplaceWhiteSpace);
}
200
201
/**
 * Run every element of an array through FILTER_SANITIZE_STRING to reduce
 * the risk of cross site scripting. Keys are preserved.
 *
 * NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1.
 *
 * @param array $data Values to clean.
 *
 * @return array Same keys, each value replaced by the filter_var() result.
 */
function sanitize_arr_string_xss(array $data) : array
{
    return array_map(
        function ($item) {
            return filter_var($item, FILTER_SANITIZE_STRING);
        },
        $data
    );
}
215
216
/**
 * Clean a single string with FILTER_SANITIZE_STRING to reduce the risk of
 * cross site scripting.
 *
 * NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1.
 *
 * @param string $data Text to clean.
 *
 * @return string The filter_var() result.
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_string_xss(string $data) : string
{
    $clean = filter_var($data, FILTER_SANITIZE_STRING);

    return $clean;
}
229
230
/**
 * URL-encode a string using FILTER_SANITIZE_ENCODED.
 *
 * Usage: '<index>' => 'urlencode'
 *
 * @param string $value Text to encode.
 *
 * @return string The filter_var() result.
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_urlencode($value)
{
    $encoded = filter_var($value, FILTER_SANITIZE_ENCODED);

    return $encoded;
}
245
246
/**
 * Sanitize the string by removing characters that are illegal in an e-mail
 * address (FILTER_SANITIZE_EMAIL).
 *
 * Usage: '<index>' => 'sanitize_email'
 *
 * @param string $value Candidate e-mail address.
 *
 * @return string The filter_var() result.
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_email($value)
{
    return filter_var($value, FILTER_SANITIZE_EMAIL);
}
262
263
/**
 * Strip everything that cannot be part of an integer, using
 * FILTER_SANITIZE_NUMBER_INT.
 *
 * @param string $value Text to clean.
 *
 * @return string The filter_var() result.
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_numbers($value)
{
    $digits = filter_var($value, FILTER_SANITIZE_NUMBER_INT);

    return $digits;
}
276
277
/**
 * Strip everything that cannot be part of a decimal number, using
 * FILTER_SANITIZE_NUMBER_FLOAT with FILTER_FLAG_ALLOW_FRACTION so the
 * fraction separator "." is kept.
 *
 * @param string $value Text to clean.
 *
 * @return string The filter_var() result.
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_floats($value)
{
    $decimal = filter_var($value, FILTER_SANITIZE_NUMBER_FLOAT, FILTER_FLAG_ALLOW_FRACTION);

    return $decimal;
}
290