sanitize.php ➔ normalizeUtf8String() - Code Metrics - Inspection of "prepare 1.3.0" - padosoft/support - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 9802ab...25d319 )

by Lorenzo

created 2016-08-24 17:28 UTC

sanitize.php ➔ normalizeUtf8String() B

↳ Parent: Project

Complexity

Conditions	4
Paths	3

Size

Total Lines	65
Code Lines	47

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	4
eloc	47
nc	3
nop	1
dl	0
loc	65
rs	8.8507
c	0
b	0
f	0

How to fix Long Method

<?php

/**
 * Remove every carriage return ("\r") and line feed ("\n") from the subject.
 *
 * The subject is handed to str_replace() unchanged, so an array of strings
 * is processed element by element.
 *
 * @param string|array $str
 * @return string|array
 */
function strip_nl($str)
{
    $lineBreaks = ["\r", "\n"];

    return str_replace($lineBreaks, '', $str);
}

/**
 * Javascript escape.
 *
 * Drops all carriage returns and line feeds from the string and then
 * backslash-escapes the remainder with addslashes().
 *
 * @param string $str
 * @return string '' when isNullOrEmpty() reports the input as empty, the escaped string otherwise.
 * @source https://github.com/rtconner/laravel-plusplus/blob/laravel-5/src/plus-functions.php
 */
function jse(string $str) : string
{
    if (!isNullOrEmpty($str)) {
        $singleLine = str_replace(["\r", "\n"], "", $str);

        return addslashes($singleLine);
    }

    return '';
}

if (!function_exists('e')) {
    /**
     * Encode a value as HTML entities.
     *
     * Both single and double quotes are encoded (ENT_QUOTES), the input is
     * treated as UTF-8, and entities already present are not encoded twice.
     *
     * @param  string $value
     * @return string
     */
    function e($value)
    {
        $flags = ENT_QUOTES;
        $charset = 'UTF-8';
        $encodeExistingEntities = false;

        return htmlentities($value, $flags, $charset, $encodeExistingEntities);
    }
}

/**
 * Transliterate a UTF-8 string to plain ASCII.
 *
 * Steps, in order:
 *  1. German umlauts and a few other letters are mapped to two-letter ASCII
 *     sequences (they would otherwise lose their diacritic in step 3).
 *  2. The string is canonically decomposed (Normalizer::FORM_D), so that an
 *     accented letter becomes base letter + combining mark.
 *  3. All combining marks (\pM) are removed.
 *  4. Ligatures and stroked/special Latin letters that have no canonical
 *     decomposition are mapped to ASCII look-alikes.
 *  5. Every remaining character outside U+0000..U+007F is removed.
 *
 * The ORIGINAL string is returned unchanged when:
 *  - the intl Normalizer class is not loaded,
 *  - the input is not valid UTF-8,
 *  - normalization or a regular expression fails,
 *  - or the transliterated result would be empty.
 *
 * @param string $s
 * @return string
 * @see http://php.net/manual/en/normalizer.normalize.php#92592
 */
function normalizeUtf8String(string $s) : string
{
    // Normalizer-class missing (intl extension not loaded): nothing we can do.
    if (!class_exists('Normalizer', false)) {
        return $s;
    }

    // Every /u regex below fails on malformed UTF-8; detect that once, up front,
    // instead of letting null results propagate through the pipeline.
    if (preg_match('//u', $s) !== 1) {
        return $s;
    }

    // Two-letter transliterations that must happen BEFORE diacritics are stripped.
    $beforeDecomposition = [
        "\u{00c4}" => 'AE', // Ä
        "\u{00d6}" => 'OE', // Ö
        "\u{00dc}" => 'UE', // Ü
        "\u{00e4}" => 'ae', // ä
        "\u{00f6}" => 'oe', // ö
        "\u{00fc}" => 'ue', // ü
        "\u{00f1}" => 'ny', // ñ
        "\u{00ff}" => 'yu', // ÿ
    ];
    // All replacements are pure ASCII and no key is ASCII, so a single strtr()
    // pass gives the same result as replacing one character after another.
    $result = strtr($s, $beforeDecomposition);

    // Decompose: Ú => U + combining acute, á => a + combining acute, ...
    $result = Normalizer::normalize($result, Normalizer::FORM_D);
    if (!is_string($result)) {
        return $s;
    }

    // Remove the combining marks produced by the decomposition.
    $result = preg_replace('/\pM/u', '', $result);
    if ($result === null) {
        return $s;
    }

    // Letters that survive decomposition because they carry no combining mark.
    $afterDecomposition = [
        "\u{00df}" => 'ss', // ß
        "\u{00c6}" => 'AE', // Æ
        "\u{00e6}" => 'ae', // æ
        "\u{0132}" => 'IJ', // Ĳ
        "\u{0133}" => 'ij', // ĳ
        "\u{0152}" => 'OE', // Œ
        "\u{0153}" => 'oe', // œ
        "\u{00d0}" => 'D',  // Ð
        "\u{0110}" => 'D',  // Đ
        "\u{00f0}" => 'd',  // ð
        "\u{0111}" => 'd',  // đ
        "\u{0126}" => 'H',  // Ħ
        "\u{0127}" => 'h',  // ħ
        "\u{0131}" => 'i',  // ı
        "\u{0138}" => 'k',  // ĸ
        "\u{013f}" => 'L',  // Ŀ
        "\u{0141}" => 'L',  // Ł
        "\u{0140}" => 'l',  // ŀ
        "\u{0142}" => 'l',  // ł
        "\u{014a}" => 'N',  // Ŋ
        "\u{0149}" => 'n',  // ŉ
        "\u{014b}" => 'n',  // ŋ
        "\u{00d8}" => 'O',  // Ø
        "\u{00f8}" => 'o',  // ø
        "\u{017f}" => 's',  // ſ
        "\u{00de}" => 'T',  // Þ
        "\u{0166}" => 'T',  // Ŧ
        "\u{00fe}" => 't',  // þ
        "\u{0167}" => 't',  // ŧ
    ];
    $result = strtr($result, $afterDecomposition);

    // Remove all non-ASCII characters. The upper bound is 0x7F: U+0080 is a
    // C1 control character, not ASCII, and must be stripped as well.
    $result = preg_replace('/[^\x00-\x7f]/u', '', $result);

    // Regex failure, or nothing left after transliteration: fall back to the input.
    if ($result === null || $result === '') {
        return $s;
    }

    return $result;
}

/**
 * String Sanitizer for Filename.
 *
 * Processing order:
 *  1. Each space ' ' is replaced by $charToReplaceWhiteSpace.
 *  2. Known path-traversal sequences (plain and URL-encoded) are removed
 *     repeatedly until none is left.
 *  3. Every character that is not a word character, whitespace, digit or one
 *     of -_~,;[](). is removed ('/' is additionally kept when
 *     $sanitizeForPath is true).
 *  4. Control characters are removed.
 *  5. The result is passed through normalizeUtf8String() and trimmed.
 *  6. Leading '..' pairs and all trailing '.' characters are removed.
 *
 * @param string $fileName
 * @param bool $sanitizeForPath if set to false (default) sanitize a file name, otherwise a file path name
 * @param string $charToReplaceWhiteSpace replacement for each space character. With the default ' '
 * spaces are preserved; with '' they are removed; any other value is substituted for each space.
 * @return string
 * @see for base script idea http://stackoverflow.com/a/2021729
 */
function sanitize_filename(
    string $fileName,
    bool $sanitizeForPath = false,
    string $charToReplaceWhiteSpace = ' '
) : string
{
    // Substitute the requested character for spaces.
    $fileName = str_replace(' ', $charToReplaceWhiteSpace, $fileName);

    // Remove any runs of periods - avoid Path Traversal Vulnerabilities (OWASP)
    // https://www.owasp.org/index.php/Path_Traversal
    $notAllowedPath = [
        '//',
        '\\\\',
        '../',
        './',
        '..\\',
        '.\\',
        '%2e%2e%2f',
        '%2e%2e/',
        '..%2f',
        '%2e%2e%5c',
        '%2e%2e\\',
        '..%5c',
        '%252e%252e%255c',
        '..%255c',
        '..%c0%af',
        '..%c1%9c',
    ];
    // Removing one sequence can splice a new one together (e.g. '....//'),
    // so repeat until a pass changes nothing. str_replace() alters the string
    // exactly when at least one sequence is present, which makes this
    // equivalent to "while the name contains any forbidden sequence" without
    // calling str_contains() with an array needle - the native PHP 8
    // str_contains() only accepts a string needle.
    do {
        $previous = $fileName;
        $fileName = str_replace($notAllowedPath, '', $fileName);
    } while ($fileName !== $previous);

    // Remove anything which isn't a word, whitespace, number
    // or any of the following characters -_~,;[]().
    // If you don't need to handle multi-byte characters
    // you can use preg_replace rather than mb_ereg_replace
    // Thanks @Łukasz Rysiak!
    $fileName = mb_ereg_replace('([^\w\s\d\-_~,;\[\]\(\).' . ($sanitizeForPath ? '\\/' : '') . '])', '', $fileName);

    // Remove control characters.
    $fileName = mb_ereg_replace('([[:cntrl:]\b\0\n\r\t\f])', '', $fileName);

    // Normalize and trim.
    $fileName = trim(normalizeUtf8String($fileName));

    // Do not start with '..' (pairs of leading dots are dropped; a single leading dot is kept).
    while (strncmp($fileName, '..', 2) === 0) {
        $fileName = (string) substr($fileName, 2);
    }

    // Do not end with '.' - this also covers a trailing '..'.
    return rtrim($fileName, '.');
}

/**
 * String Sanitizer for Path name.
 *
 * Delegates to sanitize_filename() with $sanitizeForPath = true, so the
 * forward slash '/' is kept in the result.
 *
 * @param string $pathName
 * @param string $charToReplaceWhiteSpace replacement for each space character; with the default ' '
 * spaces are preserved, otherwise each space is replaced with $charToReplaceWhiteSpace.
 * @return string
 */
function sanitize_pathname(string $pathName, string $charToReplaceWhiteSpace = ' ') : string
{
    return sanitize_filename($pathName, true, $charToReplaceWhiteSpace);
}

/**
 * Perform XSS clean to prevent cross site scripting.
 *
 * Every element of the array is run through filter_var() with
 * FILTER_SANITIZE_STRING; keys are kept as they are.
 *
 * NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - confirm
 * the supported PHP versions before relying on it.
 *
 * @param array $data
 *
 * @return array
 */
function sanitize_arr_string_xss(array $data) : array
{
    return array_map(
        function ($item) {
            return filter_var($item, FILTER_SANITIZE_STRING);
        },
        $data
    );
}

/**
 * Perform XSS clean to prevent cross site scripting.
 *
 * Runs the string through filter_var() with FILTER_SANITIZE_STRING.
 *
 * NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - confirm
 * the supported PHP versions before relying on it.
 *
 * @param string $data
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_string_xss(string $data) : string
{
    $cleaned = filter_var($data, FILTER_SANITIZE_STRING);

    return $cleaned;
}

/**
 * Sanitize the value by URL-encoding it.
 *
 * Runs the value through filter_var() with FILTER_SANITIZE_ENCODED.
 *
 * Usage: '<index>' => 'urlencode'
 *
 * @param string $value
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_urlencode($value)
{
    $encoded = filter_var($value, FILTER_SANITIZE_ENCODED);

    return $encoded;
}

/**
 * Sanitize the string by removing illegal characters from emails.
 *
 * Runs the value through filter_var() with FILTER_SANITIZE_EMAIL.
 *
 * Usage: '<index>' => 'sanitize_email'
 *
 * @param string $value
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_email($value)
{
    return filter_var($value, FILTER_SANITIZE_EMAIL);
}

/**
 * Sanitize the string by removing illegal characters from numbers.
 *
 * Runs the value through filter_var() with FILTER_SANITIZE_NUMBER_INT.
 *
 * @param string $value
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_numbers($value)
{
    $digitsOnly = filter_var($value, FILTER_SANITIZE_NUMBER_INT);

    return $digitsOnly;
}

/**
 * Sanitize the string by removing illegal characters from float numbers.
 *
 * Runs the value through filter_var() with FILTER_SANITIZE_NUMBER_FLOAT and
 * the FILTER_FLAG_ALLOW_FRACTION flag, so the '.' fraction separator is kept.
 *
 * @param string $value
 *
 * @return string
 *
 * @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
 */
function sanitize_floats($value)
{
    $filter = FILTER_SANITIZE_NUMBER_FLOAT;
    $flags = FILTER_FLAG_ALLOW_FRACTION;

    return filter_var($value, $filter, $flags);
}


1			<?php
2
3			/**
4			* Strip new line breaks from a string
5			* @param $str
6			* @return string|array
7			*/
8			function strip_nl($str)
9			{
10			return str_replace("\n", "", str_replace("\r", "", $str));
11			}
12
13			/**
14			* Javascript escape
15			* @param string $str
16			* @return string
17			* @source https://github.com/rtconner/laravel-plusplus/blob/laravel-5/src/plus-functions.php
18			*/
19			function jse(string $str) : string
20			{
21			if (isNullOrEmpty($str)) {
22			return '';
23			}
24			$str = str_replace("\n", "", str_replace("\r", "", $str));
25			return addslashes($str);
26			}
27
28			if (!function_exists('e')) {
29			/**
30			* Escape HTML entities in a string.
31			*
32			* @param string $value
33			* @return string
34			*/
35			function e($value)
36			{
37			return htmlentities($value, ENT_QUOTES, 'UTF-8', false);
38			}
39			}
40
41			/**
42			* Normalize the texts before.
43			* The following function removes all diacritics (marks like accents) from a given UTF8-encoded
44			* texts and returns ASCii-text.
45			* @param string $s
46			* @return string
47			* @see http://php.net/manual/en/normalizer.normalize.php#92592
48			*/
49			function normalizeUtf8String(string $s) : string
50			{
51			$original_string = $s;
52
53			// Normalizer-class missing!
54			if (!class_exists("Normalizer", $autoload = false)) {
55			return $original_string;
56			}
57
58			// maps German (umlauts) and other European characters onto two characters before just removing diacritics
59			$s = preg_replace('/\x{00c4}/u', "AE", $s); // umlaut Ä => AE
60			$s = preg_replace('/\x{00d6}/u', "OE", $s); // umlaut Ö => OE
61			$s = preg_replace('/\x{00dc}/u', "UE", $s); // umlaut Ü => UE
62			$s = preg_replace('/\x{00e4}/u', "ae", $s); // umlaut ä => ae
63			$s = preg_replace('/\x{00f6}/u', "oe", $s); // umlaut ö => oe
64			$s = preg_replace('/\x{00fc}/u', "ue", $s); // umlaut ü => ue
65			$s = preg_replace('/\x{00f1}/u', "ny", $s); // ñ => ny
66			$s = preg_replace('/\x{00ff}/u', "yu", $s); // ÿ => yu
67
68			// maps special characters (characters with diacritics) on their base-character followed by the diacritical mark
69			// exmaple: Ú => U´, á => a`
70			$s = Normalizer::normalize($s, Normalizer::FORM_D);
71
72			$s = preg_replace('/\pM/u', "", $s); // removes diacritics
73
74			$s = preg_replace('/\x{00df}/u', "ss", $s); // maps German ß onto ss
75			$s = preg_replace('/\x{00c6}/u', "AE", $s); // Æ => AE
76			$s = preg_replace('/\x{00e6}/u', "ae", $s); // æ => ae
77			$s = preg_replace('/\x{0132}/u', "IJ", $s); // ? => IJ
78			$s = preg_replace('/\x{0133}/u', "ij", $s); // ? => ij
79			$s = preg_replace('/\x{0152}/u', "OE", $s); // Œ => OE
80			$s = preg_replace('/\x{0153}/u', "oe", $s); // œ => oe
81
82			$s = preg_replace('/\x{00d0}/u', "D", $s); // Ð => D
83			$s = preg_replace('/\x{0110}/u', "D", $s); // Ð => D
84			$s = preg_replace('/\x{00f0}/u', "d", $s); // ð => d
85			$s = preg_replace('/\x{0111}/u', "d", $s); // d => d
86			$s = preg_replace('/\x{0126}/u', "H", $s); // H => H
87			$s = preg_replace('/\x{0127}/u', "h", $s); // h => h
88			$s = preg_replace('/\x{0131}/u', "i", $s); // i => i
89			$s = preg_replace('/\x{0138}/u', "k", $s); // ? => k
90			$s = preg_replace('/\x{013f}/u', "L", $s); // ? => L
91			$s = preg_replace('/\x{0141}/u', "L", $s); // L => L
92			$s = preg_replace('/\x{0140}/u', "l", $s); // ? => l
93			$s = preg_replace('/\x{0142}/u', "l", $s); // l => l
94			$s = preg_replace('/\x{014a}/u', "N", $s); // ? => N
95			$s = preg_replace('/\x{0149}/u', "n", $s); // ? => n
96			$s = preg_replace('/\x{014b}/u', "n", $s); // ? => n
97			$s = preg_replace('/\x{00d8}/u', "O", $s); // Ø => O
98			$s = preg_replace('/\x{00f8}/u', "o", $s); // ø => o
99			$s = preg_replace('/\x{017f}/u', "s", $s); // ? => s
100			$s = preg_replace('/\x{00de}/u', "T", $s); // Þ => T
101			$s = preg_replace('/\x{0166}/u', "T", $s); // T => T
102			$s = preg_replace('/\x{00fe}/u', "t", $s); // þ => t
103			$s = preg_replace('/\x{0167}/u', "t", $s); // t => t
104
105			// remove all non-ASCii characters
106			$s = preg_replace('/[^\0-\x80]/u', "", $s);
107
108			// possible errors in UTF8-regular-expressions
109			if ($s == null || $s == '') {
110			return $original_string;
111			}
112			return $s;
113			}
114
115			/**
116			* String Sanitizer for Filename
117			* @param string $fileName
118			* @param bool $sanitizeForPath if set to false (default) sanitize file name, otherwise file path name
119			* @param string $charToReplaceWhiteSpace if empty (default) or ' ' then white space ' ' will be preservede
120			* othrwise it will be replaced with $charToReplaceWhiteSpace.
121			* @return string
122			* @see for base script idea http://stackoverflow.com/a/2021729
123			*/
124			function sanitize_filename(
125			string $fileName,
126			bool $sanitizeForPath = false,
127			string $charToReplaceWhiteSpace = ' '
128			) : string
129			{
130			//check whitespace
131			$fileName = str_replace(' ', $charToReplaceWhiteSpace, $fileName);
132
133			// Remove any runs of periods - avoid Path Traversal Vulnerabilities OSWAP
134			// https://www.owasp.org/index.php/Path_Traversal
135			$notAllowedPath = [
136			'//',
137			'\\\\',
138			'../',
139			'./',
140			'..\\',
141			'.\\',
142			'%2e%2e%2f',
143			'%2e%2e/',
144			'..%2f',
145			'%2e%2e%5c',
146			'%2e%2e\\',
147			'..%5c',
148			'%252e%252e%255c',
149			'..%255c',
150			'..%c0%af',
151			'..%c1%9c',
152			];
153			while (str_contains($fileName, $notAllowedPath) !== false) {
154			$fileName = str_replace($notAllowedPath, '', $fileName);
155			}
156
157			// Remove anything which isn't a word, whitespace, number
158			// or any of the following caracters -_~,;[]().
159			// If you don't need to handle multi-byte characters
160			// you can use preg_replace rather than mb_ereg_replace
161			// Thanks @Łukasz Rysiak!
162			$fileName = mb_ereg_replace('([^\w\s\d\-_~,;\[\]\(\).' . ($sanitizeForPath ? '\\/' : '') . '])', '', $fileName);
163
164			// remove exadecimal, non white space chars
165			//$fileName = mb_ereg_replace('([[:cntrl:][:xdigit:]\b\0\n\r\t\f])', '', $fileName);
			0 ignored issues – show Unused Code Comprehensibility introduced 2016-08-24 17:31 UTC by Report Bug Copy Issue Report `60%` of this comment could be valid code. Did you maybe forget this after debugging? Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it. The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production. This check looks for comments that seem to be mostly valid code and reports them. Loading history...
166			$fileName = mb_ereg_replace('([[:cntrl:]\b\0\n\r\t\f])', '', $fileName);
167
168			//normalize and trim
169			$fileName = trim(normalizeUtf8String($fileName));
170
171			//do not start with ..
172			while (starts_with($fileName, '..') !== false) {
173			$fileName = substr($fileName, 2);
174			}
175
176			//do not end with ..
177			while (ends_with($fileName, '..') !== false) {
178			$fileName = substr($fileName, 0, -2);
179			}
180			//do not end with .
181			while (ends_with($fileName, '.') !== false) {
182			$fileName = substr($fileName, 0, -1);
183			}
184
185			return $fileName;
186			}
187
188			/**
189			* String Sanitizer for Path name
190			* @param string $pathName
191			* @param string $charToReplaceWhiteSpace if empty (default) or ' ' then white space ' ' will be preservede
192			* othrwise it will be replaced with $charToReplaceWhiteSpace.
193			* @return string
194			*/
195
196			function sanitize_pathname(string $pathName, string $charToReplaceWhiteSpace) : string
197			{
198			return sanitize_filename($pathName, true, $charToReplaceWhiteSpace);
199			}
200
201			/**
202			* Perform XSS clean to prevent cross site scripting.
203			*
204			* @param array $data
205			*
206			* @return array
207			*/
208			function sanitize_arr_string_xss(array $data) : array
209			{
210			foreach ($data as $k => $v) {
211			$data[$k] = filter_var($v, FILTER_SANITIZE_STRING);
212			}
213			return $data;
214			}
215
216			/**
217			* Perform XSS clean to prevent cross site scripting.
218			*
219			* @param string $data
220			*
221			* @return string
222			*
223			* @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
224			*/
225			function sanitize_string_xss(string $data) : string
226			{
227			return filter_var($data, FILTER_SANITIZE_STRING);
228			}
229
230			/**
231			* Sanitize the string by urlencoding characters.
232			*
233			* Usage: '<index>' => 'urlencode'
234			*
235			* @param string $value
236			*
237			* @return string
238			*
239			* @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
240			*/
241			function sanitize_urlencode($value)
242			{
243			return filter_var($value, FILTER_SANITIZE_ENCODED);
244			}
245
246			/**
247			* Sanitize the string by removing illegal characters from emails.
248			*
249			* Usage: '<index>' => 'sanitize_email'
250			*
251			* @param string $value
252			* @param array $params
			0 ignored issues – show Bug introduced 2016-08-24 17:31 UTC by Report Bug Copy Issue Report There is no parameter named `$params`. Was it maybe removed? This check looks for PHPDoc comments describing methods or function parameters that do not exist on the corresponding method or function. Consider the following example. The parameter `$italy` is not defined by the method `finale(...)`. /** * @param array $germany * @param array $island * @param array $italy */ function finale($germany, $island) { return "2:1"; } The most likely cause is that the parameter was removed, but the annotation was not. Loading history...
253			*
254			* @return string
255			*
256			* @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
257			*/
258			function sanitize_email($value)
259			{
260			return filter_var($value, FILTER_SANITIZE_EMAIL);
261			}
262
263			/**
264			* Sanitize the string by removing illegal characters from numbers.
265			*
266			* @param string $value
267			*
268			* @return string
269			*
270			* @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
271			*/
272			function sanitize_numbers($value)
273			{
274			return filter_var($value, FILTER_SANITIZE_NUMBER_INT);
275			}
276
277			/**
278			* Sanitize the string by removing illegal characters from float numbers.
279			*
280			* @param string $value
281			*
282			* @return string
283			*
284			* @see https://github.com/Wixel/GUMP/blob/master/gump.class.php
285			*/
286			function sanitize_floats($value)
287			{
288			return filter_var($value, FILTER_SANITIZE_NUMBER_FLOAT, FILTER_FLAG_ALLOW_FRACTION);
289			}
290

padosoft / support

Push — master ( 9802ab...25d319 )

sanitize.php ➔ normalizeUtf8String() B

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like