Issues in UTF8Helper.php (staging) - Issues in staging - mautic/mautic - Measure and Improve Code Quality continuously with Scrutinizer

Issues (3627)

app/bundles/CoreBundle/Helper/UTF8Helper.php (8 issues)

Labels

Bug 8

Severity

Minor 8

Alan Hartless 8

<?php

/*
 * @copyright   2015 Mautic Contributors. All rights reserved
 * @author      Mautic
 *
 * @link        http://mautic.org
 *
 * @license     GNU/GPLv3 http://www.gnu.org/licenses/gpl-3.0.html
 */

/*
Copyright (c) 2008 Sebastián Grignoli
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3. Neither the name of copyright holders nor the names of its
   contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * @author   "Sebastián Grignoli" <[email protected]>
 *
 * @version  2.0
 *
 * @link     https://github.com/neitanod/forceutf8
 *
 * @example  https://github.com/neitanod/forceutf8
 *
 * @license  Revised BSD
 */

namespace Mautic\CoreBundle\Helper;

/**
 * Best-effort conversion helpers between UTF-8 and Windows-1252 / ISO-8859-1.
 *
 * Derived from the ForceUTF8 library (https://github.com/neitanod/forceutf8).
 * All methods are static; the public ones accept either a string or a
 * (possibly nested) array of strings unless noted otherwise.
 */
class UTF8Helper
{
    const ICONV_TRANSLIT = 'TRANSLIT';
    const ICONV_IGNORE   = 'IGNORE';
    const WITHOUT_ICONV  = '';

    /**
     * Windows-1252 byte value (0x80-0x9F range) => UTF-8 byte sequence.
     * Byte values absent from this map are converted as plain ISO-8859-1.
     */
    protected static $win1252ToUtf8 = [
        128 => "\xe2\x82\xac",
        130 => "\xe2\x80\x9a",
        131 => "\xc6\x92",
        132 => "\xe2\x80\x9e",
        133 => "\xe2\x80\xa6",
        134 => "\xe2\x80\xa0",
        135 => "\xe2\x80\xa1",
        136 => "\xcb\x86",
        137 => "\xe2\x80\xb0",
        138 => "\xc5\xa0",
        139 => "\xe2\x80\xb9",
        140 => "\xc5\x92",
        142 => "\xc5\xbd",
        145 => "\xe2\x80\x98",
        146 => "\xe2\x80\x99",
        147 => "\xe2\x80\x9c",
        148 => "\xe2\x80\x9d",
        149 => "\xe2\x80\xa2",
        150 => "\xe2\x80\x93",
        151 => "\xe2\x80\x94",
        152 => "\xcb\x9c",
        153 => "\xe2\x84\xa2",
        154 => "\xc5\xa1",
        155 => "\xe2\x80\xba",
        156 => "\xc5\x93",
        158 => "\xc5\xbe",
        159 => "\xc5\xb8",
    ];

    /**
     * UTF-8 encoded C1 control characters (what a Windows-1252 byte becomes when
     * it is wrongly converted as ISO-8859-1) => the intended UTF-8 sequence.
     */
    protected static $brokenUtf8ToUtf8 = [
        "\xc2\x80" => "\xe2\x82\xac",
        "\xc2\x82" => "\xe2\x80\x9a",
        "\xc2\x83" => "\xc6\x92",
        "\xc2\x84" => "\xe2\x80\x9e",
        "\xc2\x85" => "\xe2\x80\xa6",
        "\xc2\x86" => "\xe2\x80\xa0",
        "\xc2\x87" => "\xe2\x80\xa1",
        "\xc2\x88" => "\xcb\x86",
        "\xc2\x89" => "\xe2\x80\xb0",
        "\xc2\x8a" => "\xc5\xa0",
        "\xc2\x8b" => "\xe2\x80\xb9",
        "\xc2\x8c" => "\xc5\x92",
        "\xc2\x8e" => "\xc5\xbd",
        "\xc2\x91" => "\xe2\x80\x98",
        "\xc2\x92" => "\xe2\x80\x99",
        "\xc2\x93" => "\xe2\x80\x9c",
        "\xc2\x94" => "\xe2\x80\x9d",
        "\xc2\x95" => "\xe2\x80\xa2",
        "\xc2\x96" => "\xe2\x80\x93",
        "\xc2\x97" => "\xe2\x80\x94",
        "\xc2\x98" => "\xcb\x9c",
        "\xc2\x99" => "\xe2\x84\xa2",
        "\xc2\x9a" => "\xc5\xa1",
        "\xc2\x9b" => "\xe2\x80\xba",
        "\xc2\x9c" => "\xc5\x93",
        "\xc2\x9e" => "\xc5\xbe",
        "\xc2\x9f" => "\xc5\xb8",
    ];

    /**
     * UTF-8 byte sequence => single Windows-1252 byte (inverse of $win1252ToUtf8).
     */
    protected static $utf8ToWin1252 = [
        "\xe2\x82\xac" => "\x80",
        "\xe2\x80\x9a" => "\x82",
        "\xc6\x92"     => "\x83",
        "\xe2\x80\x9e" => "\x84",
        "\xe2\x80\xa6" => "\x85",
        "\xe2\x80\xa0" => "\x86",
        "\xe2\x80\xa1" => "\x87",
        "\xcb\x86"     => "\x88",
        "\xe2\x80\xb0" => "\x89",
        "\xc5\xa0"     => "\x8a",
        "\xe2\x80\xb9" => "\x8b",
        "\xc5\x92"     => "\x8c",
        "\xc5\xbd"     => "\x8e",
        "\xe2\x80\x98" => "\x91",
        "\xe2\x80\x99" => "\x92",
        "\xe2\x80\x9c" => "\x93",
        "\xe2\x80\x9d" => "\x94",
        "\xe2\x80\xa2" => "\x95",
        "\xe2\x80\x93" => "\x96",
        "\xe2\x80\x94" => "\x97",
        "\xcb\x9c"     => "\x98",
        "\xe2\x84\xa2" => "\x99",
        "\xc5\xa1"     => "\x9a",
        "\xe2\x80\xba" => "\x9b",
        "\xc5\x93"     => "\x9c",
        "\xc5\xbe"     => "\x9e",
        "\xc5\xb8"     => "\x9f",
    ];

    /**
     * Function \ForceUTF8\Encoding::toUTF8.
     *
     * Leaves byte sequences that already look like UTF-8 alone, while converting
     * every other non-ASCII byte to UTF-8. Bytes that need conversion are assumed
     * to be Windows-1252 or ISO-8859-1.
     *
     * Because the detection is purely structural, it may leave characters
     * unconverted in these scenarios:
     *
     * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
     *    is followed by any of these:  ("group B")
     *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
     *    For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
     *    The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
     *    is also a valid UTF-8 sequence, and will be left unchanged.
     *
     * 2) when any of these: àáâãäåæçèéêëìíîï  is followed by TWO chars from group B,
     * 3) when any of these: ðñòó  is followed by THREE chars from group B.
     *
     * @param mixed $text A string, or an array whose values are converted recursively.
     *                    Any other type is returned untouched.
     *
     * @return mixed The same value with every string UTF-8 encoded
     */
    public static function toUTF8($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toUTF8($v);
            }

            return $text;
        }

        if (!is_string($text)) {
            return $text;
        }

        $max = self::strlen($text);
        $buf = '';

        for ($i = 0; $i < $max; ++$i) {
            $char = $text[$i];
            $byte = ord($char);

            // Plain ASCII never needs conversion.
            if ($byte < 0x80) {
                $buf .= $char;

                continue;
            }

            // 0x80-0xBF: a continuation byte with no lead byte in front of it,
            // so it cannot be UTF-8. Prefer the Windows-1252 special cases.
            if ($byte < 0xc0) {
                $buf .= isset(self::$win1252ToUtf8[$byte])
                    ? self::$win1252ToUtf8[$byte]
                    : self::latin1ByteToUtf8($byte);

                continue;
            }

            // 0xC0-0xFF: possible UTF-8 lead byte. Work out how many continuation
            // bytes it announces (0 means it can never start a sequence).
            if ($byte <= 0xdf) {
                $expected = 1;
            } elseif ($byte <= 0xef) {
                $expected = 2;
            } elseif ($byte <= 0xf7) {
                $expected = 3;
            } else {
                $expected = 0;
            }

            $tail = $expected > 0 ? self::readContinuationBytes($text, $i + 1, $expected, $max) : null;

            if (null !== $tail) {
                // Structurally valid UTF-8 already: copy it through and skip its tail.
                $buf .= $char.$tail;
                $i += $expected;
            } else {
                // Not valid UTF-8: treat the lead byte as a single Latin-1 character.
                $buf .= self::latin1ByteToUtf8($byte);
            }
        }

        return $buf;
    }

    /**
     * Converts UTF-8 text to Windows-1252.
     *
     * @param mixed  $text   A string, or an array converted recursively; other types are returned untouched
     * @param string $option One of the ICONV_* / WITHOUT_ICONV class constants
     *
     * @return mixed
     */
    public static function toWin1252($text, $option = self::WITHOUT_ICONV)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toWin1252($v, $option);
            }

            return $text;
        }

        if (is_string($text)) {
            return static::utf8_decode($text, $option);
        }

        return $text;
    }

    /**
     * Alias of toWin1252().
     *
     * @param mixed  $text
     * @param string $option One of the ICONV_* / WITHOUT_ICONV class constants
     *
     * @return mixed
     */
    public static function toISO8859($text, $option = self::WITHOUT_ICONV)
    {
        return self::toWin1252($text, $option);
    }

    /**
     * Alias of toWin1252().
     *
     * @param mixed  $text
     * @param string $option One of the ICONV_* / WITHOUT_ICONV class constants
     *
     * @return mixed
     */
    public static function toLatin1($text, $option = self::WITHOUT_ICONV)
    {
        return self::toWin1252($text, $option);
    }

    /**
     * Repairs text that was UTF-8 encoded more than once by repeatedly decoding
     * and re-encoding it until the result stops changing.
     *
     * @param mixed  $text   A string, or an array whose values are fixed recursively
     * @param string $option One of the ICONV_* / WITHOUT_ICONV class constants
     *
     * @return mixed
     */
    public static function fixUTF8($text, $option = self::WITHOUT_ICONV)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::fixUTF8($v, $option);
            }

            return $text;
        }

        $last = '';
        // Strict comparison: byte strings must never be compared as numbers.
        while ($last !== $text) {
            $last = $text;
            $text = self::toUTF8(static::utf8_decode($text, $option));
        }

        return self::toUTF8(static::utf8_decode($text, $option));
    }

    /**
     * If you received a UTF-8 string that was converted from Windows-1252 as if it were
     * ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F), use this function to fix it.
     *
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string|string[] $text
     *
     * @return string|string[]
     */
    public static function UTF8FixWin1252Chars($text)
    {
        return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);
    }

    /**
     * Strips a leading UTF-8 byte order mark (EF BB BF), if there is one.
     *
     * @param string $str
     *
     * @return string
     */
    public static function removeBOM($str = '')
    {
        if (substr($str, 0, 3) === "\xef\xbb\xbf") {
            $str = substr($str, 3);
        }

        return $str;
    }

    /**
     * Byte length of $text, even when mbstring.func_overload replaces strlen()
     * with a character-counting implementation.
     *
     * @param string $text
     *
     * @return int
     */
    protected static function strlen($text)
    {
        return (function_exists('mb_strlen') && ((int) ini_get('mbstring.func_overload')) & 2) ?
            mb_strlen($text, '8bit') : strlen($text);
    }

    /**
     * Maps a loosely written encoding label (e.g. "utf-8", "latin1", "Windows-1252")
     * to either 'UTF-8' or 'ISO-8859-1'. Unrecognised labels yield 'UTF-8'.
     *
     * @param string $encodingLabel
     *
     * @return string 'UTF-8' or 'ISO-8859-1'
     */
    public static function normalizeEncoding($encodingLabel)
    {
        $encoding     = strtoupper($encodingLabel);
        $encoding     = preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);
        $equivalences = [
            'ISO88591'    => 'ISO-8859-1',
            'ISO8859'     => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'WIN1252'     => 'ISO-8859-1',
            'WINDOWS1252' => 'ISO-8859-1',
        ];

        return isset($equivalences[$encoding]) ? $equivalences[$encoding] : 'UTF-8';
    }

    /**
     * Encodes $text into the encoding named by $encodingLabel.
     *
     * @param string $encodingLabel Any label understood by normalizeEncoding()
     * @param mixed  $text
     *
     * @return mixed
     */
    public static function encode($encodingLabel, $text)
    {
        if ('ISO-8859-1' === self::normalizeEncoding($encodingLabel)) {
            return self::toLatin1($text);
        }

        return self::toUTF8($text);
    }

    /**
     * Decodes UTF-8 to Windows-1252, either through iconv or through the built-in fallback.
     *
     * NOTE(review): the fallback relies on PHP's utf8_decode(), which is deprecated as of
     * PHP 8.2. It is kept here to preserve existing output; plan a migration before it is removed.
     *
     * @param string $text
     * @param string $option One of the ICONV_* / WITHOUT_ICONV class constants
     *
     * @return string|false iconv() reports failure by returning false
     */
    protected static function utf8_decode($text, $option)
    {
        if (self::WITHOUT_ICONV === $option || !function_exists('iconv')) {
            return utf8_decode(
                str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), self::toUTF8($text))
            );
        }

        if (self::ICONV_TRANSLIT === $option) {
            $suffix = '//TRANSLIT';
        } elseif (self::ICONV_IGNORE === $option) {
            $suffix = '//IGNORE';
        } else {
            $suffix = '';
        }

        return iconv('UTF-8', 'Windows-1252'.$suffix, $text);
    }

    /**
     * Encodes one ISO-8859-1 byte value in the 0x80-0xFF range as its two-byte UTF-8 sequence.
     *
     * Uses integer shifts and masks only; dividing the byte value and handing the
     * resulting float to chr() triggers an implicit-conversion deprecation on PHP 8.1+.
     *
     * @param int $byte
     *
     * @return string
     */
    private static function latin1ByteToUtf8($byte)
    {
        return chr(0xc0 | ($byte >> 6)).chr(0x80 | ($byte & 0x3f));
    }

    /**
     * Reads $count UTF-8 continuation bytes (0x80-0xBF) starting at $offset.
     *
     * Bytes are fetched by index rather than with substr() so the result stays
     * byte-accurate even if mbstring function overloading is active.
     *
     * @param string $text
     * @param int    $offset Index of the first expected continuation byte
     * @param int    $count  Number of continuation bytes required
     * @param int    $max    Byte length of $text
     *
     * @return string|null The continuation bytes, or null when the string ends early
     *                     or one of the bytes is not a continuation byte
     */
    private static function readContinuationBytes($text, $offset, $count, $max)
    {
        if ($offset + $count > $max) {
            return null;
        }

        $bytes = '';
        for ($j = $offset; $j < $offset + $count; ++$j) {
            $value = ord($text[$j]);
            if ($value < 0x80 || $value > 0xbf) {
                return null;
            }

            $bytes .= $text[$j];
        }

        return $bytes;
    }
}


1			<?php
2
3			/*
4			* @copyright 2015 Mautic Contributors. All rights reserved
5			* @author Mautic
6			*
7			* @link http://mautic.org
8			*
9			* @license GNU/GPLv3 http://www.gnu.org/licenses/gpl-3.0.html
10			*/
11
12			/*
13			Copyright (c) 2008 Sebastián Grignoli
14			All rights reserved.
15
16			Redistribution and use in source and binary forms, with or without
17			modification, are permitted provided that the following conditions
18			are met:
19			1. Redistributions of source code must retain the above copyright
20			notice, this list of conditions and the following disclaimer.
21			2. Redistributions in binary form must reproduce the above copyright
22			notice, this list of conditions and the following disclaimer in the
23			documentation and/or other materials provided with the distribution.
24			3. Neither the name of copyright holders nor the names of its
25			contributors may be used to endorse or promote products derived
26			from this software without specific prior written permission.
27
28			THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29			``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30			TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31			PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
32			BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33			CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34			SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35			INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36			CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37			ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38			POSSIBILITY OF SUCH DAMAGE.
39			*/
40
41			/*
42			* @author "Sebastián Grignoli" <[email protected]>
43			*
44			* @version 2.0
45			*
46			* @link https://github.com/neitanod/forceutf8
47			*
48			* @example https://github.com/neitanod/forceutf8
49			*
50			* @license Revised BSD
51			*/
52
53			namespace Mautic\CoreBundle\Helper;
54
55			class UTF8Helper
56			{
57			const ICONV_TRANSLIT = 'TRANSLIT';
58			const ICONV_IGNORE = 'IGNORE';
59			const WITHOUT_ICONV = '';
60
61			protected static $win1252ToUtf8 = [
62			128 => "\xe2\x82\xac",
63			130 => "\xe2\x80\x9a",
64			131 => "\xc6\x92",
65			132 => "\xe2\x80\x9e",
66			133 => "\xe2\x80\xa6",
67			134 => "\xe2\x80\xa0",
68			135 => "\xe2\x80\xa1",
69			136 => "\xcb\x86",
70			137 => "\xe2\x80\xb0",
71			138 => "\xc5\xa0",
72			139 => "\xe2\x80\xb9",
73			140 => "\xc5\x92",
74			142 => "\xc5\xbd",
75			145 => "\xe2\x80\x98",
76			146 => "\xe2\x80\x99",
77			147 => "\xe2\x80\x9c",
78			148 => "\xe2\x80\x9d",
79			149 => "\xe2\x80\xa2",
80			150 => "\xe2\x80\x93",
81			151 => "\xe2\x80\x94",
82			152 => "\xcb\x9c",
83			153 => "\xe2\x84\xa2",
84			154 => "\xc5\xa1",
85			155 => "\xe2\x80\xba",
86			156 => "\xc5\x93",
87			158 => "\xc5\xbe",
88			159 => "\xc5\xb8",
89			];
90
91			protected static $brokenUtf8ToUtf8 = [
92			"\xc2\x80" => "\xe2\x82\xac",
93			"\xc2\x82" => "\xe2\x80\x9a",
94			"\xc2\x83" => "\xc6\x92",
95			"\xc2\x84" => "\xe2\x80\x9e",
96			"\xc2\x85" => "\xe2\x80\xa6",
97			"\xc2\x86" => "\xe2\x80\xa0",
98			"\xc2\x87" => "\xe2\x80\xa1",
99			"\xc2\x88" => "\xcb\x86",
100			"\xc2\x89" => "\xe2\x80\xb0",
101			"\xc2\x8a" => "\xc5\xa0",
102			"\xc2\x8b" => "\xe2\x80\xb9",
103			"\xc2\x8c" => "\xc5\x92",
104			"\xc2\x8e" => "\xc5\xbd",
105			"\xc2\x91" => "\xe2\x80\x98",
106			"\xc2\x92" => "\xe2\x80\x99",
107			"\xc2\x93" => "\xe2\x80\x9c",
108			"\xc2\x94" => "\xe2\x80\x9d",
109			"\xc2\x95" => "\xe2\x80\xa2",
110			"\xc2\x96" => "\xe2\x80\x93",
111			"\xc2\x97" => "\xe2\x80\x94",
112			"\xc2\x98" => "\xcb\x9c",
113			"\xc2\x99" => "\xe2\x84\xa2",
114			"\xc2\x9a" => "\xc5\xa1",
115			"\xc2\x9b" => "\xe2\x80\xba",
116			"\xc2\x9c" => "\xc5\x93",
117			"\xc2\x9e" => "\xc5\xbe",
118			"\xc2\x9f" => "\xc5\xb8",
119			];
120
121			protected static $utf8ToWin1252 = [
122			"\xe2\x82\xac" => "\x80",
123			"\xe2\x80\x9a" => "\x82",
124			"\xc6\x92" => "\x83",
125			"\xe2\x80\x9e" => "\x84",
126			"\xe2\x80\xa6" => "\x85",
127			"\xe2\x80\xa0" => "\x86",
128			"\xe2\x80\xa1" => "\x87",
129			"\xcb\x86" => "\x88",
130			"\xe2\x80\xb0" => "\x89",
131			"\xc5\xa0" => "\x8a",
132			"\xe2\x80\xb9" => "\x8b",
133			"\xc5\x92" => "\x8c",
134			"\xc5\xbd" => "\x8e",
135			"\xe2\x80\x98" => "\x91",
136			"\xe2\x80\x99" => "\x92",
137			"\xe2\x80\x9c" => "\x93",
138			"\xe2\x80\x9d" => "\x94",
139			"\xe2\x80\xa2" => "\x95",
140			"\xe2\x80\x93" => "\x96",
141			"\xe2\x80\x94" => "\x97",
142			"\xcb\x9c" => "\x98",
143			"\xe2\x84\xa2" => "\x99",
144			"\xc5\xa1" => "\x9a",
145			"\xe2\x80\xba" => "\x9b",
146			"\xc5\x93" => "\x9c",
147			"\xc5\xbe" => "\x9e",
148			"\xc5\xb8" => "\x9f",
149			];
150
151			public static function toUTF8($text)
152			{
153			/**
154			* Function \ForceUTF8\Encoding::toUTF8.
155			*
156			* This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
157			*
158			* It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
159			*
160			* It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
161			*
162			* 1) when any of these characters: ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
163			* are followed by any of these: ("group B")
164			* ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶•¸¹º»¼½¾¿
165			* For example: %ABREPRESENT%C9%BB. «REPRESENTÉ»
166			* The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
167			* is also a valid unicode character, and will be left unchanged.
168			*
169			* 2) when any of these: àáâãäåæçèéêëìíîï are followed by TWO chars from group B,
170			* 3) when any of these: ðñòó are followed by THREE chars from group B.
171			*
172			* @name toUTF8
173			*
174			* @param string $text Any string
175			*
176			* @return string The same string, UTF8 encoded
177			*/
178			if (is_array($text)) {
179			foreach ($text as $k => $v) {
180			$text[$k] = self::toUTF8($v);
181			}
182
183			return $text;
184			}
185
186			if (!is_string($text)) {
187			return $text;
188			}
189
190			$max = self::strlen($text);
191
192			$buf = '';
193			for ($i = 0; $i < $max; ++$i) {
194			$c1 = $text[$i];
195			if ($c1 >= "\xc0") { //Should be converted to UTF8, if it's not UTF8 already
196			$c2 = $i + 1 >= $max ? "\x00" : $text[$i + 1];
197			$c3 = $i + 2 >= $max ? "\x00" : $text[$i + 2];
198			$c4 = $i + 3 >= $max ? "\x00" : $text[$i + 3];
199			if ($c1 >= "\xc0" & $c1 <= "\xdf") { //looks like 2 bytes UTF8
			0 ignored issues – show Bug introduced 2018-01-24 20:02 UTC by Report Bug Copy Issue Report Show Similar Issues like this Are you sure you want to use the bitwise `&` or did you mean `&&`? Loading history...
200			if ($c2 >= "\x80" && $c2 <= "\xbf") { //yeah, almost sure it's UTF8 already
201			$buf .= $c1.$c2;
202			++$i;
203			} else { //not valid UTF8. Convert it.
204			$cc1 = (chr(ord($c1) / 64) \| "\xc0");
			0 ignored issues – show Bug introduced 2018-01-24 20:02 UTC by Report Bug Copy Issue Report Show Similar Issues like this Are you sure you want to use the bitwise `\|` or did you mean `\|\|`? Loading history...
205			$cc2 = ($c1 & "\x3f") \| "\x80";
206			$buf .= $cc1.$cc2;
207			}
208			} elseif ($c1 >= "\xe0" & $c1 <= "\xef") { //looks like 3 bytes UTF8
			0 ignored issues – show Bug introduced 2018-01-24 20:02 UTC by Report Bug Copy Issue Report Show Similar Issues like this Are you sure you want to use the bitwise `&` or did you mean `&&`? Loading history...
209			if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf") { //yeah, almost sure it's UTF8 already
210			$buf .= $c1.$c2.$c3;
211			$i = $i + 2;
212			} else { //not valid UTF8. Convert it.
213			$cc1 = (chr(ord($c1) / 64) \| "\xc0");
			0 ignored issues – show Bug introduced 2018-01-24 20:02 UTC by Report Bug Copy Issue Report Show Similar Issues like this Are you sure you want to use the bitwise `\|` or did you mean `\|\|`? Loading history...
214			$cc2 = ($c1 & "\x3f") \| "\x80";
215			$buf .= $cc1.$cc2;
216			}
217			} elseif ($c1 >= "\xf0" & $c1 <= "\xf7") { //looks like 4 bytes UTF8
			0 ignored issues – show Bug introduced 2018-01-24 20:02 UTC by Report Bug Copy Issue Report Show Similar Issues like this Are you sure you want to use the bitwise `&` or did you mean `&&`? Loading history...
218			if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf" && $c4 >= "\x80"
219			&& $c4 <= "\xbf"
220			) { //yeah, almost sure it's UTF8 already
221			$buf .= $c1.$c2.$c3.$c4;
222			$i = $i + 3;
223			} else { //not valid UTF8. Convert it.
224			$cc1 = (chr(ord($c1) / 64) \| "\xc0");
			0 ignored issues – show Bug introduced 2018-01-24 20:02 UTC by Report Bug Copy Issue Report Show Similar Issues like this Are you sure you want to use the bitwise `\|` or did you mean `\|\|`? Loading history...
225			$cc2 = ($c1 & "\x3f") \| "\x80";
226			$buf .= $cc1.$cc2;
227			}
228			} else { //doesn't look like UTF8, but should be converted
229			$cc1 = (chr(ord($c1) / 64) \| "\xc0");
			0 ignored issues – show Bug introduced 2018-01-24 20:02 UTC by Report Bug Copy Issue Report Show Similar Issues like this Are you sure you want to use the bitwise `\|` or did you mean `\|\|`? Loading history...
230			$cc2 = (($c1 & "\x3f") \| "\x80");
231			$buf .= $cc1.$cc2;
232			}
233			} elseif ("\x80" == ($c1 & "\xc0")) { // needs conversion
234			if (isset(self::$win1252ToUtf8[ord($c1)])) { //found in Windows-1252 special cases
235			$buf .= self::$win1252ToUtf8[ord($c1)];
236			} else {
237			$cc1 = (chr(ord($c1) / 64) \| "\xc0");
			0 ignored issues – show Bug introduced 2018-01-24 20:02 UTC by Report Bug Copy Issue Report Show Similar Issues like this Are you sure you want to use the bitwise `\|` or did you mean `\|\|`? Loading history...
238			$cc2 = (($c1 & "\x3f") \| "\x80");
239			$buf .= $cc1.$cc2;
240			}
241			} else { // it doesn't need conversion
242			$buf .= $c1;
243			}
244			}
245
246			return $buf;
247			}
248
249			public static function toWin1252($text, $option = self::WITHOUT_ICONV)
250			{
251			if (is_array($text)) {
252			foreach ($text as $k => $v) {
253			$text[$k] = self::toWin1252($v, $option);
254			}
255
256			return $text;
257			} elseif (is_string($text)) {
258			return static::utf8_decode($text, $option);
259			} else {
260			return $text;
261			}
262			}
263
264			public static function toISO8859($text)
265			{
266			return self::toWin1252($text);
267			}
268
269			public static function toLatin1($text)
270			{
271			return self::toWin1252($text);
272			}
273
274			public static function fixUTF8($text, $option = self::WITHOUT_ICONV)
275			{
276			if (is_array($text)) {
277			foreach ($text as $k => $v) {
278			$text[$k] = self::fixUTF8($v, $option);
279			}
280
281			return $text;
282			}
283
284			$last = '';
285			while ($last != $text) {
286			$last = $text;
287			$text = self::toUTF8(static::utf8_decode($text, $option));
288			}
289
290			return self::toUTF8(static::utf8_decode($text, $option));
291			}
292
293			public static function UTF8FixWin1252Chars($text)
294			{
295			// If you received an UTF-8 string that was converted from Windows-1252 as it was ISO8859-1
296			// (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
297			// See: http://en.wikipedia.org/wiki/Windows-1252
298
299			return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);
300			}
301
302			public static function removeBOM($str = '')
303			{
304			if (substr($str, 0, 3) == pack('CCC', 0xef, 0xbb, 0xbf)) {
305			$str = substr($str, 3);
306			}
307
308			return $str;
309			}
310
311			protected static function strlen($text)
312			{
313			return (function_exists('mb_strlen') && ((int) ini_get('mbstring.func_overload')) & 2) ?
314			mb_strlen($text, '8bit') : strlen($text);
315			}
316
317			public static function normalizeEncoding($encodingLabel)
318			{
319			$encoding = strtoupper($encodingLabel);
320			$encoding = preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);
321			$equivalences = [
322			'ISO88591' => 'ISO-8859-1',
323			'ISO8859' => 'ISO-8859-1',
324			'ISO' => 'ISO-8859-1',
325			'LATIN1' => 'ISO-8859-1',
326			'LATIN' => 'ISO-8859-1',
327			'UTF8' => 'UTF-8',
328			'UTF' => 'UTF-8',
329			'WIN1252' => 'ISO-8859-1',
330			'WINDOWS1252' => 'ISO-8859-1',
331			];
332
333			if (empty($equivalences[$encoding])) {
334			return 'UTF-8';
335			}
336
337			return $equivalences[$encoding];
338			}
339
340			public static function encode($encodingLabel, $text)
341			{
342			$encodingLabel = self::normalizeEncoding($encodingLabel);
343			if ('ISO-8859-1' == $encodingLabel) {
344			return self::toLatin1($text);
345			}
346
347			return self::toUTF8($text);
348			}
349
350			protected static function utf8_decode($text, $option)
351			{
352			if (self::WITHOUT_ICONV == $option \|\| !function_exists('iconv')) {
353			$o = utf8_decode(
354			str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), self::toUTF8($text))
355			);
356			} else {
357			$o = iconv(
358			'UTF-8',
359			'Windows-1252'.(self::ICONV_TRANSLIT == $option ? '//TRANSLIT' : (self::ICONV_IGNORE == $option ? '//IGNORE' : '')),
360			$text
361			);
362			}
363
364			return $o;
365			}
366			}
367

mautic / mautic

Issues (3627)

app/bundles/CoreBundle/Helper/UTF8Helper.php (8 issues)

Labels

Severity

Introduced By

Duplication Side-by-Side

Filter issues like