Utf8::clean() - Code Metrics - Inspection of "UTF 8 Backport for PHP 5.4 Updated (somewhat)" - devtoolboxuk/soteria - Measure and Improve Code Quality continuously with Scrutinizer

Completed
Push — master ( 1cc8fa...13169b )

by Rob
created 2019-05-19 21:04 UTC
Utf8::clean() B

↳ Parent: Utf8
Complexity

Conditions	6
Paths	32
Size

Total Lines
Duplication

Lines	0
Ratio	0 %
Code Coverage

Tests	0
CRAP Score	42
Importance

Changes
Metric	Value
dl	0
loc	40
ccs	0
cts	14
cp	0
rs	8.6577
c	0
b	0
f	0
cc	6
nc	32
nop	7
crap	42
<?php

namespace devtoolboxuk\soteria\voku\Resources;

class Utf8 extends Resources

{

    /** @var System helper object created in the constructor; checkForSupport() asks it which extensions are loaded */
    private $system;
    /** @var array|null encoding names, loaded on first use from the "encodings" data file by normalize_encoding() */
    private $ENCODINGS;
    /** @var array feature flags (e.g. 'mbstring', 'intl', 'json') filled in by checkForSupport() */
    private $SUPPORT = [];
    /** @var array|null search => replace map loaded on first use from the "utf8_fix" data file by fixSimpleUtf8() */
    private $BROKEN_UTF8_FIX;
    /** @var array|null lookup table loaded on first use from the "ord" data file by toUtf8ConvertHelper() */
    private $ORD;
    /** @var array|null lookup table loaded on first use from the "chr" data file by toUtf8ConvertHelper() and chr() */
    private $CHR;
    /** @var array|null lookup table loaded on first use from the "win1252_to_utf8" data file by toUtf8ConvertHelper() */
    private $WIN1252_TO_UTF8;
    private $BOM = [
        "\xef\xbb\xbf" => 3, // UTF-8 BOM
        'ï»¿' => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
        '  þÿ' => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
        'ÿþ  ' => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff" => 2, // UTF-16 (BE) BOM
        'þÿ' => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe" => 2, // UTF-16 (LE) BOM
        'ÿþ' => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];

    /**
     * Unicode bidirectional-control characters.
     *
     * Keys are the decimal Unicode code points (8234 = U+202A ... 8297 = U+2069),
     * values are the corresponding UTF-8 byte sequences. The inline notes name the
     * HTML construct that can be used instead of each control character.
     *
     * @var array
     */
    private $BIDI_UNI_CODE_CONTROLS_TABLE = [
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
        8234 => "\xE2\x80\xAA",
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
        8235 => "\xE2\x80\xAB",
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
        8236 => "\xE2\x80\xAC",
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
        8237 => "\xE2\x80\xAD",
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
        8238 => "\xE2\x80\xAE",
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
        8294 => "\xE2\x81\xA6",
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
        8295 => "\xE2\x81\xA7",
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
        8296 => "\xE2\x81\xA8",
        // POP DIRECTIONAL ISOLATE
        8297 => "\xE2\x81\xA9",
    ];
//    private $WHITESPACE = [
//        // NUL Byte
//        0 => "\x0",
//        // Tab
//        9 => "\x9",
//        // New Line
//        10 => "\xa",
//        // Vertical Tab
//        11 => "\xb",
//        // Carriage Return
//        13 => "\xd",
//        // Ordinary Space
//        32 => "\x20",
//        // NO-BREAK SPACE
//        160 => "\xc2\xa0",
//        // OGHAM SPACE MARK
//        5760 => "\xe1\x9a\x80",
//        // MONGOLIAN VOWEL SEPARATOR
//        6158 => "\xe1\xa0\x8e",
//        // EN QUAD
//        8192 => "\xe2\x80\x80",
//        // EM QUAD
//        8193 => "\xe2\x80\x81",
//        // EN SPACE
//        8194 => "\xe2\x80\x82",
//        // EM SPACE
//        8195 => "\xe2\x80\x83",
//        // THREE-PER-EM SPACE
//        8196 => "\xe2\x80\x84",
//        // FOUR-PER-EM SPACE
//        8197 => "\xe2\x80\x85",
//        // SIX-PER-EM SPACE
//        8198 => "\xe2\x80\x86",
//        // FIGURE SPACE
//        8199 => "\xe2\x80\x87",
//        // PUNCTUATION SPACE
//        8200 => "\xe2\x80\x88",
//        // THIN SPACE
//        8201 => "\xe2\x80\x89",
//        //HAIR SPACE
//        8202 => "\xe2\x80\x8a",
//        // LINE SEPARATOR
//        8232 => "\xe2\x80\xa8",
//        // PARAGRAPH SEPARATOR
//        8233 => "\xe2\x80\xa9",
//        // NARROW NO-BREAK SPACE
//        8239 => "\xe2\x80\xaf",
//        // MEDIUM MATHEMATICAL SPACE
//        8287 => "\xe2\x81\x9f",
//        // IDEOGRAPHIC SPACE
//        12288 => "\xe3\x80\x80",
//    ];
    /**
     * Unicode space and separator characters.
     *
     * Keys are the Unicode character names, values are the UTF-8 byte sequences
     * of those characters (e.g. "\xc2\xa0" is U+00A0 NO-BREAK SPACE).
     *
     * @var array
     */
    private $WHITESPACE_TABLE = [
        'SPACE' => "\x20",
        'NO-BREAK SPACE' => "\xc2\xa0",
        'OGHAM SPACE MARK' => "\xe1\x9a\x80",
        'EN QUAD' => "\xe2\x80\x80",
        'EM QUAD' => "\xe2\x80\x81",
        'EN SPACE' => "\xe2\x80\x82",
        'EM SPACE' => "\xe2\x80\x83",
        'THREE-PER-EM SPACE' => "\xe2\x80\x84",
        'FOUR-PER-EM SPACE' => "\xe2\x80\x85",
        'SIX-PER-EM SPACE' => "\xe2\x80\x86",
        'FIGURE SPACE' => "\xe2\x80\x87",
        'PUNCTUATION SPACE' => "\xe2\x80\x88",
        'THIN SPACE' => "\xe2\x80\x89",
        'HAIR SPACE' => "\xe2\x80\x8a",
        'LINE SEPARATOR' => "\xe2\x80\xa8",
        'PARAGRAPH SEPARATOR' => "\xe2\x80\xa9",
        'ZERO WIDTH SPACE' => "\xe2\x80\x8b",
        'NARROW NO-BREAK SPACE' => "\xe2\x80\xaf",
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
        'IDEOGRAPHIC SPACE' => "\xe3\x80\x80",
    ];

    /**
     * Creates the System helper and records which optional PHP extensions
     * are available (see checkForSupport()).
     */
    public function __construct()
    {
        $this->system = new System();
        $this->checkForSupport();
    }

    /**
     * Fills $this->SUPPORT with one flag per optional extension / feature.
     *
     * The detection runs only once per instance; later calls return immediately.
     * When mbstring (or the symfony polyfill) is reported as available, the
     * mbstring internal encoding is switched to UTF-8 as a side effect.
     *
     * @return void
     */
    private function checkForSupport()
    {
        if (isset($this->SUPPORT['already_checked_via_portable_utf8'])) {
            return;
        }
        $this->SUPPORT['already_checked_via_portable_utf8'] = true;

        $system = $this->system;

        // mbstring: http://php.net/manual/en/book.mbstring.php
        $mbstringLoaded = $system->mbstring_loaded();
        $this->SUPPORT['mbstring'] = $mbstringLoaded;
        $this->SUPPORT['mbstring_func_overload'] = $system->mbstring_overloaded();
        if ($mbstringLoaded === true) {
            \mb_internal_encoding('UTF-8');
            \mb_regex_encoding('UTF-8');
            $this->SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // iconv: http://php.net/manual/en/book.iconv.php
        $this->SUPPORT['iconv'] = $system->iconv_loaded();

        // intl: http://php.net/manual/en/book.intl.php
        $intlLoaded = $system->intl_loaded();
        $this->SUPPORT['intl'] = $intlLoaded;
        $this->SUPPORT['intl__transliterator_list_ids'] = [];
        if ($intlLoaded === true && \function_exists('transliterator_list_ids') === true) {
            $this->SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
        }

        // IntlChar: http://php.net/manual/en/class.intlchar.php
        $this->SUPPORT['intlChar'] = $system->intlChar_loaded();

        // ctype: http://php.net/manual/en/book.ctype.php
        $this->SUPPORT['ctype'] = $system->ctype_loaded();

        // finfo: http://php.net/manual/en/class.finfo.php
        $this->SUPPORT['finfo'] = $system->finfo_loaded();

        // json: http://php.net/manual/en/book.json.php
        $this->SUPPORT['json'] = $system->json_loaded();

        // PCRE with UTF-8 support: http://php.net/manual/en/book.pcre.php
        $this->SUPPORT['pcre_utf8'] = $system->pcre_utf8_support();

        $polyfillUsed = $system->symfony_polyfill_used();
        $this->SUPPORT['symfony_polyfill_used'] = $polyfillUsed;
        if ($polyfillUsed === true) {
            \mb_internal_encoding('UTF-8');
            $this->SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }
    }

    /**
     * URL-decodes a string and also resolves HTML entities and "%uXXXX" /
     * "\uXXXX" style escapes, returning UTF-8.
     *
     * @param string $str          the input string
     * @param bool   $multi_decode true: repeat the decoding pass until the string
     *                             no longer changes (handles multiply-encoded
     *                             input); false: run exactly one decoding pass
     *
     * @return string the decoded string
     */
    public function rawurldecode($str, $multi_decode = true)
    {
        if ($str === '') {
            return '';
        }

        // Fast path: nothing that could be an entity, percent-escape or unicode escape.
        if (strpos($str, '&') === false && strpos($str, '%') === false && strpos($str, '+') === false && strpos($str, '\u') === false) {
            return $this->fixSimpleUtf8($str);
        }

        // Turn non-standard "%uXXXX" escapes into numeric HTML entities so that
        // the entity decoder below can resolve them.
        $pattern = '/%u([0-9a-fA-F]{3,4})/';
        if (preg_match($pattern, $str)) {
            $str = (string)preg_replace($pattern, '&#x\\1;', rawurldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always decode at least once. Previously a false $multi_decode skipped
        // the decoding pass entirely and returned the input (almost) untouched.
        do {
            $str_compare = $str;

            $str = $this->fixSimpleUtf8(rawurldecode($this->htmlEntityDecode($this->toUtf8($str), $flags)));
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }

    /**
     * Replaces every key of the "utf8_fix" data table found in $str with its value.
     *
     * The table is loaded on first use; its keys and values are additionally kept
     * in function-static variables so they are split only once.
     *
     * @param string $str
     *
     * @return string
     */
    private function fixSimpleUtf8($str)
    {
        static $needles = null;
        static $replacements = null;

        if ($str === '') {
            return '';
        }

        if ($needles === null) {
            if ($this->BROKEN_UTF8_FIX === null) {
                $this->BROKEN_UTF8_FIX = $this->getData('utf8_fix');
            }

            $needles = array_keys($this->BROKEN_UTF8_FIX);
            $replacements = array_values($this->BROKEN_UTF8_FIX);
        }

        return str_replace($needles, $replacements, $str);
    }

    /**
     * Includes "<this dir>/../Data/<file>.php" and returns whatever that file returns.
     *
     * @param string $file data file name without the ".php" extension
     *
     * @return mixed the value returned by the included file
     */
    private function getData($file)
    {
        $path = __DIR__ . '/../Data/' . $file . '.php';

        return include $path;
    }

    /**
     * Decodes HTML entities (named and numeric) repeatedly until the string
     * no longer changes.
     *
     * @param string   $str      the input string
     * @param int|null $flags    html_entity_decode() flags; null means ENT_QUOTES | ENT_HTML5
     * @param string   $encoding target encoding, normalized via normalize_encoding()
     *                           unless it is already 'UTF-8' or 'CP850'
     *
     * @return string the decoded string; inputs shorter than four bytes or
     *                without "&" are returned unchanged
     */
    private function htmlEntityDecode($str, $flags = null, $encoding = 'UTF-8')
    {
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = $this->normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'ISO-8859-1' && $encoding !== 'WINDOWS-1252' && $this->SUPPORT['mbstring'] === false) {
            trigger_error('UTF8::htmlEntityDecode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Conversion map for mb_decode_numericentity(): groups of exactly four
        // integers (start, end, offset, mask). The former fifth element was
        // ignored by older PHP versions but makes PHP 8 throw a ValueError.
        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        $convmap = [0x80, 0xfffff, 0, 0xfffff];

        do {
            $str_compare = $str;

            if ($this->SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    $str = mb_decode_numericentity($str, $convmap);
                } else {
                    $str = mb_decode_numericentity($str, $convmap, $encoding);
                }
            } else {
                // NOTE(review): this branch still calls an mb_* function, so it
                // presumably relies on a polyfill being present - confirm.
                $str = (string)preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function ($matches) use ($encoding) {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // Leave quote characters encoded.
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (strpos($str, '&') !== false) {
                if (strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities:
                    // append the missing ";" to numeric entities that lack it
                    $str = (string)preg_replace('/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S', '$1;', $str);
                }

                $str = html_entity_decode($str, $flags, $encoding);
            }
        } while ($str_compare !== $str);

        return $str;
    }

    /**
     * Maps an encoding name or alias to a canonical encoding name.
     *
     * Names found in the "encodings" data file are returned as given; known
     * aliases are translated; anything else is returned upper-cased. Results of
     * the slow path are memoized in a function-static cache.
     *
     * @param mixed  $encoding the encoding name (cast to string)
     * @param string $fallback returned for an empty name and for '0' / '1'
     *
     * @return string
     */
    private function normalize_encoding($encoding, $fallback = '')
    {
        static $cache = [];

        $encoding = (string)$encoding;

        // Empty / boolean-like values ('0' and '1' only show up without strict types).
        if (in_array($encoding, ['', '0', '1'], true)) {
            return $fallback;
        }

        if (in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (isset($cache[$encoding])) {
            return $cache[$encoding];
        }

        if ($this->ENCODINGS === null) {
            $this->ENCODINGS = $this->getData('encodings');
        }

        if (in_array($encoding, $this->ENCODINGS, true)) {
            $cache[$encoding] = $encoding;

            return $encoding;
        }

        $requested = $encoding;
        $normalized = strtoupper($encoding);

        // Lookup key: the upper-cased name without punctuation (whitespace is kept).
        $lookupKey = (string)preg_replace('/[^a-zA-Z0-9\s]/u', '', $normalized);

        $aliases = [
            // Western European
            'ISO8859' => 'ISO-8859-1',
            'ISO88591' => 'ISO-8859-1',
            'ISO' => 'ISO-8859-1',
            'LATIN' => 'ISO-8859-1',
            'LATIN1' => 'ISO-8859-1',
            // Central European
            'ISO88592' => 'ISO-8859-2',
            'LATIN2' => 'ISO-8859-2',
            // Southern European
            'ISO88593' => 'ISO-8859-3',
            'LATIN3' => 'ISO-8859-3',
            // Northern European
            'ISO88594' => 'ISO-8859-4',
            'LATIN4' => 'ISO-8859-4',
            // Cyrillic
            'ISO88595' => 'ISO-8859-5',
            // Arabic
            'ISO88596' => 'ISO-8859-6',
            // Greek
            'ISO88597' => 'ISO-8859-7',
            // Hebrew
            'ISO88598' => 'ISO-8859-8',
            // Turkish
            'ISO88599' => 'ISO-8859-9',
            'LATIN5' => 'ISO-8859-9',
            // Thai
            'ISO885911' => 'ISO-8859-11',
            'TIS620' => 'ISO-8859-11',
            // Nordic
            'ISO885910' => 'ISO-8859-10',
            'LATIN6' => 'ISO-8859-10',
            // Baltic
            'ISO885913' => 'ISO-8859-13',
            'LATIN7' => 'ISO-8859-13',
            // Celtic
            'ISO885914' => 'ISO-8859-14',
            'LATIN8' => 'ISO-8859-14',
            // Western European (with some extra chars e.g. €)
            'ISO885915' => 'ISO-8859-15',
            'LATIN9' => 'ISO-8859-15',
            // Southeast European
            'ISO885916' => 'ISO-8859-16',
            'LATIN10' => 'ISO-8859-16',
            // Windows code pages
            'CP1250' => 'WINDOWS-1250',
            'WIN1250' => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251' => 'WINDOWS-1251',
            'WIN1251' => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252' => 'WINDOWS-1252',
            'WIN1252' => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253' => 'WINDOWS-1253',
            'WIN1253' => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254' => 'WINDOWS-1254',
            'WIN1254' => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255' => 'WINDOWS-1255',
            'WIN1255' => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256' => 'WINDOWS-1256',
            'WIN1256' => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257' => 'WINDOWS-1257',
            'WIN1257' => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258' => 'WINDOWS-1258',
            'WIN1258' => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            // Unicode transformation formats
            'UTF16' => 'UTF-16',
            'UTF32' => 'UTF-32',
            'UTF8' => 'UTF-8',
            'UTF' => 'UTF-8',
            'UTF7' => 'UTF-7',
            // Binary
            '8BIT' => 'CP850',
            'BINARY' => 'CP850',
        ];

        if (isset($aliases[$lookupKey])) {
            $normalized = $aliases[$lookupKey];
        }

        $cache[$requested] = $normalized;

        return $normalized;
    }

    /**
     * Converts a string (or, recursively, every element of an array) to UTF-8.
     *
     * The input is scanned byte by byte: a lead byte followed by the matching
     * number of continuation bytes (0x80-0xBF) is copied unchanged, any other
     * byte >= 0x80 is handed to toUtf8ConvertHelper(). Afterwards "\uXXXX"
     * escape sequences (including surrogate pairs) are replaced by the
     * character they denote.
     *
     * @param string|array $str
     *
     * @return string|array the converted value; '' when the final
     *                      preg_replace_callback() call fails
     */
    private function toUtf8($str)

    {

        // arrays are converted element by element
        if (is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = $this->toUtf8($value);
            }
            return $str;
        }


        $str = (string)$str;
        if ($str === '') {
            return $str;
        }

        $max = \strlen($str);
        $buf = '';

        for ($i = 0; $i < $max; ++$i) {
            $c1 = $str[$i];


            if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

                if ($c1 <= "\xDF") { // looks like 2 bytes UTF8

                    // a missing following byte is treated as NUL, i.e. "not a continuation byte"
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];


                    if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2;
                        ++$i;
                    } else { // not valid UTF8 - convert it

                        $buf .= $this->toUtf8ConvertHelper($c1);
                    }
                } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8

                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];


                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2 . $c3;
                        $i += 2;
                    } else { // not valid UTF8 - convert it

                        $buf .= $this->toUtf8ConvertHelper($c1);
                    }
                } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8

                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
                    $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];


                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2 . $c3 . $c4;
                        $i += 3;
                    } else { // not valid UTF8 - convert it

                        $buf .= $this->toUtf8ConvertHelper($c1);
                    }
                } else { // doesn't look like UTF8, but should be converted


                    $buf .= $this->toUtf8ConvertHelper($c1);
                }
            } elseif (($c1 & "\xC0") === "\x80") { // needs conversion (stray byte in 0x80-0xBF)

                $buf .= $this->toUtf8ConvertHelper($c1);
            } else { // it doesn't need conversion


                $buf .= $c1;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $buf = preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches groups 1+2: high/low surrogate, group 3: single escape
             *
             * @return string
             */
            function (array $matches) {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $cp = (int)hexdec($matches[3]);

                } else {

                    // surrogate pair: combine both halves into one code point
                    // http://unicode.org/faq/utf_bom.html#utf16-4
                    $cp = ((int)hexdec($matches[1]) << 10)
                        + (int)hexdec($matches[2])
                        + 0x10000
                        - (0xD800 << 10)
                        - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($cp < 0x80) {
                    return (string)$this->chr($cp);
                }

                // code points 0x80-0x9F are assembled by hand as a two-byte sequence
                if ($cp < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string)$this->chr(0xC0 | $cp >> 6) . (string)$this->chr(0x80 | $cp & 0x3F);
                }

                return $this->decimalToChr($cp);
            },
            $buf
        );

        // preg_replace_callback() returns null on error
        if ($buf === null) {
            return '';
        }


        return $buf;
    }

    /**
     * Converts one non-UTF-8 byte into a UTF-8 byte sequence.
     *
     * Bytes listed in the "win1252_to_utf8" table are replaced by the table
     * value; every other byte is expanded into a two-byte sequence
     * (lead byte 0xC0 | high bits, continuation byte 0x80 | low six bits).
     *
     * @param string $input a single byte
     *
     * @return string
     */
    private function toUtf8ConvertHelper($input)
    {
        // Lazy-load the lookup tables on first use.
        if ($this->ORD === null) {
            $this->ORD = $this->getData('ord');
        }

        if ($this->CHR === null) {
            $this->CHR = $this->getData('chr');
        }

        if ($this->WIN1252_TO_UTF8 === null) {
            $this->WIN1252_TO_UTF8 = $this->getData('win1252_to_utf8');
        }

        $ordC1 = $this->ORD[$input];
        if (isset($this->WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            return (string)$this->WIN1252_TO_UTF8[$ordC1];
        }

        // "/" yields a float; cast explicitly so the array key is an int
        // (implicit float-to-int key truncation is deprecated since PHP 8.1).
        $cc1 = $this->CHR[(int)($ordC1 / 64)] | "\xC0";
        $cc2 = ((string)$input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }

    /**
     * Returns the character for a Unicode code point.
     *
     * Results are memoized in a function-static cache keyed by code point and
     * encoding. Code points up to 127 come from the "chr" table, higher ones
     * from IntlChar when available, otherwise they are assembled from the
     * "chr" table byte by byte.
     *
     * @param int    $code_point the Unicode code point
     * @param string $encoding   target encoding, normalized via normalize_encoding()
     *                           unless it is already 'UTF-8' or 'CP850'
     *
     * @return string|null the character (whatever IntlChar::chr() returns on that path)
     */
    private function chr($code_point, $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = $this->normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'ISO-8859-1' && $encoding !== 'WINDOWS-1252' && $this->SUPPORT['mbstring'] === false) {
            trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"

            if ($this->CHR === null) {
                $this->CHR = (array)$this->getData('chr');
            }

            $chr = $this->CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = $this->encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if ($this->SUPPORT['intlChar'] === true) {
            // Fully qualified: an unqualified "IntlChar" would be resolved inside
            // this file's namespace and fail with "class not found".
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = $this->encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if ($this->CHR === null) {
            $this->CHR = (array)$this->getData('chr');
        }

        $code_point = (int)$code_point;
        if ($code_point <= 0x7F) {
            // one byte
            $chr = $this->CHR[$code_point];
        } elseif ($code_point <= 0x7FF) {
            // two bytes
            $chr = $this->CHR[($code_point >> 6) + 0xC0] .
                $this->CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // three bytes
            $chr = $this->CHR[($code_point >> 12) + 0xE0] .
                $this->CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                $this->CHR[($code_point & 0x3F) + 0x80];
        } else {
            // four bytes
            $chr = $this->CHR[($code_point >> 18) + 0xF0] .
                $this->CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                $this->CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                $this->CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = $this->encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }

    /**
     * Encodes a string into the given target "encoding".
     *
     * Only the pseudo-encodings 'JSON', 'BASE64' and 'HTML-ENTITIES' are
     * actually converted. NOTE: real charset conversion (source-encoding
     * detection, mb_convert_encoding / iconv) is not implemented in this
     * class, so for every other target the input is returned unchanged.
     *
     * @param string $toEncoding target encoding, normalized via normalize_encoding()
     *                           unless it is already 'UTF-8' or 'CP850'
     * @param string $str        the input string
     *
     * @throws \InvalidArgumentException when the string cannot be JSON-encoded
     *
     * @return string
     */
    private function encode($toEncoding, $str)
    {
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
            $toEncoding = $this->normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($toEncoding === 'JSON') {
            $return = $this->jsonEncode($str);
            if ($return === false) {
                // Fully qualified: the unqualified name would be resolved inside
                // this file's namespace and fail with "class not found".
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for jsonEncode().');
            }

            return $return;
        }

        if ($toEncoding === 'BASE64') {
            return base64_encode($str);
        }

        if ($toEncoding === 'HTML-ENTITIES') {
            return $this->htmlEncode($str, true, 'UTF-8');
        }

        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'ISO-8859-1' && $toEncoding !== 'WINDOWS-1252' && $this->SUPPORT['mbstring'] === false) {
            trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', E_USER_WARNING);
        }

        // No charset conversion is performed here (see the NOTE above).
        return $str;
    }

    /**
     * JSON-encode a value after running it through filter().
     *
     * The value is filtered first; only afterwards is the availability of
     * ext-json checked, so filter() always runs even when encoding is impossible.
     *
     * @param mixed $value <p>The value to encode.</p>
     *
     * @return false|string whatever json_encode() returns for the filtered value
     *
     * @throws \RuntimeException if $this->SUPPORT['json'] is false
     */
    private function jsonEncode($value)
    {
        $filtered = $this->filter($value);

        if ($this->SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return json_encode($filtered, 0, 512);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }

    /**
     * Recursively normalise strings inside a value.
     *
     * - arrays: every element is filtered and written back under its key.
     * - objects: every property reachable via foreach is filtered and written
     *   back onto the object itself (the object is modified in place).
     *   Traversable objects are returned untouched, because their keys are not
     *   property names and iterating them could consume them.
     * - strings: line endings are normalised when a "\r" is present; non-ASCII
     *   strings are brought into the requested normalization form and, if they
     *   start with a combining mark, prefixed with $leading_combining.
     * - every other type is returned unchanged.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>A \Normalizer form constant.</p>
     * @param string $leading_combining  <p>Prefix placed in front of a leading combining mark.</p>
     *
     * @return mixed the filtered value
     */
    private function filter($var, $normalization_form = \Normalizer::NFC, $leading_combining = '◌')
    {
        switch (\gettype($var)) {
            case 'array':
                foreach ($var as $key => $value) {
                    $var[$key] = $this->filter($value, $normalization_form, $leading_combining);
                }

                break;
            case 'object':
                if ($var instanceof \Traversable) {
                    // Keys of an iterator are not properties; leave such objects alone.
                    break;
                }

                foreach ($var as $key => $value) {
                    // Write the filtered value back to the property it came from.
                    $var->{$key} = $this->filter($value, $normalization_form, $leading_combining);
                }

                break;
            case 'string':
                if (strpos($var, "\r") !== false) {
                    // Workaround https://bugs.php.net/65732
                    $var = $this->normalizeLineEnding($var);
                }

                if ($this->isAscii($var) === false) {
                    if (\Normalizer::isNormalized($var, $normalization_form)) {
                        // Any non-empty marker: tells the check below that
                        // the string is in the requested form.
                        $n = '-';
                    } else {
                        $n = \Normalizer::normalize($var, $normalization_form);

                        if (isset($n[0])) {
                            $var = $n;
                        } else {
                            // Normalization produced nothing usable; fall back to encode().
                            $var = $this->encode('UTF-8', $var, true);
                        }
                    }

                    if (
                        $var[0] >= "\x80"
                        &&
                        isset($n[0], $leading_combining[0])
                        &&
                        preg_match('/^\p{Mn}/u', $var)
                    ) {
                        // Prevent leading combining chars
                        // for NFC-safe concatenations.
                        $var = $leading_combining . $var;
                    }
                }

                break;
        }

        return $var;
    }

    /**
     * Convert Windows ("\r\n") and bare carriage-return ("\r") line breaks to "\n".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string with "\n" as its only line break
     */
    private function normalizeLineEnding($str)
    {
        // "\r\n" must come first so a CRLF pair collapses into a single "\n".
        $lineBreaks = ["\r\n", "\r"];

        return str_replace($lineBreaks, "\n", $str);
    }

    /**
     * Tell whether a string consists only of the bytes this class treats as ASCII.
     *
     * Accepted bytes are 0x09, 0x0A, 0x0D, 0x10, 0x13 and the printable range
     * 0x20-0x7E; the empty string counts as ASCII.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool true when no byte outside the accepted set is found
     */
    private function isAscii($str)
    {
        return $str === '' || !preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
    }

    /**
     * Encode the characters of a string as numeric HTML entities ("&#NNN;").
     *
     * With mbstring available the work is delegated to mb_encode_numericentity();
     * otherwise the string is split into characters and each one is encoded via
     * singleChrHtmlEncode().
     *
     * @param string $str            <p>The string to encode.</p>
     * @param bool   $keepAsciiChars <p>When true, the entity range starts at 0x80 instead of 0x00.</p>
     * @param string $encoding       <p>Character encoding; anything other than "UTF-8" / "CP850" is
     *                               passed through normalize_encoding() first.</p>
     *
     * @return string the entity-encoded string ('' for empty input)
     */
    private function htmlEncode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = $this->normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if ($this->SUPPORT['mbstring'] === true) {
            // convmap = [start, end, offset, mask]. It must contain a multiple
            // of four integers; PHP 8 rejects anything else with a ValueError.
            $convMap = [$keepAsciiChars === true ? 0x80 : 0x00, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                // No explicit encoding is passed for UTF-8.
                return mb_encode_numericentity($str, $convMap);
            }

            return mb_encode_numericentity($str, $convMap, $encoding);
        }

        // Fallback without mbstring: encode character by character. A plain
        // loop avoids scalar type hints, which PHP 5.x does not understand.
        $encoded = '';
        foreach ($this->strSplit($str) as $chr) {
            $encoded .= $this->singleChrHtmlEncode($chr, $keepAsciiChars, $encoding);
        }

        return $encoded;
    }

    /**
     * Encode one character as a numeric HTML entity.
     *
     * @param string $char           <p>The character to encode.</p>
     * @param bool   $keepAsciiChars <p>When true, characters accepted by isAscii() are returned as-is.</p>
     * @param string $encoding       <p>Encoding handed on to ord().</p>
     *
     * @return string '' for empty input, the unchanged character, or "&#<code>;"
     */
    private function singleChrHtmlEncode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
    {
        if ($char === '') {
            return '';
        }

        $passThrough = $keepAsciiChars === true && $this->isAscii($char) === true;

        return $passThrough ? $char : '&#' . $this->ord($char, $encoding) . ';';
    }

    /**
     * Resolve the integer code for the character at the start of $chr.
     *
     * Lookup order: per-call-site static memo, the table loaded through
     * getData('ord') (presumably a character => code map; verify against the
     * data file), \IntlChar::ord() when available, and finally manual decoding
     * of up to four leading bytes as a UTF-8 sequence.
     *
     * @param string $chr      <p>The character (cast to string).</p>
     * @param string $encoding <p>Encoding; anything other than "UTF-8" / "CP850" is normalised first.</p>
     *
     * @return int the resolved code (0 when no byte could be unpacked)
     */
    private function ord($chr, $encoding = 'UTF-8')
    {
        static $memo = [];

        $chr = (string)$chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = $this->normalize_encoding($encoding, 'UTF-8');
        }

        // The key is built from the character before any re-encoding below.
        $memoKey = $chr . $encoding;
        if (isset($memo[$memoKey]) === true) {
            return $memo[$memoKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = $this->encode($encoding, $chr);
        }

        if ($this->ORD === null) {
            $this->ORD = $this->getData('ord');
        }

        if (isset($this->ORD[$chr])) {
            return $memo[$memoKey] = $this->ORD[$chr];
        }

        // fallback via "IntlChar"
        if ($this->SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            if ($intlCode) {
                return $memo[$memoKey] = $intlCode;
            }
        }

        // fallback via vanilla php: unpack() yields a 1-based list of byte values
        $bytes = \unpack('C*', (string)\substr($chr, 0, 4));
        $lead = $bytes ? $bytes[1] : 0;

        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $code = (int)((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $code = (int)((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $code = (int)((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing unpacked)
            $code = $lead;
        }

        return $memo[$memoKey] = $code;
    }

    /**
     * Split a string into an array of characters (or chunks of $length characters).
     *
     * Arrays are processed element by element and returned with the same keys.
     *
     * @param string|string[] $str                <p>The string (or array of strings) to split.</p>
     * @param int             $length             <p>Characters per chunk; values <= 0 yield [].</p>
     * @param bool            $cleanUtf8          <p>Run clean() on the string before splitting.</p>
     * @param bool            $tryToUseMbFunction <p>Allow the mbstring-based path when mbstring is supported.</p>
     *
     * @return array list of characters / chunks, or [] for empty input
     */
    private function strSplit($str, $length = 1, $cleanUtf8 = false, $tryToUseMbFunction = true)
    {
        if ($length <= 0) {
            return [];
        }

        if (is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = $this->strSplit($value, $length, $cleanUtf8, $tryToUseMbFunction);
            }

            return $str;
        }

        // init
        $str = (string)$str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = $this->clean($str);
        }

        if ($tryToUseMbFunction === true && $this->SUPPORT['mbstring'] === true) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short strings: pick the characters one by one
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: a single regex pass
                $retArray = [];
                preg_match_all('/./us', $str, $retArray);
                $ret = isset($retArray[0]) ? $retArray[0] : [];
            }
        } elseif ($this->SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            preg_match_all('/./us', $str, $retArray);
            $ret = isset($retArray[0]) ? $retArray[0] : [];
        } else {
            // fallback: walk the bytes and group them by their UTF-8 lead byte.
            // Bytes that do not form a well-formed sequence are skipped.
            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: two-byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: three-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: four-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // Join every group of $length characters back into one string.
            // The callback takes its argument by value: array_map() passes
            // values, and a by-reference parameter triggers a warning.
            return array_map(
                static function ($item) {
                    return implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }

    /**
     * Strip byte sequences that do not look like UTF-8 and optionally apply further clean-ups.
     *
     * The regex keeps runs of well-formed 1- to 4-byte sequences (capture group 1)
     * and drops stray bytes in the 0x80-0xFF range. The optional steps then run in
     * this order: replacement-character removal, invisible-character removal,
     * whitespace normalisation, MS Word punctuation normalisation, BOM removal.
     *
     * @param string $str                           <p>The string to clean.</p>
     * @param bool   $remove_bom                    <p>Call remove_bom().</p>
     * @param bool   $normalize_whitespace          <p>Call normalize_whitespace().</p>
     * @param bool   $normalize_msword              <p>Call normalize_msword().</p>
     * @param bool   $keep_non_breaking_space       <p>Forwarded to normalize_whitespace().</p>
     * @param bool   $replace_diamond_question_mark <p>Call replace_diamond_question_mark() with '' as replacement.</p>
     * @param bool   $remove_invisible_characters   <p>Call remove_invisible_characters().</p>
     *
     * @return string the cleaned string
     */
    private function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false, $replace_diamond_question_mark = false, $remove_invisible_characters = true)
    {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        $utf8Pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';

        // Only group 1 (the valid run) survives the replacement.
        $cleaned = (string)preg_replace($utf8Pattern, '$1', $str);

        if ($replace_diamond_question_mark === true) {
            $cleaned = $this->replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters === true) {
            $cleaned = $this->remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace === true) {
            $cleaned = $this->normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword === true) {
            $cleaned = $this->normalize_msword($cleaned);
        }

        if ($remove_bom === true) {
            $cleaned = $this->remove_bom($cleaned);
        }

        return $cleaned;
    }

    /**
     * Replace the Unicode replacement character (U+FFFD) in a string.
     *
     * When $processInvalidUtf8 is true the string is first round-tripped through
     * mb_convert_encoding() (UTF-8 to UTF-8) with the mb substitute character
     * temporarily set to the replacement (or "none" for an empty replacement);
     * the previous substitute character is restored afterwards.
     *
     * @param string $str                <p>The input string.</p>
     * @param string $replacementChar    <p>Text that takes the place of U+FFFD.</p>
     * @param bool   $processInvalidUtf8 <p>Also run the mb_convert_encoding() pass described above.</p>
     *
     * @return string the processed string ('' for empty input)
     */
    public function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            $substitute = $replacementChar === '' ? 'none' : $replacementChar;

            if ($this->SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = $this->clean($str);
            }

            $previousSubstitute = \mb_substitute_character();
            \mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string)\mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($previousSubstitute);
        }

        $needles = [
            "\xEF\xBF\xBD",
            '�',
        ];
        $replacements = [
            $replacementChar,
            $replacementChar,
        ];

        return str_replace($needles, $replacements, $str);
    }

    /**
     * Remove control characters from a string, repeating until nothing is left to remove.
     *
     * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09)
     * are kept. With $url_encoded enabled, the percent-encoded forms of the
     * removed control characters (%00-%08, %0B, %0C, %0E-%1F) are stripped too.
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded <p>Also strip percent-encoded control characters.</p>
     * @param string $replacement <p>Text inserted in place of each match.</p>
     *
     * @return string the string without the matched characters
     */
    public function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
    {
        $patterns = $url_encoded
            ? [
                '/%0[0-8bcefBCEF]/', // url encoded 00-08, 11, 12, 14, 15
                '/%1[0-9a-fA-F]/', // url encoded 16-31
            ]
            : [];

        $patterns[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        // A removal can expose a new match, so loop until a pass changes nothing.
        while (true) {
            $str = (string)preg_replace($patterns, $replacement, $str, -1, $replaced);
            if ($replaced === 0) {
                break;
            }
        }

        return $str;
    }

    /**
     * Replace the characters listed in $this->WHITESPACE_TABLE with a plain space.
     *
     * Unless $keepBidiUnicodeControls is set, the characters from
     * $this->BIDI_UNI_CODE_CONTROLS_TABLE are removed first. Both lookup lists
     * are memoised in static variables.
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $keepNonBreakingSpace    <p>Leave the 'NO-BREAK SPACE' table entry untouched.</p>
     * @param bool   $keepBidiUnicodeControls <p>Do not strip the bidi control characters.</p>
     *
     * @return string the normalised string ('' for empty input)
     */
    public function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
    {
        static $whitespaceByMode = [];
        static $bidiControls = null;

        if ($str === '') {
            return '';
        }

        // One cached list per flag value (0 / 1).
        $mode = (int)$keepNonBreakingSpace;

        if (!isset($whitespaceByMode[$mode])) {
            $table = $this->WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $whitespaceByMode[$mode] = array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($bidiControls === null) {
                $bidiControls = array_values($this->BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            $str = \str_replace($bidiControls, '', $str);
        }

        return str_replace($whitespaceByMode[$mode], ' ', $str);
    }

    /**
     * Replace typographic quotes, dashes and the ellipsis with plain ASCII equivalents.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string with the listed characters replaced ('' for empty input)
     */
    private function normalize_msword($str)
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $asciiFor = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        return str_replace(array_keys($asciiFor), array_values($asciiFor), $str);
    }

    /**
     * Strip byte-order marks from the beginning of a string.
     *
     * Every entry of $this->BOM (BOM string => byte length) is tested in turn
     * against the current start of the string, so several different marks in a
     * row are removed one after the other.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string without the leading BOM(s) ('' for empty input)
     */
    public function remove_bom($str)
    {
        if ($str === '') {
            return '';
        }

        $remaining = \strlen($str);
        foreach ($this->BOM as $bomString => $bomByteLength) {
            if (strpos($str, $bomString, 0) !== 0) {
                // string does not start with this mark
                continue;
            }

            $tail = \substr($str, $bomByteLength, $remaining);
            if ($tail === false) {
                return '';
            }

            $remaining -= (int)$bomByteLength;
            $str = (string)$tail;
        }

        return $str;
    }

//    private function str_detect_encoding($str)
//    {
//        // init
//        $str = (string)$str;
//
//        //
//        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
//        //
//
//        if ($this->is_binary($str, true) === true) {
//            $isUtf16 = $this->is_utf16($str, false);
//            if ($isUtf16 === 1) {
//                return 'UTF-16LE';
//            }
//            if ($isUtf16 === 2) {
//                return 'UTF-16BE';
//            }
//
//            $isUtf32 = $this->is_utf32($str, false);
//            if ($isUtf32 === 1) {
//                return 'UTF-32LE';
//            }
//            if ($isUtf32 === 2) {
//                return 'UTF-32BE';
//            }
//
//            // is binary but not "UTF-16" or "UTF-32"
//            return false;
//        }
//
//        //
//        // 2.) simple check for ASCII chars
//        //
//
//        if ($this->isAscii($str) === true) {
//            return 'ASCII';
//        }
//
//        //
//        // 3.) simple check for UTF-8 chars
//        //
//
//        if ($this->isUtf8($str) === true) {
//            return 'UTF-8';
//        }
//
//        //
//        // 4.) check via "mb_detect_encoding()"
//        //
//        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"
//
//        $detectOrder = [
//            'ISO-8859-1',
//            'ISO-8859-2',
//            'ISO-8859-3',
//            'ISO-8859-4',
//            'ISO-8859-5',
//            'ISO-8859-6',
//            'ISO-8859-7',
//            'ISO-8859-8',
//            'ISO-8859-9',
//            'ISO-8859-10',
//            'ISO-8859-13',
//            'ISO-8859-14',
//            'ISO-8859-15',
//            'ISO-8859-16',
//            'WINDOWS-1251',
//            'WINDOWS-1252',
//            'WINDOWS-1254',
//            'CP932',
//            'CP936',
//            'CP950',
//            'CP866',
//            'CP850',
//            'CP51932',
//            'CP50220',
//            'CP50221',
//            'CP50222',
//            'ISO-2022-JP',
//            'ISO-2022-KR',
//            'JIS',
//            'JIS-ms',
//            'EUC-CN',
//            'EUC-JP',
//        ];
//
//        if ($this->SUPPORT['mbstring'] === true) {
//            // info: do not use the symfony polyfill here
//            $encoding = \mb_detect_encoding($str, $detectOrder, true);
//            if ($encoding) {
//                return $encoding;
//            }
//        }
//
//        //
//        // 5.) check via "iconv()"
//        //
//
//        if ($this->ENCODINGS === null) {
//            $this->ENCODINGS = $this->getData('encodings');
//        }
//
//        foreach ($this->ENCODINGS as $encodingTmp) {
//            // INFO: //IGNORE but still throw notice
//            /** @noinspection PhpUsageOfSilenceOperatorInspection */
//            if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
//                return $encodingTmp;
//            }
//        }
//
//        return false;
//    }

    /**
     * Turn a decimal code into its character by decoding the entity "&#<int>;".
     *
     * @param int|string $int <p>The decimal code.</p>
     *
     * @return mixed whatever htmlEntityDecode() returns for that entity
     */
    private function decimalToChr($int)
    {
        $entity = '&#' . $int . ';';

        return $this->htmlEntityDecode($entity, \ENT_QUOTES | \ENT_HTML5);
    }
//
//    private function is_utf16($str, $checkIfStringIsBinary = true)
//    {
//
//        // init
//        $str = (string)$str;
//        $strChars = [];
//
//        if (
//            $checkIfStringIsBinary === true
//            &&
//            $this->is_binary($str, true) === false
//        ) {
//            return false;
//        }
//
//        if ($this->SUPPORT['mbstring'] === false) {
//            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
//        }
//
//        $str = $this->remove_bom($str);
//
//
//        $maybeUTF16LE = 0;
//        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16LE');
//        if ($test) {
//            $test2 = \mb_convert_encoding($test, 'UTF-16LE', 'UTF-8');
//            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16LE');
//            if ($test3 === $test) {
//                if (\count($strChars) === 0) {
//                    $strChars = $this->count_chars($str, true, false);
//                }
//                $countChars = $this->count_chars($test3);
//                foreach ($countChars as $test3char => $test3charEmpty) {
//                    if (\in_array($test3char, $strChars, true) === true) {
//                        ++$maybeUTF16LE;
//                    }
//                    unset($countChars[$test3char]);
//                }
//            }
//        }
//
//        $maybeUTF16BE = 0;
//        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16BE');
//        if ($test) {
//            $test2 = \mb_convert_encoding($test, 'UTF-16BE', 'UTF-8');
//            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16BE');
//            if ($test3 === $test) {
//                if (\count($strChars) === 0) {
//                    $strChars = $this->count_chars($str, true, false);
//                }
//                $countChars = $this->count_chars($test3);
//                foreach ($countChars as $test3char => $test3charEmpty) {
//                    if (\in_array($test3char, $strChars, true) === true) {
//                        ++$maybeUTF16BE;
//                    }
//                    unset($countChars[$test3char]);
//                }
//
//            }
//        }
//
//        if ($maybeUTF16BE !== $maybeUTF16LE) {
//            if ($maybeUTF16LE > $maybeUTF16BE) {
//                return 1;
//            }
//
//            return 2;
//        }
//
//        return false;
//    }

    /**
     * Check if the string is UTF-32.
     *
     * For each byte order the input is converted to UTF-8, back to UTF-32 and to
     * UTF-8 again; when that round trip is stable, the characters of the result
     * are compared against the character statistics of the input to build a
     * score. The byte order with the higher score wins.
     *
     * @param mixed $str <p>The input string.</p>
     * @param bool $checkIfStringIsBinary <p>Return false right away when is_binary() rejects the input.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32 (or both scores are equal),<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    private function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string)$str;

        if ($checkIfStringIsBinary === true && $this->is_binary($str, true) === false) {
            return false;
        }

        if ($this->SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = $this->remove_bom($str);

        // Character statistics of the input, computed lazily and shared by both passes.
        $sourceChars = [];
        $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
            $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$decoded) {
                continue;
            }

            $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
            $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
            if ($roundTrip !== $decoded) {
                continue;
            }

            if (\count($sourceChars) === 0) {
                $sourceChars = $this->count_chars($str, true, false);
            }

            foreach ($this->count_chars($roundTrip) as $char => $occurrences) {
                if (\in_array($char, $sourceChars, true) === true) {
                    ++$score[$candidate];
                }
            }
        }

        if ($score['UTF-32BE'] === $score['UTF-32LE']) {
            return false;
        }

        return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
    }

    /**
     * Heuristically decide whether the input is binary data.
     *
     * Checks, in order: a string made only of "0"/"1" characters, a known file
     * signature reported by get_file_type(), more than 25% NUL bytes, and - in
     * strict mode - the MIME encoding detected by ext-fileinfo.
     *
     * @param mixed $input  <p>The data to inspect (cast to string).</p>
     * @param bool  $strict <p>Additionally consult finfo.</p>
     *
     * @return bool true when one of the checks classifies the input as binary
     *
     * @throws \RuntimeException in strict mode when $this->SUPPORT['finfo'] is false
     */
    private function is_binary($input, $strict = false)
    {
        $input = (string)$input;
        if ($input === '') {
            return false;
        }

        if (preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $fileType = $this->get_file_type($input);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        $byteCount = \strlen($input);
        $nullShare = \substr_count($input, "\x0", 0, $byteCount) / $byteCount;
        if ($nullShare > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if ($this->SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $detectedEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $detectedEncoding === 'binary';
    }

    /**
     * Guess a file type from the first two bytes of a string.
     *
     * The decimal values of the two bytes are concatenated (e.g. "%P" = 37, 80
     * becomes 3780) and looked up in a table of known signatures.
     *
     * @param string $str      <p>The data to inspect.</p>
     * @param array  $fallback <p>Returned when the data is shorter than two bytes or the signature is unknown.</p>
     *
     * @return array array with the keys "ext", "mime" and "type"
     */
    private function get_file_type(
        $str,
        $fallback = [
            'ext' => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ) {
        if ($str === '') {
            return $fallback;
        }

        $head = \substr($str, 0, 2);
        if ($head === false || \strlen($head) !== 2) {
            return $fallback;
        }

        $bytes = \unpack('C2chars', $head);
        if ($bytes === false) {
            return $fallback;
        }

        // signature code => [extension, mime type]
        $signatures = [
            3780   => ['pdf', 'application/pdf'],
            7790   => ['exe', 'application/octet-stream'],
            7784   => ['midi', 'audio/x-midi'],
            8075   => ['zip', 'application/zip'],
            8297   => ['rar', 'application/rar'],
            255216 => ['jpg', 'image/jpeg'],
            7173   => ['gif', 'image/gif'],
            6677   => ['bmp', 'image/bmp'],
            13780  => ['png', 'image/png'],
        ];

        $signature = (int)($bytes['chars1'] . $bytes['chars2']);
        if (!isset($signatures[$signature])) {
            return $fallback;
        }

        return [
            'ext' => $signatures[$signature][0],
            'mime' => $signatures[$signature][1],
            'type' => 'binary',
        ];
    }

    /**
     * Count how often each character occurs in a string.
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          <p>Forwarded to strSplit().</p>
     * @param bool   $tryToUseMbFunction <p>Forwarded to strSplit().</p>
     *
     * @return array map of character => number of occurrences
     */
    private function count_chars($str, $cleanUtf8 = false, $tryToUseMbFunction = true)
    {
        $characters = $this->strSplit($str, 1, $cleanUtf8, $tryToUseMbFunction);

        return array_count_values($characters);
    }

//    private function to_iso8859($str)
//    {
//        if (is_array($str) === true) {
//
//            foreach ($str as $key => $value) {
//                $str[$k] = $this->to_iso8859($value);
//            }
//
//            return $str;
//        }
//
//        $str = (string)$str;
//        if ($str === '') {
//            return '';
//        }
//
//        return $this->utf8_decode($str);
//    }

    /**
     * Checks whether the passed string contains only byte sequences that appear to be valid UTF-8 characters.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str <p>The string to be checked.</p>
     * @param bool $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    private function isUtf8($str)

    {
        if (\is_array($str) === true) {
            foreach ($str as $v) {
                if ($this->isUtf8($v) === false) {
                    return false;
                }
            }

            return true;
        }

        if ($str === '') {
            return true;
        }

        if ($this->system->pcre_utf8_support() !== true) {


            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return preg_match('/^.{1}/us', $str, $ar) === 1;

        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if ($this->ORD === null) {
            $this->ORD = $this->getData('ord');
        }

        $len = \strlen((string)$str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = $this->ORD[$str[$i]];

            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {

                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {


                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $tmp = $in;
                $tmp = ($tmp & 0x0000003F) << $shift;
                $mUcs4 |= $tmp;
                // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                // Unicode code point to be output.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }
                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {

                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        return true;
    }

    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * Two-byte sequences whose code point is below 256 are mapped through the CHR table;
     * every other multi-byte sequence (and a two-byte sequence cut off at the end of the
     * input) is replaced by "?". All remaining bytes are copied unchanged.
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>If true, the original string is returned instead of the decoded
     *                              one whenever stringLength() of the decoded result is not smaller
     *                              than stringLength() of the original.</p>
     *
     * @return string
     */
    private function utf8_decode($str, $keepUtf8Chars = false)
    {
        // Byte offsets below are only defined for strings.
        $str = (string)$str;
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if ($this->ORD === null) {
            $this->ORD = $this->getData('ord');
        }

        if ($this->CHR === null) {
            $this->CHR = $this->getData('chr');
        }

        $noCharFound = '?';

        // The string is rewritten in place: $i is the read offset, $j the write offset.
        // $j never overtakes $i, so bytes that still have to be read are never overwritten.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // Lead byte of a 2-byte sequence at the very end of the input:
                        // there is no continuation byte to read, so emit the placeholder
                        // instead of reading past the end of the string.
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = ($this->ORD[$str[$i] & "\x1F"] << 6) | $this->ORD[$str[++$i] & "\x3F"];

                    $str[$j] = $c < 256 ? $this->CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than the 3-byte case below.
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or, via fall-through, 4-byte) sequence: replaced by the placeholder.
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        // Only the first $j bytes hold decoded output; the rest is left over from the input.
        $return = substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            $this->stringLength($return) >= (int)$this->stringLength($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }

    /**
     * Returns the length of the given string, treating it as UTF-8.
     *
     * The first available back-end wins, in this order: mbstring, iconv, intl
     * (grapheme_strlen), plain strlen() when isAscii() accepts the string, and
     * finally a PCRE based count.
     *
     * @param string $str <p>The string being measured.</p>
     *
     * @return bool|int <p>The length, or false if the PCRE fallback could not match a single character.</p>
     */
    private function stringLength($str)
    {
        if ($str === '') {
            return 0;
        }

        // $SUPPORT defaults to an empty array, so guard every key before reading it.
        if (isset($this->SUPPORT['mbstring']) && $this->SUPPORT['mbstring'] === true) {
            return \mb_strlen($str, 'UTF-8');
        }

        if (isset($this->SUPPORT['iconv']) && $this->SUPPORT['iconv'] === true) {
            $returnTmp = \iconv_strlen($str, 'UTF-8');
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        if (isset($this->SUPPORT['intl']) && $this->SUPPORT['intl'] === true) {
            $returnTmp = \grapheme_strlen($str);
            // grapheme_strlen() signals failure with false or null; only a real
            // length may be returned, otherwise fall through to the next back-end.
            if ($returnTmp !== null && $returnTmp !== false) {
                return $returnTmp;
            }
        }

        if ($this->isAscii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php
        //

        \preg_match_all('/./us', $str, $parts);

        $returnTmp = \count($parts[0]);
        if ($returnTmp === 0) {
            // $str is not empty here, so zero matches means the pattern could not be applied.
            return false;
        }

        return $returnTmp;
    }


}

1			<?php
2
3			namespace devtoolboxuk\soteria\voku\Resources;
4
5			class Utf8 extends Resources
			0 ignored issues – show Coding Style introduced 2019-05-19 08:43 UTC by Report Bug Copy Issue Report The property $BROKEN_UTF8_FIX is not named in camelCase. This check marks property names that have not been written in camelCase. In camelCase names are written without any punctuation, the start of each new word being marked by a capital letter. Thus the name database connection string becomes `databaseConnectionString`. Loading history... Coding Style introduced 2019-05-19 08:43 UTC by Report Bug Copy Issue Report The property $WIN1252_TO_UTF8 is not named in camelCase. This check marks property names that have not been written in camelCase. In camelCase names are written without any punctuation, the start of each new word being marked by a capital letter. Thus the name database connection string becomes `databaseConnectionString`. Loading history... Coding Style introduced 2019-05-19 08:43 UTC by Report Bug Copy Issue Report The property $BIDI_UNI_CODE_CONTROLS_TABLE is not named in camelCase. This check marks property names that have not been written in camelCase. In camelCase names are written without any punctuation, the start of each new word being marked by a capital letter. Thus the name database connection string becomes `databaseConnectionString`. Loading history... Coding Style introduced 2019-05-19 08:43 UTC by Report Bug Copy Issue Report The property $WHITESPACE_TABLE is not named in camelCase. This check marks property names that have not been written in camelCase. In camelCase names are written without any punctuation, the start of each new word being marked by a capital letter. Thus the name database connection string becomes `databaseConnectionString`. Loading history... Complexity introduced 2019-05-19 08:43 UTC by Report Bug Copy Issue Report This class has 1967 lines of code which exceeds the configured maximum of 1000. Really long classes often contain too much logic and violate the single responsibility principle. 
We suggest taking a look at the “Code” section for options on how to refactor this code. Loading history... Complexity introduced 2019-05-19 08:43 UTC by Report Bug Copy Issue Report This class has a complexity of 313 which exceeds the configured maximum of 50. The class complexity is the sum of the complexity of all methods. A very high value is usually an indication that your class does not follow the single responsibility principle and does more than one job. Some resources for further reading: Single Responsibility Principle Open/Closed Principle You can also find more detailed suggestions for refactoring in the “Code” section of your repository. Loading history...
6			{
7
8			private $system;
9			private $ENCODINGS;
10			private $SUPPORT = [];
11			private $BROKEN_UTF8_FIX;
12			private $ORD;
13			private $CHR;
14			private $WIN1252_TO_UTF8;
15			private $BOM = [
16			"\xef\xbb\xbf" => 3, // UTF-8 BOM
17			'ï»¿' => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
18			"\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
19			' þÿ' => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
20			"\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
21			'ÿþ ' => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
22			"\xfe\xff" => 2, // UTF-16 (BE) BOM
23			'þÿ' => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
24			"\xff\xfe" => 2, // UTF-16 (LE) BOM
25			'ÿþ' => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
26			];
27
28			private $BIDI_UNI_CODE_CONTROLS_TABLE = [
29			// LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
30			8234 => "\xE2\x80\xAA",
31			// RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
32			8235 => "\xE2\x80\xAB",
33			// POP DIRECTIONAL FORMATTING // (use -> </bdo>)
34			8236 => "\xE2\x80\xAC",
35			// LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
36			8237 => "\xE2\x80\xAD",
37			// RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
38			8238 => "\xE2\x80\xAE",
39			// LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
devtoolboxuk / soteria

Push — master ( 1cc8fa...13169b )

Utf8::clean() B

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like