Issues in AbstractUTF16Encoding.php (main) - Issues in main - fisharebest/webtrees - Measure and Improve Code Quality continuously with Scrutinizer

Issues (2503)

app/Encodings/AbstractUTF16Encoding.php (1 issue)

Labels

Bug 1

Severity

Major 1

<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2025 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Encodings;

use function chr;
use function intdiv;
use function ord;
use function str_split;
use function strlen;

/**
 * Convert between UTF-8 and an encoding that stores each character as one
 * two-byte UTF-16 code unit.
 *
 * The byte order is supplied by concrete classes through characterToCodePoint()
 * and codePointToCharacter(). Surrogate pairs are not combined or generated, so
 * only code points up to U+FFFF are converted; anything else becomes a
 * replacement character.
 */
abstract class AbstractUTF16Encoding implements EncodingInterface
{
    // Written by fromUtf8() in place of input that cannot be converted.
    // Concrete classes should implement this.
    public const string REPLACEMENT_CHARACTER = '';

    /**
     * Convert a string from UTF-8 to another encoding.
     *
     * Malformed input (stray continuation bytes, truncated or overlong
     * sequences, encoded surrogates) and sequences of four or more bytes are
     * converted to static::REPLACEMENT_CHARACTER. The bytes examined while
     * decoding a malformed sequence are consumed.
     *
     * @param string $text
     *
     * @return string
     */
    public function fromUtf8(string $text): string
    {
        $out = '';
        $len = strlen($text);

        for ($n = 0; $n < $len; ++$n) {
            $byte1 = ord($text[$n]);

            if ($byte1 <= 0x7F) {
                // 0xxxxxxx => 7 bits
                $out .= $this->codePointToCharacter($byte1);
            } elseif ($byte1 <= 0xBF) {
                // 10xxxxxx => a continuation byte without a lead byte
                $out .= static::REPLACEMENT_CHARACTER;
            } elseif ($byte1 <= 0xDF) {
                // 110xxxxx 10xxxxxx => 11 bits (5,6)
                // A missing byte is read as 0x00, which fails the continuation-byte test below.
                $byte2 = $n + 1 < $len ? ord($text[++$n]) : 0x00;

                // Explicit parentheses are essential: "+" binds tighter than "<<", which binds tighter than "&".
                $code_point = (($byte1 & 0x1F) << 6) | ($byte2 & 0x3F);

                if (($byte2 & 0xC0) !== 0x80 || $code_point < 0x80) {
                    // Invalid continuation byte, or an overlong encoding of a 7 bit value
                    $out .= static::REPLACEMENT_CHARACTER;
                } else {
                    $out .= $this->codePointToCharacter($code_point);
                }
            } elseif ($byte1 <= 0xEF) {
                // 1110xxxx 10xxxxxx 10xxxxxx => 16 bits (4,6,6)
                $byte2 = $n + 1 < $len ? ord($text[++$n]) : 0x00;
                $byte3 = $n + 1 < $len ? ord($text[++$n]) : 0x00;

                $code_point = (($byte1 & 0x0F) << 12) | (($byte2 & 0x3F) << 6) | ($byte3 & 0x3F);

                $malformed = ($byte2 & 0xC0) !== 0x80 || ($byte3 & 0xC0) !== 0x80;
                $overlong  = $code_point < 0x800;
                $surrogate = $code_point >= 0xD800 && $code_point <= 0xDFFF;

                if ($malformed || $overlong || $surrogate) {
                    $out .= static::REPLACEMENT_CHARACTER;
                } else {
                    $out .= $this->codePointToCharacter($code_point);
                }
            } else {
                // 0xF0 - 0xFF => lead bytes of longer sequences, or bytes that are never valid.
                // Any continuation bytes that follow are each replaced by the 10xxxxxx branch above.
                $out .= static::REPLACEMENT_CHARACTER;
            }
        }

        return $out;
    }

    /**
     * Convert a string from another encoding to UTF-8.
     *
     * The input is read two bytes at a time; each pair is turned into a
     * code-point by characterToCodePoint() and then encoded as UTF-8.
     *
     * @param string $text
     *
     * @return string
     */
    public function toUtf8(string $text): string
    {
        $utf8 = '';

        foreach (str_split($text, 2) as $character) {
            $code_point = $this->characterToCodePoint($character);

            if ($code_point <= 0x7F) {
                // 7 bits => 1 byte
                $utf8 .= chr($code_point);
            } elseif ($code_point <= 0xFF) {
                // U+80 - U+FF are invalid
                // NOTE(review): these are valid Unicode code points (e.g. U+00E9), so this
                // looks stricter than UTF-16 requires - confirm that it is intentional.
                $utf8 .= UTF8::REPLACEMENT_CHARACTER;
            } elseif ($code_point <= 0x7FF) {
                // 11 bits (5,6) => 2 bytes
                $utf8 .= chr(0xC0 | ($code_point >> 6));
                $utf8 .= chr(0x80 | ($code_point & 0x3F));
            } elseif ($code_point <= 0xD7FF || $code_point >= 0xE000) {
                // 16 bits (4,6,6) => 3 bytes
                $utf8 .= chr(0xE0 | ($code_point >> 12));
                $utf8 .= chr(0x80 | (($code_point >> 6) & 0x3F));
                $utf8 .= chr(0x80 | ($code_point & 0x3F));
            } else {
                // U+D800 - U+DFFF are invalid
                $utf8 .= UTF8::REPLACEMENT_CHARACTER;
            }
        }

        return $utf8;
    }

    /**
     * When reading multi-byte encodings using a stream, we must avoid incomplete characters.
     *
     * Returns the length of the text rounded down to a whole number of two-byte characters.
     *
     * @param string $text
     *
     * @return int
     */
    public function convertibleBytes(string $text): int
    {
        return 2 * intdiv(strlen($text), 2);
    }

    /**
     * Convert two bytes to a code-point, taking care of byte-order.
     *
     * @param string $character
     *
     * @return int
     */
    abstract protected function characterToCodePoint(string $character): int;

    /**
     * Convert a code-point to two bytes, taking care of byte-order.
     *
     * @param int $code_point
     *
     * @return string
     */
    abstract protected function codePointToCharacter(int $code_point): string;
}


1			<?php
2
3			/**
4			* webtrees: online genealogy
5			* Copyright (C) 2025 webtrees development team
6			* This program is free software: you can redistribute it and/or modify
7			* it under the terms of the GNU General Public License as published by
8			* the Free Software Foundation, either version 3 of the License, or
9			* (at your option) any later version.
10			* This program is distributed in the hope that it will be useful,
11			* but WITHOUT ANY WARRANTY; without even the implied warranty of
12			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13			* GNU General Public License for more details.
14			* You should have received a copy of the GNU General Public License
15			* along with this program. If not, see <https://www.gnu.org/licenses/>.
16			*/
17
18			declare(strict_types=1);
19
20			namespace Fisharebest\Webtrees\Encodings;
21
22			use function chr;
23			use function intdiv;
24			use function ord;
25			use function str_split;
26			use function strlen;
27
28			/**
29			* Convert between an encoding and UTF-16.
30			*/
31			abstract class AbstractUTF16Encoding implements EncodingInterface
32			{
33			// Concrete classes should implement this.
34			public const string REPLACEMENT_CHARACTER = '';
			0 ignored issues – show. Bug introduced 2024-11-23 00:11 UTC by: Report Bug · Copy Issue Report · Show Similar Issues like this. A parse error occurred: Syntax error, unexpected T_STRING, expecting '=' on line 34 at column 24. Loading history...
35
36			/**
37			* Convert a string from UTF-8 to another encoding.
38			*
39			* @param string $text
40			*
41			* @return string
42			*/
43			public function fromUtf8(string $text): string
44			{
45			$out = '';
46			$len = strlen($text);
47
48			for ($n = 0; $n < $len; ++$n) {
49			$code_point = ord($text[$n]);
50
51			if ($code_point <= 0x7F) {
52			$out .= $this->codePointToCharacter($code_point);
53			} elseif ($code_point <= 0xBF) {
54			// Invalid
55			$out .= static::REPLACEMENT_CHARACTER;
56			} elseif ($code_point <= 0xDF) {
57			$byte2 = ord($text[++$n]);
58
59			if (($byte2 & 0xC0) !== 0x80) {
60			// Invalid
61			$out .= static::REPLACEMENT_CHARACTER;
62			} else {
63			$out .= $this->codePointToCharacter($code_point << 6 + $byte2 & 0x3F);
64			}
65			} elseif ($code_point <= 0xEF) {
66			$byte2 = ord($text[++$n]);
67			$byte3 = ord($text[++$n]);
68
69			if (($byte2 & 0xC0) !== 0x80 \|\| ($byte3 & 0xC0) !== 0x80) {
70			// Invalid
71			$out .= static::REPLACEMENT_CHARACTER;
72			} else {
73			$out .= $this->codePointToCharacter($code_point << 12 + ($byte2 & 0x3F) << 6 + $byte3 & 0x3F);
74			}
75			} else {
76			// Invalid
77			$out .= static::REPLACEMENT_CHARACTER;
78			}
79			}
80
81			return $out;
82			}
83
84			/**
85			* Convert a string from another encoding to UTF-8.
86			*
87			* @param string $text
88			*
89			* @return string
90			*/
91			public function toUtf8(string $text): string
92			{
93			$utf8 = '';
94
95			foreach (str_split($text, 2) as $character) {
96			$code_point = $this->characterToCodePoint($character);
97
98			if ($code_point <= 0x7F) {
99			// 7 bits => 1 byte
100			$utf8 .= chr($code_point);
101			} elseif ($code_point <= 0xFF) {
102			// U+80 - U+FF are invalid
103			$utf8 .= UTF8::REPLACEMENT_CHARACTER;
104			} elseif ($code_point <= 0x7FF) {
105			// 11 bits (5,6) => 2 bytes
106			$utf8 .= chr(0xC0 \| ($code_point >> 6));
107			$utf8 .= chr(0x80 \| $code_point & 0x3F);
108			} elseif ($code_point <= 0xD7FF \|\| $code_point >= 0xE000) {
109			// 16 bits (4,6,6) => 3 bytes
110			$utf8 .= chr(0xE0 \| ($code_point >> 12));
111			$utf8 .= chr(0x80 \| ($code_point >> 6) & 0x3F);
112			$utf8 .= chr(0x80 \| $code_point & 0x3F);
113			} else {
114			// U+D800 - U+DFFF are invalid
115			$utf8 .= UTF8::REPLACEMENT_CHARACTER;
116			}
117			}
118
119			return $utf8;
120			}
121
122			/**
123			* When reading multi-byte encodings using a stream, we must avoid incomplete characters.
124			*
125			* @param string $text
126			*
127			* @return int
128			*/
129			public function convertibleBytes(string $text): int
130			{
131			return 2 * intdiv(strlen($text), 2);
132			}
133
134			/**
135			* Convert two bytes to a code-point, taking care of byte-order.
136			*
137			* @param string $character
138			*
139			* @return int
140			*/
141			abstract protected function characterToCodePoint(string $character): int;
142
143			/**
144			* Convert a code-point to two bytes, taking care of byte-order.
145			*
146			* @param int $code_point
147			*
148			* @return string
149			*/
150			abstract protected function codePointToCharacter(int $code_point): string;
151			}
152

fisharebest / webtrees

Issues (2503)

app/Encodings/AbstractUTF16Encoding.php (1 issue)

Labels

Severity

Introduced By

Duplication Side-by-Side

Filter issues like