Passed
Push — dev ( 824cd4...d410ef )
by Greg
12:51
created

AbstractEncoding   A

Complexity

Total Complexity 6

Size/Duplication

Total Lines 67
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
eloc 21
c 1
b 0
f 1
dl 0
loc 67
rs 10
wmc 6

3 Methods

Rating   Name   Duplication   Size   Complexity  
A fromUtf8() 0 15 2
A toUtf8() 0 3 1
A convertibleBytes() 0 17 3
1
<?php
2
3
/**
4
 * webtrees: online genealogy
5
 * Copyright (C) 2021 webtrees development team
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees\Encodings;
21
22
use function array_flip;
23
use function array_map;
24
use function implode;
25
use function ord;
26
use function preg_split;
27
use function strlen;
28
use function strrpos;
29
use function strtr;
30
31
use const PREG_SPLIT_NO_EMPTY;
32
33
/**
 * Convert between an encoding and UTF-8.
 *
 * The conversion is table driven: the TO_UTF8 constant maps each encoded
 * character to its UTF-8 equivalent, and is read through late static binding
 * so that a subclass can supply its own table.
 */
abstract class AbstractEncoding implements EncodingInterface
{
    /** Emitted by fromUtf8() for any character that has no mapping. */
    protected const REPLACEMENT_CHARACTER = '?';

    /** @var array<string,string> Encoded character => utf8 character */
    protected const TO_UTF8 = [];

    /**
     * Convert a string from UTF-8 to another encoding.
     *
     * ASCII characters are copied unchanged. Other characters are looked up
     * in the inverse of TO_UTF8; anything without a mapping is replaced by
     * static::REPLACEMENT_CHARACTER.
     *
     * Input that is not well-formed UTF-8 is converted on a best-effort basis:
     * valid characters are still converted and malformed byte sequences are
     * replaced by static::REPLACEMENT_CHARACTER.
     *
     * @param string $text
     *
     * @return string
     */
    public function fromUtf8(string $text): string
    {
        $utf8  = array_flip(static::TO_UTF8);
        $utf8[UTF8::REPLACEMENT_CHARACTER] = static::REPLACEMENT_CHARACTER;

        $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);

        if ($chars === false) {
            // In unicode mode, preg_split() fails on malformed UTF-8.  Split in byte
            // mode instead: before every byte that is not a continuation byte, and
            // after every ASCII byte.  Each piece is then either one ASCII byte, a
            // lead byte with its continuation bytes, or a run of stray continuation
            // bytes.  Malformed pieces have no mapping, so they are replaced below.
            $chars = preg_split('/(?=[^\x80-\xBF])|(?<=[\x00-\x7F])/', $text, -1, PREG_SPLIT_NO_EMPTY);
        }

        if ($chars === false) {
            // Not expected for the byte-mode pattern, but never pass false to array_map().
            $chars = [];
        }

        $chars = array_map(static function (string $char) use ($utf8): string {
            // Only the first byte is examined: ASCII characters are single bytes.
            if (ord($char) < 128) {
                return $char;
            }

            return $utf8[$char] ?? static::REPLACEMENT_CHARACTER;
        }, $chars);

        return implode('', $chars);
    }

    /**
     * Convert a string from another encoding to UTF-8.
     *
     * Every occurrence of a key of TO_UTF8 is replaced by the corresponding
     * value. Text that matches no key is left unchanged.
     *
     * @param string $text
     *
     * @return string
     */
    public function toUtf8(string $text): string
    {
        return strtr($text, static::TO_UTF8);
    }

    /**
     * When reading multi-byte encodings using a stream, we must avoid incomplete characters.
     *
     * The safe characters are tried in order: line-feed, carriage-return, space.
     * For the first one that occurs in the text, the result is the offset just
     * after its last occurrence. If none of them occurs, the result is zero.
     *
     * @param string $text
     *
     * @return int The number of leading bytes of $text that can be converted.
     */
    public function convertibleBytes(string $text): int
    {
        // Encode the safe characters, as they are searched for in the encoded text.
        $safe_chars = [
            $this->fromUtf8("\n"),
            $this->fromUtf8("\r"),
            $this->fromUtf8(' '),
        ];

        foreach ($safe_chars as $char) {
            $pos = strrpos($text, $char);

            if ($pos !== false) {
                // Include the safe character itself.
                return $pos + strlen($char);
            }
        }

        return 0;
    }
}
105