UnicodeHandling   A
last analyzed

Complexity

Total Complexity 12

Size/Duplication

Total Lines 130
Duplicated Lines 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
wmc 12
eloc 36
c 3
b 0
f 0
dl 0
loc 130
rs 10

7 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A initEncodings() 0 19 3
A initBOMs() 0 13 2
A detectUTFBom() 0 21 3
A getKnownEncodings() 0 3 1
A isValidEncoding() 0 3 1
A getUTFBOMs() 0 3 1
1
<?php
2
/**
3
 * File containing the class {@see \AppUtils\FileHelper\UnicodeHandling}.
4
 *
5
 * @package Application Utils
6
 * @subpackage FileHelper
7
 * @see \AppUtils\FileHelper\UnicodeHandling
8
 */
9
10
declare(strict_types=1);
11
12
namespace AppUtils\FileHelper;
13
14
use AppUtils\FileHelper;
15
use AppUtils\FileHelper_Exception;
16
use DirectoryIterator;
17
18
/**
19
 * Collection of methods related to unicode-safe file
20
 * operations and information access.
21
 *
22
 * @package Application Utils
23
 * @subpackage FileHelper
24
 * @author Sebastian Mordziol <[email protected]>
25
 */
26
class UnicodeHandling
{
    /**
     * Lazily populated map of encoding name => raw BOM byte sequence.
     * Shared by all instances; stays NULL until first use.
     *
     * @var array<string,string>|NULL
     */
    protected static ?array $utfBoms = null;

    /**
     * Lazily populated list of accepted encoding spellings
     * (with/without hyphen, with/without endianness suffix).
     *
     * @var string[]|NULL
     */
    protected static ?array $encodings = null;

    /**
     * Eagerly fills the shared lookup tables. The public accessors
     * also initialize them on demand, so they keep working even if
     * this constructor is bypassed.
     */
    public function __construct()
    {
        $this->initBOMs();
        $this->initEncodings();
    }

    /**
     * Detects the UTF BOM in the target file, if any. Returns
     * the encoding matching the BOM, which can be any of the
     * following:
     *
     * <ul>
     * <li>UTF32-BE</li>
     * <li>UTF32-LE</li>
     * <li>UTF16-BE</li>
     * <li>UTF16-LE</li>
     * <li>UTF8</li>
     * </ul>
     *
     * @param string|PathInfoInterface|DirectoryIterator $file
     * @return string|NULL The encoding name, or NULL if the file does not start with a known BOM.
     * @throws FileHelper_Exception If the file does not exist, is not readable, or cannot be opened/read.
     * @see FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM
     */
    public function detectUTFBom($file) : ?string
    {
        $path = FileHelper::getFileInfo($file)
            ->requireExists(FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM)
            ->requireReadable(FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM)
            ->getPath();

        $fp = fopen($path, 'rb');

        // fopen() returns false on failure; passing that on to fread()
        // would raise a TypeError instead of the documented exception.
        if($fp === false)
        {
            throw new FileHelper_Exception(
                'Cannot open file for reading',
                sprintf('Tried opening file [%s] in read mode.', $path),
                FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM
            );
        }

        // 20 bytes is more than enough: the longest BOM is 4 bytes.
        $head = fread($fp, 20);

        fclose($fp);

        if($head === false)
        {
            throw new FileHelper_Exception(
                'Cannot read from file',
                sprintf('Tried reading the first bytes of file [%s].', $path),
                FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM
            );
        }

        // The table lists the 4-byte UTF-32 BOMs before the 2-byte
        // UTF-16 ones, so the longer match is tried first (the UTF32-LE
        // BOM begins with the UTF16-LE BOM).
        foreach($this->initBOMs() as $encoding => $bom)
        {
            // Byte-wise prefix comparison: BOMs are raw bytes, so no
            // multibyte (mb_*) string interpretation must be applied.
            if(strncmp($head, $bom, strlen($bom)) === 0)
            {
                return $encoding;
            }
        }

        return null;
    }

    /**
     * Populates the shared BOM table on first call, and returns it.
     *
     * @return array<string,string> Encoding name => BOM byte sequence.
     */
    private function initBOMs() : array
    {
        if(self::$utfBoms === null)
        {
            self::$utfBoms = array(
                'UTF32-BE' => chr(0x00) . chr(0x00) . chr(0xFE) . chr(0xFF),
                'UTF32-LE' => chr(0xFF) . chr(0xFE) . chr(0x00) . chr(0x00),
                'UTF16-BE' => chr(0xFE) . chr(0xFF),
                'UTF16-LE' => chr(0xFF) . chr(0xFE),
                'UTF8' => chr(0xEF) . chr(0xBB) . chr(0xBF)
            );
        }

        return self::$utfBoms;
    }

    /**
     * Populates the shared list of accepted encoding spellings on
     * first call, and returns it. For every known encoding, the
     * following variants are registered: as-is ("UTF16-LE"), with
     * a hyphen ("UTF-16-LE"), and both of these without the
     * endianness suffix ("UTF16", "UTF-16").
     *
     * @return string[]
     */
    private function initEncodings() : array
    {
        if(self::$encodings === null)
        {
            $variants = array();

            foreach($this->getKnownEncodings() as $name)
            {
                $withHyphen = str_replace('UTF', 'UTF-', $name);

                $variants[] = $name;
                $variants[] = $withHyphen;
                $variants[] = str_replace(array('-BE', '-LE'), '', $name);
                $variants[] = str_replace(array('-BE', '-LE'), '', $withHyphen);
            }

            self::$encodings = $variants;
        }

        return self::$encodings;
    }

    /**
     * Retrieves a list of all UTF byte order mark character
     * sequences, as an associative array with
     * UTF encoding => bom sequence pairs.
     *
     * @return array<string,string>
     */
    public function getUTFBOMs() : array
    {
        // Going through the initializer guarantees an array,
        // never the NULL placeholder of the static property.
        return $this->initBOMs();
    }

    /**
     * Checks whether the specified encoding is a valid
     * unicode encoding, for example "UTF16-LE" or "UTF8".
     * Also accounts for alternate way to write them, like
     * "UTF-8", and omitting little/big endian suffixes.
     * The comparison is strict and case sensitive.
     *
     * @param string $encoding
     * @return boolean
     */
    public function isValidEncoding(string $encoding) : bool
    {
        return in_array($encoding, $this->initEncodings(), true);
    }

    /**
     * Retrieves a list of all known unicode file encodings,
     * i.e. the keys of the BOM table.
     *
     * @return string[]
     */
    public function getKnownEncodings() : array
    {
        return array_keys($this->initBOMs());
    }
}
158