1 | <?php |
||||
2 | /** |
||||
3 | * File containing the class {@see \AppUtils\FileHelper\UnicodeHandling}. |
||||
4 | * |
||||
5 | * @package Application Utils |
||||
6 | * @subpackage FileHelper |
||||
7 | * @see \AppUtils\FileHelper\UnicodeHandling |
||||
8 | */ |
||||
9 | |||||
10 | declare(strict_types=1); |
||||
11 | |||||
12 | namespace AppUtils\FileHelper; |
||||
13 | |||||
14 | use AppUtils\FileHelper; |
||||
15 | use AppUtils\FileHelper_Exception; |
||||
16 | use DirectoryIterator; |
||||
17 | |||||
/**
 * Collection of methods related to unicode-safe file
 * operations and information access.
 *
 * The BOM and encoding lookup tables are stored statically:
 * they are built once, and shared by all instances.
 *
 * @package Application Utils
 * @subpackage FileHelper
 * @author Sebastian Mordziol <[email protected]>
 */
class UnicodeHandling
{
    /**
     * Amount of bytes read from the beginning of a file
     * when looking for a byte order mark.
     */
    private const BOM_READ_LENGTH = 20;

    /**
     * Encoding name => BOM byte sequence pairs.
     *
     * @var array<string,string>|NULL
     */
    protected static ?array $utfBoms = null;

    /**
     * All accepted spellings of the known encoding names.
     *
     * @var string[]|NULL
     */
    protected static ?array $encodings = null;

    public function __construct()
    {
        $this->initBOMs();
        $this->initEncodings();
    }

    /**
     * Detects the UTF BOM in the target file, if any. Returns
     * the encoding matching the BOM, which can be any of the
     * following:
     *
     * <ul>
     * <li>UTF32-BE</li>
     * <li>UTF32-LE</li>
     * <li>UTF16-BE</li>
     * <li>UTF16-LE</li>
     * <li>UTF8</li>
     * </ul>
     *
     * @param string|PathInfoInterface|DirectoryIterator $file
     * @return string|NULL The encoding name, or NULL if the file does not start with a known BOM.
     * @throws FileHelper_Exception If the file cannot be opened or read.
     * @see FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM
     */
    public function detectUTFBom($file) : ?string
    {
        $path = FileHelper::getFileInfo($file)
            ->requireExists(FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM)
            ->requireReadable(FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM)
            ->getPath();

        $fp = fopen($path, 'rb');

        if($fp === false)
        {
            throw new FileHelper_Exception(
                'Cannot open file for reading',
                sprintf('Tried opening file [%s] in read mode.', $path),
                FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM
            );
        }

        $bytes = fread($fp, self::BOM_READ_LENGTH);

        fclose($fp);

        if($bytes === false)
        {
            throw new FileHelper_Exception(
                'Cannot read from file',
                sprintf('Tried reading the first bytes of file [%s].', $path),
                FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM
            );
        }

        // The comparison is done byte-wise on purpose: a BOM is a raw
        // byte sequence, so multibyte string functions (which interpret
        // the data according to an encoding) must not be used here.
        // The 4-byte UTF32 marks are listed before the 2-byte UTF16
        // marks, so that "FF FE 00 00" is not reported as UTF16-LE.
        foreach($this->getUTFBOMs() as $encoding => $bom)
        {
            if(strncmp($bytes, $bom, strlen($bom)) === 0)
            {
                return $encoding;
            }
        }

        return null;
    }

    /**
     * Builds the static BOM lookup table, if it has
     * not been built yet.
     */
    private function initBOMs() : void
    {
        if(isset(self::$utfBoms))
        {
            return;
        }

        // Order matters: longer marks must come before the
        // shorter marks that they start with.
        self::$utfBoms = array(
            'UTF32-BE' => chr(0x00) . chr(0x00) . chr(0xFE) . chr(0xFF),
            'UTF32-LE' => chr(0xFF) . chr(0xFE) . chr(0x00) . chr(0x00),
            'UTF16-BE' => chr(0xFE) . chr(0xFF),
            'UTF16-LE' => chr(0xFF) . chr(0xFE),
            'UTF8' => chr(0xEF) . chr(0xBB) . chr(0xBF)
        );
    }

    /**
     * Builds the static list of accepted encoding names, if it
     * has not been built yet. For each known encoding, this adds
     * the name itself, the hyphenated variant ("UTF-16-LE"), and
     * both of these without the endianness suffix ("UTF16", "UTF-16").
     */
    private function initEncodings() : void
    {
        if(isset(self::$encodings))
        {
            return;
        }

        $endianSuffixes = array('-BE', '-LE');
        $variants = array();

        foreach($this->getKnownEncodings() as $name)
        {
            $hyphenated = str_replace('UTF', 'UTF-', $name);

            $variants[] = $name;
            $variants[] = $hyphenated;
            $variants[] = str_replace($endianSuffixes, '', $name);
            $variants[] = str_replace($endianSuffixes, '', $hyphenated);
        }

        // Several encodings produce the same variants (e.g. "UTF32"
        // for both endianness flavors): keep each name only once.
        self::$encodings = array_values(array_unique($variants));
    }

    /**
     * Retrieves a list of all UTF byte order mark character
     * sequences, as an associative array with
     * UTF encoding => bom sequence pairs.
     *
     * @return array<string,string>
     */
    public function getUTFBOMs() : array
    {
        // Guarantees an array, even if the constructor was bypassed.
        $this->initBOMs();

        return self::$utfBoms ?? array();
    }

    /**
     * Checks whether the specified encoding is a valid
     * unicode encoding, for example "UTF16-LE" or "UTF8".
     * Also accounts for alternate ways to write them, like
     * "UTF-8", and omitting little/big endian suffixes.
     *
     * The comparison is case-sensitive.
     *
     * @param string $encoding
     * @return boolean
     */
    public function isValidEncoding(string $encoding) : bool
    {
        // Guarantees an array, even if the constructor was bypassed.
        $this->initEncodings();

        return in_array($encoding, self::$encodings ?? array(), true);
    }

    /**
     * Retrieves a list of all known unicode file encodings.
     *
     * @return string[]
     */
    public function getKnownEncodings() : array
    {
        return array_keys($this->getUTFBOMs());
    }
}
||||
158 |