Mistralys /
application-utils
| 1 | <?php |
||||
| 2 | /** |
||||
| 3 | * File containing the class {@see \AppUtils\FileHelper\UnicodeHandling}. |
||||
| 4 | * |
||||
| 5 | * @package Application Utils |
||||
| 6 | * @subpackage FileHelper |
||||
| 7 | * @see \AppUtils\FileHelper\UnicodeHandling |
||||
| 8 | */ |
||||
| 9 | |||||
| 10 | declare(strict_types=1); |
||||
| 11 | |||||
| 12 | namespace AppUtils\FileHelper; |
||||
| 13 | |||||
| 14 | use AppUtils\FileHelper; |
||||
| 15 | use AppUtils\FileHelper_Exception; |
||||
| 16 | use DirectoryIterator; |
||||
| 17 | |||||
| 18 | /** |
||||
| 19 | * Collection of methods related to unicode-safe file |
||||
| 20 | * operations and information access. |
||||
| 21 | * |
||||
| 22 | * @package Application Utils |
||||
| 23 | * @subpackage FileHelper |
||||
| 24 | * @author Sebastian Mordziol <[email protected]> |
||||
| 25 | */ |
||||
class UnicodeHandling
{
    /**
     * Map of encoding name => BOM byte sequence. Shared by all
     * instances and filled on first use by {@see initBOMs()}.
     *
     * @var array<string,string>|NULL
     */
    protected static ?array $utfBoms = null;

    /**
     * List of all accepted encoding spellings. Shared by all
     * instances and filled on first use by {@see initEncodings()}.
     *
     * @var string[]|NULL
     */
    protected static ?array $encodings = null;

    /**
     * Eagerly fills the shared BOM and encoding lists, so that
     * they are available as soon as an instance exists.
     */
    public function __construct()
    {
        $this->initBOMs();
        $this->initEncodings();
    }

    /**
     * Detects the UTF BOM in the target file, if any. Returns
     * the encoding matching the BOM, which can be any of the
     * following:
     *
     * <ul>
     * <li>UTF32-BE</li>
     * <li>UTF32-LE</li>
     * <li>UTF16-BE</li>
     * <li>UTF16-LE</li>
     * <li>UTF8</li>
     * </ul>
     *
     * @param string|PathInfoInterface|DirectoryIterator $file
     * @return string|NULL The encoding name, or NULL if the file does not start with a known BOM.
     * @throws FileHelper_Exception If the file cannot be opened or read.
     * @see FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM
     */
    public function detectUTFBom($file) : ?string
    {
        $file = FileHelper::getFileInfo($file)
            ->requireExists(FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM)
            ->requireReadable(FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM);

        $path = $file->getPath();
        $fp = fopen($path, 'rb');

        // fopen() returns false on failure: without this check, the
        // false value would be passed on to fread() and trigger a
        // TypeError instead of the documented exception.
        if($fp === false)
        {
            throw new FileHelper_Exception(
                'Cannot open file for reading',
                sprintf('Tried opening file [%s] in read mode.', $path),
                FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM
            );
        }

        // Only the first bytes are relevant: the longest BOM is 4 bytes long.
        $head = fread($fp, 20);

        fclose($fp);

        if($head === false)
        {
            throw new FileHelper_Exception(
                'Cannot read from file',
                sprintf('Tried reading the first bytes of file [%s].', $path),
                FileHelper::ERROR_CANNOT_OPEN_FILE_TO_DETECT_BOM
            );
        }

        // The list order matters: the UTF32-LE BOM starts with the
        // same two bytes as the UTF16-LE BOM, so the four-byte BOMs
        // have to be checked before the two-byte ones.
        foreach($this->initBOMs() as $encoding => $bom)
        {
            // BOMs are raw byte sequences, so the comparison is done
            // byte-wise instead of using the multibyte string functions.
            if(strncmp($head, $bom, strlen($bom)) === 0)
            {
                return $encoding;
            }
        }

        return null;
    }

    /**
     * Fills the shared BOM list if that has not been done yet,
     * and returns it.
     *
     * @return array<string,string> Encoding name => BOM byte sequence pairs.
     */
    private function initBOMs() : array
    {
        if(self::$utfBoms === null)
        {
            self::$utfBoms = array(
                'UTF32-BE' => chr(0x00) . chr(0x00) . chr(0xFE) . chr(0xFF),
                'UTF32-LE' => chr(0xFF) . chr(0xFE) . chr(0x00) . chr(0x00),
                'UTF16-BE' => chr(0xFE) . chr(0xFF),
                'UTF16-LE' => chr(0xFF) . chr(0xFE),
                'UTF8' => chr(0xEF) . chr(0xBB) . chr(0xBF)
            );
        }

        return self::$utfBoms;
    }

    /**
     * Fills the shared list of accepted encoding spellings if that
     * has not been done yet, and returns it. For every known encoding,
     * the list contains the name itself, the variant with a hyphen
     * after "UTF", and both of these without the "-BE" / "-LE" suffix.
     *
     * @return string[]
     */
    private function initEncodings() : array
    {
        if(self::$encodings === null)
        {
            $suffixes = array('-BE', '-LE');
            $variants = array();

            foreach($this->getKnownEncodings() as $name)
            {
                $withHyphen = str_replace('UTF', 'UTF-', $name);

                $variants[] = $name;
                $variants[] = $withHyphen;
                $variants[] = str_replace($suffixes, '', $name);
                $variants[] = str_replace($suffixes, '', $withHyphen);
            }

            // Stripping the suffixes produces the same spelling several
            // times (for example "UTF16" from both "UTF16-BE" and
            // "UTF16-LE"): keep each spelling only once.
            self::$encodings = array_values(array_unique($variants));
        }

        return self::$encodings;
    }

    /**
     * Retrieves a list of all UTF byte order mark character
     * sequences, as an associative array with
     * UTF encoding => bom sequence pairs.
     *
     * @return array<string,string>
     */
    public function getUTFBOMs() : array
    {
        return $this->initBOMs();
    }

    /**
     * Checks whether the specified encoding is a valid
     * unicode encoding, for example "UTF16-LE" or "UTF8".
     * Also accounts for alternate ways to write them, like
     * "UTF-8", and omitting little/big endian suffixes.
     *
     * The comparison is strict and case-sensitive.
     *
     * @param string $encoding
     * @return boolean
     */
    public function isValidEncoding(string $encoding) : bool
    {
        return in_array($encoding, $this->initEncodings(), true);
    }

    /**
     * Retrieves a list of all known unicode file encodings.
     * These are the keys of the BOM list.
     *
     * @return string[]
     */
    public function getKnownEncodings() : array
    {
        return array_keys($this->initBOMs());
    }
}
||||
| 158 |