| Conditions | 13 |
| Paths | 75 |
| Total Lines | 97 |
| Code Lines | 67 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 1 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method
If many parameters/temporary variables are present:
- Replace Temp with Query
- Introduce Parameter Object
- Preserve Whole Object
| 1 | <?php |
||
/**
 * Detect the character encoding of GEDCOM data from its leading bytes.
 *
 * Checks are tried in this order:
 *  1. a Unicode byte-order mark (UTF-8, UTF-16BE, UTF-16LE);
 *  2. a null byte next to a leading "0" (UTF-16 without a byte-order mark);
 *  3. the CHAR / CHARACTER (and, for some applications, VERS) lines of the
 *     header record, matched case-insensitively against a table of known names.
 *
 * @param string $header Raw text to inspect; expected to start with the GEDCOM header record.
 *
 * @return EncodingInterface|null The detected encoding, or null when the text does not
 *                                yet contain a complete header record (no following
 *                                level-0 line was found).
 *
 * @throws InvalidGedcomEncodingException When a complete header record names no known character set.
 */
public function detect(string $header): ?EncodingInterface
{
    $utf_bom = [
        '/^' . UTF8::BYTE_ORDER_MARK . '/'    => UTF8::NAME,
        '/^' . UTF16BE::BYTE_ORDER_MARK . '/' => UTF16BE::NAME,
        '/^' . UTF16LE::BYTE_ORDER_MARK . '/' => UTF16LE::NAME,
    ];

    foreach ($utf_bom as $regex => $encoding) {
        if (preg_match($regex, $header) === 1) {
            return $this->make($encoding);
        }
    }

    // No byte-order mark: a null byte before/after the leading "0" still reveals UTF-16.
    $utf16 = [
        "\x000" => UTF16BE::NAME,
        "0\x00" => UTF16LE::NAME,
    ];

    foreach ($utf16 as $start => $encoding) {
        if (str_starts_with($header, $start)) {
            return $this->make($encoding);
        }
    }

    // Standardize whitespace to simplify matching.
    $header = strtr(ltrim($header), ["\r\n" => "\n", "\n\r" => "\n", "\r" => "\n"]);

    // Strip spaces around line breaks and collapse runs of spaces to a single space.
    // The run test must look for TWO spaces: testing for (and replacing) a single
    // space would never terminate once the header contains any space.
    while (str_contains($header, "\n ") || str_contains($header, " \n") || str_contains($header, '  ')) {
        $header = strtr($header, ["\n " => "\n", " \n" => "\n", '  ' => ' ']);
    }

    // We need a complete header record
    $header = strstr($header, "\n0", true);

    if ($header === false) {
        return null;
    }

    // Some of these come from Tamura Jones, the rest from webtrees users.
    $character_sets = [
        'ASCII'             => ASCII::NAME,
        'ANSEL'             => ANSEL::NAME,
        'UTF-8'             => UTF8::NAME,
        'UNICODE'           => UTF8::NAME, // If the null byte test failed, this can't be UTF16
        'ASCII/MacOS Roman' => MacRoman::NAME, // GEDitCOM
        'ASCII/MACINTOSH'   => MacRoman::NAME, // MacFamilyTree < 8.3.5
        'MACINTOSH'         => MacRoman::NAME, // MacFamilyTree >= 8.3.5
        'CP437'             => CP437::NAME,
        'IBMPC'             => CP437::NAME,
        'IBM'               => CP437::NAME, // Reunion
        'IBM-PC'            => CP437::NAME, // CumberlandFamilyTree
        'OEM'               => CP437::NAME, // Généatique
        'CP850'             => CP850::NAME,
        'MSDOS'             => CP850::NAME,
        'IBM-DOS'           => CP850::NAME, // Reunion, EasyTree
        'MS-DOS'            => CP850::NAME, // AbrEdit FTM for Windows
        'ANSI'              => CP850::NAME,
        'WINDOWS'           => CP850::NAME, // Parentele
        'IBM WINDOWS'       => CP850::NAME, // EasyTree, Généalogie, Reunion, TribalPages
        'IBM_WINDOWS'       => CP850::NAME, // EasyTree
        'CP1250'            => Windows1250::NAME,
        'windows-1250'      => Windows1250::NAME, // GenoPro, Rodokmen Pro
        'CP1251'            => Windows1251::NAME,
        'WINDOWS-1251'      => Windows1251::NAME, // Rodovid
        'CP1252'            => Windows1252::NAME, // Lifelines
        'ISO-8859-1'        => ISO88591::NAME, // Cumberland Family Tree, Lifelines
        'ISO8859-1'         => ISO88591::NAME, // Scion Genealogist
        'ISO8859'           => ISO88591::NAME, // Genealogica Grafica
        'LATIN-1'           => ISO88591::NAME,
        'LATIN1'            => ISO88591::NAME, // GenealogyJ
        'ISO-8859-2'        => ISO88592::NAME,
        'ISO8859-2'         => ISO88592::NAME,
        'LATIN-2'           => ISO88592::NAME,
        'LATIN2'            => ISO88592::NAME,
    ];

    // The regex below is a prefix match, so a short name such as 'IBM', 'WINDOWS',
    // 'ISO8859' or 'ASCII' also matches every longer name that starts with it
    // ('IBM WINDOWS', 'WINDOWS-1251', 'ISO8859-2', 'ASCII/MACINTOSH', ...).
    // Try the longer, more specific names first so they are not shadowed.
    uksort($character_sets, static fn (string $a, string $b): int => strlen($b) <=> strlen($a));

    foreach ($character_sets as $pattern => $encoding) {
        if (str_contains($pattern, '/')) {
            // "CHAR/VERS" entries: the character set is qualified by the following VERS line.
            [$char, $vers] = explode('/', $pattern);
            $regex = "\n1 CHAR " . $char . "\n2 VERS " . $vers;
        } else {
            $regex = "\n1 CHAR(?:ACTER)? " . $pattern;
        }

        if (preg_match("/" . $regex . "/i", $header) === 1) {
            return $this->make($encoding);
        }
    }

    // Nothing matched: report whatever the header declared, or a placeholder if it declared nothing.
    if (preg_match('/1 CHAR (.+)/', $header, $match) === 1) {
        $charset = $match[1];
    } else {
        $charset = '???';
    }

    throw new InvalidGedcomEncodingException($charset);
}
||
| 238 |