Conditions | 13 |
Paths | 75 |
Total Lines | 97 |
Code Lines | 67 |
Lines | 0 |
Ratio | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 1 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
1 | <?php |
||
/**
 * Work out the character encoding of a GEDCOM file from the start of its content.
 *
 * Detection is attempted in this order:
 *  1. a UTF-8 / UTF-16 byte-order-mark at the very start;
 *  2. a leading "0" character stored as two bytes with a null byte (UTF-16 without a BOM);
 *  3. the "1 CHAR" (or "1 CHARACTER") line, optionally combined with a "2 VERS" line,
 *     of the first record, compared case-insensitively against a table of known names.
 *
 * @param string $header The first bytes of the file.
 *
 * @return EncodingInterface|null The detected encoding, or null when $header does not yet
 *                                contain a complete first record (no following level-0 line).
 *
 * @throws InvalidGedcomEncodingException When a complete record is present but none of the
 *                                        known character-set names match it.
 */
public function detect(string $header): ?EncodingInterface
{
    $utf_bom = [
        '/^' . UTF8::BYTE_ORDER_MARK . '/'    => UTF8::NAME,
        '/^' . UTF16BE::BYTE_ORDER_MARK . '/' => UTF16BE::NAME,
        '/^' . UTF16LE::BYTE_ORDER_MARK . '/' => UTF16LE::NAME,
    ];

    foreach ($utf_bom as $regex => $encoding) {
        if (preg_match($regex, $header) === 1) {
            return $this->make($encoding);
        }
    }

    // No BOM: a leading "0" paired with a null byte still identifies UTF-16 and its byte order.
    $utf16 = [
        "\x000" => UTF16BE::NAME,
        "0\x00" => UTF16LE::NAME,
    ];

    foreach ($utf16 as $start => $encoding) {
        if (str_starts_with($header, $start)) {
            return $this->make($encoding);
        }
    }

    // Standardize whitespace to simplify matching.
    $header = strtr(ltrim($header), ["\r\n" => "\n", "\n\r" => "\n", "\r" => "\n"]);

    // Strip spaces around line breaks and collapse runs of spaces into one.
    // The last needle must be TWO spaces: testing for / replacing a single space
    // with itself would never change the string and the loop would never end.
    while (str_contains($header, "\n ") || str_contains($header, " \n") || str_contains($header, '  ')) {
        $header = strtr($header, ["\n " => "\n", " \n" => "\n", '  ' => ' ']);
    }

    // We need a complete header record
    $header = strstr($header, "\n0", true);

    if ($header === false) {
        return null;
    }

    // Some of these come from Tamura Jones, the rest from webtrees users.
    $character_sets = [
        'ASCII'             => ASCII::NAME,
        'ANSEL'             => ANSEL::NAME,
        'UTF-8'             => UTF8::NAME,
        'UNICODE'           => UTF8::NAME, // If the null byte test failed, this can't be UTF16
        'ASCII/MacOS Roman' => MacRoman::NAME, // GEDitCOM
        'ASCII/MACINTOSH'   => MacRoman::NAME, // MacFamilyTree < 8.3.5
        'MACINTOSH'         => MacRoman::NAME, // MacFamilyTree >= 8.3.5
        'CP437'             => CP437::NAME,
        'IBMPC'             => CP437::NAME,
        'IBM'               => CP437::NAME, // Reunion
        'IBM-PC'            => CP437::NAME, // CumberlandFamilyTree
        'OEM'               => CP437::NAME, // Généatique
        'CP850'             => CP850::NAME,
        'MSDOS'             => CP850::NAME,
        'IBM-DOS'           => CP850::NAME, // Reunion, EasyTree
        'MS-DOS'            => CP850::NAME, // AbrEdit FTM for Windows
        'ANSI'              => CP850::NAME,
        'WINDOWS'           => CP850::NAME, // Parentele
        'IBM WINDOWS'       => CP850::NAME, // EasyTree, Généalogie, Reunion, TribalPages
        'IBM_WINDOWS'       => CP850::NAME, // EasyTree
        'CP1250'            => Windows1250::NAME,
        'windows-1250'      => Windows1250::NAME, // GenoPro, Rodokmen Pro
        'CP1251'            => Windows1251::NAME,
        'WINDOWS-1251'      => Windows1251::NAME, // Rodovid
        'CP1252'            => Windows1252::NAME, // Lifelines
        'ISO-8859-1'        => ISO88591::NAME, // Cumberland Family Tree, Lifelines
        'ISO8859-1'         => ISO88591::NAME, // Scion Genealogist
        'ISO8859'           => ISO88591::NAME, // Genealogica Grafica
        'LATIN-1'           => ISO88591::NAME,
        'LATIN1'            => ISO88591::NAME, // GenealogyJ
        'ISO-8859-2'        => ISO88592::NAME,
        'ISO8859-2'         => ISO88592::NAME,
        'LATIN-2'           => ISO88592::NAME,
        'LATIN2'            => ISO88592::NAME,
    ];

    // The regexes below are not anchored on the right, so a short name also matches
    // any longer name that starts with it (e.g. "WINDOWS" matches "WINDOWS-1251",
    // "IBM" matches "IBM-DOS", "ASCII" matches the CHAR line of "ASCII/MACINTOSH").
    // Try the longest patterns first so the most specific entry wins.
    uksort($character_sets, static fn (string $a, string $b): int => strlen($b) <=> strlen($a));

    foreach ($character_sets as $pattern => $encoding) {
        if (str_contains($pattern, '/')) {
            // "CHAR/VERS" entries must match both the CHAR line and the VERS line below it.
            [$char, $vers] = explode('/', $pattern);
            $regex = "\n1 CHAR " . preg_quote($char, '/') . "\n2 VERS " . preg_quote($vers, '/');
        } else {
            $regex = "\n1 CHAR(?:ACTER)? " . preg_quote($pattern, '/');
        }

        if (preg_match('/' . $regex . '/i', $header) === 1) {
            return $this->make($encoding);
        }
    }

    // Nothing matched: report whatever the file declared, if anything.
    if (preg_match('/1 CHAR (.+)/', $header, $match) === 1) {
        $charset = $match[1];
    } else {
        $charset = '???';
    }

    throw new InvalidGedcomEncodingException($charset);
}
||
238 |