Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
1 | <?php |
||
49 | class IniParser |
||
50 | { |
||
51 | /** |
||
52 | * Options for regex patterns. |
||
53 | * |
||
54 | * REGEX_DELIMITER: Delimiter of all the regex patterns in the whole class. |
||
55 | * REGEX_MODIFIERS: Regex modifiers. |
||
56 | */ |
||
57 | const REGEX_DELIMITER = '@'; |
||
58 | const REGEX_MODIFIERS = 'i'; |
||
59 | const COMPRESSION_PATTERN_START = '@'; |
||
60 | const COMPRESSION_PATTERN_DELIMITER = '|'; |
||
61 | |||
62 | /** |
||
63 | * Number of patterns to combine for a faster regular expression search. |
||
64 | * |
||
65 | * @important The number of patterns that can be processed in one step |
||
66 | * is limited by the internal regular expression limits. |
||
67 | * @var int |
||
68 | */ |
||
69 | const COUNT_PATTERN = 50; |
||
70 | |||
/**
 * Creates new ini part cache files.
 *
 * Splits the INI content at its section headers, parses the body of every
 * section into a property list, formats each property value and groups the
 * JSON encoded result by the cache subkey derived from the hash of the
 * lower-cased section name.
 *
 * Every yielded value is a single-entry array `[subkey => lines]`, where each
 * line has the form "<patternhash>\t<json encoded properties>". Subkeys that
 * received content are yielded first; afterwards every remaining subkey
 * reported by SubKey::getAllIniPartCacheSubKeys() is yielded with an empty list.
 *
 * @param string $content the content of the INI file
 *
 * @return \Generator
 */
public function createIniParts($content)
{
    // get all patterns from the ini file in the correct order,
    // so that we can calculate with the index number of the resulting array,
    // which part to use when the ini file is split into its sections.
    preg_match_all('/(?<=\[)(?:[^\r\n]+)(?=\])/m', $content, $patternpositions);
    $patternpositions = $patternpositions[0];

    // split the ini file into sections and save the data in one line with a hash of the belonging
    // pattern (filtered in the previous step)
    $iniParts = preg_split('/\[[^\r\n]+\]/', $content);
    $contents = [];

    $propertyFormatter = new PropertyFormatter(new PropertyHolder());

    foreach ($patternpositions as $position => $pattern) {
        $pattern     = strtolower($pattern);
        $patternhash = Pattern::getHashForParts($pattern);
        $subkey      = SubKey::getIniPartCacheSubKey($patternhash);

        if (!isset($contents[$subkey])) {
            $contents[$subkey] = [];
        }

        // the position has to be moved by one, because the header of the ini file
        // is also returned as a part.
        // INI_SCANNER_RAW has to be passed as the third argument (scanner mode); the second
        // argument is the boolean "process sections" flag, which is not wanted here because
        // the section headers were already removed by the split above.
        $browserProperties = parse_ini_string($iniParts[($position + 1)], false, INI_SCANNER_RAW);

        foreach (array_keys($browserProperties) as $property) {
            $browserProperties[$property] = $propertyFormatter->formatPropertyValue(
                $browserProperties[$property],
                $property
            );
        }

        $contents[$subkey][] = $patternhash . "\t" . json_encode(
            $browserProperties,
            JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_QUOT | JSON_HEX_AMP
        );
    }

    unset($patternpositions);
    unset($iniParts);

    // flipped, so that the subkeys can be removed by key once they were yielded
    $subkeys = array_flip(SubKey::getAllIniPartCacheSubKeys());

    foreach ($contents as $subkey => $content) {
        $subkey = (string) $subkey;

        yield [$subkey => $content];

        unset($subkeys[$subkey]);
    }

    // every subkey without content still gets an (empty) entry
    foreach (array_keys($subkeys) as $subkey) {
        $subkey = (string) $subkey;

        yield [$subkey => []];
    }
}
|
137 | |||
/**
 * Creates new pattern cache files.
 *
 * Collects the relevant section names (patterns) of the INI content, groups
 * them by pattern hash and pattern length, and joins them into tab separated
 * lines of the form "<patternhash>\t<length>\t<pattern>\t<pattern>...", with
 * at most self::COUNT_PATTERN patterns per line.
 *
 * Every yielded value is a single-entry array `[subkey => lines]`. Subkeys
 * that received content are yielded first; afterwards every remaining subkey
 * reported by SubKey::getAllPatternCacheSubkeys() is yielded with an empty
 * list. If the content contains no relevant pattern at all, a single empty
 * array is yielded instead.
 *
 * @param string $content the content of the INI file
 *
 * @return \Generator
 */
public function createPatterns($content)
{
    // get all relevant patterns from the INI file
    // - containing "*" or "?"
    // - not containing "*" or "?", but not having a comment
    preg_match_all(
        '/(?<=\[)(?:[^\r\n]*[?*][^\r\n]*)(?=\])|(?<=\[)(?:[^\r\n*?]+)(?=\])(?![^\[]*Comment=)/m',
        $content,
        $matches
    );

    if (empty($matches[0]) || !is_array($matches[0])) {
        yield [];

        return;
    }

    $quoterHelper = new Quoter();
    $matches      = $matches[0];
    usort($matches, [$this, 'compareBcStrings']);

    // build an array to structure the data. this requires some memory, but we need this step to be able to
    // sort the data in the way we need it (see below).
    $data = [];

    foreach ($matches as $pattern) {
        // the version section is not a browser pattern
        if ('GJK_Browscap_Version' === $pattern) {
            continue;
        }

        $pattern     = strtolower($pattern);
        $patternhash = Pattern::getHashForPattern($pattern, false);
        $tmpLength   = Pattern::getPatternLength($pattern);

        // special handling of default entry: it gets a fixed hash made of 32 "z" characters
        // (presumably so that it is sorted behind all other hashes by the ksort() below)
        if ($tmpLength === 0) {
            $patternhash = str_repeat('z', 32);
        }

        if (!isset($data[$patternhash])) {
            $data[$patternhash] = [];
        }

        if (!isset($data[$patternhash][$tmpLength])) {
            $data[$patternhash][$tmpLength] = [];
        }

        $pattern = $quoterHelper->pregQuote($pattern);

        // Check if the pattern contains digits - in this case we replace them with a digit regular expression,
        // so that very similar patterns (e.g. only with different browser version numbers) can be compressed.
        // This helps to speed up the first (and most expensive) part of the pattern search a lot.
        if (strpbrk($pattern, '0123456789') !== false) {
            $compressedPattern = preg_replace('/\d/', '[\d]', $pattern);

            // strict comparison: the entries are strings and must only be de-duplicated
            // when they are really identical
            if (!in_array($compressedPattern, $data[$patternhash][$tmpLength], true)) {
                $data[$patternhash][$tmpLength][] = $compressedPattern;
            }
        } else {
            $data[$patternhash][$tmpLength][] = $pattern;
        }
    }

    unset($matches);

    // sorting of the data is important to check the patterns later in the correct order, because
    // we need to check the most specific (=longest) patterns first, and the least specific
    // (".*" for "Default Browser") last.
    //
    // sort by pattern start to group them
    ksort($data);
    // and then by pattern length (longest first)
    foreach (array_keys($data) as $key) {
        krsort($data[$key]);
    }

    // write optimized file (grouped by the first character of the hash, generated from the pattern
    // start) with multiple patterns joined by tabs. this is to speed up loading of the data (small
    // array with pattern strings instead of a large array with single patterns) and also enables
    // us to search for multiple patterns in one preg_match call for a fast first search
    // (3-10 faster), followed by a detailed search for each single pattern.
    $contents = [];

    foreach ($data as $patternhash => $tmpEntries) {
        if (empty($tmpEntries)) {
            continue;
        }

        $subkey = SubKey::getPatternCacheSubkey($patternhash);

        if (!isset($contents[$subkey])) {
            $contents[$subkey] = [];
        }

        foreach ($tmpEntries as $tmpLength => $tmpPatterns) {
            if (empty($tmpPatterns)) {
                continue;
            }

            // limit the number of patterns per line, see self::COUNT_PATTERN
            $chunks = array_chunk($tmpPatterns, self::COUNT_PATTERN);

            foreach ($chunks as $chunk) {
                $contents[$subkey][] = $patternhash . "\t" . $tmpLength . "\t" . implode("\t", $chunk);
            }
        }
    }

    unset($data);

    // NOTE(review): unlike createIniParts() the result is not flipped here, so this relies on
    // SubKey::getAllPatternCacheSubkeys() returning an array that is keyed by subkey - confirm
    $subkeys = SubKey::getAllPatternCacheSubkeys();

    foreach ($contents as $subkey => $content) {
        $subkey = (string) $subkey;

        yield [$subkey => $content];

        unset($subkeys[$subkey]);
    }

    // every subkey without content still gets an (empty) entry
    foreach (array_keys($subkeys) as $subkey) {
        $subkey = (string) $subkey;

        yield [$subkey => []];
    }
}
|
268 | |||
269 | /** |
||
270 | * @param string $a |
||
271 | * @param string $b |
||
272 | * |
||
273 | * @return int |
||
274 | */ |
||
275 | 5 | private function compareBcStrings($a, $b) |
|
301 | } |
||
302 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.