Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like DomainPart often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DomainPart, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
35 | class DomainPart extends Parser |
||
36 | { |
||
37 | const DOMAIN_MAX_LENGTH = 254; |
||
38 | const LABEL_MAX_LENGTH = 63; |
||
39 | |||
40 | /** |
||
41 | * @var string |
||
42 | */ |
||
43 | protected $domainPart = ''; |
||
44 | |||
45 | 101 | public function parse($domainPart) |
|
70 | |||
71 | 101 | private function performDomainStartChecks() |
|
81 | |||
82 | 100 | private function checkEmptyDomain() |
|
92 | |||
93 | 101 | private function checkInvalidTokensAfterAT() |
|
102 | |||
103 | /** |
||
104 | * @return string |
||
105 | */ |
||
106 | 57 | public function getDomainPart() |
|
110 | |||
111 | /** |
||
112 | * @param string $addressLiteral |
||
113 | * @param int $maxGroups |
||
114 | */ |
||
115 | 7 | public function checkIPV6Tag($addressLiteral, $maxGroups = 8) |
|
157 | |||
/**
 * Consumes lexer tokens until the token type becomes null and returns the
 * concatenation of every consumed token value.
 *
 * On each iteration the current token is checked for forbidden characters,
 * comment parentheses are balanced, consecutive dots and invalid domain
 * tokens are rejected, a bracketed literal is parsed when one starts here,
 * and the running label is length-checked whenever a dot closes it.
 *
 * @return string the accumulated token values of the domain part
 *
 * @throws UnopenedComment when a closing parenthesis has no pending opener
 * @throws CharNotAllowed  when a space token follows a consumed token
 */
protected function doParseDomainPart()
{
    $parsedDomain = '';
    $currentLabel = '';
    $pendingParentheses = 0;

    do {
        // Captured before any lexer movement in this iteration.
        $previousToken = $this->lexer->getPrevious();

        $this->checkNotAllowedChars($this->lexer->token);

        if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
            $this->parseComments();
            $pendingParentheses += $this->getOpenedParenthesis();
            $this->lexer->moveNext();
            $tokenBehindCursor = $this->lexer->getPrevious();
            if ($tokenBehindCursor['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                $pendingParentheses--;
            }
        }

        if ($this->lexer->token['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
            if ($pendingParentheses === 0) {
                throw new UnopenedComment();
            }

            $pendingParentheses--;
        }

        $this->checkConsecutiveDots();
        $this->checkDomainPartExceptions($previousToken);

        if ($this->hasBrackets()) {
            $this->parseDomainLiteral();
        }

        if ($this->lexer->token['type'] !== EmailLexer::S_DOT) {
            $currentLabel .= $this->lexer->token['value'];
        } else {
            // A dot terminates the label collected so far.
            $this->checkLabelLength($currentLabel);
            $currentLabel = '';
        }

        if ($this->isFWS()) {
            $this->parseFWS();
        }

        $parsedDomain .= $this->lexer->token['value'];
        $this->lexer->moveNext();

        if ($this->lexer->token['type'] === EmailLexer::S_SP) {
            throw new CharNotAllowed();
        }
    } while ($this->lexer->token['type'] !== null);

    // The last label is not followed by a dot, so check it here.
    $this->checkLabelLength($currentLabel);

    return $parsedDomain;
}
||
217 | |||
/**
 * Rejects a token whose type is a backslash or a slash.
 *
 * @param array $token lexer token; only its 'type' entry is read
 *
 * @throws CharNotAllowed when the token type is S_BACKSLASH or S_SLASH
 */
private function checkNotAllowedChars(array $token)
{
    $forbiddenTypes = [
        EmailLexer::S_BACKSLASH => true,
        EmailLexer::S_SLASH => true,
    ];

    if (!isset($forbiddenTypes[$token['type']])) {
        return;
    }

    throw new CharNotAllowed();
}
|
225 | |||
/**
 * Records an IPV6ColonStart warning when the literal begins with a colon,
 * or with an IPv6 tag immediately followed by a double colon, and then
 * delegates to doParseDomainLiteral().
 *
 * @return string|false whatever doParseDomainLiteral() returns
 */
protected function parseDomainLiteral()
{
    if ($this->lexer->isNextToken(EmailLexer::S_COLON)) {
        $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
    }

    if (!$this->lexer->isNextToken(EmailLexer::S_IPV6TAG)) {
        return $this->doParseDomainLiteral();
    }

    // Look one token further on a copy so the real lexer position is untouched.
    $lookaheadLexer = clone $this->lexer;
    $lookaheadLexer->moveNext();
    if ($lookaheadLexer->isNextToken(EmailLexer::S_DOUBLECOLON)) {
        $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
    }

    return $this->doParseDomainLiteral();
}
||
244 | |||
/**
 * Consumes the tokens of a domain literal, collecting their values, and
 * validates the collected text as an IPv4/IPv6 address literal.
 *
 * The loop stops on a S_CLOSEQBRACKET token or when the lexer has no more
 * tokens. Afterwards every '[' is stripped from the collected text and the
 * result is passed through checkIPV4Tag(); when no IPv6 tag was seen a
 * DomainLiteral warning is recorded, otherwise an AddressLiteral warning is
 * recorded and checkIPV6Tag() is run on the text.
 *
 * @return string|false the collected literal, or false when checkIPV4Tag()
 *                      returns false
 *
 * @throws ExpectingDTEXT on a NUL token or when an opening bracket follows
 * @throws CRNoLF         when the next token is a bare carriage return
 */
protected function doParseDomainLiteral()
{
    $IPv6TAG = false;
    $addressLiteral = '';
    do {
        if ($this->lexer->token['type'] === EmailLexer::C_NUL) {
            throw new ExpectingDTEXT();
        }

        if ($this->lexer->token['type'] === EmailLexer::INVALID ||
            $this->lexer->token['type'] === EmailLexer::C_DEL ||
            $this->lexer->token['type'] === EmailLexer::S_LF
        ) {
            $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
        }

        if ($this->lexer->isNextTokenAny(array(EmailLexer::S_OPENQBRACKET, EmailLexer::S_OPENBRACKET))) {
            throw new ExpectingDTEXT();
        }

        // The list must contain token types only. The previous code passed the
        // boolean result of a comparison as the third element, which can never
        // match a token type, so an upcoming CRLF was silently ignored.
        if ($this->lexer->isNextTokenAny(
            array(EmailLexer::S_HTAB, EmailLexer::S_SP, EmailLexer::CRLF)
        )) {
            $this->warnings[CFWSWithFWS::CODE] = new CFWSWithFWS();
            $this->parseFWS();
        }

        if ($this->lexer->isNextToken(EmailLexer::S_CR)) {
            throw new CRNoLF();
        }

        if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH) {
            $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
            $addressLiteral .= $this->lexer->token['value'];
            $this->lexer->moveNext();
            $this->validateQuotedPair();
        }
        if ($this->lexer->token['type'] === EmailLexer::S_IPV6TAG) {
            $IPv6TAG = true;
        }
        if ($this->lexer->token['type'] === EmailLexer::S_CLOSEQBRACKET) {
            break;
        }

        $addressLiteral .= $this->lexer->token['value'];
    } while ($this->lexer->moveNext());

    // Drop the opening bracket(s) that were collected along with the content.
    $addressLiteral = str_replace('[', '', $addressLiteral);
    $addressLiteral = $this->checkIPV4Tag($addressLiteral);

    if (false === $addressLiteral) {
        return $addressLiteral;
    }

    if (!$IPv6TAG) {
        $this->warnings[DomainLiteral::CODE] = new DomainLiteral();
        return $addressLiteral;
    }

    $this->warnings[AddressLiteral::CODE] = new AddressLiteral();

    $this->checkIPV6Tag($addressLiteral);

    return $addressLiteral;
}
||
314 | |||
/**
 * Looks for a dotted-quad IPv4 address at the end of the literal.
 *
 * When none is found the literal is returned untouched. When the match
 * starts at offset 0 an AddressLiteral warning is recorded and false is
 * returned. Otherwise the IPv4 tail is replaced by '0:0' so the remainder
 * can be checked as IPv6.
 *
 * @param string $addressLiteral
 *
 * @return string|false
 */
protected function checkIPV4Tag($addressLiteral)
{
    $ipv4Match = array();

    $found = preg_match(
        '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
        $addressLiteral,
        $ipv4Match
    );

    // preg_match() yields 1 on a match, 0 or false otherwise.
    if ($found !== 1) {
        return $addressLiteral;
    }

    $offset = strrpos($addressLiteral, $ipv4Match[0]);
    if ($offset === 0) {
        $this->warnings[AddressLiteral::CODE] = new AddressLiteral();

        return false;
    }

    // Swap the IPv4 tail for an IPv6-style placeholder for further testing.
    return substr($addressLiteral, 0, (int) $offset) . '0:0';
}
||
342 | |||
/**
 * Throws when the current lexer token is not acceptable inside a domain.
 *
 * @param array $prev the token preceding the current one; only 'type' is read
 *
 * @throws ExpectingATEXT on quotes, backtick, semicolon, '<' or '>' tokens,
 *                        on S_OPENQBRACKET not preceded by '@', and on a
 *                        backslash followed by a GENERIC token
 * @throws CommaInDomain  on a comma token
 * @throws ConsecutiveAt  on an '@' token
 * @throws DomainHyphened on a hyphen directly followed by a dot
 */
protected function checkDomainPartExceptions(array $prev)
{
    // The checks below never move the lexer, so the type can be read once.
    $currentType = $this->lexer->token['type'];

    $rejectedTypes = array(
        EmailLexer::S_DQUOTE => true,
        EmailLexer::S_SQUOTE => true,
        EmailLexer::S_BACKTICK => true,
        EmailLexer::S_SEMICOLON => true,
        EmailLexer::S_GREATERTHAN => true,
        EmailLexer::S_LOWERTHAN => true,
    );

    if (isset($rejectedTypes[$currentType])) {
        throw new ExpectingATEXT();
    }

    if ($currentType === EmailLexer::S_COMMA) {
        throw new CommaInDomain();
    }

    if ($currentType === EmailLexer::S_AT) {
        throw new ConsecutiveAt();
    }

    $afterAt = $prev['type'] === EmailLexer::S_AT;
    if ($currentType === EmailLexer::S_OPENQBRACKET && !$afterAt) {
        throw new ExpectingATEXT();
    }

    if ($currentType === EmailLexer::S_HYPHEN && $this->lexer->isNextToken(EmailLexer::S_DOT)) {
        throw new DomainHyphened();
    }

    if ($currentType === EmailLexer::S_BACKSLASH && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
        throw new ExpectingATEXT();
    }
}
|
379 | |||
/**
 * Tells whether the current token opens a bracketed literal, and makes sure
 * a closing bracket can be found by the lexer when it does.
 *
 * @return bool true when the current token is S_OPENBRACKET
 *
 * @throws ExpectingDomainLiteralClose when the lexer's find() raises a
 *                                     RuntimeException for S_CLOSEBRACKET
 */
protected function hasBrackets()
{
    $opensLiteral = $this->lexer->token['type'] === EmailLexer::S_OPENBRACKET;

    if ($opensLiteral) {
        try {
            $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
        } catch (\RuntimeException $e) {
            throw new ExpectingDomainLiteralClose();
        }
    }

    return $opensLiteral;
}
||
397 | |||
/**
 * Records a LabelTooLong warning when isLabelTooLong() reports the label
 * as too long; does nothing otherwise.
 *
 * @param string $label
 */
protected function checkLabelLength($label)
{
    if (!$this->isLabelTooLong($label)) {
        return;
    }

    $this->warnings[LabelTooLong::CODE] = new LabelTooLong();
}
|
407 | |||
/**
 * Decides whether a label exceeds the allowed length.
 *
 * A label made only of bytes 0x00-0x7F is measured with strlen() against
 * LABEL_MAX_LENGTH. Any other label is run through idn_to_ascii() and the
 * IDNA_ERROR_LABEL_TOO_LONG bit of the reported errors decides.
 *
 * @param string $label
 * @return bool
 */
private function isLabelTooLong($label)
{
    $hasNonAsciiBytes = preg_match('/[^\x00-\x7F]/', $label);

    if (!$hasNonAsciiBytes) {
        return strlen($label) > self::LABEL_MAX_LENGTH;
    }

    // Only the error flags are needed; the converted string is discarded.
    idn_to_ascii($label, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46, $idnaInfo);
    $tooLongFlag = $idnaInfo['errors'] & IDNA_ERROR_LABEL_TOO_LONG;

    return (bool) $tooLongFlag;
}
||
422 | |||
423 | 3 | protected function parseDomainComments() |
|
436 | |||
437 | protected function addTLDWarnings() |
||
443 | } |
||
444 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.