Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like TimeZoneDataParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use TimeZoneDataParser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
35 | class TimeZoneDataParser |
||
36 | { |
||
37 | /** |
||
38 | * @var Context A Context instance. |
||
39 | */ |
||
40 | protected $context = null; |
||
41 | |||
42 | /** |
||
43 | * Retrieve the current application context. |
||
44 | * |
||
45 | * @return Context A Context instance. |
||
46 | * |
||
47 | * @author Dominik del Bondio <[email protected]> |
||
48 | * @since 0.11.0 |
||
49 | */ |
||
50 | final public function getContext() |
||
54 | |||
55 | /** |
||
56 | * Initialize this parser. |
||
57 | * |
||
58 | * @param Context $context A Context instance. |
||
59 | * |
||
60 | * @author Dominik del Bondio <[email protected]> |
||
61 | * @since 0.11.0 |
||
62 | */ |
||
63 | public function initialize(Context $context) |
||
67 | |||
68 | const MIN_GEN_YEAR = 1900; |
||
69 | const MAX_GEN_YEAR = 2040; |
||
70 | const MAX_YEAR_VALUE = 2147483647; |
||
71 | const MIN_YEAR_VALUE = -2147483647; |
||
72 | |||
73 | /** |
||
74 | * @var array The preprocessed rules array. |
||
75 | */ |
||
76 | protected $rules = array(); |
||
77 | |||
78 | /** |
||
79 | * @see AgaviConfigParser::parse() |
||
80 | * |
||
81 | * @author Dominik del Bondio <[email protected]> |
||
82 | * @since 0.11.0 |
||
83 | */ |
||
84 | public function parse($config) |
||
93 | |||
94 | /** |
||
95 | * Parses the given file |
||
96 | * |
||
97 | * @param string $file The full path to the file to parse. |
||
98 | * |
||
99 | * @return array An array of zones and links. |
||
100 | * |
||
101 | * @author Dominik del Bondio <[email protected]> |
||
102 | * @since 0.11.0 |
||
103 | */ |
||
104 | protected function parseFile($file) |
||
172 | |||
173 | /** |
||
174 | * Prepares as much info for each internal rule as possible and stores it in |
||
175 | * $this->rules. |
||
176 | * |
||
177 | * @param array $rules The rules. |
||
178 | * |
||
179 | * @author Dominik del Bondio <[email protected]> |
||
180 | * @since 0.11.0 |
||
181 | */ |
||
182 | protected function prepareRules($rules) |
||
251 | |||
252 | /** |
||
253 | * Comparison function for usort comparing the time of 2 rules. |
||
254 | * |
||
255 | * @param array $a Parameter a |
||
256 | * @param array $b Parameter b |
||
257 | * |
||
258 | * @return int 0 if the times are equal, -1 if a is smaller, 1 if b is smaller. |
||
259 | * |
||
260 | * @author Dominik del Bondio <[email protected]> |
||
261 | * @since 0.11.0 |
||
262 | */ |
||
263 | public static function ruleCmp($a, $b) |
||
271 | |||
/**
 * Returns all rules of the named rule set that lie within the given limits.
 *
 * The stored time of each rule is adjusted according to the rule's "at"
 * type: for 'wallclock' the DST offset of the previous rule and the GMT
 * offset are subtracted, for 'standard' only the GMT offset is subtracted,
 * any other type is used unchanged.
 *
 * Rules whose adjusted time lies before $from are skipped. If the first rule
 * that is not skipped does not start exactly at $from, an additional entry
 * with a DST offset of 0 is inserted at $from; it is named after the variable
 * part of the last skipped rule (or an empty string if nothing was skipped).
 * Processing stops at the first rule whose adjusted time is not before the
 * time described by $until.
 *
 * @param      string       $name   The name of the ruleset.
 * @param      int|null     $from   The lower time limit of the rules or null
 *                                  if there is no lower limit.
 * @param      string|null  $until  The upper time limit as string or null if
 *                                  there is no upper limit.
 * @param      int          $gmtOff The gmt offset to be used.
 * @param      string|array $format The dst format. Either a sprintf() format
 *                                  string or an array whose index 0 is used
 *                                  when the dst offset is 0 and whose index 1
 *                                  is used otherwise.
 *
 * @return     array An array with the keys 'rules' (the matching rules,
 *                   completely processed), 'untilTime' (the until time that
 *                   was calculated for the last rule added, null if there is
 *                   none) and 'activeRules' (taken unchanged from the
 *                   ruleset).
 *
 * @throws     \InvalidArgumentException If no ruleset with the given name
 *                                       exists.
 *
 * @author     Dominik del Bondio <[email protected]>
 * @since      0.11.0
 */
protected function getRules($name, $from, $until, $gmtOff, $format)
{
    if (!isset($this->rules[$name])) {
        throw new \InvalidArgumentException('No rule with the name ' . $name . ' exists');
    }

    $formatIsArray = is_array($format);

    $rules = array();
    $lastDstOff = 0;
    $untilDate = null;
    $untilTime = null;
    $lastUntilTime = null;
    $firstHit = true;
    $lastSkippedRule = null;

    foreach ($this->rules[$name]['rules'] as $rule) {
        $time = $rule['time'];
        $dstOff = $rule['rule']['save'];
        $isEndless = $rule['rule']['endYear'] == self::MAX_YEAR_VALUE;

        if ($until !== null) {
            // The until string never changes inside the loop, so it is parsed
            // only once, and only when there is at least one rule to process.
            // NOTE(review): assumes dateStrToArray() depends only on its
            // argument; its body is not visible here - confirm.
            if ($untilDate === null) {
                $untilDate = $this->dateStrToArray($until);
            }

            // The until time has to be recalculated for every rule because it
            // depends on the dst offset of the current rule.
            $untilTime = $this->getOnDate(
                $untilDate['year'],
                $untilDate['month'],
                array('type' => 'date', 'date' => $untilDate['day'], 'day' => null),
                array('secondsInDay' => $untilDate['time']['seconds'], 'type' => $untilDate['time']['type']),
                $gmtOff,
                $dstOff
            );
        }

        switch ($rule['rule']['at']['type']) {
            case 'wallclock':
                // wall clock time includes the dst offset which was active
                // before this rule takes effect
                $time -= $lastDstOff;
                $time -= $gmtOff;
                break;

            case 'standard':
                $time -= $gmtOff;
                break;
        }

        $lastDstOff = $dstOff;

        if ($from !== null && $time < $from) {
            // skip the first few items until we reached the desired from, but
            // remember the rule since its variable part names the inserted
            // start entry below
            $lastSkippedRule = $rule;
            continue;
        }

        if ($firstHit) {
            if ($from != $time) {
                // the first matching rule starts later than $from, so cover
                // the gap with an entry that has no dst offset
                $rules[] = array(
                    'time' => $from,
                    'rawOffset' => $gmtOff,
                    'dstOffset' => 0,
                    'name' => sprintf(
                        $formatIsArray ? $format[0] : $format,
                        $lastSkippedRule !== null ? $lastSkippedRule['rule']['variablePart'] : ''
                    ),
                    'fromEndless' => false,
                );
            }
            $firstHit = false;
        }

        if ($until !== null && $time >= $untilTime) {
            break;
        }

        $rules[] = array(
            'time' => $time,
            'rawOffset' => $gmtOff,
            'dstOffset' => $dstOff,
            'name' => sprintf(
                $formatIsArray ? ($dstOff == 0 ? $format[0] : $format[1]) : $format,
                $rule['rule']['variablePart']
            ),
            'fromEndless' => $isEndless,
        );

        // only remember the until time of rules which were actually added
        $lastUntilTime = $untilTime;
    }

    return array(
        'rules' => $rules,
        'untilTime' => $lastUntilTime,
        'activeRules' => $this->rules[$name]['activeRules'],
    );
}
||
361 | |||
362 | /** |
||
363 | * Generates all the zone tables by processing their rules. |
||
364 | * |
||
365 | * @param array $zones The input zones tables. |
||
366 | * |
||
367 | * @return array The processed zones. |
||
368 | * |
||
369 | * @author Dominik del Bondio <[email protected]> |
||
370 | * @since 0.11.0 |
||
371 | */ |
||
372 | protected function generateDatatables($zones) |
||
545 | |||
546 | /** |
||
547 | * Returns the time specified by the input arguments. |
||
548 | * |
||
549 | * @param int $year The year. |
||
550 | * @param int $month The month. |
||
551 | * @param array $dateDef The date definition. |
||
552 | * @param array $atDef The at (time into the day) definition. |
||
553 | * @param int $gmtOff The gmt offset. |
||
554 | * @param int $dstOff The dst offset. |
||
555 | * |
||
556 | * @return int The unix timestamp. |
||
557 | * |
||
558 | * @author Dominik del Bondio <[email protected]> |
||
559 | * @since 0.11.0 |
||
560 | */ |
||
561 | protected function getOnDate($year, $month, $dateDef, $atDef, $gmtOff, $dstOff) |
||
605 | |||
606 | /** |
||
607 | * Splits a line into the amount of items requested according to the |
||
608 | * olson definition (which allows the last item to contain spaces) |
||
609 | * |
||
610 | * @param string $line The line. |
||
611 | * @param int $itemCount The amount of items. |
||
612 | * |
||
613 | * @return array The items. |
||
614 | * |
||
615 | * @author Dominik del Bondio <[email protected]> |
||
616 | * @since 0.11.0 |
||
617 | */ |
||
618 | protected function splitLine($line, $itemCount) |
||
671 | |||
672 | /** |
||
673 | * NAME FROM TO TYPE IN ON AT SAVE LETTER/S |
||
674 | * |
||
675 | * For example: |
||
676 | * |
||
677 | * Rule US 1967 1973 - Apr lastSun 2:00 1:00 D |
||
678 | * |
||
679 | * The fields that make up a rule line are: |
||
680 | * |
||
681 | * NAME Gives the (arbitrary) name of the set of rules this |
||
682 | * rule is part of. |
||
683 | * |
||
684 | * FROM Gives the first year in which the rule applies. Any |
||
685 | * integer year can be supplied; the Gregorian calendar |
||
686 | * is assumed. The word minimum (or an abbreviation) |
||
687 | * means the minimum year representable as an integer. |
||
688 | * The word maximum (or an abbreviation) means the |
||
689 | * maximum year representable as an integer. Rules can |
||
690 | * describe times that are not representable as time |
||
691 | * values, with the unrepresentable times ignored; this |
||
692 | * allows rules to be portable among hosts with |
||
693 | * differing time value types. |
||
694 | * |
||
695 | * TO Gives the final year in which the rule applies. In |
||
696 | * addition to minimum and maximum (as above), the word |
||
697 | * only (or an abbreviation) may be used to repeat the |
||
698 | * value of the FROM field. |
||
699 | * |
||
700 | * TYPE Gives the type of year in which the rule applies. |
||
701 | * If TYPE is - then the rule applies in all years |
||
702 | * between FROM and TO inclusive. If TYPE is something |
||
703 | * else, then zic executes the command |
||
704 | * yearistype year type |
||
705 | * to check the type of a year: an exit status of zero |
||
706 | * is taken to mean that the year is of the given type; |
||
707 | * an exit status of one is taken to mean that the year |
||
708 | * is not of the given type. |
||
709 | * |
||
710 | * IN Names the month in which the rule takes effect. |
||
711 | * Month names may be abbreviated. |
||
712 | * |
||
713 | * ON Gives the day on which the rule takes effect. |
||
714 | * Recognized forms include: |
||
715 | * |
||
716 | * 5 the fifth of the month |
||
717 | * lastSun the last Sunday in the month |
||
718 | * lastMon the last Monday in the month |
||
719 | * Sun>=8 first Sunday on or after the eighth |
||
720 | * Sun<=25 last Sunday on or before the 25th |
||
721 | * |
||
722 | * Names of days of the week may be abbreviated or |
||
723 | * spelled out in full. Note that there must be no |
||
724 | * spaces within the ON field. |
||
725 | * |
||
726 | * AT Gives the time of day at which the rule takes |
||
727 | * effect. Recognized forms include: |
||
728 | * |
||
729 | * 2 time in hours |
||
730 | * 2:00 time in hours and minutes |
||
731 | * 15:00 24-hour format time (for times after noon) |
||
732 | * 1:28:14 time in hours, minutes, and seconds |
||
733 | * - equivalent to 0 |
||
734 | * |
||
735 | * where hour 0 is midnight at the start of the day, |
||
736 | * and hour 24 is midnight at the end of the day. Any |
||
737 | * of these forms may be followed by the letter w if |
||
738 | * the given time is local "wall clock" time, s if the |
||
739 | * given time is local "standard" time, or u (or g or |
||
740 | * z) if the given time is universal time; in the |
||
741 | * absence of an indicator, wall clock time is assumed. |
||
742 | * |
||
743 | * SAVE Gives the amount of time to be added to local |
||
744 | * standard time when the rule is in effect. This |
||
745 | * field has the same format as the AT field (although, |
||
746 | * of course, the w and s suffixes are not used). |
||
747 | * |
||
748 | * LETTER/S |
||
749 | * Gives the "variable part" (for example, the "S" or |
||
750 | * "D" in "EST" or "EDT") of time zone abbreviations to |
||
751 | * be used when this rule is in effect. If this field |
||
752 | * is -, the variable part is null. |
||
753 | */ |
||
754 | /** |
||
755 | * Parses a rule. |
||
756 | * |
||
757 | * @param array $ruleColumns The columns of this rule. |
||
758 | * |
||
759 | * @return array The parsed rule. |
||
760 | * |
||
761 | * @author Dominik del Bondio <[email protected]> |
||
762 | * @since 0.11.0 |
||
763 | */ |
||
764 | protected function parseRule($ruleColumns) |
||
854 | |||
855 | /* |
||
856 | * NAME GMTOFF RULES/SAVE FORMAT [UNTIL] |
||
857 | * |
||
858 | * For example: |
||
859 | * |
||
860 | * Australia/Adelaide 9:30 Aus CST 1971 Oct 31 2:00 |
||
861 | * |
||
862 | * The fields that make up a zone line are: |
||
863 | * |
||
864 | * NAME The name of the time zone. This is the name used in |
||
865 | * creating the time conversion information file for the |
||
866 | * zone. |
||
867 | * |
||
868 | * GMTOFF |
||
869 | * The amount of time to add to UTC to get standard time |
||
870 | * in this zone. This field has the same format as the |
||
871 | * AT and SAVE fields of rule lines; begin the field with |
||
872 | * a minus sign if time must be subtracted from UTC. |
||
873 | * |
||
874 | * RULES/SAVE |
||
875 | * The name of the rule(s) that apply in the time zone |
||
876 | * or, alternately, an amount of time to add to local |
||
877 | * standard time. If this field is - then standard time |
||
878 | * always applies in the time zone. |
||
879 | * |
||
880 | * FORMAT |
||
881 | * The format for time zone abbreviations in this time |
||
882 | * zone. The pair of characters %s is used to show where |
||
883 | * the "variable part" of the time zone abbreviation |
||
884 | * goes. Alternately, a slash (/) separates standard and |
||
885 | * daylight abbreviations. |
||
886 | * |
||
887 | * UNTIL The time at which the UTC offset or the rule(s) change |
||
888 | * for a location. It is specified as a year, a month, a |
||
889 | * day, and a time of day. If this is specified, the |
||
890 | * time zone information is generated from the given UTC |
||
891 | * offset and rule change until the time specified. The |
||
892 | * month, day, and time of day have the same format as |
||
893 | * the IN, ON, and AT columns of a rule; trailing columns |
||
894 | * can be omitted, and default to the earliest possible |
||
895 | * value for the missing columns. |
||
896 | * |
||
897 | * The next line must be a "continuation" line; this has |
||
898 | * the same form as a zone line except that the string |
||
899 | * "Zone" and the name are omitted, as the continuation |
||
900 | * line will place information starting at the time |
||
901 | * specified as the UNTIL field in the previous line in |
||
902 | * the file used by the previous line. Continuation |
||
903 | * lines may contain an UNTIL field, just as zone lines |
||
904 | * do, indicating that the next line is a further |
||
905 | * continuation. |
||
906 | */ |
||
907 | /** |
||
908 | * Parses a zone. |
||
909 | * |
||
910 | * @param array $zoneLines The lines of this zone. |
||
911 | * |
||
912 | * @return array The parsed zone. |
||
913 | * |
||
914 | * @author Dominik del Bondio <[email protected]> |
||
915 | * @since 0.11.0 |
||
916 | */ |
||
917 | protected function parseZone($zoneLines) |
||
964 | |||
965 | /** |
||
966 | * Determines the month definition from an abbreviation. |
||
967 | * |
||
968 | * @param string $month The abbreviated month. |
||
969 | * |
||
970 | * @return int The definition of this month from AgaviDateDefinitions. |
||
971 | * |
||
972 | * @author Dominik del Bondio <[email protected]> |
||
973 | * @since 0.11.0 |
||
974 | */ |
||
975 | protected function getMonthFromAbbr($month) |
||
988 | |||
989 | /** |
||
990 | * Determines the day definition from an abbreviation. |
||
991 | * |
||
992 | * @param string $day The abbreviated day. |
||
993 | * |
||
994 | * @return int The definition of this day from AgaviDateDefinitions. |
||
995 | * |
||
996 | * @author Dominik del Bondio <[email protected]> |
||
997 | * @since 0.11.0 |
||
998 | */ |
||
999 | protected function getDayFromAbbr($day) |
||
1012 | |||
1013 | /** |
||
1014 | * Returns the seconds from a string in the hh:mm:ss format. |
||
1015 | * |
||
1016 | * @param string $time The time as string. |
||
1017 | * |
||
1018 | * @return int The seconds into the day defined by the input. |
||
1019 | * |
||
1020 | * @author Dominik del Bondio <[email protected]> |
||
1021 | * @since 0.11.0 |
||
1022 | */ |
||
1023 | protected function timeStrToSeconds($time) |
||
1045 | |||
1046 | /** |
||
1047 | * Parses a date string and returns its parts as array. |
||
1048 | * |
||
1049 | * @param string $date The date as string. |
||
1050 | * |
||
1051 | * @return array The parts of the date. |
||
1052 | * |
||
1053 | * @author Dominik del Bondio <[email protected]> |
||
1054 | * @since 0.11.0 |
||
1055 | */ |
||
1056 | protected function dateStrToArray($date) |
||
1089 | } |
||
1090 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.