Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like FormulaParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use FormulaParser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
5 | class FormulaParser |
||
6 | { |
||
7 | /** |
||
8 | * The index of the character we are currently looking at |
||
9 | * @var integer |
||
10 | */ |
||
11 | protected $currentChar = 0; |
||
12 | |||
13 | /** |
||
14 | * The token we are working on. |
||
15 | * @var string |
||
16 | */ |
||
17 | protected $currentToken = ''; |
||
18 | |||
19 | /** |
||
20 | * The formula to parse |
||
21 | * @var string |
||
22 | */ |
||
23 | protected $formula = ''; |
||
24 | |||
25 | /** |
||
26 | * The character ahead of the current char |
||
27 | * @var string |
||
28 | */ |
||
29 | protected $lookahead = ''; |
||
30 | |||
31 | /** |
||
32 | * The parse tree to be generated |
||
33 | * @var array |
||
34 | */ |
||
35 | protected $parseTree = array(); |
||
36 | |||
37 | /** |
||
38 | * Array of external sheets |
||
39 | * @var array |
||
40 | */ |
||
41 | protected $extSheets = array(); |
||
42 | |||
43 | /** |
||
44 | * Array of sheet references in the form of REF structures |
||
45 | * @var array |
||
46 | */ |
||
47 | protected $references = array(); |
||
48 | |||
49 | /** |
||
50 | * Convert a token to the proper ptg value. |
||
51 | * |
||
52 | * @param mixed $token The token to convert. |
||
53 | * @return string the converted token |
||
54 | * @throws \Exception |
||
55 | */ |
||
56 | protected function convert($token) |
||
80 | |||
81 | /** |
||
82 | * Convert a number token to ptgInt or ptgNum |
||
83 | * |
||
84 | * @param mixed $num an integer or double for conversion to its ptg value |
||
85 | * @return string |
||
86 | */ |
||
87 | protected function convertNumber($num) |
||
97 | |||
98 | /** |
||
99 | * Convert a string token to ptgStr |
||
100 | * |
||
101 | * @param string $string A string for conversion to its ptg value. |
||
102 | * @throws \Exception |
||
103 | * @return string the converted token |
||
104 | */ |
||
105 | protected function convertString($string) |
||
117 | |||
118 | /** |
||
119 | * Convert a function to a ptgFunc or ptgFuncVarV depending on the number of |
||
120 | * args that it takes. |
||
121 | * |
||
122 | * @param string $token The name of the function for convertion to ptg value. |
||
123 | * @param integer $numArgs The number of arguments the function receives. |
||
124 | * |
||
125 | * @return string The packed ptg for the function |
||
126 | */ |
||
127 | protected function convertFunction($token, $numArgs) |
||
140 | |||
141 | /** |
||
142 | * Convert an Excel range such as A1:D4 to a ptgRefV. |
||
143 | * |
||
144 | * @param string $range An Excel range in the A1:A2 or A1..A2 format. |
||
145 | * @return string |
||
146 | */ |
||
147 | protected function convertRange2d($range) |
||
160 | |||
161 | /** |
||
162 | * Convert an Excel 3d range such as "Sheet1!A1:D4" or "Sheet1:Sheet2!A1:D4" to |
||
163 | * a ptgArea3d. |
||
164 | * |
||
165 | * @param string $token An Excel range in the Sheet1!A1:A2 format. |
||
166 | * @return string The packed ptgArea3d token |
||
167 | */ |
||
168 | protected function convertRange3d($token) |
||
187 | |||
188 | /** |
||
189 | * Convert an Excel reference such as A1, $B2, C$3 or $D$4 to a ptgRefV. |
||
190 | * |
||
191 | * @param string $cell An Excel cell reference |
||
192 | * @return string The cell in packed() format with the corresponding ptg |
||
193 | */ |
||
194 | protected function convertRef2d($cell) |
||
202 | |||
203 | /** |
||
204 | * Convert an Excel 3d reference such as "Sheet1!A1" or "Sheet1:Sheet2!A1" to a |
||
205 | * ptgRef3d. |
||
206 | * |
||
207 | * @param string $cell An Excel cell reference |
||
208 | * @return string The packed ptgRef3d token |
||
209 | */ |
||
210 | protected function convertRef3d($cell) |
||
225 | |||
226 | /** |
||
227 | * @param string $str |
||
228 | * |
||
229 | * @return string |
||
230 | */ |
||
231 | protected function removeTrailingQuotes($str) |
||
238 | |||
239 | /** |
||
240 | * @param $extRef |
||
241 | * |
||
242 | * @return array |
||
243 | * @throws \Exception |
||
244 | */ |
||
245 | protected function getRangeSheets($extRef) |
||
277 | |||
278 | /** |
||
279 | * Look up the REF index that corresponds to an external sheet name |
||
280 | * (or range). If it doesn't exist yet add it to the workbook's references |
||
281 | * array. It assumes all sheet names given must exist. |
||
282 | * |
||
283 | * @param string $extRef The name of the external reference |
||
284 | * |
||
285 | * @throws \Exception |
||
286 | * @return string The reference index in packed() format |
||
287 | */ |
||
288 | protected function getRefIndex($extRef) |
||
296 | |||
297 | /** |
||
298 | * Add reference and return its index |
||
299 | * @param int $sheet1 |
||
300 | * @param int $sheet2 |
||
301 | * |
||
302 | * @return int |
||
303 | */ |
||
304 | public function addRef($sheet1, $sheet2) |
||
319 | |||
320 | /** |
||
321 | * Look up the index that corresponds to an external sheet name. The hash of |
||
322 | * sheet names is updated by the addworksheet() method of the |
||
323 | * Workbook class. |
||
324 | * |
||
325 | * @param string $sheetName |
||
326 | * |
||
327 | * @return integer The sheet index, -1 if the sheet was not found |
||
328 | */ |
||
329 | protected function getSheetIndex($sheetName) |
||
337 | |||
338 | /** |
||
339 | * This method is used to update the array of sheet names. It is |
||
340 | * called by the addWorksheet() method of the |
||
341 | * Workbook class. |
||
342 | * |
||
343 | * @see Workbook::addWorksheet() |
||
344 | * @param string $name The name of the worksheet being added |
||
345 | * @param integer $index The index of the worksheet being added |
||
346 | */ |
||
347 | public function addSheet($name, $index) |
||
351 | |||
352 | /** |
||
353 | * pack() row and column into the required 3 or 4 byte format. |
||
354 | * |
||
355 | * @param string $cellAddress The Excel cell reference to be packed |
||
356 | * |
||
357 | * @throws \Exception |
||
358 | * @return array Array containing the row and column in packed() format |
||
359 | */ |
||
360 | protected function cellToPackedRowcol($cellAddress) |
||
375 | |||
376 | /** |
||
377 | * Advance to the next valid token. |
||
378 | * |
||
379 | */ |
||
380 | protected function advance() |
||
410 | |||
411 | /** |
||
412 | * @return int |
||
413 | */ |
||
414 | protected function eatWhitespace() |
||
432 | |||
433 | /** |
||
434 | * Checks if it's a valid token. |
||
435 | * |
||
436 | * @param string $token The token to check. |
||
437 | * @return string The checked token |
||
438 | */ |
||
439 | protected function match($token) |
||
456 | |||
457 | /** |
||
458 | * @param string $token |
||
459 | * |
||
460 | * @return string |
||
461 | */ |
||
462 | protected function processDefaultCase($token) |
||
476 | |||
477 | /** |
||
478 | * @return bool |
||
479 | */ |
||
480 | protected function lookaheadHasNumber() |
||
484 | |||
485 | /** |
||
486 | * @return bool |
||
487 | */ |
||
488 | protected function isLookaheadDotOrColon() |
||
492 | |||
493 | /** |
||
494 | * @param string $token |
||
495 | * |
||
496 | * @return bool |
||
497 | */ |
||
498 | protected function isAnyRange($token) |
||
503 | |||
504 | /** |
||
505 | * @param string $token |
||
506 | * |
||
507 | * @return bool |
||
508 | */ |
||
509 | protected function isReference($token) |
||
516 | |||
517 | /** |
||
518 | * @param string $token |
||
519 | * |
||
520 | * @return bool |
||
521 | */ |
||
522 | protected function isExternalReference($token) |
||
528 | |||
529 | /** |
||
530 | * If it's a number (check that it's not a sheet name or range) |
||
531 | * @param string $token |
||
532 | * |
||
533 | * @return bool |
||
534 | */ |
||
535 | protected function isNumber($token) |
||
542 | |||
543 | /** |
||
544 | * @param string $token |
||
545 | * |
||
546 | * @return bool |
||
547 | */ |
||
548 | protected function isFunctionCall($token) |
||
553 | |||
554 | /** |
||
555 | * The parsing method. It parses a formula. |
||
556 | * |
||
557 | * @param string $formula The formula to parse, without the initial equal sign (=). |
||
558 | */ |
||
559 | public function parse($formula) |
||
569 | |||
570 | /** |
||
571 | * It parses a condition. It assumes the following rule: |
||
572 | * Cond -> Expr [(">" | "<") Expr] |
||
573 | * |
||
574 | * @return array The parsed ptg'd tree |
||
575 | */ |
||
576 | View Code Duplication | protected function condition() |
|
588 | |||
589 | /** |
||
590 | * It parses a expression. It assumes the following rule: |
||
591 | * Expr -> Term [("+" | "-") Term] |
||
592 | * -> "string" |
||
593 | * -> "-" Term |
||
594 | * |
||
595 | * @return array The parsed ptg'd tree |
||
596 | */ |
||
597 | protected function expression() |
||
622 | |||
623 | /** |
||
624 | * This function just introduces a ptgParen element in the tree, so that Excel |
||
625 | * doesn't get confused when working with a parenthesized formula afterwards. |
||
626 | * |
||
627 | * @see _fact() |
||
628 | * @return array The parsed ptg'd tree |
||
629 | */ |
||
630 | protected function parenthesizedExpression() |
||
634 | |||
635 | /** |
||
636 | * It parses a term. It assumes the following rule: |
||
637 | * Term -> Fact [("*" | "/") Fact] |
||
638 | * |
||
639 | * @return array The parsed ptg'd tree |
||
640 | */ |
||
641 | View Code Duplication | protected function term() |
|
653 | |||
654 | /** |
||
655 | * It parses a factor. It assumes the following rule: |
||
656 | * Fact -> ( Expr ) |
||
657 | * | CellRef |
||
658 | * | CellRange |
||
659 | * | Number |
||
660 | * | Function |
||
661 | * @throws \Exception |
||
662 | * @return array The parsed ptg'd tree |
||
663 | */ |
||
664 | protected function fact() |
||
706 | |||
707 | /** |
||
708 | * It parses a function call. It assumes the following rule: |
||
709 | * Func -> ( Expr [,Expr]* ) |
||
710 | * @throws \Exception |
||
711 | * @return string|array The parsed ptg'd tree |
||
712 | */ |
||
713 | protected function func() |
||
752 | |||
753 | /** |
||
754 | * Creates a tree. In fact an array which may have one or two arrays (sub-trees) |
||
755 | * as elements. |
||
756 | * |
||
757 | * @param mixed $value The value of this node. |
||
758 | * @param mixed $left The left array (sub-tree) or a final node. |
||
759 | * @param mixed $right The right array (sub-tree) or a final node. |
||
760 | * @return array A tree |
||
761 | */ |
||
762 | protected function createTree($value, $left, $right) |
||
770 | |||
771 | /** |
||
772 | * Builds a string containing the tree in reverse polish notation (What you |
||
773 | * would use in a HP calculator stack). |
||
774 | * The following tree: |
||
775 | * |
||
776 | * + |
||
777 | * / \ |
||
778 | * 2 3 |
||
779 | * |
||
780 | * produces: "23+" |
||
781 | * |
||
782 | * The following tree: |
||
783 | * |
||
784 | * + |
||
785 | * / \ |
||
786 | * 3 * |
||
787 | * / \ |
||
788 | * 6 A1 |
||
789 | * |
||
790 | * produces: "36A1*+" |
||
791 | * |
||
792 | * In fact all operands, functions, references, etc... are written as ptg's |
||
793 | * |
||
794 | * @param array $tree The optional tree to convert. |
||
795 | * @return string The tree in reverse polish notation |
||
796 | */ |
||
797 | protected function toReversePolish($tree) |
||
814 | |||
815 | /** |
||
816 | * @param $tree |
||
817 | * |
||
818 | * @return string |
||
819 | */ |
||
820 | protected function getFunctionPolish($tree) |
||
833 | |||
834 | /** |
||
835 | * @param $value |
||
836 | * |
||
837 | * @return bool |
||
838 | */ |
||
839 | protected function isFunction($value) |
||
847 | |||
848 | /** |
||
849 | * @param $part |
||
850 | * |
||
851 | * @return string |
||
852 | * @throws \Exception |
||
853 | */ |
||
854 | protected function getTreePartPolish($part) |
||
867 | |||
868 | /** |
||
869 | * @return array |
||
870 | */ |
||
871 | public function getReferences() |
||
875 | |||
876 | /** |
||
877 | * @param $formula |
||
878 | * |
||
879 | * @return string |
||
880 | */ |
||
881 | public function getReversePolish($formula) |
||
887 | } |
||
888 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.