Duplicate code is one of the most pungent code smells. A commonly used rule is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
1 | <?php |
||
49 | class PunctuationTokenizer implements Tokenizer |
||
50 | { |
||
51 | |||
52 | /** |
||
53 | * The tokens to be handled by this tokenizer as an array. |
||
54 | * |
||
55 | * @var string[] $tokens |
||
56 | */ |
||
57 | protected $tokens = [ |
||
58 | '.', |
||
59 | '?', |
||
60 | '!', |
||
61 | ':', |
||
62 | ';', |
||
63 | ',', |
||
64 | '#', |
||
65 | '"', |
||
66 | '$', |
||
67 | '§', |
||
68 | '%', |
||
69 | '&', |
||
70 | '/', |
||
71 | '(', |
||
72 | ')', |
||
73 | '=', |
||
74 | '[', |
||
75 | ']', |
||
76 | '|', |
||
77 | '{', |
||
78 | '}', |
||
79 | '\\', |
||
80 | '<', |
||
81 | '>', |
||
82 | '«', |
||
83 | '»', |
||
84 | '“', |
||
85 | '”', |
||
86 | '^', |
||
87 | '°', |
||
88 | '≤', |
||
89 | '≥', |
||
90 | '¥', |
||
91 | '©', |
||
92 | '€', |
||
93 | "'", |
||
94 | '-', |
||
95 | '_', |
||
96 | ]; |
||
97 | |||
98 | /** |
||
99 | * Split the given input into tokens using punctuation marks as separators
||
100 | * |
||
101 | * The input can be a string or a TokenRegistry. If the input is a |
||
102 | * TokenRegistry, each item will be tokenized. |
||
103 | * |
||
104 | * @param string|TokenRegistry $input The |
||
105 | * input to be tokenized |
||
106 | * |
||
107 | * @return TokenRegistry |
||
108 | */ |
||
109 | View Code Duplication | public function run($input) |
|
137 | |||
138 | /** |
||
139 | * Split the given string into tokens using whitespace. |
||
140 | * |
||
141 | * Each whitespace is placed in a WhitespaceToken and everything else is |
||
142 | * placed in a WordToken object. |
||
143 | * |
||
144 | * @param string $input The String to tokenize |
||
145 | * |
||
146 | * @return Token[] |
||
147 | */ |
||
148 | View Code Duplication | private function tokenize($input) |
|
166 | } |
||
167 |