Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like HalsteadAnalyzer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HalsteadAnalyzer, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
62 | class HalsteadAnalyzer extends AbstractCachingAnalyzer implements AnalyzerNodeAware |
||
63 | { |
||
64 | /** |
||
65 | * Metrics provided by the analyzer implementation. |
||
66 | */ |
||
67 | const M_HALSTEAD_LENGTH = 'hnt', // N = N1 + N2 (total operators + operands) |
||
68 | M_HALSTEAD_VOCABULARY = 'hnd', // n = n1 + n2 (distinct operators + operands) |
||
69 | M_HALSTEAD_VOLUME = 'hv', // V = N * log2(n) |
||
70 | M_HALSTEAD_DIFFICULTY = 'hd', // D = (n1 / 2) * (N2 / n2) |
||
71 | M_HALSTEAD_LEVEL = 'hl', // L = 1 / D |
||
72 | M_HALSTEAD_EFFORT = 'he', // E = V * D |
||
73 | M_HALSTEAD_TIME = 'ht', // T = E / 18 |
||
74 | M_HALSTEAD_BUGS = 'hb', // B = (E ** (2/3)) / 3000 |
||
75 | M_HALSTEAD_CONTENT = 'hi'; // I = (V / D) |
||
76 | |||
/**
 * Runs this analyzer over the given namespaces.
 *
 * Every namespace is asked to accept this analyzer as a visitor. The work is
 * only performed while no metrics have been collected yet; once the metrics
 * container has been initialised, further calls return immediately.
 *
 * @param \PDepend\Source\AST\ASTNamespace[] $namespaces Namespaces to iterate over.
 * @return void
 */
public function analyze($namespaces)
{
    // A non-null container means a previous call already did the work.
    if ($this->metrics !== null) {
        return;
    }

    $this->loadCache();
    $this->fireStartAnalyzer();

    // Start with an empty metrics container before visiting any node.
    $this->metrics = array();

    foreach ($namespaces as $currentNamespace) {
        $currentNamespace->accept($this);
    }

    $this->fireEndAnalyzer();
    $this->unloadCache();
}
||
100 | |||
101 | /** |
||
102 | * This method will return an <b>array</b> with all generated basis metrics |
||
103 | * for the given <b>$artifact</b> (n1, n2, N1, N2). If there are no metrics for |
||
104 | * the requested artifact, this method will return an empty <b>array</b>. |
||
105 | * |
||
106 | * @param \PDepend\Source\AST\ASTArtifact $artifact |
||
107 | * @return array |
||
108 | */ |
||
109 | 14 | View Code Duplication | public function getNodeBasisMetrics(ASTArtifact $artifact) |
117 | |||
118 | /** |
||
119 | * This method will return an <b>array</b> with all generated metric values |
||
120 | * for the given <b>$artifact</b>. If there are no metrics for the requested |
||
121 | * artifact, this method will return an empty <b>array</b>. |
||
122 | * |
||
123 | * @param \PDepend\Source\AST\ASTArtifact $artifact |
||
124 | * @return array |
||
125 | */ |
||
126 | 10 | public function getNodeMetrics(ASTArtifact $artifact) |
|
135 | |||
/**
 * Visits a function node.
 *
 * Fires the start/end function events around the visit and computes the
 * Halstead basis for the function unless it could be restored from the cache.
 *
 * @param \PDepend\Source\AST\ASTFunction $function
 * @return void
 */
public function visitFunction(ASTFunction $function)
{
    $this->fireStartFunction($function);

    // Only a strict `false` from the cache lookup triggers a recalculation.
    $restored = $this->restoreFromCache($function);
    if ($restored === false) {
        $this->calculateHalsteadBasis($function);
    }

    $this->fireEndFunction($function);
}
||
152 | |||
/**
 * Visits an interface node.
 *
 * @param \PDepend\Source\AST\ASTInterface $interface
 * @return void
 */
public function visitInterface(ASTInterface $interface)
{
    // Deliberately left empty: no metrics are collected for interfaces.
}
||
163 | |||
/**
 * Visits a method node.
 *
 * Fires the start/end method events around the visit and computes the
 * Halstead basis for the method unless it could be restored from the cache.
 *
 * @param \PDepend\Source\AST\ASTMethod $method
 * @return void
 */
public function visitMethod(ASTMethod $method)
{
    $this->fireStartMethod($method);

    // Only a strict `false` from the cache lookup triggers a recalculation.
    $restored = $this->restoreFromCache($method);
    if ($restored === false) {
        $this->calculateHalsteadBasis($method);
    }

    $this->fireEndMethod($method);
}
||
180 | |||
/**
 * Collects the Halstead basis counts for a callable and stores them in the
 * metrics container under the callable's id.
 *
 * Every token of the callable is classified as an operator, an operand, or
 * is ignored. The stored array uses the conventional Halstead names, which
 * match the formulas documented on the M_HALSTEAD_* constants:
 *
 *  - n1: number of distinct operators
 *  - n2: number of distinct operands
 *  - N1: total number of operators
 *  - N2: total number of operands
 *
 * @see http://www.scribd.com/doc/99533/Halstead-s-Operators-and-Operands-in-C-C-JAVA-by-Indranil-Nandy
 *
 * @param \PDepend\Source\AST\AbstractASTCallable $callable
 * @return void
 */
public function calculateHalsteadBasis(AbstractASTCallable $callable)
{
    $operators = array();
    $operands = array();

    // Token type that ends the current "skip" run, or null when not skipping.
    $skipUntil = null;

    $tokens = $callable->getTokens();
    foreach ($tokens as $i => $token) {
        /*
         * Some operations should be ignored, e.g. function declarations.
         * When we encounter a new function, we'll skip all tokens until we
         * find the closing token. The closing token itself is skipped too.
         */
        if ($skipUntil !== null) {
            if ($token->type === $skipUntil) {
                $skipUntil = null;
            }

            continue;
        }

        switch ($token->type) {
            // A pair of parenthesis is considered a single operator, so
            // only the opening token is counted (via the default branch).
            case Tokens::T_PARENTHESIS_CLOSE:
            case Tokens::T_CURLY_BRACE_CLOSE:
            case Tokens::T_SQUARED_BRACKET_CLOSE:
            case Tokens::T_ANGLE_BRACKET_CLOSE:
                break;

            // A label is considered an operator if it is used as the target
            // of a GOTO statement.
            case Tokens::T_GOTO:
                $operators[] = $token->image;
                // Ignore next token as operand but count as operator instead.
                // Guarded so a trailing `goto` token cannot read past the
                // end of the token list.
                if (isset($tokens[$i + 1])) {
                    $skipUntil = $tokens[$i + 1]->type;
                    $operators[] = $tokens[$i + 1]->image;
                }
                break;

            /*
             * The following control structures case ...: for (...) if (...)
             * switch (...) while(...) and try-catch (...) are treated in a
             * special way. The colon and the parentheses are considered to
             * be a part of the constructs. The case and the colon or the
             * "for (...)", "if (...)", "switch (...)", "while(...)",
             * "try-catch( )" are counted together as one operator.
             *
             * Not listed here on purpose:
             *  - T_SWITCH: not followed by ()
             *  - T_TRY:    not followed by ()
             *  - T_DO:     always comes with while, which accounts for ()
             */
            case Tokens::T_IF:
            case Tokens::T_FOR:
            case Tokens::T_FOREACH:
            case Tokens::T_WHILE:
            case Tokens::T_CATCH:
                $operators[] = $token->image;
                /*
                 * These are always followed by parenthesis, which would add
                 * another operator (only opening parenthesis counts)
                 * so we'll have to skip that one.
                 */
                $skipUntil = Tokens::T_PARENTHESIS_OPEN;
                break;

            /*
             * The ternary operator '?' followed by ':' is considered a
             * single operator as it is equivalent to "if-else" construct.
             *
             * Colon is used after keyword, where it counts as part of
             * that operator, or in ternary operator, where it also
             * counts as 1. Either way the colon itself adds nothing.
             */
            case Tokens::T_COLON:
                break;

            // The comments are considered neither an operator nor an operand.
            case Tokens::T_DOC_COMMENT:
            case Tokens::T_COMMENT:
                break;

            /*
             * `new` is considered same as the function call, mainly because
             * it's equivalent to the function call; the keyword itself is
             * not counted.
             */
            case Tokens::T_NEW:
                break;

            /*
             * Like T_IF & co, array(..) needs 3 tokens ("array", "(" and
             * ")") for what's essentially just 1 operator; the keyword
             * itself is not counted.
             */
            case Tokens::T_ARRAY:
                break;

            /*
             * Class::method or $object->method both only count as 1
             * identifier, even though they consist of 3 tokens.
             */
            case Tokens::T_OBJECT_OPERATOR:
            case Tokens::T_DOUBLE_COLON:
                // Glue ->/:: and before & after parts together.
                $image = array_pop($operands) . $token->image;
                if (isset($tokens[$i + 1])) {
                    $image .= $tokens[$i + 1]->image;

                    // Skip next part (would be seen as operand)
                    $skipUntil = $tokens[$i + 1]->type;
                }
                $operands[] = $image;
                break;

            // Ignore HEREDOC delimiters.
            case Tokens::T_START_HEREDOC:
            case Tokens::T_END_HEREDOC:
                break;

            // Ignore PHP open & close tags and non-PHP content.
            case Tokens::T_OPEN_TAG:
            case Tokens::T_CLOSE_TAG:
            case Tokens::T_NO_PHP:
                break;

            /*
             * The function name is considered a single operator when it
             * appears as calling a function, but when it appears in
             * declarations or in function definitions it is not counted as
             * operator.
             * Default parameter assignments are not counted.
             */
            case Tokens::T_FUNCTION:
                // Because `)` could appear in default argument assignment
                // (`$var = array()`), we need to skip until `{`, but that
                // one should be included in operators.
                $skipUntil = Tokens::T_CURLY_BRACE_OPEN;
                $operators[] = '{';
                break;

            /*
             * When variables or constants appear in declaration they are
             * not considered as operands, they are considered operands only
             * when they appear with operators in expressions.
             */
            case Tokens::T_VAR:
            case Tokens::T_CONST:
                $skipUntil = Tokens::T_SEMICOLON;
                break;

            case Tokens::T_STRING:
                // `define` is T_STRING, just like any other identifier.
                if ($token->image === 'define') {
                    // Undo all of "define", "(", name, ",", value, ")"
                    $skipUntil = Tokens::T_PARENTHESIS_CLOSE;
                } else {
                    $operands[] = $token->image;
                }
                break;

            // Operands
            case Tokens::T_CONSTANT_ENCAPSED_STRING:
            case Tokens::T_VARIABLE:
            case Tokens::T_LNUMBER:
            case Tokens::T_DNUMBER:
            case Tokens::T_NUM_STRING:
            case Tokens::T_NULL:
            case Tokens::T_TRUE:
            case Tokens::T_FALSE:
            case Tokens::T_CLASS_FQN:
            case Tokens::T_LINE:
            case Tokens::T_METHOD_C:
            case Tokens::T_NS_C:
            case Tokens::T_DIR:
            case Tokens::T_ENCAPSED_AND_WHITESPACE: // content of HEREDOC
                $operands[] = $token->image;
                break;

            // Everything else is an operator.
            default:
                $operators[] = $token->image;
                break;
        }
    }

    // Lower-case n = distinct counts, upper-case N = total counts, as in
    // the formulas on the M_HALSTEAD_* constants (N = N1 + N2 is the total
    // length, n = n1 + n2 the distinct vocabulary).
    $this->metrics[$callable->getId()] = array(
        'n1' => count(array_unique($operators)),
        'n2' => count(array_unique($operands)),
        'N1' => count($operators),
        'N2' => count($operands),
    );
}
||
373 | |||
374 | /** |
||
375 | * Calculates Halstead measures from n1, n2, N1 & N2. |
||
376 | * |
||
377 | * @see http://www.verifysoft.com/en_halstead_metrics.html |
||
378 | * @see http://www.grammatech.com/codesonar/workflow-features/halstead |
||
379 | * |
||
380 | * @param array $basis [n1, n2, N1, N2] |
||
381 | * @return array |
||
382 | */ |
||
383 | 8 | public function calculateHalsteadMeasures(array $basis) |
|
401 | } |
||
402 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.