Complex classes like DecisionTree often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DecisionTree, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
12 | class DecisionTree implements Classifier |
||
13 | { |
||
14 | use Trainable, Predictable; |
||
15 | |||
16 | const CONTINUOS = 1; |
||
17 | const NOMINAL = 2; |
||
18 | |||
19 | /** |
||
20 | * @var array |
||
21 | */ |
||
22 | private $samples = []; |
||
23 | |||
24 | /** |
||
25 | * @var array |
||
26 | */ |
||
27 | protected $columnTypes; |
||
28 | |||
29 | /** |
||
30 | * @var array |
||
31 | */ |
||
32 | private $labels = []; |
||
33 | |||
34 | /** |
||
35 | * @var int |
||
36 | */ |
||
37 | private $featureCount = 0; |
||
38 | |||
39 | /** |
||
40 | * @var DecisionTreeLeaf |
||
41 | */ |
||
42 | protected $tree = null; |
||
43 | |||
44 | /** |
||
45 | * @var int |
||
46 | */ |
||
47 | protected $maxDepth; |
||
48 | |||
49 | /** |
||
50 | * @var int |
||
51 | */ |
||
52 | public $actualDepth = 0; |
||
53 | |||
54 | /** |
||
55 | * @var int |
||
56 | */ |
||
57 | private $numUsableFeatures = 0; |
||
58 | |||
59 | /** |
||
60 | * @var array |
||
61 | */ |
||
62 | private $selectedFeatures; |
||
63 | |||
64 | /** |
||
65 | * @var array |
||
66 | */ |
||
67 | private $featureImportances = null; |
||
68 | |||
69 | /** |
||
70 | * |
||
71 | * @var array |
||
72 | */ |
||
73 | private $columnNames = null; |
||
74 | |||
75 | /** |
||
76 | * @param int $maxDepth |
||
77 | */ |
||
78 | public function __construct($maxDepth = 10) |
||
82 | |||
83 | /** |
||
84 | * @param array $samples |
||
85 | * @param array $targets |
||
86 | */ |
||
87 | public function train(array $samples, array $targets) |
||
112 | |||
113 | protected function getColumnTypes(array $samples) |
||
123 | |||
124 | /** |
||
125 | * @param null|array $records |
||
126 | * @return DecisionTreeLeaf |
||
127 | */ |
||
128 | protected function getSplitLeaf($records, $depth = 0) |
||
184 | |||
185 | /** |
||
186 | * @param array $records |
||
187 | * @return DecisionTreeLeaf[] |
||
188 | */ |
||
189 | protected function getBestSplit($records) |
||
230 | |||
231 | /** |
||
232 | * Returns available features/columns to the tree for the decision making |
||
233 | * process. <br> |
||
234 | * |
||
235 | * If a number is given with setNumFeatures() method, then a random selection |
||
236 | * of features up to this number is returned. <br> |
||
237 | * |
||
238 | * If some features are manually selected by use of setSelectedFeatures(), |
||
239 | * then only these features are returned <br> |
||
240 | * |
||
241 | * If none of the above methods were called beforehand, then all features |
||
242 | * are returned by default. |
||
243 | * |
||
244 | * @return array |
||
245 | */ |
||
246 | protected function getSelectedFeatures() |
||
267 | |||
268 | /** |
||
269 | * @param string $baseValue |
||
270 | * @param array $colValues |
||
271 | * @param array $targets |
||
272 | */ |
||
273 | public function getGiniIndex($baseValue, $colValues, $targets) |
||
297 | |||
298 | /** |
||
299 | * @param array $samples |
||
300 | * @return array |
||
301 | */ |
||
302 | protected function preprocess(array $samples) |
||
325 | |||
326 | /** |
||
327 | * @param array $columnValues |
||
328 | * @return bool |
||
329 | */ |
||
330 | protected function isCategoricalColumn(array $columnValues) |
||
354 | |||
355 | /** |
||
356 | * This method is used to set number of columns to be used |
||
357 | * when deciding a split at an internal node of the tree. <br> |
||
358 | * If the given value is 0, then all features are used (default behaviour), |
||
359 | * otherwise the given value will be used as a maximum for number of columns |
||
360 | * randomly selected for each split operation. |
||
361 | * |
||
362 | * @param int $numFeatures |
||
363 | * @return $this |
||
364 | * @throws Exception |
||
365 | */ |
||
366 | public function setNumFeatures(int $numFeatures) |
||
376 | |||
377 | /** |
||
378 | * Used to set predefined features to consider while deciding which column to use for a split |
||
379 | * |
||
380 | * @param array $selectedFeatures |
||
381 | */ |
||
382 | protected function setSelectedFeatures(array $selectedFeatures) |
||
386 | |||
387 | /** |
||
388 | * A string array to represent columns. Useful when HTML output or |
||
389 | * column importances are desired to be inspected. |
||
390 | * |
||
391 | * @param array $names |
||
392 | * @return $this |
||
393 | */ |
||
394 | public function setColumnNames(array $names) |
||
404 | |||
405 | /** |
||
406 | * @return string |
||
407 | */ |
||
408 | public function getHtml() |
||
412 | |||
413 | /** |
||
414 | * This will return an array including an importance value for |
||
415 | * each column in the given dataset. The importance values are |
||
416 | * normalized and their total makes 1.<br/> |
||
417 | * |
||
418 | * @param array $labels |
||
419 | * @return array |
||
420 | */ |
||
421 | public function getFeatureImportances() |
||
451 | |||
452 | /** |
||
453 | * Collects and returns an array of internal nodes that use the given |
||
454 | * column as a split criterion |
||
455 | * |
||
456 | * @param int $column |
||
457 | * @param DecisionTreeLeaf $node |
||
458 | * @param array $collected |
||
459 | * |
||
460 | * @return array |
||
461 | */ |
||
462 | protected function getSplitNodesByColumn($column, DecisionTreeLeaf $node) |
||
485 | |||
486 | /** |
||
487 | * @param array $sample |
||
488 | * @return mixed |
||
489 | */ |
||
490 | protected function predictSample(array $sample) |
||
506 | } |
||
507 |
Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.
Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.