Complex classes like DecisionTree often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DecisionTree, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
13 | class DecisionTree implements Classifier |
||
14 | { |
||
15 | use Trainable, Predictable; |
||
16 | |||
17 | const CONTINUOUS = 1; |
||
18 | const NOMINAL = 2; |
||
19 | |||
20 | /** |
||
21 | * @var array |
||
22 | */ |
||
23 | protected $columnTypes; |
||
24 | |||
25 | /** |
||
26 | * @var array |
||
27 | */ |
||
28 | private $labels = []; |
||
29 | |||
30 | /** |
||
31 | * @var int |
||
32 | */ |
||
33 | private $featureCount = 0; |
||
34 | |||
35 | /** |
||
36 | * @var DecisionTreeLeaf |
||
37 | */ |
||
38 | protected $tree = null; |
||
39 | |||
40 | /** |
||
41 | * @var int |
||
42 | */ |
||
43 | protected $maxDepth; |
||
44 | |||
45 | /** |
||
46 | * @var int |
||
47 | */ |
||
48 | public $actualDepth = 0; |
||
49 | |||
50 | /** |
||
51 | * @var int |
||
52 | */ |
||
53 | private $numUsableFeatures = 0; |
||
54 | |||
55 | /** |
||
56 | * @var array |
||
57 | */ |
||
58 | private $selectedFeatures; |
||
59 | |||
60 | /** |
||
61 | * @var array |
||
62 | */ |
||
63 | private $featureImportances = null; |
||
64 | |||
65 | /** |
||
66 | * |
||
67 | * @var array |
||
68 | */ |
||
69 | private $columnNames = null; |
||
70 | |||
71 | /** |
||
72 | * @param int $maxDepth |
||
73 | */ |
||
74 | public function __construct(int $maxDepth = 10) |
||
78 | |||
79 | /** |
||
80 | * @param array $samples |
||
81 | * @param array $targets |
||
82 | */ |
||
83 | public function train(array $samples, array $targets) |
||
108 | |||
109 | /** |
||
110 | * @param array $samples |
||
111 | * @return array |
||
112 | */ |
||
113 | public static function getColumnTypes(array $samples) : array |
||
125 | |||
126 | /** |
||
127 | * @param array $records |
||
128 | * @param int $depth |
||
129 | * @return DecisionTreeLeaf |
||
130 | */ |
||
131 | protected function getSplitLeaf(array $records, int $depth = 0) : DecisionTreeLeaf |
||
188 | |||
189 | /** |
||
190 | * @param array $records |
||
191 | * @return DecisionTreeLeaf |
||
192 | */ |
||
193 | protected function getBestSplit(array $records) : DecisionTreeLeaf |
||
235 | |||
236 | /** |
||
237 | * Returns available features/columns to the tree for the decision making |
||
238 | * process. <br> |
||
239 | * |
||
240 | * If a number is given with setNumFeatures() method, then a random selection |
||
241 | * of features up to this number is returned. <br> |
||
242 | * |
||
243 | * If some features are manually selected by use of setSelectedFeatures(), |
||
244 | * then only these features are returned <br> |
||
245 | * |
||
246 | * If none of the above methods were called beforehand, then all features |
||
247 | * are returned by default. |
||
248 | * |
||
249 | * @return array |
||
250 | */ |
||
251 | protected function getSelectedFeatures() : array |
||
272 | |||
273 | /** |
||
274 | * @param $baseValue |
||
275 | * @param array $colValues |
||
276 | * @param array $targets |
||
277 | * @return float |
||
278 | */ |
||
279 | public function getGiniIndex($baseValue, array $colValues, array $targets) : float |
||
304 | |||
305 | /** |
||
306 | * @param array $samples |
||
307 | * @return array |
||
308 | */ |
||
309 | protected function preprocess(array $samples) : array |
||
332 | |||
333 | /** |
||
334 | * @param array $columnValues |
||
335 | * @return bool |
||
336 | */ |
||
337 | protected static function isCategoricalColumn(array $columnValues) : bool |
||
359 | |||
360 | /** |
||
361 | * This method is used to set number of columns to be used |
||
362 | * when deciding a split at an internal node of the tree. <br> |
||
363 | * If the given value is 0, then all features are used (default behaviour), |
||
364 | * otherwise the given value will be used as a maximum for number of columns |
||
365 | * randomly selected for each split operation. |
||
366 | * |
||
367 | * @param int $numFeatures |
||
368 | * @return $this |
||
369 | * @throws InvalidArgumentException |
||
370 | */ |
||
371 | public function setNumFeatures(int $numFeatures) |
||
381 | |||
382 | /** |
||
383 | * Used to set predefined features to consider while deciding which column to use for a split |
||
384 | * |
||
385 | * @param array $selectedFeatures |
||
386 | */ |
||
387 | protected function setSelectedFeatures(array $selectedFeatures) |
||
391 | |||
392 | /** |
||
393 | * A string array to represent columns. Useful when HTML output or |
||
394 | * column importances are desired to be inspected. |
||
395 | * |
||
396 | * @param array $names |
||
397 | * @return $this |
||
398 | * @throws InvalidArgumentException |
||
399 | */ |
||
400 | public function setColumnNames(array $names) |
||
410 | |||
411 | /** |
||
412 | * @return string |
||
413 | */ |
||
414 | public function getHtml() |
||
418 | |||
419 | /** |
||
420 | * This will return an array including an importance value for |
||
421 | * each column in the given dataset. The importance values are |
||
422 | * normalized and their total makes 1.<br/> |
||
423 | * |
||
424 | * @return array |
||
425 | */ |
||
426 | public function getFeatureImportances() |
||
456 | |||
457 | /** |
||
458 | * Collects and returns an array of internal nodes that use the given |
||
459 | * column as a split criterion |
||
460 | * |
||
461 | * @param int $column |
||
462 | * @param DecisionTreeLeaf $node |
||
463 | * @return array |
||
464 | */ |
||
465 | protected function getSplitNodesByColumn(int $column, DecisionTreeLeaf $node) : array |
||
488 | |||
489 | /** |
||
490 | * @param array $sample |
||
491 | * @return mixed |
||
492 | */ |
||
493 | protected function predictSample(array $sample) |
||
509 | } |
||
510 |
Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.
Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.