Complex classes like DecisionTree often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DecisionTree, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 12 | class DecisionTree implements Classifier |
||
| 13 | { |
||
| 14 | use Trainable, Predictable; |
||
| 15 | |||
| 16 | const CONTINUOS = 1; |
||
| 17 | const NOMINAL = 2; |
||
| 18 | |||
| 19 | /** |
||
| 20 | * @var array |
||
| 21 | */ |
||
| 22 | private $samples = []; |
||
| 23 | |||
| 24 | /** |
||
| 25 | * @var array |
||
| 26 | */ |
||
| 27 | private $columnTypes; |
||
| 28 | |||
| 29 | /** |
||
| 30 | * @var array |
||
| 31 | */ |
||
| 32 | private $labels = []; |
||
| 33 | |||
| 34 | /** |
||
| 35 | * @var int |
||
| 36 | */ |
||
| 37 | private $featureCount = 0; |
||
| 38 | |||
| 39 | /** |
||
| 40 | * @var DecisionTreeLeaf |
||
| 41 | */ |
||
| 42 | private $tree = null; |
||
| 43 | |||
| 44 | /** |
||
| 45 | * @var int |
||
| 46 | */ |
||
| 47 | private $maxDepth; |
||
| 48 | |||
| 49 | /** |
||
| 50 | * @var int |
||
| 51 | */ |
||
| 52 | public $actualDepth = 0; |
||
| 53 | |||
| 54 | /** |
||
| 55 | * @var int |
||
| 56 | */ |
||
| 57 | private $numUsableFeatures = 0; |
||
| 58 | |||
| 59 | /** |
||
| 60 | * @var array |
||
| 61 | */ |
||
| 62 | private $selectedFeatures; |
||
| 63 | |||
| 64 | /** |
||
| 65 | * @var array |
||
| 66 | */ |
||
| 67 | private $featureImportances = null; |
||
| 68 | |||
| 69 | /** |
||
| 70 | * |
||
| 71 | * @var array |
||
| 72 | */ |
||
| 73 | private $columnNames = null; |
||
| 74 | |||
| 75 | /** |
||
| 76 | * @param int $maxDepth |
||
| 77 | */ |
||
| 78 | public function __construct($maxDepth = 10) |
||
| 82 | /** |
||
| 83 | * @param array $samples |
||
| 84 | * @param array $targets |
||
| 85 | */ |
||
| 86 | public function train(array $samples, array $targets) |
||
| 111 | |||
| 112 | protected function getColumnTypes(array $samples) |
||
| 122 | |||
| 123 | /** |
||
| 124 | * @param null|array $records |
||
| 125 | * @return DecisionTreeLeaf |
||
| 126 | */ |
||
| 127 | protected function getSplitLeaf($records, $depth = 0) |
||
| 183 | |||
| 184 | /** |
||
| 185 | * @param array $records |
||
| 186 | * @return DecisionTreeLeaf[] |
||
| 187 | */ |
||
| 188 | protected function getBestSplit($records) |
||
| 218 | |||
| 219 | /** |
||
| 220 | * Returns available features/columns to the tree for the decision making |
||
| 221 | * process. <br> |
||
| 222 | * |
||
| 223 | * If a number is given with setNumFeatures() method, then a random selection |
||
| 224 | * of features up to this number is returned. <br> |
||
| 225 | * |
||
| 226 | * If some features are manually selected by use of setSelectedFeatures(), |
||
| 227 | * then only these features are returned <br> |
||
| 228 | * |
||
| 229 | * If any of above methods were not called beforehand, then all features |
||
| 230 | * are returned by default. |
||
| 231 | * |
||
| 232 | * @return array |
||
| 233 | */ |
||
| 234 | protected function getSelectedFeatures() |
||
| 255 | |||
| 256 | /** |
||
| 257 | * @param string $baseValue |
||
| 258 | * @param array $colValues |
||
| 259 | * @param array $targets |
||
| 260 | */ |
||
| 261 | public function getGiniIndex($baseValue, $colValues, $targets) |
||
| 285 | |||
| 286 | /** |
||
| 287 | * @param array $samples |
||
| 288 | * @return array |
||
| 289 | */ |
||
| 290 | protected function preprocess(array $samples) |
||
| 313 | |||
| 314 | /** |
||
| 315 | * @param array $columnValues |
||
| 316 | * @return bool |
||
| 317 | */ |
||
| 318 | protected function isCategoricalColumn(array $columnValues) |
||
| 336 | |||
| 337 | /** |
||
| 338 | * This method is used to set number of columns to be used |
||
| 339 | * when deciding a split at an internal node of the tree. <br> |
||
| 340 | * If the value is given 0, then all features are used (default behaviour), |
||
| 341 | * otherwise the given value will be used as a maximum for number of columns |
||
| 342 | * randomly selected for each split operation. |
||
| 343 | * |
||
| 344 | * @param int $numFeatures |
||
| 345 | * @return $this |
||
| 346 | * @throws Exception |
||
| 347 | */ |
||
| 348 | public function setNumFeatures(int $numFeatures) |
||
| 358 | |||
| 359 | /** |
||
| 360 | * Used to set predefined features to consider while deciding which column to use for a split, |
||
| 361 | * |
||
| 362 | * @param array $selectedFeatures |
||
| 363 | */ |
||
| 364 | protected function setSelectedFeatures(array $selectedFeatures) |
||
| 368 | |||
| 369 | /** |
||
| 370 | * A string array to represent columns. Useful when HTML output or |
||
| 371 | * column importances are desired to be inspected. |
||
| 372 | * |
||
| 373 | * @param array $names |
||
| 374 | * @return $this |
||
| 375 | */ |
||
| 376 | public function setColumnNames(array $names) |
||
| 386 | |||
| 387 | /** |
||
| 388 | * @return string |
||
| 389 | */ |
||
| 390 | public function getHtml() |
||
| 394 | |||
| 395 | /** |
||
| 396 | * This will return an array including an importance value for |
||
| 397 | * each column in the given dataset. The importance values are |
||
| 398 | * normalized and their total makes 1.<br/> |
||
| 399 | * |
||
| 400 | * @param array $labels |
||
| 401 | * @return array |
||
| 402 | */ |
||
| 403 | public function getFeatureImportances() |
||
| 433 | |||
| 434 | /** |
||
| 435 | * Collects and returns an array of internal nodes that use the given |
||
| 436 | * column as a split criterion |
||
| 437 | * |
||
| 438 | * @param int $column |
||
| 439 | * @param DecisionTreeLeaf $node |
||
| 440 | * @param array $collected |
||
| 441 | * |
||
| 442 | * @return array |
||
| 443 | */ |
||
| 444 | protected function getSplitNodesByColumn($column, DecisionTreeLeaf $node) |
||
| 467 | |||
| 468 | /** |
||
| 469 | * @param array $sample |
||
| 470 | * @return mixed |
||
| 471 | */ |
||
| 472 | protected function predictSample(array $sample) |
||
| 488 | } |
||
| 489 |
Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.
Either this assignment is in error, or the assigned type should be added to the documentation/type hint for that property.