1 | <?php |
||
14 | class DecisionStump extends WeightedClassifier |
||
15 | { |
||
16 | use Predictable, OneVsRest; |
||
17 | |||
18 | public const AUTO_SELECT = -1; |
||
19 | |||
20 | /** |
||
21 | * @var int |
||
22 | */ |
||
23 | protected $givenColumnIndex; |
||
24 | |||
25 | /** |
||
26 | * @var array |
||
27 | */ |
||
28 | protected $binaryLabels = []; |
||
29 | |||
30 | /** |
||
31 | * Lowest error rate obtained while training/optimizing the model |
||
32 | * |
||
33 | * @var float |
||
34 | */ |
||
35 | protected $trainingErrorRate; |
||
36 | |||
37 | /** |
||
38 | * @var int |
||
39 | */ |
||
40 | protected $column; |
||
41 | |||
42 | /** |
||
43 | * @var mixed |
||
44 | */ |
||
45 | protected $value; |
||
46 | |||
47 | /** |
||
48 | * @var string |
||
49 | */ |
||
50 | protected $operator; |
||
51 | |||
52 | /** |
||
53 | * @var array |
||
54 | */ |
||
55 | protected $columnTypes = []; |
||
56 | |||
57 | /** |
||
58 | * @var int |
||
59 | */ |
||
60 | protected $featureCount; |
||
61 | |||
62 | /** |
||
63 | * @var float |
||
64 | */ |
||
65 | protected $numSplitCount = 100.0; |
||
66 | |||
67 | /** |
||
68 | * Distribution of samples in the leaves |
||
69 | * |
||
70 | * @var array |
||
71 | */ |
||
72 | protected $prob = []; |
||
73 | |||
74 | /** |
||
75 | * A DecisionStump classifier is a one-level deep DecisionTree. It is generally |
||
76 | * used with ensemble algorithms in the weak classifier role. <br> |
||
77 | * |
||
78 | * If columnIndex is given, then the stump tries to produce a decision node |
||
79 | * on this column; otherwise, when given the value of -1, the stump itself |
||
80 | * decides which column to take for the decision (Default DecisionTree behaviour) |
||
81 | */ |
||
82 | public function __construct(int $columnIndex = self::AUTO_SELECT) |
||
86 | |||
87 | public function __toString(): string |
||
93 | |||
94 | /** |
||
95 | * While finding best split point for a numerical valued column, |
||
96 | * DecisionStump looks for equally distanced values between minimum and maximum |
||
97 | * values in the column. Given <i>$count</i> value determines how many split |
||
98 | * points to be probed. The higher the split count, the better the performance but |
||
99 | * the worse the processing time (Default value is 100.0) |
||
100 | */ |
||
101 | public function setNumericalSplitCount(float $count): void |
||
105 | |||
106 | /** |
||
107 | * @throws InvalidArgumentException |
||
108 | */ |
||
109 | protected function trainBinary(array $samples, array $targets, array $labels): void |
||
164 | |||
165 | /** |
||
166 | * Determines best split point for the given column |
||
167 | */ |
||
168 | protected function getBestNumericalSplit(array $samples, array $targets, int $col): array |
||
214 | |||
215 | protected function getBestNominalSplit(array $samples, array $targets, int $col): array |
||
241 | |||
242 | /** |
||
243 | * Calculates the ratio of wrong predictions based on the new threshold |
||
244 | * value given as the parameter |
||
245 | */ |
||
246 | protected function calculateErrorRate(array $targets, float $threshold, string $operator, array $values): array |
||
285 | |||
286 | /** |
||
287 | * Returns the probability of the sample belonging to the given label |
||
288 | * |
||
289 | * Probability of a sample is calculated as the proportion of the label |
||
290 | * within the labels of the training samples in the decision node |
||
291 | * |
||
292 | * @param mixed $label |
||
293 | */ |
||
294 | protected function predictProbability(array $sample, $label): float |
||
303 | |||
304 | /** |
||
305 | * @return mixed |
||
306 | */ |
||
307 | protected function predictSampleBinary(array $sample) |
||
315 | |||
316 | protected function resetBinary(): void |
||
319 | } |
||
320 |
This check marks access to variables or properties that have not been declared yet. While PHP has no explicit notion of declaring a variable, accessing it before a value is assigned to it is most likely a bug.