1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace Phpml\Classification\DecisionTree; |
6
|
|
|
|
7
|
|
|
use Phpml\Math\Comparison; |
8
|
|
|
|
9
|
|
|
/**
 * A single node of a decision tree.
 *
 * A node is either a split node (it tests one column of a record and may have
 * a left and/or right child) or a terminal node (it carries a class value).
 * The node can evaluate a record against its split condition, report its
 * impurity decrease and render itself, together with its children, as HTML.
 */
class DecisionTreeLeaf
{
    /**
     * Split value of the node. Compared against the record field in
     * evaluate() for non-continuous columns and shown as the condition
     * label in getHTML().
     *
     * @var string|int
     */
    public $value;

    /**
     * Numeric operand handed to Comparison::compare() when the node splits
     * a continuous column.
     *
     * @var float
     */
    public $numericValue;

    /**
     * Comparison operator handed to Comparison::compare() when the node
     * splits a continuous column.
     *
     * @var string
     */
    public $operator;

    /**
     * Index of the record column this node tests.
     *
     * @var int
     */
    public $columnIndex;

    /**
     * Child rendered under the "Yes" branch in getHTML().
     *
     * @var DecisionTreeLeaf|null
     */
    public $leftLeaf;

    /**
     * Child rendered under the "No" branch in getHTML().
     *
     * @var DecisionTreeLeaf|null
     */
    public $rightLeaf;

    /**
     * Records assigned to this node; only their count is used by this class.
     *
     * @var array
     */
    public $records = [];

    /**
     * Class value represented by the leaf, this value is non-empty
     * only for terminal leaves
     *
     * @var string
     */
    public $classValue = '';

    /**
     * Whether the node is terminal (has a class value instead of a split).
     *
     * @var bool
     */
    public $isTerminal = false;

    /**
     * Whether the tested column is continuous; selects the comparison
     * strategy used by evaluate().
     *
     * @var bool
     */
    public $isContinuous = false;

    /**
     * Gini index of the node.
     *
     * @var float
     */
    public $giniIndex = 0;

    /**
     * Level of the node; not read by this class
     * (presumably its depth in the tree - TODO confirm against the builder).
     *
     * @var int
     */
    public $level = 0;

    /**
     * HTML representation of the tree without column names
     */
    public function __toString(): string
    {
        return $this->getHTML();
    }

    /**
     * Tests a record against the split condition of this node.
     *
     * For continuous columns the field (cast to string), the numeric value
     * and the operator are delegated to Comparison::compare(); otherwise the
     * field is compared with the split value.
     *
     * @param array $record record indexed by column; must contain $columnIndex
     *
     * @return bool true when the record satisfies the condition
     */
    public function evaluate(array $record): bool
    {
        $recordField = $record[$this->columnIndex];

        if ($this->isContinuous) {
            return Comparison::compare((string) $recordField, $this->numericValue, $this->operator);
        }

        // Loose comparison: a field and a split value of different scalar
        // types (e.g. '1' and 1) are treated as equal.
        return $recordField == $this->value;
    }

    /**
     * Returns Mean Decrease Impurity (MDI) in the node.
     * For terminal nodes, this value is equal to 0
     *
     * The node's Gini index is reduced by the Gini index of each existing
     * child, weighted by the child's share of the node's records, and the
     * result is scaled by the node's share of the parent record count.
     *
     * @param int $parentRecordCount record count the node's sample count is divided by
     *
     * @return float impurity decrease; 0.0 for terminal nodes, for nodes
     *               without records and when $parentRecordCount is 0
     */
    public function getNodeImpurityDecrease(int $parentRecordCount): float
    {
        if ($this->isTerminal) {
            return 0.0;
        }

        $nodeSampleCount = (float) count($this->records);

        // Both values are used as divisors below; an empty node (or an empty
        // parent) contributes nothing instead of raising a division by zero.
        if ($nodeSampleCount === 0.0 || $parentRecordCount === 0) {
            return 0.0;
        }

        $impurity = $this->giniIndex;

        // Left child first, then right child, each weighted by its share
        // of this node's records.
        foreach ([$this->leftLeaf, $this->rightLeaf] as $child) {
            if ($child === null) {
                continue;
            }

            $proportion = count($child->records) / $nodeSampleCount;
            $impurity -= $proportion * $child->giniIndex;
        }

        return $impurity * $nodeSampleCount / $parentRecordCount;
    }

    /**
     * Returns HTML representation of the node including children nodes
     *
     * Terminal nodes show their class value; split nodes show the column,
     * the condition and the Gini index. Values are inserted into the markup
     * verbatim (no HTML escaping is applied).
     *
     * @param array|null $columnNames column labels indexed by column index;
     *                                when null, "col_<index>" is used
     *
     * @return string nested HTML tables describing the node and its children
     */
    public function getHTML($columnNames = null): string
    {
        if ($this->isTerminal) {
            // Interpolate the property, not the object itself: "${this}"
            // would call __toString() and recurse into getHTML() forever.
            $value = "<b>{$this->classValue}</b>";
        } else {
            $value = $this->value;
            if ($columnNames !== null) {
                $col = $columnNames[$this->columnIndex];
            } else {
                $col = 'col_'.$this->columnIndex;
            }

            // Prefix values that do not already start with a comparison
            // operator so the label reads as "column =value".
            if (!preg_match('/^[<>=]{1,2}/', (string) $value)) {
                $value = "={$value}";
            }

            $value = "<b>{$col} {$value}</b><br>Gini: ".number_format($this->giniIndex, 2);
        }

        $str = "<table ><tr><td colspan=3 align=center style='border:1px solid;'>{$value}</td></tr>";

        if ($this->leftLeaf !== null || $this->rightLeaf !== null) {
            $str .= '<tr>';
            if ($this->leftLeaf !== null) {
                $str .= '<td valign=top><b>| Yes</b><br>'.$this->leftLeaf->getHTML($columnNames).'</td>';
            } else {
                $str .= '<td></td>';
            }

            $str .= '<td> </td>';
            if ($this->rightLeaf !== null) {
                $str .= '<td valign=top align=right><b>No |</b><br>'.$this->rightLeaf->getHTML($columnNames).'</td>';
            } else {
                $str .= '<td></td>';
            }

            $str .= '</tr>';
        }

        $str .= '</table>';

        return $str;
    }
}
166
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.