1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace Phpml\Classification\DecisionTree; |
6
|
|
|
|
7
|
|
|
/**
 * A single node of a decision tree.
 *
 * A node either carries a split condition on one column of a record
 * (see evaluate()) or, when $isTerminal is true, a class value. Child nodes
 * are referenced through $leftLeaf and $rightLeaf.
 */
class DecisionTreeLeaf
{
    /**
     * Value a record field is compared against (with ==) when the split is
     * not continuous. Also used as the textual condition in getHTML().
     *
     * @var string
     */
    public $value;

    /**
     * Threshold a record field is compared against when $isContinuous is true.
     *
     * @var float
     */
    public $numericValue;

    /**
     * Comparison operator applied between the record field and $numericValue
     * when $isContinuous is true. Supported: >=, <=, >, <, ==, =, != and <>.
     *
     * @var string
     */
    public $operator;

    /**
     * Index of the record field this node tests.
     *
     * @var int
     */
    public $columnIndex;

    /**
     * Child rendered under the "Yes" branch in getHTML().
     *
     * @var DecisionTreeLeaf|null
     */
    public $leftLeaf = null;

    /**
     * Child rendered under the "No" branch in getHTML().
     *
     * @var DecisionTreeLeaf|null
     */
    public $rightLeaf = null;

    /**
     * Records attached to this node; only their count is used by this class
     * (presumably the training samples that reached the node - confirm
     * against the tree builder).
     *
     * @var array
     */
    public $records = [];

    /**
     * Class value represented by the leaf, this value is non-empty
     * only for terminal leaves
     *
     * @var string
     */
    public $classValue = '';

    /**
     * Whether this node is a terminal leaf (carries a class value, no split).
     *
     * @var bool
     */
    public $isTerminal = false;

    /**
     * Whether the split is a numeric comparison ($operator / $numericValue)
     * rather than an equality test against $value.
     *
     * @var bool
     */
    public $isContinuous = false;

    /**
     * Gini index of this node, as used by getNodeImpurityDecrease().
     *
     * @var float
     */
    public $giniIndex = 0;

    /**
     * Not read by this class (presumably the depth of the node in the
     * tree - confirm against the tree builder).
     *
     * @var int
     */
    public $level = 0;

    /**
     * Tests the split condition of this node against a record.
     *
     * For continuous splits the field at $columnIndex is compared with
     * $numericValue using $operator; a null field never matches. For all
     * other splits the field is loosely compared (==) with $value.
     *
     * @param array $record
     *
     * @return bool
     *
     * @throws \InvalidArgumentException when $operator is not a supported comparison operator
     */
    public function evaluate($record)
    {
        $recordField = $record[$this->columnIndex];

        if (!$this->isContinuous) {
            return $recordField == $this->value;
        }

        if (is_null($recordField)) {
            return false;
        }

        // The field is compared in its string form, as before; PHP compares
        // numeric strings with numbers numerically.
        $recordField = strval($recordField);
        $value = $this->numericValue;

        switch ($this->operator) {
            case '>=':
                return $recordField >= $value;
            case '<=':
                return $recordField <= $value;
            case '>':
                return $recordField > $value;
            case '<':
                return $recordField < $value;
            case '==':
            case '=':
                return $recordField == $value;
            case '!=':
            case '<>':
                return $recordField != $value;
            default:
                // Unknown operators used to be handed to eval(), which executes
                // arbitrary code if the operator or the field is attacker
                // controlled. Reject them explicitly instead.
                throw new \InvalidArgumentException(
                    sprintf('Unsupported comparison operator "%s"', $this->operator)
                );
        }
    }

    /**
     * Returns Mean Decrease Impurity (MDI) in the node.
     * For terminal nodes, this value is equal to 0
     *
     * The node's Gini index is reduced by each child's Gini index weighted by
     * the child's share of this node's records, then scaled by the node's
     * share of $parentRecordCount. When the node holds no records or
     * $parentRecordCount is 0 the result is 0.0 instead of a division by zero.
     *
     * @param int $parentRecordCount
     *
     * @return float
     */
    public function getNodeImpurityDecrease(int $parentRecordCount)
    {
        if ($this->isTerminal) {
            return 0.0;
        }

        $nodeSampleCount = (float) count($this->records);

        // Guard both divisors: an empty node (or an empty parent) cannot
        // contribute any impurity decrease.
        if ($nodeSampleCount === 0.0 || $parentRecordCount === 0) {
            return 0.0;
        }

        $iT = $this->giniIndex;

        if ($this->leftLeaf) {
            $pL = count($this->leftLeaf->records) / $nodeSampleCount;
            $iT -= $pL * $this->leftLeaf->giniIndex;
        }

        if ($this->rightLeaf) {
            $pR = count($this->rightLeaf->records) / $nodeSampleCount;
            $iT -= $pR * $this->rightLeaf->giniIndex;
        }

        return $iT * $nodeSampleCount / $parentRecordCount;
    }

    /**
     * Returns HTML representation of the node including children nodes
     *
     * Terminal nodes show their class value; other nodes show
     * "<column> <condition>" and the Gini index. Columns are labelled with
     * $columnNames[$columnIndex] when names are given, "col_<index>" otherwise.
     * Values are inserted into the markup as-is (no HTML escaping).
     *
     * @param array|null $columnNames
     *
     * @return string
     */
    public function getHTML($columnNames = null)
    {
        if ($this->isTerminal) {
            $value = "<b>{$this->classValue}</b>";
        } else {
            $value = $this->value;

            if ($columnNames !== null) {
                $col = $columnNames[$this->columnIndex];
            } else {
                $col = "col_{$this->columnIndex}";
            }

            // Cast before matching: under strict_types a non-string category
            // value (e.g. an int) would make preg_match() throw a TypeError.
            if (!preg_match("/^[<>=]{1,2}/", (string) $value)) {
                $value = "=$value";
            }

            $value = "<b>$col $value</b><br>Gini: " . number_format($this->giniIndex, 2);
        }

        $str = "<table ><tr><td colspan=3 align=center style='border:1px solid;'>
                $value</td></tr>";

        if ($this->leftLeaf || $this->rightLeaf) {
            $str .= '<tr>';

            if ($this->leftLeaf) {
                $str .= "<td valign=top><b>| Yes</b><br>" . $this->leftLeaf->getHTML($columnNames) . "</td>";
            } else {
                $str .= '<td></td>';
            }

            $str .= '<td> </td>';

            if ($this->rightLeaf) {
                $str .= "<td valign=top align=right><b>No |</b><br>" . $this->rightLeaf->getHTML($columnNames) . "</td>";
            } else {
                $str .= '<td></td>';
            }

            $str .= '</tr>';
        }

        $str .= '</table>';

        return $str;
    }

    /**
     * HTML representation of the tree without column names
     *
     * @return string
     */
    public function __toString()
    {
        return $this->getHTML();
    }
}
192
|
|
|
|
On one hand, eval might be exploited by malicious users if they somehow manage to inject dynamic content. On the other hand, with the emergence of faster PHP runtimes such as HHVM, eval prevents some of the optimizations that these runtimes perform.