Test Failed
Pull Request — master (#63)
by
unknown
02:47
created

StochasticGD   A

Complexity

Total Complexity 18

Size/Duplication

Total Lines 238
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Importance

Changes 0
Metric Value
wmc 18
lcom 1
cbo 1
dl 0
loc 238
rs 10
c 0
b 0
f 0

8 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 7 1
A setChangeThreshold() 0 6 1
A setLearningRate() 0 6 1
A setMaxIterations() 0 6 1
B runOptimization() 0 39 5
B updateTheta() 0 27 3
B earlyStop() 0 21 5
A getCostValues() 0 4 1
1
<?php declare(strict_types=1);
2
3
namespace Phpml\Helper\Optimizer;
4
5
/**
 * Stochastic Gradient Descent optimization method
 * to find a solution for the equation A.ϴ = y where
 *  A (samples) and y (targets) are known and ϴ is unknown.
 */
class StochasticGD extends Optimizer
{
    /**
     * A (samples)
     *
     * @var array
     */
    protected $samples = [];

    /**
     * y (targets)
     *
     * @var array
     */
    protected $targets = [];

    /**
     * Callback function to get the gradient and cost value
     * for a specific set of theta (ϴ) and a pair of sample & target
     *
     * @var \Closure
     */
    protected $gradientCb;

    /**
     * Maximum number of iterations used to train the model
     *
     * @var int
     */
    protected $maxIterations = 1000;

    /**
     * Learning rate is used to control the speed of the optimization.<br>
     *
     * Larger values of lr may overshoot the optimum or even cause divergence
     * while small values slow down the convergence and increase the time
     * required for the training
     *
     * @var float
     */
    protected $learningRate = 0.001;

    /**
     * Minimum amount of change in the weights and error values
     * between iterations that needs to be obtained to continue the training
     *
     * @var float
     */
    protected $threshold = 1e-3;

    /**
     * List of values obtained by evaluating the cost function at each iteration
     * of the algorithm
     *
     * @var array
     */
    protected $costValues = [];

    /**
     * Initializes the SGD optimizer for the given number of dimensions
     *
     * @param int $dimensions Number of features per sample (bias excluded)
     */
    public function __construct(int $dimensions)
    {
        // Add one more dimension for the bias
        // (presumably the parent sizes theta from this value - confirm in Optimizer)
        parent::__construct($dimensions + 1);

        // Keep the feature count (without the bias) for the update loop
        $this->dimensions = $dimensions;
    }

    /**
     * Sets minimum value for the change in the theta values
     * between iterations to continue the iterations.<br>
     *
     * If no theta component changes by more than the given value, or if the
     * last two cost values differ by less than it, the algorithm will stop
     * training
     *
     * @param float $threshold
     *
     * @return $this
     */
    public function setChangeThreshold(float $threshold = 1e-5)
    {
        $this->threshold = $threshold;

        return $this;
    }

    /**
     * Sets the step size applied to every gradient update
     *
     * @param float $learningRate
     *
     * @return $this
     */
    public function setLearningRate(float $learningRate)
    {
        $this->learningRate = $learningRate;

        return $this;
    }

    /**
     * Sets the upper bound on the number of passes over the samples
     *
     * @param int $maxIterations
     *
     * @return $this
     */
    public function setMaxIterations(int $maxIterations)
    {
        $this->maxIterations = $maxIterations;

        return $this;
    }

    /**
     * Optimization procedure finds the unknown variables for the equation A.ϴ = y
     * for the given samples (A) and targets (y).<br>
     *
     * The cost function to minimize and the gradient of the function are to be
     * handled by the callback function provided as the third parameter of the method.
     *
     * The callback is invoked as $gradientCb($theta, $sample, $target) and its
     * result is read as [error, gradient, penalty]; missing trailing entries
     * are treated as 0.
     *
     * @param array $samples
     * @param array $targets
     * @param \Closure $gradientCb
     *
     * @return array The theta kept in the "pocket", or the current theta when
     *               no iteration was executed
     */
    public function runOptimization(array $samples, array $targets, \Closure $gradientCb)
    {
        $this->samples = $samples;
        $this->targets = $targets;
        $this->gradientCb = $gradientCb;

        $currIter = 0;
        $bestTheta = null;
        $bestScore = 0.0;
        $this->costValues = [];

        while ($this->maxIterations > $currIter++) {
            // Snapshot of theta before this pass; the cost returned by
            // updateTheta() is evaluated against this snapshot
            $theta = $this->theta;

            // Update the guess
            $cost = $this->updateTheta();

            // Save the best theta in the "pocket" so that
            // any future set of theta worse than this will be disregarded.
            // Strict null check: only "nothing stored yet" may bypass the
            // score comparison.
            if ($bestTheta === null || $cost <= $bestScore) {
                $bestTheta = $theta;
                $bestScore = $cost;
            }

            // Add the cost value for this iteration to the list
            $this->costValues[] = $cost;

            // Check for early stop
            if ($this->earlyStop($theta)) {
                break;
            }
        }

        // Solution in the pocket is better than or equal to the last state
        // so, we use this solution. When the loop never ran (maxIterations <= 0)
        // the pocket is empty and the existing theta is kept instead of
        // overwriting the array property with null.
        if ($bestTheta !== null) {
            $this->theta = $bestTheta;
        }

        return $this->theta;
    }

    /**
     * Runs one pass over all samples, adjusting theta after every sample,
     * and returns the mean of the error values reported by the callback.
     *
     * Note that the callback is always given the theta values as they were
     * at the start of the pass, not the partially updated ones.
     *
     * @return float Mean error of the pass; 0.0 when there are no samples
     */
    protected function updateTheta()
    {
        $sampleCount = count($this->samples);
        if ($sampleCount === 0) {
            // Nothing to learn from; avoid dividing by zero below
            return 0.0;
        }

        $jValue = 0.0;
        $theta = $this->theta;

        foreach ($this->samples as $index => $sample) {
            $target = $this->targets[$index];

            $result = ($this->gradientCb)($theta, $sample, $target);

            // The penalty term is optional in the callback result
            list($error, $gradient, $penalty) = array_pad($result, 3, 0);

            // Update bias
            $this->theta[0] -= $this->learningRate * $gradient;

            // Update other values; theta[$i] pairs with feature $i - 1
            // because index 0 is taken by the bias
            for ($i = 1; $i <= $this->dimensions; $i++) {
                $this->theta[$i] -= $this->learningRate *
                    ($gradient * $sample[$i - 1] + $penalty * $this->theta[$i]);
            }

            // Sum error rate
            $jValue += $error;
        }

        return $jValue / $sampleCount;
    }

    /**
     * Checks if the optimization is not effective enough and can be stopped
     * in case large enough changes in the solution do not happen
     *
     * @param array $oldTheta Theta values before the latest update
     *
     * @return bool True when training should stop
     */
    protected function earlyStop($oldTheta)
    {
        // Check for early stop: no theta component changed by more than
        // the threshold (property default 1e-3)
        $diff = array_map(
            function ($w1, $w2) {
                return abs($w1 - $w2) > $this->threshold ? 1 : 0;
            },
            $oldTheta,
            $this->theta
        );

        if (array_sum($diff) === 0) {
            return true;
        }

        // Check if the last two cost values are almost the same
        $costs = array_slice($this->costValues, -2);
        if (count($costs) === 2 && abs($costs[1] - $costs[0]) < $this->threshold) {
            return true;
        }

        return false;
    }

    /**
     * Returns the list of cost values for each iteration executed in
     * last run of the optimization
     *
     * @return array
     */
    public function getCostValues()
    {
        return $this->costValues;
    }
}
248