ConjugateGradient::getAlpha()   B
last analyzed

Complexity

Conditions 8
Paths 18

Size

Total Lines 45
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 25
dl 0
loc 45
rs 8.4444
c 0
b 0
f 0
cc 8
nc 18
nop 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Helper\Optimizer;
6
7
use Closure;
8
9
/**
10
 * Conjugate Gradient method to solve a non-linear f(x) with respect to unknown x
11
 * See https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method)
12
 *
13
 * The method applied below is explained in the below document in a practical manner
14
 *  - http://web.cs.iastate.edu/~cs577/handouts/conjugate-gradient.pdf
15
 *
16
 * However it is compliant with the general Conjugate Gradient method with
17
 * Fletcher-Reeves update method. Note that, the f(x) is assumed to be one-dimensional
18
 * and one gradient is utilized for all dimensions in the given data.
19
 */
20
class ConjugateGradient extends GD
21
{
22
    public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
23
    {
24
        $this->samples = $samples;
25
        $this->targets = $targets;
26
        $this->gradientCb = $gradientCb;
27
        $this->sampleCount = count($samples);
28
        $this->costValues = [];
29
30
        $d = MP::muls($this->gradient($this->theta), -1);
31
32
        for ($i = 0; $i < $this->maxIterations; ++$i) {
33
            // Obtain α that minimizes f(θ + α.d)
34
            $alpha = $this->getAlpha($d);
35
36
            // θ(k+1) = θ(k) + α.d
37
            $thetaNew = $this->getNewTheta($alpha, $d);
38
39
            // β = ||∇f(x(k+1))||²  ∕  ||∇f(x(k))||²
40
            $beta = $this->getBeta($thetaNew);
41
42
            // d(k+1) =–∇f(x(k+1)) + β(k).d(k)
43
            $d = $this->getNewDirection($thetaNew, $beta, $d);
44
45
            // Save values for the next iteration
46
            $oldTheta = $this->theta;
47
            $this->costValues[] = $this->cost($thetaNew);
48
49
            $this->theta = $thetaNew;
50
            if ($this->enableEarlyStop && $this->earlyStop($oldTheta)) {
51
                break;
52
            }
53
        }
54
55
        $this->clear();
56
57
        return $this->theta;
58
    }
59
60
    /**
61
     * Executes the callback function for the problem and returns
62
     * sum of the gradient for all samples & targets.
63
     */
64
    protected function gradient(array $theta): array
65
    {
66
        [, $updates, $penalty] = parent::gradient($theta);
67
68
        // Calculate gradient for each dimension
69
        $gradient = [];
70
        for ($i = 0; $i <= $this->dimensions; ++$i) {
71
            if ($i === 0) {
72
                $gradient[$i] = array_sum($updates);
73
            } else {
74
                $col = array_column($this->samples, $i - 1);
75
                $error = 0;
76
                foreach ($col as $index => $val) {
77
                    $error += $val * $updates[$index];
78
                }
79
80
                $gradient[$i] = $error + $penalty * $theta[$i];
81
            }
82
        }
83
84
        return $gradient;
85
    }
86
87
    /**
88
     * Returns the value of f(x) for given solution
89
     */
90
    protected function cost(array $theta): float
91
    {
92
        [$cost] = parent::gradient($theta);
93
94
        return array_sum($cost) / (int) $this->sampleCount;
95
    }
96
97
    /**
98
     * Calculates alpha that minimizes the function f(θ + α.d)
99
     * by performing a line search that does not rely upon the derivation.
100
     *
101
     * There are several alternatives for this function. For now, we
102
     * prefer a method inspired from the bisection method for its simplicity.
103
     * This algorithm attempts to find an optimum alpha value between 0.0001 and 0.01
104
     *
105
     * Algorithm as follows:
106
     *  a) Probe a small alpha  (0.0001) and calculate cost function
107
     *  b) Probe a larger alpha (0.01) and calculate cost function
108
     *		b-1) If cost function decreases, continue enlarging alpha
109
     *		b-2) If cost function increases, take the midpoint and try again
110
     */
111
    protected function getAlpha(array $d): float
112
    {
113
        $small = MP::muls($d, 0.0001);
114
        $large = MP::muls($d, 0.01);
115
116
        // Obtain θ + α.d for two initial values, x0 and x1
117
        $x0 = MP::add($this->theta, $small);
118
        $x1 = MP::add($this->theta, $large);
119
120
        $epsilon = 0.0001;
121
        $iteration = 0;
122
        do {
123
            $fx1 = $this->cost($x1);
124
            $fx0 = $this->cost($x0);
125
126
            // If the difference between two values is small enough
127
            // then break the loop
128
            if (abs($fx1 - $fx0) <= $epsilon) {
129
                break;
130
            }
131
132
            if ($fx1 < $fx0) {
133
                $x0 = $x1;
134
                $x1 = MP::adds($x1, 0.01); // Enlarge second
135
            } else {
136
                $x1 = MP::divs(MP::add($x1, $x0), 2.0);
137
            } // Get to the midpoint
138
139
            $error = $fx1 / $this->dimensions;
140
        } while ($error <= $epsilon || $iteration++ < 10);
141
142
        // Return α = θ / d
143
        // For accuracy, choose a dimension which maximize |d[i]|
144
        $imax = 0;
145
        for ($i = 1; $i <= $this->dimensions; ++$i) {
146
            if (abs($d[$i]) > abs($d[$imax])) {
147
                $imax = $i;
148
            }
149
        }
150
151
        if ($d[$imax] == 0) {
152
            return $x1[$imax] - $this->theta[$imax];
153
        }
154
155
        return ($x1[$imax] - $this->theta[$imax]) / $d[$imax];
156
    }
157
158
    /**
159
     * Calculates new set of solutions with given alpha (for each θ(k)) and
160
     * gradient direction.
161
     *
162
     * θ(k+1) = θ(k) + α.d
163
     */
164
    protected function getNewTheta(float $alpha, array $d): array
165
    {
166
        return MP::add($this->theta, MP::muls($d, $alpha));
167
    }
168
169
    /**
170
     * Calculates new beta (β) for given set of solutions by using
171
     * Fletcher–Reeves method.
172
     *
173
     * β = ||f(x(k+1))||²  ∕  ||f(x(k))||²
174
     *
175
     * See:
176
     *  R. Fletcher and C. M. Reeves, "Function minimization by conjugate gradients", Comput. J. 7 (1964), 149–154.
177
     */
178
    protected function getBeta(array $newTheta): float
179
    {
180
        $gNew = $this->gradient($newTheta);
181
        $gOld = $this->gradient($this->theta);
182
        $dNew = 0;
183
        $dOld = 1e-100;
184
        for ($i = 0; $i <= $this->dimensions; ++$i) {
185
            $dNew += $gNew[$i] ** 2;
186
            $dOld += $gOld[$i] ** 2;
187
        }
188
189
        return $dNew / $dOld;
190
    }
191
192
    /**
193
     * Calculates the new conjugate direction
194
     *
195
     * d(k+1) =–∇f(x(k+1)) + β(k).d(k)
196
     */
197
    protected function getNewDirection(array $theta, float $beta, array $d): array
198
    {
199
        $grad = $this->gradient($theta);
200
201
        return MP::add(MP::muls($grad, -1), MP::muls($d, $beta));
202
    }
203
}
204
205
/**
206
 * Handles element-wise vector operations between vector-vector
207
 * and vector-scalar variables
208
 */
209
class MP
210
{
211
    /**
212
     * Element-wise <b>multiplication</b> of two vectors of the same size
213
     */
214
    public static function mul(array $m1, array $m2): array
215
    {
216
        $res = [];
217
        foreach ($m1 as $i => $val) {
218
            $res[] = $val * $m2[$i];
219
        }
220
221
        return $res;
222
    }
223
224
    /**
225
     * Element-wise <b>division</b> of two vectors of the same size
226
     */
227
    public static function div(array $m1, array $m2): array
228
    {
229
        $res = [];
230
        foreach ($m1 as $i => $val) {
231
            $res[] = $val / $m2[$i];
232
        }
233
234
        return $res;
235
    }
236
237
    /**
238
     * Element-wise <b>addition</b> of two vectors of the same size
239
     */
240
    public static function add(array $m1, array $m2, int $mag = 1): array
241
    {
242
        $res = [];
243
        foreach ($m1 as $i => $val) {
244
            $res[] = $val + $mag * $m2[$i];
245
        }
246
247
        return $res;
248
    }
249
250
    /**
251
     * Element-wise <b>subtraction</b> of two vectors of the same size
252
     */
253
    public static function sub(array $m1, array $m2): array
254
    {
255
        return self::add($m1, $m2, -1);
256
    }
257
258
    /**
259
     * Element-wise <b>multiplication</b> of a vector with a scalar
260
     */
261
    public static function muls(array $m1, float $m2): array
262
    {
263
        $res = [];
264
        foreach ($m1 as $val) {
265
            $res[] = $val * $m2;
266
        }
267
268
        return $res;
269
    }
270
271
    /**
272
     * Element-wise <b>division</b> of a vector with a scalar
273
     */
274
    public static function divs(array $m1, float $m2): array
275
    {
276
        $res = [];
277
        foreach ($m1 as $val) {
278
            $res[] = $val / ($m2 + 1e-32);
279
        }
280
281
        return $res;
282
    }
283
284
    /**
285
     * Element-wise <b>addition</b> of a vector with a scalar
286
     */
287
    public static function adds(array $m1, float $m2, int $mag = 1): array
288
    {
289
        $res = [];
290
        foreach ($m1 as $val) {
291
            $res[] = $val + $mag * $m2;
292
        }
293
294
        return $res;
295
    }
296
297
    /**
298
     * Element-wise <b>subtraction</b> of a vector with a scalar
299
     */
300
    public static function subs(array $m1, float $m2): array
301
    {
302
        return self::adds($m1, $m2, -1);
303
    }
304
}
305