StochasticGD::getCostValues() - Code Metrics - php-ai/php-ml - Measure and Improve Code Quality continuously with Scrutinizer

StochasticGD::getCostValues() A
last analyzed 2020-05-15 05:48 UTC

↳ Parent: StochasticGD

Complexity

Conditions	1
Paths	1

Size

Total Lines	3
Code Lines	1

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	1
dl	0
loc	3
rs	10
c	0
b	0
f	0
cc	1
nc	1
nop	0

<?php

declare(strict_types=1);

namespace Phpml\Helper\Optimizer;

use Closure;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Exception\InvalidOperationException;

/**
 * Stochastic Gradient Descent optimization method
 * to find a solution for the equation A.ϴ = y where
 *  A (samples) and y (targets) are known and ϴ is unknown.
 */
class StochasticGD extends Optimizer
{
    /**
     * A (samples)
     *
     * @var array
     */
    protected $samples = [];

    /**
     * y (targets)
     *
     * @var array
     */
    protected $targets = [];

    /**
     * Callback function to get the gradient and cost value
     * for a specific set of theta (ϴ) and a pair of sample & target
     *
     * @var \Closure|null
     */
    protected $gradientCb;

    /**
     * Maximum number of iterations used to train the model
     *
     * @var int
     */
    protected $maxIterations = 1000;

    /**
     * Learning rate is used to control the speed of the optimization.<br>
     *
     * Larger values of lr may overshoot the optimum or even cause divergence
     * while small values slows down the convergence and increases the time
     * required for the training
     *
     * @var float
     */
    protected $learningRate = 0.001;

    /**
     * Minimum amount of change in the weights and error values
     * between iterations that needs to be obtained to continue the training
     *
     * @var float
     */
    protected $threshold = 1e-4;

    /**
     * Enable/Disable early stopping by checking the weight & cost values
     * to see whether they changed large enough to continue the optimization
     *
     * @var bool
     */
    protected $enableEarlyStop = true;

    /**
     * List of values obtained by evaluating the cost function at each iteration
     * of the algorithm
     *
     * @var array
     */
    protected $costValues = [];

    /**
     * Initializes the SGD optimizer for the given number of dimensions
     */
    public function __construct(int $dimensions)
    {
        // Add one more dimension for the bias
        parent::__construct($dimensions + 1);

        $this->dimensions = $dimensions;
    }

    public function setTheta(array $theta): Optimizer
    {
        if (count($theta) !== $this->dimensions + 1) {
            throw new InvalidArgumentException(sprintf('Number of values in the weights array should be %s', $this->dimensions + 1));
        }

        $this->theta = $theta;

        return $this;
    }

    /**
     * Sets minimum value for the change in the theta values
     * between iterations to continue the iterations.<br>
     *
     * If change in the theta is less than given value then the
     * algorithm will stop training
     *
     * @return $this
     */
    public function setChangeThreshold(float $threshold = 1e-5)
    {
        $this->threshold = $threshold;

        return $this;
    }

    /**
     * Enable/Disable early stopping by checking at each iteration
     * whether changes in theta or cost value are not large enough
     *
     * @return $this
     */
    public function setEarlyStop(bool $enable = true)
    {
        $this->enableEarlyStop = $enable;

        return $this;
    }

    /**
     * @return $this
     */
    public function setLearningRate(float $learningRate)
    {
        $this->learningRate = $learningRate;

        return $this;
    }

    /**
     * @return $this
     */
    public function setMaxIterations(int $maxIterations)
    {
        $this->maxIterations = $maxIterations;

        return $this;
    }

    /**
     * Optimization procedure finds the unknow variables for the equation A.ϴ = y
     * for the given samples (A) and targets (y).<br>
     *
     * The cost function to minimize and the gradient of the function are to be
     * handled by the callback function provided as the third parameter of the method.
     */
    public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
    {
        $this->samples = $samples;
        $this->targets = $targets;
        $this->gradientCb = $gradientCb;

        $currIter = 0;
        $bestTheta = null;
        $bestScore = 0.0;
        $this->costValues = [];

        while ($this->maxIterations > $currIter++) {
            $theta = $this->theta;

            // Update the guess
            $cost = $this->updateTheta();

            // Save the best theta in the "pocket" so that
            // any future set of theta worse than this will be disregarded
            if ($bestTheta === null || $cost <= $bestScore) {
                $bestTheta = $theta;
                $bestScore = $cost;
            }

            // Add the cost value for this iteration to the list
            $this->costValues[] = $cost;

            // Check for early stop
            if ($this->enableEarlyStop && $this->earlyStop($theta)) {
                break;
            }
        }

        $this->clear();

        // Solution in the pocket is better than or equal to the last state
        // so, we use this solution
        return $this->theta = (array) $bestTheta;
    }

    /**
     * Returns the list of cost values for each iteration executed in
     * last run of the optimization
     */
    public function getCostValues(): array
    {
        return $this->costValues;
    }

    protected function updateTheta(): float
    {
        $jValue = 0.0;
        $theta = $this->theta;

        if ($this->gradientCb === null) {
            throw new InvalidOperationException('Gradient callback is not defined');
        }

        foreach ($this->samples as $index => $sample) {
            $target = $this->targets[$index];

            $result = ($this->gradientCb)($theta, $sample, $target);

            [$error, $gradient, $penalty] = array_pad($result, 3, 0);

            // Update bias
            $this->theta[0] -= $this->learningRate * $gradient;

            // Update other values
            for ($i = 1; $i <= $this->dimensions; ++$i) {
                $this->theta[$i] -= $this->learningRate *
                    ($gradient * $sample[$i - 1] + $penalty * $this->theta[$i]);
            }

            // Sum error rate
            $jValue += $error;
        }

        return $jValue / count($this->samples);
    }

    /**
     * Checks if the optimization is not effective enough and can be stopped
     * in case large enough changes in the solution do not happen
     */
    protected function earlyStop(array $oldTheta): bool
    {
        // Check for early stop: No change larger than threshold (default 1e-5)
        $diff = array_map(
            function ($w1, $w2) {
                return abs($w1 - $w2) > $this->threshold ? 1 : 0;
            },
            $oldTheta,
            $this->theta
        );

        if (array_sum($diff) == 0) {
            return true;
        }

        // Check if the last two cost values are almost the same
        $costs = array_slice($this->costValues, -2);
        if (count($costs) === 2 && abs($costs[1] - $costs[0]) < $this->threshold) {
            return true;
        }

        return false;
    }

    /**
     * Clears the optimizer internal vars after the optimization process.
     */
    protected function clear(): void
    {
        $this->samples = [];
        $this->targets = [];
        $this->gradientCb = null;
    }
}


1			<?php
2
3			declare(strict_types=1);
4
5			namespace Phpml\Helper\Optimizer;
6
7			use Closure;
8			use Phpml\Exception\InvalidArgumentException;
9			use Phpml\Exception\InvalidOperationException;
10
11			/**
12			* Stochastic Gradient Descent optimization method
13			* to find a solution for the equation A.ϴ = y where
14			* A (samples) and y (targets) are known and ϴ is unknown.
15			*/
16			class StochasticGD extends Optimizer
17			{
18			/**
19			* A (samples)
20			*
21			* @var array
22			*/
23			protected $samples = [];
24
25			/**
26			* y (targets)
27			*
28			* @var array
29			*/
30			protected $targets = [];
31
32			/**
33			* Callback function to get the gradient and cost value
34			* for a specific set of theta (ϴ) and a pair of sample & target
35			*
36			* @var \Closure\|null
37			*/
38			protected $gradientCb;
39
40			/**
41			* Maximum number of iterations used to train the model
42			*
43			* @var int
44			*/
45			protected $maxIterations = 1000;
46
47			/**
48			* Learning rate is used to control the speed of the optimization.<br>
49			*
50			* Larger values of lr may overshoot the optimum or even cause divergence
51			* while small values slows down the convergence and increases the time
52			* required for the training
53			*
54			* @var float
55			*/
56			protected $learningRate = 0.001;
57
58			/**
59			* Minimum amount of change in the weights and error values
60			* between iterations that needs to be obtained to continue the training
61			*
62			* @var float
63			*/
64			protected $threshold = 1e-4;
65
66			/**
67			* Enable/Disable early stopping by checking the weight & cost values
68			* to see whether they changed large enough to continue the optimization
69			*
70			* @var bool
71			*/
72			protected $enableEarlyStop = true;
73
74			/**
75			* List of values obtained by evaluating the cost function at each iteration
76			* of the algorithm
77			*
78			* @var array
79			*/
80			protected $costValues = [];
81
82			/**
83			* Initializes the SGD optimizer for the given number of dimensions
84			*/
85			public function __construct(int $dimensions)
86			{
87			// Add one more dimension for the bias
88			parent::__construct($dimensions + 1);
89
90			$this->dimensions = $dimensions;
91			}
92
93			public function setTheta(array $theta): Optimizer
94			{
95			if (count($theta) !== $this->dimensions + 1) {
96			throw new InvalidArgumentException(sprintf('Number of values in the weights array should be %s', $this->dimensions + 1));
97			}
98
99			$this->theta = $theta;
100
101			return $this;
102			}
103
104			/**
105			* Sets minimum value for the change in the theta values
106			* between iterations to continue the iterations.<br>
107			*
108			* If change in the theta is less than given value then the
109			* algorithm will stop training
110			*
111			* @return $this
112			*/
113			public function setChangeThreshold(float $threshold = 1e-5)
114			{
115			$this->threshold = $threshold;
116
117			return $this;
118			}
119
120			/**
121			* Enable/Disable early stopping by checking at each iteration
122			* whether changes in theta or cost value are not large enough
123			*
124			* @return $this
125			*/
126			public function setEarlyStop(bool $enable = true)
127			{
128			$this->enableEarlyStop = $enable;
129
130			return $this;
131			}
132
133			/**
134			* @return $this
135			*/
136			public function setLearningRate(float $learningRate)
137			{
138			$this->learningRate = $learningRate;
139
140			return $this;
141			}
142
143			/**
144			* @return $this
145			*/
146			public function setMaxIterations(int $maxIterations)
147			{
148			$this->maxIterations = $maxIterations;
149
150			return $this;
151			}
152
153			/**
154			* Optimization procedure finds the unknow variables for the equation A.ϴ = y
155			* for the given samples (A) and targets (y).<br>
156			*
157			* The cost function to minimize and the gradient of the function are to be
158			* handled by the callback function provided as the third parameter of the method.
159			*/
160			public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
161			{
162			$this->samples = $samples;
163			$this->targets = $targets;
164			$this->gradientCb = $gradientCb;
165
166			$currIter = 0;
167			$bestTheta = null;
168			$bestScore = 0.0;
169			$this->costValues = [];
170
171			while ($this->maxIterations > $currIter++) {
172			$theta = $this->theta;
173
174			// Update the guess
175			$cost = $this->updateTheta();
176
177			// Save the best theta in the "pocket" so that
178			// any future set of theta worse than this will be disregarded
179			if ($bestTheta === null \|\| $cost <= $bestScore) {
180			$bestTheta = $theta;
181			$bestScore = $cost;
182			}
183
184			// Add the cost value for this iteration to the list
185			$this->costValues[] = $cost;
186
187			// Check for early stop
188			if ($this->enableEarlyStop && $this->earlyStop($theta)) {
189			break;
190			}
191			}
192
193			$this->clear();
194
195			// Solution in the pocket is better than or equal to the last state
196			// so, we use this solution
197			return $this->theta = (array) $bestTheta;
198			}
199
200			/**
201			* Returns the list of cost values for each iteration executed in
202			* last run of the optimization
203			*/
204			public function getCostValues(): array
205			{
206			return $this->costValues;
207			}
208
209			protected function updateTheta(): float
210			{
211			$jValue = 0.0;
212			$theta = $this->theta;
213
214			if ($this->gradientCb === null) {
215			throw new InvalidOperationException('Gradient callback is not defined');
216			}
217
218			foreach ($this->samples as $index => $sample) {
219			$target = $this->targets[$index];
220
221			$result = ($this->gradientCb)($theta, $sample, $target);
222
223			[$error, $gradient, $penalty] = array_pad($result, 3, 0);
224
225			// Update bias
226			$this->theta[0] -= $this->learningRate * $gradient;
227
228			// Update other values
229			for ($i = 1; $i <= $this->dimensions; ++$i) {
230			$this->theta[$i] -= $this->learningRate *
231			($gradient * $sample[$i - 1] + $penalty * $this->theta[$i]);
232			}
233
234			// Sum error rate
235			$jValue += $error;
236			}
237
238			return $jValue / count($this->samples);
239			}
240
241			/**
242			* Checks if the optimization is not effective enough and can be stopped
243			* in case large enough changes in the solution do not happen
244			*/
245			protected function earlyStop(array $oldTheta): bool
246			{
247			// Check for early stop: No change larger than threshold (default 1e-5)
248			$diff = array_map(
249			function ($w1, $w2) {
250			return abs($w1 - $w2) > $this->threshold ? 1 : 0;
251			},
252			$oldTheta,
253			$this->theta
254			);
255
256			if (array_sum($diff) == 0) {
257			return true;
258			}
259
260			// Check if the last two cost values are almost the same
261			$costs = array_slice($this->costValues, -2);
262			if (count($costs) === 2 && abs($costs[1] - $costs[0]) < $this->threshold) {
263			return true;
264			}
265
266			return false;
267			}
268
269			/**
270			* Clears the optimizer internal vars after the optimization process.
271			*/
272			protected function clear(): void
273			{
274			$this->samples = [];
275			$this->targets = [];
276			$this->gradientCb = null;
277			}
278			}
279

php-ai / php-ml

StochasticGD::getCostValues() A last analyzed 2020-05-15 05:48 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

StochasticGD::getCostValues() A
last analyzed 2020-05-15 05:48 UTC