Perceptron - Code Metrics - Inspection of "One-v-Rest Classification technique applied to lin..." - php-ai/php-ml - Measure and Improve Code Quality continuously with Scrutinizer

Test Failed

Branch — master (01bb82)

by Arkadiusz

created 2017-03-05 15:35 UTC

Perceptron A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	289
Duplicated Lines	1.04 %

Coupling/Cohesion

Components	1
Dependencies	3

Importance

Changes

Metric	Value
wmc	33
lcom	1
cbo	3
dl	3
loc	289
rs	9.3999
c	0
b	0
f	0

10 Methods

Rating	Name	Duplication	Size	Complexity
B	__construct()	0	18	5
A	setChangeThreshold()	0	4	1
C	runTraining()	0	43	8
A	earlyStop()	0	15	3
B	trainBinary()	3	29	6
A	checkNormalizedSample()	0	10	2
A	output()	0	13	3
A	outputClass()	0	4	2
A	predictProbability()	0	11	2
A	predictSampleBinary()	0	8	1

How to fix Duplicated Code

<?php

declare(strict_types=1);

namespace Phpml\Classification\Linear;

use Phpml\Helper\Predictable;
use Phpml\Helper\OneVsRest;
use Phpml\Classification\Classifier;
use Phpml\Preprocessing\Normalizer;

/**
 * Binary perceptron classifier.
 *
 * Training is done in trainBinary(), which maps the two labels found in the
 * targets onto the internal classes 1 and -1, optionally normalizes the
 * samples, randomly initializes the weights and then runs the iterative
 * weight-update loop in runTraining().
 */
class Perceptron implements Classifier
{
    use Predictable, OneVsRest;

    /**
     * Name of the method whose result is used to calculate the network error
     * for each instance. It is resolved with late static binding in
     * runTraining().
     *
     * @var string
     */
    protected static $errorFunction = 'outputClass';

    /**
     * Training samples accumulated by trainBinary()
     *
     * @var array
     */
    protected $samples = [];

    /**
     * Training targets converted to 1 / -1, index-aligned with $samples
     *
     * @var array
     */
    protected $targets = [];

    /**
     * Map from the internal class value (1 or -1) to the original label
     *
     * @var array
     */
    protected $labels = [];

    /**
     * Number of features in a sample (taken from the first stored sample)
     *
     * @var int
     */
    protected $featureCount = 0;

    /**
     * Weight vector; index 0 is the bias, index i (i >= 1) belongs to feature i - 1
     *
     * @var array
     */
    protected $weights;

    /**
     * @var float
     */
    protected $learningRate;

    /**
     * @var int
     */
    protected $maxIterations;

    /**
     * Set only when input normalization was requested in the constructor
     *
     * @var Normalizer
     */
    protected $normalizer;

    /**
     * Minimum amount of change in the weights between iterations
     * that needs to be obtained to continue the training
     *
     * @var float
     */
    protected $threshold = 1e-5;

    /**
     * Initialize a perceptron classifier with given learning rate and maximum
     * number of iterations used while training the perceptron <br>
     *
     * Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive) <br>
     * Maximum number of iterations can be an integer value greater than 0
     *
     * @param float $learningRate
     * @param int   $maxIterations
     * @param bool  $normalizeInputs Whether samples are passed through a
     *                               Normalizer (NORM_STD) before use
     *
     * @throws \Exception If the learning rate or the iteration count is out of range
     */
    public function __construct(float $learningRate = 0.001, int $maxIterations = 1000,
        bool $normalizeInputs = true)
    {
        if ($learningRate <= 0.0 || $learningRate > 1.0) {
            throw new \Exception("Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)");
        }

        if ($maxIterations <= 0) {
            throw new \Exception("Maximum number of iterations should be an integer greater than 0");
        }

        if ($normalizeInputs) {
            $this->normalizer = new Normalizer(Normalizer::NORM_STD);
        }

        $this->learningRate = $learningRate;
        $this->maxIterations = $maxIterations;
    }

    /**
     * Sets minimum value for the change in the weights
     * between iterations to continue the iterations.<br>
     *
     * If no weight changes by more than the given value during an
     * iteration, the algorithm will stop training
     *
     * @param float $threshold
     */
    public function setChangeThreshold(float $threshold = 1e-5)
    {
        $this->threshold = $threshold;
    }

    /**
     * Trains the perceptron on a two-class problem.
     *
     * @param array $samples
     * @param array $targets
     *
     * @throws \Exception If more than two distinct target values are given
     */
    public function trainBinary(array $samples, array $targets)
    {
        $this->labels = array_keys(array_count_values($targets));
        if (count($this->labels) > 2) {
            throw new \Exception("Perceptron is for binary (two-class) classification only");
        }

        if ($this->normalizer) {
            $this->normalizer->transform($samples);
        }

        // Set all target values to either -1 or 1
        $this->labels = [1 => $this->labels[0], -1 => $this->labels[1]];
        $positiveLabel = strval($this->labels[1]);
        foreach ($targets as $target) {
            // Identity comparison: "==" would compare numeric strings by
            // value and treat e.g. "1.0" and "1" as the same label
            $this->targets[] = strval($target) === $positiveLabel ? 1 : -1;
        }

        // Set samples and feature count vars
        $this->samples = array_merge($this->samples, $samples);
        $this->featureCount = count($this->samples[0]);

        // Init weights (bias + one per feature) with random values in [0, 1].
        // Built without a by-reference foreach so that no dangling reference
        // to the last weight is left behind.
        $this->weights = [];
        for ($i = 0; $i <= $this->featureCount; $i++) {
            $this->weights[] = rand() / (float) getrandmax();
        }

        // Do training
        $this->runTraining();
    }

    /**
     * Adapts the weights with respect to given samples and targets
     * by use of perceptron learning rule
     */
    protected function runTraining()
    {
        $currIter = 0;
        $bestWeights = null;
        $bestScore = count($this->samples);

        while ($this->maxIterations > $currIter++) {
            // Snapshot of the weights as they were when this iteration started
            $weights = $this->weights;
            $misClassified = 0;
            foreach ($this->samples as $index => $sample) {
                $target = $this->targets[$index];
                $prediction = $this->{static::$errorFunction}($sample);
                $update = $target - $prediction;
                if ($target != $prediction) {
                    $misClassified++;
                }
                // Update bias
                $this->weights[0] += $update * $this->learningRate;
                // Update other weights
                for ($i = 1; $i <= $this->featureCount; $i++) {
                    $this->weights[$i] += $update * $sample[$i - 1] * $this->learningRate;
                }
            }

            // Save the best weights in the "pocket" so that
            // any future weights worse than this will be disregarded
            if ($bestWeights === null || $misClassified <= $bestScore) {
                $bestWeights = $weights;
                $bestScore = $misClassified;
            }

            // Check for early stop
            if ($this->earlyStop($weights)) {
                break;
            }
        }

        // Use the weights kept in the pocket as the final weights
        $this->weights = $bestWeights;
    }

    /**
     * Tells whether training can stop because no weight changed by more
     * than the configured change threshold compared to $oldWeights.
     *
     * @param array $oldWeights Weights as they were before the last iteration
     *
     * @return boolean
     */
    protected function earlyStop($oldWeights)
    {
        // Honour the value configured through setChangeThreshold()
        $threshold = $this->threshold;

        $changed = array_map(
            function ($w1, $w2) use ($threshold) {
                return abs($w1 - $w2) > $threshold ? 1 : 0;
            },
            $oldWeights,
            $this->weights
        );

        return array_sum($changed) === 0;
    }

    /**
     * Checks if the sample should be normalized and if so, returns the
     * normalized sample
     *
     * @param array $sample
     *
     * @return array
     */
    protected function checkNormalizedSample(array $sample)
    {
        if ($this->normalizer) {
            // The normalizer is called with a list of samples, so wrap and unwrap
            $samples = [$sample];
            $this->normalizer->transform($samples);
            $sample = $samples[0];
        }

        return $sample;
    }

    /**
     * Calculates net output of the network for the given input:
     * the bias plus the weighted sum of the sample's features
     *
     * @param array $sample
     * @return float|int
     */
    protected function output(array $sample)
    {
        $sum = 0;
        foreach ($this->weights as $index => $w) {
            if ($index == 0) {
                // Index 0 holds the bias, which has no matching feature
                $sum += $w;
            } else {
                $sum += $w * $sample[$index - 1];
            }
        }

        return $sum;
    }

    /**
     * Returns the class value (either -1 or 1) for the given input
     *
     * @param array $sample
     * @return int
     */
    protected function outputClass(array $sample)
    {
        return $this->output($sample) > 0 ? 1 : -1;
    }

    /**
     * Returns the probability of the sample of belonging to the given label.
     *
     * The probability is simply taken as the absolute net output for the
     * sample when the predicted label matches $label, and 0.0 otherwise.
     *
     * @param array $sample
     * @param mixed $label
     *
     * @return float|int
     */
    protected function predictProbability(array $sample, $label)
    {
        $predicted = $this->predictSampleBinary($sample);

        // Identity comparison on the string forms, see trainBinary()
        if (strval($predicted) === strval($label)) {
            $sample = $this->checkNormalizedSample($sample);
            return abs($this->output($sample));
        }

        return 0.0;
    }

    /**
     * Predicts the original label for a single sample
     *
     * @param array $sample
     * @return mixed
     */
    protected function predictSampleBinary(array $sample)
    {
        $sample = $this->checkNormalizedSample($sample);

        $predictedClass = $this->outputClass($sample);

        return $this->labels[ $predictedClass ];
    }
}


1		<?php
2
3		declare(strict_types=1);
4
5		namespace Phpml\Classification\Linear;
6
7		use Phpml\Helper\Predictable;
8		use Phpml\Helper\OneVsRest;
9		use Phpml\Classification\Classifier;
10		use Phpml\Preprocessing\Normalizer;
11
12		class Perceptron implements Classifier
13		{
14		use Predictable, OneVsRest;
15
16		/**
17		* The function whose result will be used to calculate the network error
18		* for each instance
19		*
20		* @var string
21		*/
22		protected static $errorFunction = 'outputClass';
23
24		/**
25		* @var array
26		*/
27		protected $samples = [];
28
29		/**
30		* @var array
31		*/
32		protected $targets = [];
33
34		/**
35		* @var array
36		*/
37		protected $labels = [];
38
39		/**
40		* @var int
41		*/
42		protected $featureCount = 0;
43
44		/**
45		* @var array
46		*/
47		protected $weights;
48
49		/**
50		* @var float
51		*/
52		protected $learningRate;
53
54		/**
55		* @var int
56		*/
57		protected $maxIterations;
58
59		/**
60		* @var Normalizer
61		*/
62		protected $normalizer;
63
64		/**
65		* Minimum amount of change in the weights between iterations
66		* that needs to be obtained to continue the training
67		*
68		* @var float
69		*/
70		protected $threshold = 1e-5;
71
72		/**
73		* Initalize a perceptron classifier with given learning rate and maximum
74		* number of iterations used while training the perceptron <br>
75		*
76		* Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive) <br>
77		* Maximum number of iterations can be an integer value greater than 0
78		* @param int $learningRate
79		* @param int $maxIterations
80		*/
81		public function __construct(float $learningRate = 0.001, int $maxIterations = 1000,
82		bool $normalizeInputs = true)
83		{
84		if ($learningRate <= 0.0 || $learningRate > 1.0) {
85		throw new \Exception("Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)");
86		}
87
88		if ($maxIterations <= 0) {
89		throw new \Exception("Maximum number of iterations should be an integer greater than 0");
90		}
91
92		if ($normalizeInputs) {
93		$this->normalizer = new Normalizer(Normalizer::NORM_STD);
94		}
95
96		$this->learningRate = $learningRate;
97		$this->maxIterations = $maxIterations;
98		}
99
100		/**
101		* Sets minimum value for the change in the weights
102		* between iterations to continue the iterations.<br>
103		*
104		* If the weight change is less than given value then the
105		* algorithm will stop training
106		*
107		* @param float $threshold
108		*/
109		public function setChangeThreshold(float $threshold = 1e-5)
110		{
111		$this->threshold = $threshold;
112		}
113
114		/**
115		* @param array $samples
116		* @param array $targets
117		*/
118		public function trainBinary(array $samples, array $targets)
119		{
120		$this->labels = array_keys(array_count_values($targets));
121		if (count($this->labels) > 2) {
122		throw new \Exception("Perceptron is for binary (two-class) classification only");
123		}
124
125		if ($this->normalizer) {
126		$this->normalizer->transform($samples);
127		}
128
129		// Set all target values to either -1 or 1
130		$this->labels = [1 => $this->labels[0], -1 => $this->labels[1]];
131	View Code Duplication	foreach ($targets as $target) {
		0 ignored issues – show Duplication introduced 2017-02-19 10:47 UTC by Report Bug Copy Issue Report This code seems to be duplicated across your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. Loading history...
132		$this->targets[] = strval($target) == strval($this->labels[1]) ? 1 : -1;
133		}
134
135		// Set samples and feature count vars
136		$this->samples = array_merge($this->samples, $samples);
137		$this->featureCount = count($this->samples[0]);
138
139		// Init weights with random values
140		$this->weights = array_fill(0, $this->featureCount + 1, 0);
141		foreach ($this->weights as &$weight) {
142		$weight = rand() / (float) getrandmax();
143		}
144		// Do training
145		$this->runTraining();
146		}
147
148		/**
149		* Adapts the weights with respect to given samples and targets
150		* by use of perceptron learning rule
151		*/
152		protected function runTraining()
153		{
154		$currIter = 0;
155		$bestWeights = null;
156		$bestScore = count($this->samples);
157		$bestWeightIter = 0;
		0 ignored issues – show Unused Code introduced 2017-02-25 14:37 UTC by Report Bug Copy Issue Report `$bestWeightIter` is not used, you could remove the assignment. This check looks for variable assignements that are either overwritten by other assignments or where the variable is not used subsequently. $myVar = 'Value'; $higher = false; if (rand(1, 6) > 3) { $higher = true; } else { $higher = false; } Both the `$myVar` assignment in line 1 and the `$higher` assignment in line 2 are dead. The first because `$myVar` is never used and the second because `$higher` is always overwritten for every possible time line. Loading history...
158
159		while ($this->maxIterations > $currIter++) {
160		$weights = $this->weights;
161		$misClassified = 0;
162		foreach ($this->samples as $index => $sample) {
163		$target = $this->targets[$index];
164		$prediction = $this->{static::$errorFunction}($sample);
165		$update = $target - $prediction;
166		if ($target != $prediction) {
167		$misClassified++;
168		}
169		// Update bias
170		$this->weights[0] += $update * $this->learningRate; // Bias
171		// Update other weights
172		for ($i=1; $i <= $this->featureCount; $i++) {
173		$this->weights[$i] += $update * $sample[$i - 1] * $this->learningRate;
174		}
175		}
176
177		// Save the best weights in the "pocket" so that
178		// any future weights worse than this will be disregarded
179		if ($bestWeights == null || $misClassified <= $bestScore) {
180		$bestWeights = $weights;
181		$bestScore = $misClassified;
182		$bestWeightIter = $currIter;
		0 ignored issues – show Unused Code introduced 2017-02-25 14:37 UTC by Report Bug Copy Issue Report `$bestWeightIter` is not used, you could remove the assignment. This check looks for variable assignements that are either overwritten by other assignments or where the variable is not used subsequently. $myVar = 'Value'; $higher = false; if (rand(1, 6) > 3) { $higher = true; } else { $higher = false; } Both the `$myVar` assignment in line 1 and the `$higher` assignment in line 2 are dead. The first because `$myVar` is never used and the second because `$higher` is always overwritten for every possible time line. Loading history...
183		}
184
185		// Check for early stop
186		if ($this->earlyStop($weights)) {
187		break;
188		}
189		}
190
191		// The weights in the pocket are better than or equal to the last state
192		// so, we use these weights
193		$this->weights = $bestWeights;
		0 ignored issues – show Documentation Bug introduced 2017-02-25 14:37 UTC by Report Bug Copy Issue Report It seems like `$bestWeights` can be `null`. However, the property `$weights` is declared as `array`. Maybe change the type of the property to `array\|null` or add a type check? Our type inference engine has found an assignment of a scalar value (like a string, an integer or null) to a property which is an array. Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property. To type hint that a parameter can be either an array or null, you can set a type hint of array and a default value of null. The PHP interpreter will then accept both an array or null for that parameter. function aContainsB(array $needle = null, array $haystack) { if (!$needle) { return false; } return array_intersect($haystack, $needle) == $haystack; } The function can be called with either null or an array for the parameter `$needle` but will only accept an array as `$haystack`. Loading history...
194		}
195
196		/**
197		* @param array $oldWeights
198		*
199		* @return boolean
200		*/
201		protected function earlyStop($oldWeights)
202		{
203		// Check for early stop: No change larger than 1e-5
204		$diff = array_map(
205		function ($w1, $w2) {
206		return abs($w1 - $w2) > 1e-5 ? 1 : 0;
207		},
208		$oldWeights, $this->weights);
209
210		if (array_sum($diff) == 0) {
211		return true;
212		}
213
214		return false;
215		}
216
217		/**
218		* Checks if the sample should be normalized and if so, returns the
219		* normalized sample
220		*
221		* @param array $sample
222		*
223		* @return array
224		*/
225		protected function checkNormalizedSample(array $sample)
226		{
227		if ($this->normalizer) {
228		$samples = [$sample];
229		$this->normalizer->transform($samples);
230		$sample = $samples[0];
231		}
232
233		return $sample;
234		}
235
236		/**
237		* Calculates net output of the network as a float value for the given input
238		*
239		* @param array $sample
240		* @return int
241		*/
242		protected function output(array $sample)
243		{
244		$sum = 0;
245		foreach ($this->weights as $index => $w) {
246		if ($index == 0) {
247		$sum += $w;
248		} else {
249		$sum += $w * $sample[$index - 1];
250		}
251		}
252
253		return $sum;
254		}
255
256		/**
257		* Returns the class value (either -1 or 1) for the given input
258		*
259		* @param array $sample
260		* @return int
261		*/
262		protected function outputClass(array $sample)
263		{
264		return $this->output($sample) > 0 ? 1 : -1;
265		}
266
267		/**
268		* Returns the probability of the sample of belonging to the given label.
269		*
270		* The probability is simply taken as the distance of the sample
271		* to the decision plane.
272		*
273		* @param array $sample
274		* @param mixed $label
275		*/
276		protected function predictProbability(array $sample, $label)
277		{
278		$predicted = $this->predictSampleBinary($sample);
279
280		if (strval($predicted) == strval($label)) {
281		$sample = $this->checkNormalizedSample($sample);
282		return abs($this->output($sample));
283		}
284
285		return 0.0;
286		}
287
288		/**
289		* @param array $sample
290		* @return mixed
291		*/
292		protected function predictSampleBinary(array $sample)
293		{
294		$sample = $this->checkNormalizedSample($sample);
295
296		$predictedClass = $this->outputClass($sample);
297
298		return $this->labels[ $predictedClass ];
299		}
300		}
301

php-ai / php-ml

Branch — master (01bb82)

Perceptron A

Complexity

Size/Duplication

Coupling/Cohesion

Importance

10 Methods

How to fix Duplicated Code

Duplicated Code

Duplication Side-by-Side

Filter issues like