Test Failed
Push — master ( 87eb8d...25783a )
by Paul
03:58
created

Rating::flattenCounts()   A

Complexity

Conditions 2
Paths 1

Size

Total Lines 9
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
cc 2
eloc 6
nc 1
nop 1
dl 0
loc 9
ccs 0
cts 8
cp 0
crap 6
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace GeminiLabs\SiteReviews\Modules;
4
5
class Rating
6
{
7
	/**
8
	 * Maps a confidence level (in percent) to the z-score that getLowerBound() and
	 * getRankingUsingZScores() look up by key.
	 * The fractional levels are string keys ('99.5', '99.8', '99.9'); PHP truncates a float used as
	 * an array key, so a float such as 99.5 resolves to the 99 entry instead.
	 *
	 * The more sure we are of the confidence interval (the higher the confidence level), the less
9
	 * precise the estimation will be as the margin for error will be higher.
10
	 * @see http://homepages.math.uic.edu/~bpower6/stat101/Confidence%20Intervals.pdf
11
	 * @see https://www.thecalculator.co/math/Confidence-Interval-Calculator-210.html
12
	 * @see https://www.youtube.com/watch?v=grodoLzThy4
13
	 * @see https://en.wikipedia.org/wiki/Standard_score
14
	 * @var array
15
	 */
16
	const CONFIDENCE_LEVEL_Z_SCORES = [
17
		50 => 0.67449,
18
		70 => 1.04,
19
		75 => 1.15035,
20
		80 => 1.282,
21
		85 => 1.44,
22
		90 => 1.64485,
23
		92 => 1.75,
24
		95 => 1.95996,
25
		96 => 2.05,
26
		97 => 2.17009,
27
		98 => 2.326,
28
		99 => 2.57583,
29
		'99.5' => 2.81,
30
		'99.8' => 3.08,
31
		'99.9' => 3.29053,
32
	];
33
34
	/**
35
	 * Highest rating value. Used as the scale in getOverallPercentage() and
	 * getRankingUsingImdb(), and added to the rating total in getRankingUsingZScores().
	 * @var int
36
	 */
37
	const MAX_RATING = 5;
38
39
	/**
40
	 * Lowest rating value. Not referenced by any method of this class;
	 * presumably read by callers elsewhere (TODO confirm).
	 * @var int
41
	 */
42
	const MIN_RATING = 1;
43
44
	/**
	 * Get the average rating: the total returned by getTotalSum() divided by the number of
	 * ratings (the sum of $ratingCounts), rounded to $roundBy decimals. When there are no ratings
	 * the (non-positive) count itself is used as the average.
	 * The result is passed through the "site-reviews/rating/average" filter before being
	 * cast to a float.
	 * @param int $roundBy Number of decimals to round the average to
	 * @return float
	 */
	public function getAverage( array $ratingCounts, $roundBy = 1 )
	{
		$numRatings = array_sum( $ratingCounts );
		$average = $numRatings > 0
			? round( $this->getTotalSum( $ratingCounts ) / $numRatings, intval( $roundBy ))
			: $numRatings;
		$filtered = apply_filters( 'site-reviews/rating/average', $average, $ratingCounts );
		return floatval( $filtered );
	}
56
57
	/**
	 * Get the lower bound of the Wilson score confidence interval for up/down ratings.
	 * Method receives the vote counts, not the individual votes: index 1 holds the number of
	 * positive (up) votes and the sum of all entries is the total number of votes,
	 * e.g. [2, 8] for 2 down votes and 8 up votes.
	 * The confidence level may be given as an int, float or numeric string (95, 99.5, '99.5');
	 * a level that has no entry in CONFIDENCE_LEVEL_Z_SCORES falls back to 95.
	 * @see http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
	 * @see https://news.ycombinator.com/item?id=10481507
	 * @see https://dataorigami.net/blogs/napkin-folding/79030467-an-algorithm-to-sort-top-comments
	 * @see http://julesjacobs.github.io/2015/08/17/bayesian-scoring-of-ratings.html
	 * @param int|float|string $confidencePercentage
	 * @return int|float 0 when there are no votes
	 */
	public function getLowerBound( array $upDownCounts = [0, 0], $confidencePercentage = 95 )
	{
		$numRatings = array_sum( $upDownCounts );
		if( $numRatings < 1 ) {
			return 0;
		}
		// Look the level up as a string: a float key such as 99.5 would be truncated to 99.
		$level = (string) $confidencePercentage;
		$zScores = static::CONFIDENCE_LEVEL_Z_SCORES;
		$z = isset( $zScores[$level] )
			? $zScores[$level]
			: $zScores[95];
		// A missing index 1 means no up votes were supplied.
		$upVotes = isset( $upDownCounts[1] )
			? $upDownCounts[1]
			: 0;
		$phat = $upVotes / $numRatings;
		$zSquared = $z * $z;
		$center = $phat + $zSquared / ( 2 * $numRatings );
		$margin = $z * sqrt(( $phat * ( 1 - $phat ) + $zSquared / ( 4 * $numRatings )) / $numRatings );
		return ( $center - $margin ) / ( 1 + $zSquared / $numRatings );
	}
77
78
	/**
	 * Express the average rating (see getAverage()) as a percentage of MAX_RATING,
	 * rounded to two decimals.
	 * @return int|float
	 */
	public function getOverallPercentage( array $ratingCounts )
	{
		$average = $this->getAverage( $ratingCounts );
		$percentage = $average * 100 / static::MAX_RATING;
		return round( $percentage, 2 );
	}
85
86
	/**
	 * Convert each count into its share (in percent) of the sum of all counts, then hand the
	 * result to getRoundedPercentages(). Empty counts (0, null, '') are passed on unchanged.
	 * @return array
	 */
	public function getPercentages( array $ratingCounts )
	{
		$total = array_sum( $ratingCounts );
		// array_map() keeps the keys when it is given a single array.
		$shares = array_map( function( $count ) use( $total ) {
			return empty( $count )
				? $count
				: $count / $total * 100;
		}, $ratingCounts );
		return $this->getRoundedPercentages( $shares );
	}
98
99
	/**
	 * Get the ranking computed by getRankingUsingImdb(), passed through the
	 * "site-reviews/bayesian/ranking" filter (which also receives the rating counts and
	 * this instance) and cast to a float.
	 * @return float
	 */
	public function getRanking( array $ratingCounts )
	{
		$ranking = $this->getRankingUsingImdb( $ratingCounts );
		$ranking = apply_filters( 'site-reviews/bayesian/ranking', $ranking, $ratingCounts, $this );
		return floatval( $ranking );
	}
110
111
	/**
	 * Get the bayesian ranking for an array of rating counts: a weighted mean of a fixed prior
	 * and the observed average, where the prior counts as 10 ratings.
	 * This formula is the same one used by IMDB to rank their top 250 films
	 * @see https://www.xkcd.com/937/
	 * @see https://districtdatalabs.silvrback.com/computing-a-bayesian-estimate-of-star-rating-means
	 * @see http://fulmicoton.com/posts/bayesian_rating/
	 * @see https://stats.stackexchange.com/questions/93974/is-there-an-equivalent-to-lower-bound-of-wilson-score-confidence-interval-for-va
	 * @param int $confidencePercentage Sets the prior as a percentage of MAX_RATING
	 * @return int|float 0 when the average rating is not positive
	 */
	public function getRankingUsingImdb( array $ratingCounts, $confidencePercentage = 70 )
	{
		$avgRating = $this->getAverage( $ratingCounts );
		if( $avgRating > 0 ) {
			// The prior (your opinion without data) for the average star rating, 70% = 3.5.
			// A higher prior also means a higher margin for error; the average score of all
			// items could be used here instead of a fixed value.
			$priorMean = ( $confidencePercentage / 100 ) * static::MAX_RATING;
			// The number of ratings expected before a pattern emerges that would put
			// confidence in the prior.
			$priorWeight = 10;
			$numOfReviews = array_sum( $ratingCounts );
			$weightedTotal = ( $priorWeight * $priorMean ) + ( $avgRating * $numOfReviews );
			return $weightedTotal / ( $priorWeight + $numOfReviews );
		}
		return 0;
	}
134
135
	/**
	 * The quality of a 5 star rating depends not only on the average number of stars but also on
	 * the number of reviews. This method calculates the bayesian ranking of a page by its number
	 * of reviews and their rating: the weight returned by getWeight() minus the z-score times
	 * the square root of (squared-level weight - weight^2) / (total + 1), where total is the sum
	 * of the counts plus MAX_RATING.
	 * The confidence level may be given as an int, float or numeric string (90, 99.5, '99.5');
	 * a level that has no entry in CONFIDENCE_LEVEL_Z_SCORES falls back to 90.
	 * @see http://www.evanmiller.org/ranking-items-with-star-ratings.html
	 * @see https://stackoverflow.com/questions/1411199/what-is-a-better-way-to-sort-by-a-5-star-rating/1411268
	 * @see http://julesjacobs.github.io/2015/08/17/bayesian-scoring-of-ratings.html
	 * @param int|float|string $confidencePercentage
	 * @return float
	 */
	public function getRankingUsingZScores( array $ratingCounts, $confidencePercentage = 90 )
	{
		$ratingCountsSum = array_sum( $ratingCounts ) + static::MAX_RATING;
		$weight = $this->getWeight( $ratingCounts, $ratingCountsSum );
		$weightPow2 = $this->getWeight( $ratingCounts, $ratingCountsSum, true );
		// Look the level up as a string: a float key such as 99.5 would be truncated to 99.
		$level = (string) $confidencePercentage;
		$zScores = static::CONFIDENCE_LEVEL_Z_SCORES;
		$zScore = isset( $zScores[$level] )
			? $zScores[$level]
			: $zScores[90];
		return $weight - $zScore * sqrt(( $weightPow2 - pow( $weight, 2 )) / ( $ratingCountsSum + 1 ));
	}
153
154
	/**
	 * Round percentages down to whole numbers and hand out the missing points one at a time,
	 * largest fractional part first (ties go to the highest index), until the values add up to
	 * $totalPercent. Nothing is handed out when every rounded value is zero.
	 * The result is keyed by the original indexes, sorted in descending order.
	 * @param int $totalPercent The sum the rounded percentages should add up to
	 * @return array
	 */
	protected function getRoundedPercentages( array $percentages, $totalPercent = 100 )
	{
		array_walk( $percentages, function( &$percent, $index ) {
			$percent = [
				'index' => $index,
				'percent' => floor( $percent ),
				// Rounded so that floating-point noise (e.g. 28.000000000000004) is not
				// mistaken for a genuine fractional part.
				'remainder' => round( fmod( $percent, 1 ), 10 ),
			];
		});
		$indexes = array_column( $percentages, 'index' );
		$remainders = array_column( $percentages, 'remainder' );
		// Numeric sort: as strings, a tiny remainder such as "3.55E-15" would outrank "0.5".
		array_multisort( $remainders, SORT_DESC, SORT_NUMERIC, $indexes, SORT_DESC, $percentages );
		$numPercentages = count( $percentages );
		$sum = array_sum( array_column( $percentages, 'percent' ));
		if( $sum > 0 ) {
			// Wrap around so a shortfall larger than the number of entries never writes
			// past the end of the array.
			for( $i = 0; $sum < $totalPercent; $i = ( $i + 1 ) % $numPercentages ) {
				$percentages[$i]['percent']++;
				$sum++;
			}
		}
		array_multisort( $indexes, SORT_DESC, $percentages );
		return array_combine( $indexes, array_column( $percentages, 'percent' ));
	}
180
181
	/**
	 * Get the sum of all rating values: every rating level multiplied by the number of ratings
	 * it received. The array keys are the rating levels and the values are the counts (the same
	 * layout getWeight() reads), so [4 => 2, 5 => 1] totals 13.
	 * @return int 0 for an empty array
	 */
	protected function getTotalSum( array $ratingCounts )
	{
		$total = 0;
		foreach( $ratingCounts as $rating => $count ) {
			// Weight each count by its rating level; summing the bare counts would only
			// reproduce the number of ratings.
			$total += $rating * $count;
		}
		return $total;
	}
190
191
	/**
	 * Add up, for every rating level (the array keys), level * (count + 1) / $ratingCountsSum.
	 * The level is squared first when $powerOf2 is true.
	 * @param int|double $ratingCountsSum Divisor applied to every term
	 * @param bool $powerOf2 Whether to use the squared rating level
	 * @return float|null null when $ratingCounts is empty
	 */
	protected function getWeight( array $ratingCounts, $ratingCountsSum, $powerOf2 = false )
	{
		$weight = null;
		foreach( $ratingCounts as $rating => $numRatings ) {
			$level = $rating;
			if( $powerOf2 ) {
				$level = pow( $rating, 2 );
			}
			$weight = $weight + ( $level * ( $numRatings + 1 )) / $ratingCountsSum;
		}
		return $weight;
	}
207
}
208