Passed
Push — master ( 6f6d9e...e32a3c )
by Paul
04:05
created

Rating::getAverage()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
cc 2
eloc 5
nc 2
nop 2
dl 0
loc 8
ccs 0
cts 8
cp 0
crap 6
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace GeminiLabs\SiteReviews\Modules;
4
5
class Rating
{
	/**
	 * Z-scores keyed by confidence level (in percent).
	 * The more sure we are of the confidence interval (the higher the confidence level), the less
	 * precise the estimation will be as the margin for error will be higher.
	 * Fractional levels are stored under string keys ('99.5'); look them up through getZScore()
	 * because a float used directly as an array offset is truncated to an integer.
	 * @see http://homepages.math.uic.edu/~bpower6/stat101/Confidence%20Intervals.pdf
	 * @see https://www.thecalculator.co/math/Confidence-Interval-Calculator-210.html
	 * @see https://www.youtube.com/watch?v=grodoLzThy4
	 * @see https://en.wikipedia.org/wiki/Standard_score
	 * @var array
	 */
	const CONFIDENCE_LEVEL_Z_SCORES = [
		50 => 0.67449,
		70 => 1.04,
		75 => 1.15035,
		80 => 1.282,
		85 => 1.44,
		90 => 1.64485,
		92 => 1.75,
		95 => 1.95996,
		96 => 2.05,
		97 => 2.17009,
		98 => 2.326,
		99 => 2.57583,
		'99.5' => 2.81,
		'99.8' => 3.08,
		'99.9' => 3.29053,
	];

	/**
	 * Highest rating level; used as the scale for percentages and the bayesian prior.
	 * @var int
	 */
	const MAX_RATING = 5;

	/**
	 * Lowest rating level.
	 * @var int
	 */
	const MIN_RATING = 1;

	/**
	 * Get the average rating of the given review counts.
	 * The counts are flattened first, so nested arrays of [rating => count] are summed per rating.
	 * The result is passed through the 'site-reviews/rating/average' filter (apply_filters is not
	 * defined in this file; presumably the WordPress hook function).
	 * @param array $reviewCounts Counts keyed by rating level, optionally nested
	 * @param int $roundBy Number of decimals to round the average to
	 * @return float
	 */
	public function getAverage( array $reviewCounts, $roundBy = 1 )
	{
		$counts = $this->flattenCounts( $reviewCounts );
		$numReviews = array_sum( $counts );
		// Without any reviews there is nothing to divide by; the average is simply zero.
		$average = $numReviews > 0
			? round( $this->getTotalSum( $counts ) / $numReviews, intval( $roundBy ))
			: 0;
		return floatval( apply_filters( 'site-reviews/rating/average', $average, $counts, $reviewCounts ));
	}

	/**
	 * Get the lower bound of the Wilson score confidence interval for up/down ratings.
	 * Index 1 of the array is read as the number of positive ratings and the sum of all elements
	 * as the total number of ratings, i.e. the expected shape is [negativeCount, positiveCount].
	 * An unknown confidence level falls back to 95.
	 * @see http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
	 * @see https://news.ycombinator.com/item?id=10481507
	 * @see https://dataorigami.net/blogs/napkin-folding/79030467-an-algorithm-to-sort-top-comments
	 * @see http://julesjacobs.github.io/2015/08/17/bayesian-scoring-of-ratings.html
	 * @param int|float|string $confidencePercentage A key of CONFIDENCE_LEVEL_Z_SCORES
	 * @return int|float Zero when there are no ratings
	 */
	public function getLowerBound( array $upDownRatings = [0, 0], $confidencePercentage = 95 )
	{
		$numRatings = array_sum( $upDownRatings );
		if( $numRatings < 1 ) {
			return 0;
		}
		$numPositive = isset( $upDownRatings[1] )
			? $upDownRatings[1]
			: 0;
		$z = $this->getZScore( $confidencePercentage, 95 );
		$zSquared = $z * $z;
		// Observed fraction of positive ratings.
		$phat = $numPositive / $numRatings;
		$centre = $phat + $zSquared / ( 2 * $numRatings );
		$margin = $z * sqrt(( $phat * ( 1 - $phat ) + $zSquared / ( 4 * $numRatings )) / $numRatings );
		return ( $centre - $margin ) / ( 1 + $zSquared / $numRatings );
	}

	/**
	 * Get the average rating expressed as a percentage of MAX_RATING.
	 * @return int|float
	 */
	public function getOverallPercentage( array $reviewCounts )
	{
		return round( $this->getAverage( $reviewCounts ) * 100 / static::MAX_RATING, 2 );
	}

	/**
	 * Get the share of each rating level as whole percentages.
	 * The result is keyed by rating level in descending order (see getRoundedPercentages).
	 * @return array
	 */
	public function getPercentages( array $reviewCounts )
	{
		$counts = $this->flattenCounts( $reviewCounts );
		$total = array_sum( $counts );
		foreach( $counts as $index => $count ) {
			// Empty counts stay at zero; this also avoids dividing when there are no reviews.
			if( empty( $count )) {
				continue;
			}
			$counts[$index] = $count / $total * 100;
		}
		return $this->getRoundedPercentages( $counts );
	}

	/**
	 * Get the bayesian ranking of the given review counts.
	 * The result is passed through the 'site-reviews/bayesian/ranking' filter together with the
	 * flattened counts and this instance.
	 * @param array $reviewCounts Counts keyed by rating level, optionally nested
	 * @return float
	 */
	public function getRanking( array $reviewCounts = [] )
	{
		$counts = $this->flattenCounts( $reviewCounts );
		return floatval( apply_filters( 'site-reviews/bayesian/ranking',
			$this->getRankingUsingImdb( $counts ),
			$counts,
			$this
		));
	}

	/**
	 * Collapse a (possibly nested) array of counts into a single array by adding together all
	 * leaf values that share the same key.
	 * @return array
	 */
	protected function flattenCounts( array $reviewCounts )
	{
		$counts = [];
		array_walk_recursive( $reviewCounts, function( $num, $index ) use( &$counts ) {
			$counts[$index] = isset( $counts[$index] )
				? $num + $counts[$index]
				: $num;
		});
		return $counts;
	}

	/**
	 * Get the bayesian ranking for an array of reviews
	 * This formula is the same one used by IMDB to rank their top 250 films
	 * @see https://www.xkcd.com/937/
	 * @see https://districtdatalabs.silvrback.com/computing-a-bayesian-estimate-of-star-rating-means
	 * @see http://fulmicoton.com/posts/bayesian_rating/
	 * @see https://stats.stackexchange.com/questions/93974/is-there-an-equivalent-to-lower-bound-of-wilson-score-confidence-interval-for-va
	 * @param array $ratingCounts Flat array of counts keyed by rating level
	 * @param int $confidencePercentage Percentage of MAX_RATING to use as the prior
	 * @return int|float Zero when the average rating is not positive
	 */
	protected function getRankingUsingImdb( array $ratingCounts, $confidencePercentage = 70 )
	{
		$avgRating = $this->getAverage( $ratingCounts );
		// Represents a prior (your prior opinion without data) for the average star rating. A higher prior also means a higher margin for error.
		// This could also be the average score of all items instead of a fixed value.
		$bayesMean = ( $confidencePercentage / 100 ) * static::MAX_RATING; // prior, 70% = 3.5
		// Represents the number of ratings expected to begin observing a pattern that would put confidence in the prior.
		$bayesMinimal = 10; // confidence
		$numOfReviews = array_sum( $ratingCounts );
		return $avgRating > 0
			? (( $bayesMinimal * $bayesMean ) + ( $avgRating * $numOfReviews )) / ( $bayesMinimal + $numOfReviews )
			: 0;
	}

	/**
	 * The quality of a 5 star rating depends not only on the average number of stars but also on
	 * the number of reviews. This method calculates the bayesian ranking of a page by its number
	 * of reviews and their rating. An unknown confidence level falls back to 90.
	 * @see http://www.evanmiller.org/ranking-items-with-star-ratings.html
	 * @see https://stackoverflow.com/questions/1411199/what-is-a-better-way-to-sort-by-a-5-star-rating/1411268
	 * @see http://julesjacobs.github.io/2015/08/17/bayesian-scoring-of-ratings.html
	 * @param array $ratingCounts Flat array of counts keyed by rating level
	 * @param int|float|string $confidencePercentage A key of CONFIDENCE_LEVEL_Z_SCORES
	 * @return float
	 */
	protected function getRankingUsingZScores( array $ratingCounts, $confidencePercentage = 90 )
	{
		$ratingCountsSum = array_sum( $ratingCounts ) + static::MAX_RATING;
		$weight = $this->getWeight( $ratingCounts, $ratingCountsSum );
		$weightPow2 = $this->getWeight( $ratingCounts, $ratingCountsSum, true );
		$zScore = $this->getZScore( $confidencePercentage, 90 );
		return $weight - $zScore * sqrt(( $weightPow2 - pow( $weight, 2 )) / ( $ratingCountsSum + 1 ));
	}

	/**
	 * Round percentages down to whole numbers and hand out the missing points, one each, to the
	 * values with the largest fractional part (largest remainder method) until the sum reaches
	 * $totalPercent. Equal remainders are resolved in favour of the highest index.
	 * Nothing is handed out when every percentage rounds down to zero.
	 * @param array $percentages Percentages keyed by index (rating level)
	 * @param int $totalPercent The sum the rounded percentages should add up to
	 * @return array Rounded percentages keyed by index, in descending index order
	 */
	protected function getRoundedPercentages( array $percentages, $totalPercent = 100 )
	{
		$indexes = array_keys( $percentages );
		$rounded = [];
		$remainders = [];
		foreach( $percentages as $percent ) {
			$rounded[] = floor( $percent );
			// Rounding strips float noise (e.g. 28.000000000000004) so that it neither outranks
			// real remainders nor breaks ties between mathematically equal remainders.
			$remainders[] = round( fmod( $percent, 1 ), 10 );
		}
		// Largest remainder first; on a tie the highest index first.
		array_multisort( $remainders, SORT_DESC, SORT_NUMERIC, $indexes, SORT_DESC, $rounded );
		$numValues = count( $rounded );
		$roundedSum = array_sum( $rounded );
		if( $numValues > 0 && $roundedSum > 0 ) {
			$shortfall = $totalPercent - $roundedSum;
			for( $i = 0; $i < $shortfall; $i++ ) {
				// Wrap around instead of running past the end of the list.
				$rounded[$i % $numValues]++;
			}
		}
		array_multisort( $indexes, SORT_DESC, $rounded );
		return array_combine( $indexes, $rounded );
	}

	/**
	 * Get the sum of all ratings, i.e. each rating level multiplied by the number of reviews
	 * with that rating. Dividing this by the number of reviews gives the average rating.
	 * @param array $ratingCounts Flat array of counts keyed by rating level
	 * @return int|float
	 */
	protected function getTotalSum( array $ratingCounts )
	{
		$total = 0;
		foreach( $ratingCounts as $rating => $count ) {
			$total += $rating * $count;
		}
		return $total;
	}

	/**
	 * Get the weighted sum of the rating levels (or of their squares), where each level is
	 * weighted by its count plus one, divided by $ratingCountsSum.
	 * @param array $ratingCounts Flat array of counts keyed by rating level
	 * @param int|double $ratingCountsSum
	 * @param bool $powerOf2 Whether to use the squared rating level
	 * @return int|float
	 */
	protected function getWeight( array $ratingCounts, $ratingCountsSum, $powerOf2 = false )
	{
		$weight = 0;
		foreach( $ratingCounts as $rating => $count ) {
			$ratingLevel = $powerOf2
				? pow( $rating, 2 )
				: $rating;
			$weight += ( $ratingLevel * ( $count + 1 )) / $ratingCountsSum;
		}
		return $weight;
	}

	/**
	 * Look up the z-score for a confidence level.
	 * The level is cast to a string before the lookup because a float such as 99.5 would
	 * otherwise be truncated to the integer key 99 when used as an array offset.
	 * @param int|float|string $confidencePercentage The requested confidence level
	 * @param int|float|string $fallbackPercentage The level to use when the requested one is unknown
	 * @return int|float Zero when neither level exists
	 */
	private function getZScore( $confidencePercentage, $fallbackPercentage )
	{
		$zScores = static::CONFIDENCE_LEVEL_Z_SCORES;
		$level = (string) $confidencePercentage;
		if( !isset( $zScores[$level] )) {
			$level = (string) $fallbackPercentage;
		}
		return isset( $zScores[$level] )
			? $zScores[$level]
			: 0;
	}
}
225