1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace GeminiLabs\SiteReviews\Modules; |
4
|
|
|
|
5
|
|
|
class Rating
{
    /**
     * Z-scores keyed by confidence level (in percent).
     *
     * The more sure we are of the confidence interval (the higher the confidence level), the less
     * precise the estimation will be as the margin for error will be higher.
     *
     * Fractional levels are stored under string keys ('99.5'); look levels up through
     * getZScore() so that a float such as 99.5 is not truncated to the integer key 99.
     *
     * @see http://homepages.math.uic.edu/~bpower6/stat101/Confidence%20Intervals.pdf
     * @see https://www.thecalculator.co/math/Confidence-Interval-Calculator-210.html
     * @see https://www.youtube.com/watch?v=grodoLzThy4
     * @see https://en.wikipedia.org/wiki/Standard_score
     * @var array
     */
    const CONFIDENCE_LEVEL_Z_SCORES = [
        50 => 0.67449,
        70 => 1.04,
        75 => 1.15035,
        80 => 1.282,
        85 => 1.44,
        90 => 1.64485,
        92 => 1.75,
        95 => 1.95996,
        96 => 2.05,
        97 => 2.17009,
        98 => 2.326,
        99 => 2.57583,
        '99.5' => 2.81,
        '99.8' => 3.08,
        '99.9' => 3.29053,
    ];

    /**
     * Highest rating level; used as the scale when converting an average to a percentage.
     *
     * @var int
     */
    const MAX_RATING = 5;

    /**
     * Lowest rating level.
     *
     * @var int
     */
    const MIN_RATING = 1;

    /**
     * Get the average rating for a set of review counts.
     *
     * The counts are flattened first (see flattenCounts()), the rating-weighted total is
     * divided by the number of reviews, and the result is rounded before being passed
     * through the 'site-reviews/rating/average' filter.
     *
     * @param array $reviewCounts Review counts keyed by rating level; may be nested.
     * @param int $roundBy Number of decimal places to round the average to.
     * @return float
     */
    public function getAverage( array $reviewCounts, $roundBy = 1 )
    {
        $counts = $this->flattenCounts( $reviewCounts );
        // Starts out as the number of reviews and only becomes the average when there are any.
        $average = array_sum( $counts );
        if( $average > 0 ) {
            $average = round( $this->getTotalSum( $counts ) / $average, intval( $roundBy ));
        }
        return floatval( apply_filters( 'site-reviews/rating/average', $average, $counts, $reviewCounts ));
    }

    /**
     * Get the lower bound of the Wilson score confidence interval for up/down ratings.
     *
     * The array is read as counts: the sum of its values is used as the total number of
     * ratings and the value at index 1 as the number of positive ("up") ratings.
     *
     * @see http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
     * @see https://news.ycombinator.com/item?id=10481507
     * @see https://dataorigami.net/blogs/napkin-folding/79030467-an-algorithm-to-sort-top-comments
     * @see http://julesjacobs.github.io/2015/08/17/bayesian-scoring-of-ratings.html
     * @param array $upDownRatings Rating counts; index 1 holds the positive count.
     * @param int|float|string $confidencePercentage A key of CONFIDENCE_LEVEL_Z_SCORES.
     * @return int|float 0 when there are no ratings, otherwise the lower bound.
     */
    public function getLowerBound( array $upDownRatings = [0, 0], $confidencePercentage = 95 )
    {
        $numRatings = array_sum( $upDownRatings );
        if( $numRatings < 1 ) {
            return 0;
        }
        $z = $this->getZScore( $confidencePercentage );
        // Observed fraction of positive ratings.
        $phat = 1 * $upDownRatings[1] / $numRatings;
        return ( $phat + $z * $z / ( 2 * $numRatings ) - $z * sqrt(( $phat * ( 1 - $phat ) + $z * $z / ( 4 * $numRatings )) / $numRatings )) / ( 1 + $z * $z / $numRatings );
    }

    /**
     * Get the average rating expressed as a percentage of MAX_RATING, rounded to 2 decimals.
     *
     * @param array $reviewCounts Review counts keyed by rating level; may be nested.
     * @return int|float
     */
    public function getOverallPercentage( array $reviewCounts )
    {
        return round( $this->getAverage( $reviewCounts ) * 100 / static::MAX_RATING, 2 );
    }

    /**
     * Get the share of reviews per rating level as whole-number percentages.
     *
     * @param array $reviewCounts Review counts keyed by rating level; may be nested.
     * @return array Percentages keyed by rating level (see getRoundedPercentages()).
     */
    public function getPercentages( array $reviewCounts )
    {
        $counts = $this->flattenCounts( $reviewCounts );
        $total = array_sum( $counts );
        foreach( $counts as $index => $count ) {
            // Empty counts stay as they are; this also avoids dividing by a zero total.
            if( empty( $count ))continue;
            $counts[$index] = $count / $total * 100;
        }
        return $this->getRoundedPercentages( $counts );
    }

    /**
     * Get the bayesian ranking for a set of review counts.
     *
     * The result of getRankingUsingImdb() is passed through the
     * 'site-reviews/bayesian/ranking' filter together with the flattened counts
     * and this instance.
     *
     * @param array $reviewCounts Review counts keyed by rating level; may be nested.
     * @return float
     */
    public function getRanking( array $reviewCounts = [] )
    {
        $counts = $this->flattenCounts( $reviewCounts );
        return floatval( apply_filters( 'site-reviews/bayesian/ranking',
            $this->getRankingUsingImdb( $counts ),
            $counts,
            $this
        ));
    }

    /**
     * Collapse a (possibly nested) array of counts into a single level.
     *
     * Every leaf value is visited; leaves that share the same key are added together.
     *
     * @param array $reviewCounts
     * @return array
     */
    protected function flattenCounts( array $reviewCounts )
    {
        $counts = [];
        array_walk_recursive( $reviewCounts, function( $num, $index ) use( &$counts ) {
            $counts[$index] = isset( $counts[$index] )
                ? $num + $counts[$index]
                : $num;
        });
        return $counts;
    }

    /**
     * Get the bayesian ranking for an array of reviews
     * This formula is the same one used by IMDB to rank their top 250 films
     *
     * @see https://www.xkcd.com/937/
     * @see https://districtdatalabs.silvrback.com/computing-a-bayesian-estimate-of-star-rating-means
     * @see http://fulmicoton.com/posts/bayesian_rating/
     * @see https://stats.stackexchange.com/questions/93974/is-there-an-equivalent-to-lower-bound-of-wilson-score-confidence-interval-for-va
     * @param array $ratingCounts Review counts keyed by rating level.
     * @param int $confidencePercentage Percentage of MAX_RATING to use as the prior mean.
     * @return int|float 0 when the average rating is not greater than 0.
     */
    protected function getRankingUsingImdb( array $ratingCounts, $confidencePercentage = 70 )
    {
        $avgRating = $this->getAverage( $ratingCounts );
        // Represents a prior (your prior opinion without data) for the average star rating. A higher prior also means a higher margin for error.
        // This could also be the average score of all items instead of a fixed value.
        $bayesMean = ( $confidencePercentage / 100 ) * static::MAX_RATING; // prior, 70% = 3.5
        // Represents the number of ratings expected to begin observing a pattern that would put confidence in the prior.
        $bayesMinimal = 10; // confidence
        $numOfReviews = array_sum( $ratingCounts );
        return $avgRating > 0
            ? (( $bayesMinimal * $bayesMean ) + ( $avgRating * $numOfReviews )) / ( $bayesMinimal + $numOfReviews )
            : 0;
    }

    /**
     * The quality of a 5 star rating depends not only on the average number of stars but also on
     * the number of reviews. This method calculates the bayesian ranking of a page by its number
     * of reviews and their rating.
     *
     * @see http://www.evanmiller.org/ranking-items-with-star-ratings.html
     * @see https://stackoverflow.com/questions/1411199/what-is-a-better-way-to-sort-by-a-5-star-rating/1411268
     * @see http://julesjacobs.github.io/2015/08/17/bayesian-scoring-of-ratings.html
     * @param array $ratingCounts Review counts keyed by rating level.
     * @param int|float|string $confidencePercentage A key of CONFIDENCE_LEVEL_Z_SCORES.
     * @return float
     */
    protected function getRankingUsingZScores( array $ratingCounts, $confidencePercentage = 90 )
    {
        $ratingCountsSum = array_sum( $ratingCounts ) + static::MAX_RATING;
        $weight = $this->getWeight( $ratingCounts, $ratingCountsSum );
        $weightPow2 = $this->getWeight( $ratingCounts, $ratingCountsSum, true );
        $zScore = $this->getZScore( $confidencePercentage );
        return $weight - $zScore * sqrt(( $weightPow2 - pow( $weight, 2 )) / ( $ratingCountsSum + 1 ));
    }

    /**
     * Round percentages down to whole numbers and hand out the missing points.
     *
     * Each percentage is floored. While the floored values add up to less than $totalPercent,
     * one point at a time is added to the entries in order of their fractional remainder
     * (largest first, compared as strings; ties go to the higher index).
     *
     * @param array $percentages Percentages keyed by index.
     * @param int $totalPercent The total the rounded percentages should add up to.
     * @return array Whole-number percentages keyed by their original index, highest index first.
     */
    protected function getRoundedPercentages( array $percentages, $totalPercent = 100 )
    {
        array_walk( $percentages, function( &$percent, $index ) {
            $percent = [
                'index' => $index,
                'percent' => floor( $percent ),
                'remainder' => fmod( $percent, 1 ),
            ];
        });
        $indexes = array_column( $percentages, 'index' );
        $remainders = array_column( $percentages, 'remainder' );
        array_multisort( $remainders, SORT_DESC, SORT_STRING, $indexes, SORT_DESC, $percentages );
        $i = 0;
        // Nothing is distributed when every percentage is zero.
        if( array_sum( array_column( $percentages, 'percent' )) > 0 ) {
            while( array_sum( array_column( $percentages, 'percent' )) < $totalPercent ) {
                $percentages[$i]['percent']++;
                $i++;
            }
        }
        // Restore a stable order (by index, descending) before rebuilding the keyed result.
        array_multisort( $indexes, SORT_DESC, $percentages );
        return array_combine( $indexes, array_column( $percentages, 'percent' ));
    }

    /**
     * Get the rating-weighted total of the counts, i.e. the sum of (rating level x count).
     *
     * Dividing this by the number of reviews gives the average rating.
     *
     * @param array $ratingCounts Review counts keyed by rating level.
     * @return int|float 0 for an empty array.
     */
    protected function getTotalSum( array $ratingCounts )
    {
        $total = 0;
        foreach( $ratingCounts as $rating => $count ) {
            $total += $rating * $count;
        }
        return $total;
    }

    /**
     * Get the weighted sum used by getRankingUsingZScores().
     *
     * For every rating level, (count + 1) is multiplied by the level (or by the level
     * squared when $powerOf2 is true) and divided by $ratingCountsSum.
     *
     * @param array $ratingCounts Review counts keyed by rating level.
     * @param int|double $ratingCountsSum
     * @param bool $powerOf2 Whether to square the rating level.
     * @return int|float 0 for an empty array.
     */
    protected function getWeight( array $ratingCounts, $ratingCountsSum, $powerOf2 = false )
    {
        return array_reduce( array_keys( $ratingCounts ),
            function( $count, $rating ) use( $ratingCounts, $ratingCountsSum, $powerOf2 ) {
                $ratingLevel = $powerOf2
                    ? pow( $rating, 2 )
                    : $rating;
                return $count + ( $ratingLevel * ( $ratingCounts[$rating] + 1 )) / $ratingCountsSum;
            },
            0
        );
    }

    /**
     * Look up the z-score for a confidence level.
     *
     * The level is cast to a string first so that fractional levels (e.g. 99.5) match their
     * string keys instead of being truncated to an integer key; whole-number strings
     * resolve to the same integer keys as before.
     *
     * @param int|float|string $confidencePercentage A key of CONFIDENCE_LEVEL_Z_SCORES.
     * @return float
     */
    protected function getZScore( $confidencePercentage )
    {
        return static::CONFIDENCE_LEVEL_Z_SCORES[(string) $confidencePercentage];
    }
}
225
|
|
|
|