|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace GeminiLabs\SiteReviews\Modules; |
|
4
|
|
|
|
|
5
|
|
|
class Rating
{
    /**
     * Z-scores keyed by confidence level (percent).
     *
     * The more sure we are of the confidence interval (the higher the confidence level), the less
     * precise the estimation will be as the margin for error will be higher.
     *
     * Fractional levels are stored under string keys ('99.5'); use getZScore() to look a level up
     * so that a float argument is not silently truncated to an integer key.
     *
     * @see http://homepages.math.uic.edu/~bpower6/stat101/Confidence%20Intervals.pdf
     * @see https://www.thecalculator.co/math/Confidence-Interval-Calculator-210.html
     * @see https://www.youtube.com/watch?v=grodoLzThy4
     * @see https://en.wikipedia.org/wiki/Standard_score
     * @var array
     */
    const CONFIDENCE_LEVEL_Z_SCORES = [
        50 => 0.67449,
        70 => 1.04,
        75 => 1.15035,
        80 => 1.282,
        85 => 1.44,
        90 => 1.64485,
        92 => 1.75,
        95 => 1.95996,
        96 => 2.05,
        97 => 2.17009,
        98 => 2.326,
        99 => 2.57583,
        '99.5' => 2.81,
        '99.8' => 3.08,
        '99.9' => 3.29053,
    ];

    /**
     * Highest rating level.
     * @var int
     */
    const MAX_RATING = 5;

    /**
     * Lowest rating level.
     * @var int
     */
    const MIN_RATING = 1;

    /**
     * Get the average rating, passed through the "site-reviews/rating/average" filter.
     *
     * @param array $ratingCounts Number of ratings keyed by rating level, e.g. [1 => 0, ..., 5 => 12]
     * @param int $roundBy Number of decimal places to round the average to
     * @return float
     */
    public function getAverage( array $ratingCounts, $roundBy = 1 )
    {
        $numRatings = array_sum( $ratingCounts );
        // Without any ratings there is nothing to divide by; the average is reported as 0.
        $average = $numRatings > 0
            ? round( $this->getTotalSum( $ratingCounts ) / $numRatings, intval( $roundBy ))
            : 0;
        return floatval( apply_filters( 'site-reviews/rating/average', $average, $ratingCounts ));
    }

    /**
     * Get the lower bound of the Wilson score confidence interval for up/down ratings.
     *
     * The method receives the rating totals (not the individual votes): the value at index 1 is
     * used as the number of positive ratings and the sum of all values as the number of ratings,
     * i.e. [downCount, upCount].
     *
     * @see http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
     * @see https://news.ycombinator.com/item?id=10481507
     * @see https://dataorigami.net/blogs/napkin-folding/79030467-an-algorithm-to-sort-top-comments
     * @see http://julesjacobs.github.io/2015/08/17/bayesian-scoring-of-ratings.html
     * @param array $upDownCounts
     * @param int|float|string $confidencePercentage A key of CONFIDENCE_LEVEL_Z_SCORES; unsupported
     *     values fall back to 95
     * @return int|float 0 when there are no ratings
     */
    public function getLowerBound( array $upDownCounts = [0, 0], $confidencePercentage = 95 )
    {
        $numRatings = array_sum( $upDownCounts );
        if( $numRatings < 1 ) {
            return 0;
        }
        $z = $this->getZScore( $confidencePercentage, 95 );
        $zSquared = $z * $z;
        // A missing "up" entry is treated as zero positive ratings.
        $numPositive = isset( $upDownCounts[1] ) ? $upDownCounts[1] : 0;
        $phat = 1 * $numPositive / $numRatings;
        $margin = $z * sqrt(( $phat * ( 1 - $phat ) + $zSquared / ( 4 * $numRatings )) / $numRatings );
        return ( $phat + $zSquared / ( 2 * $numRatings ) - $margin ) / ( 1 + $zSquared / $numRatings );
    }

    /**
     * Get the average rating expressed as a percentage of MAX_RATING.
     *
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @return int|float
     */
    public function getOverallPercentage( array $ratingCounts )
    {
        return round( $this->getAverage( $ratingCounts ) * 100 / static::MAX_RATING, 2 );
    }

    /**
     * Get the share of each rating level as whole percentages.
     *
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @return array Whole percentages keyed by rating level, in descending key order
     */
    public function getPercentages( array $ratingCounts )
    {
        $total = array_sum( $ratingCounts );
        $percentages = [];
        foreach( $ratingCounts as $index => $count ) {
            // Empty counts (and a non-positive total) contribute 0% instead of dividing by zero.
            $percentages[$index] = ( $total > 0 && !empty( $count ))
                ? $count / $total * 100
                : 0;
        }
        return $this->getRoundedPercentages( $percentages );
    }

    /**
     * Get the ranking, passed through the "site-reviews/bayesian/ranking" filter.
     *
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @return float
     */
    public function getRanking( array $ratingCounts )
    {
        return floatval( apply_filters( 'site-reviews/bayesian/ranking',
            $this->getRankingUsingImdb( $ratingCounts ),
            $ratingCounts,
            $this
        ));
    }

    /**
     * Get the bayesian ranking for an array of reviews
     * This formula is the same one used by IMDB to rank their top 250 films
     *
     * @see https://www.xkcd.com/937/
     * @see https://districtdatalabs.silvrback.com/computing-a-bayesian-estimate-of-star-rating-means
     * @see http://fulmicoton.com/posts/bayesian_rating/
     * @see https://stats.stackexchange.com/questions/93974/is-there-an-equivalent-to-lower-bound-of-wilson-score-confidence-interval-for-va
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @param int $confidencePercentage Percentage of MAX_RATING used as the prior mean
     * @return int|float 0 when the average rating is not above zero
     */
    public function getRankingUsingImdb( array $ratingCounts, $confidencePercentage = 70 )
    {
        $avgRating = $this->getAverage( $ratingCounts );
        // Represents a prior (your prior opinion without data) for the average star rating. A higher prior also means a higher margin for error.
        // This could also be the average score of all items instead of a fixed value.
        $bayesMean = ( $confidencePercentage / 100 ) * static::MAX_RATING; // prior, 70% = 3.5
        // Represents the number of ratings expected to begin observing a pattern that would put confidence in the prior.
        $bayesMinimal = 10; // confidence
        $numOfReviews = array_sum( $ratingCounts );
        if( $avgRating > 0 ) {
            return (( $bayesMinimal * $bayesMean ) + ( $avgRating * $numOfReviews )) / ( $bayesMinimal + $numOfReviews );
        }
        return 0;
    }

    /**
     * The quality of a 5 star rating depends not only on the average number of stars but also on
     * the number of reviews. This method calculates the bayesian ranking of a page by its number
     * of reviews and their rating.
     *
     * One pseudo-rating is added per rating level; the total assumes $ratingCounts holds exactly
     * MAX_RATING levels.
     *
     * @see http://www.evanmiller.org/ranking-items-with-star-ratings.html
     * @see https://stackoverflow.com/questions/1411199/what-is-a-better-way-to-sort-by-a-5-star-rating/1411268
     * @see http://julesjacobs.github.io/2015/08/17/bayesian-scoring-of-ratings.html
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @param int|float|string $confidencePercentage A key of CONFIDENCE_LEVEL_Z_SCORES; unsupported
     *     values fall back to 90
     * @return float
     */
    public function getRankingUsingZScores( array $ratingCounts, $confidencePercentage = 90 )
    {
        $ratingCountsSum = array_sum( $ratingCounts ) + static::MAX_RATING;
        $weight = $this->getWeight( $ratingCounts, $ratingCountsSum );
        $weightPow2 = $this->getWeight( $ratingCounts, $ratingCountsSum, true );
        $zScore = $this->getZScore( $confidencePercentage, 90 );
        // Floating point error can push a zero variance slightly negative, which would make sqrt() return NAN.
        $variance = max( 0, $weightPow2 - pow( $weight, 2 ));
        return $weight - $zScore * sqrt( $variance / ( $ratingCountsSum + 1 ));
    }

    /**
     * Round percentages down to whole numbers, then hand out the missing points one at a time
     * (largest remainder first, ties going to the higher index) until they add up to $totalPercent.
     * Nothing is handed out when every percentage rounds down to zero.
     *
     * @param array $percentages Percentages keyed by rating level
     * @param int $totalPercent The total the rounded percentages should add up to
     * @return array Whole percentages keyed by rating level, in descending key order
     */
    protected function getRoundedPercentages( array $percentages, $totalPercent = 100 )
    {
        if( empty( $percentages )) {
            return [];
        }
        $indexes = array_keys( $percentages );
        $wholes = [];
        $remainders = [];
        foreach( $percentages as $percent ) {
            $wholes[] = floor( $percent );
            // Rounding removes floating point noise (e.g. 28.000000000000004) so that near-equal
            // remainders tie, and the numeric sort below is not fooled by exponent notation.
            $remainders[] = round( fmod( $percent, 1 ), 10 );
        }
        array_multisort( $remainders, SORT_DESC, SORT_NUMERIC, $indexes, SORT_DESC, $wholes );
        $sumOfWholes = array_sum( $wholes );
        if( $sumOfWholes > 0 ) {
            $numEntries = count( $wholes );
            $missing = $totalPercent - $sumOfWholes;
            // The modulo keeps the index within bounds should more points be missing than there are entries.
            for( $i = 0; $i < $missing; $i++ ) {
                $wholes[$i % $numEntries]++;
            }
        }
        array_multisort( $indexes, SORT_DESC, $wholes );
        return array_combine( $indexes, $wholes );
    }

    /**
     * Get the sum of all ratings, i.e. each rating level multiplied by the number of times it
     * was given. Assumes the array keys are the (numeric) rating levels, as getWeight() does.
     *
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @return int|float
     */
    protected function getTotalSum( array $ratingCounts )
    {
        $totalSum = 0;
        foreach( $ratingCounts as $rating => $count ) {
            $totalSum += $rating * $count;
        }
        return $totalSum;
    }

    /**
     * Get the sum of each rating level (or its square) weighted by its share of the ratings,
     * after adding one pseudo-rating to every level.
     *
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @param int|double $ratingCountsSum Total number of ratings, including the pseudo-ratings
     * @param bool $powerOf2 Whether to weight the squared rating level instead of the level itself
     * @return int|float
     */
    protected function getWeight( array $ratingCounts, $ratingCountsSum, $powerOf2 = false )
    {
        $weight = 0;
        foreach( $ratingCounts as $rating => $count ) {
            $ratingLevel = $powerOf2
                ? pow( $rating, 2 )
                : $rating;
            $weight += ( $ratingLevel * ( $count + 1 )) / $ratingCountsSum;
        }
        return $weight;
    }

    /**
     * Get the z-score of a confidence level.
     *
     * Levels are compared numerically so that 99.5 and '99.5' find the same entry.
     *
     * @param int|float|string $confidencePercentage
     * @param int $fallbackPercentage An existing key of CONFIDENCE_LEVEL_Z_SCORES, used when
     *     $confidencePercentage is not a supported level
     * @return float
     */
    protected function getZScore( $confidencePercentage, $fallbackPercentage )
    {
        $zScores = static::CONFIDENCE_LEVEL_Z_SCORES;
        if( is_numeric( $confidencePercentage )) {
            $wanted = (float) $confidencePercentage;
            foreach( $zScores as $level => $zScore ) {
                if( (float) $level === $wanted ) {
                    return $zScore;
                }
            }
        }
        return $zScores[$fallbackPercentage];
    }
}
|
208
|
|
|
|