1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace GeminiLabs\SiteReviews\Modules; |
4
|
|
|
|
5
|
|
|
class Rating
{
    /**
     * Z-scores keyed by confidence level (as a percentage).
     *
     * The more sure we are of the confidence interval (the higher the confidence level), the less
     * precise the estimation will be as the margin for error will be higher.
     * Fractional levels are stored under string keys ('99.5', '99.8', '99.9').
     * @see http://homepages.math.uic.edu/~bpower6/stat101/Confidence%20Intervals.pdf
     * @see https://www.thecalculator.co/math/Confidence-Interval-Calculator-210.html
     * @see https://www.youtube.com/watch?v=grodoLzThy4
     * @see https://en.wikipedia.org/wiki/Standard_score
     * @var array
     */
    const CONFIDENCE_LEVEL_Z_SCORES = [
        50 => 0.67449,
        70 => 1.04,
        75 => 1.15035,
        80 => 1.282,
        85 => 1.44,
        90 => 1.64485,
        92 => 1.75,
        95 => 1.95996,
        96 => 2.05,
        97 => 2.17009,
        98 => 2.326,
        99 => 2.57583,
        '99.5' => 2.81,
        '99.8' => 3.08,
        '99.9' => 3.29053,
    ];

    /**
     * The highest rating level; used to scale the average into a percentage and as the
     * number of pseudo-ratings added by getRankingUsingZScores().
     * @var int
     */
    const MAX_RATING = 5;

    /**
     * The lowest rating level.
     * @var int
     */
    const MIN_RATING = 1;

    /**
     * Get the average rating from an array of counts keyed by rating level,
     * e.g. [1 => 0, 2 => 0, 3 => 1, 4 => 2, 5 => 7].
     * The result is passed through the 'site-reviews/rating/average' filter.
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @param int $roundBy Number of decimal places to round the average to
     * @return float
     */
    public function getAverage( array $ratingCounts, $roundBy = 1 )
    {
        $average = 0;
        $numRatings = array_sum( $ratingCounts );
        if( $numRatings > 0 ) {
            // Weighted sum of the rating levels divided by the number of ratings
            $average = round( $this->getTotalSum( $ratingCounts ) / $numRatings, intval( $roundBy ));
        }
        return floatval( apply_filters( 'site-reviews/rating/average', $average, $ratingCounts ));
    }

    /**
     * Get the lower bound of the Wilson score confidence interval for up/down ratings.
     * Method receives an array of counts: index 1 holds the number of up (positive) ratings
     * and the sum of all values is used as the total number of ratings, e.g. [3, 7].
     * @see http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
     * @see https://news.ycombinator.com/item?id=10481507
     * @see https://dataorigami.net/blogs/napkin-folding/79030467-an-algorithm-to-sort-top-comments
     * @see http://julesjacobs.github.io/2015/08/17/bayesian-scoring-of-ratings.html
     * @param array $upDownCounts
     * @param int $confidencePercentage Must be a key of CONFIDENCE_LEVEL_Z_SCORES
     * @return int|float
     */
    public function getLowerBound( array $upDownCounts = [0, 0], $confidencePercentage = 95 )
    {
        $numRatings = array_sum( $upDownCounts );
        if( $numRatings < 1 ) {
            return 0;
        }
        $z = static::CONFIDENCE_LEVEL_Z_SCORES[$confidencePercentage];
        $zSquared = $z * $z;
        // Observed proportion of positive ratings
        $phat = 1 * $upDownCounts[1] / $numRatings;
        $margin = $z * sqrt(( $phat * ( 1 - $phat ) + $zSquared / ( 4 * $numRatings )) / $numRatings );
        return ( $phat + $zSquared / ( 2 * $numRatings ) - $margin ) / ( 1 + $zSquared / $numRatings );
    }

    /**
     * Get the average rating expressed as a percentage of MAX_RATING (rounded to 2 decimals).
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @return int|float
     */
    public function getOverallPercentage( array $ratingCounts )
    {
        return round( $this->getAverage( $ratingCounts ) * 100 / static::MAX_RATING, 2 );
    }

    /**
     * Get the share of each rating level as a whole-number percentage.
     * The result is keyed by rating level in descending order.
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @return array
     */
    public function getPercentages( array $ratingCounts )
    {
        $total = array_sum( $ratingCounts );
        foreach( $ratingCounts as $index => $count ) {
            // Empty counts stay untouched, this also avoids dividing by a zero total
            if( empty( $count ))continue;
            $ratingCounts[$index] = $count / $total * 100;
        }
        return $this->getRoundedPercentages( $ratingCounts );
    }

    /**
     * Get the ranking of an array of rating counts.
     * Uses getRankingUsingImdb() and passes the result through the
     * 'site-reviews/bayesian/ranking' filter.
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @return float
     */
    public function getRanking( array $ratingCounts )
    {
        return floatval( apply_filters( 'site-reviews/bayesian/ranking',
            $this->getRankingUsingImdb( $ratingCounts ),
            $ratingCounts,
            $this
        ));
    }

    /**
     * Get the bayesian ranking for an array of reviews
     * This formula is the same one used by IMDB to rank their top 250 films
     * @see https://www.xkcd.com/937/
     * @see https://districtdatalabs.silvrback.com/computing-a-bayesian-estimate-of-star-rating-means
     * @see http://fulmicoton.com/posts/bayesian_rating/
     * @see https://stats.stackexchange.com/questions/93974/is-there-an-equivalent-to-lower-bound-of-wilson-score-confidence-interval-for-va
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @param int $confidencePercentage Percentage of MAX_RATING to use as the prior mean
     * @return int|float
     */
    public function getRankingUsingImdb( array $ratingCounts, $confidencePercentage = 70 )
    {
        $avgRating = $this->getAverage( $ratingCounts );
        // Represents a prior (your prior opinion without data) for the average star rating. A higher prior also means a higher margin for error.
        // This could also be the average score of all items instead of a fixed value.
        $bayesMean = ( $confidencePercentage / 100 ) * static::MAX_RATING; // prior, 70% = 3.5
        // Represents the number of ratings expected to begin observing a pattern that would put confidence in the prior.
        $bayesMinimal = 10; // confidence
        $numOfReviews = array_sum( $ratingCounts );
        if( $avgRating > 0 ) {
            return (( $bayesMinimal * $bayesMean ) + ( $avgRating * $numOfReviews )) / ( $bayesMinimal + $numOfReviews );
        }
        return 0;
    }

    /**
     * The quality of a 5 star rating depends not only on the average number of stars but also on
     * the number of reviews. This method calculates the bayesian ranking of a page by its number
     * of reviews and their rating.
     * @see http://www.evanmiller.org/ranking-items-with-star-ratings.html
     * @see https://stackoverflow.com/questions/1411199/what-is-a-better-way-to-sort-by-a-5-star-rating/1411268
     * @see http://julesjacobs.github.io/2015/08/17/bayesian-scoring-of-ratings.html
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @param int $confidencePercentage Must be a key of CONFIDENCE_LEVEL_Z_SCORES
     * @return float
     */
    public function getRankingUsingZScores( array $ratingCounts, $confidencePercentage = 90 )
    {
        // getWeight() adds one pseudo-rating to every level, MAX_RATING accounts for them in the total
        $ratingCountsSum = array_sum( $ratingCounts ) + static::MAX_RATING;
        $weight = $this->getWeight( $ratingCounts, $ratingCountsSum );
        $weightPow2 = $this->getWeight( $ratingCounts, $ratingCountsSum, true );
        $zScore = static::CONFIDENCE_LEVEL_Z_SCORES[$confidencePercentage];
        return $weight - $zScore * sqrt(( $weightPow2 - pow( $weight, 2 )) / ( $ratingCountsSum + 1 ));
    }

    /**
     * Round percentages down to whole numbers, then hand out the missing points (largest
     * remainder first) until they add up to $totalPercent.
     * The result is keyed by the original keys in descending order.
     * @param array $percentages Percentages keyed by rating level
     * @param int $totalPercent The total that the rounded percentages should add up to
     * @return array
     */
    protected function getRoundedPercentages( array $percentages, $totalPercent = 100 )
    {
        array_walk( $percentages, function( &$percent, $index ) {
            $percent = [
                'index' => $index,
                'percent' => floor( $percent ),
                'remainder' => fmod( $percent, 1 ),
            ];
        });
        $indexes = array_column( $percentages, 'index' );
        $remainders = array_column( $percentages, 'remainder' );
        // Remainders are floats and must be compared numerically: as strings, a tiny
        // floating-point remainder such as "8.8E-16" would sort ahead of "0.5".
        // Ties are broken by the highest index. Numeric keys are re-indexed from 0.
        array_multisort( $remainders, SORT_DESC, SORT_NUMERIC, $indexes, SORT_DESC, $percentages );
        $numItems = count( $percentages );
        $sum = array_sum( array_column( $percentages, 'percent' ));
        // Nothing is distributed when every percentage is zero (i.e. there are no ratings)
        if( $sum > 0 ) {
            for( $i = 0; $sum < $totalPercent; $i++, $sum++ ) {
                // Wrap around so that a large $totalPercent cannot run past the last item
                $percentages[$i % $numItems]['percent']++;
            }
        }
        array_multisort( $indexes, SORT_DESC, $percentages );
        return array_combine( $indexes, array_column( $percentages, 'percent' ));
    }

    /**
     * Get the sum of all ratings, i.e. each rating level multiplied by its number of ratings.
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @return int
     */
    protected function getTotalSum( array $ratingCounts )
    {
        return array_reduce( array_keys( $ratingCounts ),
            function( $carry, $rating ) use( $ratingCounts ) {
                return $carry + ( $rating * $ratingCounts[$rating] );
            },
            0
        );
    }

    /**
     * Get the weighted mean of the rating levels (or of their squares), counting one
     * additional pseudo-rating for every level.
     * @param array $ratingCounts Number of ratings keyed by rating level
     * @param int|double $ratingCountsSum Total number of ratings including the pseudo-ratings
     * @param bool $powerOf2 Whether to weight the squared rating levels instead
     * @return float
     */
    protected function getWeight( array $ratingCounts, $ratingCountsSum, $powerOf2 = false )
    {
        return array_reduce( array_keys( $ratingCounts ),
            function( $count, $rating ) use( $ratingCounts, $ratingCountsSum, $powerOf2 ) {
                $ratingLevel = $powerOf2
                    ? pow( $rating, 2 )
                    : $rating;
                return $count + ( $ratingLevel * ( $ratingCounts[$rating] + 1 )) / $ratingCountsSum;
            },
            0
        );
    }
}
208
|
|
|
|