Test Failed
Push — tmp ( 15f615...89cc97 )
by Paul
10:31 queued 04:40
created

Rating::getRoundedPercentages()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 21
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
cc 3
eloc 15
c 0
b 0
f 0
nc 2
nop 2
dl 0
loc 21
ccs 0
cts 20
cp 0
crap 12
rs 9.7666
1
<?php
2
3
namespace GeminiLabs\SiteReviews\Modules;
4
5
/**
 * Rating maths: averages, percentage breakdowns and ranking scores computed
 * from arrays of rating counts (keyed by rating level, valued by the number of
 * ratings received at that level).
 */
class Rating
{
    /**
     * Z-scores keyed by confidence level (in percent).
     *
     * The more sure we are of the confidence interval (the higher the confidence level), the less
     * precise the estimation will be as the margin for error will be higher.
     *
     * Fractional levels are stored under string keys ('99.5'); whole-number levels end up
     * under integer keys. Use zScore() for lookups so both kinds are resolved correctly.
     * @see http://homepages.math.uic.edu/~bpower6/stat101/Confidence%20Intervals.pdf
     * @see https://www.thecalculator.co/math/Confidence-Interval-Calculator-210.html
     * @see https://www.youtube.com/watch?v=grodoLzThy4
     * @see https://en.wikipedia.org/wiki/Standard_score
     * @var array
     */
    const CONFIDENCE_LEVEL_Z_SCORES = [
        50 => 0.67449,
        70 => 1.04,
        75 => 1.15035,
        80 => 1.282,
        85 => 1.44,
        90 => 1.64485,
        92 => 1.75,
        95 => 1.95996,
        96 => 2.05,
        97 => 2.17009,
        98 => 2.326,
        99 => 2.57583,
        '99.5' => 2.81,
        '99.8' => 3.08,
        '99.9' => 3.29053,
    ];

    /**
     * @var int
     */
    const MAX_RATING = 5;

    /**
     * @var int
     */
    const MIN_RATING = 1;

    /**
     * Get the average rating of an array of rating counts, rounded to $roundBy decimals.
     * The rounded value is passed through the "rating/average" float filter together with
     * the rating counts and the unrounded average.
     * @param int $roundBy Number of decimals to round to
     * @return float
     */
    public function average(array $ratingCounts, $roundBy = 1)
    {
        $numRatings = array_sum($ratingCounts);
        // Without a positive number of ratings the raw total itself is used as the average,
        // which also avoids dividing by zero.
        $average = $numRatings > 0
            ? $this->totalSum($ratingCounts) / $numRatings
            : $numRatings;
        $roundedAverage = round($average, intval($roundBy));
        return glsr()->filterFloat('rating/average', $roundedAverage, $ratingCounts, $average);
    }

    /**
     * Get an array of zeros keyed from 0 up to the value that
     * glsr()->constant('MAX_RATING', __CLASS__) resolves to.
     * @return array
     */
    public function emptyArray()
    {
        return array_fill_keys(range(0, glsr()->constant('MAX_RATING', __CLASS__)), 0);
    }

    /**
     * Get the lower bound of the Wilson score confidence interval for up/down ratings.
     * Method receives the up/down rating counts: the array is summed to get the total number
     * of ratings and index 1 is read as the number of positive ratings, e.g. [3, 7] is treated
     * as 10 ratings of which 7 are positive.
     * An unsupported confidence level falls back to the default level (95).
     * @see http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
     * @see https://news.ycombinator.com/item?id=10481507
     * @see https://dataorigami.net/blogs/napkin-folding/79030467-an-algorithm-to-sort-top-comments
     * @see http://julesjacobs.github.io/2015/08/17/bayesian-scoring-of-ratings.html
     * @param int|float|string $confidencePercentage A key of CONFIDENCE_LEVEL_Z_SCORES
     * @return int|float
     */
    public function lowerBound(array $upDownCounts = [0, 0], $confidencePercentage = 95)
    {
        $numRatings = array_sum($upDownCounts);
        if ($numRatings < 1) {
            return 0;
        }
        $z = $this->zScore($confidencePercentage, 95);
        $zSquared = $z * $z;
        // A missing index 1 is treated as "no positive ratings" instead of raising a notice.
        $positive = isset($upDownCounts[1]) ? $upDownCounts[1] : 0;
        $phat = $positive / $numRatings;
        $centre = $phat + $zSquared / (2 * $numRatings);
        $spread = $z * sqrt(($phat * (1 - $phat) + $zSquared / (4 * $numRatings)) / $numRatings);
        return ($centre - $spread) / (1 + $zSquared / $numRatings);
    }

    /**
     * Get the average rating expressed as a percentage of the maximum rating,
     * rounded to 2 decimals.
     * @return int|float
     */
    public function overallPercentage(array $ratingCounts)
    {
        return round($this->average($ratingCounts) * 100 / glsr()->constant('MAX_RATING', __CLASS__), 2);
    }

    /**
     * Convert rating counts into whole-number percentages of the total count.
     * @return array
     */
    public function percentages(array $ratingCounts)
    {
        $total = array_sum($ratingCounts);
        foreach ($ratingCounts as $index => $count) {
            // Empty counts are left untouched, so an all-zero array never divides by zero.
            if (empty($count)) {
                continue;
            }
            $ratingCounts[$index] = $count / $total * 100;
        }
        return $this->roundedPercentages($ratingCounts);
    }

    /**
     * Get the ranking score of an array of rating counts.
     * The score is passed through the "rating/ranking" float filter.
     * @return float
     */
    public function ranking(array $ratingCounts)
    {
        return glsr()->filterFloat('rating/ranking',
            $this->rankingUsingImdb($ratingCounts),
            $ratingCounts,
            $this
        );
    }

    /**
     * Get the bayesian ranking for an array of reviews
     * This formula is the same one used by IMDB to rank their top 250 films.
     * @see https://www.xkcd.com/937/
     * @see https://districtdatalabs.silvrback.com/computing-a-bayesian-estimate-of-star-rating-means
     * @see http://fulmicoton.com/posts/bayesian_rating/
     * @see https://stats.stackexchange.com/questions/93974/is-there-an-equivalent-to-lower-bound-of-wilson-score-confidence-interval-for-va
     * @param int $confidencePercentage
     * @return int|float
     */
    public function rankingUsingImdb(array $ratingCounts, $confidencePercentage = 70)
    {
        $avgRating = $this->average($ratingCounts);
        // Represents a prior (your prior opinion without data) for the average star rating. A higher prior also means a higher margin for error.
        // This could also be the average score of all items instead of a fixed value.
        $bayesMean = ($confidencePercentage / 100) * glsr()->constant('MAX_RATING', __CLASS__); // prior, 70% = 3.5
        // Represents the number of ratings expected to begin observing a pattern that would put confidence in the prior.
        $bayesMinimal = 10; // confidence
        $numOfReviews = array_sum($ratingCounts);
        return $avgRating > 0
            ? (($bayesMinimal * $bayesMean) + ($avgRating * $numOfReviews)) / ($bayesMinimal + $numOfReviews)
            : 0;
    }

    /**
     * The quality of a 5 star rating depends not only on the average number of stars but also on
     * the number of reviews. This method calculates the bayesian ranking of a page by its number
     * of reviews and their rating.
     * An unsupported confidence level falls back to the default level (90).
     * @see http://www.evanmiller.org/ranking-items-with-star-ratings.html
     * @see https://stackoverflow.com/questions/1411199/what-is-a-better-way-to-sort-by-a-5-star-rating/1411268
     * @see http://julesjacobs.github.io/2015/08/17/bayesian-scoring-of-ratings.html
     * @param int|float|string $confidencePercentage A key of CONFIDENCE_LEVEL_Z_SCORES
     * @return float
     */
    public function rankingUsingZScores(array $ratingCounts, $confidencePercentage = 90)
    {
        $ratingCountsSum = array_sum($ratingCounts) + glsr()->constant('MAX_RATING', __CLASS__);
        $weight = $this->weight($ratingCounts, $ratingCountsSum);
        $weightPow2 = $this->weight($ratingCounts, $ratingCountsSum, true);
        $zScore = $this->zScore($confidencePercentage, 90);
        return $weight - $zScore * sqrt(($weightPow2 - pow($weight, 2)) / ($ratingCountsSum + 1));
    }

    /**
     * Round percentages down to whole numbers, then hand out the missing points one at a time
     * (largest remainder first, higher index first on ties) until they add up to $totalPercent.
     * Nothing is handed out when every percentage rounds down to zero.
     * The result is keyed by the original indexes, sorted in descending order.
     * @param int $totalPercent The total that the rounded percentages should add up to
     * @return array
     */
    protected function roundedPercentages(array $percentages, $totalPercent = 100)
    {
        $entries = [];
        foreach ($percentages as $index => $percent) {
            $entries[] = [
                'index' => $index,
                'percent' => floor($percent),
                'remainder' => fmod($percent, 1),
            ];
        }
        // Remainders must be compared as numbers: compared as strings, floating point noise
        // such as "8.88E-16" would rank above "0.99" and receive the extra point.
        usort($entries, function ($a, $b) {
            if ($a['remainder'] == $b['remainder']) {
                if ($a['index'] == $b['index']) {
                    return 0;
                }
                return $a['index'] < $b['index'] ? 1 : -1;
            }
            return $a['remainder'] < $b['remainder'] ? 1 : -1;
        });
        $numEntries = count($entries);
        $sum = array_sum(array_column($entries, 'percent'));
        if ($sum > 0) {
            // Wrapping the position keeps the loop inside the array when more points are
            // missing than there are entries; running past the end would add a bogus entry.
            for ($i = 0; $sum < $totalPercent; ++$i) {
                ++$entries[$i % $numEntries]['percent'];
                ++$sum;
            }
        }
        // Building the result directly (instead of array_combine) guarantees an array is
        // returned: array_combine returns false on older PHP versions when the number of
        // keys and values differ.
        $rounded = [];
        foreach ($entries as $entry) {
            $rounded[$entry['index']] = $entry['percent'];
        }
        krsort($rounded);
        return $rounded;
    }

    /**
     * Get the sum of every rating level multiplied by its count.
     * Returns 0 for an empty array.
     * @return int
     */
    protected function totalSum(array $ratingCounts)
    {
        return array_reduce(array_keys($ratingCounts), function ($carry, $index) use ($ratingCounts) {
            return $carry + ($index * $ratingCounts[$index]);
        }, 0);
    }

    /**
     * Get the weighted sum of the rating levels (or of their squares), where the weight of
     * each level is its count plus one, divided by $ratingCountsSum.
     * Returns 0.0 for an empty array.
     * @param int|float $ratingCountsSum
     * @param bool $powerOf2 Whether to square each rating level
     * @return float
     */
    protected function weight(array $ratingCounts, $ratingCountsSum, $powerOf2 = false)
    {
        return array_reduce(array_keys($ratingCounts),
            function ($count, $rating) use ($ratingCounts, $ratingCountsSum, $powerOf2) {
                $ratingLevel = $powerOf2
                    ? pow($rating, 2)
                    : $rating;
                return $count + ($ratingLevel * ($ratingCounts[$rating] + 1)) / $ratingCountsSum;
            },
            0.0
        );
    }

    /**
     * Get the z-score of a confidence level, or that of the fallback level when the
     * requested level is not in CONFIDENCE_LEVEL_Z_SCORES.
     * @param int|float|string $confidencePercentage
     * @param int|float|string $fallbackPercentage Must be a key of CONFIDENCE_LEVEL_Z_SCORES
     * @return float
     */
    protected function zScore($confidencePercentage, $fallbackPercentage)
    {
        $zScores = static::CONFIDENCE_LEVEL_Z_SCORES;
        // Look up by string: a float used directly as an array key is truncated (99.5 => 99),
        // whereas a whole-number string ("95") still resolves to the matching integer key.
        $level = (string) $confidencePercentage;
        if (!isset($zScores[$level])) {
            $level = (string) $fallbackPercentage;
        }
        return $zScores[$level];
    }
}
219