Completed
Pull Request — master (#14)
by Shlomi
04:23 queued 02:18
created

ethically.fairness.metrics.score   A

Complexity

Total Complexity 20

Size/Duplication

Total Lines 238
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 100
dl 0
loc 238
rs 10
c 0
b 0
f 0
wmc 20

10 Functions

Rating   Name   Duplication   Size   Complexity  
A _proportion() 0 5 1
A _get_labels() 0 10 3
A sufficiency_score() 0 37 3
A independence_score() 0 23 2
A separation_score() 0 30 2
A _normalize_by_attr() 0 11 2
A roc_curve_by_attr() 0 54 2
A roc_auc_score_by_attr() 0 21 1
A _groupby_y_x_sens() 0 5 1
A _all_equal() 0 12 3
1
from collections import Counter
2
from functools import partial
3
4
import numpy as np
5
import pandas as pd
6
from sklearn.metrics import roc_auc_score, roc_curve
7
from sklearn.utils.multiclass import unique_labels
8
9
from ethically.fairness.metrics.utils import _assert_binary
10
11
12
def _proportion(data, labels):
13
    counts = Counter(data)
14
    assert set(counts.keys()).issubset(labels)
15
    return (counts[labels[1]]
16
            / (counts[labels[0]] + counts[labels[1]]))
17
18
19
def _get_labels(ys, labels):
20
21
    if labels is None:
22
        labels = unique_labels(ys)
23
    else:
24
        labels = np.asarray(labels)
25
        if np.all([l not in ys for l in labels]):
26
            raise ValueError('At least one label specified must be in y.')
27
28
    return labels
29
30
31
def _normalize_by_attr(y_score, x_sens, ndigits=1):
32
    y_score_within = y_score[:]
33
34
    for indices in x_sens.groupby(x_sens).groups.values():
35
        y_score_within[indices] = (y_score_within[indices]
36
                                   .rank(pct=True))
37
38
    y_score_within = (np.floor(y_score_within * (10**ndigits))
39
                      / (10**ndigits))
40
41
    return y_score_within
42
43
44
def independence_score(y_score, x_sens,
                       as_df=False):
    """Compute the independence criteria for score prediction.

    In classification terms this is the **acceptance rate** per score
    value, grouped by the sensitive attribute.

    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Independence criteria.
    :rtype: dict or :class:`pandas.DataFrame`
    """
    # Column-normalized contingency table: distribution of the score
    # within each sensitive-attribute value.
    table = pd.crosstab(index=y_score,
                        columns=x_sens,
                        normalize='columns')

    return table if as_df else table.to_dict()
67
68
69
def separation_score(y_true, y_score, x_sens,
                     labels=None,
                     as_df=False):
    """Compute the separation criteria for score prediction.

    In classification terms these are the **FPR** and **TPR** per score
    value, grouped by the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param labels: Score labels; inferred from *y_score* when ``None``.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Separation criteria.
    :rtype: dict or :class:`pandas.DataFrame`
    """

    _assert_binary(y_true)

    # Called for its validation side effect: raises ValueError when
    # none of the requested labels occurs in y_score.
    labels = _get_labels(y_score, labels)

    table = pd.crosstab(index=y_score,
                        columns=[y_true, x_sens],
                        normalize=True)

    return table if as_df else table.to_dict()
99
100
101
def sufficiency_score(y_true, y_score, x_sens,
                      labels=None,
                      within_score_percentile=False,
                      as_df=False):
    """Compute the sufficiency criteria for score prediction.

    In classification terms these are the **PPV** and **NPV** per score
    value, grouped by the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   target.
    :param labels: Target labels; inferred from *y_true* when ``None``.
    :param within_score_percentile: If truthy, scores are first converted
                                    to within-group percentiles; the
                                    value is also passed on as the
                                    ``ndigits`` truncation argument.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Sufficiency criteria.
    :rtype: dict or :class:`pandas.DataFrame`
    """

    _assert_binary(y_true)

    labels = _get_labels(y_true, labels)

    if within_score_percentile:
        y_score = _normalize_by_attr(y_score, x_sens,
                                     within_score_percentile)

    # Positive-label proportion of y_true in every (score, group) cell.
    aggregate = partial(_proportion, labels=labels)
    table = pd.crosstab(index=y_score,
                        columns=x_sens,
                        values=y_true,
                        aggfunc=aggregate)

    return table if as_df else table.to_dict()
138
139
140
def _all_equal(iterator):
141
    iterator = iter(iterator)
142
143
    try:
144
        first = next(iterator)
145
    except StopIteration:
146
        return True
147
148
    try:
149
        return all(np.allclose(first, rest) for rest in iterator)
150
    except ValueError:
151
        return False
152
153
154
def _groupby_y_x_sens(y_true, y_score, x_sens):
155
    return (pd.DataFrame({'y_true': y_true,
156
                          'y_score': y_score,
157
                          'x_sens': x_sens})
158
            .groupby('x_sens'))
159
160
161
def roc_curve_by_attr(y_true, y_score, x_sens,
                      pos_label=None, sample_weight=None,
                      drop_intermediate=False):
    """Compute Receiver operating characteristic (ROC) by attribute.

    Based on :func:`sklearn.metrics.roc_curve`

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param pos_label: Label considered as positive and others
                      are considered negative.
    :param sample_weight: Sample weights.
    :param drop_intermediate: Whether to drop some suboptimal
                              thresholds which would not appear on
                              a plotted ROC curve.
                              This is useful in order to create
                              lighter ROC curves.
                              Note the default (``False``) differs from
                              scikit-learn's so that all groups keep
                              comparable threshold sets.
    :return: For each value of sensitive attribute:
             - fpr - Increasing false positive rates such
               that element i is the false positive rate
               of predictions with score >= thresholds[i].
             - tpr - Increasing true positive rates such
               that element i is the true positive rate
               of predictions with score >= thresholds[i].
             - thresholds -
               Decreasing thresholds on the decision function
               used to compute fpr and tpr. thresholds[0] represents
               no instances being predicted and is arbitrarily set
               to max(y_score) + 1.
    :rtype: dict

    """

    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    # Pass the optional arguments by keyword: they are keyword-only in
    # sklearn.metrics.roc_curve since scikit-learn 1.0, so passing them
    # positionally raises TypeError there.
    roc_curves = {x_sens_value: roc_curve(group['y_true'],
                                          group['y_score'],
                                          pos_label=pos_label,
                                          sample_weight=sample_weight,
                                          drop_intermediate=drop_intermediate)
                  for x_sens_value, group in grouped}

    # The per-group curves are only comparable when every group was
    # evaluated on the same threshold values.
    if not _all_equal(thresholds
                      for _, _, thresholds in roc_curves.values()):
        raise NotImplementedError('All the scores values should'
                                  ' appear for each sensitive'
                                  ' attribute value.'
                                  ' It will be implemented'
                                  ' in the future.'
                                  ' Please post your use-case in'
                                  ' https://github.com/EthicallyAI/ethically/issues/15')  # pylint: disable=line-too-long

    return roc_curves
215
216
217
def roc_auc_score_by_attr(y_true, y_score, x_sens,
                          sample_weight=None):
    """Compute Area Under the ROC (AUC) by attribute.

    Based on :func:`sklearn.metrics.roc_auc_score`

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param sample_weight: Sample weights.
    :return: ROC AUC grouped by the sensitive attribute.
    :rtype: dict
    """

    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    scores = {}
    for x_sens_value, group in grouped:
        scores[x_sens_value] = roc_auc_score(group['y_true'],
                                             group['y_score'],
                                             sample_weight=sample_weight)

    return scores
238