responsibly.fairness.metrics.score (Rating: A)

Complexity

Total Complexity: 19

Size / Duplication

Total Lines: 234
Duplicated Lines: 0%

Importance

Changes: 0

Metric  Value
------  -----
eloc    96
dl      0
loc     234
rs      10
c       0
b       0
f       0
wmc     19

9 Functions

Rating  Name                     Duplication  Size  Complexity
A       _proportion()            0            5     1
A       _normalize_by_attr()     0            11    2
A       _all_equal()             0            12    3
A       _get_labels()            0            10    3
A       sufficiency_score()      0            37    3
A       independence_score()     0            23    2
A       separation_score()       0            30    2
A       roc_auc_score_by_attr()  0            21    1
A       roc_curve_by_attr()      0            55    2
from collections import Counter
from functools import partial

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.utils.multiclass import unique_labels

from responsibly.fairness.metrics.utils import (
    _assert_binary, _groupby_y_x_sens,
)


def _proportion(data, labels):
    # Fraction of observations equal to the positive label
    # (labels[1]) out of all observations with either label.
    counts = Counter(data)
    assert set(counts.keys()).issubset(labels)
    return (counts[labels[1]]
            / (counts[labels[0]] + counts[labels[1]]))
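
# Usage sketch (not part of the original module), with made-up data:
#
#   _proportion(['b', 'a', 'b', 'b'], labels=['a', 'b'])  # -> 0.75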


def _get_labels(ys, labels):
    # Infer the label set from the data if not given explicitly;
    # otherwise validate that at least one given label occurs in ys.
    if labels is None:
        labels = unique_labels(ys)
    else:
        labels = np.asarray(labels)
        if np.all([label not in ys for label in labels]):
            raise ValueError('At least one label specified must be in y.')

    return labels


def _normalize_by_attr(y_score, x_sens, ndigits=1):
    # Replace each score with its percentile rank *within* its
    # sensitive-attribute group, then floor to ``ndigits`` digits.
    y_score_within = y_score[:]

    for indices in x_sens.groupby(x_sens).groups.values():
        y_score_within[indices] = (y_score_within[indices]
                                   .rank(pct=True))

    y_score_within = (np.floor(y_score_within * (10**ndigits))
                      / (10**ndigits))

    return y_score_within
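
# Usage sketch (not part of the original module), with made-up data:
# each score becomes its percentile rank within its group, floored to
# one digit.
#
#   scores = pd.Series([0.2, 0.4, 0.6, 0.1, 0.9, 0.5])
#   sens = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'])
#   _normalize_by_attr(scores, sens, ndigits=1)
#   # -> [0.3, 0.6, 1.0, 0.3, 1.0, 0.6]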


def independence_score(y_score, x_sens,
                       as_df=False):
    """Compute the independence criterion for score prediction.

    In classification terminology, it is the **acceptance rate**
    grouped by the score and the sensitive attribute.

    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Independence criterion.
    :rtype: dict or :class:`pandas.DataFrame`
    """
    criterion = pd.crosstab(index=y_score,
                            columns=x_sens,
                            normalize='columns')

    if not as_df:
        criterion = criterion.to_dict()

    return criterion
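
# Usage sketch (not part of the original module), with made-up data;
# per sensitive group, the distribution over score values:
#
#   y_score = pd.Series([1, 0, 1, 1, 0, 0, 1, 0])
#   x_sens = pd.Series(['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'])
#   independence_score(y_score, x_sens)
#   # -> {'a': {0: 0.25, 1: 0.75}, 'b': {0: 0.75, 1: 0.25}}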


def separation_score(y_true, y_score, x_sens,
                     labels=None,
                     as_df=False):
    """Compute the separation criterion for score prediction.

    In classification terminology, it is the **FPR** and **TPR**
    grouped by the score and the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param labels: Score values to consider;
                   inferred from ``y_score`` if ``None``.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Separation criterion.
    :rtype: dict or :class:`pandas.DataFrame`
    """
    _assert_binary(y_true)

    # Validate the given labels against the observed score values.
    labels = _get_labels(y_score, labels)

    criterion = pd.crosstab(index=y_score,
                            columns=[y_true, x_sens],
                            normalize=True)

    if not as_df:
        criterion = criterion.to_dict()

    return criterion
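
# Usage sketch (not part of the original module), with made-up data;
# with as_df=True the result is a DataFrame whose rows are score values
# and whose columns are (y_true, x_sens) pairs:
#
#   y_true = pd.Series([1, 1, 0, 0, 1, 0])
#   y_score = pd.Series([1, 0, 0, 1, 1, 0])
#   x_sens = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'])
#   separation_score(y_true, y_score, x_sens, as_df=True)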


def sufficiency_score(y_true, y_score, x_sens,
                      labels=None,
                      within_score_percentile=False,
                      as_df=False):
    """Compute the sufficiency criterion for score prediction.

    In classification terminology, it is the **PPV** and the **NPV**
    grouped by the score and the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param labels: Ground-truth label values to consider;
                   inferred from ``y_true`` if ``None``.
    :param within_score_percentile: If truthy, first replace each score
                                    with its percentile rank within its
                                    sensitive-attribute group, floored
                                    to this number of digits.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Sufficiency criterion.
    :rtype: dict or :class:`pandas.DataFrame`
    """
    _assert_binary(y_true)

    labels = _get_labels(y_true, labels)

    if within_score_percentile:
        y_score = _normalize_by_attr(y_score, x_sens,
                                     within_score_percentile)

    criterion = pd.crosstab(index=y_score,
                            columns=x_sens,
                            values=y_true,
                            aggfunc=partial(_proportion,
                                            labels=labels))

    if not as_df:
        criterion = criterion.to_dict()

    return criterion
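
# Usage sketch (not part of the original module), with made-up data;
# each cell is the fraction of positive ground-truth labels among the
# samples with that (score value, group) combination:
#
#   y_true = pd.Series([1, 0, 1, 1, 0, 0])
#   y_score = pd.Series([1, 1, 0, 1, 1, 0])
#   x_sens = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'])
#   sufficiency_score(y_true, y_score, x_sens)
#   # -> {'a': {0: 1.0, 1: 0.5}, 'b': {0: 0.0, 1: 0.5}}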


def _all_equal(iterator):
    # True if the iterator is empty or all of its elements are
    # numerically close to the first one.
    iterator = iter(iterator)

    try:
        first = next(iterator)
    except StopIteration:
        return True

    try:
        # np.allclose raises ValueError on shape mismatch,
        # which also means the elements are not all equal.
        return all(np.allclose(first, rest) for rest in iterator)
    except ValueError:
        return False


def roc_curve_by_attr(y_true, y_score, x_sens,
                      pos_label=None, sample_weight=None,
                      drop_intermediate=False):
    """Compute Receiver Operating Characteristic (ROC) by attribute.

    Based on :func:`sklearn.metrics.roc_curve`.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param pos_label: Label considered as positive;
                      all others are considered negative.
    :param sample_weight: Sample weights.
    :param drop_intermediate: Whether to drop some suboptimal
                              thresholds which would not appear on
                              a plotted ROC curve.
                              This is useful in order to create
                              lighter ROC curves.
    :return: For each value of the sensitive attribute:
             - fpr - Increasing false positive rates such
               that element i is the false positive rate
               of predictions with score >= thresholds[i].
             - tpr - Increasing true positive rates such
               that element i is the true positive rate
               of predictions with score >= thresholds[i].
             - thresholds -
               Decreasing thresholds on the decision function
               used to compute fpr and tpr. thresholds[0] represents
               no instances being predicted and is arbitrarily set
               to max(y_score) + 1.
    :rtype: dict
    """
    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    roc_curves = {x_sens_value: roc_curve(group['y_true'],
                                          group['y_score'],
                                          pos_label=pos_label,
                                          sample_weight=sample_weight,
                                          drop_intermediate=drop_intermediate)
                  for x_sens_value, group in grouped}

    if not _all_equal(thresholds
                      for _, _, thresholds in roc_curves.values()):
        raise NotImplementedError('All the score values should'
                                  ' appear for each sensitive'
                                  ' attribute value.'
                                  ' Support for this case will be'
                                  ' implemented in the future.'
                                  ' Please post your use-case in'
                                  ' https://github.com/ResponsiblyAI/responsibly/issues/15')  # pylint: disable=line-too-long

    return roc_curves
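
# Usage sketch (not part of the original module), with made-up data.
# Note that every score value appears in both groups, as the
# thresholds-equality check above requires:
#
#   y_true = pd.Series([1, 0, 1, 0, 1, 0])
#   y_score = pd.Series([0.9, 0.8, 0.5, 0.5, 0.8, 0.9])
#   x_sens = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'])
#   curves = roc_curve_by_attr(y_true, y_score, x_sens)
#   fpr_a, tpr_a, thresholds_a = curves['a']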


def roc_auc_score_by_attr(y_true, y_score, x_sens,
                          sample_weight=None):
    """Compute Area Under the ROC Curve (AUC) by attribute.

    Based on :func:`sklearn.metrics.roc_auc_score`.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param sample_weight: Sample weights.
    :return: ROC AUC grouped by the sensitive attribute.
    :rtype: dict
    """
    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    return {x_sens_value: roc_auc_score(group['y_true'],
                                        group['y_score'],
                                        sample_weight=sample_weight)
            for x_sens_value, group in grouped}
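
# Usage sketch (not part of the original module), with made-up data;
# AUC is computed separately per group, worked out by hand here:
#
#   y_true = pd.Series([1, 0, 1, 0, 1, 0])
#   y_score = pd.Series([0.9, 0.2, 0.6, 0.4, 0.3, 0.8])
#   x_sens = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'])
#   roc_auc_score_by_attr(y_true, y_score, x_sens)
#   # -> {'a': 1.0, 'b': 0.0}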