import pandas as pd
from pandas.core.algorithms import unique as _unique
from sklearn.metrics import confusion_matrix

from responsibly.fairness.metrics.utils import _assert_binary


def _select_dict(d, keys):
    """Select a sub-dict of `d` with the given keys."""
    return {k: d[k] for k in keys}


def _nested_select_dict(d, nested_keys):
    """Select the same sub-dict from every value of a dict of dicts."""
    return {k: _select_dict(v, nested_keys)
            for k, v in d.items()}


def _choose_other(item, iterable):
    """Return an element of `iterable` that differs from `item`."""
    return next(other for other in iterable
                if other != item)


def _nested_diff_and_ratio(d, nested_key, first, second):
    """Compute diff and ratio of a nested value between the two keys of d."""
    assert d.keys() == {first, second}

    return {'diff': d[first][nested_key] - d[second][nested_key],
            'ratio': d[first][nested_key] / d[second][nested_key]}


def binary_stats_by_attr(y_true, y_pred, x_attr,
                         labels=None):
    """Compute binary classification statistics grouped by an attribute.

    Rates are computed within each group, so a group that lacks
    positive (or negative) examples yields undefined rates.
    """
    # pylint: disable=too-many-locals

    _assert_binary(y_true, y_pred)

    stats = {}

    for x_att_val in _unique(x_attr):
        mask = (x_attr == x_att_val)

        tn, fp, fn, tp = confusion_matrix(y_true[mask],
                                          y_pred[mask],
                                          labels=labels).ravel()

        pos = tp + fn
        neg = tn + fp

        acceptance = tp + fp
        rejection = tn + fn

        correct = tp + tn

        total = pos + neg

        stats[x_att_val] = {
            'total': int(total),
            'proportion': total / len(x_attr),
            'pos': int(pos),
            'neg': int(neg),
            'base_rate': pos / total,
            'acceptance_rate': acceptance / total,
            'tn': int(tn),
            'fp': int(fp),
            'fn': int(fn),
            'tp': int(tp),
            'accuracy': correct / total,
            'balanced_accuracy': (tp / pos + tn / neg) / 2,
            'tpr': tp / pos,
            'tnr': tn / neg,
            'fnr': fn / pos,
            'fpr': fp / neg,
            'ppv': tp / acceptance,
            'npv': tn / rejection
        }

    return stats


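# A minimal usage sketch for `binary_stats_by_attr` (hypothetical data,
# kept in comments so nothing runs on import):
#
#   import numpy as np
#
#   y_true = np.array([1, 0, 1, 1, 0, 0, 1, 0])
#   y_pred = np.array([1, 0, 0, 1, 1, 0, 1, 0])
#   x_sens = np.array(['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'])
#
#   stats = binary_stats_by_attr(y_true, y_pred, x_sens)
#   stats['a']['tpr'], stats['b']['tpr']  # per-group true positive rates

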
def compare_privileged(stats,
                       x_sens_privileged=None):
    """Compare each metric between the privileged and unprivileged group.

    Return None if there are not exactly two groups.
    """
    # pylint: disable=line-too-long

    if len(stats) != 2:
        if x_sens_privileged is not None:
            raise ValueError('x_sens_privileged is applicable only when'
                             ' the sensitive attribute has exactly two'
                             ' values for comparison'
                             ' (difference and ratio).')

        return None

    comparison = {}

    if x_sens_privileged is None:
        x_sens_privileged = next(iter(stats))

    x_sens_unprivileged = _choose_other(x_sens_privileged,
                                        stats)

    comparison['x_sens_privileged'] = x_sens_privileged
    comparison['x_sens_unprivileged'] = x_sens_unprivileged

    comparison['metrics'] = {}

    metrics = next(iter(stats.values())).keys()

    for metric in metrics:
        comparison['metrics'][metric] = _nested_diff_and_ratio(stats,
                                                               metric,
                                                               x_sens_unprivileged,
                                                               x_sens_privileged)

    return comparison


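# Sketch of `compare_privileged` on the per-group stats above
# (hypothetical data): the diff and ratio are taken as unprivileged
# minus/over privileged, matching the argument order passed to
# `_nested_diff_and_ratio`.
#
#   acc_rates = _nested_select_dict(stats, ('acceptance_rate',))
#   comparison = compare_privileged(acc_rates, x_sens_privileged='a')
#   comparison['metrics']['acceptance_rate']  # {'diff': ..., 'ratio': ...}

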
def group_fairness_criterion_binary(y_true, y_pred, x_sens,
                                    metrics,
                                    x_sens_privileged=None,
                                    labels=None,
                                    as_df=False):
    """Compute a group fairness criterion given its defining metrics."""

    stats = binary_stats_by_attr(y_true, y_pred, x_sens,
                                 labels=labels)

    criterion = _nested_select_dict(stats,
                                    metrics)

    comparison = compare_privileged(criterion,
                                    x_sens_privileged)

    if as_df:
        criterion = pd.DataFrame(criterion)

        if comparison is not None:
            vs_name = ('{x_sens_unprivileged} vs. {x_sens_privileged}'
                       .format(**comparison))

            comparison = pd.DataFrame(comparison['metrics'])
            comparison.index.name = vs_name

    return criterion, comparison


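# `group_fairness_criterion_binary` also works directly with any subset
# of the statistics computed by `binary_stats_by_attr`, e.g. accuracy
# parity (a sketch with the hypothetical data above):
#
#   acc, comp = group_fairness_criterion_binary(y_true, y_pred, x_sens,
#                                               ('accuracy',),
#                                               as_df=True)

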
def independence_binary(y_pred, x_sens,
                        x_sens_privileged=None,
                        labels=None,
                        as_df=False):
    """Compute the independence criterion for binary prediction.

    In classification terminology, it is the **acceptance rate**
    grouped by the sensitive attribute.

    :param y_pred: Estimated targets as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   target.
    :param x_sens_privileged: The privileged value of the
                              sensitive attribute. Relevant only
                              if there are only two values for
                              the sensitive attribute.
    :param labels: List of labels to choose the negative and positive target.
                   This may be used to reorder or select a subset of labels.
                   If none is given, those that appear at least once in
                   y_pred are used in sorted order; the first is negative
                   and the second is positive.
    :param as_df: Whether to return the results as `dict` (if `False`)
                  or as :class:`pandas.DataFrame` (if `True`).
    :return: Independence criterion and comparison if there are
             only two values for the sensitive attribute.
    :rtype: tuple
    """

    # hack to keep the same code structure for independence
    # as for separation and sufficiency:
    # pass y_pred as the ground truth and take only acceptance_rate
    return group_fairness_criterion_binary(y_pred, y_pred, x_sens,
                                           ('acceptance_rate',),
                                           x_sens_privileged,
                                           labels,
                                           as_df)


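# Usage sketch (hypothetical data): independence (demographic parity)
# holds when the acceptance rate is roughly equal across groups.
#
#   indep, comp = independence_binary(y_pred, x_sens, as_df=True)
#   indep  # acceptance rate per group
#   comp   # diff and ratio between the two groups

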
def separation_binary(y_true, y_pred, x_sens,
                      x_sens_privileged=None,
                      labels=None,
                      as_df=False):
    """Compute the separation criterion for binary prediction.

    In classification terminology, it is the **TPR**, **FPR**,
    **TNR** and **FNR** grouped by the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_pred: Estimated binary targets as returned
                   by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   target.
    :param x_sens_privileged: The privileged value of the
                              sensitive attribute. Relevant only
                              if there are only two values for
                              the sensitive attribute.
    :param labels: List of labels to choose the negative and positive target.
                   This may be used to reorder or select a subset of labels.
                   If none is given, those that appear at least once in
                   y_true or y_pred are used in sorted order; the first is
                   negative and the second is positive.
    :param as_df: Whether to return the results as `dict` (if `False`)
                  or as :class:`pandas.DataFrame` (if `True`).
    :return: Separation criterion and comparison if there are
             only two values for the sensitive attribute.
    :rtype: tuple
    """

    return group_fairness_criterion_binary(y_true, y_pred, x_sens,
                                           ('tpr', 'fpr', 'tnr', 'fnr'),
                                           x_sens_privileged,
                                           labels,
                                           as_df)


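# Usage sketch (hypothetical data): separation (e.g. equalized odds)
# compares error rates given the ground truth across groups.
#
#   sep, comp = separation_binary(y_true, y_pred, x_sens, as_df=True)
#   sep.loc['tpr']  # true positive rate per group

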
def sufficiency_binary(y_true, y_pred, x_sens,
                       x_sens_privileged=None,
                       labels=None,
                       as_df=False):
    """Compute the sufficiency criterion for binary prediction.

    In classification terminology, it is the **PPV** and **NPV**
    grouped by the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_pred: Binary estimated targets as returned by
                   a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   target.
    :param x_sens_privileged: The privileged value of the
                              sensitive attribute. Relevant only
                              if there are only two values for
                              the sensitive attribute.
    :param labels: List of labels to choose the negative and positive target.
                   This may be used to reorder or select a subset of labels.
                   If none is given, those that appear at least once in
                   y_true or y_pred are used in sorted order; the first is
                   negative and the second is positive.
    :param as_df: Whether to return the results as `dict` (if `False`)
                  or as :class:`pandas.DataFrame` (if `True`).
    :return: Sufficiency criterion and comparison if there are
             only two values for the sensitive attribute.
    :rtype: tuple
    """

    return group_fairness_criterion_binary(y_true, y_pred, x_sens,
                                           ('ppv', 'npv'),
                                           x_sens_privileged,
                                           labels,
                                           as_df)


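# Usage sketch (hypothetical data): sufficiency compares the predictive
# values given the prediction across groups.
#
#   suf, comp = sufficiency_binary(y_true, y_pred, x_sens, as_df=True)
#   suf.loc['ppv']  # positive predictive value per group

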
def report_binary(y_true, y_pred, x_sens,
                  labels=None):
    """Generate a report of criteria for binary prediction.

    In classification terminology, the statistics are
    grouped by the sensitive attribute:
    - Number of observations per group
    - Proportion of observations per group
    - Base rate
    - Acceptance rate
    - Accuracy
    - FNR
    - FPR
    - PPV
    - NPV

    :param y_true: Binary ground truth (correct) target values.
    :param y_pred: Binary estimated targets as returned by
                   a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   target.
    :param labels: List of labels to choose the negative and positive target.
                   This may be used to reorder or select a subset of labels.
                   If none is given, those that appear at least once in
                   y_true or y_pred are used in sorted order; the first is
                   negative and the second is positive.
    :return: Classification statistics grouped by the
             sensitive attribute.
    :rtype: :class:`pandas.DataFrame`
    """

    stats = binary_stats_by_attr(y_true, y_pred, x_sens,
                                 labels=labels)
    stats_df = pd.DataFrame(stats)

    return stats_df.loc[['total', 'proportion', 'base_rate',
                         'acceptance_rate', 'accuracy',
                         'fnr', 'fpr', 'ppv', 'npv']]
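

# Usage sketch for the report (hypothetical data):
#
#   report = report_binary(y_true, y_pred, x_sens)
#   print(report)  # one column per sensitive-attribute value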