1
|
|
|
import pandas as pd |
2
|
|
|
from pandas.core.algorithms import unique as _unique |
3
|
|
|
from sklearn.metrics import confusion_matrix |
4
|
|
|
|
5
|
|
|
from ethically.fairness.metrics.utils import _assert_binary |
6
|
|
|
|
7
|
|
|
|
8
|
|
|
def _select_dict(d, keys): |
9
|
|
|
return {k: d[k] for k in keys} |
10
|
|
|
|
11
|
|
|
|
12
|
|
|
def _nested_select_dict(d, nested_keys): |
13
|
|
|
return {k: |
14
|
|
|
_select_dict(v, nested_keys) |
15
|
|
|
for k, v in d.items()} |
16
|
|
|
|
17
|
|
|
|
18
|
|
|
def _choose_other(item, iterable): |
19
|
|
|
return next(other for other in iterable |
20
|
|
|
if other != item) |
21
|
|
|
|
22
|
|
|
|
23
|
|
|
def _nested_diff_and_ratio(d, nested_key, first, second): |
24
|
|
|
|
25
|
|
|
assert d.keys() == {first, second} |
26
|
|
|
|
27
|
|
|
return {'diff': d[first][nested_key] - d[second][nested_key], |
28
|
|
|
'ratio': d[first][nested_key] / d[second][nested_key]} |
29
|
|
|
|
30
|
|
|
|
31
|
|
|
def binary_stats_by_attr(y_true, y_pred, x_attr,
                         labels=None):
    """Compute binary classification statistics grouped by an attribute.

    For every distinct value of *x_attr*, the confusion matrix of the
    matching subset of predictions is computed and expanded into counts
    and rates (base rate, acceptance rate, TPR/TNR/FNR/FPR, PPV, NPV, ...).

    :param y_true: Binary ground truth (correct) target values.
    :param y_pred: Binary estimated targets as returned by a classifier.
    :param x_attr: Attribute values corresponded to each target.
    :param labels: List of labels to choose the negative and positive
                   target; forwarded to
                   :func:`sklearn.metrics.confusion_matrix`.
    :return: Mapping from each attribute value to its statistics dict.
    :rtype: dict
    """
    # pylint: disable=too-many-locals

    _assert_binary(y_true, y_pred)

    stats = {}

    for value in _unique(x_attr):
        group = (x_attr == value)

        # ravel() order for a binary confusion matrix is tn, fp, fn, tp.
        tn, fp, fn, tp = confusion_matrix(y_true[group],
                                          y_pred[group],
                                          labels=labels).ravel()

        positives = tp + fn
        negatives = tn + fp
        accepted = tp + fp
        rejected = tn + fn
        hits = tp + tn
        size = positives + negatives

        stats[value] = {
            'total': int(size),
            'proportion': size / len(x_attr),
            'pos': int(positives),
            'neg': int(negatives),
            'base_rate': positives / size,
            'acceptance_rate': accepted / size,
            'tn': int(tn),
            'fp': int(fp),
            'fn': int(fn),
            'tp': int(tp),
            'accuracy': hits / size,
            'balanced_accuracy': (tp / positives + tn / negatives) / 2,
            'tpr': tp / positives,
            'tnr': tn / negatives,
            'fnr': fn / positives,
            'fpr': fp / negatives,
            'ppv': tp / accepted,
            'npv': tn / rejected,
        }

    return stats
78
|
|
|
|
79
|
|
|
|
80
|
|
|
def compare_privileged(stats,
                       x_sens_privileged=None):
    """Compare per-group statistics between two sensitive-attribute values.

    :param stats: Dictionary of metric dicts keyed by sensitive-attribute
                  value, as produced by :func:`binary_stats_by_attr`.
    :param x_sens_privileged: The privileged value of the sensitive
                              attribute. If ``None``, the first key of
                              *stats* is taken as privileged.
    :return: Dictionary with the privileged/unprivileged values and, per
             metric, the difference and ratio (unprivileged vs.
             privileged); ``None`` if *stats* does not have exactly two
             groups and no explicit *x_sens_privileged* was requested.
    :raises ValueError: If *x_sens_privileged* is given but *stats* does
                        not have exactly two groups.
    """
    # pylint: disable=line-too-long

    if len(stats) != 2:
        if x_sens_privileged is not None:
            # BUG FIX: the original implicit string concatenation lacked
            # separating spaces ("...should haveonly two values for
            # comparision(difference...") and misspelled "comparison".
            raise ValueError('x_sens_privileged should have'
                             ' only two values for comparison'
                             ' (difference and ratio).')

        return None

    comparison = {}

    if x_sens_privileged is None:
        x_sens_privileged = next(iter(stats))

    x_sens_unprivileged = _choose_other(x_sens_privileged,
                                        stats)

    comparison['x_sens_privileged'] = x_sens_privileged
    comparison['x_sens_unprivileged'] = x_sens_unprivileged

    comparison['metrics'] = {}

    # All groups share the same metric keys; take them from any one group.
    metrics = next(iter(stats.values())).keys()

    for metric in metrics:
        comparison['metrics'][metric] = _nested_diff_and_ratio(
            stats,
            metric,
            x_sens_unprivileged,
            x_sens_privileged)

    return comparison
114
|
|
|
|
115
|
|
|
|
116
|
|
|
def group_fairness_criterion_binary(y_true, y_pred, x_sens,
                                    metrics,
                                    x_sens_privileged=None,
                                    labels=None,
                                    as_df=False):
    """Compute a group-fairness criterion for binary prediction.

    Selects the given *metrics* from the per-group classification
    statistics and, when the sensitive attribute is binary, compares the
    two groups (difference and ratio).

    :param y_true: Binary ground truth (correct) target values.
    :param y_pred: Binary estimated targets as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each target.
    :param metrics: Iterable of metric names to select
                    (keys of :func:`binary_stats_by_attr` output).
    :param x_sens_privileged: The privileged value of the sensitive
                              attribute; relevant only with two groups.
    :param labels: List of labels to choose the negative and positive
                   target.
    :param as_df: Whether to return results as `dict` (if `False`)
                  or as :class:`pandas.DataFrame` (if `True`).
    :return: Tuple of the selected criterion and the two-group comparison
             (``None`` when there are more than two groups).
    :rtype: tuple
    """
    grouped = binary_stats_by_attr(y_true, y_pred, x_sens,
                                   labels=labels)

    criterion = _nested_select_dict(grouped, metrics)

    comparison = compare_privileged(criterion, x_sens_privileged)

    if as_df:
        criterion = pd.DataFrame(criterion)

        if comparison is not None:
            index_title = ('{x_sens_unprivileged} vs. {x_sens_privileged}'
                           .format(**comparison))

            comparison = pd.DataFrame(comparison['metrics'])
            comparison.index.name = index_title

    return criterion, comparison
142
|
|
|
|
143
|
|
|
|
144
|
|
|
def independence_binary(y_pred, x_sens,
                        x_sens_privileged=None,
                        labels=None,
                        as_df=False):
    """Compute the independence criteria for binary prediction.

    In classification terminology, it is the **acceptance rate**
    grouped by the sensitive attribute.

    :param y_pred: Estimated targets as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   target.
    :param x_sens_privileged: The privileged value in the
                              sensitive attribute. Relevant only
                              if there are only two values for
                              the sensitive attribute.
    :param labels: List of labels to choose the negative and positive target.
                   This may be used to reorder or select a subset of labels.
                   If none is given, those that appear at least once in
                   y_pred are used in sorted order; first is negative
                   and the second is positive.
    :param as_df: Whether to return the results as `dict` (if `False`)
                  or as :class:`pandas.DataFrame` (if `True`).
    :return: Independence criteria and comparison if there are
             only two values for the sensitive attribute.
    :rtype: tuple
    """
    # Independence depends only on the prediction, so y_pred also stands
    # in for the ground-truth argument; only acceptance_rate is kept.
    # This keeps the code structure shared with separation/sufficiency.
    return group_fairness_criterion_binary(y_pred, y_pred, x_sens,
                                           ('acceptance_rate',),
                                           x_sens_privileged=x_sens_privileged,
                                           labels=labels,
                                           as_df=as_df)
180
|
|
|
|
181
|
|
|
|
182
|
|
|
def separation_binary(y_true, y_pred, x_sens,
                      x_sens_privileged=None,
                      labels=None,
                      as_df=False):
    """Compute the separation criteria for binary prediction.

    In classification terminology, it is the **TPR**, **FPR**,
    **TNR** and **FNR** grouped by the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_pred: Estimated binary targets as returned
                   by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   target.
    :param x_sens_privileged: The privileged value in the
                              sensitive attribute. Relevant only
                              if there are only two values for
                              the sensitive attribute.
    :param labels: List of labels to choose the negative and positive target.
                   This may be used to reorder or select a subset of labels.
                   If none is given, those that appear at least once in
                   y_pred are used in sorted order; first is negative
                   and the second is positive.
    :param as_df: Whether to return the results as `dict` (if `False`)
                  or as :class:`pandas.DataFrame` (if `True`).
    :return: Separation criteria and comparison if there are
             only two values for the sensitive attribute.
    :rtype: tuple
    """
    return group_fairness_criterion_binary(y_true, y_pred, x_sens,
                                           ('tpr', 'fpr', 'tnr', 'fnr'),
                                           x_sens_privileged=x_sens_privileged,
                                           labels=labels,
                                           as_df=as_df)
217
|
|
|
|
218
|
|
|
|
219
|
|
|
def sufficiency_binary(y_true, y_pred, x_sens,
                       x_sens_privileged=None,
                       labels=None,
                       as_df=False):
    """Compute the sufficiency criteria for binary prediction.

    In classification terminology, it is the **PPV** and **NPV**
    grouped by the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_pred: Binary estimated targets as returned by
                   a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   target.
    :param x_sens_privileged: The privileged value in the
                              sensitive attribute. Relevant only
                              if there are only two values for
                              the sensitive attribute.
    :param labels: List of labels to choose the negative and positive target.
                   This may be used to reorder or select a subset of labels.
                   If none is given, those that appear at least once in
                   y_pred are used in sorted order; first is negative
                   and the second is positive.
    :param as_df: Whether to return the results as `dict` (if `False`)
                  or as :class:`pandas.DataFrame` (if `True`).
    :return: Sufficiency criteria and comparison if there are
             only two values for the sensitive attribute.
    :rtype: tuple
    """
    return group_fairness_criterion_binary(y_true, y_pred, x_sens,
                                           ('ppv', 'npv'),
                                           x_sens_privileged=x_sens_privileged,
                                           labels=labels,
                                           as_df=as_df)
254
|
|
|
|
255
|
|
|
|
256
|
|
|
def report_binary(y_true, y_pred, x_sens,
                  labels=None):
    """Generate a report of criteria for binary prediction.

    In classification terminology, the statistics are
    grouped by the sensitive attribute:
    - Number of observations per group
    - Proportion of observations per group
    - Base rate
    - Acceptance rate
    - Accuracy
    - FNR
    - FPR
    - PPV
    - NPV

    :param y_true: Binary ground truth (correct) target values.
    :param y_pred: Binary estimated targets as returned by
                   a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   target.
    :param labels: List of labels to choose the negative and positive target.
                   This may be used to reorder or select a subset of labels.
                   If none is given, those that appear at least once in
                   y_pred are used in sorted order; first is negative
                   and the second is positive.
    :return: Classification statistics grouped by the
             sensitive attribute.
    :rtype: :class:`pandas.DataFrame`
    """
    # NOTE: the docstring previously listed TPR, but the report selects
    # 'fpr' (and also 'accuracy'); the listing above matches the code.
    # labels is passed by keyword for consistency with the sibling calls.
    stats = binary_stats_by_attr(y_true, y_pred, x_sens,
                                 labels=labels)
    stats_df = pd.DataFrame(stats)

    return stats_df.loc[['total', 'proportion', 'base_rate',
                         'acceptance_rate', 'accuracy',
                         'fnr', 'fpr', 'ppv', 'npv']]
292
|
|
|
|