Completed
Pull Request — master (#12)
by Shlomi
02:38
created

ethically.fairness.metrics.score   A

Complexity

Total Complexity 16

Size/Duplication

Total Lines 212
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 86
dl 0
loc 212
rs 10
c 0
b 0
f 0
wmc 16

9 Functions

Rating   Name   Duplication   Size   Complexity  
A _proportion() 0 5 1
A _get_labels() 0 10 3
A roc_curve_by_attr() 0 42 1
A sufficiency_score() 0 37 3
A roc_auc_score_by_attr() 0 21 1
A _groupby_y_x_sens() 0 5 1
A independence_score() 0 23 2
A separation_score() 0 30 2
A _normalize_by_attr() 0 11 2
1
from collections import Counter
2
from functools import partial
3
4
import numpy as np
5
import pandas as pd
6
from sklearn.metrics import roc_auc_score, roc_curve
7
from sklearn.utils.multiclass import unique_labels
8
9
from ethically.fairness.metrics.utils import _assert_binary
10
11
12
def _proportion(data, labels):
13
    counts = Counter(data)
14
    assert set(counts.keys()).issubset(labels)
15
    return (counts[labels[1]]
16
            / (counts[labels[0]] + counts[labels[1]]))
17
18
19
def _get_labels(ys, labels):
20
21
    if labels is None:
22
        labels = unique_labels(ys)
23
    else:
24
        labels = np.asarray(labels)
25
        if np.all([l not in ys for l in labels]):
26
            raise ValueError('At least one label specified must be in y.')
27
28
    return labels
29
30
31
def _normalize_by_attr(y_score, x_sens, ndigits=1):
32
    y_score_within = y_score[:]
33
34
    for indices in x_sens.groupby(x_sens).groups.values():
35
        y_score_within[indices] = (y_score_within[indices]
36
                                   .rank(pct=True))
37
38
    y_score_within = (np.floor(y_score_within * (10**ndigits))
39
                      / (10**ndigits))
40
41
    return y_score_within
42
43
44
def independence_score(y_score, x_sens,
                       as_df=False):
    """Compute the independence criteria for score prediction.

    In classification terminology, it is the **acceptance rate**
    grouped by the score and the sensitive attribute.

    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Independence criteria.
    :rtype: dict or pandas.DataFrame
    """
    # Normalize each sensitive-attribute column to sum to 1, i.e. the
    # score distribution within every sensitive-attribute group.
    criterion = pd.crosstab(index=y_score,
                            columns=x_sens,
                            normalize='columns')

    if not as_df:
        criterion = criterion.to_dict()

    return criterion
67
68
69
def separation_score(y_true, y_score, x_sens,
                     labels=None,
                     as_df=False):
    """Compute the separation criteria for score prediction.

    In classification terminology, it is the **FPR** and **TPR**
    grouped by the score and the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param labels: Score labels to validate against ``y_score``;
                   inferred from ``y_score`` when ``None``.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Separation criteria.
    :rtype: dict or pandas.DataFrame
    """

    _assert_binary(y_true)

    # Called for validation only: raises ValueError if explicit labels
    # were given and none of them appears in y_score.  The resolved
    # labels are not otherwise used here.
    _get_labels(y_score, labels)

    # NOTE(review): normalize=True yields the joint distribution over
    # (score, y_true, x_sens) rather than rates conditioned on
    # (y_true, x_sens) — confirm this is the intended normalization.
    criterion = pd.crosstab(index=y_score,
                            columns=[y_true, x_sens],
                            normalize=True)

    if not as_df:
        criterion = criterion.to_dict()

    return criterion
99
100
101
def sufficiency_score(y_true, y_score, x_sens,
                      labels=None,
                      within_score_percentile=False,
                      as_df=False):
    """Compute the sufficiency criteria for score prediction.

    In classification terminology, it is the **PPV** and the **NPV**
    grouped by the score and the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   target.
    :param labels: The two class labels; the proportion of
                   ``labels[1]`` is reported per cell.
                   Inferred from ``y_true`` when ``None``.
    :param within_score_percentile: If truthy, first replace the scores
                                    with their within-group percentile
                                    ranks; the value is also used as
                                    the number of digits the ranks are
                                    floored to (``True`` behaves like
                                    ``1``, since ``10**True == 10``).
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Sufficiency criteria.
    :rtype: dict or pandas.DataFrame
    """

    _assert_binary(y_true)

    labels = _get_labels(y_true, labels)

    if within_score_percentile:
        y_score = _normalize_by_attr(y_score, x_sens,
                                     within_score_percentile)

    # Proportion of the positive label (labels[1]) of y_true within
    # every (score, sensitive-attribute) cell.
    criterion = pd.crosstab(index=y_score,
                            columns=x_sens,
                            values=y_true,
                            aggfunc=partial(_proportion,
                                            labels=labels))

    if not as_df:
        criterion = criterion.to_dict()

    return criterion
138
139
140
def _groupby_y_x_sens(y_true, y_score, x_sens):
141
    return (pd.DataFrame({'y_true': y_true,
142
                          'y_score': y_score,
143
                          'x_sens': x_sens})
144
            .groupby('x_sens'))
145
146
147
def roc_curve_by_attr(y_true, y_score, x_sens,
                      pos_label=None, sample_weight=None,
                      drop_intermediate=False):
    """Compute Receiver operating characteristic (ROC) by attribute.

    Based on :func:`sklearn.metrics.roc_curve`

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param pos_label: Label considered as positive and others
                      are considered negative.
    :param sample_weight: Sample weights.
    :param drop_intermediate: Whether to drop some suboptimal
                              thresholds which would not appear on
                              a plotted ROC curve.
                              This is useful in order to create
                              lighter ROC curves.
    :return: For each value of sensitive attribute:
             - fpr - Increasing false positive rates such
               that element i is the false positive rate
               of predictions with score >= thresholds[i].
             - tpr - Increasing true positive rates such
               that element i is the true positive rate
               of predictions with score >= thresholds[i].
             - thresholds -
               Decreasing thresholds on the decision function
               used to compute fpr and tpr. thresholds[0] represents
               no instances being predicted and is arbitrarily set
               to max(y_score) + 1.
    :rtype: dict

    """

    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    # Pass the optional arguments by keyword: they are keyword-only
    # in recent scikit-learn releases, so passing them positionally
    # raises a TypeError.
    return {x_sens_value: roc_curve(group['y_true'],
                                    group['y_score'],
                                    pos_label=pos_label,
                                    sample_weight=sample_weight,
                                    drop_intermediate=drop_intermediate)
            for x_sens_value, group in grouped}
189
190
191
def roc_auc_score_by_attr(y_true, y_score, x_sens,
                          sample_weight=None):
    """Compute Area Under the ROC (AUC) by attribute.

    Based on :func:`sklearn.metrics.roc_auc_score`

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param sample_weight: Sample weights.
    :return: ROC AUC grouped by the sensitive attribute.
    :rtype: dict
    """

    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    return {x_sens_value: roc_auc_score(group['y_true'],
                                        group['y_score'],
                                        sample_weight=sample_weight)
            for x_sens_value, group in grouped}
212