from collections import Counter
from functools import partial

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.utils.multiclass import unique_labels

from ethically.fairness.metrics.utils import _assert_binary


def _proportion(data, labels):
    # Proportion of samples carrying the positive label (``labels[1]``)
    # out of all the samples carrying either label.
    counts = Counter(data)
    assert set(counts.keys()).issubset(labels)
    return (counts[labels[1]]
            / (counts[labels[0]] + counts[labels[1]]))


def _get_labels(ys, labels):
    # Infer the labels from the data if they were not given explicitly,
    # and validate that at least one given label actually appears in ys.
    if labels is None:
        labels = unique_labels(ys)
    else:
        labels = np.asarray(labels)
        if np.all([label not in ys for label in labels]):
            raise ValueError('At least one label specified must be in y.')

    return labels


def _normalize_by_attr(y_score, x_sens, ndigits=1):
    # Rank the scores as percentiles within every sensitive-attribute
    # group, then floor the percentiles to ``ndigits`` digits.
    y_score_within = y_score[:]

    for indices in x_sens.groupby(x_sens).groups.values():
        y_score_within[indices] = (y_score_within[indices]
                                   .rank(pct=True))

    y_score_within = (np.floor(y_score_within * (10**ndigits))
                      / (10**ndigits))

    return y_score_within


def independence_score(y_score, x_sens,
                       as_df=False):
    """Compute the independence criterion for score prediction.

    In classification terminology, it is the **acceptance rate**
    grouped by the score and the sensitive attribute.

    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Independence criterion.
    :rtype: dict or pandas.DataFrame
    """
    criterion = pd.crosstab(index=y_score,
                            columns=x_sens,
                            normalize='columns')

    if not as_df:
        criterion = criterion.to_dict()

    return criterion
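
# A hypothetical usage sketch (an illustration, not part of the module):
# with binary scores, ``independence_score`` reduces to each group's
# acceptance rate, so group 'a' below accepts 2/3 and group 'b' 1/3:
#
#     >>> y_score = pd.Series([1, 1, 0, 1, 0, 0])
#     >>> x_sens = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'])
#     >>> independence_score(y_score, x_sens)
#     {'a': {0: 0.33, 1: 0.67}, 'b': {0: 0.67, 1: 0.33}}  (values rounded)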


def separation_score(y_true, y_score, x_sens,
                     labels=None,
                     as_df=False):
    """Compute the separation criterion for score prediction.

    In classification terminology, it is the **FPR** and **TPR**
    grouped by the score and the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param labels: Labels of the binary target values;
                   inferred from the data if not given.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Separation criterion.
    :rtype: dict or pandas.DataFrame
    """

    _assert_binary(y_true)

    labels = _get_labels(y_score, labels)

    criterion = pd.crosstab(index=y_score,
                            columns=[y_true, x_sens],
                            normalize='columns')

    if not as_df:
        criterion = criterion.to_dict()

    return criterion
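
# A hypothetical sketch (an illustration, not part of the module): with
# binary scores, every (y_true, x_sens) column sums to one, so the
# score=1 row holds each group's FPR (under the y_true=0 columns) and
# TPR (under the y_true=1 columns):
#
#     >>> y_true = pd.Series([1, 1, 0, 0, 1, 1, 0, 0])
#     >>> y_score = pd.Series([1, 0, 1, 0, 1, 1, 1, 0])
#     >>> x_sens = pd.Series(['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'])
#     >>> separation_score(y_true, y_score, x_sens, as_df=True)
#     # group 'a': FPR=0.5, TPR=0.5; group 'b': FPR=0.5, TPR=1.0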


def sufficiency_score(y_true, y_score, x_sens,
                      labels=None,
                      within_score_percentile=False,
                      as_df=False):
    """Compute the sufficiency criterion for score prediction.

    In classification terminology, it is the **PPV** and the **NPV**
    grouped by the score and the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param labels: Labels of the binary target values;
                   inferred from the data if not given.
    :param within_score_percentile: Whether to convert the score to
                                    within-group percentiles before
                                    computing the criterion.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Sufficiency criterion.
    :rtype: dict or pandas.DataFrame
    """

    _assert_binary(y_true)

    labels = _get_labels(y_true, labels)

    if within_score_percentile:
        y_score = _normalize_by_attr(y_score, x_sens,
                                     within_score_percentile)

    criterion = pd.crosstab(index=y_score,
                            columns=x_sens,
                            values=y_true,
                            aggfunc=partial(_proportion,
                                            labels=labels))

    if not as_df:
        criterion = criterion.to_dict()

    return criterion
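
# A hypothetical sketch (an illustration, not part of the module): with
# binary scores, the score=1 row of the result is each group's PPV, and
# the score=0 row is the proportion of positives among the rejected,
# i.e. one minus the NPV:
#
#     >>> y_true = pd.Series([1, 0, 1, 0, 1, 1, 0, 0])
#     >>> y_score = pd.Series([1, 1, 0, 0, 1, 1, 0, 0])
#     >>> x_sens = pd.Series(['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'])
#     >>> sufficiency_score(y_true, y_score, x_sens)
#     {'a': {0: 0.5, 1: 0.5}, 'b': {0: 0.0, 1: 1.0}}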


def _groupby_y_x_sens(y_true, y_score, x_sens):
    # Group the (y_true, y_score) pairs by sensitive-attribute value.
    return (pd.DataFrame({'y_true': y_true,
                          'y_score': y_score,
                          'x_sens': x_sens})
            .groupby('x_sens'))


def roc_curve_by_attr(y_true, y_score, x_sens,
                      pos_label=None, sample_weight=None,
                      drop_intermediate=False):
    """Compute Receiver Operating Characteristic (ROC) by attribute.

    Based on :func:`sklearn.metrics.roc_curve`.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param pos_label: Label considered as positive;
                      all the others are considered negative.
    :param sample_weight: Sample weights.
    :param drop_intermediate: Whether to drop some suboptimal
                              thresholds which would not appear on
                              a plotted ROC curve.
                              This is useful in order to create
                              lighter ROC curves.
    :return: For each value of the sensitive attribute:

             - fpr - Increasing false positive rates such
               that element i is the false positive rate
               of predictions with score >= thresholds[i].
             - tpr - Increasing true positive rates such
               that element i is the true positive rate
               of predictions with score >= thresholds[i].
             - thresholds - Decreasing thresholds on the decision
               function used to compute fpr and tpr.
               thresholds[0] represents no instances being predicted
               and is arbitrarily set to max(y_score) + 1.

    :rtype: dict
    """

    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    return {x_sens_value: roc_curve(group['y_true'],
                                    group['y_score'],
                                    pos_label=pos_label,
                                    sample_weight=sample_weight,
                                    drop_intermediate=drop_intermediate)
            for x_sens_value, group in grouped}
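
# A hypothetical plotting sketch (an illustration, not part of the
# module), assuming ``import matplotlib.pyplot as plt``: drawing one
# ROC curve per sensitive-attribute value:
#
#     >>> rocs = roc_curve_by_attr(y_true, y_score, x_sens)
#     >>> for value, (fpr, tpr, thresholds) in rocs.items():
#     ...     plt.plot(fpr, tpr, label=value)
#     >>> plt.legend()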


def roc_auc_score_by_attr(y_true, y_score, x_sens,
                          sample_weight=None):
    """Compute Area Under the ROC Curve (AUC) by attribute.

    Based on :func:`sklearn.metrics.roc_auc_score`.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param sample_weight: Sample weights.
    :return: ROC AUC grouped by the sensitive attribute.
    :rtype: dict
    """

    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    return {x_sens_value: roc_auc_score(group['y_true'],
                                        group['y_score'],
                                        sample_weight=sample_weight)
            for x_sens_value, group in grouped}
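
# A hypothetical sketch (an illustration, not part of the module):
# comparing how well the score ranks positives above negatives within
# each group:
#
#     >>> roc_auc_score_by_attr(y_true, y_score, x_sens)
#     {'a': ..., 'b': ...}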