responsibly.fairness.metrics.score - Code Metrics - Inspection of "Useful threshold api (#31)" - ResponsiblyAI/responsibly - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 400a5c...f23aca )

by Shlomi

created 2019-08-04 03:01 UTC

responsibly.fairness.metrics.score A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	233
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	96
dl	0
loc	233
rs	10
c	0
b	0
f	0
wmc	19

9 Functions

Rating	Name	Size	Complexity
A	_proportion()	5	1
A	_normalize_by_attr()	11	2
A	roc_auc_score_by_attr()	21	1
A	_all_equal()	12	3
A	_get_labels()	10	3
A	roc_curve_by_attr()	54	2
A	sufficiency_score()	37	3
A	independence_score()	23	2
A	separation_score()	30	2

from collections import Counter
from functools import partial

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.utils.multiclass import unique_labels

from responsibly.fairness.metrics.utils import (
    _assert_binary, _groupby_y_x_sens,
)


def _proportion(data, labels):
    counts = Counter(data)
    assert set(counts.keys()).issubset(labels)
    return (counts[labels[1]]
            / (counts[labels[0]] + counts[labels[1]]))


def _get_labels(ys, labels):

    if labels is None:
        labels = unique_labels(ys)
    else:
        labels = np.asarray(labels)
        if np.all([l not in ys for l in labels]):
            raise ValueError('At least one label specified must be in y.')

    return labels


def _normalize_by_attr(y_score, x_sens, ndigits=1):
    y_score_within = y_score[:]

    for indices in x_sens.groupby(x_sens).groups.values():
        y_score_within[indices] = (y_score_within[indices]
                                   .rank(pct=True))

    y_score_within = (np.floor(y_score_within * (10**ndigits))
                      / (10**ndigits))

    return y_score_within


def independence_score(y_score, x_sens,
                       as_df=False):
    """Compute the independence criteria for score prediction.

    In classification terminology, it is the **acceptance rate**
    grouped by the score and the sensitive attribute.

    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame`(if ``True``).
    :return: Independence criteria.
    :rtype: dict or :class:`pandas.DataFrame`
    """
    criterion = pd.crosstab(index=y_score,
                            columns=x_sens,
                            normalize='columns')

    if not as_df:
        criterion = criterion.to_dict()

    return criterion


def separation_score(y_true, y_score, x_sens,
                     labels=None,
                     as_df=False):
    """Compute the separation criteria for score prediction.

    In classification terminology, it is the **FPR** and **TPR**
    grouped by the score and the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Separation criteria.
    :rtype: dict or :class:`pandas.DataFrame`
    """

    _assert_binary(y_true)

    labels = _get_labels(y_score, labels)

    criterion = pd.crosstab(index=y_score,
                            columns=[y_true, x_sens],
                            normalize=True)

    if not as_df:
        criterion = criterion.to_dict()

    return criterion


def sufficiency_score(y_true, y_score, x_sens,
                      labels=None,
                      within_score_percentile=False,
                      as_df=False):
    """Compute the sufficiency criteria for score prediction.

    In classification terminology, it is the **PPV** and the **NPV**
    grouped by the score and the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   target.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Sufficiency criteria.
    :rtype: dict or :class:`pandas.DataFrame`
    """

    _assert_binary(y_true)

    labels = _get_labels(y_true, labels)

    if within_score_percentile:
        y_score = _normalize_by_attr(y_score, x_sens,
                                     within_score_percentile)

    criterion = pd.crosstab(index=y_score,
                            columns=x_sens,
                            values=y_true,
                            aggfunc=partial(_proportion,
                                            labels=labels))

    if not as_df:
        criterion = criterion.to_dict()

    return criterion


def _all_equal(iterator):
    iterator = iter(iterator)

    try:
        first = next(iterator)
    except StopIteration:
        return True

    try:
        return all(np.allclose(first, rest) for rest in iterator)
    except ValueError:
        return False


def roc_curve_by_attr(y_true, y_score, x_sens,
                      pos_label=None, sample_weight=None,
                      drop_intermediate=False):
    """Compute Receiver operating characteristic (ROC) by attribute.

    Based on :func:`sklearn.metrics.roc_curve`

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param pos_label: Label considered as positive and others
                      are considered negative.
    :param sample_weight: Sample weights.
    :param drop_intermediate: Whether to drop some suboptimal
                              thresholds which would not appear on
                              a plotted ROC curve.
                              This is useful in order to create
                              lighter ROC curves.
    :return: For each value of sensitive attribute:
             - fpr - Increasing false positive rates such
               that element i is the false positive rate
               of predictions with score >= thresholds[i].
             - fpr - Increasing true positive rates such
               that element i is the true positive rate
               of predictions with score >= thresholds[i].
             - thresholds -
               Decreasing thresholds on the decision function
               used to compute fpr and tpr. thresholds[0] represents
               no instances being predicted and is arbitrarily set
               to max(y_score) + 1.
    :rtype: dict

    """

    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    roc_curves = {x_sens_value: roc_curve(group['y_true'],
                                          group['y_score'],
                                          pos_label, sample_weight,
                                          drop_intermediate)
                  for x_sens_value, group in grouped}

    if not _all_equal(thresholds
                      for _, _, thresholds in roc_curves.values()):
        raise NotImplementedError('All the scores values should'
                                  ' appear for each sensitive'
                                  ' attribute value.'
                                  ' It will be implemented'
                                  ' in the future.'
                                  ' Please post your use-case in'
                                  ' https://github.com/ResponsiblyAI/responsibly/issues/15')  # pylint: disable=line-too-long

    return roc_curves


def roc_auc_score_by_attr(y_true, y_score, x_sens,
                          sample_weight=None):
    """Compute Area Under the ROC (AUC) by attribute.

    Based on function:`sklearn.metrics.roc_auc_score`

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param sample_weight: Sample weights.
    :return: ROC AUC grouped by the sensitive attribute.
    :rtype: dict
    """

    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    return {x_sens_value: roc_auc_score(group['y_true'],
                                        group['y_score'],
                                        sample_weight=sample_weight)
            for x_sens_value, group in grouped}


1			from collections import Counter
2			from functools import partial
3
4			import numpy as np
5			import pandas as pd
6			from sklearn.metrics import roc_auc_score, roc_curve
7			from sklearn.utils.multiclass import unique_labels
8
9			from responsibly.fairness.metrics.utils import (
10			_assert_binary, _groupby_y_x_sens,
11			)
12
13
14			def _proportion(data, labels):
15			counts = Counter(data)
16			assert set(counts.keys()).issubset(labels)
17			return (counts[labels[1]]
18			/ (counts[labels[0]] + counts[labels[1]]))
19
20
21			def _get_labels(ys, labels):
22
23			if labels is None:
24			labels = unique_labels(ys)
25			else:
26			labels = np.asarray(labels)
27			if np.all([l not in ys for l in labels]):
28			raise ValueError('At least one label specified must be in y.')
29
30			return labels
31
32
33			def _normalize_by_attr(y_score, x_sens, ndigits=1):
34			y_score_within = y_score[:]
35
36			for indices in x_sens.groupby(x_sens).groups.values():
37			y_score_within[indices] = (y_score_within[indices]
38			.rank(pct=True))
39
40			y_score_within = (np.floor(y_score_within * (10**ndigits))
41			/ (10**ndigits))
42
43			return y_score_within
44
45
46			def independence_score(y_score, x_sens,
47			as_df=False):
48			"""Compute the independence criteria for score prediction.
49
50			In classification terminology, it is the acceptance rate
51			grouped by the score and the sensitive attribute.
52
53			:param y_score: Estimated target score as returned by a classifier.
54			:param x_sens: Sensitive attribute values corresponded to each
55			estimated target.
56			:param as_df: Whether to return the results as ``dict`` (if ``False``)
57			or as :class:`pandas.DataFrame`(if ``True``).
58			:return: Independence criteria.
59			:rtype: dict or :class:`pandas.DataFrame`
60			"""
61			criterion = pd.crosstab(index=y_score,
62			columns=x_sens,
63			normalize='columns')
64
65			if not as_df:
66			criterion = criterion.to_dict()
67
68			return criterion
69
70
71			def separation_score(y_true, y_score, x_sens,
72			labels=None,
73			as_df=False):
74			"""Compute the separation criteria for score prediction.
75
76			In classification terminology, it is the FPR and TPR
77			grouped by the score and the sensitive attribute.
78
79			:param y_true: Binary ground truth (correct) target values.
80			:param y_score: Estimated target score as returned by a classifier.
81			:param x_sens: Sensitive attribute values corresponded to each
82			estimated target.
83			:param as_df: Whether to return the results as ``dict`` (if ``False``)
84			or as :class:`pandas.DataFrame` (if ``True``).
85			:return: Separation criteria.
86			:rtype: dict or :class:`pandas.DataFrame`
87			"""
88
89			_assert_binary(y_true)
90
91			labels = _get_labels(y_score, labels)
92
93			criterion = pd.crosstab(index=y_score,
94			columns=[y_true, x_sens],
95			normalize=True)
96
97			if not as_df:
98			criterion = criterion.to_dict()
99
100			return criterion
101
102
103			def sufficiency_score(y_true, y_score, x_sens,
104			labels=None,
105			within_score_percentile=False,
106			as_df=False):
107			"""Compute the sufficiency criteria for score prediction.
108
109			In classification terminology, it is the PPV and the NPV
110			grouped by the score and the sensitive attribute.
111
112			:param y_true: Binary ground truth (correct) target values.
113			:param y_score: Estimated target score as returned by a classifier.
114			:param x_sens: Sensitive attribute values corresponded to each
115			target.
116			:param as_df: Whether to return the results as ``dict`` (if ``False``)
117			or as :class:`pandas.DataFrame` (if ``True``).
118			:return: Sufficiency criteria.
119			:rtype: dict or :class:`pandas.DataFrame`
120			"""
121
122			_assert_binary(y_true)
123
124			labels = _get_labels(y_true, labels)
125
126			if within_score_percentile:
127			y_score = _normalize_by_attr(y_score, x_sens,
128			within_score_percentile)
129
130			criterion = pd.crosstab(index=y_score,
131			columns=x_sens,
132			values=y_true,
133			aggfunc=partial(_proportion,
134			labels=labels))
135
136			if not as_df:
137			criterion = criterion.to_dict()
138
139			return criterion
140
141
142			def _all_equal(iterator):
143			iterator = iter(iterator)
144
145			try:
146			first = next(iterator)
147			except StopIteration:
148			return True
149
150			try:
151			return all(np.allclose(first, rest) for rest in iterator)
152			except ValueError:
153			return False
154
155
156			def roc_curve_by_attr(y_true, y_score, x_sens,
157			pos_label=None, sample_weight=None,
158			drop_intermediate=False):
159			"""Compute Receiver operating characteristic (ROC) by attribute.
160
161			Based on :func:`sklearn.metrics.roc_curve`
162
163			:param y_true: Binary ground truth (correct) target values.
164			:param y_score: Estimated target score as returned by a classifier.
165			:param x_sens: Sensitive attribute values corresponded to each
166			estimated target.
167			:param pos_label: Label considered as positive and others
168			are considered negative.
169			:param sample_weight: Sample weights.
170			:param drop_intermediate: Whether to drop some suboptimal
171			thresholds which would not appear on
172			a plotted ROC curve.
173			This is useful in order to create
174			lighter ROC curves.
175			:return: For each value of sensitive attribute:
176			- fpr - Increasing false positive rates such
177			that element i is the false positive rate
178			of predictions with score >= thresholds[i].
179			- fpr - Increasing true positive rates such
180			that element i is the true positive rate
181			of predictions with score >= thresholds[i].
182			- thresholds -
183			Decreasing thresholds on the decision function
184			used to compute fpr and tpr. thresholds[0] represents
185			no instances being predicted and is arbitrarily set
186			to max(y_score) + 1.
187			:rtype: dict
188
189			"""
190
191			grouped = _groupby_y_x_sens(y_true, y_score, x_sens)
192
193			roc_curves = {x_sens_value: roc_curve(group['y_true'],
194			group['y_score'],
195			pos_label, sample_weight,
196			drop_intermediate)
197			for x_sens_value, group in grouped}
198
199			if not _all_equal(thresholds
200			for _, _, thresholds in roc_curves.values()):
201			raise NotImplementedError('All the scores values should'
202			' appear for each sensitive'
203			' attribute value.'
204			' It will be implemented'
205			' in the future.'
206			' Please post your use-case in'
207			' https://github.com/ResponsiblyAI/responsibly/issues/15') # pylint: disable=line-too-long
208
209			return roc_curves
210
211
212			def roc_auc_score_by_attr(y_true, y_score, x_sens,
213			sample_weight=None):
214			"""Compute Area Under the ROC (AUC) by attribute.
215
216			Based on function:`sklearn.metrics.roc_auc_score`
217
218			:param y_true: Binary ground truth (correct) target values.
219			:param y_score: Estimated target score as returned by a classifier.
220			:param x_sens: Sensitive attribute values corresponded to each
221			estimated target.
222			:param sample_weight: Sample weights.
223			:return: ROC AUC grouped by the sensitive attribute.
224			:rtype: dict
225			"""
226
227			grouped = _groupby_y_x_sens(y_true, y_score, x_sens)
228
229			return {x_sens_value: roc_auc_score(group['y_true'],
230			group['y_score'],
231			sample_weight=sample_weight)
232			for x_sens_value, group in grouped}
233

ResponsiblyAI / responsibly

Push — master ( 400a5c...f23aca )

responsibly.fairness.metrics.score A

Complexity

Size/Duplication

Importance

9 Functions

Duplication Side-by-Side

Filter issues like