responsibly.fairness.metrics.score (Rating: A)

Complexity

Total Complexity: 19

Size / Duplication

Total Lines: 234
Duplicated Lines: 0%

Importance

Changes: 0

Metric  Value
------  -----
eloc    96
dl      0
loc     234
rs      10
c       0
b       0
f       0
wmc     19

9 Functions

Rating  Name                     Duplication  Size  Complexity
A       _proportion()            0            5     1
A       _normalize_by_attr()     0            11    2
A       _all_equal()             0            12    3
A       _get_labels()            0            10    3
A       sufficiency_score()      0            37    3
A       independence_score()     0            23    2
A       separation_score()       0            30    2
A       roc_auc_score_by_attr()  0            21    1
A       roc_curve_by_attr()      0            55    2
from collections import Counter
from functools import partial

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.utils.multiclass import unique_labels

from responsibly.fairness.metrics.utils import (
    _assert_binary, _groupby_y_x_sens,
)


def _proportion(data, labels):
    # Fraction of observations equal to the positive label
    # (labels[1]) out of all observations with either label.
    counts = Counter(data)
    assert set(counts.keys()).issubset(labels)
    return (counts[labels[1]]
            / (counts[labels[0]] + counts[labels[1]]))
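
# Usage sketch (not part of the original module), with made-up data:
#
#   _proportion(['b', 'a', 'b', 'b'], labels=['a', 'b'])  # -> 0.75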


def _get_labels(ys, labels):
    # Infer the label set from the data if not given explicitly;
    # otherwise validate that at least one given label occurs in ys.
    if labels is None:
        labels = unique_labels(ys)
    else:
        labels = np.asarray(labels)
        if np.all([label not in ys for label in labels]):
            raise ValueError('At least one label specified must be in y.')

    return labels


def _normalize_by_attr(y_score, x_sens, ndigits=1):
    # Replace each score with its percentile rank *within* its
    # sensitive-attribute group, then floor to ``ndigits`` digits.
    y_score_within = y_score[:]

    for indices in x_sens.groupby(x_sens).groups.values():
        y_score_within[indices] = (y_score_within[indices]
                                   .rank(pct=True))

    y_score_within = (np.floor(y_score_within * (10**ndigits))
                      / (10**ndigits))

    return y_score_within
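
# Usage sketch (not part of the original module), with made-up data:
# each score becomes its percentile rank within its group, floored to
# one digit.
#
#   scores = pd.Series([0.2, 0.4, 0.6, 0.1, 0.9, 0.5])
#   sens = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'])
#   _normalize_by_attr(scores, sens, ndigits=1)
#   # -> [0.3, 0.6, 1.0, 0.3, 1.0, 0.6]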


def independence_score(y_score, x_sens,
                       as_df=False):
    """Compute the independence criterion for score prediction.

    In classification terminology, it is the **acceptance rate**
    grouped by the score and the sensitive attribute.

    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Independence criterion.
    :rtype: dict or :class:`pandas.DataFrame`
    """
    criterion = pd.crosstab(index=y_score,
                            columns=x_sens,
                            normalize='columns')

    if not as_df:
        criterion = criterion.to_dict()

    return criterion
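
# Usage sketch (not part of the original module), with made-up data;
# per sensitive group, the distribution over score values:
#
#   y_score = pd.Series([1, 0, 1, 1, 0, 0, 1, 0])
#   x_sens = pd.Series(['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'])
#   independence_score(y_score, x_sens)
#   # -> {'a': {0: 0.25, 1: 0.75}, 'b': {0: 0.75, 1: 0.25}}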


def separation_score(y_true, y_score, x_sens,
                     labels=None,
                     as_df=False):
    """Compute the separation criterion for score prediction.

    In classification terminology, it is the **FPR** and **TPR**
    grouped by the score and the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param labels: Score values to consider;
                   inferred from ``y_score`` if ``None``.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Separation criterion.
    :rtype: dict or :class:`pandas.DataFrame`
    """
    _assert_binary(y_true)

    # Validate the given labels against the observed score values.
    labels = _get_labels(y_score, labels)

    criterion = pd.crosstab(index=y_score,
                            columns=[y_true, x_sens],
                            normalize=True)

    if not as_df:
        criterion = criterion.to_dict()

    return criterion
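
# Usage sketch (not part of the original module), with made-up data;
# with as_df=True the result is a DataFrame whose rows are score values
# and whose columns are (y_true, x_sens) pairs:
#
#   y_true = pd.Series([1, 1, 0, 0, 1, 0])
#   y_score = pd.Series([1, 0, 0, 1, 1, 0])
#   x_sens = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'])
#   separation_score(y_true, y_score, x_sens, as_df=True)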


def sufficiency_score(y_true, y_score, x_sens,
                      labels=None,
                      within_score_percentile=False,
                      as_df=False):
    """Compute the sufficiency criterion for score prediction.

    In classification terminology, it is the **PPV** and the **NPV**
    grouped by the score and the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param labels: Ground-truth label values to consider;
                   inferred from ``y_true`` if ``None``.
    :param within_score_percentile: If truthy, first replace each score
                                    with its percentile rank within its
                                    sensitive-attribute group, floored
                                    to this number of digits.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Sufficiency criterion.
    :rtype: dict or :class:`pandas.DataFrame`
    """
    _assert_binary(y_true)

    labels = _get_labels(y_true, labels)

    if within_score_percentile:
        y_score = _normalize_by_attr(y_score, x_sens,
                                     within_score_percentile)

    criterion = pd.crosstab(index=y_score,
                            columns=x_sens,
                            values=y_true,
                            aggfunc=partial(_proportion,
                                            labels=labels))

    if not as_df:
        criterion = criterion.to_dict()

    return criterion
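
# Usage sketch (not part of the original module), with made-up data;
# each cell is the fraction of positive ground-truth labels among the
# samples with that (score value, group) combination:
#
#   y_true = pd.Series([1, 0, 1, 1, 0, 0])
#   y_score = pd.Series([1, 1, 0, 1, 1, 0])
#   x_sens = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'])
#   sufficiency_score(y_true, y_score, x_sens)
#   # -> {'a': {0: 1.0, 1: 0.5}, 'b': {0: 0.0, 1: 0.5}}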


def _all_equal(iterator):
    # True if the iterator is empty or all of its elements are
    # numerically close to the first one.
    iterator = iter(iterator)

    try:
        first = next(iterator)
    except StopIteration:
        return True

    try:
        # np.allclose raises ValueError on shape mismatch,
        # which also means the elements are not all equal.
        return all(np.allclose(first, rest) for rest in iterator)
    except ValueError:
        return False


def roc_curve_by_attr(y_true, y_score, x_sens,
                      pos_label=None, sample_weight=None,
                      drop_intermediate=False):
    """Compute Receiver Operating Characteristic (ROC) by attribute.

    Based on :func:`sklearn.metrics.roc_curve`.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param pos_label: Label considered as positive;
                      all others are considered negative.
    :param sample_weight: Sample weights.
    :param drop_intermediate: Whether to drop some suboptimal
                              thresholds which would not appear on
                              a plotted ROC curve.
                              This is useful in order to create
                              lighter ROC curves.
    :return: For each value of the sensitive attribute:
             - fpr - Increasing false positive rates such
               that element i is the false positive rate
               of predictions with score >= thresholds[i].
             - tpr - Increasing true positive rates such
               that element i is the true positive rate
               of predictions with score >= thresholds[i].
             - thresholds -
               Decreasing thresholds on the decision function
               used to compute fpr and tpr. thresholds[0] represents
               no instances being predicted and is arbitrarily set
               to max(y_score) + 1.
    :rtype: dict
    """
    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    roc_curves = {x_sens_value: roc_curve(group['y_true'],
                                          group['y_score'],
                                          pos_label=pos_label,
                                          sample_weight=sample_weight,
                                          drop_intermediate=drop_intermediate)
                  for x_sens_value, group in grouped}

    if not _all_equal(thresholds
                      for _, _, thresholds in roc_curves.values()):
        raise NotImplementedError('All the score values should'
                                  ' appear for each sensitive'
                                  ' attribute value.'
                                  ' Support for this case will be'
                                  ' implemented in the future.'
                                  ' Please post your use-case in'
                                  ' https://github.com/ResponsiblyAI/responsibly/issues/15')  # pylint: disable=line-too-long

    return roc_curves
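
# Usage sketch (not part of the original module), with made-up data.
# Note that every score value appears in both groups, as the
# thresholds-equality check above requires:
#
#   y_true = pd.Series([1, 0, 1, 0, 1, 0])
#   y_score = pd.Series([0.9, 0.8, 0.5, 0.5, 0.8, 0.9])
#   x_sens = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'])
#   curves = roc_curve_by_attr(y_true, y_score, x_sens)
#   fpr_a, tpr_a, thresholds_a = curves['a']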


def roc_auc_score_by_attr(y_true, y_score, x_sens,
                          sample_weight=None):
    """Compute Area Under the ROC Curve (AUC) by attribute.

    Based on :func:`sklearn.metrics.roc_auc_score`.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponding to each
                   estimated target.
    :param sample_weight: Sample weights.
    :return: ROC AUC grouped by the sensitive attribute.
    :rtype: dict
    """
    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    return {x_sens_value: roc_auc_score(group['y_true'],
                                        group['y_score'],
                                        sample_weight=sample_weight)
            for x_sens_value, group in grouped}
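
# Usage sketch (not part of the original module), with made-up data;
# AUC is computed separately per group, worked out by hand here:
#
#   y_true = pd.Series([1, 0, 1, 0, 1, 0])
#   y_score = pd.Series([0.9, 0.2, 0.6, 0.4, 0.3, 0.8])
#   x_sens = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'])
#   roc_auc_score_by_attr(y_true, y_score, x_sens)
#   # -> {'a': 1.0, 'b': 0.0}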