Completed
Pull Request — master (#14)
by Shlomi
04:23 queued 02:18
created

ethically.fairness.metrics.score   A

Complexity

Total Complexity 20

Size/Duplication

Total Lines 238
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 100
dl 0
loc 238
rs 10
c 0
b 0
f 0
wmc 20

10 Functions

Rating   Name   Duplication   Size   Complexity  
A _proportion() 0 5 1
A _get_labels() 0 10 3
A sufficiency_score() 0 37 3
A independence_score() 0 23 2
A separation_score() 0 30 2
A _normalize_by_attr() 0 11 2
A roc_curve_by_attr() 0 54 2
A roc_auc_score_by_attr() 0 21 1
A _groupby_y_x_sens() 0 5 1
A _all_equal() 0 12 3
1
from collections import Counter
2
from functools import partial
3
4
import numpy as np
5
import pandas as pd
6
from sklearn.metrics import roc_auc_score, roc_curve
7
from sklearn.utils.multiclass import unique_labels
8
9
from ethically.fairness.metrics.utils import _assert_binary
10
11
12
def _proportion(data, labels):
13
    counts = Counter(data)
14
    assert set(counts.keys()).issubset(labels)
15
    return (counts[labels[1]]
16
            / (counts[labels[0]] + counts[labels[1]]))
17
18
19
def _get_labels(ys, labels):
20
21
    if labels is None:
22
        labels = unique_labels(ys)
23
    else:
24
        labels = np.asarray(labels)
25
        if np.all([l not in ys for l in labels]):
26
            raise ValueError('At least one label specified must be in y.')
27
28
    return labels
29
30
31
def _normalize_by_attr(y_score, x_sens, ndigits=1):
32
    y_score_within = y_score[:]
33
34
    for indices in x_sens.groupby(x_sens).groups.values():
35
        y_score_within[indices] = (y_score_within[indices]
36
                                   .rank(pct=True))
37
38
    y_score_within = (np.floor(y_score_within * (10**ndigits))
39
                      / (10**ndigits))
40
41
    return y_score_within
42
43
44
def independence_score(y_score, x_sens,
                       as_df=False):
    """Compute the independence criteria for score prediction.

    In classification terms this is the **acceptance rate** per score
    value, grouped by the sensitive attribute.

    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Independence criteria.
    :rtype: dict or :class:`pandas.DataFrame`
    """
    # Column-normalized contingency table: distribution of the score
    # within each sensitive-attribute value.
    table = pd.crosstab(index=y_score,
                        columns=x_sens,
                        normalize='columns')

    return table if as_df else table.to_dict()
67
68
69
def separation_score(y_true, y_score, x_sens,
                     labels=None,
                     as_df=False):
    """Compute the separation criteria for score prediction.

    In classification terms these are the **FPR** and **TPR** per score
    value, grouped by the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param labels: Score labels; inferred from *y_score* when ``None``.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Separation criteria.
    :rtype: dict or :class:`pandas.DataFrame`
    """

    _assert_binary(y_true)

    # Called for its validation side effect: raises ValueError when
    # none of the requested labels occurs in y_score.
    labels = _get_labels(y_score, labels)

    table = pd.crosstab(index=y_score,
                        columns=[y_true, x_sens],
                        normalize=True)

    return table if as_df else table.to_dict()
99
100
101
def sufficiency_score(y_true, y_score, x_sens,
                      labels=None,
                      within_score_percentile=False,
                      as_df=False):
    """Compute the sufficiency criteria for score prediction.

    In classification terms these are the **PPV** and **NPV** per score
    value, grouped by the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   target.
    :param labels: Target labels; inferred from *y_true* when ``None``.
    :param within_score_percentile: If truthy, scores are first converted
                                    to within-group percentiles; the
                                    value is also passed on as the
                                    ``ndigits`` truncation argument.
    :param as_df: Whether to return the results as ``dict`` (if ``False``)
                  or as :class:`pandas.DataFrame` (if ``True``).
    :return: Sufficiency criteria.
    :rtype: dict or :class:`pandas.DataFrame`
    """

    _assert_binary(y_true)

    labels = _get_labels(y_true, labels)

    if within_score_percentile:
        y_score = _normalize_by_attr(y_score, x_sens,
                                     within_score_percentile)

    # Positive-label proportion of y_true in every (score, group) cell.
    aggregate = partial(_proportion, labels=labels)
    table = pd.crosstab(index=y_score,
                        columns=x_sens,
                        values=y_true,
                        aggfunc=aggregate)

    return table if as_df else table.to_dict()
138
139
140
def _all_equal(iterator):
141
    iterator = iter(iterator)
142
143
    try:
144
        first = next(iterator)
145
    except StopIteration:
146
        return True
147
148
    try:
149
        return all(np.allclose(first, rest) for rest in iterator)
150
    except ValueError:
151
        return False
152
153
154
def _groupby_y_x_sens(y_true, y_score, x_sens):
155
    return (pd.DataFrame({'y_true': y_true,
156
                          'y_score': y_score,
157
                          'x_sens': x_sens})
158
            .groupby('x_sens'))
159
160
161
def roc_curve_by_attr(y_true, y_score, x_sens,
                      pos_label=None, sample_weight=None,
                      drop_intermediate=False):
    """Compute Receiver operating characteristic (ROC) by attribute.

    Based on :func:`sklearn.metrics.roc_curve`

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param pos_label: Label considered as positive and others
                      are considered negative.
    :param sample_weight: Sample weights.
    :param drop_intermediate: Whether to drop some suboptimal
                              thresholds which would not appear on
                              a plotted ROC curve.
                              This is useful in order to create
                              lighter ROC curves.
                              Note the default (``False``) differs from
                              scikit-learn's so that all groups keep
                              comparable threshold sets.
    :return: For each value of sensitive attribute:
             - fpr - Increasing false positive rates such
               that element i is the false positive rate
               of predictions with score >= thresholds[i].
             - tpr - Increasing true positive rates such
               that element i is the true positive rate
               of predictions with score >= thresholds[i].
             - thresholds -
               Decreasing thresholds on the decision function
               used to compute fpr and tpr. thresholds[0] represents
               no instances being predicted and is arbitrarily set
               to max(y_score) + 1.
    :rtype: dict

    """

    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    # Pass the optional arguments by keyword: they are keyword-only in
    # sklearn.metrics.roc_curve since scikit-learn 1.0, so passing them
    # positionally raises TypeError there.
    roc_curves = {x_sens_value: roc_curve(group['y_true'],
                                          group['y_score'],
                                          pos_label=pos_label,
                                          sample_weight=sample_weight,
                                          drop_intermediate=drop_intermediate)
                  for x_sens_value, group in grouped}

    # The per-group curves are only comparable when every group was
    # evaluated on the same threshold values.
    if not _all_equal(thresholds
                      for _, _, thresholds in roc_curves.values()):
        raise NotImplementedError('All the scores values should'
                                  ' appear for each sensitive'
                                  ' attribute value.'
                                  ' It will be implemented'
                                  ' in the future.'
                                  ' Please post your use-case in'
                                  ' https://github.com/EthicallyAI/ethically/issues/15')  # pylint: disable=line-too-long

    return roc_curves
215
216
217
def roc_auc_score_by_attr(y_true, y_score, x_sens,
                          sample_weight=None):
    """Compute Area Under the ROC (AUC) by attribute.

    Based on :func:`sklearn.metrics.roc_auc_score`

    :param y_true: Binary ground truth (correct) target values.
    :param y_score: Estimated target score as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   estimated target.
    :param sample_weight: Sample weights.
    :return: ROC AUC grouped by the sensitive attribute.
    :rtype: dict
    """

    grouped = _groupby_y_x_sens(y_true, y_score, x_sens)

    scores = {}
    for x_sens_value, group in grouped:
        scores[x_sens_value] = roc_auc_score(group['y_true'],
                                             group['y_score'],
                                             sample_weight=sample_weight)

    return scores
238