1
|
|
|
import pandas as pd |
2
|
|
|
from pandas.core.algorithms import unique as _unique |
3
|
|
|
from sklearn.metrics import confusion_matrix |
4
|
|
|
|
5
|
|
|
from ethically.fairness.metrics.utils import _assert_binary |
6
|
|
|
|
7
|
|
|
|
8
|
|
|
def _select_dict(d, keys): |
9
|
|
|
return {k: d[k] for k in keys} |
10
|
|
|
|
11
|
|
|
|
12
|
|
|
def _nested_select_dict(d, nested_keys): |
13
|
|
|
return {k: |
14
|
|
|
_select_dict(v, nested_keys) |
15
|
|
|
for k, v in d.items()} |
16
|
|
|
|
17
|
|
|
|
18
|
|
|
def _choose_other(item, iterable): |
19
|
|
|
return next(other for other in iterable |
20
|
|
|
if other != item) |
21
|
|
|
|
22
|
|
|
|
23
|
|
|
def _nested_diff_and_ratio(d, nested_key, first, second): |
24
|
|
|
|
25
|
|
|
assert d.keys() == {first, second} |
26
|
|
|
|
27
|
|
|
return {'diff': d[first][nested_key] - d[second][nested_key], |
28
|
|
|
'ratio': d[first][nested_key] / d[second][nested_key]} |
29
|
|
|
|
30
|
|
|
|
31
|
|
|
def binary_stats_by_attr(y_true, y_pred, x_attr,
                         labels=None):
    """Compute binary classification statistics grouped by an attribute.

    For every distinct value of *x_attr*, the confusion matrix of the
    matching subset of predictions is computed and expanded into counts
    and rates (base rate, acceptance rate, TPR/TNR/FNR/FPR, PPV, NPV, ...).

    :param y_true: Binary ground truth (correct) target values.
    :param y_pred: Binary estimated targets as returned by a classifier.
    :param x_attr: Attribute values corresponded to each target.
    :param labels: List of labels to choose the negative and positive
                   target; forwarded to
                   :func:`sklearn.metrics.confusion_matrix`.
    :return: Mapping from each attribute value to its statistics dict.
    :rtype: dict
    """
    # pylint: disable=too-many-locals

    _assert_binary(y_true, y_pred)

    stats = {}

    for value in _unique(x_attr):
        group = (x_attr == value)

        # ravel() order for a binary confusion matrix is tn, fp, fn, tp.
        tn, fp, fn, tp = confusion_matrix(y_true[group],
                                          y_pred[group],
                                          labels=labels).ravel()

        positives = tp + fn
        negatives = tn + fp
        accepted = tp + fp
        rejected = tn + fn
        hits = tp + tn
        size = positives + negatives

        stats[value] = {
            'total': int(size),
            'proportion': size / len(x_attr),
            'pos': int(positives),
            'neg': int(negatives),
            'base_rate': positives / size,
            'acceptance_rate': accepted / size,
            'tn': int(tn),
            'fp': int(fp),
            'fn': int(fn),
            'tp': int(tp),
            'accuracy': hits / size,
            'balanced_accuracy': (tp / positives + tn / negatives) / 2,
            'tpr': tp / positives,
            'tnr': tn / negatives,
            'fnr': fn / positives,
            'fpr': fp / negatives,
            'ppv': tp / accepted,
            'npv': tn / rejected,
        }

    return stats
78
|
|
|
|
79
|
|
|
|
80
|
|
|
def compare_privileged(stats,
                       x_sens_privileged=None):
    """Compare per-group statistics between two sensitive-attribute values.

    :param stats: Dictionary of metric dicts keyed by sensitive-attribute
                  value, as produced by :func:`binary_stats_by_attr`.
    :param x_sens_privileged: The privileged value of the sensitive
                              attribute. If ``None``, the first key of
                              *stats* is taken as privileged.
    :return: Dictionary with the privileged/unprivileged values and, per
             metric, the difference and ratio (unprivileged vs.
             privileged); ``None`` if *stats* does not have exactly two
             groups and no explicit *x_sens_privileged* was requested.
    :raises ValueError: If *x_sens_privileged* is given but *stats* does
                        not have exactly two groups.
    """
    # pylint: disable=line-too-long

    if len(stats) != 2:
        if x_sens_privileged is not None:
            # BUG FIX: the original implicit string concatenation lacked
            # separating spaces ("...should haveonly two values for
            # comparision(difference...") and misspelled "comparison".
            raise ValueError('x_sens_privileged should have'
                             ' only two values for comparison'
                             ' (difference and ratio).')

        return None

    comparison = {}

    if x_sens_privileged is None:
        x_sens_privileged = next(iter(stats))

    x_sens_unprivileged = _choose_other(x_sens_privileged,
                                        stats)

    comparison['x_sens_privileged'] = x_sens_privileged
    comparison['x_sens_unprivileged'] = x_sens_unprivileged

    comparison['metrics'] = {}

    # All groups share the same metric keys; take them from any one group.
    metrics = next(iter(stats.values())).keys()

    for metric in metrics:
        comparison['metrics'][metric] = _nested_diff_and_ratio(
            stats,
            metric,
            x_sens_unprivileged,
            x_sens_privileged)

    return comparison
114
|
|
|
|
115
|
|
|
|
116
|
|
|
def group_fairness_criterion_binary(y_true, y_pred, x_sens,
                                    metrics,
                                    x_sens_privileged=None,
                                    labels=None,
                                    as_df=False):
    """Compute a group-fairness criterion for binary prediction.

    Selects the given *metrics* from the per-group classification
    statistics and, when the sensitive attribute is binary, compares the
    two groups (difference and ratio).

    :param y_true: Binary ground truth (correct) target values.
    :param y_pred: Binary estimated targets as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each target.
    :param metrics: Iterable of metric names to select
                    (keys of :func:`binary_stats_by_attr` output).
    :param x_sens_privileged: The privileged value of the sensitive
                              attribute; relevant only with two groups.
    :param labels: List of labels to choose the negative and positive
                   target.
    :param as_df: Whether to return results as `dict` (if `False`)
                  or as :class:`pandas.DataFrame` (if `True`).
    :return: Tuple of the selected criterion and the two-group comparison
             (``None`` when there are more than two groups).
    :rtype: tuple
    """
    grouped = binary_stats_by_attr(y_true, y_pred, x_sens,
                                   labels=labels)

    criterion = _nested_select_dict(grouped, metrics)

    comparison = compare_privileged(criterion, x_sens_privileged)

    if as_df:
        criterion = pd.DataFrame(criterion)

        if comparison is not None:
            index_title = ('{x_sens_unprivileged} vs. {x_sens_privileged}'
                           .format(**comparison))

            comparison = pd.DataFrame(comparison['metrics'])
            comparison.index.name = index_title

    return criterion, comparison
142
|
|
|
|
143
|
|
|
|
144
|
|
|
def independence_binary(y_pred, x_sens,
                        x_sens_privileged=None,
                        labels=None,
                        as_df=False):
    """Compute the independence criteria for binary prediction.

    In classification terminology, it is the **acceptance rate**
    grouped by the sensitive attribute.

    :param y_pred: Estimated targets as returned by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   target.
    :param x_sens_privileged: The privileged value in the
                              sensitive attribute. Relevant only
                              if there are only two values for
                              the sensitive attribute.
    :param labels: List of labels to choose the negative and positive target.
                   This may be used to reorder or select a subset of labels.
                   If none is given, those that appear at least once in
                   y_pred are used in sorted order; first is negative
                   and the second is positive.
    :param as_df: Whether to return the results as `dict` (if `False`)
                  or as :class:`pandas.DataFrame` (if `True`).
    :return: Independence criteria and comparison if there are
             only two values for the sensitive attribute.
    :rtype: tuple
    """
    # Independence depends only on the prediction, so y_pred also stands
    # in for the ground-truth argument; only acceptance_rate is kept.
    # This keeps the code structure shared with separation/sufficiency.
    return group_fairness_criterion_binary(y_pred, y_pred, x_sens,
                                           ('acceptance_rate',),
                                           x_sens_privileged=x_sens_privileged,
                                           labels=labels,
                                           as_df=as_df)
180
|
|
|
|
181
|
|
|
|
182
|
|
|
def separation_binary(y_true, y_pred, x_sens,
                      x_sens_privileged=None,
                      labels=None,
                      as_df=False):
    """Compute the separation criteria for binary prediction.

    In classification terminology, it is the **TPR**, **FPR**,
    **TNR** and **FNR** grouped by the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_pred: Estimated binary targets as returned
                   by a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   target.
    :param x_sens_privileged: The privileged value in the
                              sensitive attribute. Relevant only
                              if there are only two values for
                              the sensitive attribute.
    :param labels: List of labels to choose the negative and positive target.
                   This may be used to reorder or select a subset of labels.
                   If none is given, those that appear at least once in
                   y_pred are used in sorted order; first is negative
                   and the second is positive.
    :param as_df: Whether to return the results as `dict` (if `False`)
                  or as :class:`pandas.DataFrame` (if `True`).
    :return: Separation criteria and comparison if there are
             only two values for the sensitive attribute.
    :rtype: tuple
    """
    return group_fairness_criterion_binary(y_true, y_pred, x_sens,
                                           ('tpr', 'fpr', 'tnr', 'fnr'),
                                           x_sens_privileged=x_sens_privileged,
                                           labels=labels,
                                           as_df=as_df)
217
|
|
|
|
218
|
|
|
|
219
|
|
|
def sufficiency_binary(y_true, y_pred, x_sens,
                       x_sens_privileged=None,
                       labels=None,
                       as_df=False):
    """Compute the sufficiency criteria for binary prediction.

    In classification terminology, it is the **PPV** and **NPV**
    grouped by the sensitive attribute.

    :param y_true: Binary ground truth (correct) target values.
    :param y_pred: Binary estimated targets as returned by
                   a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   target.
    :param x_sens_privileged: The privileged value in the
                              sensitive attribute. Relevant only
                              if there are only two values for
                              the sensitive attribute.
    :param labels: List of labels to choose the negative and positive target.
                   This may be used to reorder or select a subset of labels.
                   If none is given, those that appear at least once in
                   y_pred are used in sorted order; first is negative
                   and the second is positive.
    :param as_df: Whether to return the results as `dict` (if `False`)
                  or as :class:`pandas.DataFrame` (if `True`).
    :return: Sufficiency criteria and comparison if there are
             only two values for the sensitive attribute.
    :rtype: tuple
    """
    return group_fairness_criterion_binary(y_true, y_pred, x_sens,
                                           ('ppv', 'npv'),
                                           x_sens_privileged=x_sens_privileged,
                                           labels=labels,
                                           as_df=as_df)
254
|
|
|
|
255
|
|
|
|
256
|
|
|
def report_binary(y_true, y_pred, x_sens,
                  labels=None):
    """Generate a report of criteria for binary prediction.

    In classification terminology, the statistics are
    grouped by the sensitive attribute:
    - Number of observations per group
    - Proportion of observations per group
    - Base rate
    - Acceptance rate
    - Accuracy
    - FNR
    - FPR
    - PPV
    - NPV

    :param y_true: Binary ground truth (correct) target values.
    :param y_pred: Binary estimated targets as returned by
                   a classifier.
    :param x_sens: Sensitive attribute values corresponded to each
                   target.
    :param labels: List of labels to choose the negative and positive target.
                   This may be used to reorder or select a subset of labels.
                   If none is given, those that appear at least once in
                   y_pred are used in sorted order; first is negative
                   and the second is positive.
    :return: Classification statistics grouped by the
             sensitive attribute.
    :rtype: :class:`pandas.DataFrame`
    """
    # NOTE: the docstring previously listed TPR, but the report selects
    # 'fpr' (and also 'accuracy'); the listing above matches the code.
    # labels is passed by keyword for consistency with the sibling calls.
    stats = binary_stats_by_attr(y_true, y_pred, x_sens,
                                 labels=labels)
    stats_df = pd.DataFrame(stats)

    return stats_df.loc[['total', 'proportion', 'base_rate',
                         'acceptance_rate', 'accuracy',
                         'fnr', 'fpr', 'ppv', 'npv']]
292
|
|
|
|