1
|
|
|
""" |
2
|
|
|
Collection of common benchmark datasets from fairness research. |
3
|
|
|
|
4
|
|
|
Each dataset object contains a :class:`pandas.DataFrame` as `df` attribute |
5
|
|
|
that holds the actual data. |
6
|
|
|
The dataset object will take care of loading, preprocessing |
7
|
|
|
and validating the data. |
8
|
|
|
The preprocessing follows the standard practices that are associated with |
9
|
|
|
this dataset: from its manual (e.g., README) |
10
|
|
|
or as others did in the literature. |
11
|
|
|
|
12
|
|
|
See :class:`responsibly.dataset.Dataset` |
13
|
|
|
for additional attributes and complete documentation. |
14
|
|
|
|
15
|
|
|
Currently these are the available datasets: |
16
|
|
|
|
17
|
|
|
- ProPublica recidivism/COMPAS dataset, |
18
|
|
|
see: :class:`~responsibly.dataset.COMPASDataset` |
19
|
|
|
|
20
|
|
|
- Adult dataset, |
21
|
|
|
see: :class:`~responsibly.dataset.AdultDataset` |
22
|
|
|
|
23
|
|
|
- German credit dataset, |
24
|
|
|
see: :class:`~responsibly.dataset.GermanDataset` |
25
|
|
|
|
26
|
|
|
- FICO credit score dataset, |
27
|
|
|
see: :func:`~responsibly.dataset.build_FICO_dataset` |
28
|
|
|
|
29
|
|
|
Usage |
30
|
|
|
----- |
31
|
|
|
.. code:: python |
32
|
|
|
|
33
|
|
|
>>> from responsibly.dataset import COMPASDataset |
34
|
|
|
>>> compas_ds = COMPASDataset() |
35
|
|
|
>>> print(compas_ds) |
36
|
|
|
<ProPublica Recidivism/COMPAS Dataset. 6172 rows, 56 columns in |
37
|
|
|
which {race, sex} are sensitive attributes> |
38
|
|
|
>>> type(compas_ds.df) |
39
|
|
|
<class 'pandas.core.frame.DataFrame'> |
40
|
|
|
>>> compas_ds.df['race'].value_counts() |
41
|
|
|
African-American 3175 |
42
|
|
|
Caucasian 2103 |
43
|
|
|
Hispanic 509 |
44
|
|
|
Other 343 |
45
|
|
|
Asian 31 |
46
|
|
|
Native American 11 |
47
|
|
|
Name: race, dtype: int64 |
48
|
|
|
""" |
49
|
|
|
|
50
|
|
|
from responsibly.dataset.adult import AdultDataset |
51
|
|
|
from responsibly.dataset.compas import COMPASDataset |
52
|
|
|
from responsibly.dataset.core import Dataset |
53
|
|
|
from responsibly.dataset.fico import build_FICO_dataset |
54
|
|
|
from responsibly.dataset.german import GermanDataset |
55
|
|
|
|