ethically.dataset.core.Dataset.__init__() - Code Metrics - Inspection of "Merge pull request #18 from EthicallyAI/dev" - ResponsiblyAI/responsibly - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 170db5...8af2aa )

by Shlomi

created 2019-04-10 20:28 UTC

ethically.dataset.core.Dataset.__init__() A

↳ Parent: ethically.dataset.core

Complexity

Conditions

Size

Total Lines	26
Code Lines	9

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	1
eloc	9
nop	4
dl	0
loc	26
rs	9.95
c	0
b	0
f	0

import abc


class Dataset(abc.ABC):
    """Base class for datasets.

    Attributes
        - `df` - :class:`pandas.DataFrame` that holds the actual data.

        - `target` - Column name of the variable to predict
                    (ground truth)

        - `sensitive_attributes` - Column names of the
                                sensitive attributes

        - `prediction` - Column name of the
                        prediction (optional)

    """

    @abc.abstractmethod
    def __init__(self, target, sensitive_attributes, prediction=None):
        """Load, preprocess and validate the dataset.

        The steps run in a fixed order: :meth:`_load_data` fills `df`,
        :meth:`_preprocess` runs, the column-name attributes are stored,
        and finally :meth:`_validate` is called.

        :param target: Column name of the variable
                    to predict (ground truth)
        :param sensitive_attributes: Column names of the
                                    sensitive attributes
        :param prediction: Column name of the
                           prediction (optional)
        :type target: str
        :type sensitive_attributes: list
        :type prediction: str
        """

        self.df = self._load_data()

        self._preprocess()

        # The display name is the first line of the concrete class's
        # docstring. A class that defines no docstring of its own has
        # ``__doc__ is None`` (docstrings are not inherited) and an
        # empty docstring has no lines at all, so fall back to the
        # class name instead of failing during construction.
        doc_lines = (self.__doc__ or '').splitlines()
        self._name = doc_lines[0] if doc_lines else type(self).__name__

        self.target = target
        self.sensitive_attributes = sensitive_attributes
        self.prediction = prediction

        self._validate()

    def __str__(self):
        """Return a one-line summary: name, shape and sensitive attributes."""
        return ('<{} {} rows, {} columns'
                ' in which {{{}}} are sensitive attributes>'
                .format(self._name,
                        len(self.df),
                        len(self.df.columns),
                        ', '.join(self.sensitive_attributes)))

    @abc.abstractmethod
    def _load_data(self):
        """Return the data; the result is stored as `df` by `__init__`."""
        pass

    @abc.abstractmethod
    def _preprocess(self):
        """Hook called by `__init__` right after `df` has been loaded.

        At this point `target`, `sensitive_attributes` and `prediction`
        have not been assigned yet.
        """
        pass

    @abc.abstractmethod
    def _validate(self):
        """Check that the configured column names exist in `df`.

        Although abstract, this method has a body that subclasses can
        reuse through ``super()._validate()``. Only `target` and
        `sensitive_attributes` are checked; `prediction` is not.

        :raises AssertionError: if `target` or any of the sensitive
                                attributes is not a column of `df`.
        """
        # pylint: disable=line-too-long

        # NOTE(review): ``assert`` statements are skipped when Python
        # runs with ``-O``; they are kept so that the raised exception
        # type stays ``AssertionError`` for existing callers.
        assert self.target in self.df.columns,\
            ('the target label \'{}\' should be in the columns'
             .format(self.target))

        # The message lists only the attributes that are missing.
        assert all(attr in self.df.columns
                   for attr in self.sensitive_attributes),\
            ('the sensitive attributes {{{}}} should be in the columns'
             .format(','.join(attr for attr in self.sensitive_attributes
                              if attr not in self.df.columns)))

        # assert all(attr in SENSITIVE_ATTRIBUTES
        #           for attr in self.sensitive_attributes),\
        # ('the sensitive attributes {} can be only from {}.'  # noqa
        #  .format(self.sensitive_attributes, SENSITIVE_ATTRIBUTES))


1			import abc
2
3
4			class Dataset(abc.ABC):
5			"""Base class for datasets.
6
7			Attributes
8			- `df` - :class:`pandas.DataFrame` that holds the actual data.
9
10			- `target` - Column name of the variable to predict
11			(ground truth)
12
13			- `sensitive_attributes` - Column name of the
14			sensitive attributes
15
16			- `prediction` - Columns name of the
17			prediction (optional)
18
19			"""
20
21			@abc.abstractmethod
22			def __init__(self, target, sensitive_attributes, prediction=None):
23			"""Load, preprocess and validate the dataset.
24
25			:param target: Column name of the variable
26			to predict (ground truth)
27			:param sensitive_attributes: Column name of the
28			sensitive attributes
29			:param prediction: Columns name of the
30			prediction (optional)
31			:type target: str
32			:type sensitive_attributes: list
33			:type prediction: str
34			"""
35
36			self.df = self._load_data()
37
38			self._preprocess()
39
40			self._name = self.__doc__.splitlines()[0]
41
42			self.target = target
43			self.sensitive_attributes = sensitive_attributes
44			self.prediction = prediction
45
46			self._validate()
47
48			def __str__(self):
49			return ('<{} {} rows, {} columns'
50			' in which {{{}}} are sensitive attributes>'
51			.format(self._name,
52			len(self.df),
53			len(self.df.columns),
54			', '.join(self.sensitive_attributes)))
55
56			@abc.abstractmethod
57			def _load_data(self):
58			pass
59
60			@abc.abstractmethod
61			def _preprocess(self):
62			pass
63
64			@abc.abstractmethod
65			def _validate(self):
66			# pylint: disable=line-too-long
67
68			assert self.target in self.df.columns,\
69			('the target label \'{}\' should be in the columns'
70			.format(self.target))
71
72			assert all(attr in self.df.columns
73			for attr in self.sensitive_attributes),\
74			('the sensitive attributes {{{}}} should be in the columns'
75			.format(','.join(attr for attr in self.sensitive_attributes
76			if attr not in self.df.columns)))
77
78			# assert all(attr in SENSITIVE_ATTRIBUTES
79			# for attr in self.sensitive_attributes),\
80			# ('the sensitive attributes {} can be only from {}.' # noqa
81			# .format(self.sensitive_attributes, SENSITIVE_ATTRIBUTES))
82

ResponsiblyAI / responsibly

Push — master ( 170db5...8af2aa )

ethically.dataset.core.Dataset.__init__() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

ethically.dataset.core.Dataset.__init__() A