tests.test_builders   A
last analyzed

Complexity

Total Complexity 41

Size/Duplication

Total Lines 194
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 152
dl 0
loc 194
rs 9.1199
c 0
b 0
f 0
wmc 41

2 Functions

Rating   Name   Duplication   Size   Complexity  
A always_fail() 0 2 1
A always_ok() 0 2 1

15 Methods

Rating   Name   Duplication   Size   Complexity  
A TestBuilders.test_condition() 0 10 2
A TestBuilders.test_typed_subclass() 0 5 1
A TestBuilders.test_require_and_reserve_index() 0 13 1
A TestBuilders.test_secure() 0 5 2
A TestBuilders.test_require_and_reserve_col() 0 8 1
A TestBuilders.test_drop() 0 8 1
A TestBuilders.test_reserve_dtype() 0 7 2
A TestBuilders.test_bad_require() 0 8 5
A TestBuilders.test_dtype_post_process() 0 18 2
A TestBuilders.test_drop_clash() 0 4 2
A TestBuilders.test_matrix_subclass() 0 5 1
A TestBuilders.test_bad_reserve() 0 8 5
A TestBuilders.test_bad_type() 0 7 3
B TestBuilders.test_already_added() 0 15 8
A TestBuilders.test_strict() 0 23 3

How to fix   Complexity   

Complexity

Complex classes like tests.test_builders often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to typed-dfs
2
# SPDX-PackageHomePage: https://github.com/dmyersturnbull/typed-dfs
3
# SPDX-License-Identifier: Apache-2.0
4
import numpy as np
5
import pandas as pd
6
import pytest
7
8
# noinspection PyProtectedMember
9
from typeddfs._pretty_dfs import PrettyDf
10
from typeddfs.base_dfs import BaseDf
11
from typeddfs.builders import MatrixDfBuilder, TypedDfBuilder
12
from typeddfs.df_errors import (
13
    ClashError,
14
    DfTypeConstructionError,
15
    UnexpectedColumnError,
16
    UnexpectedIndexNameError,
17
    VerificationFailedError,
18
)
19
from typeddfs.df_typing import DfTyping
20
from typeddfs.typed_dfs import TypedDf
21
22
23
def always_ok(x):
    """Verification callback that always passes.

    Args:
        x: The object being verified; ignored.

    Returns:
        Always ``None``, i.e. no failure message.
    """
    return None
25
26
27
def always_fail(x):
    """Verification callback that always fails.

    Args:
        x: The object being verified; ignored.

    Returns:
        Always the failure message ``"OH NO"``.
    """
    return "OH NO"
29
30
31
class TestBuilders:
    """Tests for ``TypedDfBuilder`` and ``MatrixDfBuilder``."""

    def test_typed_subclass(self):
        """A type built via ``subclass(t1)`` is a subclass of ``t1``, but not vice versa."""
        t1 = TypedDfBuilder("t1").build()
        t2 = TypedDfBuilder("t2").subclass(t1).build()
        assert issubclass(t2, t1)
        assert not issubclass(t1, t2)

    def test_matrix_subclass(self):
        """Same subclass relationship as ``test_typed_subclass``, using ``MatrixDfBuilder``."""
        t1 = MatrixDfBuilder("t1").build()
        t2 = MatrixDfBuilder("t2").subclass(t1).build()
        assert issubclass(t2, t1)
        assert not issubclass(t1, t2)

    def test_condition(self):
        """``verify`` records the callback; a failing callback makes ``convert`` raise."""
        t = TypedDfBuilder("a").verify(always_ok).build()
        typ: DfTyping = t.get_typing()
        assert typ.required_columns == []
        assert typ.required_index_names == []
        assert typ.verifications == [always_ok]
        TypedDf(pd.DataFrame())
        # Run the passing verification through the built type as well,
        # so that the ``always_ok`` path is actually exercised.
        t.convert(pd.DataFrame())
        t = TypedDfBuilder("a").verify(always_fail).build()
        with pytest.raises(VerificationFailedError):
            t.convert(pd.DataFrame())

    def test_require_and_reserve_col(self):
        """Required and reserved columns appear in the typing; index name lists stay empty."""
        t = TypedDfBuilder("a").require("column").reserve("reserved").build()
        typ: DfTyping = t.get_typing()
        assert typ.required_columns == ["column"]
        assert typ.reserved_columns == ["reserved"]
        assert typ.required_index_names == []
        assert typ.reserved_index_names == []
        assert typ.verifications == []

    def test_require_and_reserve_index(self):
        """With ``index=True``, names are recorded as index names rather than columns."""
        t = (
            TypedDfBuilder("a").require("column", index=True).reserve("reserved", index=True)
        ).build()
        typ: DfTyping = t.get_typing()
        assert typ.required_columns == []
        assert typ.reserved_columns == []
        assert typ.required_index_names == ["column"]
        assert typ.reserved_index_names == ["reserved"]
        assert typ.known_index_names == ["column", "reserved"]
        assert typ.known_column_names == []
        assert typ.known_names == ["column", "reserved"]
        assert typ.verifications == []

    def test_drop(self):
        """A column registered with ``drop`` is removed on ``convert``; others are kept."""
        t = TypedDfBuilder("a").reserve("column").drop("trash").build()
        typ: DfTyping = t.get_typing()
        assert typ.columns_to_drop == {"trash"}
        # No "trash" column present: nothing is dropped.
        df = t.convert(pd.DataFrame([pd.Series(dict(x="x", zz="y"))]))
        assert df.column_names() == ["x", "zz"]
        # "trash" present: it is removed.
        df = t.convert(pd.DataFrame([pd.Series(dict(x="x", trash="y"))]))
        assert df.column_names() == ["x"]

    def test_drop_clash(self):
        """Reserving and dropping the same column raises ``ClashError`` at ``build`` time."""
        t = TypedDfBuilder("a").reserve("trash").drop("trash")
        with pytest.raises(ClashError):
            t.build()

    def test_secure(self):
        """``secure()`` accepts sha256 but rejects sha1; without ``secure()`` sha1 is accepted."""
        TypedDfBuilder("a").secure().hash(alg="sha256").build()
        TypedDfBuilder("a").hash(alg="sha1").build()
        with pytest.raises(DfTypeConstructionError):
            TypedDfBuilder("a").secure().hash(alg="sha1").build()

    def test_bad_type(self):
        """A non-string builder name (``None`` or an int) raises ``TypeError``."""
        with pytest.raises(TypeError):
            # noinspection PyTypeChecker
            TypedDfBuilder(None).build()
        with pytest.raises(TypeError):
            # noinspection PyTypeChecker
            TypedDfBuilder(5).build()

    def test_bad_require(self):
        """Requiring the names ``"level_0"`` or ``"index"`` raises ``ClashError``.

        Checked for both columns and index names.
        """
        for index in [True, False]:
            with pytest.raises(ClashError):
                TypedDfBuilder("a").require("level_0", index=index)
            with pytest.raises(ClashError):
                TypedDfBuilder("a").require("abc", "level_0", index=index)
            with pytest.raises(ClashError):
                TypedDfBuilder("a").require("abc", "index", index=index)

    def test_bad_reserve(self):
        """Reserving the names ``"level_0"`` or ``"index"`` raises ``ClashError``.

        Checked for both columns and index names.
        """
        for index in [True, False]:
            with pytest.raises(ClashError):
                TypedDfBuilder("a").reserve("level_0", index=index)
            with pytest.raises(ClashError):
                TypedDfBuilder("a").reserve("abc", "level_0", index=index)
            with pytest.raises(ClashError):
                TypedDfBuilder("a").reserve("abc", "index", index=index)

    def test_already_added(self):
        """Adding the name ``"a"`` a second time raises ``ClashError``.

        Covers every combination of require/reserve and column/index for both
        the first and the second addition.
        """
        flags = (True, False)
        for first_required in flags:
            for first_index in flags:
                for second_required in flags:
                    for second_index in flags:
                        builder = TypedDfBuilder("a")
                        # Rebind ``builder`` in both branches and never touch
                        # the loop flags, so the reserve-first case is
                        # exercised on every iteration.
                        if first_required:
                            builder = builder.require("a", index=first_index)
                        else:
                            builder = builder.reserve("a", index=first_index)
                        with pytest.raises(ClashError):
                            if second_required:
                                builder.require("a", index=second_index)
                            else:
                                builder.reserve("a", index=second_index)

    def test_strict(self):
        """``strict`` independently forbids unexpected index names and unexpected columns."""
        # strict columns but not index
        t = TypedDfBuilder("a").strict(index=False, cols=True).build()
        typ: DfTyping = t.get_typing()
        assert typ.more_indices_allowed
        assert not typ.more_columns_allowed
        t.convert(pd.DataFrame([pd.Series(dict(x="x"))]).set_index("x"))
        with pytest.raises(UnexpectedColumnError):
            t.convert(pd.DataFrame([pd.Series(dict(x="x"))]))
        # strict index but not columns
        t = TypedDfBuilder("a").strict(True, False).build()
        typ = t.get_typing()
        assert typ.more_columns_allowed
        assert not typ.more_indices_allowed
        t.convert(pd.DataFrame([pd.Series(dict(x="x"))]))
        # Build and sanity-check the input outside the ``raises`` block so
        # that only ``convert`` can satisfy the expected exception.
        df = PrettyDf(pd.DataFrame([pd.Series(dict(x="x"))]).set_index("x"))
        assert df.index_names() == ["x"]
        assert df.column_names() == []
        with pytest.raises(UnexpectedIndexNameError):
            t.convert(df)
        # neither strict
        t = TypedDfBuilder("a").strict(False, False).build()
        t.convert(pd.DataFrame([pd.Series(dict(x="x"))]))

    def test_reserve_dtype(self):
        """A reserved column with a dtype is cast on ``convert``; an uncastable value raises."""
        t = TypedDfBuilder("a").reserve("x", dtype=np.float32).build()
        df = t.convert(pd.DataFrame([pd.Series(dict(x="0.5"))]))
        assert df.column_names() == ["x"]
        assert df.to_numpy().tolist() == [[0.5]]
        with pytest.raises(ValueError):
            t.convert(pd.DataFrame([pd.Series(dict(x="kitten"))]))

    def test_dtype_post_process(self):
        """Dtype conversion, post-processing, and verification run in that order."""
        # make sure these happen in the right order:
        # 1. dtype conversions
        # 2. post-processing
        # 3. final conditions

        def post(dd: BaseDf) -> BaseDf:
            # The dtype must already be converted when post-processing runs.
            assert dd["x"].dtype == np.float32
            dd2 = dd.copy()
            dd2["x"] += 9
            return dd2

        def cond(dd: BaseDf):
            # Return None on success, or a failure message otherwise.
            return None if dd["x"].dtype == np.float32 else "failed"

        t = (TypedDfBuilder("a").reserve("x", dtype=np.float32).post(post).verify(cond)).build()
        df = t.convert(pd.DataFrame([pd.Series(dict(x="0.5"))]))
        # 0.5 (after dtype conversion) + 9 (from post-processing)
        assert df.to_numpy().tolist() == [[9.5]]
190
191
192
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file as a script
    # reports failures to the calling shell instead of always exiting 0.
    raise SystemExit(pytest.main())
194