1
|
|
|
# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to typed-dfs |
2
|
|
|
# SPDX-PackageHomePage: https://github.com/dmyersturnbull/typed-dfs |
3
|
|
|
# SPDX-License-Identifier: Apache-2.0 |
4
|
|
|
import numpy as np |
5
|
|
|
import pandas as pd |
6
|
|
|
import pytest |
7
|
|
|
|
8
|
|
|
# noinspection PyProtectedMember |
9
|
|
|
from typeddfs._pretty_dfs import PrettyDf |
10
|
|
|
from typeddfs.base_dfs import BaseDf |
11
|
|
|
from typeddfs.builders import MatrixDfBuilder, TypedDfBuilder |
12
|
|
|
from typeddfs.df_errors import ( |
13
|
|
|
ClashError, |
14
|
|
|
DfTypeConstructionError, |
15
|
|
|
UnexpectedColumnError, |
16
|
|
|
UnexpectedIndexNameError, |
17
|
|
|
VerificationFailedError, |
18
|
|
|
) |
19
|
|
|
from typeddfs.df_typing import DfTyping |
20
|
|
|
from typeddfs.typed_dfs import TypedDf |
21
|
|
|
|
22
|
|
|
|
23
|
|
|
def always_ok(x):
    """Verification callback that ignores its argument and returns ``None``.

    Passed to ``TypedDfBuilder.verify`` in the tests as the callback that
    reports no failure.
    """
    no_failure = None
    return no_failure
25
|
|
|
|
26
|
|
|
|
27
|
|
|
def always_fail(x):
    """Verification callback that ignores its argument and returns a failure message.

    Passed to ``TypedDfBuilder.verify`` in the tests as the callback whose
    string result is expected to make conversion fail.
    """
    failure_message = "OH NO"
    return failure_message
29
|
|
|
|
30
|
|
|
|
31
|
|
|
class TestBuilders:
    """Tests for ``TypedDfBuilder`` and ``MatrixDfBuilder``."""

    def test_typed_subclass(self) -> None:
        """A typed DF built with ``subclass(t1)`` is a subclass of ``t1``, not the reverse."""
        t1 = TypedDfBuilder("t1").build()
        t2 = TypedDfBuilder("t2").subclass(t1).build()
        assert issubclass(t2, t1)
        assert not issubclass(t1, t2)

    def test_matrix_subclass(self) -> None:
        """A matrix DF built with ``subclass(t1)`` is a subclass of ``t1``, not the reverse."""
        t1 = MatrixDfBuilder("t1").build()
        t2 = MatrixDfBuilder("t2").subclass(t1).build()
        assert issubclass(t2, t1)
        assert not issubclass(t1, t2)

    def test_condition(self) -> None:
        """``verify`` records the callback; a callback returning a string fails conversion."""
        t = TypedDfBuilder("a").verify(always_ok).build()
        typ: DfTyping = t.get_typing()
        assert typ.required_columns == []
        assert typ.required_index_names == []
        assert typ.verifications == [always_ok]
        # NOTE(review): this constructs the base ``TypedDf``, not the built type ``t``,
        # so ``always_ok`` is presumably never invoked here -- confirm whether
        # ``t.convert(pd.DataFrame())`` was intended.
        TypedDf(pd.DataFrame())
        t = TypedDfBuilder("a").verify(always_fail).build()
        with pytest.raises(VerificationFailedError):
            t.convert(pd.DataFrame())

    def test_require_and_reserve_col(self) -> None:
        """Required and reserved columns are recorded separately from index names."""
        t = TypedDfBuilder("a").require("column").reserve("reserved").build()
        typ: DfTyping = t.get_typing()
        assert typ.required_columns == ["column"]
        assert typ.reserved_columns == ["reserved"]
        assert typ.required_index_names == []
        assert typ.reserved_index_names == []
        assert typ.verifications == []

    def test_require_and_reserve_index(self) -> None:
        """With ``index=True``, names are recorded as index names rather than columns."""
        t = (
            TypedDfBuilder("a").require("column", index=True).reserve("reserved", index=True)
        ).build()
        typ: DfTyping = t.get_typing()
        assert typ.required_columns == []
        assert typ.reserved_columns == []
        assert typ.required_index_names == ["column"]
        assert typ.reserved_index_names == ["reserved"]
        assert typ.known_index_names == ["column", "reserved"]
        assert typ.known_column_names == []
        assert typ.known_names == ["column", "reserved"]
        assert typ.verifications == []

    def test_drop(self) -> None:
        """Columns named in ``drop`` are removed on conversion; other columns are kept."""
        t = TypedDfBuilder("a").reserve("column").drop("trash").build()
        typ: DfTyping = t.get_typing()
        assert typ.columns_to_drop == {"trash"}
        df = t.convert(pd.DataFrame([pd.Series(dict(x="x", zz="y"))]))
        assert df.column_names() == ["x", "zz"]
        df = t.convert(pd.DataFrame([pd.Series(dict(x="x", trash="y"))]))
        assert df.column_names() == ["x"]

    def test_drop_clash(self) -> None:
        """Reserving and dropping the same column raises ``ClashError`` at build time."""
        builder = TypedDfBuilder("a").reserve("trash").drop("trash")
        with pytest.raises(ClashError):
            builder.build()

    def test_secure(self) -> None:
        """``secure()`` accepts sha256 but rejects sha1, which is fine without ``secure()``."""
        TypedDfBuilder("a").secure().hash(alg="sha256").build()
        TypedDfBuilder("a").hash(alg="sha1").build()
        with pytest.raises(DfTypeConstructionError):
            TypedDfBuilder("a").secure().hash(alg="sha1").build()

    def test_bad_type(self) -> None:
        """A non-string builder name (``None`` or an int) raises ``TypeError``."""
        with pytest.raises(TypeError):
            # noinspection PyTypeChecker
            TypedDfBuilder(None).build()
        with pytest.raises(TypeError):
            # noinspection PyTypeChecker
            TypedDfBuilder(5).build()

    def test_bad_require(self) -> None:
        """Requiring ``level_0`` or ``index`` raises ``ClashError`` for columns and indices."""
        for index in [True, False]:
            with pytest.raises(ClashError):
                TypedDfBuilder("a").require("level_0", index=index)
            with pytest.raises(ClashError):
                TypedDfBuilder("a").require("abc", "level_0", index=index)
            with pytest.raises(ClashError):
                TypedDfBuilder("a").require("abc", "index", index=index)

    def test_bad_reserve(self) -> None:
        """Reserving ``level_0`` or ``index`` raises ``ClashError`` for columns and indices."""
        for index in [True, False]:
            with pytest.raises(ClashError):
                TypedDfBuilder("a").reserve("level_0", index=index)
            with pytest.raises(ClashError):
                TypedDfBuilder("a").reserve("abc", "level_0", index=index)
            with pytest.raises(ClashError):
                TypedDfBuilder("a").reserve("abc", "index", index=index)

    def test_already_added(self) -> None:
        """Adding the name ``"a"`` twice raises ``ClashError`` in every combination.

        The first and second additions each vary independently over
        require/reserve and column/index.
        """
        for cola in [True, False]:
            for indexa in [True, False]:
                for colb in [True, False]:
                    for indexb in [True, False]:
                        builder = TypedDfBuilder("a")
                        if cola:
                            builder = builder.require("a", index=indexa)
                        else:
                            # Rebind ``builder``, not the loop flag ``cola``: overwriting
                            # the flag with a (truthy) builder would make every later
                            # iteration take the ``require`` branch.
                            builder = builder.reserve("a", index=indexa)
                        with pytest.raises(ClashError):
                            if colb:
                                builder.require("a", index=indexb)
                            else:
                                builder.reserve("a", index=indexb)

    def test_strict(self) -> None:
        """``strict`` independently forbids extra columns and/or extra index names."""
        # strict columns but not index
        t = TypedDfBuilder("a").strict(index=False, cols=True).build()
        typ: DfTyping = t.get_typing()
        assert typ.more_indices_allowed
        assert not typ.more_columns_allowed
        t.convert(pd.DataFrame([pd.Series(dict(x="x"))]).set_index("x"))
        with pytest.raises(UnexpectedColumnError):
            t.convert(pd.DataFrame([pd.Series(dict(x="x"))]))
        # strict index but not columns
        t = TypedDfBuilder("a").strict(True, False).build()
        typ = t.get_typing()
        assert typ.more_columns_allowed
        assert not typ.more_indices_allowed
        t.convert(pd.DataFrame([pd.Series(dict(x="x"))]))
        # Build and sanity-check the fixture outside ``pytest.raises`` so that only
        # the conversion itself is expected to raise.
        df = PrettyDf(pd.DataFrame([pd.Series(dict(x="x"))]).set_index("x"))
        assert df.index_names() == ["x"]
        assert df.column_names() == []
        with pytest.raises(UnexpectedIndexNameError):
            t.convert(df)
        # neither strict
        t = TypedDfBuilder("a").strict(False, False).build()
        t.convert(pd.DataFrame([pd.Series(dict(x="x"))]))

    def test_reserve_dtype(self) -> None:
        """A reserved column's ``dtype`` is applied on conversion; bad values raise."""
        t = TypedDfBuilder("a").reserve("x", dtype=np.float32).build()
        df = t.convert(pd.DataFrame([pd.Series(dict(x="0.5"))]))
        assert df.column_names() == ["x"]
        assert df.to_numpy().tolist() == [[0.5]]
        with pytest.raises(ValueError):
            t.convert(pd.DataFrame([pd.Series(dict(x="kitten"))]))

    def test_dtype_post_process(self) -> None:
        """Conversion applies dtypes, then post-processing, then verifications."""
        # make sure these happen in the right order:
        # 1. dtype conversions
        # 2. post-processing
        # 3. final conditions

        def post(dd: BaseDf) -> BaseDf:
            # The dtype conversion must already have happened when this runs.
            assert dd["x"].dtype == np.float32
            dd2 = dd.copy()
            dd2["x"] += 9
            return dd2

        def cond(dd: BaseDf):
            # Returns None on success, or a failure message.
            return None if dd["x"].dtype == np.float32 else "failed"

        t = (TypedDfBuilder("a").reserve("x", dtype=np.float32).post(post).verify(cond)).build()
        df = t.convert(pd.DataFrame([pd.Series(dict(x="0.5"))]))
        assert df.to_numpy().tolist() == [[9.5]]
190
|
|
|
|
191
|
|
|
|
192
|
|
|
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file directly
    # exits non-zero when a test fails, instead of always exiting 0.
    raise SystemExit(pytest.main())
194
|
|
|
|