|
1
|
|
|
import pytest |
|
2
|
|
|
import math |
|
3
|
|
|
from collections import Counter |
|
4
|
|
|
import numpy as np |
|
5
|
|
|
from pandas import isnull |
|
6
|
|
|
|
|
7
|
|
|
@pytest.fixture
def eliminate_nan_n_None_command(somagic, test_datapoints):
    """Provide a command that fills null entries of a list-valued column.

    The inner function is passed through
    ``somagic.commands_decorators.data_manager_command()`` and the command
    object is then fetched from ``somagic.command`` under the inner
    function's ``__name__``.

    ``test_datapoints`` is not used directly in the body.
    NOTE(review): presumably requested so the dataset is loaded first - confirm.
    """

    # The function name and its parameter names are kept as-is: the name is
    # used for the attribute lookup below, and the decorator may rely on the
    # signature (not visible from here).
    def _eliminate_nan_n_None_command(_data_manager, datapoints, attribute):
        """Replace null values in ``attribute`` with an empty majority-type value."""
        type_counts = Counter(type(value) for value in datapoints.column(attribute))
        majority_type, _ = type_counts.most_common()[0]
        assert majority_type is list

        def _fill_missing(value):
            # Values already of the majority type pass through untouched.
            if type(value) is majority_type:
                return value
            try:
                if isnull(value):
                    return majority_type()
            except ValueError:
                # Raised when the truth value of the isnull() result is
                # ambiguous (array-like input); such values are left unchanged.
                pass
            return value

        column = datapoints.observations[attribute]
        datapoints.observations[attribute] = column.map(_fill_missing)
        # datapoints.add_column(frame, attribute)
        assert all(type(value) is majority_type for value in datapoints.column(attribute))

    somagic.commands_decorators.data_manager_command()(_eliminate_nan_n_None_command)
    command_name = _eliminate_nan_n_None_command.__name__
    return getattr(somagic.command, command_name)
|
31
|
|
|
|
|
32
|
|
|
|
|
33
|
|
|
def test_encoding_list_nominal(somagic, test_datapoints,
                               eliminate_nan_n_None_command,
                               test_json_data):
    """Clean the 'flavors' column, then run the nominal-subsets encoding command.

    The column is expected to start with 98 list values and 2 ``None`` values,
    and to hold 100 lists once the null-elimination command has executed.
    """
    manager = somagic._data_manager
    registry = manager.engine.datapoints_manager.datapoints_registry
    assert len(registry.objects) == 1

    def flavor_type_counts():
        # Re-read the column on every call so the counts reflect the current state.
        return Counter(type(value) for value in manager.datapoints.column('flavors'))

    assert flavor_type_counts() == Counter({list: 98, type(None): 2})

    # Step 1: replace the null entries with empty lists.
    cleanup_cmd = eliminate_nan_n_None_command
    cleanup_cmd.args = [somagic.datapoints, 'flavors']
    cleanup_cmd.execute()

    assert flavor_type_counts() == Counter({list: 100})
    assert all(type(value) is list for value in manager.datapoints.column('flavors'))

    # Step 2: encode the cleaned column into 'encoded_flavors'.
    encode_cmd = manager.command.encode_nominal_subsets_command
    encode_cmd.args = [manager.datapoints, 'flavors', 'encoded_flavors']
    encode_cmd.execute()

    # Disabled check, kept from the original:
    # assert set(dt_manager.datapoints.attributes) == set(_ for _ in list(test_json_data['attributes']) + ['encoded_flavors'])
|
56
|
|
|
|