Passed
Push — dev ( 1b3874...6a1e3c )
by Konstantinos
03:33
created

green_magic.strainmaster.StrainMaster.mpeta()   A

Complexity

Conditions 2

Size

Total Lines 29
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 21
dl 0
loc 29
rs 9.376
c 0
b 0
f 0
cc 2
nop 1
1
import pickle
2
import logging
3
from .strain_dataset import create_dataset_from_pickle
4
from .clustering import get_model_quality_reporter
5
from .data.dataset import DatapointsManager
6
from green_magic.utils import Invoker, CommandHistory
7
from .data.backend.engine import DataEngine
8
9
# Module-level logger; inherits handlers/level from the application's logging setup.
_log = logging.getLogger(__name__)
10
11
12
class StrainMaster:
    """Singleton facade coordinating dataset loading, persistence and SOM access.

    A single shared instance is created on first instantiation; it wires a
    pandas-backed DataEngine into a DataManager and exposes convenience
    accessors for the active dataset and self-organizing map.
    """

    __instance = None  # the one shared instance (singleton)

    def __new__(cls, *args, **kwargs):
        if not cls.__instance:
            # Imported lazily to avoid circular imports at module load time.
            from green_magic.data.commands_manager import CommandsManager
            from green_magic.data.backend import Backend
            from green_magic.data.data_manager import DataManager
            from green_magic.data.backend import panda_handling  # noqa: F401  side effect: registers the pandas engine

            # Replaced a leftover debug print with proper logging.
            _log.debug("DataEngine subclasses: %s", DataEngine.subclasses)

            cls.__instance = super().__new__(cls)
            # Initialize all instance state up-front so __call__, load_dataset
            # and save_dataset never hit AttributeError before first assignment.
            cls.__instance._datasets_dir = None
            cls.__instance._maps_dir = None
            cls.__instance._id2dataset = {}
            cls.__instance.selected_dt_id = None
            DataEngine.new('pd')
            cls.__instance.data_api = DataManager(CommandsManager(), Backend(DataEngine.create('pd')))
            # make the datapoint_manager listen to newly created Datapoints objects events
            cls.__instance.data_api.backend.engine.datapoints_factory.subject.attach(
                cls.__instance.data_api.backend.datapoints_manager)
        return cls.__instance

    def __call__(self, *args, **kwargs):
        """Call to update any of 'datasets_dir' and/or 'maps_dir'.

        :return: self, to allow chaining
        """
        self._datasets_dir = kwargs.get('datasets_dir', self._datasets_dir)
        self._maps_dir = kwargs.get('maps_dir', self._maps_dir)
        # NOTE(review): 'map_manager' is never assigned anywhere in this file;
        # this line raises AttributeError unless it is injected externally — confirm.
        self.map_manager.maps_dir = self._maps_dir
        return self

    def __init__(self, datasets_dir=None, maps_dir=None):
        # All state lives on the singleton and is set once in __new__;
        # __init__ intentionally does nothing because it runs on EVERY
        # instantiation and must not reset the shared state.
        pass

    @property
    def commands(self):
        """Get a Command object from the pool of Command prototypes."""
        return self.data_api.command

    @property
    def datasets_dir(self):
        """Directory from which datasets are loaded and into which they are saved."""
        return self._datasets_dir

    @datasets_dir.setter
    def datasets_dir(self, dataset_directory_path):
        self._datasets_dir = dataset_directory_path

    def strain_names(self, coordinates):
        """Return ids of datapoints whose best-matching unit sits at the given map cell.

        :param dict coordinates: mapping with integer 'x' and 'y' keys
        :return: list of datapoint ids
        """
        # Pair each datapoint id with its best-matching unit (bmu) coordinates.
        pairs = ((self.dt.datapoint_index2_id[i], self.som.bmus[i]) for i in range(len(self.dt)))
        return [name for name, bmu in pairs
                if bmu[0] == coordinates['x'] and bmu[1] == coordinates['y']]

    @property
    def dt(self):
        """The currently selected/active dataset.

        :return: reference to the active datapoints collection
        """
        return self.data_api.backend.datapoints_manager.datapoints

    @property
    def som(self):
        """The currently selected/active self-organizing map instance.

        :rtype: somoclu.Somoclu
        """
        return self.map_manager.som

    @property
    def model_quality(self):
        """A quality reporter for the model built on the active dataset."""
        return get_model_quality_reporter(self, self.selected_dt_id)

    def set_feature_vectors(self, list_of_variables=None):
        """Load encoded feature vectors for the active dataset (side effect only)."""
        self.get_feature_vectors(self.dt, list_of_variables=list_of_variables)

    def get_feature_vectors(self, strain_dataset, list_of_variables=None):
        """Return the encoded features of a dataset as a list of vectors.

        :param strain_dataset: dataset whose feature vectors to load
        :param list_of_variables: optional subset of variables to encode first
        :return: the loaded feature vectors
        """
        if list_of_variables:
            strain_dataset.use_variables(list_of_variables)
        return strain_dataset.load_feature_vectors()

    def load_dataset(self, a_file):
        """Load a pickled dataset from 'datasets_dir', register and select it.

        :param str a_file: file name, relative to 'datasets_dir'
        :return: the loaded dataset
        """
        strain_dataset = create_dataset_from_pickle(self._datasets_dir + '/' + a_file)
        self._id2dataset[strain_dataset.name] = strain_dataset
        self.selected_dt_id = strain_dataset.name
        _log.info("Loaded dataset with id '%s'", strain_dataset.name)
        return strain_dataset

    def save_active_dataset(self):
        """Pickle the currently selected dataset into 'datasets_dir'."""
        self.save_dataset(self.selected_dt_id)

    def save_dataset(self, strain_dataset_id):
        """Pickle the dataset with the given id into 'datasets_dir'.

        The file name is suffixed '-clean' or '-not-clean' depending on
        whether the dataset still has missing values.

        :param str strain_dataset_id: id of a previously registered dataset
        """
        dataset = self._id2dataset[strain_dataset_id]
        suffix = '-not-clean' if dataset.has_missing_values else '-clean'
        file_path = self._datasets_dir + '/' + dataset.name + suffix + '.pk'
        try:
            with open(file_path, 'wb') as pickled_dataset:
                pickle.dump(dataset, pickled_dataset, protocol=pickle.HIGHEST_PROTOCOL)
            _log.info("Saved dataset with id '%s' as %s", strain_dataset_id, file_path)
        except (OSError, pickle.PicklingError, RuntimeError) as e:
            # open/pickle.dump actually raise OSError/PicklingError; the
            # original caught only RuntimeError, letting real I/O errors escape.
            _log.debug(e)
            _log.info("Failed to save dataset with id %s", strain_dataset_id)

    def __getitem__(self, wd_id):
        """Select the dataset with the given id.

        :return: self, to allow chaining (e.g. master['id'].dt)
        """
        self.selected_dt_id = wd_id
        return self
151
152
153
class InvalidDatasetSelectionError(Exception):
    """Signals that a requested dataset id is unknown to StrainMaster."""
154