1
|
|
|
# Author: Simon Blanke |
2
|
|
|
# Email: [email protected] |
3
|
|
|
# License: MIT License |
4
|
|
|
|
5
|
|
|
import os |
6
|
|
|
import sys |
7
|
|
|
import json |
8
|
|
|
import dill |
9
|
|
|
import shutil |
10
|
|
|
import pathlib |
11
|
|
|
from fnmatch import fnmatch |
12
|
|
|
|
13
|
|
|
import numpy as np |
14
|
|
|
import pandas as pd |
15
|
|
|
|
16
|
|
|
from .util import get_hash, get_model_id, get_func_str |
17
|
|
|
from .paths import get_meta_path, get_meta_data_name |
18
|
|
|
|
19
|
|
|
|
20
|
|
|
# Root directory of the long term memory, resolved once at import time.
meta_path = get_meta_path()

# NOTE(review): the triple-quoted string below is dead code kept as a no-op
# module-level string literal (unfinished TODO stubs). Consider deleting it
# once the TODOs are resolved — commented-out code should not ship.
"""
def get_best_models(X, y):
    # TODO: model_dict key:model value:score

    return model_dict


def get_model_search_config(model):
    # TODO
    return search_config


def get_model_init_config(model):
    # TODO
    return init_config
"""
38
|
|
|
|
39
|
|
|
|
40
|
|
|
def get_best_model(X, y):
    """Find the best scoring model stored in the long term memory for (X, y).

    Walks the memory directory tree for meta data files whose name matches
    the dataset (X, y), selects the run with the highest "_score_" value and
    loads that model's pickled objective function and search space.

    Parameters
    ----------
    X : array-like
        Training features; only used to identify the dataset file pattern.
    y : array-like
        Training labels; only used to identify the dataset file pattern.

    Returns
    -------
    tuple
        (best_score, {objective_function: search_space},
        {objective_function: None})  # TODO: init_config

    Raises
    ------
    FileNotFoundError
        If no meta data for this dataset exists in the memory.
    """
    meta_data_paths = []
    pattern = get_meta_data_name(X, y)

    # Collect every meta data file in the memory tree belonging to (X, y).
    for dir_path, _subdirs, files in os.walk(meta_path):
        for name in files:
            if fnmatch(name, pattern):
                meta_data_paths.append(pathlib.PurePath(dir_path, name))

    score_best = -np.inf
    # Sentinels: previously these stayed unbound when no file matched,
    # crashing below with a confusing UnboundLocalError.
    obj_func = None
    search_space = None

    for meta_data_path in meta_data_paths:
        meta_data_path = str(meta_data_path)
        meta_data = pd.read_csv(meta_data_path)
        scores = meta_data["_score_"].values

        if len(scores) == 0:
            # Empty meta data file: nothing to compare, skip it.
            continue

        score_max = scores.max()

        if score_max > score_best:
            score_best = score_max

            # The model directory is everything before the dataset id part.
            model_path = meta_data_path.rsplit("dataset_id:", 1)[0]

            obj_func_path = model_path + "objective_function.pkl"
            search_space_path = model_path + "search_space.pkl"

            with open(obj_func_path, "rb") as fp:
                obj_func = dill.load(fp)

            with open(search_space_path, "rb") as fp:
                search_space = dill.load(fp)

    if obj_func is None:
        # Fail with a clear, catchable error instead of an UnboundLocalError.
        raise FileNotFoundError(
            "No meta data found in memory for the given dataset"
        )

    return (
        score_best,
        {obj_func: search_space},
        {obj_func: None},
    )  # TODO: init_config
82
|
|
|
|
83
|
|
|
|
84
|
|
|
def reset_memory(force_true=False):
    """Delete the entire long term memory.

    Parameters
    ----------
    force_true : bool, optional
        If True, skip the interactive confirmation prompt.
    """
    # Short-circuit: the prompt is only shown when not forced.
    if force_true or query_yes_no():
        _reset_memory()
89
|
|
|
|
90
|
|
|
|
91
|
|
|
def _reset_memory():
    """Remove every model sub-directory from the memory path and reset the
    model connections file to an empty mapping."""
    # First level of sub-directories inside the memory path (one per model).
    sub_dirs = next(os.walk(meta_path))[1]
    # Renamed loop variable from `dir`, which shadowed the builtin.
    for sub_dir in sub_dirs:
        shutil.rmtree(meta_path + sub_dir)

    # Clear the model connection graph.
    with open(meta_path + "model_connections.json", "w") as f:
        json.dump({}, f, indent=4)

    print("Memory reset successful")
100
|
|
|
|
101
|
|
|
|
102
|
|
|
def query_yes_no():
    """Interactively ask whether the long term memory should be deleted.

    Prompts repeatedly until a recognized answer is entered. An empty
    answer is treated as "no".

    Returns
    -------
    bool
        True for a "yes"-like answer, False otherwise.
    """
    answers = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
    question = "Delete the entire long term memory?"

    while True:
        sys.stdout.write(question + " [y/n] ")
        reply = input().lower()
        if not reply:
            # Empty input defaults to "no".
            return False
        if reply in answers:
            return answers[reply]
        sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")
115
|
|
|
|
116
|
|
|
|
117
|
|
|
def delete_model(model):
    """Remove all stored meta data of a model from the long term memory."""
    model_dir = meta_path + str(get_model_id(model))

    # isdir already implies existence, so a separate exists() check is not
    # needed.
    if os.path.isdir(model_dir):
        shutil.rmtree(model_dir)
        print("Model data successfully removed")
    else:
        print("Model data not found in memory")
126
|
|
|
|
127
|
|
|
|
128
|
|
|
def delete_model_dataset(model, X, y):
    """Remove the meta data csv of a single (model, dataset) pair."""
    csv_path = _get_file_path(model, X, y)

    # Guard clause: nothing to delete.
    if not os.path.exists(csv_path):
        print("Model data not found in memory")
        return

    os.remove(csv_path)
    print("Model data successfully removed")
136
|
|
|
|
137
|
|
|
|
138
|
|
|
def connect_model_IDs(model1, model2):
    """Connect the ids of two models in the model connections file so their
    long term memory entries can be shared."""
    # do checks if search space has same dim

    with open(meta_path + "model_connections.json") as f:
        connections = json.load(f)

    hash1 = get_model_id(model1)
    hash2 = get_model_id(model2)

    # Register the connection in both directions, in the same order the
    # original two-branch version used.
    for key_hash, value_hash in ((hash1, hash2), (hash2, hash1)):
        if key_hash in connections:
            connections = _connect_key2value(connections, key_hash, value_hash)
        else:
            connections[key_hash] = [value_hash]
            print("IDs successfully connected")

    with open(meta_path + "model_connections.json", "w") as f:
        json.dump(connections, f, indent=4)
165
|
|
|
|
166
|
|
|
|
167
|
|
|
def _connect_key2value(data, key_model, value_model): |
168
|
|
|
if value_model in data[key_model]: |
169
|
|
|
print("IDs of models are already connected") |
170
|
|
|
else: |
171
|
|
|
data[key_model].append(value_model) |
172
|
|
|
print("IDs successfully connected") |
173
|
|
|
|
174
|
|
|
return data |
175
|
|
|
|
176
|
|
|
|
177
|
|
|
def _split_key_value(data, key_model, value_model): |
178
|
|
|
if value_model in data[key_model]: |
179
|
|
|
data[key_model].remove(value_model) |
180
|
|
|
|
181
|
|
|
if len(data[key_model]) == 0: |
182
|
|
|
del data[key_model] |
183
|
|
|
print("ID connection successfully deleted") |
184
|
|
|
else: |
185
|
|
|
print("IDs of models are not connected") |
186
|
|
|
|
187
|
|
|
return data |
188
|
|
|
|
189
|
|
|
|
190
|
|
|
def split_model_IDs(model1, model2):
    """Disconnect the ids of two models in the model connections file."""
    # TODO: do checks if search space has same dim

    with open(meta_path + "model_connections.json") as f:
        connections = json.load(f)

    hash1 = get_model_id(model1)
    hash2 = get_model_id(model2)

    # Remove the connection in both directions, in the same order the
    # original two-branch version used.
    for key_hash, value_hash in ((hash1, hash2), (hash2, hash1)):
        if key_hash in connections:
            connections = _split_key_value(connections, key_hash, value_hash)
        else:
            print("IDs of models are not connected")

    with open(meta_path + "model_connections.json", "w") as f:
        json.dump(connections, f, indent=4)
215
|
|
|
|
216
|
|
|
|
217
|
|
|
def _get_file_path(model, X, y):
    """Build the csv file path storing the meta data of (model, X, y).

    The path is <meta_path>/<model_id>/<hash(X)>_<hash(y)>_.csv.
    """
    model_dir = meta_path + get_model_id(model) + "/"
    data_name = get_hash(X) + "_" + get_hash(y) + "_.csv"
    return model_dir + data_name
225
|
|
|
|