|
1
|
|
|
import h5py |
|
2
|
|
|
import pickle |
|
3
|
|
|
import logging |
|
4
|
|
|
import xlsxwriter |
|
5
|
|
|
import collections |
|
6
|
|
|
from collections import OrderedDict |
|
7
|
|
|
from datetime import datetime |
|
8
|
|
|
import numpy as np |
|
9
|
|
|
from . import overall_performance_index, per_class_performance_index, get_performance_array, get_confusion_matrix |
|
10
|
|
|
from .event import score_segment |
|
11
|
|
|
from ..logging import logging_name |
|
12
|
|
|
from ..CASAS.fuel import CASASFuel |
|
13
|
|
|
|
|
14
|
|
|
# Module-level logger, named after this module per the standard logging convention.
logger = logging.getLogger(__name__) |
|
15
|
|
|
|
|
16
|
|
|
|
|
17
|
|
|
class LearningResult:
    """LearningResult is a class that stores results of a learning run.

    It may be a single-shot run or a time-based analysis. The result structure
    holds the parameters for the model as well as the evaluation result for
    easy plot.

    The parameters need to be set at the time of creation, such as number of
    total events, splits, class description. However, the prediction and
    event-based scoring can be added and modified at run-time - in case of
    failure at run-time.

    Parameters:
        name (:obj:`str`): Name of the learning run.
        classes (:obj:`list` of :obj:`str`): List of description of target classes.
        num_events (:obj:`int`): Number of total entries in the test set.
        bg_class (:obj:`str`): Name of the class that is considered background.
        splits (:obj:`OrderedDict`): Mapping with name of splits as key and the
            size of each split as value (an iterable of ``(name, size)`` pairs
            is accepted as well).
        description (:obj:`str`): Description of the learning result.

    Attributes:
        name (:obj:`str`): Name of the learning run.
        description (:obj:`str`): Description of the learning result.
        classes (:obj:`list` of :obj:`str`): List of target class descriptions.
        created_time (:class:`datetime.datetime`): Record creation time.
        modified_time (:class:`datetime.datetime`): Record modification time.
        performance (:obj:`dict`): Overall metrics, filled in by
            :meth:`calculate_overall_performance` and :meth:`event_based_scoring`.
        splits (:obj:`collections.OrderedDict`): Per-split bookkeeping; each
            value holds ``start``/``stop`` event indices and a ``model_path``.
        truth (:class:`numpy.ndarray`): Ground-truth class id per event.
        prediction (:class:`numpy.ndarray`): Predicted class id per event.
        time (:class:`numpy.ndarray`): Event timestamps (``datetime64[ns]``).
        num_events (:obj:`int`): Number of total entries in the test set.
        bg_class_id (:obj:`int`): Index of the background class, or -1 if none.

    Raises:
        ValueError: If ``bg_class`` is given but not listed in ``classes``.
    """
    def __init__(self, name, classes, num_events, bg_class=None, splits=None, description=''):
        cur_time = datetime.now()
        self.name = name
        self.description = description
        self.classes = classes
        self.created_time = cur_time
        self.modified_time = cur_time
        self.performance = {}
        self.splits = OrderedDict()
        if splits is not None:
            # Accept either a mapping of name -> length (as documented) or an
            # iterable of (name, length) pairs.  The original code iterated the
            # mapping directly, which yields keys only and breaks the tuple
            # unpacking.  The loop variable is renamed so it no longer shadows
            # the ``name`` parameter.
            pairs = splits.items() if isinstance(splits, dict) else splits
            index = 0
            for split_name, length in pairs:
                self.splits[split_name] = {
                    'start': index,
                    'stop': index + length,
                    'model_path': ''
                }
                index += length
        else:
            # No explicit splits: a single pseudo-split covering every event.
            self.splits['None'] = {
                'start': 0,
                'stop': num_events,
                'model_path': ''
            }
        # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` gives the
        # equivalent platform-default integer dtype.
        self.truth = np.empty(shape=(num_events, ), dtype=int)
        self.prediction = np.empty(shape=(num_events, ), dtype=int)
        self.time = np.empty(shape=(num_events, ), dtype='datetime64[ns]')
        self.num_events = num_events
        if bg_class is None:
            self.bg_class_id = -1
        elif bg_class in self.classes:
            self.bg_class_id = self.classes.index(bg_class)
        else:
            raise ValueError('Background class %s not in the target classes list.' % bg_class)
|
80
|
|
|
|
|
81
|
|
|
def record_result(self, model_file, time, truth, prediction, split=None): |
|
82
|
|
|
"""Record the result of a split |
|
83
|
|
|
|
|
84
|
|
|
Args: |
|
85
|
|
|
model_file (:obj:`str`): Path to the file that stores the model parameters |
|
86
|
|
|
split (:obj:`str`): Name of the split the record is for |
|
87
|
|
|
time (:obj:`list` of :obj:`datetime`): Corresponding datetime |
|
88
|
|
|
truth (:obj:`numpy.ndarray`): Array that holds the ground truth for the targeting split |
|
89
|
|
|
prediction (:obj:`numpy.ndarray`): Array that holds the prediction for the targeting split |
|
90
|
|
|
""" |
|
91
|
|
|
split_name = str(split) |
|
92
|
|
|
if split_name not in self.splits.keys(): |
|
93
|
|
|
return ValueError('Split %s not found in the result.' % split) |
|
94
|
|
|
start_pos = self.splits[split_name]['start'] |
|
95
|
|
|
stop_pos = self.splits[split_name]['stop'] |
|
96
|
|
|
self.truth[start_pos:stop_pos] = truth.astype(dtype=np.int) |
|
97
|
|
|
self.prediction[start_pos:stop_pos] = prediction.astype(dtype=np.int) |
|
98
|
|
|
self.time[start_pos:stop_pos] = time |
|
99
|
|
|
self.splits[split_name]['model_path'] = model_file |
|
100
|
|
|
# Calculate performance metrics for the split |
|
101
|
|
|
confusion_matrix = get_confusion_matrix(len(self.classes), |
|
102
|
|
|
self.truth[start_pos:stop_pos], |
|
103
|
|
|
self.prediction[start_pos:stop_pos] |
|
104
|
|
|
) |
|
105
|
|
|
self.splits[split_name]['confusion_matrix'] = confusion_matrix |
|
106
|
|
|
# After confusion metrix, one can calculate traditional multi-class performance |
|
107
|
|
|
overall_performance, per_class_performance = get_performance_array(confusion_matrix) |
|
108
|
|
|
self.splits[split_name]['overall_performance'] = overall_performance |
|
109
|
|
|
self.splits[split_name]['per_class_performance'] = per_class_performance |
|
110
|
|
|
# Note: Event-based scoring can be done after all split are logged in. |
|
111
|
|
|
|
|
112
|
|
|
def get_record_of_split(self, split): |
|
113
|
|
|
"""Get result corresponding to specific split |
|
114
|
|
|
|
|
115
|
|
|
Args: |
|
116
|
|
|
split (:obj:`str`): Name of the split. |
|
117
|
|
|
|
|
118
|
|
|
Returns: |
|
119
|
|
|
:obj:`dict`: |
|
120
|
|
|
""" |
|
121
|
|
|
if split in self.splits.keys(): |
|
122
|
|
|
return self.splits[split] |
|
123
|
|
|
else: |
|
124
|
|
|
logger.error('Cannot find split %s.' % split) |
|
125
|
|
|
return None |
|
126
|
|
|
|
|
127
|
|
|
def get_time_list(self): |
|
128
|
|
|
time_list = [datetime.utcfromtimestamp(item.astype(datetime) * 1e-9) for item in self.time] |
|
129
|
|
|
return time_list |
|
130
|
|
|
|
|
131
|
|
|
def event_based_scoring(self): |
|
132
|
|
|
"""Event based segment scoring |
|
133
|
|
|
""" |
|
134
|
|
|
self.performance['event_scoring'] = score_segment(self.truth, self.prediction, bg_label=self.bg_class_id) |
|
135
|
|
|
|
|
136
|
|
|
def calculate_overall_performance(self): |
|
137
|
|
|
"""Calculate overall performance |
|
138
|
|
|
""" |
|
139
|
|
|
confusion_matrix = get_confusion_matrix(len(self.classes), self.truth, self.prediction) |
|
140
|
|
|
overall_performance, per_class_performance = get_performance_array(confusion_matrix) |
|
141
|
|
|
self.performance['overall_performance'] = overall_performance |
|
142
|
|
|
self.performance['per_class_performance'] = per_class_performance |
|
143
|
|
|
|
|
144
|
|
|
def save_to_file(self, filename): |
|
145
|
|
|
"""Pickle to file |
|
146
|
|
|
""" |
|
147
|
|
|
f = open(filename, 'wb') |
|
148
|
|
|
pickle.dump(self, f, protocol=pickle.HIGHEST_PROTOCOL) |
|
149
|
|
|
f.close() |
|
150
|
|
|
|
|
151
|
|
|
@staticmethod |
|
152
|
|
|
def load_from_file(filename): |
|
153
|
|
|
"""Load LearningResult from file |
|
154
|
|
|
|
|
155
|
|
|
Args: |
|
156
|
|
|
filename (:obj:`str`): Path to the file that stores the result. |
|
157
|
|
|
|
|
158
|
|
|
Returns: |
|
159
|
|
|
:class:`pyActLearn.performance.record.LearningResult`: LearningResult object. |
|
160
|
|
|
""" |
|
161
|
|
|
f = open(filename, 'rb') |
|
162
|
|
|
result = pickle.load(f) |
|
163
|
|
|
f.close() |
|
164
|
|
|
return result |
|
165
|
|
|
|
|
166
|
|
|
    def export_to_xlsx(self, filename, home_info=None):
        """Export performance summaries to an XLSX workbook.

        Writes an 'overall' sheet (overall and per-class performance plus the
        confusion matrix), a 'weekly' sheet (one row per record), and one
        sheet per record with its per-class performance.

        Args:
            filename (:obj:`str`): path to the file
            home_info (:class:`pyActLearn.CASAS.fuel.CASASFuel`): dataset information

        NOTE(review): ``home_info`` is dereferenced unconditionally below, so
        the default of ``None`` will crash - confirm callers always pass it.
        NOTE(review): ``self.overall_performance``, ``self.per_class_performance``,
        ``self.confusion_matrix``, ``self.get_record_keys`` and
        ``self.get_record_by_key`` are not defined in the visible part of this
        class - presumably set or mixed in elsewhere; verify before relying on
        this method.
        """
        workbook = xlsxwriter.Workbook(filename)
        num_performance = len(per_class_performance_index)
        num_classes = len(self.classes)
        # Overall Performance Summary
        overall_sheet = workbook.add_worksheet('overall')
        overall_sheet.merge_range(0, 0, 0, len(overall_performance_index) - 1, 'Overall Performance')
        # Row 1: metric names; row 2: metric values.
        for c in range(len(overall_performance_index)):
            overall_sheet.write(1, c, str(overall_performance_index[c]))
            overall_sheet.write(2, c, self.overall_performance[c])
        overall_sheet.merge_range(4, 0, 4, len(per_class_performance_index), 'Per-Class Performance')
        overall_sheet.write(5, 0, 'Activities')
        for c in range(len(per_class_performance_index)):
            overall_sheet.write(5, c + 1, str(per_class_performance_index[c]))
        # One row per class: activity label, then its per-class metrics.
        for r in range(num_classes):
            label = home_info.get_activity_by_index(r)
            overall_sheet.write(r + 6, 0, label)
            for c in range(num_performance):
                overall_sheet.write(r + 6, c + 1, self.per_class_performance[r][c])
        overall_sheet.merge_range(8 + num_classes, 0, 8 + num_classes, num_classes, 'Confusion Matrix')
        # Confusion-matrix header row and first column share the same labels.
        for i in range(num_classes):
            label = home_info.get_activity_by_index(i)
            overall_sheet.write(9 + num_classes, i + 1, label)
            overall_sheet.write(10 + num_classes + i, 0, label)
        for r in range(num_classes):
            for c in range(num_classes):
                overall_sheet.write(10 + num_classes + r, c + 1, self.confusion_matrix[r][c])

        records = self.get_record_keys()

        # Weekly Performance Summary
        weekly_sheet = workbook.add_worksheet('weekly')
        weekly_list_title = ['dataset', '#week'] + overall_performance_index
        for c in range(len(weekly_list_title)):
            weekly_sheet.write(0, c, str(weekly_list_title[c]))
        r = 1
        for record_id in records:
            # NOTE(review): dataset name 'b1' is hard-coded here.
            weekly_sheet.write(r, 0, 'b1')
            weekly_sheet.write(r, 1, record_id)
            for c in range(len(overall_performance_index)):
                weekly_sheet.write(r, c + 2, '%.5f' % self.get_record_by_key(record_id)['overall_performance'][c])
            r += 1
        dataset_list_title = ['activities'] + per_class_performance_index
        # Per Week Per Class Summary
        for record_id in self.get_record_keys():
            cur_sheet = workbook.add_worksheet(record_id)
            for c in range(0, len(dataset_list_title)):
                cur_sheet.write(0, c, str(dataset_list_title[c]))
            for r in range(num_classes):
                label = home_info.get_activity_by_index(r)
                cur_sheet.write(r+1, 0, label)
                for c in range(num_performance):
                    cur_sheet.write(r + 1, c + 1, self.get_record_by_key(record_id)['per_class_performance'][r][c])
        workbook.close()
|
226
|
|
|
|
|
227
|
|
|
def export_annotation(self, filename): |
|
228
|
|
|
"""Export back annotation to file |
|
229
|
|
|
""" |
|
230
|
|
|
f = open(filename, 'w') |
|
231
|
|
|
for i in range(self.num_events): |
|
232
|
|
|
f.write('%s %s\n' % ( |
|
233
|
|
|
datetime.utcfromtimestamp(self.time[i].astype(datetime) * 1e-9).strftime('%Y-%m-%d %H:%M:%S'), |
|
234
|
|
|
self.classes[self.prediction[i]] |
|
235
|
|
|
)) |
|
236
|
|
|
f.close() |
|
237
|
|
|
|