import pandas as pd
import numpy as np
import datetime
import os
import re
from os import listdir
from os.path import isfile, join
import matplotlib.pyplot as plt
import seaborn as sns

# The @profile decorator is injected into builtins by kernprof or
# memory_profiler at run time; this no-op fallback lets the module also
# run as a plain script without the profiler.
try:
    profile
except NameError:
    def profile(func):
        return func


@profile
def date_time_converter(date_time_list):
    """
    This function takes a list (or numpy array) of date_time values in
    MATLAB datenum format and returns a list of date_time values as human
    readable Python datetime objects.
    """

    # Empty list to hold the results
    date_time_human = []

    for i in date_time_list:
        # MATLAB datenum counts days from year 0 while Python ordinals
        # count from year 1, hence the 366 day offset; the fractional
        # part of the datenum carries the time of day
        date_time_human.append(datetime.datetime.fromordinal(int(i)) +
                               datetime.timedelta(days=i % 1) -
                               datetime.timedelta(days=366))

    return date_time_human
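
# Quick sanity check for the converter (illustrative value, not taken from
# the experiment data): MATLAB datenum 737061.5 is noon on January 1, 2018.
# >>> date_time_converter([737061.5])
# [datetime.datetime(2018, 1, 1, 12, 0)]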


@profile
def PL_samples_file_joiner(data_dir, file_name_format, ignore_file_indices):
    """
    This function reads in the data for the PL Samples experiment and
    returns a single dataframe with the cycles in ascending order.

    Args:
        data_dir (string): Absolute path to the data directory.
        file_name_format (string): Name of one of the data files, used to
            deduce the names of the other files in the series.
        ignore_file_indices (list of int): File numbers whose files should
            be ignored.

    Returns:
        The complete test data in a dataframe, with extra columns for the
        per-cycle charge, per-cycle discharge and capacity in Ah.
    """

    # Raise an exception if the type of the inputs is not correct
    if not isinstance(data_dir, str):
        raise TypeError('data_dir is not of type string')

    if not isinstance(file_name_format, str):
        raise TypeError('file_name_format is not of type string')

    if not isinstance(ignore_file_indices, list):
        raise TypeError("ignore_file_indices should be a list")

    for i in range(len(ignore_file_indices)):
        if not isinstance(ignore_file_indices[i], int):
            raise TypeError("""ignore_file_indices elements should be
                            of type integer""")

    if not os.path.exists(join(data_dir, file_name_format)):
        raise FileNotFoundError("File {} not found in the location {}"
                                .format(file_name_format, data_dir))

    # Get the list of files in the directory
    onlyfiles = [f for f in listdir(data_dir) if isfile(join(data_dir, f))]

    # Extract the experiment name from the file_name_format
    exp_name = file_name_format[0:4]

    # Empty dictionary to hold the dataframes for the various files
    dict_files = {}

    # Iterate over all the files of this experiment and get the file
    # number from each filename
    for filename in onlyfiles:
        if exp_name in filename:
            # Extract the file number from the name (a raw string, so the
            # parentheses and the dot are matched literally)
            file_number = re.search(exp_name + r'\((.+?)\)\.csv',
                                    filename).group(1)
            # Give a value of dataframe to each key
            dict_files[int(file_number)] = pd.read_csv(join(data_dir, filename))
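
    # For illustration, with the values used at the bottom of this script:
    # exp_name = 'PL12' and filename = 'PL12(4).csv' give file_number = '4',
    # so dict_files[4] holds the dataframe for that file.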

    # Empty dictionary to hold the ordered dataframes
    dict_ordered = {}
    # Sort the dictionary based on keys
    for key in sorted(dict_files.keys()):
        dict_ordered[key] = dict_files[key]

    # Keys of the files to keep: remove the ignore indices from all keys.
    # Sort again, because the set difference does not preserve order and
    # the files must be concatenated in ascending order.
    wanted_keys = sorted(set(dict_ordered.keys()) - set(ignore_file_indices))

    # Remove the ignored dataframes for characterization
    dict_ord_cycling_data = {k: dict_ordered[k] for k in wanted_keys}

    # Concatenate the dataframes to create the total dataframe
    df_out = None
    for k in wanted_keys:
        if df_out is None:
            # First file: start the output dataframe from its columns
            df_next = dict_ord_cycling_data[k]
            df_out = pd.DataFrame(data=None, columns=df_next.columns)
            df_out = pd.concat([df_out, df_next])
        else:
            # Later files: shift the cumulative columns so that they
            # continue from the last value of the previous file
            df_next = dict_ord_cycling_data[k]
            df_next['Cycle'] = np.array(df_next['Cycle']) + max(np.array(df_out['Cycle']))
            df_next['Time_sec'] = np.array(df_next['Time_sec']) + max(np.array(df_out['Time_sec']))
            df_next['Charge_Ah'] = np.array(df_next['Charge_Ah']) + max(np.array(df_out['Charge_Ah']))
            df_next['Discharge_Ah'] = np.array(df_next['Discharge_Ah']) + max(np.array(df_out['Discharge_Ah']))
            df_out = pd.concat([df_out, df_next])
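
    # For illustration (hypothetical numbers): if the first file ends at
    # Cycle 50, the cycles of the next file, which restart at 1, become
    # 51, 52, ... so the combined dataframe counts cycles continuously.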

    ####
    # This has been commented out for performance, as we do not need date_time
    ####
    # Convert the Date_Time from MATLAB datenum to human readable Date_Time
    # First convert the series into a list
    # date_time_matlab = df_out['Date_Time'].tolist()

    # # Apply the conversion to the list
    # df_out['Date_Time_new'] = date_time_converter(date_time_matlab)

    # Reset the index and drop the old index
    df_out_indexed = df_out.reset_index(drop=True)

    # Proceed further with correcting the capacity
    df_grouped = df_out_indexed.groupby(['Cycle']).count()

    # Cumulative row counts per cycle: entry i is the row index at which
    # cycle i+1 starts (one past the last row of cycle i)
    cycle_start_indices = df_grouped['Time_sec'].cumsum()
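
    # For illustration (hypothetical counts): if cycles 1, 2 and 3 have
    # 10, 12 and 9 rows, the cumulative sum is [10, 22, 31], so cycle 2
    # starts at row 10 and cycle 3 at row 22 of the reindexed dataframe.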

    # Get the Charge_Ah per cycle.
    # Create numpy arrays to hold the old Charge_Ah column, and then
    # perform the transformation on them rather than on the pandas series;
    # this is a lot faster in this case.
    charge_cycle_ah = np.array(df_out_indexed['Charge_Ah'])
    charge_ah = np.array(df_out_indexed['Charge_Ah'])

    for i in range(1, len(cycle_start_indices)):
        # Subtract the cumulative charge at the end of the previous cycle
        # from every row of the current cycle; the first cycle is left
        # unchanged, as the cumulative count starts there
        a = cycle_start_indices.iloc[i-1]
        b = cycle_start_indices.iloc[i]
        charge_cycle_ah[a:b] = charge_ah[a:b] - charge_ah[a-1]

    df_out_indexed['charge_cycle_ah'] = charge_cycle_ah
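
    # Worked example (hypothetical values): with cumulative
    # charge_ah = [0.4, 0.8, 1.2, 1.6] and cycle 2 starting at row 2,
    # rows 2-3 become [1.2 - 0.8, 1.6 - 0.8] = [0.4, 0.8], the charge
    # accumulated within cycle 2 alone.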

    # Get the Discharge_Ah per cycle in the same way
    discharge_cycle_ah = np.array(df_out_indexed['Discharge_Ah'])
    discharge_ah = np.array(df_out_indexed['Discharge_Ah'])

    for i in range(1, len(cycle_start_indices)):
        a = cycle_start_indices.iloc[i-1]
        b = cycle_start_indices.iloc[i]
        discharge_cycle_ah[a:b] = discharge_ah[a:b] - discharge_ah[a-1]

    df_out_indexed['discharge_cycle_ah'] = discharge_cycle_ah

    # This is the data column we can use for prediction. It is not totally
    # accurate, as it still has some points that go negative due to
    # incorrect Discharge_Ah values every few cycles, but the machine
    # learning algorithm should treat those points as outliers and
    # hopefully get over them. We can come back and correct this.
    df_out_indexed['capacity_ah'] = charge_cycle_ah - discharge_cycle_ah

    return df_out_indexed


def PL_samples_capacity_cycles(pl_df):
    """
    This function finds the capacity in each cycle from the cumulative
    capacity in the original file.

    Args:
        pl_df (pandas.DataFrame): Dataframe with the cumulative capacity
            data.

    Returns:
        Nothing yet; this function is still a stub.
    """

    return


if __name__ == '__main__':
    # Quick manual run of the file joiner on one dataset; the guard keeps
    # this from executing when the module is imported
    data_dir = '/home/chintan/uwdirect/chintan/BattDeg/data/PL 12,14'
    fnf = 'PL12(4).csv'
    ignore_indices = [1, 2, 3]

    out_df = PL_samples_file_joiner(data_dir, fnf, ignore_indices)
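
    # A quick look at the result (sketch; the exact columns depend on the
    # CSV files):
    # print(out_df[['Cycle', 'charge_cycle_ah', 'discharge_cycle_ah',
    #               'capacity_ah']].head())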