|
1
|
|
|
"""This module consists of all the functions requried |
|
2
|
|
|
to calculate the baselines.""" |
|
3
|
|
|
|
|
4
|
|
|
# This module is to fit baseline to calculate peak current |
|
5
|
|
|
# values from cyclic voltammetry data. |
|
6
|
|
|
# If you wish to choose best fitted baseline, |
|
7
|
|
|
# checkout branch baseline_old method2. |
|
8
|
|
|
# If you have any questions, contact [email protected] |
|
9
|
|
|
|
|
10
|
|
|
import pandas as pd |
|
11
|
|
|
import numpy as np |
|
12
|
|
|
|
|
13
|
|
|
|
|
14
|
|
|
# Split forward- and backward-sweeping data to make processing easier. |
|
15
|
|
View Code Duplication |
def split(vector):
    """
    Split a pandas Series into its first and second halves.

    Parameters
    ----------
    vector : pandas.Series
        Data to split, e.g. ``df['potentials']``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(first_half, second_half)``. The cut is made at ``len(vector) // 2``,
        so for an odd-length input the second half holds one more element.

    Raises
    ------
    AssertionError
        If ``vector`` is not a pandas Series.
    """
    assert isinstance(vector, pd.Series), "Input should be pandas series"
    values = np.array(vector)
    # The original sliced with the function object ``split`` instead of the
    # midpoint index, which raised TypeError on every call.
    midpoint = len(values) // 2
    return values[:midpoint], values[midpoint:]
|
36
|
|
|
|
|
37
|
|
|
|
|
38
|
|
View Code Duplication |
def critical_idx(arr_x, arr_y):
    """
    Find the index where the data set is no longer linear.

    The point-to-point slope ``diff(arr_y) / diff(arr_x)`` is smoothed with a
    short and a long (15-sample) moving window, each rounded to 5 decimals.
    The positions where the comparison of the two smoothed curves changes
    state are collected, and the sixth such position is returned.

    Parameters
    ----------
    arr_x : numpy.ndarray
        x values, e.g. one half of the potentials returned by ``split``.
    arr_y : numpy.ndarray
        y values with the same first dimension as ``arr_x``.

    Returns
    -------
    numpy integer
        The sixth (position 5) index in the array of detected crossings.
        This follows "method 1", where the user cannot choose the baseline;
        the branch "method 2" is the variant that allows choosing.

    Raises
    ------
    AssertionError
        If either input is not a numpy array.
    ValueError
        If the inputs differ in their first dimension.
    IndexError
        If fewer than six crossings are found.
    """
    assert isinstance(arr_x, np.ndarray), "Input should be numpy array"
    # The original called isinstance() with a single argument here, which
    # raised TypeError on every call.
    assert isinstance(arr_y, np.ndarray), "Input should be numpy array"
    if arr_x.shape[0] != arr_y.shape[0]:
        raise ValueError("x and y must have same first dimension, but "
                         "have shapes {} and {}".format(arr_x.shape, arr_y.shape))
    slopes = np.diff(arr_y) / np.diff(arr_x)

    # Both window sizes are arbitrary tuning values that control how sensitive
    # the detection is to noise. Stopping 10 / 15 samples before the end keeps
    # every window inside the slope array.
    # NOTE(review): the short window sums 5 samples but divides by 10 (the
    # surrounding comments mention both "5" and "10" points). Kept as-is so
    # that computed baselines do not shift -- confirm the intended window.
    short_ave = np.asarray(
        [round(sum(slopes[i:i + 5]) / 10, 5) for i in range(len(slopes) - 10)]
    )
    long_ave = np.asarray(
        [round(sum(slopes[i:i + 15]) / 15, 5) for i in range(len(slopes) - 15)]
    )

    # NOTE(review): ``!= 0`` is applied before np.diff, so this marks where
    # the two curves switch between "exactly equal" and "different" rather
    # than where their difference changes sign. Kept as-is to preserve
    # existing results -- confirm whether
    # ``np.diff(np.sign(...)) != 0`` was intended.
    differs = np.sign(long_ave - short_ave[:len(long_ave)]) != 0
    idx = np.argwhere(np.diff(differs)).reshape(-1)
    if len(idx) < 6:
        raise IndexError(
            "expected at least 6 moving-average crossings, found {}".format(len(idx))
        )
    return idx[5]
|
89
|
|
|
|
|
90
|
|
|
|
|
91
|
|
|
def sum_mean(vector):
    """
    Return the sum and the mean of the given vector.

    Parameters
    ----------
    vector : numpy.ndarray
        Values to accumulate, e.g. one half returned by ``split``.

    Returns
    -------
    list
        ``[total, total / len(vector)]``.

    Raises
    ------
    AssertionError
        If ``vector`` is not a numpy array.
    ZeroDivisionError
        If ``vector`` is empty.
    """
    # The original called isinstance() with a single argument, which raised
    # TypeError on every call.
    assert isinstance(vector, np.ndarray), "Input should be numpy array"
    total = sum(vector)
    return [total, total / len(vector)]
|
106
|
|
|
|
|
107
|
|
|
|
|
108
|
|
View Code Duplication |
def multiplica(vector_x, vector_y):
    """
    Return the sum of the element-wise products of two vectors.

    Parameters
    ----------
    vector_x, vector_y : numpy.ndarray
        Outputs of the ``split`` function. They may be the same vector or two
        different vectors; elements are paired with ``zip``, so pairing stops
        at the shorter input.

    Returns
    -------
    number
        Sum of ``x * y`` over the paired elements (``0`` for empty input).

    Raises
    ------
    AssertionError
        If either input is not a numpy array.
    """
    # The original called isinstance() with a single argument in both checks,
    # which raised TypeError on every call.
    assert isinstance(vector_x, np.ndarray), "Input should be numpy array"
    assert isinstance(vector_y, np.ndarray), "Input should be numpy array"
    return sum(x_i * y_i for x_i, y_i in zip(vector_x, vector_y))
|
127
|
|
|
|
|
128
|
|
View Code Duplication |
def linear_coeff(vec_x, vec_y):
    """
    Compute the slope ``m`` and y-axis intercept ``b`` of a straight-line fit.

    Parameters
    ----------
    vec_x : Output of the split vector function (x values).
    vec_y : Output of the split vector function (y values).

    Returns
    -------
    tuple
        ``(m, b)`` where
        ``m = (sum(x*y) - sum(x) * mean(y)) / (sum(x*x) - sum(x) * mean(x))``
        and ``b = mean(y) - m * mean(x)``.
    """
    # Helpers are invoked in the same order as the terms appear in the formula.
    cross_sum = multiplica(vec_x, vec_y)
    x_total, x_mean = sum_mean(vec_x)
    y_mean = sum_mean(vec_y)[1]
    square_sum = multiplica(vec_x, vec_x)

    slope = (cross_sum - x_total * y_mean) / (square_sum - x_total * x_mean)
    intercept = y_mean - slope * x_mean
    return slope, intercept
|
145
|
|
|
|
|
146
|
|
|
|
|
147
|
|
|
def y_fitted_line(m_val, b_val, vec_x):
    """
    Evaluate the straight line ``m_val * x + b_val`` at every x value.

    Parameters
    ----------
    m_val : inclination (slope) of the baseline.
    b_val : y intercept of the baseline.
    vec_x : iterable of x values, e.g. output of the split vector function.

    Returns
    -------
    list
        One fitted y value per element of ``vec_x``, in the same order.
    """
    return [m_val * x_val + b_val for x_val in vec_x]
|
166
|
|
|
|
|
167
|
|
|
|
|
168
|
|
View Code Duplication |
def linear_background(vec_x, vec_y):
    """
    Wrapper that builds the linear fitted baseline for cyclic voltammetry data.

    A pivot index is taken from ``critical_idx`` and offset by 5. A straight
    line is fitted to the samples in a window centred on that pivot, whose
    half-width is ``int(0.5 * pivot)``. The line is then evaluated over all
    of ``vec_x``.

    Parameters
    ----------
    vec_x : numpy.ndarray
        Output of the split vector function; x values of the CV data.
    vec_y : numpy.ndarray
        Output of the split vector function; y values of the CV data.

    Returns
    -------
    list
        Baseline y values, one per element of ``vec_x``.
    """
    assert isinstance(vec_x, np.ndarray), "Input of the function should be numpy array"
    assert isinstance(vec_y, np.ndarray), "Input of the function should be numpy array"
    # The +5 offset is an arbitrary tuning value.
    pivot = critical_idx(vec_x, vec_y) + 5
    half_width = int(0.5 * pivot)
    window = slice(pivot - half_width, pivot + half_width)
    slope, intercept = linear_coeff(vec_x[window], vec_y[window])
    return y_fitted_line(slope, intercept, vec_x)
|
189
|
|
|
|