# This module fits a baseline to cyclic voltammetry data in order to
# calculate peak current values.
# If you wish to choose the best-fitted baseline,
# check out branch baseline_old, method2.
# If you have any questions, contact [email protected]
|
6
|
|
|
|
|
7
|
|
|
import pandas as pd |
|
8
|
|
|
import numpy as np |
|
9
|
|
|
import csv |
|
10
|
|
|
import matplotlib.pyplot as plt |
|
11
|
|
|
import warnings |
|
12
|
|
|
import matplotlib.cbook |
|
13
|
|
|
|
|
14
|
|
|
|
|
15
|
|
|
# Split the forward- and backward-sweep data to make processing easier.
|
16
|
|
|
def split(vector):
    """
    Split a pandas Series into its first and second halves.

    Used to separate the forward and backward sweep of the data so that
    peak detection and baseline finding can work on each half.

    Parameters
    ----------
    vector : pandas.Series
        Typically a DataFrame column, e.g. ``df['potentials']``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(first_half, second_half)``. The split point is
        ``len(vector) // 2``, so for an odd length the second half holds
        the extra element.

    Raises
    ------
    AssertionError
        If `vector` is not a pandas Series.
    """
    assert isinstance(vector, pd.Series), (
        "Input of the function should be pandas series")
    # Convert once and slice the same array for both halves.
    values = np.array(vector)
    midpoint = len(values) // 2
    return values[:midpoint], values[midpoint:]
|
39
|
|
|
|
|
40
|
|
|
|
|
41
|
|
|
def critical_idx(x, y):
    """
    Find the index where the data set is no longer linear.

    The point-to-point slopes ``diff(y) / diff(x)`` are smoothed into a
    short-window curve and a 15-point moving average. The positions where
    the difference between the two curves changes sign are collected and
    the sixth one (index 5) is returned.

    This is method 1, where the user cannot choose the baseline; see
    method 2 on the baseline branch to obtain other indexes.

    Parameters
    ----------
    x : numpy.ndarray
        x values, normally one output of ``split`` (e.g. the potentials).
    y : numpy.ndarray
        y values with the same first dimension as `x`.

    Returns
    -------
    numpy integer
        Position of the sixth sign change between the two smoothed curves.

    Raises
    ------
    AssertionError
        If `x` or `y` is not a numpy array.
    ValueError
        If `x` and `y` differ in their first dimension.
    IndexError
        If fewer than six sign changes are found.
    """
    assert isinstance(x, np.ndarray), (
        "Input of the function should be numpy array")
    assert isinstance(y, np.ndarray), (
        "Input of the function should be numpy array")
    if x.shape[0] != y.shape[0]:
        raise ValueError("x and y must have same first dimension, but "
                         "have shapes {} and {}".format(x.shape, y.shape))
    slopes = np.diff(y) / np.diff(x)

    # The window sizes are arbitrary and can be tuned; they control how
    # sensitive the result is to noise. Values are kept to 5 decimals.
    # NOTE(review): the short curve sums 5 slopes but divides by 10, and
    # stops 10 points before the end. It is unclear whether a 5-point or a
    # 10-point average was intended, so the arithmetic is left unchanged.
    short_curve = np.asarray(
        [round(sum(slopes[i:i + 5]) / 10, 5)
         for i in range(len(slopes) - 10)])
    long_curve = np.asarray(
        [round(sum(slopes[i:i + 15]) / 15, 5)
         for i in range(len(slopes) - 15)])

    # Intercepts of the two curves are the sign changes of their
    # difference. The comparison with zero must be applied to the result
    # of np.diff; applied before it, np.diff only sees a boolean mask.
    gap = long_curve - short_curve[:len(long_curve)]
    idx = np.argwhere(np.diff(np.sign(gap)) != 0).reshape(-1)
    if idx.size <= 5:
        raise IndexError(
            "need at least 6 intercepts of the moving average curves, "
            "found {}".format(idx.size))
    return idx[5]
|
98
|
|
|
|
|
99
|
|
|
|
|
100
|
|
|
def sum_mean(vector):
    """
    Return the sum and the mean of the given vector.

    Parameters
    ----------
    vector : numpy.ndarray
        Values to accumulate, normally one output of ``split``.

    Returns
    -------
    list
        ``[total, mean]`` where ``mean`` is ``total / len(vector)``.

    Raises
    ------
    AssertionError
        If `vector` is not a numpy array.
    ZeroDivisionError
        If `vector` is empty.
    """
    assert isinstance(vector, np.ndarray), (
        "Input of the function should be numpy array")
    # Builtin sum adds element by element from 0, left to right.
    total = sum(vector)
    return [total, total / len(vector)]
|
116
|
|
|
|
|
117
|
|
|
|
|
118
|
|
|
def multiplica(vector_x, vector_y):
    """
    Return the sum of the element-wise products of two vectors.

    Parameters
    ----------
    vector_x, vector_y : numpy.ndarray
        Outputs of ``split``. They may be the same vector or two different
        vectors of the same length. Elements are paired with ``zip``, so
        if the lengths differ the extra elements of the longer vector are
        ignored.

    Returns
    -------
    number
        ``sum(x_i * y_i)`` over the paired elements (``0`` for empty input).

    Raises
    ------
    AssertionError
        If either argument is not a numpy array.
    """
    assert isinstance(vector_x, np.ndarray), (
        "Input of the function should be numpy array")
    assert isinstance(vector_y, np.ndarray), (
        "Input of the function should be numpy array")
    return sum(x_i * y_i for x_i, y_i in zip(vector_x, vector_y))
|
140
|
|
|
|
|
141
|
|
|
|
|
142
|
|
|
def linear_coeff(x, y):
    """
    Return the slope and y-axis intercept of a straight line fitted
    to the given points.

    The coefficients are computed as
    ``m = (sum(x*y) - sum(x)*mean(y)) / (sum(x*x) - sum(x)*mean(x))`` and
    ``b = mean(y) - m*mean(x)``, using ``sum_mean`` and ``multiplica``.

    Parameters
    ----------
    x : numpy.ndarray
        x values, normally one output of ``split``.
    y : numpy.ndarray
        y values, normally one output of ``split``.

    Returns
    -------
    tuple
        ``(m, b)``: slope and y-axis intercept.
    """
    # Evaluate each statistic once instead of once per use.
    sum_x, mean_x = sum_mean(x)
    mean_y = sum_mean(y)[1]
    # m must be a plain number: it is multiplied by mean_x below.
    m = ((multiplica(x, y) - sum_x * mean_y) /
         (multiplica(x, x) - sum_x * mean_x))
    b = mean_y - m * mean_x
    return m, b
|
160
|
|
|
|
|
161
|
|
|
|
|
162
|
|
|
def y_fitted_line(m, b, x):
    """
    Evaluate the straight line ``m * x + b`` at every x value.

    Parameters
    ----------
    m : number
        Slope of the baseline.
    b : number
        y-axis intercept of the baseline.
    x : iterable of numbers
        x values at which the baseline is evaluated, normally one output
        of ``split``.

    Returns
    -------
    list
        The baseline y value for each element of `x`, in the same order.
    """
    return [m * x_value + b for x_value in x]
|
182
|
|
|
|
|
183
|
|
|
|
|
184
|
|
|
def linear_background(x, y):
    """
    Fit a linear baseline to cyclic voltammetry data.

    Wrapper that locates the critical index with ``critical_idx``, fits a
    straight line with ``linear_coeff`` to a window of points centred on
    that index, and evaluates the line over all of `x` with
    ``y_fitted_line``.

    Parameters
    ----------
    x : numpy.ndarray
        x values of the cyclic voltammetry data, normally one output of
        ``split``.
    y : numpy.ndarray
        y values of the cyclic voltammetry data, normally one output of
        ``split``.

    Returns
    -------
    list
        Baseline y value for every element of `x`.

    Raises
    ------
    AssertionError
        If `x` or `y` is not a numpy array.
    """
    assert isinstance(x, np.ndarray), (
        "Input of the function should be numpy array")
    assert isinstance(y, np.ndarray), (
        "Input of the function should be numpy array")
    # The +5 offset is an arbitrary number that can be tuned.
    idx = critical_idx(x, y) + 5
    # Fit over the window [idx - idx/2, idx + idx/2).
    half_width = int(0.5 * idx)
    window = slice(idx - half_width, idx + half_width)
    # linear_coeff returns the (m, b) pair; unpack it directly.
    m, b = linear_coeff(x[window], y[window])
    return y_fitted_line(m, b, x)
|
208
|
|
|
|